1 /* 2 * Copyright (C) 1993-2001, 2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 * 8 * Copyright 2018 Joyent, Inc. 9 */ 10 11 #if !defined(lint) 12 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed"; 13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $"; 14 #endif 15 16 #include <sys/types.h> 17 #include <sys/errno.h> 18 #include <sys/param.h> 19 #include <sys/cpuvar.h> 20 #include <sys/open.h> 21 #include <sys/ioctl.h> 22 #include <sys/filio.h> 23 #include <sys/systm.h> 24 #include <sys/strsubr.h> 25 #include <sys/strsun.h> 26 #include <sys/cred.h> 27 #include <sys/ddi.h> 28 #include <sys/sunddi.h> 29 #include <sys/ksynch.h> 30 #include <sys/kmem.h> 31 #include <sys/mac_provider.h> 32 #include <sys/mkdev.h> 33 #include <sys/protosw.h> 34 #include <sys/socket.h> 35 #include <sys/dditypes.h> 36 #include <sys/cmn_err.h> 37 #include <sys/zone.h> 38 #include <net/if.h> 39 #include <net/af.h> 40 #include <net/route.h> 41 #include <netinet/in.h> 42 #include <netinet/in_systm.h> 43 #include <netinet/ip.h> 44 #include <netinet/ip_var.h> 45 #include <netinet/tcp.h> 46 #include <netinet/udp.h> 47 #include <netinet/tcpip.h> 48 #include <netinet/ip_icmp.h> 49 #include "netinet/ip_compat.h" 50 #ifdef USE_INET6 51 # include <netinet/icmp6.h> 52 #endif 53 #include "netinet/ip_fil.h" 54 #include "netinet/ip_nat.h" 55 #include "netinet/ip_frag.h" 56 #include "netinet/ip_state.h" 57 #include "netinet/ip_auth.h" 58 #include "netinet/ip_proxy.h" 59 #include "netinet/ipf_stack.h" 60 #ifdef IPFILTER_LOOKUP 61 # include "netinet/ip_lookup.h" 62 #endif 63 #include <inet/ip_ire.h> 64 65 #include <sys/md5.h> 66 #include <sys/neti.h> 67 68 static int frzerostats __P((caddr_t, ipf_stack_t *)); 69 static int fr_setipfloopback __P((int, ipf_stack_t *)); 70 static int fr_enableipf 
__P((ipf_stack_t *, int)); 71 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp)); 72 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *)); 73 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *)); 74 static int ipf_hook __P((hook_data_t, int, int, void *)); 75 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *)); 76 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *)); 77 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t, 78 void *)); 79 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *)); 80 static int ipf_hook4 __P((hook_data_t, int, int, void *)); 81 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *)); 82 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *)); 83 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t, 84 void *)); 85 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t, 86 void *)); 87 static int ipf_hook6 __P((hook_data_t, int, int, void *)); 88 89 static int ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *)); 90 static int ipf_hookviona_out __P((hook_event_token_t, hook_data_t, 91 void *)); 92 93 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 94 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *)); 95 96 static int ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *, 97 const char *, const char *, const char *)); 98 static int ipf_hook_instance_notify __P((hook_notify_cmd_t, void *, 99 const char *, const char *, const char *)); 100 101 #if SOLARIS2 < 10 102 #if SOLARIS2 >= 7 103 u_int *ip_ttl_ptr = NULL; 104 u_int *ip_mtudisc = NULL; 105 # if SOLARIS2 >= 8 106 int *ip_forwarding = NULL; 107 u_int *ip6_forwarding = NULL; 108 # else 109 u_int *ip_forwarding = NULL; 110 # endif 111 #else 112 u_long *ip_ttl_ptr = NULL; 113 u_long *ip_mtudisc = NULL; 114 u_long *ip_forwarding = NULL; 115 
#endif 116 #endif 117 118 vmem_t *ipf_minor; /* minor number arena */ 119 void *ipf_state; /* DDI state */ 120 121 /* 122 * GZ-controlled and per-zone stacks: 123 * 124 * For each non-global zone, we create two ipf stacks: the per-zone stack and 125 * the GZ-controlled stack. The per-zone stack can be controlled and observed 126 * from inside the zone or from the global zone. The GZ-controlled stack can 127 * only be controlled and observed from the global zone (though the rules 128 * still only affect that non-global zone). 129 * 130 * The two hooks are always arranged so that the GZ-controlled stack is always 131 * "outermost" with respect to the zone. The traffic flow then looks like 132 * this: 133 * 134 * Inbound: 135 * 136 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone 137 * 138 * Outbound: 139 * 140 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone 141 */ 142 143 /* IPv4 hook names */ 144 char *hook4_nicevents = "ipfilter_hook4_nicevents"; 145 char *hook4_nicevents_gz = "ipfilter_hook4_nicevents_gz"; 146 char *hook4_in = "ipfilter_hook4_in"; 147 char *hook4_in_gz = "ipfilter_hook4_in_gz"; 148 char *hook4_out = "ipfilter_hook4_out"; 149 char *hook4_out_gz = "ipfilter_hook4_out_gz"; 150 char *hook4_loop_in = "ipfilter_hook4_loop_in"; 151 char *hook4_loop_in_gz = "ipfilter_hook4_loop_in_gz"; 152 char *hook4_loop_out = "ipfilter_hook4_loop_out"; 153 char *hook4_loop_out_gz = "ipfilter_hook4_loop_out_gz"; 154 155 /* IPv6 hook names */ 156 char *hook6_nicevents = "ipfilter_hook6_nicevents"; 157 char *hook6_nicevents_gz = "ipfilter_hook6_nicevents_gz"; 158 char *hook6_in = "ipfilter_hook6_in"; 159 char *hook6_in_gz = "ipfilter_hook6_in_gz"; 160 char *hook6_out = "ipfilter_hook6_out"; 161 char *hook6_out_gz = "ipfilter_hook6_out_gz"; 162 char *hook6_loop_in = "ipfilter_hook6_loop_in"; 163 char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz"; 164 char *hook6_loop_out = "ipfilter_hook6_loop_out"; 165 char *hook6_loop_out_gz = 
"ipfilter_hook6_loop_out_gz"; 166 167 /* viona hook names */ 168 char *hook_viona_in = "ipfilter_hookviona_in"; 169 char *hook_viona_in_gz = "ipfilter_hookviona_in_gz"; 170 char *hook_viona_out = "ipfilter_hookviona_out"; 171 char *hook_viona_out_gz = "ipfilter_hookviona_out_gz"; 172 173 /* ------------------------------------------------------------------------ */ 174 /* Function: ipldetach */ 175 /* Returns: int - 0 == success, else error. */ 176 /* Parameters: Nil */ 177 /* */ 178 /* This function is responsible for undoing anything that might have been */ 179 /* done in a call to iplattach(). It must be able to clean up from a call */ 180 /* to iplattach() that did not succeed. Why might that happen? Someone */ 181 /* configures a table to be so large that we cannot allocate enough memory */ 182 /* for it. */ 183 /* ------------------------------------------------------------------------ */ 184 int ipldetach(ifs) 185 ipf_stack_t *ifs; 186 { 187 188 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); 189 190 #if SOLARIS2 < 10 191 192 if (ifs->ifs_fr_control_forwarding & 2) { 193 if (ip_forwarding != NULL) 194 *ip_forwarding = 0; 195 #if SOLARIS2 >= 8 196 if (ip6_forwarding != NULL) 197 *ip6_forwarding = 0; 198 #endif 199 } 200 #endif 201 202 /* 203 * This lock needs to be dropped around the net_hook_unregister calls 204 * because we can deadlock here with: 205 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 206 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running) 207 */ 208 RWLOCK_EXIT(&ifs->ifs_ipf_global); 209 210 #define UNDO_HOOK(_f, _b, _e, _h) \ 211 do { \ 212 if (ifs->_f != NULL) { \ 213 if (ifs->_b) { \ 214 int tmp = net_hook_unregister(ifs->_f, \ 215 _e, ifs->_h); \ 216 ifs->_b = (tmp != 0 && tmp != ENXIO); \ 217 if (!ifs->_b && ifs->_h != NULL) { \ 218 hook_free(ifs->_h); \ 219 ifs->_h = NULL; \ 220 } \ 221 } else if (ifs->_h != NULL) { \ 222 hook_free(ifs->_h); \ 223 ifs->_h = NULL; \ 224 } \ 225 } \ 226 _NOTE(CONSTCOND) 
\ 227 } while (0) 228 229 /* 230 * Remove IPv6 Hooks 231 */ 232 if (ifs->ifs_ipf_ipv6 != NULL) { 233 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in, 234 NH_PHYSICAL_IN, ifs_ipfhook6_in); 235 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out, 236 NH_PHYSICAL_OUT, ifs_ipfhook6_out); 237 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events, 238 NH_NIC_EVENTS, ifs_ipfhook6_nicevents); 239 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in, 240 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in); 241 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out, 242 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out); 243 244 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0) 245 goto detach_failed; 246 ifs->ifs_ipf_ipv6 = NULL; 247 } 248 249 /* 250 * Remove IPv4 Hooks 251 */ 252 if (ifs->ifs_ipf_ipv4 != NULL) { 253 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in, 254 NH_PHYSICAL_IN, ifs_ipfhook4_in); 255 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out, 256 NH_PHYSICAL_OUT, ifs_ipfhook4_out); 257 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events, 258 NH_NIC_EVENTS, ifs_ipfhook4_nicevents); 259 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in, 260 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in); 261 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out, 262 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out); 263 264 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0) 265 goto detach_failed; 266 ifs->ifs_ipf_ipv4 = NULL; 267 } 268 269 /* 270 * Remove notification of viona hooks 271 */ 272 net_instance_notify_unregister(ifs->ifs_netid, 273 ipf_hook_instance_notify); 274 275 #undef UNDO_HOOK 276 277 /* 278 * Normally, viona will unregister itself before ipldetach() is called, 279 * so these will be no-ops, but out of caution, we try to make sure 280 * we've removed any of our references. 
281 */ 282 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL, 283 NH_PHYSICAL_IN); 284 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL, 285 NH_PHYSICAL_OUT); 286 287 { 288 char netidstr[12]; /* Large enough for INT_MAX + NUL */ 289 (void) snprintf(netidstr, sizeof (netidstr), "%d", 290 ifs->ifs_netid); 291 292 /* 293 * The notify callbacks expect the netid value passed as a 294 * string in the third argument. To prevent confusion if 295 * traced, we pass the same value the nethook framework would 296 * pass, even though the callback does not currently use the 297 * value. 298 */ 299 (void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr, 300 NULL, Hn_VIONA); 301 } 302 303 #ifdef IPFDEBUG 304 cmn_err(CE_CONT, "ipldetach()\n"); 305 #endif 306 307 WRITE_ENTER(&ifs->ifs_ipf_global); 308 fr_deinitialise(ifs); 309 310 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); 311 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs); 312 313 if (ifs->ifs_ipf_locks_done == 1) { 314 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock); 315 MUTEX_DESTROY(&ifs->ifs_ipf_rw); 316 RW_DESTROY(&ifs->ifs_ipf_tokens); 317 RW_DESTROY(&ifs->ifs_ipf_ipidfrag); 318 ifs->ifs_ipf_locks_done = 0; 319 } 320 321 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out || 322 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in || 323 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events || 324 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out || 325 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out) 326 return -1; 327 328 return 0; 329 330 detach_failed: 331 WRITE_ENTER(&ifs->ifs_ipf_global); 332 return -1; 333 } 334 335 int iplattach(ifs) 336 ipf_stack_t *ifs; 337 { 338 #if SOLARIS2 < 10 339 int i; 340 #endif 341 netid_t id = ifs->ifs_netid; 342 343 #ifdef IPFDEBUG 344 cmn_err(CE_CONT, "iplattach()\n"); 345 #endif 346 347 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); 348 ifs->ifs_fr_flags = IPF_LOGGING; 349 #ifdef 
_KERNEL 350 ifs->ifs_fr_update_ipid = 0; 351 #else 352 ifs->ifs_fr_update_ipid = 1; 353 #endif 354 ifs->ifs_fr_minttl = 4; 355 ifs->ifs_fr_icmpminfragmtu = 68; 356 #if defined(IPFILTER_DEFAULT_BLOCK) 357 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH; 358 #else 359 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; 360 #endif 361 362 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache)); 363 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex"); 364 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex"); 365 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); 366 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock"); 367 ifs->ifs_ipf_locks_done = 1; 368 369 if (fr_initialise(ifs) < 0) 370 return -1; 371 372 /* 373 * For incoming packets, we want the GZ-controlled hooks to run before 374 * the per-zone hooks, regardless of what order they're are installed. 375 * See the "GZ-controlled and per-zone stacks" comment block at the top 376 * of this file. 377 */ 378 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \ 379 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ 380 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \ 381 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); 382 383 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4, 384 hook4_nicevents, hook4_nicevents_gz, ifs); 385 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in, 386 hook4_in, hook4_in_gz, ifs); 387 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in, 388 hook4_loop_in, hook4_loop_in_gz, ifs); 389 390 /* 391 * For outgoing packets, we want the GZ-controlled hooks to run after 392 * the per-zone hooks, regardless of what order they're are installed. 393 * See the "GZ-controlled and per-zone stacks" comment block at the top 394 * of this file. 395 */ 396 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \ 397 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ 398 (x)->h_hint = ifs->ifs_gz_controlled ? 
HH_AFTER : HH_BEFORE; \ 399 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); 400 401 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out, 402 hook4_out, hook4_out_gz, ifs); 403 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out, 404 hook4_loop_out, hook4_loop_out_gz, ifs); 405 406 /* 407 * If we hold this lock over all of the net_hook_register calls, we 408 * can cause a deadlock to occur with the following lock ordering: 409 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 410 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path) 411 */ 412 RWLOCK_EXIT(&ifs->ifs_ipf_global); 413 414 /* 415 * Add IPv4 hooks 416 */ 417 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET); 418 if (ifs->ifs_ipf_ipv4 == NULL) 419 goto hookup_failed; 420 421 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4, 422 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0); 423 if (!ifs->ifs_hook4_nic_events) 424 goto hookup_failed; 425 426 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4, 427 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0); 428 if (!ifs->ifs_hook4_physical_in) 429 goto hookup_failed; 430 431 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4, 432 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0); 433 if (!ifs->ifs_hook4_physical_out) 434 goto hookup_failed; 435 436 if (ifs->ifs_ipf_loopback) { 437 ifs->ifs_hook4_loopback_in = (net_hook_register( 438 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 439 ifs->ifs_ipfhook4_loop_in) == 0); 440 if (!ifs->ifs_hook4_loopback_in) 441 goto hookup_failed; 442 443 ifs->ifs_hook4_loopback_out = (net_hook_register( 444 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 445 ifs->ifs_ipfhook4_loop_out) == 0); 446 if (!ifs->ifs_hook4_loopback_out) 447 goto hookup_failed; 448 } 449 450 /* 451 * Add IPv6 hooks 452 */ 453 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6); 454 if (ifs->ifs_ipf_ipv6 == NULL) 455 goto hookup_failed; 456 457 
HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6, 458 hook6_nicevents, hook6_nicevents_gz, ifs); 459 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in, 460 hook6_in, hook6_in_gz, ifs); 461 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in, 462 hook6_loop_in, hook6_loop_in_gz, ifs); 463 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out, 464 hook6_out, hook6_out_gz, ifs); 465 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out, 466 hook6_loop_out, hook6_loop_out_gz, ifs); 467 468 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6, 469 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0); 470 if (!ifs->ifs_hook6_nic_events) 471 goto hookup_failed; 472 473 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6, 474 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0); 475 if (!ifs->ifs_hook6_physical_in) 476 goto hookup_failed; 477 478 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6, 479 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0); 480 if (!ifs->ifs_hook6_physical_out) 481 goto hookup_failed; 482 483 if (ifs->ifs_ipf_loopback) { 484 ifs->ifs_hook6_loopback_in = (net_hook_register( 485 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 486 ifs->ifs_ipfhook6_loop_in) == 0); 487 if (!ifs->ifs_hook6_loopback_in) 488 goto hookup_failed; 489 490 ifs->ifs_hook6_loopback_out = (net_hook_register( 491 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 492 ifs->ifs_ipfhook6_loop_out) == 0); 493 if (!ifs->ifs_hook6_loopback_out) 494 goto hookup_failed; 495 } 496 497 /* 498 * VIONA INET hooks. While the nethook framework allows us to register 499 * hooks for events that haven't been registered yet, we instead 500 * register and unregister our hooks in response to notifications 501 * about the viona hooks from the nethook framework. This prevents 502 * problems when the viona module gets unloaded while the ipf module 503 * does not. 
If we do not unregister our hooks after the viona module 504 * is unloaded, the viona module cannot later re-register them if it 505 * gets reloaded. As the ip, vnd, and ipf modules are rarely unloaded 506 * even on DEBUG kernels, they do not experience this issue. 507 */ 508 if (net_instance_notify_register(id, ipf_hook_instance_notify, 509 ifs) != 0) 510 goto hookup_failed; 511 512 /* 513 * Reacquire ipf_global, now it is safe. 514 */ 515 WRITE_ENTER(&ifs->ifs_ipf_global); 516 517 /* Do not use private interface ip_params_arr[] in Solaris 10 */ 518 #if SOLARIS2 < 10 519 520 #if SOLARIS2 >= 8 521 ip_forwarding = &ip_g_forward; 522 #endif 523 /* 524 * XXX - There is no terminator for this array, so it is not possible 525 * to tell if what we are looking for is missing and go off the end 526 * of the array. 527 */ 528 529 #if SOLARIS2 <= 8 530 for (i = 0; ; i++) { 531 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) { 532 ip_ttl_ptr = &ip_param_arr[i].ip_param_value; 533 } else if (!strcmp(ip_param_arr[i].ip_param_name, 534 "ip_path_mtu_discovery")) { 535 ip_mtudisc = &ip_param_arr[i].ip_param_value; 536 } 537 #if SOLARIS2 < 8 538 else if (!strcmp(ip_param_arr[i].ip_param_name, 539 "ip_forwarding")) { 540 ip_forwarding = &ip_param_arr[i].ip_param_value; 541 } 542 #else 543 else if (!strcmp(ip_param_arr[i].ip_param_name, 544 "ip6_forwarding")) { 545 ip6_forwarding = &ip_param_arr[i].ip_param_value; 546 } 547 #endif 548 549 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL && 550 #if SOLARIS2 >= 8 551 ip6_forwarding != NULL && 552 #endif 553 ip_forwarding != NULL) 554 break; 555 } 556 #endif 557 558 if (ifs->ifs_fr_control_forwarding & 1) { 559 if (ip_forwarding != NULL) 560 *ip_forwarding = 1; 561 #if SOLARIS2 >= 8 562 if (ip6_forwarding != NULL) 563 *ip6_forwarding = 1; 564 #endif 565 } 566 567 #endif 568 569 return 0; 570 hookup_failed: 571 WRITE_ENTER(&ifs->ifs_ipf_global); 572 return -1; 573 } 574 575 /* 
------------------------------------------------------------------------ */
/*
 * Called whenever a nethook protocol is registered or unregistered.  Currently
 * only used to add or remove the hooks for viona.
 *
 * command	HN_REGISTER / HN_UNREGISTER (anything else is ignored)
 * arg		the ipf_stack_t the callback was registered with
 * name		hook family name; only Hn_VIONA is acted upon
 * he_name	hook event name (NH_PHYSICAL_IN or NH_PHYSICAL_OUT)
 *
 * While the function signature requires returning int, nothing
 * in usr/src/uts/common/io/hook.c that invokes the callbacks
 * captures the return value (nor is there currently any documentation
 * on what return values should be).  For now at least, we'll return 0
 * on success (or 'not applicable') or an error value.  Even if the
 * nethook framework doesn't use the return value, it can be observed via
 * dtrace if needed.
 */
static int
ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
    const char *name, const char *dummy __unused, const char *he_name)
{
	ipf_stack_t *ifs = arg;
	hook_t **hookpp;
	char *hook_name, *hint_name;
	hook_func_t hookfn;
	boolean_t *hookedp;
	hook_hint_t hint;
	boolean_t out;
	int ret = 0;

	const boolean_t gz = ifs->ifs_gz_controlled;

	/* We currently only care about viona hooks notifications */
	if (strcmp(name, Hn_VIONA) != 0)
		return (0);

	if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
		out = B_FALSE;
	} else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
		out = B_TRUE;
	} else {
		/*
		 * If we've added more hook events to viona, we must add
		 * the corresponding handling here (even if it's just to
		 * ignore it) to prevent the firewall from not working as
		 * intended.
		 */
		cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
		    he_name);

		return (0);
	}

	/*
	 * Pick the per-direction state.  The hook's own name goes in
	 * hook_name; 'name' keeps holding the hook family so that the
	 * diagnostic below reports what its label says.  The hint orders
	 * this hook relative to its sibling: the GZ-controlled hook runs
	 * before the per-zone hook inbound and after it outbound.
	 */
	if (out) {
		hookpp = &ifs->ifs_ipfhookviona_out;
		hookfn = ipf_hookviona_out;
		hookedp = &ifs->ifs_hookviona_physical_out;
		hook_name = gz ? hook_viona_out_gz : hook_viona_out;
		hint = gz ? HH_AFTER : HH_BEFORE;
		hint_name = gz ? hook_viona_out : hook_viona_out_gz;
	} else {
		hookpp = &ifs->ifs_ipfhookviona_in;
		hookfn = ipf_hookviona_in;
		hookedp = &ifs->ifs_hookviona_physical_in;
		hook_name = gz ? hook_viona_in_gz : hook_viona_in;
		hint = gz ? HH_BEFORE : HH_AFTER;
		hint_name = gz ? hook_viona_in : hook_viona_in_gz;
	}

	switch (command) {
	default:
	case HN_NONE:
		break;
	case HN_REGISTER:
		HOOK_INIT(*hookpp, hookfn, hook_name, ifs);
		(*hookpp)->h_hint = hint;
		(*hookpp)->h_hintvalue = (uintptr_t)hint_name;
		ret = net_hook_register(ifs->ifs_ipf_viona,
		    (char *)he_name, *hookpp);
		if (ret != 0) {
			cmn_err(CE_NOTE, "%s: could not register hook "
			    "(hook family=%s hook=%s) err=%d", __func__,
			    name, he_name, ret);
			*hookedp = B_FALSE;
			return (ret);
		}
		*hookedp = B_TRUE;
		break;
	case HN_UNREGISTER:
		if (ifs->ifs_ipf_viona == NULL)
			break;

		/* Only call into the framework if we believe we are hooked. */
		ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
		    (char *)he_name, *hookpp) : 0;
		/* ENXIO is treated like success: release our hook_t too. */
		if ((ret == 0 || ret == ENXIO)) {
			if (*hookpp != NULL) {
				hook_free(*hookpp);
				*hookpp = NULL;
			}
			*hookedp = B_FALSE;
		}
		break;
	}

	return (ret);
}

/*
 * Called whenever a new nethook instance is created.  Currently only used
 * with the Hn_VIONA nethooks.  Similar to ipf_hook_protocol_notify, our
 * function signature must return an int, though the result is never used.
 * We elect to return 0 on success (or not applicable) or a non-zero value
 * on error.
684 */ 685 static int 686 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg, 687 const char *netid, const char *dummy __unused, const char *instance) 688 { 689 ipf_stack_t *ifs = arg; 690 int ret = 0; 691 692 /* We currently only care about viona hooks */ 693 if (strcmp(instance, Hn_VIONA) != 0) 694 return (0); 695 696 switch (command) { 697 case HN_NONE: 698 default: 699 return (0); 700 case HN_REGISTER: 701 ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid, 702 NHF_VIONA); 703 704 if (ifs->ifs_ipf_viona == NULL) 705 return (EPROTONOSUPPORT); 706 707 ret = net_protocol_notify_register(ifs->ifs_ipf_viona, 708 ipf_hook_protocol_notify, ifs); 709 VERIFY(ret == 0 || ret == ESHUTDOWN); 710 break; 711 case HN_UNREGISTER: 712 if (ifs->ifs_ipf_viona == NULL) 713 break; 714 VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona, 715 ipf_hook_protocol_notify)); 716 VERIFY0(net_protocol_release(ifs->ifs_ipf_viona)); 717 ifs->ifs_ipf_viona = NULL; 718 break; 719 } 720 721 return (ret); 722 } 723 724 static int fr_setipfloopback(set, ifs) 725 int set; 726 ipf_stack_t *ifs; 727 { 728 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL) 729 return EFAULT; 730 731 if (set && !ifs->ifs_ipf_loopback) { 732 ifs->ifs_ipf_loopback = 1; 733 734 ifs->ifs_hook4_loopback_in = (net_hook_register( 735 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 736 ifs->ifs_ipfhook4_loop_in) == 0); 737 if (!ifs->ifs_hook4_loopback_in) 738 return EINVAL; 739 740 ifs->ifs_hook4_loopback_out = (net_hook_register( 741 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 742 ifs->ifs_ipfhook4_loop_out) == 0); 743 if (!ifs->ifs_hook4_loopback_out) 744 return EINVAL; 745 746 ifs->ifs_hook6_loopback_in = (net_hook_register( 747 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 748 ifs->ifs_ipfhook6_loop_in) == 0); 749 if (!ifs->ifs_hook6_loopback_in) 750 return EINVAL; 751 752 ifs->ifs_hook6_loopback_out = (net_hook_register( 753 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 754 ifs->ifs_ipfhook6_loop_out) == 0); 755 if 
(!ifs->ifs_hook6_loopback_out) 756 return EINVAL; 757 758 } else if (!set && ifs->ifs_ipf_loopback) { 759 ifs->ifs_ipf_loopback = 0; 760 761 ifs->ifs_hook4_loopback_in = 762 (net_hook_unregister(ifs->ifs_ipf_ipv4, 763 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 764 if (ifs->ifs_hook4_loopback_in) 765 return EBUSY; 766 767 ifs->ifs_hook4_loopback_out = 768 (net_hook_unregister(ifs->ifs_ipf_ipv4, 769 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0); 770 if (ifs->ifs_hook4_loopback_out) 771 return EBUSY; 772 773 ifs->ifs_hook6_loopback_in = 774 (net_hook_unregister(ifs->ifs_ipf_ipv6, 775 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 776 if (ifs->ifs_hook6_loopback_in) 777 return EBUSY; 778 779 ifs->ifs_hook6_loopback_out = 780 (net_hook_unregister(ifs->ifs_ipf_ipv6, 781 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0); 782 if (ifs->ifs_hook6_loopback_out) 783 return EBUSY; 784 } 785 return 0; 786 } 787 788 789 /* 790 * Filter ioctl interface. 791 */ 792 /*ARGSUSED*/ 793 int iplioctl(dev, cmd, data, mode, cp, rp) 794 dev_t dev; 795 int cmd; 796 #if SOLARIS2 >= 7 797 intptr_t data; 798 #else 799 int *data; 800 #endif 801 int mode; 802 cred_t *cp; 803 int *rp; 804 { 805 int error = 0, tmp; 806 friostat_t fio; 807 minor_t unit; 808 u_int enable; 809 ipf_stack_t *ifs; 810 zoneid_t zid; 811 ipf_devstate_t *isp; 812 813 #ifdef IPFDEBUG 814 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n", 815 dev, cmd, data, mode, cp, rp); 816 #endif 817 unit = getminor(dev); 818 819 isp = ddi_get_soft_state(ipf_state, unit); 820 if (isp == NULL) 821 return ENXIO; 822 unit = isp->ipfs_minor; 823 824 zid = crgetzoneid(cp); 825 if (cmd == SIOCIPFZONESET) { 826 if (zid == GLOBAL_ZONEID) 827 return fr_setzoneid(isp, (caddr_t) data); 828 return EACCES; 829 } 830 831 /* 832 * ipf_find_stack returns with a read lock on ifs_ipf_global 833 */ 834 ifs = ipf_find_stack(zid, isp); 835 if (ifs == NULL) 836 return ENXIO; 837 838 if (ifs->ifs_fr_running <= 0) { 839 if (unit != IPL_LOGIPF) { 
840 RWLOCK_EXIT(&ifs->ifs_ipf_global); 841 return EIO; 842 } 843 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && 844 cmd != SIOCIPFSET && cmd != SIOCFRENB && 845 cmd != SIOCGETFS && cmd != SIOCGETFF) { 846 RWLOCK_EXIT(&ifs->ifs_ipf_global); 847 return EIO; 848 } 849 } 850 851 if (ifs->ifs_fr_enable_active != 0) { 852 RWLOCK_EXIT(&ifs->ifs_ipf_global); 853 return EBUSY; 854 } 855 856 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp), 857 curproc, ifs); 858 if (error != -1) { 859 RWLOCK_EXIT(&ifs->ifs_ipf_global); 860 return error; 861 } 862 error = 0; 863 864 switch (cmd) 865 { 866 case SIOCFRENB : 867 if (!(mode & FWRITE)) 868 error = EPERM; 869 else { 870 error = COPYIN((caddr_t)data, (caddr_t)&enable, 871 sizeof(enable)); 872 if (error != 0) { 873 error = EFAULT; 874 break; 875 } 876 877 RWLOCK_EXIT(&ifs->ifs_ipf_global); 878 WRITE_ENTER(&ifs->ifs_ipf_global); 879 880 /* 881 * We must recheck fr_enable_active here, since we've 882 * dropped ifs_ipf_global from R in order to get it 883 * exclusively. 
884 */ 885 if (ifs->ifs_fr_enable_active == 0) { 886 ifs->ifs_fr_enable_active = 1; 887 error = fr_enableipf(ifs, enable); 888 ifs->ifs_fr_enable_active = 0; 889 } 890 } 891 break; 892 case SIOCIPFSET : 893 if (!(mode & FWRITE)) { 894 error = EPERM; 895 break; 896 } 897 /* FALLTHRU */ 898 case SIOCIPFGETNEXT : 899 case SIOCIPFGET : 900 error = fr_ipftune(cmd, (void *)data, ifs); 901 break; 902 case SIOCSETFF : 903 if (!(mode & FWRITE)) 904 error = EPERM; 905 else { 906 error = COPYIN((caddr_t)data, 907 (caddr_t)&ifs->ifs_fr_flags, 908 sizeof(ifs->ifs_fr_flags)); 909 if (error != 0) 910 error = EFAULT; 911 } 912 break; 913 case SIOCIPFLP : 914 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 915 sizeof(tmp)); 916 if (error != 0) 917 error = EFAULT; 918 else 919 error = fr_setipfloopback(tmp, ifs); 920 break; 921 case SIOCGETFF : 922 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data, 923 sizeof(ifs->ifs_fr_flags)); 924 if (error != 0) 925 error = EFAULT; 926 break; 927 case SIOCFUNCL : 928 error = fr_resolvefunc((void *)data); 929 break; 930 case SIOCINAFR : 931 case SIOCRMAFR : 932 case SIOCADAFR : 933 case SIOCZRLST : 934 if (!(mode & FWRITE)) 935 error = EPERM; 936 else 937 error = frrequest(unit, cmd, (caddr_t)data, 938 ifs->ifs_fr_active, 1, ifs); 939 break; 940 case SIOCINIFR : 941 case SIOCRMIFR : 942 case SIOCADIFR : 943 if (!(mode & FWRITE)) 944 error = EPERM; 945 else 946 error = frrequest(unit, cmd, (caddr_t)data, 947 1 - ifs->ifs_fr_active, 1, ifs); 948 break; 949 case SIOCSWAPA : 950 if (!(mode & FWRITE)) 951 error = EPERM; 952 else { 953 WRITE_ENTER(&ifs->ifs_ipf_mutex); 954 bzero((char *)ifs->ifs_frcache, 955 sizeof (ifs->ifs_frcache)); 956 error = COPYOUT((caddr_t)&ifs->ifs_fr_active, 957 (caddr_t)data, 958 sizeof(ifs->ifs_fr_active)); 959 if (error != 0) 960 error = EFAULT; 961 else 962 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active; 963 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 964 } 965 break; 966 case SIOCGETFS : 967 fr_getstat(&fio, ifs); 968 error = 
fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT); 969 break; 970 case SIOCFRZST : 971 if (!(mode & FWRITE)) 972 error = EPERM; 973 else 974 error = fr_zerostats((caddr_t)data, ifs); 975 break; 976 case SIOCIPFFL : 977 if (!(mode & FWRITE)) 978 error = EPERM; 979 else { 980 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 981 sizeof(tmp)); 982 if (!error) { 983 tmp = frflush(unit, 4, tmp, ifs); 984 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 985 sizeof(tmp)); 986 if (error != 0) 987 error = EFAULT; 988 } else 989 error = EFAULT; 990 } 991 break; 992 #ifdef USE_INET6 993 case SIOCIPFL6 : 994 if (!(mode & FWRITE)) 995 error = EPERM; 996 else { 997 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 998 sizeof(tmp)); 999 if (!error) { 1000 tmp = frflush(unit, 6, tmp, ifs); 1001 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 1002 sizeof(tmp)); 1003 if (error != 0) 1004 error = EFAULT; 1005 } else 1006 error = EFAULT; 1007 } 1008 break; 1009 #endif 1010 case SIOCSTLCK : 1011 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 1012 if (error == 0) { 1013 ifs->ifs_fr_state_lock = tmp; 1014 ifs->ifs_fr_nat_lock = tmp; 1015 ifs->ifs_fr_frag_lock = tmp; 1016 ifs->ifs_fr_auth_lock = tmp; 1017 } else 1018 error = EFAULT; 1019 break; 1020 #ifdef IPFILTER_LOG 1021 case SIOCIPFFB : 1022 if (!(mode & FWRITE)) 1023 error = EPERM; 1024 else { 1025 tmp = ipflog_clear(unit, ifs); 1026 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 1027 sizeof(tmp)); 1028 if (error) 1029 error = EFAULT; 1030 } 1031 break; 1032 #endif /* IPFILTER_LOG */ 1033 case SIOCFRSYN : 1034 if (!(mode & FWRITE)) 1035 error = EPERM; 1036 else { 1037 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1038 WRITE_ENTER(&ifs->ifs_ipf_global); 1039 1040 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 1041 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 1042 fr_nataddrsync(0, NULL, NULL, ifs); 1043 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 1044 error = 0; 1045 } 1046 break; 1047 case SIOCGFRST : 1048 error = fr_outobj((void *)data, 
fr_fragstats(ifs), 1049 IPFOBJ_FRAGSTAT); 1050 break; 1051 case FIONREAD : 1052 #ifdef IPFILTER_LOG 1053 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF]; 1054 1055 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); 1056 if (error != 0) 1057 error = EFAULT; 1058 #endif 1059 break; 1060 case SIOCIPFITER : 1061 error = ipf_frruleiter((caddr_t)data, crgetuid(cp), 1062 curproc, ifs); 1063 break; 1064 1065 case SIOCGENITER : 1066 error = ipf_genericiter((caddr_t)data, crgetuid(cp), 1067 curproc, ifs); 1068 break; 1069 1070 case SIOCIPFDELTOK : 1071 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 1072 if (error != 0) { 1073 error = EFAULT; 1074 } else { 1075 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs); 1076 } 1077 break; 1078 1079 default : 1080 #ifdef IPFDEBUG 1081 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p", 1082 cmd, (void *)data); 1083 #endif 1084 error = EINVAL; 1085 break; 1086 } 1087 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1088 return error; 1089 } 1090 1091 1092 static int fr_enableipf(ifs, enable) 1093 ipf_stack_t *ifs; 1094 int enable; 1095 { 1096 int error; 1097 1098 if (!enable) { 1099 error = ipldetach(ifs); 1100 if (error == 0) 1101 ifs->ifs_fr_running = -1; 1102 return error; 1103 } 1104 1105 if (ifs->ifs_fr_running > 0) 1106 return 0; 1107 1108 error = iplattach(ifs); 1109 if (error == 0) { 1110 if (ifs->ifs_fr_timer_id == NULL) { 1111 int hz = drv_usectohz(500000); 1112 1113 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, 1114 (void *)ifs, 1115 hz); 1116 } 1117 ifs->ifs_fr_running = 1; 1118 } else { 1119 (void) ipldetach(ifs); 1120 } 1121 return error; 1122 } 1123 1124 1125 phy_if_t get_unit(name, v, ifs) 1126 char *name; 1127 int v; 1128 ipf_stack_t *ifs; 1129 { 1130 net_handle_t nif; 1131 1132 if (v == 4) 1133 nif = ifs->ifs_ipf_ipv4; 1134 else if (v == 6) 1135 nif = ifs->ifs_ipf_ipv6; 1136 else 1137 return 0; 1138 1139 return (net_phylookup(nif, name)); 1140 } 1141 1142 /* 1143 * routines below for saving IP headers to buffer 1144 */ 
1145 /*ARGSUSED*/ 1146 int iplopen(devp, flags, otype, cred) 1147 dev_t *devp; 1148 int flags, otype; 1149 cred_t *cred; 1150 { 1151 ipf_devstate_t *isp; 1152 minor_t min = getminor(*devp); 1153 minor_t minor; 1154 1155 #ifdef IPFDEBUG 1156 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred); 1157 #endif 1158 if (!(otype & OTYP_CHR)) 1159 return ENXIO; 1160 1161 if (IPL_LOGMAX < min) 1162 return ENXIO; 1163 1164 minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1, 1165 VM_BESTFIT | VM_SLEEP); 1166 1167 if (ddi_soft_state_zalloc(ipf_state, minor) != 0) { 1168 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1); 1169 return ENXIO; 1170 } 1171 1172 *devp = makedevice(getmajor(*devp), minor); 1173 isp = ddi_get_soft_state(ipf_state, minor); 1174 VERIFY(isp != NULL); 1175 1176 isp->ipfs_minor = min; 1177 isp->ipfs_zoneid = IPFS_ZONE_UNSET; 1178 1179 return 0; 1180 } 1181 1182 1183 /*ARGSUSED*/ 1184 int iplclose(dev, flags, otype, cred) 1185 dev_t dev; 1186 int flags, otype; 1187 cred_t *cred; 1188 { 1189 minor_t min = getminor(dev); 1190 1191 #ifdef IPFDEBUG 1192 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred); 1193 #endif 1194 1195 if (IPL_LOGMAX < min) 1196 return ENXIO; 1197 1198 ddi_soft_state_free(ipf_state, min); 1199 vmem_free(ipf_minor, (void *)(uintptr_t)min, 1); 1200 1201 return 0; 1202 } 1203 1204 #ifdef IPFILTER_LOG 1205 /* 1206 * iplread/ipllog 1207 * both of these must operate with at least splnet() lest they be 1208 * called during packet processing and cause an inconsistancy to appear in 1209 * the filter lists. 
1210 */ 1211 /*ARGSUSED*/ 1212 int iplread(dev, uio, cp) 1213 dev_t dev; 1214 register struct uio *uio; 1215 cred_t *cp; 1216 { 1217 ipf_stack_t *ifs; 1218 int ret; 1219 minor_t unit; 1220 ipf_devstate_t *isp; 1221 1222 unit = getminor(dev); 1223 isp = ddi_get_soft_state(ipf_state, unit); 1224 if (isp == NULL) 1225 return ENXIO; 1226 unit = isp->ipfs_minor; 1227 1228 1229 /* 1230 * ipf_find_stack returns with a read lock on ifs_ipf_global 1231 */ 1232 ifs = ipf_find_stack(crgetzoneid(cp), isp); 1233 if (ifs == NULL) 1234 return ENXIO; 1235 1236 # ifdef IPFDEBUG 1237 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp); 1238 # endif 1239 1240 if (ifs->ifs_fr_running < 1) { 1241 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1242 return EIO; 1243 } 1244 1245 # ifdef IPFILTER_SYNC 1246 if (unit == IPL_LOGSYNC) { 1247 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1248 return ipfsync_read(uio); 1249 } 1250 # endif 1251 1252 ret = ipflog_read(unit, uio, ifs); 1253 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1254 return ret; 1255 } 1256 #endif /* IPFILTER_LOG */ 1257 1258 1259 /* 1260 * iplread/ipllog 1261 * both of these must operate with at least splnet() lest they be 1262 * called during packet processing and cause an inconsistancy to appear in 1263 * the filter lists. 
1264 */ 1265 int iplwrite(dev, uio, cp) 1266 dev_t dev; 1267 register struct uio *uio; 1268 cred_t *cp; 1269 { 1270 ipf_stack_t *ifs; 1271 minor_t unit; 1272 ipf_devstate_t *isp; 1273 1274 unit = getminor(dev); 1275 isp = ddi_get_soft_state(ipf_state, unit); 1276 if (isp == NULL) 1277 return ENXIO; 1278 unit = isp->ipfs_minor; 1279 1280 /* 1281 * ipf_find_stack returns with a read lock on ifs_ipf_global 1282 */ 1283 ifs = ipf_find_stack(crgetzoneid(cp), isp); 1284 if (ifs == NULL) 1285 return ENXIO; 1286 1287 #ifdef IPFDEBUG 1288 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp); 1289 #endif 1290 1291 if (ifs->ifs_fr_running < 1) { 1292 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1293 return EIO; 1294 } 1295 1296 #ifdef IPFILTER_SYNC 1297 if (getminor(dev) == IPL_LOGSYNC) { 1298 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1299 return ipfsync_write(uio); 1300 } 1301 #endif /* IPFILTER_SYNC */ 1302 dev = dev; /* LINT */ 1303 uio = uio; /* LINT */ 1304 cp = cp; /* LINT */ 1305 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1306 return ENXIO; 1307 } 1308 1309 1310 /* 1311 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that 1312 * requires a large amount of setting up and isn't any more efficient. 1313 */ 1314 int fr_send_reset(fin) 1315 fr_info_t *fin; 1316 { 1317 tcphdr_t *tcp, *tcp2; 1318 int tlen, hlen; 1319 mblk_t *m; 1320 #ifdef USE_INET6 1321 ip6_t *ip6; 1322 #endif 1323 ip_t *ip; 1324 1325 tcp = fin->fin_dp; 1326 if (tcp->th_flags & TH_RST) 1327 return -1; 1328 1329 #ifndef IPFILTER_CKSUM 1330 if (fr_checkl4sum(fin) == -1) 1331 return -1; 1332 #endif 1333 1334 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 
1 : 0; 1335 #ifdef USE_INET6 1336 if (fin->fin_v == 6) 1337 hlen = sizeof(ip6_t); 1338 else 1339 #endif 1340 hlen = sizeof(ip_t); 1341 hlen += sizeof(*tcp2); 1342 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL) 1343 return -1; 1344 1345 m->b_rptr += 64; 1346 MTYPE(m) = M_DATA; 1347 m->b_wptr = m->b_rptr + hlen; 1348 ip = (ip_t *)m->b_rptr; 1349 bzero((char *)ip, hlen); 1350 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2)); 1351 tcp2->th_dport = tcp->th_sport; 1352 tcp2->th_sport = tcp->th_dport; 1353 if (tcp->th_flags & TH_ACK) { 1354 tcp2->th_seq = tcp->th_ack; 1355 tcp2->th_flags = TH_RST; 1356 } else { 1357 tcp2->th_ack = ntohl(tcp->th_seq); 1358 tcp2->th_ack += tlen; 1359 tcp2->th_ack = htonl(tcp2->th_ack); 1360 tcp2->th_flags = TH_RST|TH_ACK; 1361 } 1362 tcp2->th_off = sizeof(struct tcphdr) >> 2; 1363 1364 ip->ip_v = fin->fin_v; 1365 #ifdef USE_INET6 1366 if (fin->fin_v == 6) { 1367 ip6 = (ip6_t *)m->b_rptr; 1368 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1369 ip6->ip6_src = fin->fin_dst6.in6; 1370 ip6->ip6_dst = fin->fin_src6.in6; 1371 ip6->ip6_plen = htons(sizeof(*tcp)); 1372 ip6->ip6_nxt = IPPROTO_TCP; 1373 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2); 1374 } else 1375 #endif 1376 { 1377 ip->ip_src.s_addr = fin->fin_daddr; 1378 ip->ip_dst.s_addr = fin->fin_saddr; 1379 ip->ip_id = fr_nextipid(fin); 1380 ip->ip_hl = sizeof(*ip) >> 2; 1381 ip->ip_p = IPPROTO_TCP; 1382 ip->ip_len = sizeof(*ip) + sizeof(*tcp); 1383 ip->ip_tos = fin->fin_ip->ip_tos; 1384 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2); 1385 } 1386 return fr_send_ip(fin, m, &m); 1387 } 1388 1389 /* 1390 * Function: fr_send_ip 1391 * Returns: 0: success 1392 * -1: failed 1393 * Parameters: 1394 * fin: packet information 1395 * m: the message block where ip head starts 1396 * 1397 * Send a new packet through the IP stack. 
1398 * 1399 * For IPv4 packets, ip_len must be in host byte order, and ip_v, 1400 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this 1401 * function). 1402 * 1403 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled 1404 * in by this function. 1405 * 1406 * All other portions of the packet must be in on-the-wire format. 1407 */ 1408 /*ARGSUSED*/ 1409 static int fr_send_ip(fin, m, mpp) 1410 fr_info_t *fin; 1411 mblk_t *m, **mpp; 1412 { 1413 qpktinfo_t qpi, *qpip; 1414 fr_info_t fnew; 1415 ip_t *ip; 1416 int i, hlen; 1417 ipf_stack_t *ifs = fin->fin_ifs; 1418 1419 ip = (ip_t *)m->b_rptr; 1420 bzero((char *)&fnew, sizeof(fnew)); 1421 1422 #ifdef USE_INET6 1423 if (fin->fin_v == 6) { 1424 ip6_t *ip6; 1425 1426 ip6 = (ip6_t *)ip; 1427 ip6->ip6_vfc = 0x60; 1428 ip6->ip6_hlim = 127; 1429 fnew.fin_v = 6; 1430 hlen = sizeof(*ip6); 1431 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; 1432 } else 1433 #endif 1434 { 1435 fnew.fin_v = 4; 1436 #if SOLARIS2 >= 10 1437 ip->ip_ttl = 255; 1438 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1) 1439 ip->ip_off = htons(IP_DF); 1440 #else 1441 if (ip_ttl_ptr != NULL) 1442 ip->ip_ttl = (u_char)(*ip_ttl_ptr); 1443 else 1444 ip->ip_ttl = 63; 1445 if (ip_mtudisc != NULL) 1446 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0); 1447 else 1448 ip->ip_off = htons(IP_DF); 1449 #endif 1450 /* 1451 * The dance with byte order and ip_len/ip_off is because in 1452 * fr_fastroute, it expects them to be in host byte order but 1453 * ipf_cksum expects them to be in network byte order. 
1454 */ 1455 ip->ip_len = htons(ip->ip_len); 1456 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip)); 1457 ip->ip_len = ntohs(ip->ip_len); 1458 ip->ip_off = ntohs(ip->ip_off); 1459 hlen = sizeof(*ip); 1460 fnew.fin_plen = ip->ip_len; 1461 } 1462 1463 qpip = fin->fin_qpi; 1464 qpi.qpi_off = 0; 1465 qpi.qpi_ill = qpip->qpi_ill; 1466 qpi.qpi_m = m; 1467 qpi.qpi_data = ip; 1468 fnew.fin_qpi = &qpi; 1469 fnew.fin_ifp = fin->fin_ifp; 1470 fnew.fin_flx = FI_NOCKSUM; 1471 fnew.fin_m = m; 1472 fnew.fin_qfm = m; 1473 fnew.fin_ip = ip; 1474 fnew.fin_mp = mpp; 1475 fnew.fin_hlen = hlen; 1476 fnew.fin_dp = (char *)ip + hlen; 1477 fnew.fin_ifs = fin->fin_ifs; 1478 (void) fr_makefrip(hlen, ip, &fnew); 1479 1480 i = fr_fastroute(m, mpp, &fnew, NULL); 1481 return i; 1482 } 1483 1484 1485 int fr_send_icmp_err(type, fin, dst) 1486 int type; 1487 fr_info_t *fin; 1488 int dst; 1489 { 1490 struct in_addr dst4; 1491 struct icmp *icmp; 1492 qpktinfo_t *qpi; 1493 int hlen, code; 1494 phy_if_t phy; 1495 u_short sz; 1496 #ifdef USE_INET6 1497 mblk_t *mb; 1498 #endif 1499 mblk_t *m; 1500 #ifdef USE_INET6 1501 ip6_t *ip6; 1502 #endif 1503 ip_t *ip; 1504 ipf_stack_t *ifs = fin->fin_ifs; 1505 1506 if ((type < 0) || (type > ICMP_MAXTYPE)) 1507 return -1; 1508 1509 code = fin->fin_icode; 1510 #ifdef USE_INET6 1511 if ((code < 0) || (code >= ICMP_MAX_UNREACH)) 1512 return -1; 1513 #endif 1514 1515 #ifndef IPFILTER_CKSUM 1516 if (fr_checkl4sum(fin) == -1) 1517 return -1; 1518 #endif 1519 1520 qpi = fin->fin_qpi; 1521 1522 #ifdef USE_INET6 1523 mb = fin->fin_qfm; 1524 1525 if (fin->fin_v == 6) { 1526 sz = sizeof(ip6_t); 1527 sz += MIN(mb->b_wptr - mb->b_rptr, 512); 1528 hlen = sizeof(ip6_t); 1529 type = icmptoicmp6types[type]; 1530 if (type == ICMP6_DST_UNREACH) 1531 code = icmptoicmp6unreach[code]; 1532 } else 1533 #endif 1534 { 1535 if ((fin->fin_p == IPPROTO_ICMP) && 1536 !(fin->fin_flx & FI_SHORT)) 1537 switch (ntohs(fin->fin_data[0]) >> 8) 1538 { 1539 case ICMP_ECHO : 1540 case ICMP_TSTAMP : 
1541 case ICMP_IREQ : 1542 case ICMP_MASKREQ : 1543 break; 1544 default : 1545 return 0; 1546 } 1547 1548 sz = sizeof(ip_t) * 2; 1549 sz += 8; /* 64 bits of data */ 1550 hlen = sizeof(ip_t); 1551 } 1552 1553 sz += offsetof(struct icmp, icmp_ip); 1554 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL) 1555 return -1; 1556 MTYPE(m) = M_DATA; 1557 m->b_rptr += 64; 1558 m->b_wptr = m->b_rptr + sz; 1559 bzero((char *)m->b_rptr, (size_t)sz); 1560 ip = (ip_t *)m->b_rptr; 1561 ip->ip_v = fin->fin_v; 1562 icmp = (struct icmp *)(m->b_rptr + hlen); 1563 icmp->icmp_type = type & 0xff; 1564 icmp->icmp_code = code & 0xff; 1565 phy = (phy_if_t)qpi->qpi_ill; 1566 if (type == ICMP_UNREACH && (phy != 0) && 1567 fin->fin_icode == ICMP_UNREACH_NEEDFRAG) 1568 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 ); 1569 1570 #ifdef USE_INET6 1571 if (fin->fin_v == 6) { 1572 struct in6_addr dst6; 1573 int csz; 1574 1575 if (dst == 0) { 1576 ipf_stack_t *ifs = fin->fin_ifs; 1577 1578 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy, 1579 (void *)&dst6, NULL, ifs) == -1) { 1580 FREE_MB_T(m); 1581 return -1; 1582 } 1583 } else 1584 dst6 = fin->fin_dst6.in6; 1585 1586 csz = sz; 1587 sz -= sizeof(ip6_t); 1588 ip6 = (ip6_t *)m->b_rptr; 1589 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1590 ip6->ip6_plen = htons((u_short)sz); 1591 ip6->ip6_nxt = IPPROTO_ICMPV6; 1592 ip6->ip6_src = dst6; 1593 ip6->ip6_dst = fin->fin_src6.in6; 1594 sz -= offsetof(struct icmp, icmp_ip); 1595 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz); 1596 icmp->icmp_cksum = csz - sizeof(ip6_t); 1597 } else 1598 #endif 1599 { 1600 ip->ip_hl = sizeof(*ip) >> 2; 1601 ip->ip_p = IPPROTO_ICMP; 1602 ip->ip_id = fin->fin_ip->ip_id; 1603 ip->ip_tos = fin->fin_ip->ip_tos; 1604 ip->ip_len = (u_short)sz; 1605 if (dst == 0) { 1606 ipf_stack_t *ifs = fin->fin_ifs; 1607 1608 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy, 1609 (void *)&dst4, NULL, ifs) == -1) { 1610 FREE_MB_T(m); 1611 return -1; 1612 } 1613 } else { 1614 
dst4 = fin->fin_dst; 1615 } 1616 ip->ip_src = dst4; 1617 ip->ip_dst = fin->fin_src; 1618 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip, 1619 sizeof(*fin->fin_ip)); 1620 bcopy((char *)fin->fin_ip + fin->fin_hlen, 1621 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8); 1622 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len); 1623 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off); 1624 icmp->icmp_cksum = ipf_cksum((u_short *)icmp, 1625 sz - sizeof(ip_t)); 1626 } 1627 1628 /* 1629 * Need to exit out of these so we don't recursively call rw_enter 1630 * from fr_qout. 1631 */ 1632 return fr_send_ip(fin, m, &m); 1633 } 1634 1635 #include <sys/time.h> 1636 #include <sys/varargs.h> 1637 1638 #ifndef _KERNEL 1639 #include <stdio.h> 1640 #endif 1641 1642 /* 1643 * Return the first IP Address associated with an interface 1644 * For IPv6, we walk through the list of logical interfaces and return 1645 * the address of the first one that isn't a link-local interface. 1646 * We can't assume that it is :1 because another link-local address 1647 * may have been assigned there. 
1648 */ 1649 /*ARGSUSED*/ 1650 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs) 1651 int v, atype; 1652 void *ifptr; 1653 struct in_addr *inp, *inpmask; 1654 ipf_stack_t *ifs; 1655 { 1656 struct sockaddr_in6 v6addr[2]; 1657 struct sockaddr_in v4addr[2]; 1658 net_ifaddr_t type[2]; 1659 net_handle_t net_data; 1660 phy_if_t phyif; 1661 void *array; 1662 1663 switch (v) 1664 { 1665 case 4: 1666 net_data = ifs->ifs_ipf_ipv4; 1667 array = v4addr; 1668 break; 1669 case 6: 1670 net_data = ifs->ifs_ipf_ipv6; 1671 array = v6addr; 1672 break; 1673 default: 1674 net_data = NULL; 1675 break; 1676 } 1677 1678 if (net_data == NULL) 1679 return -1; 1680 1681 phyif = (phy_if_t)ifptr; 1682 1683 switch (atype) 1684 { 1685 case FRI_PEERADDR : 1686 type[0] = NA_PEER; 1687 break; 1688 1689 case FRI_BROADCAST : 1690 type[0] = NA_BROADCAST; 1691 break; 1692 1693 default : 1694 type[0] = NA_ADDRESS; 1695 break; 1696 } 1697 1698 type[1] = NA_NETMASK; 1699 1700 if (v == 6) { 1701 lif_if_t idx = 0; 1702 1703 do { 1704 idx = net_lifgetnext(net_data, phyif, idx); 1705 if (net_getlifaddr(net_data, phyif, idx, 2, type, 1706 array) < 0) 1707 return -1; 1708 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) && 1709 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr)) 1710 break; 1711 } while (idx != 0); 1712 1713 if (idx == 0) 1714 return -1; 1715 1716 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1], 1717 inp, inpmask); 1718 } 1719 1720 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0) 1721 return -1; 1722 1723 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask); 1724 } 1725 1726 1727 u_32_t fr_newisn(fin) 1728 fr_info_t *fin; 1729 { 1730 static int iss_seq_off = 0; 1731 u_char hash[16]; 1732 u_32_t newiss; 1733 MD5_CTX ctx; 1734 ipf_stack_t *ifs = fin->fin_ifs; 1735 1736 /* 1737 * Compute the base value of the ISS. It is a hash 1738 * of (saddr, sport, daddr, dport, secret). 
1739 */ 1740 MD5Init(&ctx); 1741 1742 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src, 1743 sizeof(fin->fin_fi.fi_src)); 1744 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst, 1745 sizeof(fin->fin_fi.fi_dst)); 1746 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat)); 1747 1748 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret)); 1749 1750 MD5Final(hash, &ctx); 1751 1752 bcopy(hash, &newiss, sizeof(newiss)); 1753 1754 /* 1755 * Now increment our "timer", and add it in to 1756 * the computed value. 1757 * 1758 * XXX Use `addin'? 1759 * XXX TCP_ISSINCR too large to use? 1760 */ 1761 iss_seq_off += 0x00010000; 1762 newiss += iss_seq_off; 1763 return newiss; 1764 } 1765 1766 1767 /* ------------------------------------------------------------------------ */ 1768 /* Function: fr_nextipid */ 1769 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */ 1770 /* Parameters: fin(I) - pointer to packet information */ 1771 /* */ 1772 /* Returns the next IPv4 ID to use for this packet. 
*/ 1773 /* ------------------------------------------------------------------------ */ 1774 u_short fr_nextipid(fin) 1775 fr_info_t *fin; 1776 { 1777 static u_short ipid = 0; 1778 u_short id; 1779 ipf_stack_t *ifs = fin->fin_ifs; 1780 1781 MUTEX_ENTER(&ifs->ifs_ipf_rw); 1782 if (fin->fin_pktnum != 0) { 1783 id = fin->fin_pktnum & 0xffff; 1784 } else { 1785 id = ipid++; 1786 } 1787 MUTEX_EXIT(&ifs->ifs_ipf_rw); 1788 1789 return id; 1790 } 1791 1792 1793 #ifndef IPFILTER_CKSUM 1794 /* ARGSUSED */ 1795 #endif 1796 INLINE void fr_checkv4sum(fin) 1797 fr_info_t *fin; 1798 { 1799 #ifdef IPFILTER_CKSUM 1800 if (fr_checkl4sum(fin) == -1) 1801 fin->fin_flx |= FI_BAD; 1802 #endif 1803 } 1804 1805 1806 #ifdef USE_INET6 1807 # ifndef IPFILTER_CKSUM 1808 /* ARGSUSED */ 1809 # endif 1810 INLINE void fr_checkv6sum(fin) 1811 fr_info_t *fin; 1812 { 1813 # ifdef IPFILTER_CKSUM 1814 if (fr_checkl4sum(fin) == -1) 1815 fin->fin_flx |= FI_BAD; 1816 # endif 1817 } 1818 #endif /* USE_INET6 */ 1819 1820 1821 #if (SOLARIS2 < 7) 1822 void fr_slowtimer() 1823 #else 1824 /*ARGSUSED*/ 1825 void fr_slowtimer __P((void *arg)) 1826 #endif 1827 { 1828 ipf_stack_t *ifs = arg; 1829 1830 READ_ENTER(&ifs->ifs_ipf_global); 1831 if (ifs->ifs_fr_running != 1) { 1832 ifs->ifs_fr_timer_id = NULL; 1833 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1834 return; 1835 } 1836 ipf_expiretokens(ifs); 1837 fr_fragexpire(ifs); 1838 fr_timeoutstate(ifs); 1839 fr_natexpire(ifs); 1840 fr_authexpire(ifs); 1841 ifs->ifs_fr_ticks++; 1842 if (ifs->ifs_fr_running == 1) 1843 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg, 1844 drv_usectohz(500000)); 1845 else 1846 ifs->ifs_fr_timer_id = NULL; 1847 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1848 } 1849 1850 1851 /* ------------------------------------------------------------------------ */ 1852 /* Function: fr_pullup */ 1853 /* Returns: NULL == pullup failed, else pointer to protocol header */ 1854 /* Parameters: m(I) - pointer to buffer where data packet starts */ 1855 /* fin(I) - pointer to 
packet information                  */
/*              len(I) - number of bytes to pullup                          */
/*                                                                          */
/* Attempt to move at least len bytes (from the start of the buffer) into a */
/* single buffer for ease of access.  Operating system native functions are */
/* used to manage buffers - if necessary.  If the entire packet ends up in  */
/* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
/* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
/* and ONLY if the pullup succeeds.                                         */
/*                                                                          */
/* We assume that 'min' is a pointer to a buffer that is part of the chain  */
/* of buffers that starts at *fin->fin_mp.                                  */
/*                                                                          */
/* On failure the whole chain at *fin->fin_mp is freed and fin_m, fin_ip,   */
/* fin_dp, *fin_mp and the qpktinfo data pointer are all set to NULL.       */
/* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
	qpktinfo_t *qpi = fin->fin_qpi;
	int out = fin->fin_out, dpoff, ipoff;
	mb_t *m = min, *m1, *m2;
	char *ip;
	uint32_t start, stuff, end, value, flags;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (m == NULL)
		return NULL;

	/* Nothing to do when the packet has already been coalesced. */
	ip = (char *)fin->fin_ip;
	if ((fin->fin_flx & FI_COALESCE) != 0)
		return ip;

	/*
	 * Remember where fin_dp sits relative to the IP header so that it
	 * can be re-derived if the header moves.
	 */
	ipoff = fin->fin_ipoff;
	if (fin->fin_dp != NULL)
		dpoff = (char *)fin->fin_dp - (char *)ip;
	else
		dpoff = 0;

	/* Only pull up when the first buffer is too short. */
	if (M_LEN(m) < len + ipoff) {

		/*
		 * pfil_precheck ensures the IP header is on a 32bit
		 * aligned address so simply fail if that isn't currently
		 * the case (should never happen).
		 */
		int inc = 0;

		/*
		 * If the IP offset is not a multiple of 4, back b_rptr up by
		 * the difference (when there is room in the data block) for
		 * the duration of the pullup; it is moved forward again below.
		 */
		if (ipoff > 0) {
			if ((ipoff & 3) != 0) {
				inc = 4 - (ipoff & 3);
				if (m->b_rptr - inc >= m->b_datap->db_base)
					m->b_rptr -= inc;
				else
					inc = 0;
			}
		}

		/*
		 * XXX This is here as a work around for a bug with DEBUG
		 * XXX Solaris kernels.  The problem is b_prev is used by IP
		 * XXX code as a way to stash the phyint_index for a packet,
		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
		 * XXX for both of these to be NULL.  See 6442390.
		 */
		m1 = m;
		m2 = m->b_prev;	/* saved so it can be restored afterwards */

		do {
			m1->b_next = NULL;
			m1->b_prev = NULL;
			m1 = m1->b_cont;
		} while (m1);

		/*
		 * Need to preserve checksum information by copying them
		 * to newmp which heads the pulluped message.
		 */
		mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);

		if (pullupmsg(m, len + ipoff + inc) == 0) {
			/* fr_pull[1] counts failed pullups. */
			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
			FREE_MB_T(*fin->fin_mp);
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			fin->fin_dp = NULL;
			qpi->qpi_data = NULL;
			return NULL;
		}

		mac_hcksum_set(m, start, stuff, end, value, flags);

		/* Undo the temporary b_prev/b_rptr changes made above. */
		m->b_prev = m2;
		m->b_rptr += inc;
		fin->fin_m = m;
		ip = MTOD(m, char *) + ipoff;
		qpi->qpi_data = ip;
	}

	/* fr_pull[0] counts successful calls. */
	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
	fin->fin_ip = (ip_t *)ip;
	if (fin->fin_dp != NULL)
		fin->fin_dp = (char *)fin->fin_ip + dpoff;

	if (len == fin->fin_plen)
		fin->fin_flx |= FI_COALESCE;
	return ip;
}


/*
 * Function:	fr_verifysrc
 * Returns:	int (really boolean)
 * Parameters:	fin - packet information
 *
 * Check whether the packet has a valid source address for the interface on
 * which the packet arrived, implementing the "fr_chksrc" feature.
 * Returns true iff the packet's source address is valid.
1973 */ 1974 int fr_verifysrc(fin) 1975 fr_info_t *fin; 1976 { 1977 net_handle_t net_data_p; 1978 phy_if_t phy_ifdata_routeto; 1979 struct sockaddr sin; 1980 ipf_stack_t *ifs = fin->fin_ifs; 1981 1982 if (fin->fin_v == 4) { 1983 net_data_p = ifs->ifs_ipf_ipv4; 1984 } else if (fin->fin_v == 6) { 1985 net_data_p = ifs->ifs_ipf_ipv6; 1986 } else { 1987 return (0); 1988 } 1989 1990 /* Get the index corresponding to the if name */ 1991 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1992 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr)); 1993 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL); 1994 1995 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 1996 } 1997 1998 /* 1999 * Return true only if forwarding is enabled on the interface. 2000 */ 2001 static int 2002 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp) 2003 { 2004 lif_if_t lif; 2005 2006 for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0; 2007 lif = net_lifgetnext(ndp, phyif, lif)) { 2008 int res; 2009 uint64_t flags; 2010 2011 res = net_getlifflags(ndp, phyif, lif, &flags); 2012 if (res != 0) 2013 return (0); 2014 if (flags & IFF_ROUTER) 2015 return (1); 2016 } 2017 2018 return (0); 2019 } 2020 2021 /* 2022 * Function: fr_fastroute 2023 * Returns: 0: success; 2024 * -1: failed 2025 * Parameters: 2026 * mb: the message block where ip head starts 2027 * mpp: the pointer to the pointer of the orignal 2028 * packet message 2029 * fin: packet information 2030 * fdp: destination interface information 2031 * if it is NULL, no interface information provided. 2032 * 2033 * This function is for fastroute/to/dup-to rules. It calls 2034 * pfil_make_lay2_packet to search route, make lay-2 header 2035 * ,and identify output queue for the IP packet. 
 * The destination address depends on the following conditions:
 * 1: for fastroute rule, fdp is passed in as NULL, so the
 *	destination address is the IP Packet's destination address
 * 2: for to/dup-to rule, if an ip address is specified after
 *	the interface name, this address is used as the destination
 *	address. Otherwise IP Packet's destination address is used
 *
 * NOTE(review): the failure paths that go through bad_fastroute free mb,
 * but the early "return (-1)" paths (unsupported IP version, forwarding
 * disabled on the source interface, net_inject_alloc() failure) return
 * without freeing it - confirm what callers expect to own after a -1.
 */
int fr_fastroute(mb, mpp, fin, fdp)
mblk_t *mb, **mpp;
fr_info_t *fin;
frdest_t *fdp;
{
	net_handle_t net_data_p;
	net_inject_t *inj;
	mblk_t *mp = NULL;
	frentry_t *fr = fin->fin_fr;
	qpktinfo_t *qpi;
	ip_t *ip;

	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct sockaddr *sinp;
	ipf_stack_t *ifs = fin->fin_ifs;
#ifndef	sparc
	u_short __iplen, __ipoff;
#endif

	if (fin->fin_v == 4) {
		net_data_p = ifs->ifs_ipf_ipv4;
	} else if (fin->fin_v == 6) {
		net_data_p = ifs->ifs_ipf_ipv6;
	} else {
		return (-1);
	}

	/* Check the src here, fin_ifp is the src interface. */
	if (!fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
		return (-1);

	inj = net_inject_alloc(NETINFO_VERSION);
	if (inj == NULL)
		return -1;

	ip = fin->fin_ip;
	qpi = fin->fin_qpi;

	/*
	 * If this is a duplicate mblk then we want ip to point at that
	 * data, not the original, if and only if it is already pointing at
	 * the current mblk data.
	 *
	 * Otherwise, if it's not a duplicate, and we're not already pointing
	 * at the current mblk data, then we want to ensure that the data
	 * points at ip.
	 */

	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
		ip = (ip_t *)mb->b_rptr;
	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
		qpi->qpi_m->b_rptr = (uchar_t *)ip;
		qpi->qpi_off = 0;
	}

	/*
	 * If there is another M_PROTO, we don't want it
	 */
	if (*mpp != mb) {
		mp = unlinkb(*mpp);
		freeb(*mpp);
		*mpp = mp;
	}

	/*
	 * sin and sin6 are two typed views of the same injection address;
	 * which one is filled in depends on the IP version.
	 */
	sinp = (struct sockaddr *)&inj->ni_addr;
	sin = (struct sockaddr_in *)sinp;
	sin6 = (struct sockaddr_in6 *)sinp;
	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
	inj->ni_packet = mb;

	/*
	 * In case we're here due to "to <if>" being used with
	 * "keep state", check that we're going in the correct
	 * direction.
	 */
	if (fdp != NULL) {
		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
			goto bad_fastroute;
		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
		if (fin->fin_v == 4) {
			sin->sin_addr = fdp->fd_ip;
		} else {
			sin6->sin6_addr = fdp->fd_ip6.in6;
		}
	} else {
		/* No explicit destination: route on the packet's own. */
		if (fin->fin_v == 4) {
			sin->sin_addr = ip->ip_dst;
		} else {
			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
		}
		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
	}

	/* we're checking the destination here */
	if (!fr_forwarding_enabled(inj->ni_physical, net_data_p))
		goto bad_fastroute;

	/*
	 * Clear the hardware checksum flags from packets that we are doing
	 * input processing on as leaving them set will cause the outgoing
	 * NIC (if it supports hardware checksum) to calculate them anew,
	 * using the old (correct) checksums as the pseudo value to start
	 * from.
	 */
	if (fin->fin_out == 0) {
		DB_CKSUMFLAGS(mb) = 0;
	}

	*mpp = mb;

	/*
	 * An inbound packet is about to leave on another interface, so run
	 * the outbound accounting, state and NAT processing on it with fin
	 * temporarily describing it as outbound on that interface.
	 */
	if (fin->fin_out == 0) {
		void *saveifp;
		u_32_t pass;

		saveifp = fin->fin_ifp;
		fin->fin_ifp = (void *)inj->ni_physical;
		fin->fin_flx &= ~FI_STATE;
		fin->fin_out = 1;
		(void) fr_acctpkt(fin, &pass);
		fin->fin_fr = NULL;
		if (!fr || !(fr->fr_flags & FR_RETMASK))
			(void) fr_checkstate(fin, &pass);
		if (fr_checknatout(fin, NULL) == -1)
			goto bad_fastroute;
		fin->fin_out = 0;
		fin->fin_ifp = saveifp;
	}
#ifndef	sparc
	/* ip_len/ip_off are converted to network byte order for injection. */
	if (fin->fin_v == 4) {
		__iplen = (u_short)ip->ip_len,
		__ipoff = (u_short)ip->ip_off;

		ip->ip_len = htons(__iplen);
		ip->ip_off = htons(__ipoff);
	}
#endif

	if (net_data_p) {
		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
			net_inject_free(inj);
			return (-1);
		}
	}

	/* ifs_fr_frouteok[0] counts successes, [1] counts failures. */
	ifs->ifs_fr_frouteok[0]++;
	net_inject_free(inj);
	return 0;
bad_fastroute:
	net_inject_free(inj);
	freemsg(mb);
	ifs->ifs_fr_frouteok[1]++;
	return -1;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_hook4_out                                               */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* Calling ipf_hook.                                                       
*/ 2208 /* ------------------------------------------------------------------------ */ 2209 /*ARGSUSED*/ 2210 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg) 2211 { 2212 return ipf_hook(info, 1, 0, arg); 2213 } 2214 /*ARGSUSED*/ 2215 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg) 2216 { 2217 return ipf_hook6(info, 1, 0, arg); 2218 } 2219 2220 /* ------------------------------------------------------------------------ */ 2221 /* Function: ipf_hook4_in */ 2222 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2223 /* Parameters: event(I) - pointer to event */ 2224 /* info(I) - pointer to hook information for firewalling */ 2225 /* */ 2226 /* Calling ipf_hook. */ 2227 /* ------------------------------------------------------------------------ */ 2228 /*ARGSUSED*/ 2229 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg) 2230 { 2231 return ipf_hook(info, 0, 0, arg); 2232 } 2233 /*ARGSUSED*/ 2234 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg) 2235 { 2236 return ipf_hook6(info, 0, 0, arg); 2237 } 2238 2239 2240 /* ------------------------------------------------------------------------ */ 2241 /* Function: ipf_hook4_loop_out */ 2242 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2243 /* Parameters: event(I) - pointer to event */ 2244 /* info(I) - pointer to hook information for firewalling */ 2245 /* */ 2246 /* Calling ipf_hook. 
*/ 2247 /* ------------------------------------------------------------------------ */ 2248 /*ARGSUSED*/ 2249 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 2250 { 2251 return ipf_hook(info, 1, FI_NOCKSUM, arg); 2252 } 2253 /*ARGSUSED*/ 2254 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 2255 { 2256 return ipf_hook6(info, 1, FI_NOCKSUM, arg); 2257 } 2258 2259 /* Static constants used by ipf_hook_ether */ 2260 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = { 2261 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF 2262 }; 2263 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E }; 2264 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 }; 2265 2266 /* ------------------------------------------------------------------------ */ 2267 /* Function: ipf_hook_ether */ 2268 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2269 /* Parameters: token(I) - pointer to event */ 2270 /* info(I) - pointer to hook information for firewalling */ 2271 /* */ 2272 /* The ipf_hook_ether hook is currently private to illumos. It represents */ 2273 /* a layer 2 datapath generally used by virtual machines. Currently the */ 2274 /* hook is only used by the viona driver to pass along L2 frames for */ 2275 /* inspection. It requires that the L2 ethernet header is contained within */ 2276 /* a single dblk_t (however layers above the L2 header have no restrctions */ 2277 /* in ipf). ipf does not currently support filtering on L2 fields (e.g. */ 2278 /* filtering on a MAC address or ethertype), however virtual machines do */ 2279 /* not have native IP stack instances where ipf traditionally hooks in. */ 2280 /* Instead this entry point is used to determine if the packet is unicast, */ 2281 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the */ 2282 /* traditional ip hooks for filtering. Non IPv4 or non IPv6 packets are */ 2283 /* not subject to examination. 
 */
/* ------------------------------------------------------------------------ */
/*
 * Additional parameters: arg is handed through unchanged to ipf_hook() or
 * ipf_hook6(); out is B_TRUE for frames going out and B_FALSE for frames
 * coming in, and is likewise handed through.
 */
int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
    boolean_t out)
{
	struct ether_header *ethp;
	hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
	mblk_t *mp;
	size_t offset, len;
	uint16_t etype;
	boolean_t v6;

	/*
	 * viona will only pass us mblks with the L2 header contained in a
	 * single data block.
	 */
	mp = *hpe->hpe_mp;
	len = MBLKL(mp);

	VERIFY3S(len, >=, sizeof (struct ether_header));

	/*
	 * Work out the ethertype and how many bytes of L2 header precede the
	 * IP header, allowing for a VLAN tag.
	 */
	ethp = (struct ether_header *)mp->b_rptr;
	if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evh =
		    (struct ether_vlan_header *)ethp;

		VERIFY3S(len, >=, sizeof (struct ether_vlan_header));

		etype = ntohs(evh->ether_type);
		offset = sizeof (*evh);
	} else {
		offset = sizeof (*ethp);
	}

	/*
	 * ipf only support filtering IPv4 and IPv6.  Ignore other types.
	 */
	if (etype == ETHERTYPE_IP)
		v6 = B_FALSE;
	else if (etype == ETHERTYPE_IPV6)
		v6 = B_TRUE;
	else
		return (0);

	/*
	 * Classify the frame from the bytes at the very start of the
	 * ethernet header; frames matching none of these get no flag.
	 */
	if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
		hpe->hpe_flags |= HPE_BROADCAST;
	else if (bcmp(ipf_eth_ipv4_mcast, ethp,
	    sizeof (ipf_eth_ipv4_mcast)) == 0)
		hpe->hpe_flags |= HPE_MULTICAST;
	else if (bcmp(ipf_eth_ipv6_mcast, ethp,
	    sizeof (ipf_eth_ipv6_mcast)) == 0)
		hpe->hpe_flags |= HPE_MULTICAST;

	/* Find the start of the IPv4 or IPv6 header */
	for (; offset >= len; len = MBLKL(mp)) {
		offset -= len;
		mp = mp->b_cont;
		if (mp == NULL) {
			/* Chain ended before the IP header: drop the frame. */
			freemsg(*hpe->hpe_mp);
			*hpe->hpe_mp = NULL;
			return (-1);
		}
	}
	hpe->hpe_mb = mp;
	hpe->hpe_hdr = mp->b_rptr + offset;

	return (v6 ? ipf_hook6(info, out, 0, arg) :
	    ipf_hook(info, out, 0, arg));
}

/* ------------------------------------------------------------------------ */
/* Function:    ipf_hookviona_{in,out}                                      */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* The viona hooks are private hooks to illumos. They represent a layer 2   */
/* datapath generally used to implement virtual machines, which is used to  */
/* pass along L2 packets.                                                   */
/*                                                                          */
/* They end up calling the appropriate traditional ip hooks.                */
/* ------------------------------------------------------------------------ */
int
ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
{
	return (ipf_hook_ether(token, info, arg, B_FALSE));
}

int
ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
{
	return (ipf_hook_ether(token, info, arg, B_TRUE));
}

/* ------------------------------------------------------------------------ */
/* Function:    ipf_hook4_loop_in                                           */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* Calling ipf_hook.                                                       
*/ 2384 /* ------------------------------------------------------------------------ */ 2385 /*ARGSUSED*/ 2386 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 2387 { 2388 return ipf_hook(info, 0, FI_NOCKSUM, arg); 2389 } 2390 /*ARGSUSED*/ 2391 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 2392 { 2393 return ipf_hook6(info, 0, FI_NOCKSUM, arg); 2394 } 2395 2396 /* ------------------------------------------------------------------------ */ 2397 /* Function: ipf_hook */ 2398 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2399 /* Parameters: info(I) - pointer to hook information for firewalling */ 2400 /* out(I) - whether packet is going in or out */ 2401 /* loopback(I) - whether packet is a loopback packet or not */ 2402 /* */ 2403 /* Stepping stone function between the IP mainline and IPFilter. Extracts */ 2404 /* parameters out of the info structure and forms them up to be useful for */ 2405 /* calling ipfilter. */ 2406 /* ------------------------------------------------------------------------ */ 2407 int ipf_hook(hook_data_t info, int out, int loopback, void *arg) 2408 { 2409 hook_pkt_event_t *fw; 2410 ipf_stack_t *ifs; 2411 qpktinfo_t qpi; 2412 int rval, hlen; 2413 u_short swap; 2414 phy_if_t phy; 2415 ip_t *ip; 2416 2417 ifs = arg; 2418 fw = (hook_pkt_event_t *)info; 2419 2420 ASSERT(fw != NULL); 2421 phy = (out == 0) ? 
fw->hpe_ifp : fw->hpe_ofp; 2422 2423 ip = fw->hpe_hdr; 2424 swap = ntohs(ip->ip_len); 2425 ip->ip_len = swap; 2426 swap = ntohs(ip->ip_off); 2427 ip->ip_off = swap; 2428 hlen = IPH_HDR_LENGTH(ip); 2429 2430 qpi.qpi_m = fw->hpe_mb; 2431 qpi.qpi_data = fw->hpe_hdr; 2432 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2433 qpi.qpi_ill = (void *)phy; 2434 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2435 if (qpi.qpi_flags) 2436 qpi.qpi_flags |= FI_MBCAST; 2437 qpi.qpi_flags |= loopback; 2438 2439 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2440 &qpi, fw->hpe_mp, ifs); 2441 2442 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2443 if (rval == 0 && *(fw->hpe_mp) == NULL) 2444 rval = 1; 2445 2446 /* Notify IP the packet mblk_t and IP header pointers. */ 2447 fw->hpe_mb = qpi.qpi_m; 2448 fw->hpe_hdr = qpi.qpi_data; 2449 if (rval == 0) { 2450 ip = qpi.qpi_data; 2451 swap = ntohs(ip->ip_len); 2452 ip->ip_len = swap; 2453 swap = ntohs(ip->ip_off); 2454 ip->ip_off = swap; 2455 } 2456 return rval; 2457 2458 } 2459 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg) 2460 { 2461 hook_pkt_event_t *fw; 2462 int rval, hlen; 2463 qpktinfo_t qpi; 2464 phy_if_t phy; 2465 2466 fw = (hook_pkt_event_t *)info; 2467 2468 ASSERT(fw != NULL); 2469 phy = (out == 0) ? 
fw->hpe_ifp : fw->hpe_ofp; 2470 2471 hlen = sizeof (ip6_t); 2472 2473 qpi.qpi_m = fw->hpe_mb; 2474 qpi.qpi_data = fw->hpe_hdr; 2475 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2476 qpi.qpi_ill = (void *)phy; 2477 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2478 if (qpi.qpi_flags) 2479 qpi.qpi_flags |= FI_MBCAST; 2480 qpi.qpi_flags |= loopback; 2481 2482 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2483 &qpi, fw->hpe_mp, arg); 2484 2485 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2486 if (rval == 0 && *(fw->hpe_mp) == NULL) 2487 rval = 1; 2488 2489 /* Notify IP the packet mblk_t and IP header pointers. */ 2490 fw->hpe_mb = qpi.qpi_m; 2491 fw->hpe_hdr = qpi.qpi_data; 2492 return rval; 2493 } 2494 2495 2496 /* ------------------------------------------------------------------------ */ 2497 /* Function: ipf_nic_event_v4 */ 2498 /* Returns: int - 0 == no problems encountered */ 2499 /* Parameters: event(I) - pointer to event */ 2500 /* info(I) - pointer to information about a NIC event */ 2501 /* */ 2502 /* Function to receive asynchronous NIC events from IP */ 2503 /* ------------------------------------------------------------------------ */ 2504 /*ARGSUSED*/ 2505 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg) 2506 { 2507 struct sockaddr_in *sin; 2508 hook_nic_event_t *hn; 2509 ipf_stack_t *ifs = arg; 2510 void *new_ifp = NULL; 2511 2512 if (ifs->ifs_fr_running <= 0) 2513 return (0); 2514 2515 hn = (hook_nic_event_t *)info; 2516 2517 switch (hn->hne_event) 2518 { 2519 case NE_PLUMB : 2520 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data, 2521 ifs); 2522 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2523 hn->hne_data, ifs); 2524 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2525 hn->hne_data, ifs); 2526 break; 2527 2528 case NE_UNPLUMB : 2529 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2530 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void 
*)hn->hne_nic, NULL, 2531 ifs); 2532 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2533 break; 2534 2535 case NE_ADDRESS_CHANGE : 2536 /* 2537 * We only respond to events for logical interface 0 because 2538 * IPFilter only uses the first address given to a network 2539 * interface. We check for hne_lif==1 because the netinfo 2540 * code maps adds 1 to the lif number so that it can return 2541 * 0 to indicate "no more lifs" when walking them. 2542 */ 2543 if (hn->hne_lif == 1) { 2544 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL, 2545 ifs); 2546 sin = hn->hne_data; 2547 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr, 2548 ifs); 2549 } 2550 break; 2551 2552 #if SOLARIS2 >= 10 2553 case NE_IFINDEX_CHANGE : 2554 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2555 2556 if (hn->hne_data != NULL) { 2557 /* 2558 * The netinfo passes interface index as int (hne_data should be 2559 * handled as a pointer to int), which is always 32bit. We need to 2560 * convert it to void pointer here, since interfaces are 2561 * represented as pointers to void in IPF. The pointers are 64 bits 2562 * long on 64bit platforms. Doing something like 2563 * (void *)((int) x) 2564 * will throw warning: 2565 * "cast to pointer from integer of different size" 2566 * during 64bit compilation. 2567 * 2568 * The line below uses (size_t) to typecast int to 2569 * size_t, which might be 64bit/32bit (depending 2570 * on architecture). Once we have proper 64bit/32bit 2571 * type (size_t), we can safely convert it to void pointer. 
2572 */ 2573 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2574 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2575 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2576 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2577 } 2578 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2579 break; 2580 #endif 2581 2582 default : 2583 break; 2584 } 2585 2586 return 0; 2587 } 2588 2589 2590 /* ------------------------------------------------------------------------ */ 2591 /* Function: ipf_nic_event_v6 */ 2592 /* Returns: int - 0 == no problems encountered */ 2593 /* Parameters: event(I) - pointer to event */ 2594 /* info(I) - pointer to information about a NIC event */ 2595 /* */ 2596 /* Function to receive asynchronous NIC events from IP */ 2597 /* ------------------------------------------------------------------------ */ 2598 /*ARGSUSED*/ 2599 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg) 2600 { 2601 struct sockaddr_in6 *sin6; 2602 hook_nic_event_t *hn; 2603 ipf_stack_t *ifs = arg; 2604 void *new_ifp = NULL; 2605 2606 if (ifs->ifs_fr_running <= 0) 2607 return (0); 2608 2609 hn = (hook_nic_event_t *)info; 2610 2611 switch (hn->hne_event) 2612 { 2613 case NE_PLUMB : 2614 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2615 hn->hne_data, ifs); 2616 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2617 hn->hne_data, ifs); 2618 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2619 hn->hne_data, ifs); 2620 break; 2621 2622 case NE_UNPLUMB : 2623 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2624 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, 2625 ifs); 2626 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2627 break; 2628 2629 case NE_ADDRESS_CHANGE : 2630 if (hn->hne_lif == 1) { 2631 sin6 = hn->hne_data; 2632 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr, 2633 ifs); 2634 } 2635 break; 2636 2637 #if SOLARIS2 >= 10 2638 case NE_IFINDEX_CHANGE : 2639 WRITE_ENTER(&ifs->ifs_ipf_mutex); 
2640 if (hn->hne_data != NULL) { 2641 /* 2642 * The netinfo passes interface index as int (hne_data should be 2643 * handled as a pointer to int), which is always 32bit. We need to 2644 * convert it to void pointer here, since interfaces are 2645 * represented as pointers to void in IPF. The pointers are 64 bits 2646 * long on 64bit platforms. Doing something like 2647 * (void *)((int) x) 2648 * will throw warning: 2649 * "cast to pointer from integer of different size" 2650 * during 64bit compilation. 2651 * 2652 * The line below uses (size_t) to typecast int to 2653 * size_t, which might be 64bit/32bit (depending 2654 * on architecture). Once we have proper 64bit/32bit 2655 * type (size_t), we can safely convert it to void pointer. 2656 */ 2657 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2658 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2659 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2660 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2661 } 2662 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2663 break; 2664 #endif 2665 2666 default : 2667 break; 2668 } 2669 2670 return 0; 2671 } 2672 2673 /* 2674 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6() 2675 * are needed in Solaris kernel only. We don't need them in 2676 * ipftest to pretend the ICMP/RST packet was sent as a response. 2677 */ 2678 #if defined(_KERNEL) && (SOLARIS2 >= 10) 2679 /* ------------------------------------------------------------------------ */ 2680 /* Function: fr_make_rst */ 2681 /* Returns: int - 0 on success, -1 on failure */ 2682 /* Parameters: fin(I) - pointer to packet information */ 2683 /* */ 2684 /* We must alter the original mblks passed to IPF from IP stack via */ 2685 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. 
*/ 2686 /* IPF can basicaly do only these things with mblk representing the packet: */ 2687 /* leave it as it is (pass the packet) */ 2688 /* */ 2689 /* discard it (block the packet) */ 2690 /* */ 2691 /* alter it (i.e. NAT) */ 2692 /* */ 2693 /* As you can see IPF can not simply discard the mblk and supply a new one */ 2694 /* instead to IP stack via FW_HOOKS. */ 2695 /* */ 2696 /* The return-rst action for packets coming via NIC is handled as follows: */ 2697 /* mblk with packet is discarded */ 2698 /* */ 2699 /* new mblk with RST response is constructed and injected to network */ 2700 /* */ 2701 /* IPF can't inject packets to loopback interface, this is just another */ 2702 /* limitation we have to deal with here. The only option to send RST */ 2703 /* response to offending TCP packet coming via loopback is to alter it. */ 2704 /* */ 2705 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */ 2706 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */ 2707 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */ 2708 /* ------------------------------------------------------------------------ */ 2709 int fr_make_rst(fin) 2710 fr_info_t *fin; 2711 { 2712 uint16_t tmp_port; 2713 int rv = -1; 2714 uint32_t old_ack; 2715 tcphdr_t *tcp = NULL; 2716 struct in_addr tmp_src; 2717 #ifdef USE_INET6 2718 struct in6_addr tmp_src6; 2719 #endif 2720 2721 ASSERT(fin->fin_p == IPPROTO_TCP); 2722 2723 /* 2724 * We do not need to adjust chksum, since it is not being checked by 2725 * Solaris IP stack for loopback clients. 2726 */ 2727 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) && 2728 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2729 2730 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2731 /* Swap IPv4 addresses. 
*/ 2732 tmp_src = fin->fin_ip->ip_src; 2733 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2734 fin->fin_ip->ip_dst = tmp_src; 2735 2736 rv = 0; 2737 } 2738 else 2739 tcp = NULL; 2740 } 2741 #ifdef USE_INET6 2742 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) && 2743 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2744 /* 2745 * We are relying on fact the next header is TCP, which is true 2746 * for regular TCP packets coming in over loopback. 2747 */ 2748 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2749 /* Swap IPv6 addresses. */ 2750 tmp_src6 = fin->fin_ip6->ip6_src; 2751 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2752 fin->fin_ip6->ip6_dst = tmp_src6; 2753 2754 rv = 0; 2755 } 2756 else 2757 tcp = NULL; 2758 } 2759 #endif 2760 2761 if (tcp != NULL) { 2762 /* 2763 * Adjust TCP header: 2764 * swap ports, 2765 * set flags, 2766 * set correct ACK number 2767 */ 2768 tmp_port = tcp->th_sport; 2769 tcp->th_sport = tcp->th_dport; 2770 tcp->th_dport = tmp_port; 2771 old_ack = tcp->th_ack; 2772 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1); 2773 tcp->th_seq = old_ack; 2774 tcp->th_flags = TH_RST | TH_ACK; 2775 } 2776 2777 return (rv); 2778 } 2779 2780 /* ------------------------------------------------------------------------ */ 2781 /* Function: fr_make_icmp_v4 */ 2782 /* Returns: int - 0 on success, -1 on failure */ 2783 /* Parameters: fin(I) - pointer to packet information */ 2784 /* */ 2785 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2786 /* what is going to happen here and why. Once you read the comment there, */ 2787 /* continue here with next paragraph. */ 2788 /* */ 2789 /* To turn IPv4 packet into ICMPv4 response packet, these things must */ 2790 /* happen here: */ 2791 /* (1) Original mblk is copied (duplicated). */ 2792 /* */ 2793 /* (2) ICMP header is created. */ 2794 /* */ 2795 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */ 2796 /* data ready then. 
*/ 2797 /* */ 2798 /* (4) Swap IP addresses in original mblk and adjust IP header data. */ 2799 /* */ 2800 /* (5) The mblk containing original packet is trimmed to contain IP */ 2801 /* header only and ICMP chksum is computed. */ 2802 /* */ 2803 /* (6) The ICMP header we have from (3) is linked to original mblk, */ 2804 /* which now contains new IP header. If original packet was spread */ 2805 /* over several mblks, only the first mblk is kept. */ 2806 /* ------------------------------------------------------------------------ */ 2807 static int fr_make_icmp_v4(fin) 2808 fr_info_t *fin; 2809 { 2810 struct in_addr tmp_src; 2811 tcphdr_t *tcp; 2812 struct icmp *icmp; 2813 mblk_t *mblk_icmp; 2814 mblk_t *mblk_ip; 2815 size_t icmp_pld_len; /* octets to append to ICMP header */ 2816 size_t orig_iphdr_len; /* length of IP header only */ 2817 uint32_t sum; 2818 uint16_t *buf; 2819 int len; 2820 2821 2822 if (fin->fin_v != 4) 2823 return (-1); 2824 2825 /* 2826 * If we are dealing with TCP, then packet must be SYN/FIN to be routed 2827 * by IP stack. If it is not SYN/FIN, then we must drop it silently. 2828 */ 2829 tcp = (tcphdr_t *) fin->fin_dp; 2830 2831 if ((fin->fin_p == IPPROTO_TCP) && 2832 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2833 return (-1); 2834 2835 /* 2836 * Step (1) 2837 * 2838 * Make copy of original mblk. 2839 * 2840 * We want to copy as much data as necessary, not less, not more. The 2841 * ICMPv4 payload length for unreachable messages is: 2842 * original IP header + 8 bytes of L4 (if there are any). 2843 * 2844 * We determine if there are at least 8 bytes of L4 data following IP 2845 * header first. 2846 */ 2847 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ? 2848 ICMPERR_ICMPHLEN : fin->fin_dlen; 2849 /* 2850 * Since we don't want to copy more data than necessary, we must trim 2851 * the original mblk here. The right way (STREAMish) would be to use 2852 * adjmsg() to trim it. 
However we would have to calculate the length 2853 * argument for adjmsg() from pointers we already have here. 2854 * 2855 * Since we have pointers and offsets, it's faster and easier for 2856 * us to just adjust pointers by hand instead of using adjmsg(). 2857 */ 2858 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp; 2859 fin->fin_m->b_wptr += icmp_pld_len; 2860 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip; 2861 2862 /* 2863 * Also we don't want to copy any L2 stuff, which might precede IP 2864 * header, so we have have to set b_rptr to point to the start of IP 2865 * header. 2866 */ 2867 fin->fin_m->b_rptr += fin->fin_ipoff; 2868 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2869 return (-1); 2870 fin->fin_m->b_rptr -= fin->fin_ipoff; 2871 2872 /* 2873 * Step (2) 2874 * 2875 * Create an ICMP header, which will be appened to original mblk later. 2876 * ICMP header is just another mblk. 2877 */ 2878 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI); 2879 if (mblk_icmp == NULL) { 2880 FREE_MB_T(mblk_ip); 2881 return (-1); 2882 } 2883 2884 MTYPE(mblk_icmp) = M_DATA; 2885 icmp = (struct icmp *) mblk_icmp->b_wptr; 2886 icmp->icmp_type = ICMP_UNREACH; 2887 icmp->icmp_code = fin->fin_icode & 0xFF; 2888 icmp->icmp_void = 0; 2889 icmp->icmp_cksum = 0; 2890 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN; 2891 2892 /* 2893 * Step (3) 2894 * 2895 * Complete ICMP packet - link ICMP header with L4 data from original 2896 * IP packet. 2897 */ 2898 linkb(mblk_icmp, mblk_ip); 2899 2900 /* 2901 * Step (4) 2902 * 2903 * Swap IP addresses and change IP header fields accordingly in 2904 * original IP packet. 2905 * 2906 * There is a rule option return-icmp as a dest for physical 2907 * interfaces. This option becomes useless for loopback, since IPF box 2908 * uses same address as a loopback destination. We ignore the option 2909 * here, the ICMP packet will always look like as it would have been 2910 * sent from the original destination host. 
2911 */ 2912 tmp_src = fin->fin_ip->ip_src; 2913 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2914 fin->fin_ip->ip_dst = tmp_src; 2915 fin->fin_ip->ip_p = IPPROTO_ICMP; 2916 fin->fin_ip->ip_sum = 0; 2917 2918 /* 2919 * Step (5) 2920 * 2921 * We trim the orignal mblk to hold IP header only. 2922 */ 2923 fin->fin_m->b_wptr = fin->fin_dp; 2924 orig_iphdr_len = fin->fin_m->b_wptr - 2925 (fin->fin_m->b_rptr + fin->fin_ipoff); 2926 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN + 2927 orig_iphdr_len); 2928 2929 /* 2930 * ICMP chksum calculation. The data we are calculating chksum for are 2931 * spread over two mblks, therefore we have to use two for loops. 2932 * 2933 * First for loop computes chksum part for ICMP header. 2934 */ 2935 buf = (uint16_t *) icmp; 2936 len = ICMPERR_ICMPHLEN; 2937 for (sum = 0; len > 1; len -= 2) 2938 sum += *buf++; 2939 2940 /* 2941 * Here we add chksum part for ICMP payload. 2942 */ 2943 len = icmp_pld_len; 2944 buf = (uint16_t *) mblk_ip->b_rptr; 2945 for (; len > 1; len -= 2) 2946 sum += *buf++; 2947 2948 /* 2949 * Chksum is done. 2950 */ 2951 sum = (sum >> 16) + (sum & 0xffff); 2952 sum += (sum >> 16); 2953 icmp->icmp_cksum = ~sum; 2954 2955 /* 2956 * Step (6) 2957 * 2958 * Release all packet mblks, except the first one. 2959 */ 2960 if (fin->fin_m->b_cont != NULL) { 2961 FREE_MB_T(fin->fin_m->b_cont); 2962 } 2963 2964 /* 2965 * Append ICMP payload to first mblk, which already contains new IP 2966 * header. 2967 */ 2968 linkb(fin->fin_m, mblk_icmp); 2969 2970 return (0); 2971 } 2972 2973 #ifdef USE_INET6 2974 /* ------------------------------------------------------------------------ */ 2975 /* Function: fr_make_icmp_v6 */ 2976 /* Returns: int - 0 on success, -1 on failure */ 2977 /* Parameters: fin(I) - pointer to packet information */ 2978 /* */ 2979 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2980 /* what and why is going to happen here. 
Once you read the comment there, */ 2981 /* continue here with next paragraph. */ 2982 /* */ 2983 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */ 2984 /* The algorithm is fairly simple: */ 2985 /* 1) We need to get copy of complete mblk. */ 2986 /* */ 2987 /* 2) New ICMPv6 header is created. */ 2988 /* */ 2989 /* 3) The copy of original mblk with packet is linked to ICMPv6 */ 2990 /* header. */ 2991 /* */ 2992 /* 4) The checksum must be adjusted. */ 2993 /* */ 2994 /* 5) IP addresses in original mblk are swapped and IP header data */ 2995 /* are adjusted (protocol number). */ 2996 /* */ 2997 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */ 2998 /* linked with the ICMPv6 data we got from (3). */ 2999 /* ------------------------------------------------------------------------ */ 3000 static int fr_make_icmp_v6(fin) 3001 fr_info_t *fin; 3002 { 3003 struct icmp6_hdr *icmp6; 3004 tcphdr_t *tcp; 3005 struct in6_addr tmp_src6; 3006 size_t icmp_pld_len; 3007 mblk_t *mblk_ip, *mblk_icmp; 3008 3009 if (fin->fin_v != 6) 3010 return (-1); 3011 3012 /* 3013 * If we are dealing with TCP, then packet must SYN/FIN to be routed by 3014 * IP stack. If it is not SYN/FIN, then we must drop it silently. 3015 */ 3016 tcp = (tcphdr_t *) fin->fin_dp; 3017 3018 if ((fin->fin_p == IPPROTO_TCP) && 3019 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 3020 return (-1); 3021 3022 /* 3023 * Step (1) 3024 * 3025 * We need to copy complete packet in case of IPv6, no trimming is 3026 * needed (except the L2 headers). 3027 */ 3028 icmp_pld_len = M_LEN(fin->fin_m); 3029 fin->fin_m->b_rptr += fin->fin_ipoff; 3030 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 3031 return (-1); 3032 fin->fin_m->b_rptr -= fin->fin_ipoff; 3033 3034 /* 3035 * Step (2) 3036 * 3037 * Allocate and create ICMP header. 
3038 */ 3039 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr), 3040 BPRI_HI); 3041 3042 if (mblk_icmp == NULL) 3043 return (-1); 3044 3045 MTYPE(mblk_icmp) = M_DATA; 3046 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr; 3047 icmp6->icmp6_type = ICMP6_DST_UNREACH; 3048 icmp6->icmp6_code = fin->fin_icode & 0xFF; 3049 icmp6->icmp6_data32[0] = 0; 3050 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr); 3051 3052 /* 3053 * Step (3) 3054 * 3055 * Link the copy of IP packet to ICMP header. 3056 */ 3057 linkb(mblk_icmp, mblk_ip); 3058 3059 /* 3060 * Step (4) 3061 * 3062 * Calculate chksum - this is much more easier task than in case of 3063 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length. 3064 * We are making compensation just for change of packet length. 3065 */ 3066 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr); 3067 3068 /* 3069 * Step (5) 3070 * 3071 * Swap IP addresses. 3072 */ 3073 tmp_src6 = fin->fin_ip6->ip6_src; 3074 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 3075 fin->fin_ip6->ip6_dst = tmp_src6; 3076 3077 /* 3078 * and adjust IP header data. 3079 */ 3080 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6; 3081 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr)); 3082 3083 /* 3084 * Step (6) 3085 * 3086 * We must release all linked mblks from original packet and keep only 3087 * the first mblk with IP header to link ICMP data. 3088 */ 3089 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t); 3090 3091 if (fin->fin_m->b_cont != NULL) { 3092 FREE_MB_T(fin->fin_m->b_cont); 3093 } 3094 3095 /* 3096 * Append ICMP payload to IP header. 
3097 */ 3098 linkb(fin->fin_m, mblk_icmp); 3099 3100 return (0); 3101 } 3102 #endif /* USE_INET6 */ 3103 3104 /* ------------------------------------------------------------------------ */ 3105 /* Function: fr_make_icmp */ 3106 /* Returns: int - 0 on success, -1 on failure */ 3107 /* Parameters: fin(I) - pointer to packet information */ 3108 /* */ 3109 /* We must alter the original mblks passed to IPF from IP stack via */ 3110 /* FW_HOOKS. The reasons why we must alter packet are discussed within */ 3111 /* comment at fr_make_rst() function. */ 3112 /* */ 3113 /* The fr_make_icmp() function acts as a wrapper, which passes the code */ 3114 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */ 3115 /* protocol version. However there are some details, which are common to */ 3116 /* both IP versions. The details are going to be explained here. */ 3117 /* */ 3118 /* The packet looks as follows: */ 3119 /* xxx | IP hdr | IP payload ... | */ 3120 /* ^ ^ ^ ^ */ 3121 /* | | | | */ 3122 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */ 3123 /* | | | */ 3124 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */ 3125 /* | | */ 3126 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */ 3127 /* | of loopback) */ 3128 /* | */ 3129 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */ 3130 /* */ 3131 /* All relevant IP headers are pulled up into the first mblk. It happened */ 3132 /* well in advance before the matching rule was found (the rule, which took */ 3133 /* us here, to fr_make_icmp() function). */ 3134 /* */ 3135 /* Both functions will turn packet passed in fin->fin_m mblk into a new */ 3136 /* packet. New packet will be represented as chain of mblks. */ 3137 /* orig mblk |- b_cont ---. */ 3138 /* ^ `-> ICMP hdr |- b_cont--. 
*/ 3139 /* | ^ `-> duped orig mblk */ 3140 /* | | ^ */ 3141 /* `- The original mblk | | */ 3142 /* will be trimmed to | | */ 3143 /* to contain IP header | | */ 3144 /* only | | */ 3145 /* | | */ 3146 /* `- This is newly | */ 3147 /* allocated mblk to | */ 3148 /* hold ICMPv6 data. | */ 3149 /* | */ 3150 /* | */ 3151 /* | */ 3152 /* This is the copy of original mblk, it will contain -' */ 3153 /* orignal IP packet in case of ICMPv6. In case of */ 3154 /* ICMPv4 it will contain up to 8 bytes of IP payload */ 3155 /* (TCP/UDP/L4) data from original packet. */ 3156 /* ------------------------------------------------------------------------ */ 3157 int fr_make_icmp(fin) 3158 fr_info_t *fin; 3159 { 3160 int rv; 3161 3162 if (fin->fin_v == 4) 3163 rv = fr_make_icmp_v4(fin); 3164 #ifdef USE_INET6 3165 else if (fin->fin_v == 6) 3166 rv = fr_make_icmp_v6(fin); 3167 #endif 3168 else 3169 rv = -1; 3170 3171 return (rv); 3172 } 3173 3174 /* ------------------------------------------------------------------------ */ 3175 /* Function: fr_buf_sum */ 3176 /* Returns: unsigned int - sum of buffer buf */ 3177 /* Parameters: buf - pointer to buf we want to sum up */ 3178 /* len - length of buffer buf */ 3179 /* */ 3180 /* Sums buffer buf. The result is used for chksum calculation. The buf */ 3181 /* argument must be aligned. 
*/ 3182 /* ------------------------------------------------------------------------ */ 3183 static uint32_t fr_buf_sum(buf, len) 3184 const void *buf; 3185 unsigned int len; 3186 { 3187 uint32_t sum = 0; 3188 uint16_t *b = (uint16_t *)buf; 3189 3190 while (len > 1) { 3191 sum += *b++; 3192 len -= 2; 3193 } 3194 3195 if (len == 1) 3196 sum += htons((*(unsigned char *)b) << 8); 3197 3198 return (sum); 3199 } 3200 3201 /* ------------------------------------------------------------------------ */ 3202 /* Function: fr_calc_chksum */ 3203 /* Returns: void */ 3204 /* Parameters: fin - pointer to fr_info_t instance with packet data */ 3205 /* pkt - pointer to duplicated packet */ 3206 /* */ 3207 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */ 3208 /* versions. */ 3209 /* ------------------------------------------------------------------------ */ 3210 void fr_calc_chksum(fin, pkt) 3211 fr_info_t *fin; 3212 mb_t *pkt; 3213 { 3214 struct pseudo_hdr { 3215 union { 3216 struct in_addr in4; 3217 #ifdef USE_INET6 3218 struct in6_addr in6; 3219 #endif 3220 } src_addr; 3221 union { 3222 struct in_addr in4; 3223 #ifdef USE_INET6 3224 struct in6_addr in6; 3225 #endif 3226 } dst_addr; 3227 char zero; 3228 char proto; 3229 uint16_t len; 3230 } phdr; 3231 uint32_t sum, ip_sum; 3232 void *buf; 3233 uint16_t *l4_csum_p; 3234 tcphdr_t *tcp; 3235 udphdr_t *udp; 3236 icmphdr_t *icmp; 3237 #ifdef USE_INET6 3238 struct icmp6_hdr *icmp6; 3239 #endif 3240 ip_t *ip; 3241 unsigned int len; 3242 int pld_len; 3243 3244 /* 3245 * We need to pullup the packet to the single continuous buffer to avoid 3246 * potential misaligment of b_rptr member in mblk chain. 3247 */ 3248 if (pullupmsg(pkt, -1) == 0) { 3249 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum" 3250 " will not be computed by IPF"); 3251 return; 3252 } 3253 3254 /* 3255 * It is guaranteed IP header starts right at b_rptr, because we are 3256 * working with a copy of the original packet. 
3257 * 3258 * Compute pseudo header chksum for TCP and UDP. 3259 */ 3260 if ((fin->fin_p == IPPROTO_UDP) || 3261 (fin->fin_p == IPPROTO_TCP)) { 3262 bzero(&phdr, sizeof (phdr)); 3263 #ifdef USE_INET6 3264 if (fin->fin_v == 6) { 3265 phdr.src_addr.in6 = fin->fin_srcip6; 3266 phdr.dst_addr.in6 = fin->fin_dstip6; 3267 } else { 3268 phdr.src_addr.in4 = fin->fin_src; 3269 phdr.dst_addr.in4 = fin->fin_dst; 3270 } 3271 #else 3272 phdr.src_addr.in4 = fin->fin_src; 3273 phdr.dst_addr.in4 = fin->fin_dst; 3274 #endif 3275 phdr.zero = (char) 0; 3276 phdr.proto = fin->fin_p; 3277 phdr.len = htons((uint16_t)fin->fin_dlen); 3278 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr)); 3279 } else { 3280 sum = 0; 3281 } 3282 3283 /* 3284 * Set pointer to the L4 chksum field in the packet, set buf pointer to 3285 * the L4 header start. 3286 */ 3287 switch (fin->fin_p) { 3288 case IPPROTO_UDP: 3289 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen); 3290 l4_csum_p = &udp->uh_sum; 3291 buf = udp; 3292 break; 3293 case IPPROTO_TCP: 3294 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen); 3295 l4_csum_p = &tcp->th_sum; 3296 buf = tcp; 3297 break; 3298 case IPPROTO_ICMP: 3299 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen); 3300 l4_csum_p = &icmp->icmp_cksum; 3301 buf = icmp; 3302 break; 3303 #ifdef USE_INET6 3304 case IPPROTO_ICMPV6: 3305 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen); 3306 l4_csum_p = &icmp6->icmp6_cksum; 3307 buf = icmp6; 3308 break; 3309 #endif 3310 default: 3311 l4_csum_p = NULL; 3312 } 3313 3314 /* 3315 * Compute L4 chksum if needed. 3316 */ 3317 if (l4_csum_p != NULL) { 3318 *l4_csum_p = (uint16_t)0; 3319 pld_len = fin->fin_dlen; 3320 len = pkt->b_wptr - (unsigned char *)buf; 3321 ASSERT(len == pld_len); 3322 /* 3323 * Add payload sum to pseudoheader sum. 
3324 */ 3325 sum += fr_buf_sum(buf, len); 3326 while (sum >> 16) 3327 sum = (sum & 0xFFFF) + (sum >> 16); 3328 3329 *l4_csum_p = ~((uint16_t)sum); 3330 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p); 3331 } 3332 3333 /* 3334 * The IP header chksum is needed just for IPv4. 3335 */ 3336 if (fin->fin_v == 4) { 3337 /* 3338 * Compute IPv4 header chksum. 3339 */ 3340 ip = (ip_t *)pkt->b_rptr; 3341 ip->ip_sum = (uint16_t)0; 3342 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen); 3343 while (ip_sum >> 16) 3344 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16); 3345 3346 ip->ip_sum = ~((uint16_t)ip_sum); 3347 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum); 3348 } 3349 3350 return; 3351 } 3352 3353 #endif /* _KERNEL && SOLARIS2 >= 10 */ 3354