1 /* 2 * Copyright (C) 1993-2001, 2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 * 8 * Copyright (c) 2015, Joyent, Inc. All rights reserved. 9 */ 10 11 #if !defined(lint) 12 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed"; 13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $"; 14 #endif 15 16 #include <sys/types.h> 17 #include <sys/errno.h> 18 #include <sys/param.h> 19 #include <sys/cpuvar.h> 20 #include <sys/open.h> 21 #include <sys/ioctl.h> 22 #include <sys/filio.h> 23 #include <sys/systm.h> 24 #include <sys/strsubr.h> 25 #include <sys/cred.h> 26 #include <sys/ddi.h> 27 #include <sys/sunddi.h> 28 #include <sys/ksynch.h> 29 #include <sys/kmem.h> 30 #include <sys/mkdev.h> 31 #include <sys/protosw.h> 32 #include <sys/socket.h> 33 #include <sys/dditypes.h> 34 #include <sys/cmn_err.h> 35 #include <sys/zone.h> 36 #include <net/if.h> 37 #include <net/af.h> 38 #include <net/route.h> 39 #include <netinet/in.h> 40 #include <netinet/in_systm.h> 41 #include <netinet/ip.h> 42 #include <netinet/ip_var.h> 43 #include <netinet/tcp.h> 44 #include <netinet/udp.h> 45 #include <netinet/tcpip.h> 46 #include <netinet/ip_icmp.h> 47 #include "netinet/ip_compat.h" 48 #ifdef USE_INET6 49 # include <netinet/icmp6.h> 50 #endif 51 #include "netinet/ip_fil.h" 52 #include "netinet/ip_nat.h" 53 #include "netinet/ip_frag.h" 54 #include "netinet/ip_state.h" 55 #include "netinet/ip_auth.h" 56 #include "netinet/ip_proxy.h" 57 #include "netinet/ipf_stack.h" 58 #ifdef IPFILTER_LOOKUP 59 # include "netinet/ip_lookup.h" 60 #endif 61 #include <inet/ip_ire.h> 62 63 #include <sys/md5.h> 64 #include <sys/neti.h> 65 66 static int frzerostats __P((caddr_t, ipf_stack_t *)); 67 static int fr_setipfloopback __P((int, ipf_stack_t *)); 68 static int fr_enableipf __P((ipf_stack_t *, int)); 69 static 
int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp)); 70 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *)); 71 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *)); 72 static int ipf_hook __P((hook_data_t, int, int, void *)); 73 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *)); 74 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *)); 75 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t, 76 void *)); 77 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *)); 78 static int ipf_hook4 __P((hook_data_t, int, int, void *)); 79 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *)); 80 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *)); 81 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t, 82 void *)); 83 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t, 84 void *)); 85 static int ipf_hook6 __P((hook_data_t, int, int, void *)); 86 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 87 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *)); 88 89 #if SOLARIS2 < 10 90 #if SOLARIS2 >= 7 91 u_int *ip_ttl_ptr = NULL; 92 u_int *ip_mtudisc = NULL; 93 # if SOLARIS2 >= 8 94 int *ip_forwarding = NULL; 95 u_int *ip6_forwarding = NULL; 96 # else 97 u_int *ip_forwarding = NULL; 98 # endif 99 #else 100 u_long *ip_ttl_ptr = NULL; 101 u_long *ip_mtudisc = NULL; 102 u_long *ip_forwarding = NULL; 103 #endif 104 #endif 105 106 vmem_t *ipf_minor; /* minor number arena */ 107 void *ipf_state; /* DDI state */ 108 109 /* 110 * GZ-controlled and per-zone stacks: 111 * 112 * For each non-global zone, we create two ipf stacks: the per-zone stack and 113 * the GZ-controlled stack. The per-zone stack can be controlled and observed 114 * from inside the zone or from the global zone. 
The GZ-controlled stack can 115 * only be controlled and observed from the global zone (though the rules 116 * still only affect that non-global zone). 117 * 118 * The two hooks are always arranged so that the GZ-controlled stack is always 119 * "outermost" with respect to the zone. The traffic flow then looks like 120 * this: 121 * 122 * Inbound: 123 * 124 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone 125 * 126 * Outbound: 127 * 128 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone 129 */ 130 131 /* IPv4 hook names */ 132 char *hook4_nicevents = "ipfilter_hook4_nicevents"; 133 char *hook4_nicevents_gz = "ipfilter_hook4_nicevents_gz"; 134 char *hook4_in = "ipfilter_hook4_in"; 135 char *hook4_in_gz = "ipfilter_hook4_in_gz"; 136 char *hook4_out = "ipfilter_hook4_out"; 137 char *hook4_out_gz = "ipfilter_hook4_out_gz"; 138 char *hook4_loop_in = "ipfilter_hook4_loop_in"; 139 char *hook4_loop_in_gz = "ipfilter_hook4_loop_in_gz"; 140 char *hook4_loop_out = "ipfilter_hook4_loop_out"; 141 char *hook4_loop_out_gz = "ipfilter_hook4_loop_out_gz"; 142 143 /* IPv6 hook names */ 144 char *hook6_nicevents = "ipfilter_hook6_nicevents"; 145 char *hook6_nicevents_gz = "ipfilter_hook6_nicevents_gz"; 146 char *hook6_in = "ipfilter_hook6_in"; 147 char *hook6_in_gz = "ipfilter_hook6_in_gz"; 148 char *hook6_out = "ipfilter_hook6_out"; 149 char *hook6_out_gz = "ipfilter_hook6_out_gz"; 150 char *hook6_loop_in = "ipfilter_hook6_loop_in"; 151 char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz"; 152 char *hook6_loop_out = "ipfilter_hook6_loop_out"; 153 char *hook6_loop_out_gz = "ipfilter_hook6_loop_out_gz"; 154 155 /* ------------------------------------------------------------------------ */ 156 /* Function: ipldetach */ 157 /* Returns: int - 0 == success, else error. */ 158 /* Parameters: Nil */ 159 /* */ 160 /* This function is responsible for undoing anything that might have been */ 161 /* done in a call to iplattach(). 
It must be able to clean up from a call */ 162 /* to iplattach() that did not succeed. Why might that happen? Someone */ 163 /* configures a table to be so large that we cannot allocate enough memory */ 164 /* for it. */ 165 /* ------------------------------------------------------------------------ */ 166 int ipldetach(ifs) 167 ipf_stack_t *ifs; 168 { 169 170 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); 171 172 #if SOLARIS2 < 10 173 174 if (ifs->ifs_fr_control_forwarding & 2) { 175 if (ip_forwarding != NULL) 176 *ip_forwarding = 0; 177 #if SOLARIS2 >= 8 178 if (ip6_forwarding != NULL) 179 *ip6_forwarding = 0; 180 #endif 181 } 182 #endif 183 184 /* 185 * This lock needs to be dropped around the net_hook_unregister calls 186 * because we can deadlock here with: 187 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 188 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running) 189 */ 190 RWLOCK_EXIT(&ifs->ifs_ipf_global); 191 192 #define UNDO_HOOK(_f, _b, _e, _h) \ 193 do { \ 194 if (ifs->_f != NULL) { \ 195 if (ifs->_b) { \ 196 int tmp = net_hook_unregister(ifs->_f, \ 197 _e, ifs->_h); \ 198 ifs->_b = (tmp != 0 && tmp != ENXIO); \ 199 if (!ifs->_b && ifs->_h != NULL) { \ 200 hook_free(ifs->_h); \ 201 ifs->_h = NULL; \ 202 } \ 203 } else if (ifs->_h != NULL) { \ 204 hook_free(ifs->_h); \ 205 ifs->_h = NULL; \ 206 } \ 207 } \ 208 _NOTE(CONSTCOND) \ 209 } while (0) 210 211 /* 212 * Remove IPv6 Hooks 213 */ 214 if (ifs->ifs_ipf_ipv6 != NULL) { 215 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in, 216 NH_PHYSICAL_IN, ifs_ipfhook6_in); 217 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out, 218 NH_PHYSICAL_OUT, ifs_ipfhook6_out); 219 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events, 220 NH_NIC_EVENTS, ifs_ipfhook6_nicevents); 221 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in, 222 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in); 223 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out, 224 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out); 225 226 if 
(net_protocol_release(ifs->ifs_ipf_ipv6) != 0) 227 goto detach_failed; 228 ifs->ifs_ipf_ipv6 = NULL; 229 } 230 231 /* 232 * Remove IPv4 Hooks 233 */ 234 if (ifs->ifs_ipf_ipv4 != NULL) { 235 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in, 236 NH_PHYSICAL_IN, ifs_ipfhook4_in); 237 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out, 238 NH_PHYSICAL_OUT, ifs_ipfhook4_out); 239 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events, 240 NH_NIC_EVENTS, ifs_ipfhook4_nicevents); 241 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in, 242 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in); 243 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out, 244 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out); 245 246 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0) 247 goto detach_failed; 248 ifs->ifs_ipf_ipv4 = NULL; 249 } 250 251 #undef UNDO_HOOK 252 253 #ifdef IPFDEBUG 254 cmn_err(CE_CONT, "ipldetach()\n"); 255 #endif 256 257 WRITE_ENTER(&ifs->ifs_ipf_global); 258 fr_deinitialise(ifs); 259 260 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); 261 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs); 262 263 if (ifs->ifs_ipf_locks_done == 1) { 264 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock); 265 MUTEX_DESTROY(&ifs->ifs_ipf_rw); 266 RW_DESTROY(&ifs->ifs_ipf_tokens); 267 RW_DESTROY(&ifs->ifs_ipf_ipidfrag); 268 ifs->ifs_ipf_locks_done = 0; 269 } 270 271 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out || 272 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in || 273 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events || 274 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out || 275 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out) 276 return -1; 277 278 return 0; 279 280 detach_failed: 281 WRITE_ENTER(&ifs->ifs_ipf_global); 282 return -1; 283 } 284 285 int iplattach(ifs) 286 ipf_stack_t *ifs; 287 { 288 #if SOLARIS2 < 10 289 int i; 290 #endif 291 netid_t id = ifs->ifs_netid; 292 293 #ifdef IPFDEBUG 294 cmn_err(CE_CONT, "iplattach()\n"); 295 #endif 296 297 
ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); 298 ifs->ifs_fr_flags = IPF_LOGGING; 299 #ifdef _KERNEL 300 ifs->ifs_fr_update_ipid = 0; 301 #else 302 ifs->ifs_fr_update_ipid = 1; 303 #endif 304 ifs->ifs_fr_minttl = 4; 305 ifs->ifs_fr_icmpminfragmtu = 68; 306 #if defined(IPFILTER_DEFAULT_BLOCK) 307 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH; 308 #else 309 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; 310 #endif 311 312 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache)); 313 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex"); 314 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex"); 315 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); 316 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock"); 317 ifs->ifs_ipf_locks_done = 1; 318 319 if (fr_initialise(ifs) < 0) 320 return -1; 321 322 /* 323 * For incoming packets, we want the GZ-controlled hooks to run before 324 * the per-zone hooks, regardless of what order they're are installed. 325 * See the "GZ-controlled and per-zone stacks" comment block at the top 326 * of this file. 327 */ 328 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \ 329 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ 330 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \ 331 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); 332 333 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4, 334 hook4_nicevents, hook4_nicevents_gz, ifs); 335 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in, 336 hook4_in, hook4_in_gz, ifs); 337 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in, 338 hook4_loop_in, hook4_loop_in_gz, ifs); 339 340 /* 341 * For outgoing packets, we want the GZ-controlled hooks to run after 342 * the per-zone hooks, regardless of what order they're are installed. 343 * See the "GZ-controlled and per-zone stacks" comment block at the top 344 * of this file. 
345 */ 346 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \ 347 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ 348 (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \ 349 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); 350 351 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out, 352 hook4_out, hook4_out_gz, ifs); 353 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out, 354 hook4_loop_out, hook4_loop_out_gz, ifs); 355 356 /* 357 * If we hold this lock over all of the net_hook_register calls, we 358 * can cause a deadlock to occur with the following lock ordering: 359 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 360 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path) 361 */ 362 RWLOCK_EXIT(&ifs->ifs_ipf_global); 363 364 /* 365 * Add IPv4 hooks 366 */ 367 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET); 368 if (ifs->ifs_ipf_ipv4 == NULL) 369 goto hookup_failed; 370 371 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4, 372 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0); 373 if (!ifs->ifs_hook4_nic_events) 374 goto hookup_failed; 375 376 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4, 377 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0); 378 if (!ifs->ifs_hook4_physical_in) 379 goto hookup_failed; 380 381 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4, 382 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0); 383 if (!ifs->ifs_hook4_physical_out) 384 goto hookup_failed; 385 386 if (ifs->ifs_ipf_loopback) { 387 ifs->ifs_hook4_loopback_in = (net_hook_register( 388 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 389 ifs->ifs_ipfhook4_loop_in) == 0); 390 if (!ifs->ifs_hook4_loopback_in) 391 goto hookup_failed; 392 393 ifs->ifs_hook4_loopback_out = (net_hook_register( 394 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 395 ifs->ifs_ipfhook4_loop_out) == 0); 396 if (!ifs->ifs_hook4_loopback_out) 397 goto hookup_failed; 398 } 399 400 /* 401 * Add IPv6 hooks 402 */ 
403 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6); 404 if (ifs->ifs_ipf_ipv6 == NULL) 405 goto hookup_failed; 406 407 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6, 408 hook6_nicevents, hook6_nicevents_gz, ifs); 409 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in, 410 hook6_in, hook6_in_gz, ifs); 411 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in, 412 hook6_loop_in, hook6_loop_in_gz, ifs); 413 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out, 414 hook6_out, hook6_out_gz, ifs); 415 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out, 416 hook6_loop_out, hook6_loop_out_gz, ifs); 417 418 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6, 419 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0); 420 if (!ifs->ifs_hook6_nic_events) 421 goto hookup_failed; 422 423 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6, 424 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0); 425 if (!ifs->ifs_hook6_physical_in) 426 goto hookup_failed; 427 428 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6, 429 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0); 430 if (!ifs->ifs_hook6_physical_out) 431 goto hookup_failed; 432 433 if (ifs->ifs_ipf_loopback) { 434 ifs->ifs_hook6_loopback_in = (net_hook_register( 435 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 436 ifs->ifs_ipfhook6_loop_in) == 0); 437 if (!ifs->ifs_hook6_loopback_in) 438 goto hookup_failed; 439 440 ifs->ifs_hook6_loopback_out = (net_hook_register( 441 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 442 ifs->ifs_ipfhook6_loop_out) == 0); 443 if (!ifs->ifs_hook6_loopback_out) 444 goto hookup_failed; 445 } 446 447 /* 448 * Reacquire ipf_global, now it is safe. 
449 */ 450 WRITE_ENTER(&ifs->ifs_ipf_global); 451 452 /* Do not use private interface ip_params_arr[] in Solaris 10 */ 453 #if SOLARIS2 < 10 454 455 #if SOLARIS2 >= 8 456 ip_forwarding = &ip_g_forward; 457 #endif 458 /* 459 * XXX - There is no terminator for this array, so it is not possible 460 * to tell if what we are looking for is missing and go off the end 461 * of the array. 462 */ 463 464 #if SOLARIS2 <= 8 465 for (i = 0; ; i++) { 466 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) { 467 ip_ttl_ptr = &ip_param_arr[i].ip_param_value; 468 } else if (!strcmp(ip_param_arr[i].ip_param_name, 469 "ip_path_mtu_discovery")) { 470 ip_mtudisc = &ip_param_arr[i].ip_param_value; 471 } 472 #if SOLARIS2 < 8 473 else if (!strcmp(ip_param_arr[i].ip_param_name, 474 "ip_forwarding")) { 475 ip_forwarding = &ip_param_arr[i].ip_param_value; 476 } 477 #else 478 else if (!strcmp(ip_param_arr[i].ip_param_name, 479 "ip6_forwarding")) { 480 ip6_forwarding = &ip_param_arr[i].ip_param_value; 481 } 482 #endif 483 484 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL && 485 #if SOLARIS2 >= 8 486 ip6_forwarding != NULL && 487 #endif 488 ip_forwarding != NULL) 489 break; 490 } 491 #endif 492 493 if (ifs->ifs_fr_control_forwarding & 1) { 494 if (ip_forwarding != NULL) 495 *ip_forwarding = 1; 496 #if SOLARIS2 >= 8 497 if (ip6_forwarding != NULL) 498 *ip6_forwarding = 1; 499 #endif 500 } 501 502 #endif 503 504 return 0; 505 hookup_failed: 506 WRITE_ENTER(&ifs->ifs_ipf_global); 507 return -1; 508 } 509 510 static int fr_setipfloopback(set, ifs) 511 int set; 512 ipf_stack_t *ifs; 513 { 514 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL) 515 return EFAULT; 516 517 if (set && !ifs->ifs_ipf_loopback) { 518 ifs->ifs_ipf_loopback = 1; 519 520 ifs->ifs_hook4_loopback_in = (net_hook_register( 521 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 522 ifs->ifs_ipfhook4_loop_in) == 0); 523 if (!ifs->ifs_hook4_loopback_in) 524 return EINVAL; 525 526 ifs->ifs_hook4_loopback_out = (net_hook_register( 527 
ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 528 ifs->ifs_ipfhook4_loop_out) == 0); 529 if (!ifs->ifs_hook4_loopback_out) 530 return EINVAL; 531 532 ifs->ifs_hook6_loopback_in = (net_hook_register( 533 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 534 ifs->ifs_ipfhook6_loop_in) == 0); 535 if (!ifs->ifs_hook6_loopback_in) 536 return EINVAL; 537 538 ifs->ifs_hook6_loopback_out = (net_hook_register( 539 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 540 ifs->ifs_ipfhook6_loop_out) == 0); 541 if (!ifs->ifs_hook6_loopback_out) 542 return EINVAL; 543 544 } else if (!set && ifs->ifs_ipf_loopback) { 545 ifs->ifs_ipf_loopback = 0; 546 547 ifs->ifs_hook4_loopback_in = 548 (net_hook_unregister(ifs->ifs_ipf_ipv4, 549 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 550 if (ifs->ifs_hook4_loopback_in) 551 return EBUSY; 552 553 ifs->ifs_hook4_loopback_out = 554 (net_hook_unregister(ifs->ifs_ipf_ipv4, 555 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0); 556 if (ifs->ifs_hook4_loopback_out) 557 return EBUSY; 558 559 ifs->ifs_hook6_loopback_in = 560 (net_hook_unregister(ifs->ifs_ipf_ipv6, 561 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 562 if (ifs->ifs_hook6_loopback_in) 563 return EBUSY; 564 565 ifs->ifs_hook6_loopback_out = 566 (net_hook_unregister(ifs->ifs_ipf_ipv6, 567 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0); 568 if (ifs->ifs_hook6_loopback_out) 569 return EBUSY; 570 } 571 return 0; 572 } 573 574 575 /* 576 * Filter ioctl interface. 
*/
/*
 * iplioctl: ioctl entry point for the ipf devices.
 *
 * dev is mapped to its per-open soft state (ENXIO if there is none) and the
 * logical unit is taken from ipfs_minor.  SIOCIPFZONESET is handled before
 * any stack is looked up and is refused with EACCES unless the caller's
 * credential is in the global zone.  For everything else the stack is found
 * with ipf_find_stack(), which returns with ifs_ipf_global held as reader;
 * every return after that point releases the lock.
 *
 * While the stack is not running (ifs_fr_running <= 0) only the IPL_LOGIPF
 * unit and a small set of commands are accepted, anything else gets EIO.
 * EBUSY is returned while an enable/disable is in progress.
 *
 * fr_ioctlswitch() is tried first; any result other than -1 is returned
 * as-is, otherwise the command is dispatched by the switch below.
 */
/*ARGSUSED*/
int iplioctl(dev, cmd, data, mode, cp, rp)
dev_t dev;
int cmd;
#if SOLARIS2 >= 7
intptr_t data;
#else
int *data;
#endif
int mode;
cred_t *cp;
int *rp;
{
	int error = 0, tmp;
	friostat_t fio;
	minor_t unit;
	u_int enable;
	ipf_stack_t *ifs;
	zoneid_t zid;
	ipf_devstate_t *isp;

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
		dev, cmd, data, mode, cp, rp);
#endif
	unit = getminor(dev);

	isp = ddi_get_soft_state(ipf_state, unit);
	if (isp == NULL)
		return ENXIO;
	/* From here on "unit" is the logical unit recorded by iplopen(). */
	unit = isp->ipfs_minor;

	zid = crgetzoneid(cp);
	if (cmd == SIOCIPFZONESET) {
		if (zid == GLOBAL_ZONEID)
			return fr_setzoneid(isp, (caddr_t) data);
		return EACCES;
	}

	/*
	 * ipf_find_stack returns with a read lock on ifs_ipf_global
	 */
	ifs = ipf_find_stack(zid, isp);
	if (ifs == NULL)
		return ENXIO;

	if (ifs->ifs_fr_running <= 0) {
		if (unit != IPL_LOGIPF) {
			RWLOCK_EXIT(&ifs->ifs_ipf_global);
			return EIO;
		}
		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
			RWLOCK_EXIT(&ifs->ifs_ipf_global);
			return EIO;
		}
	}

	if (ifs->ifs_fr_enable_active != 0) {
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return EBUSY;
	}

	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
			       curproc, ifs);
	if (error != -1) {
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return error;
	}
	error = 0;

	switch (cmd)
	{
	case SIOCFRENB :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			error = COPYIN((caddr_t)data, (caddr_t)&enable,
				       sizeof(enable));
			if (error != 0) {
				error = EFAULT;
				break;
			}

			/*
			 * Not an atomic upgrade: the read lock is released
			 * before the write lock is requested.
			 */
			RWLOCK_EXIT(&ifs->ifs_ipf_global);
			WRITE_ENTER(&ifs->ifs_ipf_global);

			/*
			 * We must recheck fr_enable_active here, since we've
			 * dropped ifs_ipf_global from R in order to get it
			 * exclusively.
			 */
			if (ifs->ifs_fr_enable_active == 0) {
				ifs->ifs_fr_enable_active = 1;
				error = fr_enableipf(ifs, enable);
				ifs->ifs_fr_enable_active = 0;
			}
		}
		break;
	case SIOCIPFSET :
		if (!(mode & FWRITE)) {
			error = EPERM;
			break;
		}
		/* FALLTHRU */
	case SIOCIPFGETNEXT :
	case SIOCIPFGET :
		error = fr_ipftune(cmd, (void *)data, ifs);
		break;
	case SIOCSETFF :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			error = COPYIN((caddr_t)data,
				       (caddr_t)&ifs->ifs_fr_flags,
				       sizeof(ifs->ifs_fr_flags));
			if (error != 0)
				error = EFAULT;
		}
		break;
	case SIOCIPFLP :
		/*
		 * NOTE(review): no FWRITE test here although this changes
		 * which hooks are registered - confirm that is intended.
		 */
		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
			       sizeof(tmp));
		if (error != 0)
			error = EFAULT;
		else
			error = fr_setipfloopback(tmp, ifs);
		break;
	case SIOCGETFF :
		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
				sizeof(ifs->ifs_fr_flags));
		if (error != 0)
			error = EFAULT;
		break;
	case SIOCFUNCL :
		error = fr_resolvefunc((void *)data);
		break;
	case SIOCINAFR :
	case SIOCRMAFR :
	case SIOCADAFR :
	case SIOCZRLST :
		/* These operate on the active rule set (ifs_fr_active). */
		if (!(mode & FWRITE))
			error = EPERM;
		else
			error = frrequest(unit, cmd, (caddr_t)data,
					  ifs->ifs_fr_active, 1, ifs);
		break;
	case SIOCINIFR :
	case SIOCRMIFR :
	case SIOCADIFR :
		/* These operate on the other set (1 - ifs_fr_active). */
		if (!(mode & FWRITE))
			error = EPERM;
		else
			error = frrequest(unit, cmd, (caddr_t)data,
					  1 - ifs->ifs_fr_active, 1, ifs);
		break;
	case SIOCSWAPA :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			WRITE_ENTER(&ifs->ifs_ipf_mutex);
			bzero((char *)ifs->ifs_frcache,
			    sizeof (ifs->ifs_frcache));
			/*
			 * The previous active index is copied out first; the
			 * sets are only swapped if that copy succeeds.
			 */
			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
					(caddr_t)data,
					sizeof(ifs->ifs_fr_active));
			if (error != 0)
				error = EFAULT;
			else
				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
		}
		break;
	case SIOCGETFS :
		fr_getstat(&fio, ifs);
		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
		break;
	case SIOCFRZST :
		if (!(mode & FWRITE))
			error = EPERM;
		else
			error = fr_zerostats((caddr_t)data, ifs);
		break;
	case SIOCIPFFL :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
				       sizeof(tmp));
			if (!error) {
				/* frflush()'s result is copied back out. */
				tmp = frflush(unit, 4, tmp, ifs);
				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
						sizeof(tmp));
				if (error != 0)
					error = EFAULT;
			} else
				error = EFAULT;
		}
		break;
#ifdef USE_INET6
	case SIOCIPFL6 :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
				       sizeof(tmp));
			if (!error) {
				tmp = frflush(unit, 6, tmp, ifs);
				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
						sizeof(tmp));
				if (error != 0)
					error = EFAULT;
			} else
				error = EFAULT;
		}
		break;
#endif
	case SIOCSTLCK :
		/*
		 * NOTE(review): no FWRITE test here although four lock
		 * fields are overwritten - confirm that is intended.
		 */
		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
		if (error == 0) {
			ifs->ifs_fr_state_lock = tmp;
			ifs->ifs_fr_nat_lock = tmp;
			ifs->ifs_fr_frag_lock = tmp;
			ifs->ifs_fr_auth_lock = tmp;
		} else
			error = EFAULT;
		break;
#ifdef	IPFILTER_LOG
	case SIOCIPFFB :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			tmp = ipflog_clear(unit, ifs);
			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
				       sizeof(tmp));
			if (error)
				error = EFAULT;
		}
		break;
#endif /* IPFILTER_LOG */
	case SIOCFRSYN :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			/* Same release-then-write-lock step as SIOCFRENB. */
			RWLOCK_EXIT(&ifs->ifs_ipf_global);
			WRITE_ENTER(&ifs->ifs_ipf_global);

			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
			fr_nataddrsync(0, NULL, NULL, ifs);
			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
			error = 0;
		}
		break;
	case SIOCGFRST :
		error = fr_outobj((void *)data, fr_fragstats(ifs),
				  IPFOBJ_FRAGSTAT);
		break;
	case FIONREAD :
		/* Without IPFILTER_LOG this succeeds and copies nothing. */
#ifdef	IPFILTER_LOG
		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];

		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
		if (error != 0)
			error = EFAULT;
#endif
		break;
	case SIOCIPFITER :
		error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
				       curproc, ifs);
		break;

	case SIOCGENITER :
		error = ipf_genericiter((caddr_t)data, crgetuid(cp),
					curproc, ifs);
		break;

	case SIOCIPFDELTOK :
		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
		if (error != 0) {
			error = EFAULT;
		} else {
			error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
		}
		break;

	default :
#ifdef	IPFDEBUG
		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
			cmd, (void *)data);
#endif
		error = EINVAL;
		break;
	}
	/* Releases ifs_ipf_global whether it is held as reader or writer. */
	RWLOCK_EXIT(&ifs->ifs_ipf_global);
	return error;
}


/*
 * fr_enableipf: turn the filter on (enable != 0) or off (enable == 0) for
 * one stack.
 *
 * Disabling calls ipldetach() and marks the stack as not running
 * (ifs_fr_running = -1) only if that succeeds.  Enabling is a no-op when the
 * stack is already running; otherwise iplattach() is called, the slow timer
 * is armed if no timer id is recorded, and ifs_fr_running is set to 1.  When
 * iplattach() fails, ipldetach() is called to clean up and the iplattach()
 * error is returned.
 *
 * Both ipldetach() and iplattach() assert that ifs_ipf_global is held as
 * writer, so the caller must hold it that way.
 */
static int fr_enableipf(ifs, enable)
ipf_stack_t *ifs;
int enable;
{
	int error;

	if (!enable) {
		error = ipldetach(ifs);
		if (error == 0)
			ifs->ifs_fr_running = -1;
		return error;
	}

	if (ifs->ifs_fr_running > 0)
		return 0;

	error = iplattach(ifs);
	if (error == 0) {
		if (ifs->ifs_fr_timer_id == NULL) {
			/* drv_usectohz() turns 500000 usec into ticks. */
			int hz = drv_usectohz(500000);

			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
						       (void *)ifs,
						       hz);
		}
		ifs->ifs_fr_running = 1;
	} else {
		(void) ipldetach(ifs);
	}
	return error;
}


/*
 * get_unit: look up an interface by name.
 *
 * v selects the protocol handle to search: 4 uses ifs_ipf_ipv4, 6 uses
 * ifs_ipf_ipv6; any other value returns 0.  Otherwise the result of
 * net_phylookup() is returned unchanged.
 */
phy_if_t get_unit(name, v, ifs)
char *name;
int v;
ipf_stack_t *ifs;
{
	net_handle_t nif;

	if (v == 4)
		nif = ifs->ifs_ipf_ipv4;
	else if (v == 6)
		nif = ifs->ifs_ipf_ipv6;
	else
		return 0;

	return (net_phylookup(nif, name));
}

/*
 * routines below for saving IP headers to buffer
 */
/*
 * iplopen: open entry point.
 *
 * Only character opens (OTYP_CHR) of minors up to IPL_LOGMAX are accepted;
 * anything else returns ENXIO.  A fresh minor number is taken from the
 * ipf_minor arena, a soft state is allocated for it, and *devp is rewritten
 * to that new minor.  The minor the caller opened is remembered in
 * ipfs_minor, and the zone id starts out as IPFS_ZONE_UNSET.
 */
/*ARGSUSED*/
int iplopen(devp, flags, otype, cred)
dev_t *devp;
int flags, otype;
cred_t *cred;
{
	ipf_devstate_t *isp;
	minor_t min = getminor(*devp);
	minor_t minor;

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
#endif
	if (!(otype & OTYP_CHR))
		return ENXIO;

	if (IPL_LOGMAX < min)
		return ENXIO;

	minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
		/* Give the minor number back before failing the open. */
		vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
		return ENXIO;
	}

	*devp = makedevice(getmajor(*devp), minor);
	isp = ddi_get_soft_state(ipf_state, minor);
	VERIFY(isp != NULL);

	isp->ipfs_minor = min;
	isp->ipfs_zoneid = IPFS_ZONE_UNSET;

	return 0;
}


/*
 * iplclose: close entry point; frees the soft state and the minor number
 * for dev.
 *
 * NOTE(review): iplopen() rewrites the device to a minor taken from the
 * ipf_minor arena, yet the value is range-checked against IPL_LOGMAX here.
 * If the arena can hand out numbers above IPL_LOGMAX this returns ENXIO
 * without freeing anything - confirm the arena bounds.
 */
/*ARGSUSED*/
int iplclose(dev, flags, otype, cred)
dev_t dev;
int flags, otype;
cred_t *cred;
{
	minor_t	min = getminor(dev);

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
#endif

	if (IPL_LOGMAX < min)
		return ENXIO;

	ddi_soft_state_free(ipf_state, min);
	vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);

	return 0;
}

#ifdef	IPFILTER_LOG
/*
 * iplread/ipllog
 * both of these must operate with at least splnet() lest they be
 * called during packet processing and cause an inconsistency to appear in
 * the filter lists.
996 */ 997 /*ARGSUSED*/ 998 int iplread(dev, uio, cp) 999 dev_t dev; 1000 register struct uio *uio; 1001 cred_t *cp; 1002 { 1003 ipf_stack_t *ifs; 1004 int ret; 1005 minor_t unit; 1006 ipf_devstate_t *isp; 1007 1008 unit = getminor(dev); 1009 isp = ddi_get_soft_state(ipf_state, unit); 1010 if (isp == NULL) 1011 return ENXIO; 1012 unit = isp->ipfs_minor; 1013 1014 1015 /* 1016 * ipf_find_stack returns with a read lock on ifs_ipf_global 1017 */ 1018 ifs = ipf_find_stack(crgetzoneid(cp), isp); 1019 if (ifs == NULL) 1020 return ENXIO; 1021 1022 # ifdef IPFDEBUG 1023 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp); 1024 # endif 1025 1026 if (ifs->ifs_fr_running < 1) { 1027 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1028 return EIO; 1029 } 1030 1031 # ifdef IPFILTER_SYNC 1032 if (unit == IPL_LOGSYNC) { 1033 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1034 return ipfsync_read(uio); 1035 } 1036 # endif 1037 1038 ret = ipflog_read(unit, uio, ifs); 1039 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1040 return ret; 1041 } 1042 #endif /* IPFILTER_LOG */ 1043 1044 1045 /* 1046 * iplread/ipllog 1047 * both of these must operate with at least splnet() lest they be 1048 * called during packet processing and cause an inconsistancy to appear in 1049 * the filter lists. 
1050 */ 1051 int iplwrite(dev, uio, cp) 1052 dev_t dev; 1053 register struct uio *uio; 1054 cred_t *cp; 1055 { 1056 ipf_stack_t *ifs; 1057 minor_t unit; 1058 ipf_devstate_t *isp; 1059 1060 unit = getminor(dev); 1061 isp = ddi_get_soft_state(ipf_state, unit); 1062 if (isp == NULL) 1063 return ENXIO; 1064 unit = isp->ipfs_minor; 1065 1066 /* 1067 * ipf_find_stack returns with a read lock on ifs_ipf_global 1068 */ 1069 ifs = ipf_find_stack(crgetzoneid(cp), isp); 1070 if (ifs == NULL) 1071 return ENXIO; 1072 1073 #ifdef IPFDEBUG 1074 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp); 1075 #endif 1076 1077 if (ifs->ifs_fr_running < 1) { 1078 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1079 return EIO; 1080 } 1081 1082 #ifdef IPFILTER_SYNC 1083 if (getminor(dev) == IPL_LOGSYNC) { 1084 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1085 return ipfsync_write(uio); 1086 } 1087 #endif /* IPFILTER_SYNC */ 1088 dev = dev; /* LINT */ 1089 uio = uio; /* LINT */ 1090 cp = cp; /* LINT */ 1091 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1092 return ENXIO; 1093 } 1094 1095 1096 /* 1097 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that 1098 * requires a large amount of setting up and isn't any more efficient. 1099 */ 1100 int fr_send_reset(fin) 1101 fr_info_t *fin; 1102 { 1103 tcphdr_t *tcp, *tcp2; 1104 int tlen, hlen; 1105 mblk_t *m; 1106 #ifdef USE_INET6 1107 ip6_t *ip6; 1108 #endif 1109 ip_t *ip; 1110 1111 tcp = fin->fin_dp; 1112 if (tcp->th_flags & TH_RST) 1113 return -1; 1114 1115 #ifndef IPFILTER_CKSUM 1116 if (fr_checkl4sum(fin) == -1) 1117 return -1; 1118 #endif 1119 1120 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 
1 : 0; 1121 #ifdef USE_INET6 1122 if (fin->fin_v == 6) 1123 hlen = sizeof(ip6_t); 1124 else 1125 #endif 1126 hlen = sizeof(ip_t); 1127 hlen += sizeof(*tcp2); 1128 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL) 1129 return -1; 1130 1131 m->b_rptr += 64; 1132 MTYPE(m) = M_DATA; 1133 m->b_wptr = m->b_rptr + hlen; 1134 ip = (ip_t *)m->b_rptr; 1135 bzero((char *)ip, hlen); 1136 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2)); 1137 tcp2->th_dport = tcp->th_sport; 1138 tcp2->th_sport = tcp->th_dport; 1139 if (tcp->th_flags & TH_ACK) { 1140 tcp2->th_seq = tcp->th_ack; 1141 tcp2->th_flags = TH_RST; 1142 } else { 1143 tcp2->th_ack = ntohl(tcp->th_seq); 1144 tcp2->th_ack += tlen; 1145 tcp2->th_ack = htonl(tcp2->th_ack); 1146 tcp2->th_flags = TH_RST|TH_ACK; 1147 } 1148 tcp2->th_off = sizeof(struct tcphdr) >> 2; 1149 1150 ip->ip_v = fin->fin_v; 1151 #ifdef USE_INET6 1152 if (fin->fin_v == 6) { 1153 ip6 = (ip6_t *)m->b_rptr; 1154 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1155 ip6->ip6_src = fin->fin_dst6.in6; 1156 ip6->ip6_dst = fin->fin_src6.in6; 1157 ip6->ip6_plen = htons(sizeof(*tcp)); 1158 ip6->ip6_nxt = IPPROTO_TCP; 1159 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2); 1160 } else 1161 #endif 1162 { 1163 ip->ip_src.s_addr = fin->fin_daddr; 1164 ip->ip_dst.s_addr = fin->fin_saddr; 1165 ip->ip_id = fr_nextipid(fin); 1166 ip->ip_hl = sizeof(*ip) >> 2; 1167 ip->ip_p = IPPROTO_TCP; 1168 ip->ip_len = sizeof(*ip) + sizeof(*tcp); 1169 ip->ip_tos = fin->fin_ip->ip_tos; 1170 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2); 1171 } 1172 return fr_send_ip(fin, m, &m); 1173 } 1174 1175 /* 1176 * Function: fr_send_ip 1177 * Returns: 0: success 1178 * -1: failed 1179 * Parameters: 1180 * fin: packet information 1181 * m: the message block where ip head starts 1182 * 1183 * Send a new packet through the IP stack. 
1184 * 1185 * For IPv4 packets, ip_len must be in host byte order, and ip_v, 1186 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this 1187 * function). 1188 * 1189 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled 1190 * in by this function. 1191 * 1192 * All other portions of the packet must be in on-the-wire format. 1193 */ 1194 /*ARGSUSED*/ 1195 static int fr_send_ip(fin, m, mpp) 1196 fr_info_t *fin; 1197 mblk_t *m, **mpp; 1198 { 1199 qpktinfo_t qpi, *qpip; 1200 fr_info_t fnew; 1201 ip_t *ip; 1202 int i, hlen; 1203 ipf_stack_t *ifs = fin->fin_ifs; 1204 1205 ip = (ip_t *)m->b_rptr; 1206 bzero((char *)&fnew, sizeof(fnew)); 1207 1208 #ifdef USE_INET6 1209 if (fin->fin_v == 6) { 1210 ip6_t *ip6; 1211 1212 ip6 = (ip6_t *)ip; 1213 ip6->ip6_vfc = 0x60; 1214 ip6->ip6_hlim = 127; 1215 fnew.fin_v = 6; 1216 hlen = sizeof(*ip6); 1217 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; 1218 } else 1219 #endif 1220 { 1221 fnew.fin_v = 4; 1222 #if SOLARIS2 >= 10 1223 ip->ip_ttl = 255; 1224 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1) 1225 ip->ip_off = htons(IP_DF); 1226 #else 1227 if (ip_ttl_ptr != NULL) 1228 ip->ip_ttl = (u_char)(*ip_ttl_ptr); 1229 else 1230 ip->ip_ttl = 63; 1231 if (ip_mtudisc != NULL) 1232 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0); 1233 else 1234 ip->ip_off = htons(IP_DF); 1235 #endif 1236 /* 1237 * The dance with byte order and ip_len/ip_off is because in 1238 * fr_fastroute, it expects them to be in host byte order but 1239 * ipf_cksum expects them to be in network byte order. 
1240 */ 1241 ip->ip_len = htons(ip->ip_len); 1242 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip)); 1243 ip->ip_len = ntohs(ip->ip_len); 1244 ip->ip_off = ntohs(ip->ip_off); 1245 hlen = sizeof(*ip); 1246 fnew.fin_plen = ip->ip_len; 1247 } 1248 1249 qpip = fin->fin_qpi; 1250 qpi.qpi_off = 0; 1251 qpi.qpi_ill = qpip->qpi_ill; 1252 qpi.qpi_m = m; 1253 qpi.qpi_data = ip; 1254 fnew.fin_qpi = &qpi; 1255 fnew.fin_ifp = fin->fin_ifp; 1256 fnew.fin_flx = FI_NOCKSUM; 1257 fnew.fin_m = m; 1258 fnew.fin_qfm = m; 1259 fnew.fin_ip = ip; 1260 fnew.fin_mp = mpp; 1261 fnew.fin_hlen = hlen; 1262 fnew.fin_dp = (char *)ip + hlen; 1263 fnew.fin_ifs = fin->fin_ifs; 1264 (void) fr_makefrip(hlen, ip, &fnew); 1265 1266 i = fr_fastroute(m, mpp, &fnew, NULL); 1267 return i; 1268 } 1269 1270 1271 int fr_send_icmp_err(type, fin, dst) 1272 int type; 1273 fr_info_t *fin; 1274 int dst; 1275 { 1276 struct in_addr dst4; 1277 struct icmp *icmp; 1278 qpktinfo_t *qpi; 1279 int hlen, code; 1280 phy_if_t phy; 1281 u_short sz; 1282 #ifdef USE_INET6 1283 mblk_t *mb; 1284 #endif 1285 mblk_t *m; 1286 #ifdef USE_INET6 1287 ip6_t *ip6; 1288 #endif 1289 ip_t *ip; 1290 ipf_stack_t *ifs = fin->fin_ifs; 1291 1292 if ((type < 0) || (type > ICMP_MAXTYPE)) 1293 return -1; 1294 1295 code = fin->fin_icode; 1296 #ifdef USE_INET6 1297 if ((code < 0) || (code >= ICMP_MAX_UNREACH)) 1298 return -1; 1299 #endif 1300 1301 #ifndef IPFILTER_CKSUM 1302 if (fr_checkl4sum(fin) == -1) 1303 return -1; 1304 #endif 1305 1306 qpi = fin->fin_qpi; 1307 1308 #ifdef USE_INET6 1309 mb = fin->fin_qfm; 1310 1311 if (fin->fin_v == 6) { 1312 sz = sizeof(ip6_t); 1313 sz += MIN(mb->b_wptr - mb->b_rptr, 512); 1314 hlen = sizeof(ip6_t); 1315 type = icmptoicmp6types[type]; 1316 if (type == ICMP6_DST_UNREACH) 1317 code = icmptoicmp6unreach[code]; 1318 } else 1319 #endif 1320 { 1321 if ((fin->fin_p == IPPROTO_ICMP) && 1322 !(fin->fin_flx & FI_SHORT)) 1323 switch (ntohs(fin->fin_data[0]) >> 8) 1324 { 1325 case ICMP_ECHO : 1326 case ICMP_TSTAMP : 
1327 case ICMP_IREQ : 1328 case ICMP_MASKREQ : 1329 break; 1330 default : 1331 return 0; 1332 } 1333 1334 sz = sizeof(ip_t) * 2; 1335 sz += 8; /* 64 bits of data */ 1336 hlen = sizeof(ip_t); 1337 } 1338 1339 sz += offsetof(struct icmp, icmp_ip); 1340 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL) 1341 return -1; 1342 MTYPE(m) = M_DATA; 1343 m->b_rptr += 64; 1344 m->b_wptr = m->b_rptr + sz; 1345 bzero((char *)m->b_rptr, (size_t)sz); 1346 ip = (ip_t *)m->b_rptr; 1347 ip->ip_v = fin->fin_v; 1348 icmp = (struct icmp *)(m->b_rptr + hlen); 1349 icmp->icmp_type = type & 0xff; 1350 icmp->icmp_code = code & 0xff; 1351 phy = (phy_if_t)qpi->qpi_ill; 1352 if (type == ICMP_UNREACH && (phy != 0) && 1353 fin->fin_icode == ICMP_UNREACH_NEEDFRAG) 1354 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 ); 1355 1356 #ifdef USE_INET6 1357 if (fin->fin_v == 6) { 1358 struct in6_addr dst6; 1359 int csz; 1360 1361 if (dst == 0) { 1362 ipf_stack_t *ifs = fin->fin_ifs; 1363 1364 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy, 1365 (void *)&dst6, NULL, ifs) == -1) { 1366 FREE_MB_T(m); 1367 return -1; 1368 } 1369 } else 1370 dst6 = fin->fin_dst6.in6; 1371 1372 csz = sz; 1373 sz -= sizeof(ip6_t); 1374 ip6 = (ip6_t *)m->b_rptr; 1375 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1376 ip6->ip6_plen = htons((u_short)sz); 1377 ip6->ip6_nxt = IPPROTO_ICMPV6; 1378 ip6->ip6_src = dst6; 1379 ip6->ip6_dst = fin->fin_src6.in6; 1380 sz -= offsetof(struct icmp, icmp_ip); 1381 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz); 1382 icmp->icmp_cksum = csz - sizeof(ip6_t); 1383 } else 1384 #endif 1385 { 1386 ip->ip_hl = sizeof(*ip) >> 2; 1387 ip->ip_p = IPPROTO_ICMP; 1388 ip->ip_id = fin->fin_ip->ip_id; 1389 ip->ip_tos = fin->fin_ip->ip_tos; 1390 ip->ip_len = (u_short)sz; 1391 if (dst == 0) { 1392 ipf_stack_t *ifs = fin->fin_ifs; 1393 1394 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy, 1395 (void *)&dst4, NULL, ifs) == -1) { 1396 FREE_MB_T(m); 1397 return -1; 1398 } 1399 } else { 1400 
dst4 = fin->fin_dst; 1401 } 1402 ip->ip_src = dst4; 1403 ip->ip_dst = fin->fin_src; 1404 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip, 1405 sizeof(*fin->fin_ip)); 1406 bcopy((char *)fin->fin_ip + fin->fin_hlen, 1407 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8); 1408 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len); 1409 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off); 1410 icmp->icmp_cksum = ipf_cksum((u_short *)icmp, 1411 sz - sizeof(ip_t)); 1412 } 1413 1414 /* 1415 * Need to exit out of these so we don't recursively call rw_enter 1416 * from fr_qout. 1417 */ 1418 return fr_send_ip(fin, m, &m); 1419 } 1420 1421 #include <sys/time.h> 1422 #include <sys/varargs.h> 1423 1424 #ifndef _KERNEL 1425 #include <stdio.h> 1426 #endif 1427 1428 /* 1429 * Return the first IP Address associated with an interface 1430 * For IPv6, we walk through the list of logical interfaces and return 1431 * the address of the first one that isn't a link-local interface. 1432 * We can't assume that it is :1 because another link-local address 1433 * may have been assigned there. 
1434 */ 1435 /*ARGSUSED*/ 1436 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs) 1437 int v, atype; 1438 void *ifptr; 1439 struct in_addr *inp, *inpmask; 1440 ipf_stack_t *ifs; 1441 { 1442 struct sockaddr_in6 v6addr[2]; 1443 struct sockaddr_in v4addr[2]; 1444 net_ifaddr_t type[2]; 1445 net_handle_t net_data; 1446 phy_if_t phyif; 1447 void *array; 1448 1449 switch (v) 1450 { 1451 case 4: 1452 net_data = ifs->ifs_ipf_ipv4; 1453 array = v4addr; 1454 break; 1455 case 6: 1456 net_data = ifs->ifs_ipf_ipv6; 1457 array = v6addr; 1458 break; 1459 default: 1460 net_data = NULL; 1461 break; 1462 } 1463 1464 if (net_data == NULL) 1465 return -1; 1466 1467 phyif = (phy_if_t)ifptr; 1468 1469 switch (atype) 1470 { 1471 case FRI_PEERADDR : 1472 type[0] = NA_PEER; 1473 break; 1474 1475 case FRI_BROADCAST : 1476 type[0] = NA_BROADCAST; 1477 break; 1478 1479 default : 1480 type[0] = NA_ADDRESS; 1481 break; 1482 } 1483 1484 type[1] = NA_NETMASK; 1485 1486 if (v == 6) { 1487 lif_if_t idx = 0; 1488 1489 do { 1490 idx = net_lifgetnext(net_data, phyif, idx); 1491 if (net_getlifaddr(net_data, phyif, idx, 2, type, 1492 array) < 0) 1493 return -1; 1494 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) && 1495 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr)) 1496 break; 1497 } while (idx != 0); 1498 1499 if (idx == 0) 1500 return -1; 1501 1502 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1], 1503 inp, inpmask); 1504 } 1505 1506 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0) 1507 return -1; 1508 1509 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask); 1510 } 1511 1512 1513 u_32_t fr_newisn(fin) 1514 fr_info_t *fin; 1515 { 1516 static int iss_seq_off = 0; 1517 u_char hash[16]; 1518 u_32_t newiss; 1519 MD5_CTX ctx; 1520 ipf_stack_t *ifs = fin->fin_ifs; 1521 1522 /* 1523 * Compute the base value of the ISS. It is a hash 1524 * of (saddr, sport, daddr, dport, secret). 
1525 */ 1526 MD5Init(&ctx); 1527 1528 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src, 1529 sizeof(fin->fin_fi.fi_src)); 1530 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst, 1531 sizeof(fin->fin_fi.fi_dst)); 1532 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat)); 1533 1534 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret)); 1535 1536 MD5Final(hash, &ctx); 1537 1538 bcopy(hash, &newiss, sizeof(newiss)); 1539 1540 /* 1541 * Now increment our "timer", and add it in to 1542 * the computed value. 1543 * 1544 * XXX Use `addin'? 1545 * XXX TCP_ISSINCR too large to use? 1546 */ 1547 iss_seq_off += 0x00010000; 1548 newiss += iss_seq_off; 1549 return newiss; 1550 } 1551 1552 1553 /* ------------------------------------------------------------------------ */ 1554 /* Function: fr_nextipid */ 1555 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */ 1556 /* Parameters: fin(I) - pointer to packet information */ 1557 /* */ 1558 /* Returns the next IPv4 ID to use for this packet. 
*/ 1559 /* ------------------------------------------------------------------------ */ 1560 u_short fr_nextipid(fin) 1561 fr_info_t *fin; 1562 { 1563 static u_short ipid = 0; 1564 u_short id; 1565 ipf_stack_t *ifs = fin->fin_ifs; 1566 1567 MUTEX_ENTER(&ifs->ifs_ipf_rw); 1568 if (fin->fin_pktnum != 0) { 1569 id = fin->fin_pktnum & 0xffff; 1570 } else { 1571 id = ipid++; 1572 } 1573 MUTEX_EXIT(&ifs->ifs_ipf_rw); 1574 1575 return id; 1576 } 1577 1578 1579 #ifndef IPFILTER_CKSUM 1580 /* ARGSUSED */ 1581 #endif 1582 INLINE void fr_checkv4sum(fin) 1583 fr_info_t *fin; 1584 { 1585 #ifdef IPFILTER_CKSUM 1586 if (fr_checkl4sum(fin) == -1) 1587 fin->fin_flx |= FI_BAD; 1588 #endif 1589 } 1590 1591 1592 #ifdef USE_INET6 1593 # ifndef IPFILTER_CKSUM 1594 /* ARGSUSED */ 1595 # endif 1596 INLINE void fr_checkv6sum(fin) 1597 fr_info_t *fin; 1598 { 1599 # ifdef IPFILTER_CKSUM 1600 if (fr_checkl4sum(fin) == -1) 1601 fin->fin_flx |= FI_BAD; 1602 # endif 1603 } 1604 #endif /* USE_INET6 */ 1605 1606 1607 #if (SOLARIS2 < 7) 1608 void fr_slowtimer() 1609 #else 1610 /*ARGSUSED*/ 1611 void fr_slowtimer __P((void *arg)) 1612 #endif 1613 { 1614 ipf_stack_t *ifs = arg; 1615 1616 READ_ENTER(&ifs->ifs_ipf_global); 1617 if (ifs->ifs_fr_running != 1) { 1618 ifs->ifs_fr_timer_id = NULL; 1619 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1620 return; 1621 } 1622 ipf_expiretokens(ifs); 1623 fr_fragexpire(ifs); 1624 fr_timeoutstate(ifs); 1625 fr_natexpire(ifs); 1626 fr_authexpire(ifs); 1627 ifs->ifs_fr_ticks++; 1628 if (ifs->ifs_fr_running == 1) 1629 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg, 1630 drv_usectohz(500000)); 1631 else 1632 ifs->ifs_fr_timer_id = NULL; 1633 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1634 } 1635 1636 1637 /* ------------------------------------------------------------------------ */ 1638 /* Function: fr_pullup */ 1639 /* Returns: NULL == pullup failed, else pointer to protocol header */ 1640 /* Parameters: m(I) - pointer to buffer where data packet starts */ 1641 /* fin(I) - pointer to 
packet information */ 1642 /* len(I) - number of bytes to pullup */ 1643 /* */ 1644 /* Attempt to move at least len bytes (from the start of the buffer) into a */ 1645 /* single buffer for ease of access. Operating system native functions are */ 1646 /* used to manage buffers - if necessary. If the entire packet ends up in */ 1647 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */ 1648 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ 1649 /* and ONLY if the pullup succeeds. */ 1650 /* */ 1651 /* We assume that 'min' is a pointer to a buffer that is part of the chain */ 1652 /* of buffers that starts at *fin->fin_mp. */ 1653 /* ------------------------------------------------------------------------ */ 1654 void *fr_pullup(min, fin, len) 1655 mb_t *min; 1656 fr_info_t *fin; 1657 int len; 1658 { 1659 qpktinfo_t *qpi = fin->fin_qpi; 1660 int out = fin->fin_out, dpoff, ipoff; 1661 mb_t *m = min, *m1, *m2; 1662 char *ip; 1663 uint32_t start, stuff, end, value, flags; 1664 ipf_stack_t *ifs = fin->fin_ifs; 1665 1666 if (m == NULL) 1667 return NULL; 1668 1669 ip = (char *)fin->fin_ip; 1670 if ((fin->fin_flx & FI_COALESCE) != 0) 1671 return ip; 1672 1673 ipoff = fin->fin_ipoff; 1674 if (fin->fin_dp != NULL) 1675 dpoff = (char *)fin->fin_dp - (char *)ip; 1676 else 1677 dpoff = 0; 1678 1679 if (M_LEN(m) < len + ipoff) { 1680 1681 /* 1682 * pfil_precheck ensures the IP header is on a 32bit 1683 * aligned address so simply fail if that isn't currently 1684 * the case (should never happen). 1685 */ 1686 int inc = 0; 1687 1688 if (ipoff > 0) { 1689 if ((ipoff & 3) != 0) { 1690 inc = 4 - (ipoff & 3); 1691 if (m->b_rptr - inc >= m->b_datap->db_base) 1692 m->b_rptr -= inc; 1693 else 1694 inc = 0; 1695 } 1696 } 1697 1698 /* 1699 * XXX This is here as a work around for a bug with DEBUG 1700 * XXX Solaris kernels. 
The problem is b_prev is used by IP 1701 * XXX code as a way to stash the phyint_index for a packet, 1702 * XXX this doesn't get reset by IP but freeb does an ASSERT() 1703 * XXX for both of these to be NULL. See 6442390. 1704 */ 1705 m1 = m; 1706 m2 = m->b_prev; 1707 1708 do { 1709 m1->b_next = NULL; 1710 m1->b_prev = NULL; 1711 m1 = m1->b_cont; 1712 } while (m1); 1713 1714 /* 1715 * Need to preserve checksum information by copying them 1716 * to newmp which heads the pulluped message. 1717 */ 1718 hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end, 1719 &value, &flags); 1720 1721 if (pullupmsg(m, len + ipoff + inc) == 0) { 1722 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]); 1723 FREE_MB_T(*fin->fin_mp); 1724 *fin->fin_mp = NULL; 1725 fin->fin_m = NULL; 1726 fin->fin_ip = NULL; 1727 fin->fin_dp = NULL; 1728 qpi->qpi_data = NULL; 1729 return NULL; 1730 } 1731 1732 (void) hcksum_assoc(m, NULL, NULL, start, stuff, end, 1733 value, flags, 0); 1734 1735 m->b_prev = m2; 1736 m->b_rptr += inc; 1737 fin->fin_m = m; 1738 ip = MTOD(m, char *) + ipoff; 1739 qpi->qpi_data = ip; 1740 } 1741 1742 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]); 1743 fin->fin_ip = (ip_t *)ip; 1744 if (fin->fin_dp != NULL) 1745 fin->fin_dp = (char *)fin->fin_ip + dpoff; 1746 1747 if (len == fin->fin_plen) 1748 fin->fin_flx |= FI_COALESCE; 1749 return ip; 1750 } 1751 1752 1753 /* 1754 * Function: fr_verifysrc 1755 * Returns: int (really boolean) 1756 * Parameters: fin - packet information 1757 * 1758 * Check whether the packet has a valid source address for the interface on 1759 * which the packet arrived, implementing the "fr_chksrc" feature. 1760 * Returns true iff the packet's source address is valid. 
1761 */ 1762 int fr_verifysrc(fin) 1763 fr_info_t *fin; 1764 { 1765 net_handle_t net_data_p; 1766 phy_if_t phy_ifdata_routeto; 1767 struct sockaddr sin; 1768 ipf_stack_t *ifs = fin->fin_ifs; 1769 1770 if (fin->fin_v == 4) { 1771 net_data_p = ifs->ifs_ipf_ipv4; 1772 } else if (fin->fin_v == 6) { 1773 net_data_p = ifs->ifs_ipf_ipv6; 1774 } else { 1775 return (0); 1776 } 1777 1778 /* Get the index corresponding to the if name */ 1779 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1780 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr)); 1781 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL); 1782 1783 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 1784 } 1785 1786 /* 1787 * Return true only if forwarding is enabled on the interface. 1788 */ 1789 static int 1790 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp) 1791 { 1792 lif_if_t lif; 1793 1794 for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0; 1795 lif = net_lifgetnext(ndp, phyif, lif)) { 1796 int res; 1797 uint64_t flags; 1798 1799 res = net_getlifflags(ndp, phyif, lif, &flags); 1800 if (res != 0) 1801 return (0); 1802 if (flags & IFF_ROUTER) 1803 return (1); 1804 } 1805 1806 return (0); 1807 } 1808 1809 /* 1810 * Function: fr_fastroute 1811 * Returns: 0: success; 1812 * -1: failed 1813 * Parameters: 1814 * mb: the message block where ip head starts 1815 * mpp: the pointer to the pointer of the orignal 1816 * packet message 1817 * fin: packet information 1818 * fdp: destination interface information 1819 * if it is NULL, no interface information provided. 1820 * 1821 * This function is for fastroute/to/dup-to rules. It calls 1822 * pfil_make_lay2_packet to search route, make lay-2 header 1823 * ,and identify output queue for the IP packet. 
1824 * The destination address depends on the following conditions: 1825 * 1: for fastroute rule, fdp is passed in as NULL, so the 1826 * destination address is the IP Packet's destination address 1827 * 2: for to/dup-to rule, if an ip address is specified after 1828 * the interface name, this address is the as destination 1829 * address. Otherwise IP Packet's destination address is used 1830 */ 1831 int fr_fastroute(mb, mpp, fin, fdp) 1832 mblk_t *mb, **mpp; 1833 fr_info_t *fin; 1834 frdest_t *fdp; 1835 { 1836 net_handle_t net_data_p; 1837 net_inject_t *inj; 1838 mblk_t *mp = NULL; 1839 frentry_t *fr = fin->fin_fr; 1840 qpktinfo_t *qpi; 1841 ip_t *ip; 1842 1843 struct sockaddr_in *sin; 1844 struct sockaddr_in6 *sin6; 1845 struct sockaddr *sinp; 1846 ipf_stack_t *ifs = fin->fin_ifs; 1847 #ifndef sparc 1848 u_short __iplen, __ipoff; 1849 #endif 1850 1851 if (fin->fin_v == 4) { 1852 net_data_p = ifs->ifs_ipf_ipv4; 1853 } else if (fin->fin_v == 6) { 1854 net_data_p = ifs->ifs_ipf_ipv6; 1855 } else { 1856 return (-1); 1857 } 1858 1859 /* Check the src here, fin_ifp is the src interface. */ 1860 if (!fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p)) 1861 return (-1); 1862 1863 inj = net_inject_alloc(NETINFO_VERSION); 1864 if (inj == NULL) 1865 return -1; 1866 1867 ip = fin->fin_ip; 1868 qpi = fin->fin_qpi; 1869 1870 /* 1871 * If this is a duplicate mblk then we want ip to point at that 1872 * data, not the original, if and only if it is already pointing at 1873 * the current mblk data. 1874 * 1875 * Otherwise, if it's not a duplicate, and we're not already pointing 1876 * at the current mblk data, then we want to ensure that the data 1877 * points at ip. 
1878 */ 1879 1880 if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) { 1881 ip = (ip_t *)mb->b_rptr; 1882 } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) { 1883 qpi->qpi_m->b_rptr = (uchar_t *)ip; 1884 qpi->qpi_off = 0; 1885 } 1886 1887 /* 1888 * If there is another M_PROTO, we don't want it 1889 */ 1890 if (*mpp != mb) { 1891 mp = unlinkb(*mpp); 1892 freeb(*mpp); 1893 *mpp = mp; 1894 } 1895 1896 sinp = (struct sockaddr *)&inj->ni_addr; 1897 sin = (struct sockaddr_in *)sinp; 1898 sin6 = (struct sockaddr_in6 *)sinp; 1899 bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr)); 1900 inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1901 inj->ni_packet = mb; 1902 1903 /* 1904 * In case we're here due to "to <if>" being used with 1905 * "keep state", check that we're going in the correct 1906 * direction. 1907 */ 1908 if (fdp != NULL) { 1909 if ((fr != NULL) && (fdp->fd_ifp != NULL) && 1910 (fin->fin_rev != 0) && (fdp == &fr->fr_tif)) 1911 goto bad_fastroute; 1912 inj->ni_physical = (phy_if_t)fdp->fd_ifp; 1913 if (fin->fin_v == 4) { 1914 sin->sin_addr = fdp->fd_ip; 1915 } else { 1916 sin6->sin6_addr = fdp->fd_ip6.in6; 1917 } 1918 } else { 1919 if (fin->fin_v == 4) { 1920 sin->sin_addr = ip->ip_dst; 1921 } else { 1922 sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst; 1923 } 1924 inj->ni_physical = net_routeto(net_data_p, sinp, NULL); 1925 } 1926 1927 /* we're checking the destinatation here */ 1928 if (!fr_forwarding_enabled(inj->ni_physical, net_data_p)) 1929 goto bad_fastroute; 1930 1931 /* 1932 * Clear the hardware checksum flags from packets that we are doing 1933 * input processing on as leaving them set will cause the outgoing 1934 * NIC (if it supports hardware checksum) to calculate them anew, 1935 * using the old (correct) checksums as the pseudo value to start 1936 * from. 
1937 */ 1938 if (fin->fin_out == 0) { 1939 DB_CKSUMFLAGS(mb) = 0; 1940 } 1941 1942 *mpp = mb; 1943 1944 if (fin->fin_out == 0) { 1945 void *saveifp; 1946 u_32_t pass; 1947 1948 saveifp = fin->fin_ifp; 1949 fin->fin_ifp = (void *)inj->ni_physical; 1950 fin->fin_flx &= ~FI_STATE; 1951 fin->fin_out = 1; 1952 (void) fr_acctpkt(fin, &pass); 1953 fin->fin_fr = NULL; 1954 if (!fr || !(fr->fr_flags & FR_RETMASK)) 1955 (void) fr_checkstate(fin, &pass); 1956 if (fr_checknatout(fin, NULL) == -1) 1957 goto bad_fastroute; 1958 fin->fin_out = 0; 1959 fin->fin_ifp = saveifp; 1960 } 1961 #ifndef sparc 1962 if (fin->fin_v == 4) { 1963 __iplen = (u_short)ip->ip_len, 1964 __ipoff = (u_short)ip->ip_off; 1965 1966 ip->ip_len = htons(__iplen); 1967 ip->ip_off = htons(__ipoff); 1968 } 1969 #endif 1970 1971 if (net_data_p) { 1972 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) { 1973 net_inject_free(inj); 1974 return (-1); 1975 } 1976 } 1977 1978 ifs->ifs_fr_frouteok[0]++; 1979 net_inject_free(inj); 1980 return 0; 1981 bad_fastroute: 1982 net_inject_free(inj); 1983 freemsg(mb); 1984 ifs->ifs_fr_frouteok[1]++; 1985 return -1; 1986 } 1987 1988 1989 /* ------------------------------------------------------------------------ */ 1990 /* Function: ipf_hook4_out */ 1991 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1992 /* Parameters: event(I) - pointer to event */ 1993 /* info(I) - pointer to hook information for firewalling */ 1994 /* */ 1995 /* Calling ipf_hook. 
*/ 1996 /* ------------------------------------------------------------------------ */ 1997 /*ARGSUSED*/ 1998 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg) 1999 { 2000 return ipf_hook(info, 1, 0, arg); 2001 } 2002 /*ARGSUSED*/ 2003 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg) 2004 { 2005 return ipf_hook6(info, 1, 0, arg); 2006 } 2007 2008 /* ------------------------------------------------------------------------ */ 2009 /* Function: ipf_hook4_in */ 2010 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2011 /* Parameters: event(I) - pointer to event */ 2012 /* info(I) - pointer to hook information for firewalling */ 2013 /* */ 2014 /* Calling ipf_hook. */ 2015 /* ------------------------------------------------------------------------ */ 2016 /*ARGSUSED*/ 2017 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg) 2018 { 2019 return ipf_hook(info, 0, 0, arg); 2020 } 2021 /*ARGSUSED*/ 2022 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg) 2023 { 2024 return ipf_hook6(info, 0, 0, arg); 2025 } 2026 2027 2028 /* ------------------------------------------------------------------------ */ 2029 /* Function: ipf_hook4_loop_out */ 2030 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2031 /* Parameters: event(I) - pointer to event */ 2032 /* info(I) - pointer to hook information for firewalling */ 2033 /* */ 2034 /* Calling ipf_hook. 
*/ 2035 /* ------------------------------------------------------------------------ */ 2036 /*ARGSUSED*/ 2037 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 2038 { 2039 return ipf_hook(info, 1, FI_NOCKSUM, arg); 2040 } 2041 /*ARGSUSED*/ 2042 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 2043 { 2044 return ipf_hook6(info, 1, FI_NOCKSUM, arg); 2045 } 2046 2047 /* ------------------------------------------------------------------------ */ 2048 /* Function: ipf_hook4_loop_in */ 2049 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2050 /* Parameters: event(I) - pointer to event */ 2051 /* info(I) - pointer to hook information for firewalling */ 2052 /* */ 2053 /* Calling ipf_hook. */ 2054 /* ------------------------------------------------------------------------ */ 2055 /*ARGSUSED*/ 2056 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 2057 { 2058 return ipf_hook(info, 0, FI_NOCKSUM, arg); 2059 } 2060 /*ARGSUSED*/ 2061 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 2062 { 2063 return ipf_hook6(info, 0, FI_NOCKSUM, arg); 2064 } 2065 2066 /* ------------------------------------------------------------------------ */ 2067 /* Function: ipf_hook */ 2068 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2069 /* Parameters: info(I) - pointer to hook information for firewalling */ 2070 /* out(I) - whether packet is going in or out */ 2071 /* loopback(I) - whether packet is a loopback packet or not */ 2072 /* */ 2073 /* Stepping stone function between the IP mainline and IPFilter. Extracts */ 2074 /* parameters out of the info structure and forms them up to be useful for */ 2075 /* calling ipfilter. 
*/ 2076 /* ------------------------------------------------------------------------ */ 2077 int ipf_hook(hook_data_t info, int out, int loopback, void *arg) 2078 { 2079 hook_pkt_event_t *fw; 2080 ipf_stack_t *ifs; 2081 qpktinfo_t qpi; 2082 int rval, hlen; 2083 u_short swap; 2084 phy_if_t phy; 2085 ip_t *ip; 2086 2087 ifs = arg; 2088 fw = (hook_pkt_event_t *)info; 2089 2090 ASSERT(fw != NULL); 2091 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp; 2092 2093 ip = fw->hpe_hdr; 2094 swap = ntohs(ip->ip_len); 2095 ip->ip_len = swap; 2096 swap = ntohs(ip->ip_off); 2097 ip->ip_off = swap; 2098 hlen = IPH_HDR_LENGTH(ip); 2099 2100 qpi.qpi_m = fw->hpe_mb; 2101 qpi.qpi_data = fw->hpe_hdr; 2102 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2103 qpi.qpi_ill = (void *)phy; 2104 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2105 if (qpi.qpi_flags) 2106 qpi.qpi_flags |= FI_MBCAST; 2107 qpi.qpi_flags |= loopback; 2108 2109 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2110 &qpi, fw->hpe_mp, ifs); 2111 2112 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2113 if (rval == 0 && *(fw->hpe_mp) == NULL) 2114 rval = 1; 2115 2116 /* Notify IP the packet mblk_t and IP header pointers. */ 2117 fw->hpe_mb = qpi.qpi_m; 2118 fw->hpe_hdr = qpi.qpi_data; 2119 if (rval == 0) { 2120 ip = qpi.qpi_data; 2121 swap = ntohs(ip->ip_len); 2122 ip->ip_len = swap; 2123 swap = ntohs(ip->ip_off); 2124 ip->ip_off = swap; 2125 } 2126 return rval; 2127 2128 } 2129 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg) 2130 { 2131 hook_pkt_event_t *fw; 2132 int rval, hlen; 2133 qpktinfo_t qpi; 2134 phy_if_t phy; 2135 2136 fw = (hook_pkt_event_t *)info; 2137 2138 ASSERT(fw != NULL); 2139 phy = (out == 0) ? 
fw->hpe_ifp : fw->hpe_ofp; 2140 2141 hlen = sizeof (ip6_t); 2142 2143 qpi.qpi_m = fw->hpe_mb; 2144 qpi.qpi_data = fw->hpe_hdr; 2145 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2146 qpi.qpi_ill = (void *)phy; 2147 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2148 if (qpi.qpi_flags) 2149 qpi.qpi_flags |= FI_MBCAST; 2150 qpi.qpi_flags |= loopback; 2151 2152 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2153 &qpi, fw->hpe_mp, arg); 2154 2155 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2156 if (rval == 0 && *(fw->hpe_mp) == NULL) 2157 rval = 1; 2158 2159 /* Notify IP the packet mblk_t and IP header pointers. */ 2160 fw->hpe_mb = qpi.qpi_m; 2161 fw->hpe_hdr = qpi.qpi_data; 2162 return rval; 2163 } 2164 2165 2166 /* ------------------------------------------------------------------------ */ 2167 /* Function: ipf_nic_event_v4 */ 2168 /* Returns: int - 0 == no problems encountered */ 2169 /* Parameters: event(I) - pointer to event */ 2170 /* info(I) - pointer to information about a NIC event */ 2171 /* */ 2172 /* Function to receive asynchronous NIC events from IP */ 2173 /* ------------------------------------------------------------------------ */ 2174 /*ARGSUSED*/ 2175 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg) 2176 { 2177 struct sockaddr_in *sin; 2178 hook_nic_event_t *hn; 2179 ipf_stack_t *ifs = arg; 2180 void *new_ifp = NULL; 2181 2182 if (ifs->ifs_fr_running <= 0) 2183 return (0); 2184 2185 hn = (hook_nic_event_t *)info; 2186 2187 switch (hn->hne_event) 2188 { 2189 case NE_PLUMB : 2190 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data, 2191 ifs); 2192 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2193 hn->hne_data, ifs); 2194 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2195 hn->hne_data, ifs); 2196 break; 2197 2198 case NE_UNPLUMB : 2199 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2200 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void 
*)hn->hne_nic, NULL, 2201 ifs); 2202 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2203 break; 2204 2205 case NE_ADDRESS_CHANGE : 2206 /* 2207 * We only respond to events for logical interface 0 because 2208 * IPFilter only uses the first address given to a network 2209 * interface. We check for hne_lif==1 because the netinfo 2210 * code maps adds 1 to the lif number so that it can return 2211 * 0 to indicate "no more lifs" when walking them. 2212 */ 2213 if (hn->hne_lif == 1) { 2214 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL, 2215 ifs); 2216 sin = hn->hne_data; 2217 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr, 2218 ifs); 2219 } 2220 break; 2221 2222 #if SOLARIS2 >= 10 2223 case NE_IFINDEX_CHANGE : 2224 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2225 2226 if (hn->hne_data != NULL) { 2227 /* 2228 * The netinfo passes interface index as int (hne_data should be 2229 * handled as a pointer to int), which is always 32bit. We need to 2230 * convert it to void pointer here, since interfaces are 2231 * represented as pointers to void in IPF. The pointers are 64 bits 2232 * long on 64bit platforms. Doing something like 2233 * (void *)((int) x) 2234 * will throw warning: 2235 * "cast to pointer from integer of different size" 2236 * during 64bit compilation. 2237 * 2238 * The line below uses (size_t) to typecast int to 2239 * size_t, which might be 64bit/32bit (depending 2240 * on architecture). Once we have proper 64bit/32bit 2241 * type (size_t), we can safely convert it to void pointer. 
2242 */ 2243 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2244 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2245 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2246 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2247 } 2248 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2249 break; 2250 #endif 2251 2252 default : 2253 break; 2254 } 2255 2256 return 0; 2257 } 2258 2259 2260 /* ------------------------------------------------------------------------ */ 2261 /* Function: ipf_nic_event_v6 */ 2262 /* Returns: int - 0 == no problems encountered */ 2263 /* Parameters: event(I) - pointer to event */ 2264 /* info(I) - pointer to information about a NIC event */ 2265 /* */ 2266 /* Function to receive asynchronous NIC events from IP */ 2267 /* ------------------------------------------------------------------------ */ 2268 /*ARGSUSED*/ 2269 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg) 2270 { 2271 struct sockaddr_in6 *sin6; 2272 hook_nic_event_t *hn; 2273 ipf_stack_t *ifs = arg; 2274 void *new_ifp = NULL; 2275 2276 if (ifs->ifs_fr_running <= 0) 2277 return (0); 2278 2279 hn = (hook_nic_event_t *)info; 2280 2281 switch (hn->hne_event) 2282 { 2283 case NE_PLUMB : 2284 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2285 hn->hne_data, ifs); 2286 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2287 hn->hne_data, ifs); 2288 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2289 hn->hne_data, ifs); 2290 break; 2291 2292 case NE_UNPLUMB : 2293 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2294 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, 2295 ifs); 2296 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2297 break; 2298 2299 case NE_ADDRESS_CHANGE : 2300 if (hn->hne_lif == 1) { 2301 sin6 = hn->hne_data; 2302 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr, 2303 ifs); 2304 } 2305 break; 2306 2307 #if SOLARIS2 >= 10 2308 case NE_IFINDEX_CHANGE : 2309 WRITE_ENTER(&ifs->ifs_ipf_mutex); 
2310 if (hn->hne_data != NULL) { 2311 /* 2312 * The netinfo passes interface index as int (hne_data should be 2313 * handled as a pointer to int), which is always 32bit. We need to 2314 * convert it to void pointer here, since interfaces are 2315 * represented as pointers to void in IPF. The pointers are 64 bits 2316 * long on 64bit platforms. Doing something like 2317 * (void *)((int) x) 2318 * will throw warning: 2319 * "cast to pointer from integer of different size" 2320 * during 64bit compilation. 2321 * 2322 * The line below uses (size_t) to typecast int to 2323 * size_t, which might be 64bit/32bit (depending 2324 * on architecture). Once we have proper 64bit/32bit 2325 * type (size_t), we can safely convert it to void pointer. 2326 */ 2327 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2328 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2329 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2330 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2331 } 2332 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2333 break; 2334 #endif 2335 2336 default : 2337 break; 2338 } 2339 2340 return 0; 2341 } 2342 2343 /* 2344 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6() 2345 * are needed in Solaris kernel only. We don't need them in 2346 * ipftest to pretend the ICMP/RST packet was sent as a response. 2347 */ 2348 #if defined(_KERNEL) && (SOLARIS2 >= 10) 2349 /* ------------------------------------------------------------------------ */ 2350 /* Function: fr_make_rst */ 2351 /* Returns: int - 0 on success, -1 on failure */ 2352 /* Parameters: fin(I) - pointer to packet information */ 2353 /* */ 2354 /* We must alter the original mblks passed to IPF from IP stack via */ 2355 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. 
*/ 2356 /* IPF can basicaly do only these things with mblk representing the packet: */ 2357 /* leave it as it is (pass the packet) */ 2358 /* */ 2359 /* discard it (block the packet) */ 2360 /* */ 2361 /* alter it (i.e. NAT) */ 2362 /* */ 2363 /* As you can see IPF can not simply discard the mblk and supply a new one */ 2364 /* instead to IP stack via FW_HOOKS. */ 2365 /* */ 2366 /* The return-rst action for packets coming via NIC is handled as follows: */ 2367 /* mblk with packet is discarded */ 2368 /* */ 2369 /* new mblk with RST response is constructed and injected to network */ 2370 /* */ 2371 /* IPF can't inject packets to loopback interface, this is just another */ 2372 /* limitation we have to deal with here. The only option to send RST */ 2373 /* response to offending TCP packet coming via loopback is to alter it. */ 2374 /* */ 2375 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */ 2376 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */ 2377 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */ 2378 /* ------------------------------------------------------------------------ */ 2379 int fr_make_rst(fin) 2380 fr_info_t *fin; 2381 { 2382 uint16_t tmp_port; 2383 int rv = -1; 2384 uint32_t old_ack; 2385 tcphdr_t *tcp = NULL; 2386 struct in_addr tmp_src; 2387 #ifdef USE_INET6 2388 struct in6_addr tmp_src6; 2389 #endif 2390 2391 ASSERT(fin->fin_p == IPPROTO_TCP); 2392 2393 /* 2394 * We do not need to adjust chksum, since it is not being checked by 2395 * Solaris IP stack for loopback clients. 2396 */ 2397 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) && 2398 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2399 2400 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2401 /* Swap IPv4 addresses. 
*/ 2402 tmp_src = fin->fin_ip->ip_src; 2403 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2404 fin->fin_ip->ip_dst = tmp_src; 2405 2406 rv = 0; 2407 } 2408 else 2409 tcp = NULL; 2410 } 2411 #ifdef USE_INET6 2412 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) && 2413 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2414 /* 2415 * We are relying on fact the next header is TCP, which is true 2416 * for regular TCP packets coming in over loopback. 2417 */ 2418 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2419 /* Swap IPv6 addresses. */ 2420 tmp_src6 = fin->fin_ip6->ip6_src; 2421 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2422 fin->fin_ip6->ip6_dst = tmp_src6; 2423 2424 rv = 0; 2425 } 2426 else 2427 tcp = NULL; 2428 } 2429 #endif 2430 2431 if (tcp != NULL) { 2432 /* 2433 * Adjust TCP header: 2434 * swap ports, 2435 * set flags, 2436 * set correct ACK number 2437 */ 2438 tmp_port = tcp->th_sport; 2439 tcp->th_sport = tcp->th_dport; 2440 tcp->th_dport = tmp_port; 2441 old_ack = tcp->th_ack; 2442 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1); 2443 tcp->th_seq = old_ack; 2444 tcp->th_flags = TH_RST | TH_ACK; 2445 } 2446 2447 return (rv); 2448 } 2449 2450 /* ------------------------------------------------------------------------ */ 2451 /* Function: fr_make_icmp_v4 */ 2452 /* Returns: int - 0 on success, -1 on failure */ 2453 /* Parameters: fin(I) - pointer to packet information */ 2454 /* */ 2455 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2456 /* what is going to happen here and why. Once you read the comment there, */ 2457 /* continue here with next paragraph. */ 2458 /* */ 2459 /* To turn IPv4 packet into ICMPv4 response packet, these things must */ 2460 /* happen here: */ 2461 /* (1) Original mblk is copied (duplicated). */ 2462 /* */ 2463 /* (2) ICMP header is created. */ 2464 /* */ 2465 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */ 2466 /* data ready then. 
*/ 2467 /* */ 2468 /* (4) Swap IP addresses in original mblk and adjust IP header data. */ 2469 /* */ 2470 /* (5) The mblk containing original packet is trimmed to contain IP */ 2471 /* header only and ICMP chksum is computed. */ 2472 /* */ 2473 /* (6) The ICMP header we have from (3) is linked to original mblk, */ 2474 /* which now contains new IP header. If original packet was spread */ 2475 /* over several mblks, only the first mblk is kept. */ 2476 /* ------------------------------------------------------------------------ */ 2477 static int fr_make_icmp_v4(fin) 2478 fr_info_t *fin; 2479 { 2480 struct in_addr tmp_src; 2481 tcphdr_t *tcp; 2482 struct icmp *icmp; 2483 mblk_t *mblk_icmp; 2484 mblk_t *mblk_ip; 2485 size_t icmp_pld_len; /* octets to append to ICMP header */ 2486 size_t orig_iphdr_len; /* length of IP header only */ 2487 uint32_t sum; 2488 uint16_t *buf; 2489 int len; 2490 2491 2492 if (fin->fin_v != 4) 2493 return (-1); 2494 2495 /* 2496 * If we are dealing with TCP, then packet must be SYN/FIN to be routed 2497 * by IP stack. If it is not SYN/FIN, then we must drop it silently. 2498 */ 2499 tcp = (tcphdr_t *) fin->fin_dp; 2500 2501 if ((fin->fin_p == IPPROTO_TCP) && 2502 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2503 return (-1); 2504 2505 /* 2506 * Step (1) 2507 * 2508 * Make copy of original mblk. 2509 * 2510 * We want to copy as much data as necessary, not less, not more. The 2511 * ICMPv4 payload length for unreachable messages is: 2512 * original IP header + 8 bytes of L4 (if there are any). 2513 * 2514 * We determine if there are at least 8 bytes of L4 data following IP 2515 * header first. 2516 */ 2517 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ? 2518 ICMPERR_ICMPHLEN : fin->fin_dlen; 2519 /* 2520 * Since we don't want to copy more data than necessary, we must trim 2521 * the original mblk here. The right way (STREAMish) would be to use 2522 * adjmsg() to trim it. 
However we would have to calculate the length 2523 * argument for adjmsg() from pointers we already have here. 2524 * 2525 * Since we have pointers and offsets, it's faster and easier for 2526 * us to just adjust pointers by hand instead of using adjmsg(). 2527 */ 2528 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp; 2529 fin->fin_m->b_wptr += icmp_pld_len; 2530 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip; 2531 2532 /* 2533 * Also we don't want to copy any L2 stuff, which might precede IP 2534 * header, so we have have to set b_rptr to point to the start of IP 2535 * header. 2536 */ 2537 fin->fin_m->b_rptr += fin->fin_ipoff; 2538 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2539 return (-1); 2540 fin->fin_m->b_rptr -= fin->fin_ipoff; 2541 2542 /* 2543 * Step (2) 2544 * 2545 * Create an ICMP header, which will be appened to original mblk later. 2546 * ICMP header is just another mblk. 2547 */ 2548 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI); 2549 if (mblk_icmp == NULL) { 2550 FREE_MB_T(mblk_ip); 2551 return (-1); 2552 } 2553 2554 MTYPE(mblk_icmp) = M_DATA; 2555 icmp = (struct icmp *) mblk_icmp->b_wptr; 2556 icmp->icmp_type = ICMP_UNREACH; 2557 icmp->icmp_code = fin->fin_icode & 0xFF; 2558 icmp->icmp_void = 0; 2559 icmp->icmp_cksum = 0; 2560 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN; 2561 2562 /* 2563 * Step (3) 2564 * 2565 * Complete ICMP packet - link ICMP header with L4 data from original 2566 * IP packet. 2567 */ 2568 linkb(mblk_icmp, mblk_ip); 2569 2570 /* 2571 * Step (4) 2572 * 2573 * Swap IP addresses and change IP header fields accordingly in 2574 * original IP packet. 2575 * 2576 * There is a rule option return-icmp as a dest for physical 2577 * interfaces. This option becomes useless for loopback, since IPF box 2578 * uses same address as a loopback destination. We ignore the option 2579 * here, the ICMP packet will always look like as it would have been 2580 * sent from the original destination host. 
2581 */ 2582 tmp_src = fin->fin_ip->ip_src; 2583 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2584 fin->fin_ip->ip_dst = tmp_src; 2585 fin->fin_ip->ip_p = IPPROTO_ICMP; 2586 fin->fin_ip->ip_sum = 0; 2587 2588 /* 2589 * Step (5) 2590 * 2591 * We trim the orignal mblk to hold IP header only. 2592 */ 2593 fin->fin_m->b_wptr = fin->fin_dp; 2594 orig_iphdr_len = fin->fin_m->b_wptr - 2595 (fin->fin_m->b_rptr + fin->fin_ipoff); 2596 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN + 2597 orig_iphdr_len); 2598 2599 /* 2600 * ICMP chksum calculation. The data we are calculating chksum for are 2601 * spread over two mblks, therefore we have to use two for loops. 2602 * 2603 * First for loop computes chksum part for ICMP header. 2604 */ 2605 buf = (uint16_t *) icmp; 2606 len = ICMPERR_ICMPHLEN; 2607 for (sum = 0; len > 1; len -= 2) 2608 sum += *buf++; 2609 2610 /* 2611 * Here we add chksum part for ICMP payload. 2612 */ 2613 len = icmp_pld_len; 2614 buf = (uint16_t *) mblk_ip->b_rptr; 2615 for (; len > 1; len -= 2) 2616 sum += *buf++; 2617 2618 /* 2619 * Chksum is done. 2620 */ 2621 sum = (sum >> 16) + (sum & 0xffff); 2622 sum += (sum >> 16); 2623 icmp->icmp_cksum = ~sum; 2624 2625 /* 2626 * Step (6) 2627 * 2628 * Release all packet mblks, except the first one. 2629 */ 2630 if (fin->fin_m->b_cont != NULL) { 2631 FREE_MB_T(fin->fin_m->b_cont); 2632 } 2633 2634 /* 2635 * Append ICMP payload to first mblk, which already contains new IP 2636 * header. 2637 */ 2638 linkb(fin->fin_m, mblk_icmp); 2639 2640 return (0); 2641 } 2642 2643 #ifdef USE_INET6 2644 /* ------------------------------------------------------------------------ */ 2645 /* Function: fr_make_icmp_v6 */ 2646 /* Returns: int - 0 on success, -1 on failure */ 2647 /* Parameters: fin(I) - pointer to packet information */ 2648 /* */ 2649 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2650 /* what and why is going to happen here. 
#ifdef USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    fr_make_icmp_v6                                             */
/* Returns:     int - 0 on success, -1 on failure                           */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* Turns the IPv6 packet in fin->fin_m into an ICMPv6 unreachable reply to  */
/* itself. Please read the comment at fr_make_icmp() first; it explains     */
/* why the packet is rewritten in place and what the result looks like.     */
/* ------------------------------------------------------------------------ */
/*
 * This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.
 * The algorithm is fairly simple:
 *	1) We need to get copy of complete mblk.
 *
 *	2) New ICMPv6 header is created.
 *
 *	3) The copy of original mblk with packet is linked to ICMPv6
 *	   header.
 *
 *	4) The checksum must be adjusted.
 *
 *	5) IP addresses in original mblk are swapped and IP header data
 *	   are adjusted (protocol number).
 *
 *	6) Original mblk is trimmed to hold IPv6 header only, then it is
 *	   linked with the ICMPv6 data we got from (3).
 */
static int fr_make_icmp_v6(fin)
fr_info_t *fin;
{
	struct icmp6_hdr *icmp6;
	tcphdr_t *tcp;
	struct in6_addr tmp_src6;
	size_t icmp_pld_len;
	mblk_t *mblk_ip, *mblk_icmp;

	if (fin->fin_v != 6)
		return (-1);

	/*
	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
	 */
	tcp = (tcphdr_t *) fin->fin_dp;

	if ((fin->fin_p == IPPROTO_TCP) &&
	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
		return (-1);

	/*
	 * Step (1)
	 *
	 * We need to copy complete packet in case of IPv6, no trimming is
	 * needed (except the L2 headers).
	 *
	 * The payload length is taken while b_rptr points at the IP header,
	 * so it matches what copyb() duplicates and does not count any L2
	 * octets. b_rptr is put back before the result is checked, so a
	 * failed copyb() does not leave the mblk with a shifted read pointer.
	 */
	fin->fin_m->b_rptr += fin->fin_ipoff;
	icmp_pld_len = M_LEN(fin->fin_m);
	mblk_ip = copyb(fin->fin_m);
	fin->fin_m->b_rptr -= fin->fin_ipoff;
	if (mblk_ip == NULL)
		return (-1);

	/*
	 * Step (2)
	 *
	 * Allocate and create ICMP header. The copy made in step (1) has to
	 * be released if the allocation fails, nobody else refers to it.
	 */
	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
	    BPRI_HI);

	if (mblk_icmp == NULL) {
		FREE_MB_T(mblk_ip);
		return (-1);
	}

	MTYPE(mblk_icmp) = M_DATA;
	icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr;
	icmp6->icmp6_type = ICMP6_DST_UNREACH;
	icmp6->icmp6_code = fin->fin_icode & 0xFF;
	icmp6->icmp6_data32[0] = 0;
	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);

	/*
	 * Step (3)
	 *
	 * Link the copy of IP packet to ICMP header.
	 */
	linkb(mblk_icmp, mblk_ip);

	/*
	 * Step (4)
	 *
	 * Calculate chksum - this is much more easier task than in case of
	 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length.
	 * We are making compensation just for change of packet length.
	 *
	 * NOTE(review): the length is stored in host byte order here while
	 * ip6_plen below uses htons() - confirm which one the IP stack
	 * expects in icmp6_cksum.
	 */
	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);

	/*
	 * Step (5)
	 *
	 * Swap IP addresses.
	 */
	tmp_src6 = fin->fin_ip6->ip6_src;
	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
	fin->fin_ip6->ip6_dst = tmp_src6;

	/*
	 * and adjust IP header data.
	 */
	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));

	/*
	 * Step (6)
	 *
	 * We must release all linked mblks from original packet and keep only
	 * the first mblk with IP header to link ICMP data. b_cont must be
	 * cleared after the release: linkb() below follows the b_cont chain
	 * to find its tail, so a stale pointer would lead it into the mblks
	 * which were just released.
	 */
	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);

	if (fin->fin_m->b_cont != NULL) {
		FREE_MB_T(fin->fin_m->b_cont);
		fin->fin_m->b_cont = NULL;
	}

	/*
	 * Append ICMP payload to IP header.
	 */
	linkb(fin->fin_m, mblk_icmp);

	return (0);
}
#endif	/* USE_INET6 */
*/ 2809 /* | ^ `-> duped orig mblk */ 2810 /* | | ^ */ 2811 /* `- The original mblk | | */ 2812 /* will be trimmed to | | */ 2813 /* to contain IP header | | */ 2814 /* only | | */ 2815 /* | | */ 2816 /* `- This is newly | */ 2817 /* allocated mblk to | */ 2818 /* hold ICMPv6 data. | */ 2819 /* | */ 2820 /* | */ 2821 /* | */ 2822 /* This is the copy of original mblk, it will contain -' */ 2823 /* orignal IP packet in case of ICMPv6. In case of */ 2824 /* ICMPv4 it will contain up to 8 bytes of IP payload */ 2825 /* (TCP/UDP/L4) data from original packet. */ 2826 /* ------------------------------------------------------------------------ */ 2827 int fr_make_icmp(fin) 2828 fr_info_t *fin; 2829 { 2830 int rv; 2831 2832 if (fin->fin_v == 4) 2833 rv = fr_make_icmp_v4(fin); 2834 #ifdef USE_INET6 2835 else if (fin->fin_v == 6) 2836 rv = fr_make_icmp_v6(fin); 2837 #endif 2838 else 2839 rv = -1; 2840 2841 return (rv); 2842 } 2843 2844 /* ------------------------------------------------------------------------ */ 2845 /* Function: fr_buf_sum */ 2846 /* Returns: unsigned int - sum of buffer buf */ 2847 /* Parameters: buf - pointer to buf we want to sum up */ 2848 /* len - length of buffer buf */ 2849 /* */ 2850 /* Sums buffer buf. The result is used for chksum calculation. The buf */ 2851 /* argument must be aligned. 
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     uint32_t - unfolded 32-bit sum of the 16-bit words of buf   */
/* Parameters:  buf(I) - pointer to the buffer to sum up (must be aligned)  */
/*              len(I) - length of buf in octets                            */
/*                                                                          */
/* Sums buf as a sequence of 16-bit words for chksum calculation; an odd    */
/* trailing octet is added as if it were followed by a zero octet. The      */
/* caller folds the carries and complements the result.                     */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	/* buf is only read, so the const qualifier is kept all the way. */
	const uint16_t *words = buf;
	uint32_t sum = 0;

	for (; len > 1; len -= 2)
		sum += *words++;

	/*
	 * The left over octet is the first octet of a 16-bit word in memory.
	 * htons() of (octet << 8) yields the value such a word has when it
	 * is read from memory in host byte order.
	 */
	if (len == 1)
		sum += htons((uint16_t)(*(const unsigned char *)words << 8));

	return (sum);
}
2927 * 2928 * Compute pseudo header chksum for TCP and UDP. 2929 */ 2930 if ((fin->fin_p == IPPROTO_UDP) || 2931 (fin->fin_p == IPPROTO_TCP)) { 2932 bzero(&phdr, sizeof (phdr)); 2933 #ifdef USE_INET6 2934 if (fin->fin_v == 6) { 2935 phdr.src_addr.in6 = fin->fin_srcip6; 2936 phdr.dst_addr.in6 = fin->fin_dstip6; 2937 } else { 2938 phdr.src_addr.in4 = fin->fin_src; 2939 phdr.dst_addr.in4 = fin->fin_dst; 2940 } 2941 #else 2942 phdr.src_addr.in4 = fin->fin_src; 2943 phdr.dst_addr.in4 = fin->fin_dst; 2944 #endif 2945 phdr.zero = (char) 0; 2946 phdr.proto = fin->fin_p; 2947 phdr.len = htons((uint16_t)fin->fin_dlen); 2948 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr)); 2949 } else { 2950 sum = 0; 2951 } 2952 2953 /* 2954 * Set pointer to the L4 chksum field in the packet, set buf pointer to 2955 * the L4 header start. 2956 */ 2957 switch (fin->fin_p) { 2958 case IPPROTO_UDP: 2959 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2960 l4_csum_p = &udp->uh_sum; 2961 buf = udp; 2962 break; 2963 case IPPROTO_TCP: 2964 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2965 l4_csum_p = &tcp->th_sum; 2966 buf = tcp; 2967 break; 2968 case IPPROTO_ICMP: 2969 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2970 l4_csum_p = &icmp->icmp_cksum; 2971 buf = icmp; 2972 break; 2973 #ifdef USE_INET6 2974 case IPPROTO_ICMPV6: 2975 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen); 2976 l4_csum_p = &icmp6->icmp6_cksum; 2977 buf = icmp6; 2978 break; 2979 #endif 2980 default: 2981 l4_csum_p = NULL; 2982 } 2983 2984 /* 2985 * Compute L4 chksum if needed. 2986 */ 2987 if (l4_csum_p != NULL) { 2988 *l4_csum_p = (uint16_t)0; 2989 pld_len = fin->fin_dlen; 2990 len = pkt->b_wptr - (unsigned char *)buf; 2991 ASSERT(len == pld_len); 2992 /* 2993 * Add payload sum to pseudoheader sum. 
2994 */ 2995 sum += fr_buf_sum(buf, len); 2996 while (sum >> 16) 2997 sum = (sum & 0xFFFF) + (sum >> 16); 2998 2999 *l4_csum_p = ~((uint16_t)sum); 3000 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p); 3001 } 3002 3003 /* 3004 * The IP header chksum is needed just for IPv4. 3005 */ 3006 if (fin->fin_v == 4) { 3007 /* 3008 * Compute IPv4 header chksum. 3009 */ 3010 ip = (ip_t *)pkt->b_rptr; 3011 ip->ip_sum = (uint16_t)0; 3012 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen); 3013 while (ip_sum >> 16) 3014 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16); 3015 3016 ip->ip_sum = ~((uint16_t)ip_sum); 3017 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum); 3018 } 3019 3020 return; 3021 } 3022 3023 #endif /* _KERNEL && SOLARIS2 >= 10 */ 3024