1 /* 2 * Copyright (C) 1993-2001, 2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 * 8 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 9 */ 10 11 #if !defined(lint) 12 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed"; 13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $"; 14 #endif 15 16 #include <sys/types.h> 17 #include <sys/errno.h> 18 #include <sys/param.h> 19 #include <sys/cpuvar.h> 20 #include <sys/open.h> 21 #include <sys/ioctl.h> 22 #include <sys/filio.h> 23 #include <sys/systm.h> 24 #include <sys/strsubr.h> 25 #include <sys/cred.h> 26 #include <sys/ddi.h> 27 #include <sys/sunddi.h> 28 #include <sys/ksynch.h> 29 #include <sys/kmem.h> 30 #include <sys/mkdev.h> 31 #include <sys/protosw.h> 32 #include <sys/socket.h> 33 #include <sys/dditypes.h> 34 #include <sys/cmn_err.h> 35 #include <sys/zone.h> 36 #include <net/if.h> 37 #include <net/af.h> 38 #include <net/route.h> 39 #include <netinet/in.h> 40 #include <netinet/in_systm.h> 41 #include <netinet/ip.h> 42 #include <netinet/ip_var.h> 43 #include <netinet/tcp.h> 44 #include <netinet/udp.h> 45 #include <netinet/tcpip.h> 46 #include <netinet/ip_icmp.h> 47 #include "netinet/ip_compat.h" 48 #ifdef USE_INET6 49 # include <netinet/icmp6.h> 50 #endif 51 #include "netinet/ip_fil.h" 52 #include "netinet/ip_nat.h" 53 #include "netinet/ip_frag.h" 54 #include "netinet/ip_state.h" 55 #include "netinet/ip_auth.h" 56 #include "netinet/ip_proxy.h" 57 #include "netinet/ipf_stack.h" 58 #ifdef IPFILTER_LOOKUP 59 # include "netinet/ip_lookup.h" 60 #endif 61 #include <inet/ip_ire.h> 62 63 #include <sys/md5.h> 64 #include <sys/neti.h> 65 66 static int frzerostats __P((caddr_t, ipf_stack_t *)); 67 static int fr_setipfloopback __P((int, ipf_stack_t *)); 68 static int fr_enableipf __P((ipf_stack_t *, int)); 69 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp)); 70 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *)); 71 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *)); 72 static int ipf_hook __P((hook_data_t, int, int, void *)); 73 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *)); 74 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *)); 75 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t, 76 void *)); 77 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *)); 78 static int ipf_hook4 __P((hook_data_t, int, int, void *)); 79 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *)); 80 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *)); 81 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t, 82 void *)); 83 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t, 84 void *)); 85 static int ipf_hook6 __P((hook_data_t, int, int, void *)); 86 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 87 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *)); 88 89 #if SOLARIS2 < 10 90 #if SOLARIS2 >= 7 91 u_int *ip_ttl_ptr = NULL; 92 u_int *ip_mtudisc = NULL; 93 # if SOLARIS2 >= 8 94 int *ip_forwarding = NULL; 95 u_int *ip6_forwarding = NULL; 96 # else 97 u_int *ip_forwarding = NULL; 98 # endif 99 #else 100 u_long *ip_ttl_ptr = NULL; 101 u_long *ip_mtudisc = NULL; 102 u_long *ip_forwarding = NULL; 103 #endif 104 #endif 105 106 vmem_t *ipf_minor; /* minor number arena */ 107 void *ipf_state; /* DDI state */ 108 109 /* 110 * GZ-controlled and per-zone stacks: 111 * 112 * For each non-global zone, we create two ipf stacks: the per-zone stack and 113 * the GZ-controlled stack. The per-zone stack can be controlled and observed 114 * from inside the zone or from the global zone. The GZ-controlled stack can 115 * only be controlled and observed from the global zone (though the rules 116 * still only affect that non-global zone). 117 * 118 * The two hooks are always arranged so that the GZ-controlled stack is always 119 * "outermost" with respect to the zone. The traffic flow then looks like 120 * this: 121 * 122 * Inbound: 123 * 124 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone 125 * 126 * Outbound: 127 * 128 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone 129 */ 130 131 /* IPv4 hook names */ 132 char *hook4_nicevents = "ipfilter_hook4_nicevents"; 133 char *hook4_nicevents_gz = "ipfilter_hook4_nicevents_gz"; 134 char *hook4_in = "ipfilter_hook4_in"; 135 char *hook4_in_gz = "ipfilter_hook4_in_gz"; 136 char *hook4_out = "ipfilter_hook4_out"; 137 char *hook4_out_gz = "ipfilter_hook4_out_gz"; 138 char *hook4_loop_in = "ipfilter_hook4_loop_in"; 139 char *hook4_loop_in_gz = "ipfilter_hook4_loop_in_gz"; 140 char *hook4_loop_out = "ipfilter_hook4_loop_out"; 141 char *hook4_loop_out_gz = "ipfilter_hook4_loop_out_gz"; 142 143 /* IPv6 hook names */ 144 char *hook6_nicevents = "ipfilter_hook6_nicevents"; 145 char *hook6_nicevents_gz = "ipfilter_hook6_nicevents_gz"; 146 char *hook6_in = "ipfilter_hook6_in"; 147 char *hook6_in_gz = "ipfilter_hook6_in_gz"; 148 char *hook6_out = "ipfilter_hook6_out"; 149 char *hook6_out_gz = "ipfilter_hook6_out_gz"; 150 char *hook6_loop_in = "ipfilter_hook6_loop_in"; 151 char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz"; 152 char *hook6_loop_out = "ipfilter_hook6_loop_out"; 153 char *hook6_loop_out_gz = "ipfilter_hook6_loop_out_gz"; 154 155 /* ------------------------------------------------------------------------ */ 156 /* Function: ipldetach */ 157 /* Returns: int - 0 == success, else error. */ 158 /* Parameters: Nil */ 159 /* */ 160 /* This function is responsible for undoing anything that might have been */ 161 /* done in a call to iplattach(). It must be able to clean up from a call */ 162 /* to iplattach() that did not succeed. Why might that happen? Someone */ 163 /* configures a table to be so large that we cannot allocate enough memory */ 164 /* for it. */ 165 /* ------------------------------------------------------------------------ */ 166 int ipldetach(ifs) 167 ipf_stack_t *ifs; 168 { 169 170 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); 171 172 #if SOLARIS2 < 10 173 174 if (ifs->ifs_fr_control_forwarding & 2) { 175 if (ip_forwarding != NULL) 176 *ip_forwarding = 0; 177 #if SOLARIS2 >= 8 178 if (ip6_forwarding != NULL) 179 *ip6_forwarding = 0; 180 #endif 181 } 182 #endif 183 184 /* 185 * This lock needs to be dropped around the net_hook_unregister calls 186 * because we can deadlock here with: 187 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 188 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running) 189 */ 190 RWLOCK_EXIT(&ifs->ifs_ipf_global); 191 192 #define UNDO_HOOK(_f, _b, _e, _h) \ 193 do { \ 194 if (ifs->_f != NULL) { \ 195 if (ifs->_b) { \ 196 int tmp = net_hook_unregister(ifs->_f, \ 197 _e, ifs->_h); \ 198 ifs->_b = (tmp != 0 && tmp != ENXIO); \ 199 if (!ifs->_b && ifs->_h != NULL) { \ 200 hook_free(ifs->_h); \ 201 ifs->_h = NULL; \ 202 } \ 203 } else if (ifs->_h != NULL) { \ 204 hook_free(ifs->_h); \ 205 ifs->_h = NULL; \ 206 } \ 207 } \ 208 _NOTE(CONSTCOND) \ 209 } while (0) 210 211 /* 212 * Remove IPv6 Hooks 213 */ 214 if (ifs->ifs_ipf_ipv6 != NULL) { 215 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in, 216 NH_PHYSICAL_IN, ifs_ipfhook6_in); 217 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out, 218 NH_PHYSICAL_OUT, ifs_ipfhook6_out); 219 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events, 220 NH_NIC_EVENTS, ifs_ipfhook6_nicevents); 221 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in, 222 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in); 223 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out, 224 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out); 225 226 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0) 227 goto detach_failed; 228 ifs->ifs_ipf_ipv6 = NULL; 229 } 230 231 /* 232 * Remove IPv4 Hooks 233 */ 234 if (ifs->ifs_ipf_ipv4 != NULL) { 235 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in, 236 NH_PHYSICAL_IN, ifs_ipfhook4_in); 237 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out, 238 NH_PHYSICAL_OUT, ifs_ipfhook4_out); 239 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events, 240 NH_NIC_EVENTS, ifs_ipfhook4_nicevents); 241 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in, 242 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in); 243 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out, 244 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out); 245 246 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0) 247 goto detach_failed; 248 ifs->ifs_ipf_ipv4 = NULL; 249 } 250 251 #undef UNDO_HOOK 252 253 #ifdef IPFDEBUG 254 cmn_err(CE_CONT, "ipldetach()\n"); 255 #endif 256 257 WRITE_ENTER(&ifs->ifs_ipf_global); 258 fr_deinitialise(ifs); 259 260 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); 261 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs); 262 263 if (ifs->ifs_ipf_locks_done == 1) { 264 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock); 265 MUTEX_DESTROY(&ifs->ifs_ipf_rw); 266 RW_DESTROY(&ifs->ifs_ipf_tokens); 267 RW_DESTROY(&ifs->ifs_ipf_ipidfrag); 268 ifs->ifs_ipf_locks_done = 0; 269 } 270 271 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out || 272 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in || 273 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events || 274 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out || 275 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out) 276 return -1; 277 278 return 0; 279 280 detach_failed: 281 WRITE_ENTER(&ifs->ifs_ipf_global); 282 return -1; 283 } 284 285 int iplattach(ifs) 286 ipf_stack_t *ifs; 287 { 288 #if SOLARIS2 < 10 289 int i; 290 #endif 291 netid_t id = ifs->ifs_netid; 292 293 #ifdef IPFDEBUG 294 cmn_err(CE_CONT, "iplattach()\n"); 295 #endif 296 297 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); 298 ifs->ifs_fr_flags = IPF_LOGGING; 299 #ifdef _KERNEL 300 ifs->ifs_fr_update_ipid = 0; 301 #else 302 ifs->ifs_fr_update_ipid = 1; 303 #endif 304 ifs->ifs_fr_minttl = 4; 305 ifs->ifs_fr_icmpminfragmtu = 68; 306 #if defined(IPFILTER_DEFAULT_BLOCK) 307 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH; 308 #else 309 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; 310 #endif 311 312 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache)); 313 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex"); 314 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex"); 315 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); 316 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock"); 317 ifs->ifs_ipf_locks_done = 1; 318 319 if (fr_initialise(ifs) < 0) 320 return -1; 321 322 /* 323 * For incoming packets, we want the GZ-controlled hooks to run before 324 * the per-zone hooks, regardless of what order they're are installed. 325 * See the "GZ-controlled and per-zone stacks" comment block at the top 326 * of this file. 327 */ 328 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \ 329 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ 330 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \ 331 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); 332 333 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4, 334 hook4_nicevents, hook4_nicevents_gz, ifs); 335 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in, 336 hook4_in, hook4_in_gz, ifs); 337 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in, 338 hook4_loop_in, hook4_loop_in_gz, ifs); 339 340 /* 341 * For outgoing packets, we want the GZ-controlled hooks to run after 342 * the per-zone hooks, regardless of what order they're are installed. 343 * See the "GZ-controlled and per-zone stacks" comment block at the top 344 * of this file. 345 */ 346 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \ 347 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ 348 (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \ 349 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); 350 351 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out, 352 hook4_out, hook4_out_gz, ifs); 353 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out, 354 hook4_loop_out, hook4_loop_out_gz, ifs); 355 356 /* 357 * If we hold this lock over all of the net_hook_register calls, we 358 * can cause a deadlock to occur with the following lock ordering: 359 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 360 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path) 361 */ 362 RWLOCK_EXIT(&ifs->ifs_ipf_global); 363 364 /* 365 * Add IPv4 hooks 366 */ 367 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET); 368 if (ifs->ifs_ipf_ipv4 == NULL) 369 goto hookup_failed; 370 371 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4, 372 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0); 373 if (!ifs->ifs_hook4_nic_events) 374 goto hookup_failed; 375 376 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4, 377 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0); 378 if (!ifs->ifs_hook4_physical_in) 379 goto hookup_failed; 380 381 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4, 382 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0); 383 if (!ifs->ifs_hook4_physical_out) 384 goto hookup_failed; 385 386 if (ifs->ifs_ipf_loopback) { 387 ifs->ifs_hook4_loopback_in = (net_hook_register( 388 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 389 ifs->ifs_ipfhook4_loop_in) == 0); 390 if (!ifs->ifs_hook4_loopback_in) 391 goto hookup_failed; 392 393 ifs->ifs_hook4_loopback_out = (net_hook_register( 394 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 395 ifs->ifs_ipfhook4_loop_out) == 0); 396 if (!ifs->ifs_hook4_loopback_out) 397 goto hookup_failed; 398 } 399 400 /* 401 * Add IPv6 hooks 402 */ 403 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6); 404 if (ifs->ifs_ipf_ipv6 == NULL) 405 goto hookup_failed; 406 407 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6, 408 hook6_nicevents, hook6_nicevents_gz, ifs); 409 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in, 410 hook6_in, hook6_in_gz, ifs); 411 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in, 412 hook6_loop_in, hook6_loop_in_gz, ifs); 413 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out, 414 hook6_out, hook6_out_gz, ifs); 415 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out, 416 hook6_loop_out, hook6_loop_out_gz, ifs); 417 418 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6, 419 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0); 420 if (!ifs->ifs_hook6_nic_events) 421 goto hookup_failed; 422 423 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6, 424 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0); 425 if (!ifs->ifs_hook6_physical_in) 426 goto hookup_failed; 427 428 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6, 429 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0); 430 if (!ifs->ifs_hook6_physical_out) 431 goto hookup_failed; 432 433 if (ifs->ifs_ipf_loopback) { 434 ifs->ifs_hook6_loopback_in = (net_hook_register( 435 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 436 ifs->ifs_ipfhook6_loop_in) == 0); 437 if (!ifs->ifs_hook6_loopback_in) 438 goto hookup_failed; 439 440 ifs->ifs_hook6_loopback_out = (net_hook_register( 441 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 442 ifs->ifs_ipfhook6_loop_out) == 0); 443 if (!ifs->ifs_hook6_loopback_out) 444 goto hookup_failed; 445 } 446 447 /* 448 * Reacquire ipf_global, now it is safe. 449 */ 450 WRITE_ENTER(&ifs->ifs_ipf_global); 451 452 /* Do not use private interface ip_params_arr[] in Solaris 10 */ 453 #if SOLARIS2 < 10 454 455 #if SOLARIS2 >= 8 456 ip_forwarding = &ip_g_forward; 457 #endif 458 /* 459 * XXX - There is no terminator for this array, so it is not possible 460 * to tell if what we are looking for is missing and go off the end 461 * of the array. 462 */ 463 464 #if SOLARIS2 <= 8 465 for (i = 0; ; i++) { 466 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) { 467 ip_ttl_ptr = &ip_param_arr[i].ip_param_value; 468 } else if (!strcmp(ip_param_arr[i].ip_param_name, 469 "ip_path_mtu_discovery")) { 470 ip_mtudisc = &ip_param_arr[i].ip_param_value; 471 } 472 #if SOLARIS2 < 8 473 else if (!strcmp(ip_param_arr[i].ip_param_name, 474 "ip_forwarding")) { 475 ip_forwarding = &ip_param_arr[i].ip_param_value; 476 } 477 #else 478 else if (!strcmp(ip_param_arr[i].ip_param_name, 479 "ip6_forwarding")) { 480 ip6_forwarding = &ip_param_arr[i].ip_param_value; 481 } 482 #endif 483 484 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL && 485 #if SOLARIS2 >= 8 486 ip6_forwarding != NULL && 487 #endif 488 ip_forwarding != NULL) 489 break; 490 } 491 #endif 492 493 if (ifs->ifs_fr_control_forwarding & 1) { 494 if (ip_forwarding != NULL) 495 *ip_forwarding = 1; 496 #if SOLARIS2 >= 8 497 if (ip6_forwarding != NULL) 498 *ip6_forwarding = 1; 499 #endif 500 } 501 502 #endif 503 504 return 0; 505 hookup_failed: 506 WRITE_ENTER(&ifs->ifs_ipf_global); 507 return -1; 508 } 509 510 static int fr_setipfloopback(set, ifs) 511 int set; 512 ipf_stack_t *ifs; 513 { 514 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL) 515 return EFAULT; 516 517 if (set && !ifs->ifs_ipf_loopback) { 518 ifs->ifs_ipf_loopback = 1; 519 520 ifs->ifs_hook4_loopback_in = (net_hook_register( 521 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 522 ifs->ifs_ipfhook4_loop_in) == 0); 523 if (!ifs->ifs_hook4_loopback_in) 524 return EINVAL; 525 526 ifs->ifs_hook4_loopback_out = (net_hook_register( 527 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 528 ifs->ifs_ipfhook4_loop_out) == 0); 529 if (!ifs->ifs_hook4_loopback_out) 530 return EINVAL; 531 532 ifs->ifs_hook6_loopback_in = (net_hook_register( 533 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 534 ifs->ifs_ipfhook6_loop_in) == 0); 535 if (!ifs->ifs_hook6_loopback_in) 536 return EINVAL; 537 538 ifs->ifs_hook6_loopback_out = (net_hook_register( 539 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 540 ifs->ifs_ipfhook6_loop_out) == 0); 541 if (!ifs->ifs_hook6_loopback_out) 542 return EINVAL; 543 544 } else if (!set && ifs->ifs_ipf_loopback) { 545 ifs->ifs_ipf_loopback = 0; 546 547 ifs->ifs_hook4_loopback_in = 548 (net_hook_unregister(ifs->ifs_ipf_ipv4, 549 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 550 if (ifs->ifs_hook4_loopback_in) 551 return EBUSY; 552 553 ifs->ifs_hook4_loopback_out = 554 (net_hook_unregister(ifs->ifs_ipf_ipv4, 555 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0); 556 if (ifs->ifs_hook4_loopback_out) 557 return EBUSY; 558 559 ifs->ifs_hook6_loopback_in = 560 (net_hook_unregister(ifs->ifs_ipf_ipv6, 561 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 562 if (ifs->ifs_hook6_loopback_in) 563 return EBUSY; 564 565 ifs->ifs_hook6_loopback_out = 566 (net_hook_unregister(ifs->ifs_ipf_ipv6, 567 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0); 568 if (ifs->ifs_hook6_loopback_out) 569 return EBUSY; 570 } 571 return 0; 572 } 573 574 575 /* 576 * Filter ioctl interface. 577 */ 578 /*ARGSUSED*/ 579 int iplioctl(dev, cmd, data, mode, cp, rp) 580 dev_t dev; 581 int cmd; 582 #if SOLARIS2 >= 7 583 intptr_t data; 584 #else 585 int *data; 586 #endif 587 int mode; 588 cred_t *cp; 589 int *rp; 590 { 591 int error = 0, tmp; 592 friostat_t fio; 593 minor_t unit; 594 u_int enable; 595 ipf_stack_t *ifs; 596 zoneid_t zid; 597 ipf_devstate_t *isp; 598 599 #ifdef IPFDEBUG 600 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n", 601 dev, cmd, data, mode, cp, rp); 602 #endif 603 unit = getminor(dev); 604 605 isp = ddi_get_soft_state(ipf_state, unit); 606 if (isp == NULL) 607 return ENXIO; 608 unit = isp->ipfs_minor; 609 610 zid = crgetzoneid(cp); 611 if (cmd == SIOCIPFZONESET) { 612 if (zid == GLOBAL_ZONEID) 613 return fr_setzoneid(isp, (caddr_t) data); 614 return EACCES; 615 } 616 617 /* 618 * ipf_find_stack returns with a read lock on ifs_ipf_global 619 */ 620 ifs = ipf_find_stack(zid, isp); 621 if (ifs == NULL) 622 return ENXIO; 623 624 if (ifs->ifs_fr_running <= 0) { 625 if (unit != IPL_LOGIPF) { 626 RWLOCK_EXIT(&ifs->ifs_ipf_global); 627 return EIO; 628 } 629 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && 630 cmd != SIOCIPFSET && cmd != SIOCFRENB && 631 cmd != SIOCGETFS && cmd != SIOCGETFF) { 632 RWLOCK_EXIT(&ifs->ifs_ipf_global); 633 return EIO; 634 } 635 } 636 637 if (ifs->ifs_fr_enable_active != 0) { 638 RWLOCK_EXIT(&ifs->ifs_ipf_global); 639 return EBUSY; 640 } 641 642 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp), 643 curproc, ifs); 644 if (error != -1) { 645 RWLOCK_EXIT(&ifs->ifs_ipf_global); 646 return error; 647 } 648 error = 0; 649 650 switch (cmd) 651 { 652 case SIOCFRENB : 653 if (!(mode & FWRITE)) 654 error = EPERM; 655 else { 656 error = COPYIN((caddr_t)data, (caddr_t)&enable, 657 sizeof(enable)); 658 if (error != 0) { 659 error = EFAULT; 660 break; 661 } 662 663 RWLOCK_EXIT(&ifs->ifs_ipf_global); 664 WRITE_ENTER(&ifs->ifs_ipf_global); 665 666 /* 667 * We must recheck fr_enable_active here, since we've 668 * dropped ifs_ipf_global from R in order to get it 669 * exclusively. 670 */ 671 if (ifs->ifs_fr_enable_active == 0) { 672 ifs->ifs_fr_enable_active = 1; 673 error = fr_enableipf(ifs, enable); 674 ifs->ifs_fr_enable_active = 0; 675 } 676 } 677 break; 678 case SIOCIPFSET : 679 if (!(mode & FWRITE)) { 680 error = EPERM; 681 break; 682 } 683 /* FALLTHRU */ 684 case SIOCIPFGETNEXT : 685 case SIOCIPFGET : 686 error = fr_ipftune(cmd, (void *)data, ifs); 687 break; 688 case SIOCSETFF : 689 if (!(mode & FWRITE)) 690 error = EPERM; 691 else { 692 error = COPYIN((caddr_t)data, 693 (caddr_t)&ifs->ifs_fr_flags, 694 sizeof(ifs->ifs_fr_flags)); 695 if (error != 0) 696 error = EFAULT; 697 } 698 break; 699 case SIOCIPFLP : 700 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 701 sizeof(tmp)); 702 if (error != 0) 703 error = EFAULT; 704 else 705 error = fr_setipfloopback(tmp, ifs); 706 break; 707 case SIOCGETFF : 708 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data, 709 sizeof(ifs->ifs_fr_flags)); 710 if (error != 0) 711 error = EFAULT; 712 break; 713 case SIOCFUNCL : 714 error = fr_resolvefunc((void *)data); 715 break; 716 case SIOCINAFR : 717 case SIOCRMAFR : 718 case SIOCADAFR : 719 case SIOCZRLST : 720 if (!(mode & FWRITE)) 721 error = EPERM; 722 else 723 error = frrequest(unit, cmd, (caddr_t)data, 724 ifs->ifs_fr_active, 1, ifs); 725 break; 726 case SIOCINIFR : 727 case SIOCRMIFR : 728 case SIOCADIFR : 729 if (!(mode & FWRITE)) 730 error = EPERM; 731 else 732 error = frrequest(unit, cmd, (caddr_t)data, 733 1 - ifs->ifs_fr_active, 1, ifs); 734 break; 735 case SIOCSWAPA : 736 if (!(mode & FWRITE)) 737 error = EPERM; 738 else { 739 WRITE_ENTER(&ifs->ifs_ipf_mutex); 740 bzero((char *)ifs->ifs_frcache, 741 sizeof (ifs->ifs_frcache)); 742 error = COPYOUT((caddr_t)&ifs->ifs_fr_active, 743 (caddr_t)data, 744 sizeof(ifs->ifs_fr_active)); 745 if (error != 0) 746 error = EFAULT; 747 else 748 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active; 749 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 750 } 751 break; 752 case SIOCGETFS : 753 fr_getstat(&fio, ifs); 754 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT); 755 break; 756 case SIOCFRZST : 757 if (!(mode & FWRITE)) 758 error = EPERM; 759 else 760 error = fr_zerostats((caddr_t)data, ifs); 761 break; 762 case SIOCIPFFL : 763 if (!(mode & FWRITE)) 764 error = EPERM; 765 else { 766 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 767 sizeof(tmp)); 768 if (!error) { 769 tmp = frflush(unit, 4, tmp, ifs); 770 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 771 sizeof(tmp)); 772 if (error != 0) 773 error = EFAULT; 774 } else 775 error = EFAULT; 776 } 777 break; 778 #ifdef USE_INET6 779 case SIOCIPFL6 : 780 if (!(mode & FWRITE)) 781 error = EPERM; 782 else { 783 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 784 sizeof(tmp)); 785 if (!error) { 786 tmp = frflush(unit, 6, tmp, ifs); 787 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 788 sizeof(tmp)); 789 if (error != 0) 790 error = EFAULT; 791 } else 792 error = EFAULT; 793 } 794 break; 795 #endif 796 case SIOCSTLCK : 797 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 798 if (error == 0) { 799 ifs->ifs_fr_state_lock = tmp; 800 ifs->ifs_fr_nat_lock = tmp; 801 ifs->ifs_fr_frag_lock = tmp; 802 ifs->ifs_fr_auth_lock = tmp; 803 } else 804 error = EFAULT; 805 break; 806 #ifdef IPFILTER_LOG 807 case SIOCIPFFB : 808 if (!(mode & FWRITE)) 809 error = EPERM; 810 else { 811 tmp = ipflog_clear(unit, ifs); 812 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 813 sizeof(tmp)); 814 if (error) 815 error = EFAULT; 816 } 817 break; 818 #endif /* IPFILTER_LOG */ 819 case SIOCFRSYN : 820 if (!(mode & FWRITE)) 821 error = EPERM; 822 else { 823 RWLOCK_EXIT(&ifs->ifs_ipf_global); 824 WRITE_ENTER(&ifs->ifs_ipf_global); 825 826 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 827 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 828 fr_nataddrsync(0, NULL, NULL, ifs); 829 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 830 error = 0; 831 } 832 break; 833 case SIOCGFRST : 834 error = fr_outobj((void *)data, fr_fragstats(ifs), 835 IPFOBJ_FRAGSTAT); 836 break; 837 case FIONREAD : 838 #ifdef IPFILTER_LOG 839 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF]; 840 841 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); 842 if (error != 0) 843 error = EFAULT; 844 #endif 845 break; 846 case SIOCIPFITER : 847 error = ipf_frruleiter((caddr_t)data, crgetuid(cp), 848 curproc, ifs); 849 break; 850 851 case SIOCGENITER : 852 error = ipf_genericiter((caddr_t)data, crgetuid(cp), 853 curproc, ifs); 854 break; 855 856 case SIOCIPFDELTOK : 857 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 858 if (error != 0) { 859 error = EFAULT; 860 } else { 861 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs); 862 } 863 break; 864 865 default : 866 #ifdef IPFDEBUG 867 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p", 868 cmd, (void *)data); 869 #endif 870 error = EINVAL; 871 break; 872 } 873 RWLOCK_EXIT(&ifs->ifs_ipf_global); 874 return error; 875 } 876 877 878 static int fr_enableipf(ifs, enable) 879 ipf_stack_t *ifs; 880 int enable; 881 { 882 int error; 883 884 if (!enable) { 885 error = ipldetach(ifs); 886 if (error == 0) 887 ifs->ifs_fr_running = -1; 888 return error; 889 } 890 891 if (ifs->ifs_fr_running > 0) 892 return 0; 893 894 error = iplattach(ifs); 895 if (error == 0) { 896 if (ifs->ifs_fr_timer_id == NULL) { 897 int hz = drv_usectohz(500000); 898 899 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, 900 (void *)ifs, 901 hz); 902 } 903 ifs->ifs_fr_running = 1; 904 } else { 905 (void) ipldetach(ifs); 906 } 907 return error; 908 } 909 910 911 phy_if_t get_unit(name, v, ifs) 912 char *name; 913 int v; 914 ipf_stack_t *ifs; 915 { 916 net_handle_t nif; 917 918 if (v == 4) 919 nif = ifs->ifs_ipf_ipv4; 920 else if (v == 6) 921 nif = ifs->ifs_ipf_ipv6; 922 else 923 return 0; 924 925 return (net_phylookup(nif, name)); 926 } 927 928 /* 929 * routines below for saving IP headers to buffer 930 */ 931 /*ARGSUSED*/ 932 int iplopen(devp, flags, otype, cred) 933 dev_t *devp; 934 int flags, otype; 935 cred_t *cred; 936 { 937 ipf_devstate_t *isp; 938 minor_t min = getminor(*devp); 939 minor_t minor; 940 941 #ifdef IPFDEBUG 942 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred); 943 #endif 944 if (!(otype & OTYP_CHR)) 945 return ENXIO; 946 947 if (IPL_LOGMAX < min) 948 return ENXIO; 949 950 minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1, 951 VM_BESTFIT | VM_SLEEP); 952 953 if (ddi_soft_state_zalloc(ipf_state, minor) != 0) { 954 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1); 955 return ENXIO; 956 } 957 958 *devp = makedevice(getmajor(*devp), minor); 959 isp = ddi_get_soft_state(ipf_state, minor); 960 VERIFY(isp != NULL); 961 962 isp->ipfs_minor = min; 963 isp->ipfs_zoneid = IPFS_ZONE_UNSET; 964 965 return 0; 966 } 967 968 969 /*ARGSUSED*/ 970 int iplclose(dev, flags, otype, cred) 971 dev_t dev; 972 int flags, otype; 973 cred_t *cred; 974 { 975 minor_t min = getminor(dev); 976 977 #ifdef IPFDEBUG 978 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred); 979 #endif 980 981 if (IPL_LOGMAX < min) 982 return ENXIO; 983 984 ddi_soft_state_free(ipf_state, min); 985 vmem_free(ipf_minor, (void *)(uintptr_t)min, 1); 986 987 return 0; 988 } 989 990 #ifdef IPFILTER_LOG 991 /* 992 * iplread/ipllog 993 * both of these must operate with at least splnet() lest they be 994 * called during packet processing and cause an inconsistancy to appear in 995 * the filter lists. 996 */ 997 /*ARGSUSED*/ 998 int iplread(dev, uio, cp) 999 dev_t dev; 1000 register struct uio *uio; 1001 cred_t *cp; 1002 { 1003 ipf_stack_t *ifs; 1004 int ret; 1005 minor_t unit; 1006 ipf_devstate_t *isp; 1007 1008 unit = getminor(dev); 1009 isp = ddi_get_soft_state(ipf_state, unit); 1010 if (isp == NULL) 1011 return ENXIO; 1012 unit = isp->ipfs_minor; 1013 1014 1015 /* 1016 * ipf_find_stack returns with a read lock on ifs_ipf_global 1017 */ 1018 ifs = ipf_find_stack(crgetzoneid(cp), isp); 1019 if (ifs == NULL) 1020 return ENXIO; 1021 1022 # ifdef IPFDEBUG 1023 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp); 1024 # endif 1025 1026 if (ifs->ifs_fr_running < 1) { 1027 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1028 return EIO; 1029 } 1030 1031 # ifdef IPFILTER_SYNC 1032 if (unit == IPL_LOGSYNC) { 1033 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1034 return ipfsync_read(uio); 1035 } 1036 # endif 1037 1038 ret = ipflog_read(unit, uio, ifs); 1039 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1040 return ret; 1041 } 1042 #endif /* IPFILTER_LOG */ 1043 1044 1045 /* 1046 * iplread/ipllog 1047 * both of these must operate with at least splnet() lest they be 1048 * called during packet processing and cause an inconsistancy to appear in 1049 * the filter lists. 1050 */ 1051 int iplwrite(dev, uio, cp) 1052 dev_t dev; 1053 register struct uio *uio; 1054 cred_t *cp; 1055 { 1056 ipf_stack_t *ifs; 1057 minor_t unit; 1058 ipf_devstate_t *isp; 1059 1060 unit = getminor(dev); 1061 isp = ddi_get_soft_state(ipf_state, unit); 1062 if (isp == NULL) 1063 return ENXIO; 1064 unit = isp->ipfs_minor; 1065 1066 /* 1067 * ipf_find_stack returns with a read lock on ifs_ipf_global 1068 */ 1069 ifs = ipf_find_stack(crgetzoneid(cp), isp); 1070 if (ifs == NULL) 1071 return ENXIO; 1072 1073 #ifdef IPFDEBUG 1074 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp); 1075 #endif 1076 1077 if (ifs->ifs_fr_running < 1) { 1078 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1079 return EIO; 1080 } 1081 1082 #ifdef IPFILTER_SYNC 1083 if (getminor(dev) == IPL_LOGSYNC) { 1084 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1085 return ipfsync_write(uio); 1086 } 1087 #endif /* IPFILTER_SYNC */ 1088 dev = dev; /* LINT */ 1089 uio = uio; /* LINT */ 1090 cp = cp; /* LINT */ 1091 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1092 return ENXIO; 1093 } 1094 1095 1096 /* 1097 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that 1098 * requires a large amount of setting up and isn't any more efficient. 1099 */ 1100 int fr_send_reset(fin) 1101 fr_info_t *fin; 1102 { 1103 tcphdr_t *tcp, *tcp2; 1104 int tlen, hlen; 1105 mblk_t *m; 1106 #ifdef USE_INET6 1107 ip6_t *ip6; 1108 #endif 1109 ip_t *ip; 1110 1111 tcp = fin->fin_dp; 1112 if (tcp->th_flags & TH_RST) 1113 return -1; 1114 1115 #ifndef IPFILTER_CKSUM 1116 if (fr_checkl4sum(fin) == -1) 1117 return -1; 1118 #endif 1119 1120 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0; 1121 #ifdef USE_INET6 1122 if (fin->fin_v == 6) 1123 hlen = sizeof(ip6_t); 1124 else 1125 #endif 1126 hlen = sizeof(ip_t); 1127 hlen += sizeof(*tcp2); 1128 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL) 1129 return -1; 1130 1131 m->b_rptr += 64; 1132 MTYPE(m) = M_DATA; 1133 m->b_wptr = m->b_rptr + hlen; 1134 ip = (ip_t *)m->b_rptr; 1135 bzero((char *)ip, hlen); 1136 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2)); 1137 tcp2->th_dport = tcp->th_sport; 1138 tcp2->th_sport = tcp->th_dport; 1139 if (tcp->th_flags & TH_ACK) { 1140 tcp2->th_seq = tcp->th_ack; 1141 tcp2->th_flags = TH_RST; 1142 } else { 1143 tcp2->th_ack = ntohl(tcp->th_seq); 1144 tcp2->th_ack += tlen; 1145 tcp2->th_ack = htonl(tcp2->th_ack); 1146 tcp2->th_flags = TH_RST|TH_ACK; 1147 } 1148 tcp2->th_off = sizeof(struct tcphdr) >> 2; 1149 1150 ip->ip_v = fin->fin_v; 1151 #ifdef USE_INET6 1152 if (fin->fin_v == 6) { 1153 ip6 = (ip6_t *)m->b_rptr; 1154 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1155 ip6->ip6_src = fin->fin_dst6.in6; 1156 ip6->ip6_dst = fin->fin_src6.in6; 1157 ip6->ip6_plen = htons(sizeof(*tcp)); 1158 ip6->ip6_nxt = IPPROTO_TCP; 1159 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2); 1160 } else 1161 #endif 1162 { 1163 ip->ip_src.s_addr = fin->fin_daddr; 1164 ip->ip_dst.s_addr = fin->fin_saddr; 1165 ip->ip_id = fr_nextipid(fin); 1166 ip->ip_hl = sizeof(*ip) >> 2; 1167 ip->ip_p = IPPROTO_TCP; 1168 ip->ip_len = sizeof(*ip) + sizeof(*tcp); 1169 ip->ip_tos = fin->fin_ip->ip_tos; 1170 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2); 1171 } 1172 return fr_send_ip(fin, m, &m); 1173 } 1174 1175 /* 1176 * Function: fr_send_ip 1177 * Returns: 0: success 1178 * -1: failed 1179 * Parameters: 1180 * fin: packet information 1181 * m: the message block where ip head starts 1182 * 1183 * Send a new packet through the IP stack. 1184 * 1185 * For IPv4 packets, ip_len must be in host byte order, and ip_v, 1186 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this 1187 * function). 1188 * 1189 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled 1190 * in by this function. 1191 * 1192 * All other portions of the packet must be in on-the-wire format. 1193 */ 1194 /*ARGSUSED*/ 1195 static int fr_send_ip(fin, m, mpp) 1196 fr_info_t *fin; 1197 mblk_t *m, **mpp; 1198 { 1199 qpktinfo_t qpi, *qpip; 1200 fr_info_t fnew; 1201 ip_t *ip; 1202 int i, hlen; 1203 ipf_stack_t *ifs = fin->fin_ifs; 1204 1205 ip = (ip_t *)m->b_rptr; 1206 bzero((char *)&fnew, sizeof(fnew)); 1207 1208 #ifdef USE_INET6 1209 if (fin->fin_v == 6) { 1210 ip6_t *ip6; 1211 1212 ip6 = (ip6_t *)ip; 1213 ip6->ip6_vfc = 0x60; 1214 ip6->ip6_hlim = 127; 1215 fnew.fin_v = 6; 1216 hlen = sizeof(*ip6); 1217 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; 1218 } else 1219 #endif 1220 { 1221 fnew.fin_v = 4; 1222 #if SOLARIS2 >= 10 1223 ip->ip_ttl = 255; 1224 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1) 1225 ip->ip_off = htons(IP_DF); 1226 #else 1227 if (ip_ttl_ptr != NULL) 1228 ip->ip_ttl = (u_char)(*ip_ttl_ptr); 1229 else 1230 ip->ip_ttl = 63; 1231 if (ip_mtudisc != NULL) 1232 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0); 1233 else 1234 ip->ip_off = htons(IP_DF); 1235 #endif 1236 /* 1237 * The dance with byte order and ip_len/ip_off is because in 1238 * fr_fastroute, it expects them to be in host byte order but 1239 * ipf_cksum expects them to be in network byte order. 1240 */ 1241 ip->ip_len = htons(ip->ip_len); 1242 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip)); 1243 ip->ip_len = ntohs(ip->ip_len); 1244 ip->ip_off = ntohs(ip->ip_off); 1245 hlen = sizeof(*ip); 1246 fnew.fin_plen = ip->ip_len; 1247 } 1248 1249 qpip = fin->fin_qpi; 1250 qpi.qpi_off = 0; 1251 qpi.qpi_ill = qpip->qpi_ill; 1252 qpi.qpi_m = m; 1253 qpi.qpi_data = ip; 1254 fnew.fin_qpi = &qpi; 1255 fnew.fin_ifp = fin->fin_ifp; 1256 fnew.fin_flx = FI_NOCKSUM; 1257 fnew.fin_m = m; 1258 fnew.fin_qfm = m; 1259 fnew.fin_ip = ip; 1260 fnew.fin_mp = mpp; 1261 fnew.fin_hlen = hlen; 1262 fnew.fin_dp = (char *)ip + hlen; 1263 fnew.fin_ifs = fin->fin_ifs; 1264 (void) fr_makefrip(hlen, ip, &fnew); 1265 1266 i = fr_fastroute(m, mpp, &fnew, NULL); 1267 return i; 1268 } 1269 1270 1271 int fr_send_icmp_err(type, fin, dst) 1272 int type; 1273 fr_info_t *fin; 1274 int dst; 1275 { 1276 struct in_addr dst4; 1277 struct icmp *icmp; 1278 qpktinfo_t *qpi; 1279 int hlen, code; 1280 phy_if_t phy; 1281 u_short sz; 1282 #ifdef USE_INET6 1283 mblk_t *mb; 1284 #endif 1285 mblk_t *m; 1286 #ifdef USE_INET6 1287 ip6_t *ip6; 1288 #endif 1289 ip_t *ip; 1290 ipf_stack_t *ifs = fin->fin_ifs; 1291 1292 if ((type < 0) || (type > ICMP_MAXTYPE)) 1293 return -1; 1294 1295 code = fin->fin_icode; 1296 #ifdef USE_INET6 1297 if ((code < 0) || (code >= ICMP_MAX_UNREACH)) 1298 return -1; 1299 #endif 1300 1301 #ifndef IPFILTER_CKSUM 1302 if (fr_checkl4sum(fin) == -1) 1303 return -1; 1304 #endif 1305 1306 qpi = fin->fin_qpi; 1307 1308 #ifdef USE_INET6 1309 mb = fin->fin_qfm; 1310 1311 if (fin->fin_v == 6) { 1312 sz = sizeof(ip6_t); 1313 sz += MIN(mb->b_wptr - mb->b_rptr, 512); 1314 hlen = sizeof(ip6_t); 1315 type = icmptoicmp6types[type]; 1316 if (type == ICMP6_DST_UNREACH) 1317 code = icmptoicmp6unreach[code]; 1318 } else 1319 #endif 1320 { 1321 if ((fin->fin_p == IPPROTO_ICMP) && 1322 !(fin->fin_flx & FI_SHORT)) 1323 switch (ntohs(fin->fin_data[0]) >> 8) 1324 { 1325 case ICMP_ECHO : 1326 case ICMP_TSTAMP : 1327 case ICMP_IREQ : 1328 case ICMP_MASKREQ : 1329 break; 1330 default : 1331 return 0; 1332 } 1333 1334 sz = sizeof(ip_t) * 2; 1335 sz += 8; /* 64 bits of data */ 1336 hlen = sizeof(ip_t); 1337 } 1338 1339 sz += offsetof(struct icmp, icmp_ip); 1340 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL) 1341 return -1; 1342 MTYPE(m) = M_DATA; 1343 m->b_rptr += 64; 1344 m->b_wptr = m->b_rptr + sz; 1345 bzero((char *)m->b_rptr, (size_t)sz); 1346 ip = (ip_t *)m->b_rptr; 1347 ip->ip_v = fin->fin_v; 1348 icmp = (struct icmp *)(m->b_rptr + hlen); 1349 icmp->icmp_type = type & 0xff; 1350 icmp->icmp_code = code & 0xff; 1351 phy = (phy_if_t)qpi->qpi_ill; 1352 if (type == ICMP_UNREACH && (phy != 0) && 1353 fin->fin_icode == ICMP_UNREACH_NEEDFRAG) 1354 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 ); 1355 1356 #ifdef USE_INET6 1357 if (fin->fin_v == 6) { 1358 struct in6_addr dst6; 1359 int csz; 1360 1361 if (dst == 0) { 1362 ipf_stack_t *ifs = fin->fin_ifs; 1363 1364 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy, 1365 (void *)&dst6, NULL, ifs) == -1) { 1366 FREE_MB_T(m); 1367 return -1; 1368 } 1369 } else 1370 dst6 = fin->fin_dst6.in6; 1371 1372 csz = sz; 1373 sz -= sizeof(ip6_t); 1374 ip6 = (ip6_t *)m->b_rptr; 1375 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1376 ip6->ip6_plen = htons((u_short)sz); 1377 ip6->ip6_nxt = IPPROTO_ICMPV6; 1378 ip6->ip6_src = dst6; 1379 ip6->ip6_dst = fin->fin_src6.in6; 1380 sz -= offsetof(struct icmp, icmp_ip); 1381 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz); 1382 icmp->icmp_cksum = csz - sizeof(ip6_t); 1383 } else 1384 #endif 1385 { 1386 ip->ip_hl = sizeof(*ip) >> 2; 1387 ip->ip_p = IPPROTO_ICMP; 1388 ip->ip_id = fin->fin_ip->ip_id; 1389 ip->ip_tos = fin->fin_ip->ip_tos; 1390 ip->ip_len = (u_short)sz; 1391 if (dst == 0) { 1392 ipf_stack_t *ifs = fin->fin_ifs; 1393 1394 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy, 1395 (void *)&dst4, NULL, ifs) == -1) { 1396 FREE_MB_T(m); 1397 return -1; 1398 } 1399 } else { 1400 dst4 = fin->fin_dst; 1401 } 1402 ip->ip_src = dst4; 1403 ip->ip_dst = fin->fin_src; 1404 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip, 1405 sizeof(*fin->fin_ip)); 1406 bcopy((char *)fin->fin_ip + fin->fin_hlen, 1407 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8); 1408 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len); 1409 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off); 1410 icmp->icmp_cksum = ipf_cksum((u_short *)icmp, 1411 sz - sizeof(ip_t)); 1412 } 1413 1414 /* 1415 * Need to exit out of these so we don't recursively call rw_enter 1416 * from fr_qout. 1417 */ 1418 return fr_send_ip(fin, m, &m); 1419 } 1420 1421 #include <sys/time.h> 1422 #include <sys/varargs.h> 1423 1424 #ifndef _KERNEL 1425 #include <stdio.h> 1426 #endif 1427 1428 /* 1429 * Return the first IP Address associated with an interface 1430 * For IPv6, we walk through the list of logical interfaces and return 1431 * the address of the first one that isn't a link-local interface. 1432 * We can't assume that it is :1 because another link-local address 1433 * may have been assigned there. 1434 */ 1435 /*ARGSUSED*/ 1436 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs) 1437 int v, atype; 1438 void *ifptr; 1439 struct in_addr *inp, *inpmask; 1440 ipf_stack_t *ifs; 1441 { 1442 struct sockaddr_in6 v6addr[2]; 1443 struct sockaddr_in v4addr[2]; 1444 net_ifaddr_t type[2]; 1445 net_handle_t net_data; 1446 phy_if_t phyif; 1447 void *array; 1448 1449 switch (v) 1450 { 1451 case 4: 1452 net_data = ifs->ifs_ipf_ipv4; 1453 array = v4addr; 1454 break; 1455 case 6: 1456 net_data = ifs->ifs_ipf_ipv6; 1457 array = v6addr; 1458 break; 1459 default: 1460 net_data = NULL; 1461 break; 1462 } 1463 1464 if (net_data == NULL) 1465 return -1; 1466 1467 phyif = (phy_if_t)ifptr; 1468 1469 switch (atype) 1470 { 1471 case FRI_PEERADDR : 1472 type[0] = NA_PEER; 1473 break; 1474 1475 case FRI_BROADCAST : 1476 type[0] = NA_BROADCAST; 1477 break; 1478 1479 default : 1480 type[0] = NA_ADDRESS; 1481 break; 1482 } 1483 1484 type[1] = NA_NETMASK; 1485 1486 if (v == 6) { 1487 lif_if_t idx = 0; 1488 1489 do { 1490 idx = net_lifgetnext(net_data, phyif, idx); 1491 if (net_getlifaddr(net_data, phyif, idx, 2, type, 1492 array) < 0) 1493 return -1; 1494 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) && 1495 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr)) 1496 break; 1497 } while (idx != 0); 1498 1499 if (idx == 0) 1500 return -1; 1501 1502 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1], 1503 inp, inpmask); 1504 } 1505 1506 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0) 1507 return -1; 1508 1509 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask); 1510 } 1511 1512 1513 u_32_t fr_newisn(fin) 1514 fr_info_t *fin; 1515 { 1516 static int iss_seq_off = 0; 1517 u_char hash[16]; 1518 u_32_t newiss; 1519 MD5_CTX ctx; 1520 ipf_stack_t *ifs = fin->fin_ifs; 1521 1522 /* 1523 * Compute the base value of the ISS. It is a hash 1524 * of (saddr, sport, daddr, dport, secret). 1525 */ 1526 MD5Init(&ctx); 1527 1528 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src, 1529 sizeof(fin->fin_fi.fi_src)); 1530 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst, 1531 sizeof(fin->fin_fi.fi_dst)); 1532 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat)); 1533 1534 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret)); 1535 1536 MD5Final(hash, &ctx); 1537 1538 bcopy(hash, &newiss, sizeof(newiss)); 1539 1540 /* 1541 * Now increment our "timer", and add it in to 1542 * the computed value. 1543 * 1544 * XXX Use `addin'? 1545 * XXX TCP_ISSINCR too large to use? 1546 */ 1547 iss_seq_off += 0x00010000; 1548 newiss += iss_seq_off; 1549 return newiss; 1550 } 1551 1552 1553 /* ------------------------------------------------------------------------ */ 1554 /* Function: fr_nextipid */ 1555 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */ 1556 /* Parameters: fin(I) - pointer to packet information */ 1557 /* */ 1558 /* Returns the next IPv4 ID to use for this packet. */ 1559 /* ------------------------------------------------------------------------ */ 1560 u_short fr_nextipid(fin) 1561 fr_info_t *fin; 1562 { 1563 static u_short ipid = 0; 1564 u_short id; 1565 ipf_stack_t *ifs = fin->fin_ifs; 1566 1567 MUTEX_ENTER(&ifs->ifs_ipf_rw); 1568 if (fin->fin_pktnum != 0) { 1569 id = fin->fin_pktnum & 0xffff; 1570 } else { 1571 id = ipid++; 1572 } 1573 MUTEX_EXIT(&ifs->ifs_ipf_rw); 1574 1575 return id; 1576 } 1577 1578 1579 #ifndef IPFILTER_CKSUM 1580 /* ARGSUSED */ 1581 #endif 1582 INLINE void fr_checkv4sum(fin) 1583 fr_info_t *fin; 1584 { 1585 #ifdef IPFILTER_CKSUM 1586 if (fr_checkl4sum(fin) == -1) 1587 fin->fin_flx |= FI_BAD; 1588 #endif 1589 } 1590 1591 1592 #ifdef USE_INET6 1593 # ifndef IPFILTER_CKSUM 1594 /* ARGSUSED */ 1595 # endif 1596 INLINE void fr_checkv6sum(fin) 1597 fr_info_t *fin; 1598 { 1599 # ifdef IPFILTER_CKSUM 1600 if (fr_checkl4sum(fin) == -1) 1601 fin->fin_flx |= FI_BAD; 1602 # endif 1603 } 1604 #endif /* USE_INET6 */ 1605 1606 1607 #if (SOLARIS2 < 7) 1608 void fr_slowtimer() 1609 #else 1610 /*ARGSUSED*/ 1611 void fr_slowtimer __P((void *arg)) 1612 #endif 1613 { 1614 ipf_stack_t *ifs = arg; 1615 1616 READ_ENTER(&ifs->ifs_ipf_global); 1617 if (ifs->ifs_fr_running != 1) { 1618 ifs->ifs_fr_timer_id = NULL; 1619 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1620 return; 1621 } 1622 ipf_expiretokens(ifs); 1623 fr_fragexpire(ifs); 1624 fr_timeoutstate(ifs); 1625 fr_natexpire(ifs); 1626 fr_authexpire(ifs); 1627 ifs->ifs_fr_ticks++; 1628 if (ifs->ifs_fr_running == 1) 1629 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg, 1630 drv_usectohz(500000)); 1631 else 1632 ifs->ifs_fr_timer_id = NULL; 1633 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1634 } 1635 1636 1637 /* ------------------------------------------------------------------------ */ 1638 /* Function: fr_pullup */ 1639 /* Returns: NULL == pullup failed, else pointer to protocol header */ 1640 /* Parameters: m(I) - pointer to buffer where data packet starts */ 1641 /* fin(I) - pointer to packet information */ 1642 /* len(I) - number of bytes to pullup */ 1643 /* */ 1644 /* Attempt to move at least len bytes (from the start of the buffer) into a */ 1645 /* single buffer for ease of access. Operating system native functions are */ 1646 /* used to manage buffers - if necessary. If the entire packet ends up in */ 1647 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */ 1648 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ 1649 /* and ONLY if the pullup succeeds. */ 1650 /* */ 1651 /* We assume that 'min' is a pointer to a buffer that is part of the chain */ 1652 /* of buffers that starts at *fin->fin_mp. */ 1653 /* ------------------------------------------------------------------------ */ 1654 void *fr_pullup(min, fin, len) 1655 mb_t *min; 1656 fr_info_t *fin; 1657 int len; 1658 { 1659 qpktinfo_t *qpi = fin->fin_qpi; 1660 int out = fin->fin_out, dpoff, ipoff; 1661 mb_t *m = min, *m1, *m2; 1662 char *ip; 1663 uint32_t start, stuff, end, value, flags; 1664 ipf_stack_t *ifs = fin->fin_ifs; 1665 1666 if (m == NULL) 1667 return NULL; 1668 1669 ip = (char *)fin->fin_ip; 1670 if ((fin->fin_flx & FI_COALESCE) != 0) 1671 return ip; 1672 1673 ipoff = fin->fin_ipoff; 1674 if (fin->fin_dp != NULL) 1675 dpoff = (char *)fin->fin_dp - (char *)ip; 1676 else 1677 dpoff = 0; 1678 1679 if (M_LEN(m) < len + ipoff) { 1680 1681 /* 1682 * pfil_precheck ensures the IP header is on a 32bit 1683 * aligned address so simply fail if that isn't currently 1684 * the case (should never happen). 1685 */ 1686 int inc = 0; 1687 1688 if (ipoff > 0) { 1689 if ((ipoff & 3) != 0) { 1690 inc = 4 - (ipoff & 3); 1691 if (m->b_rptr - inc >= m->b_datap->db_base) 1692 m->b_rptr -= inc; 1693 else 1694 inc = 0; 1695 } 1696 } 1697 1698 /* 1699 * XXX This is here as a work around for a bug with DEBUG 1700 * XXX Solaris kernels. The problem is b_prev is used by IP 1701 * XXX code as a way to stash the phyint_index for a packet, 1702 * XXX this doesn't get reset by IP but freeb does an ASSERT() 1703 * XXX for both of these to be NULL. See 6442390. 1704 */ 1705 m1 = m; 1706 m2 = m->b_prev; 1707 1708 do { 1709 m1->b_next = NULL; 1710 m1->b_prev = NULL; 1711 m1 = m1->b_cont; 1712 } while (m1); 1713 1714 /* 1715 * Need to preserve checksum information by copying them 1716 * to newmp which heads the pulluped message. 1717 */ 1718 hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end, 1719 &value, &flags); 1720 1721 if (pullupmsg(m, len + ipoff + inc) == 0) { 1722 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]); 1723 FREE_MB_T(*fin->fin_mp); 1724 *fin->fin_mp = NULL; 1725 fin->fin_m = NULL; 1726 fin->fin_ip = NULL; 1727 fin->fin_dp = NULL; 1728 qpi->qpi_data = NULL; 1729 return NULL; 1730 } 1731 1732 (void) hcksum_assoc(m, NULL, NULL, start, stuff, end, 1733 value, flags, 0); 1734 1735 m->b_prev = m2; 1736 m->b_rptr += inc; 1737 fin->fin_m = m; 1738 ip = MTOD(m, char *) + ipoff; 1739 qpi->qpi_data = ip; 1740 } 1741 1742 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]); 1743 fin->fin_ip = (ip_t *)ip; 1744 if (fin->fin_dp != NULL) 1745 fin->fin_dp = (char *)fin->fin_ip + dpoff; 1746 1747 if (len == fin->fin_plen) 1748 fin->fin_flx |= FI_COALESCE; 1749 return ip; 1750 } 1751 1752 1753 /* 1754 * Function: fr_verifysrc 1755 * Returns: int (really boolean) 1756 * Parameters: fin - packet information 1757 * 1758 * Check whether the packet has a valid source address for the interface on 1759 * which the packet arrived, implementing the "fr_chksrc" feature. 1760 * Returns true iff the packet's source address is valid. 1761 */ 1762 int fr_verifysrc(fin) 1763 fr_info_t *fin; 1764 { 1765 net_handle_t net_data_p; 1766 phy_if_t phy_ifdata_routeto; 1767 struct sockaddr sin; 1768 ipf_stack_t *ifs = fin->fin_ifs; 1769 1770 if (fin->fin_v == 4) { 1771 net_data_p = ifs->ifs_ipf_ipv4; 1772 } else if (fin->fin_v == 6) { 1773 net_data_p = ifs->ifs_ipf_ipv6; 1774 } else { 1775 return (0); 1776 } 1777 1778 /* Get the index corresponding to the if name */ 1779 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1780 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr)); 1781 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL); 1782 1783 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 1784 } 1785 1786 1787 /* 1788 * Function: fr_fastroute 1789 * Returns: 0: success; 1790 * -1: failed 1791 * Parameters: 1792 * mb: the message block where ip head starts 1793 * mpp: the pointer to the pointer of the orignal 1794 * packet message 1795 * fin: packet information 1796 * fdp: destination interface information 1797 * if it is NULL, no interface information provided. 1798 * 1799 * This function is for fastroute/to/dup-to rules. It calls 1800 * pfil_make_lay2_packet to search route, make lay-2 header 1801 * ,and identify output queue for the IP packet. 1802 * The destination address depends on the following conditions: 1803 * 1: for fastroute rule, fdp is passed in as NULL, so the 1804 * destination address is the IP Packet's destination address 1805 * 2: for to/dup-to rule, if an ip address is specified after 1806 * the interface name, this address is the as destination 1807 * address. Otherwise IP Packet's destination address is used 1808 */ 1809 int fr_fastroute(mb, mpp, fin, fdp) 1810 mblk_t *mb, **mpp; 1811 fr_info_t *fin; 1812 frdest_t *fdp; 1813 { 1814 net_handle_t net_data_p; 1815 net_inject_t *inj; 1816 mblk_t *mp = NULL; 1817 frentry_t *fr = fin->fin_fr; 1818 qpktinfo_t *qpi; 1819 ip_t *ip; 1820 1821 struct sockaddr_in *sin; 1822 struct sockaddr_in6 *sin6; 1823 struct sockaddr *sinp; 1824 ipf_stack_t *ifs = fin->fin_ifs; 1825 #ifndef sparc 1826 u_short __iplen, __ipoff; 1827 #endif 1828 1829 if (fin->fin_v == 4) { 1830 net_data_p = ifs->ifs_ipf_ipv4; 1831 } else if (fin->fin_v == 6) { 1832 net_data_p = ifs->ifs_ipf_ipv6; 1833 } else { 1834 return (-1); 1835 } 1836 1837 inj = net_inject_alloc(NETINFO_VERSION); 1838 if (inj == NULL) 1839 return -1; 1840 1841 ip = fin->fin_ip; 1842 qpi = fin->fin_qpi; 1843 1844 /* 1845 * If this is a duplicate mblk then we want ip to point at that 1846 * data, not the original, if and only if it is already pointing at 1847 * the current mblk data. 1848 * 1849 * Otherwise, if it's not a duplicate, and we're not already pointing 1850 * at the current mblk data, then we want to ensure that the data 1851 * points at ip. 1852 */ 1853 1854 if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) { 1855 ip = (ip_t *)mb->b_rptr; 1856 } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) { 1857 qpi->qpi_m->b_rptr = (uchar_t *)ip; 1858 qpi->qpi_off = 0; 1859 } 1860 1861 /* 1862 * If there is another M_PROTO, we don't want it 1863 */ 1864 if (*mpp != mb) { 1865 mp = unlinkb(*mpp); 1866 freeb(*mpp); 1867 *mpp = mp; 1868 } 1869 1870 sinp = (struct sockaddr *)&inj->ni_addr; 1871 sin = (struct sockaddr_in *)sinp; 1872 sin6 = (struct sockaddr_in6 *)sinp; 1873 bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr)); 1874 inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1875 inj->ni_packet = mb; 1876 1877 /* 1878 * In case we're here due to "to <if>" being used with 1879 * "keep state", check that we're going in the correct 1880 * direction. 1881 */ 1882 if (fdp != NULL) { 1883 if ((fr != NULL) && (fdp->fd_ifp != NULL) && 1884 (fin->fin_rev != 0) && (fdp == &fr->fr_tif)) 1885 goto bad_fastroute; 1886 inj->ni_physical = (phy_if_t)fdp->fd_ifp; 1887 if (fin->fin_v == 4) { 1888 sin->sin_addr = fdp->fd_ip; 1889 } else { 1890 sin6->sin6_addr = fdp->fd_ip6.in6; 1891 } 1892 } else { 1893 if (fin->fin_v == 4) { 1894 sin->sin_addr = ip->ip_dst; 1895 } else { 1896 sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst; 1897 } 1898 inj->ni_physical = net_routeto(net_data_p, sinp, NULL); 1899 } 1900 1901 /* 1902 * Clear the hardware checksum flags from packets that we are doing 1903 * input processing on as leaving them set will cause the outgoing 1904 * NIC (if it supports hardware checksum) to calculate them anew, 1905 * using the old (correct) checksums as the pseudo value to start 1906 * from. 1907 */ 1908 if (fin->fin_out == 0) { 1909 DB_CKSUMFLAGS(mb) = 0; 1910 } 1911 1912 *mpp = mb; 1913 1914 if (fin->fin_out == 0) { 1915 void *saveifp; 1916 u_32_t pass; 1917 1918 saveifp = fin->fin_ifp; 1919 fin->fin_ifp = (void *)inj->ni_physical; 1920 fin->fin_flx &= ~FI_STATE; 1921 fin->fin_out = 1; 1922 (void) fr_acctpkt(fin, &pass); 1923 fin->fin_fr = NULL; 1924 if (!fr || !(fr->fr_flags & FR_RETMASK)) 1925 (void) fr_checkstate(fin, &pass); 1926 if (fr_checknatout(fin, NULL) == -1) 1927 goto bad_fastroute; 1928 fin->fin_out = 0; 1929 fin->fin_ifp = saveifp; 1930 } 1931 #ifndef sparc 1932 if (fin->fin_v == 4) { 1933 __iplen = (u_short)ip->ip_len, 1934 __ipoff = (u_short)ip->ip_off; 1935 1936 ip->ip_len = htons(__iplen); 1937 ip->ip_off = htons(__ipoff); 1938 } 1939 #endif 1940 1941 if (net_data_p) { 1942 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) { 1943 net_inject_free(inj); 1944 return (-1); 1945 } 1946 } 1947 1948 ifs->ifs_fr_frouteok[0]++; 1949 net_inject_free(inj); 1950 return 0; 1951 bad_fastroute: 1952 net_inject_free(inj); 1953 freemsg(mb); 1954 ifs->ifs_fr_frouteok[1]++; 1955 return -1; 1956 } 1957 1958 1959 /* ------------------------------------------------------------------------ */ 1960 /* Function: ipf_hook4_out */ 1961 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1962 /* Parameters: event(I) - pointer to event */ 1963 /* info(I) - pointer to hook information for firewalling */ 1964 /* */ 1965 /* Calling ipf_hook. */ 1966 /* ------------------------------------------------------------------------ */ 1967 /*ARGSUSED*/ 1968 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg) 1969 { 1970 return ipf_hook(info, 1, 0, arg); 1971 } 1972 /*ARGSUSED*/ 1973 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg) 1974 { 1975 return ipf_hook6(info, 1, 0, arg); 1976 } 1977 1978 /* ------------------------------------------------------------------------ */ 1979 /* Function: ipf_hook4_in */ 1980 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1981 /* Parameters: event(I) - pointer to event */ 1982 /* info(I) - pointer to hook information for firewalling */ 1983 /* */ 1984 /* Calling ipf_hook. */ 1985 /* ------------------------------------------------------------------------ */ 1986 /*ARGSUSED*/ 1987 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg) 1988 { 1989 return ipf_hook(info, 0, 0, arg); 1990 } 1991 /*ARGSUSED*/ 1992 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg) 1993 { 1994 return ipf_hook6(info, 0, 0, arg); 1995 } 1996 1997 1998 /* ------------------------------------------------------------------------ */ 1999 /* Function: ipf_hook4_loop_out */ 2000 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2001 /* Parameters: event(I) - pointer to event */ 2002 /* info(I) - pointer to hook information for firewalling */ 2003 /* */ 2004 /* Calling ipf_hook. */ 2005 /* ------------------------------------------------------------------------ */ 2006 /*ARGSUSED*/ 2007 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 2008 { 2009 return ipf_hook(info, 1, FI_NOCKSUM, arg); 2010 } 2011 /*ARGSUSED*/ 2012 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 2013 { 2014 return ipf_hook6(info, 1, FI_NOCKSUM, arg); 2015 } 2016 2017 /* ------------------------------------------------------------------------ */ 2018 /* Function: ipf_hook4_loop_in */ 2019 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2020 /* Parameters: event(I) - pointer to event */ 2021 /* info(I) - pointer to hook information for firewalling */ 2022 /* */ 2023 /* Calling ipf_hook. */ 2024 /* ------------------------------------------------------------------------ */ 2025 /*ARGSUSED*/ 2026 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 2027 { 2028 return ipf_hook(info, 0, FI_NOCKSUM, arg); 2029 } 2030 /*ARGSUSED*/ 2031 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 2032 { 2033 return ipf_hook6(info, 0, FI_NOCKSUM, arg); 2034 } 2035 2036 /* ------------------------------------------------------------------------ */ 2037 /* Function: ipf_hook */ 2038 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2039 /* Parameters: info(I) - pointer to hook information for firewalling */ 2040 /* out(I) - whether packet is going in or out */ 2041 /* loopback(I) - whether packet is a loopback packet or not */ 2042 /* */ 2043 /* Stepping stone function between the IP mainline and IPFilter. Extracts */ 2044 /* parameters out of the info structure and forms them up to be useful for */ 2045 /* calling ipfilter. */ 2046 /* ------------------------------------------------------------------------ */ 2047 int ipf_hook(hook_data_t info, int out, int loopback, void *arg) 2048 { 2049 hook_pkt_event_t *fw; 2050 ipf_stack_t *ifs; 2051 qpktinfo_t qpi; 2052 int rval, hlen; 2053 u_short swap; 2054 phy_if_t phy; 2055 ip_t *ip; 2056 2057 ifs = arg; 2058 fw = (hook_pkt_event_t *)info; 2059 2060 ASSERT(fw != NULL); 2061 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp; 2062 2063 ip = fw->hpe_hdr; 2064 swap = ntohs(ip->ip_len); 2065 ip->ip_len = swap; 2066 swap = ntohs(ip->ip_off); 2067 ip->ip_off = swap; 2068 hlen = IPH_HDR_LENGTH(ip); 2069 2070 qpi.qpi_m = fw->hpe_mb; 2071 qpi.qpi_data = fw->hpe_hdr; 2072 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2073 qpi.qpi_ill = (void *)phy; 2074 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2075 if (qpi.qpi_flags) 2076 qpi.qpi_flags |= FI_MBCAST; 2077 qpi.qpi_flags |= loopback; 2078 2079 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2080 &qpi, fw->hpe_mp, ifs); 2081 2082 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2083 if (rval == 0 && *(fw->hpe_mp) == NULL) 2084 rval = 1; 2085 2086 /* Notify IP the packet mblk_t and IP header pointers. */ 2087 fw->hpe_mb = qpi.qpi_m; 2088 fw->hpe_hdr = qpi.qpi_data; 2089 if (rval == 0) { 2090 ip = qpi.qpi_data; 2091 swap = ntohs(ip->ip_len); 2092 ip->ip_len = swap; 2093 swap = ntohs(ip->ip_off); 2094 ip->ip_off = swap; 2095 } 2096 return rval; 2097 2098 } 2099 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg) 2100 { 2101 hook_pkt_event_t *fw; 2102 int rval, hlen; 2103 qpktinfo_t qpi; 2104 phy_if_t phy; 2105 2106 fw = (hook_pkt_event_t *)info; 2107 2108 ASSERT(fw != NULL); 2109 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp; 2110 2111 hlen = sizeof (ip6_t); 2112 2113 qpi.qpi_m = fw->hpe_mb; 2114 qpi.qpi_data = fw->hpe_hdr; 2115 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2116 qpi.qpi_ill = (void *)phy; 2117 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2118 if (qpi.qpi_flags) 2119 qpi.qpi_flags |= FI_MBCAST; 2120 qpi.qpi_flags |= loopback; 2121 2122 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2123 &qpi, fw->hpe_mp, arg); 2124 2125 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2126 if (rval == 0 && *(fw->hpe_mp) == NULL) 2127 rval = 1; 2128 2129 /* Notify IP the packet mblk_t and IP header pointers. */ 2130 fw->hpe_mb = qpi.qpi_m; 2131 fw->hpe_hdr = qpi.qpi_data; 2132 return rval; 2133 } 2134 2135 2136 /* ------------------------------------------------------------------------ */ 2137 /* Function: ipf_nic_event_v4 */ 2138 /* Returns: int - 0 == no problems encountered */ 2139 /* Parameters: event(I) - pointer to event */ 2140 /* info(I) - pointer to information about a NIC event */ 2141 /* */ 2142 /* Function to receive asynchronous NIC events from IP */ 2143 /* ------------------------------------------------------------------------ */ 2144 /*ARGSUSED*/ 2145 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg) 2146 { 2147 struct sockaddr_in *sin; 2148 hook_nic_event_t *hn; 2149 ipf_stack_t *ifs = arg; 2150 void *new_ifp = NULL; 2151 2152 if (ifs->ifs_fr_running <= 0) 2153 return (0); 2154 2155 hn = (hook_nic_event_t *)info; 2156 2157 switch (hn->hne_event) 2158 { 2159 case NE_PLUMB : 2160 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data, 2161 ifs); 2162 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2163 hn->hne_data, ifs); 2164 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2165 hn->hne_data, ifs); 2166 break; 2167 2168 case NE_UNPLUMB : 2169 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2170 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, 2171 ifs); 2172 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2173 break; 2174 2175 case NE_ADDRESS_CHANGE : 2176 /* 2177 * We only respond to events for logical interface 0 because 2178 * IPFilter only uses the first address given to a network 2179 * interface. We check for hne_lif==1 because the netinfo 2180 * code maps adds 1 to the lif number so that it can return 2181 * 0 to indicate "no more lifs" when walking them. 2182 */ 2183 if (hn->hne_lif == 1) { 2184 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL, 2185 ifs); 2186 sin = hn->hne_data; 2187 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr, 2188 ifs); 2189 } 2190 break; 2191 2192 #if SOLARIS2 >= 10 2193 case NE_IFINDEX_CHANGE : 2194 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2195 2196 if (hn->hne_data != NULL) { 2197 /* 2198 * The netinfo passes interface index as int (hne_data should be 2199 * handled as a pointer to int), which is always 32bit. We need to 2200 * convert it to void pointer here, since interfaces are 2201 * represented as pointers to void in IPF. The pointers are 64 bits 2202 * long on 64bit platforms. Doing something like 2203 * (void *)((int) x) 2204 * will throw warning: 2205 * "cast to pointer from integer of different size" 2206 * during 64bit compilation. 2207 * 2208 * The line below uses (size_t) to typecast int to 2209 * size_t, which might be 64bit/32bit (depending 2210 * on architecture). Once we have proper 64bit/32bit 2211 * type (size_t), we can safely convert it to void pointer. 2212 */ 2213 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2214 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2215 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2216 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2217 } 2218 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2219 break; 2220 #endif 2221 2222 default : 2223 break; 2224 } 2225 2226 return 0; 2227 } 2228 2229 2230 /* ------------------------------------------------------------------------ */ 2231 /* Function: ipf_nic_event_v6 */ 2232 /* Returns: int - 0 == no problems encountered */ 2233 /* Parameters: event(I) - pointer to event */ 2234 /* info(I) - pointer to information about a NIC event */ 2235 /* */ 2236 /* Function to receive asynchronous NIC events from IP */ 2237 /* ------------------------------------------------------------------------ */ 2238 /*ARGSUSED*/ 2239 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg) 2240 { 2241 struct sockaddr_in6 *sin6; 2242 hook_nic_event_t *hn; 2243 ipf_stack_t *ifs = arg; 2244 void *new_ifp = NULL; 2245 2246 if (ifs->ifs_fr_running <= 0) 2247 return (0); 2248 2249 hn = (hook_nic_event_t *)info; 2250 2251 switch (hn->hne_event) 2252 { 2253 case NE_PLUMB : 2254 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2255 hn->hne_data, ifs); 2256 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2257 hn->hne_data, ifs); 2258 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2259 hn->hne_data, ifs); 2260 break; 2261 2262 case NE_UNPLUMB : 2263 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2264 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, 2265 ifs); 2266 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2267 break; 2268 2269 case NE_ADDRESS_CHANGE : 2270 if (hn->hne_lif == 1) { 2271 sin6 = hn->hne_data; 2272 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr, 2273 ifs); 2274 } 2275 break; 2276 2277 #if SOLARIS2 >= 10 2278 case NE_IFINDEX_CHANGE : 2279 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2280 if (hn->hne_data != NULL) { 2281 /* 2282 * The netinfo passes interface index as int (hne_data should be 2283 * handled as a pointer to int), which is always 32bit. We need to 2284 * convert it to void pointer here, since interfaces are 2285 * represented as pointers to void in IPF. The pointers are 64 bits 2286 * long on 64bit platforms. Doing something like 2287 * (void *)((int) x) 2288 * will throw warning: 2289 * "cast to pointer from integer of different size" 2290 * during 64bit compilation. 2291 * 2292 * The line below uses (size_t) to typecast int to 2293 * size_t, which might be 64bit/32bit (depending 2294 * on architecture). Once we have proper 64bit/32bit 2295 * type (size_t), we can safely convert it to void pointer. 2296 */ 2297 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2298 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2299 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2300 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2301 } 2302 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2303 break; 2304 #endif 2305 2306 default : 2307 break; 2308 } 2309 2310 return 0; 2311 } 2312 2313 /* 2314 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6() 2315 * are needed in Solaris kernel only. We don't need them in 2316 * ipftest to pretend the ICMP/RST packet was sent as a response. 2317 */ 2318 #if defined(_KERNEL) && (SOLARIS2 >= 10) 2319 /* ------------------------------------------------------------------------ */ 2320 /* Function: fr_make_rst */ 2321 /* Returns: int - 0 on success, -1 on failure */ 2322 /* Parameters: fin(I) - pointer to packet information */ 2323 /* */ 2324 /* We must alter the original mblks passed to IPF from IP stack via */ 2325 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. */ 2326 /* IPF can basicaly do only these things with mblk representing the packet: */ 2327 /* leave it as it is (pass the packet) */ 2328 /* */ 2329 /* discard it (block the packet) */ 2330 /* */ 2331 /* alter it (i.e. NAT) */ 2332 /* */ 2333 /* As you can see IPF can not simply discard the mblk and supply a new one */ 2334 /* instead to IP stack via FW_HOOKS. */ 2335 /* */ 2336 /* The return-rst action for packets coming via NIC is handled as follows: */ 2337 /* mblk with packet is discarded */ 2338 /* */ 2339 /* new mblk with RST response is constructed and injected to network */ 2340 /* */ 2341 /* IPF can't inject packets to loopback interface, this is just another */ 2342 /* limitation we have to deal with here. The only option to send RST */ 2343 /* response to offending TCP packet coming via loopback is to alter it. */ 2344 /* */ 2345 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */ 2346 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */ 2347 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */ 2348 /* ------------------------------------------------------------------------ */ 2349 int fr_make_rst(fin) 2350 fr_info_t *fin; 2351 { 2352 uint16_t tmp_port; 2353 int rv = -1; 2354 uint32_t old_ack; 2355 tcphdr_t *tcp = NULL; 2356 struct in_addr tmp_src; 2357 #ifdef USE_INET6 2358 struct in6_addr tmp_src6; 2359 #endif 2360 2361 ASSERT(fin->fin_p == IPPROTO_TCP); 2362 2363 /* 2364 * We do not need to adjust chksum, since it is not being checked by 2365 * Solaris IP stack for loopback clients. 2366 */ 2367 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) && 2368 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2369 2370 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2371 /* Swap IPv4 addresses. */ 2372 tmp_src = fin->fin_ip->ip_src; 2373 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2374 fin->fin_ip->ip_dst = tmp_src; 2375 2376 rv = 0; 2377 } 2378 else 2379 tcp = NULL; 2380 } 2381 #ifdef USE_INET6 2382 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) && 2383 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2384 /* 2385 * We are relying on fact the next header is TCP, which is true 2386 * for regular TCP packets coming in over loopback. 2387 */ 2388 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2389 /* Swap IPv6 addresses. */ 2390 tmp_src6 = fin->fin_ip6->ip6_src; 2391 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2392 fin->fin_ip6->ip6_dst = tmp_src6; 2393 2394 rv = 0; 2395 } 2396 else 2397 tcp = NULL; 2398 } 2399 #endif 2400 2401 if (tcp != NULL) { 2402 /* 2403 * Adjust TCP header: 2404 * swap ports, 2405 * set flags, 2406 * set correct ACK number 2407 */ 2408 tmp_port = tcp->th_sport; 2409 tcp->th_sport = tcp->th_dport; 2410 tcp->th_dport = tmp_port; 2411 old_ack = tcp->th_ack; 2412 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1); 2413 tcp->th_seq = old_ack; 2414 tcp->th_flags = TH_RST | TH_ACK; 2415 } 2416 2417 return (rv); 2418 } 2419 2420 /* ------------------------------------------------------------------------ */ 2421 /* Function: fr_make_icmp_v4 */ 2422 /* Returns: int - 0 on success, -1 on failure */ 2423 /* Parameters: fin(I) - pointer to packet information */ 2424 /* */ 2425 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2426 /* what is going to happen here and why. Once you read the comment there, */ 2427 /* continue here with next paragraph. */ 2428 /* */ 2429 /* To turn IPv4 packet into ICMPv4 response packet, these things must */ 2430 /* happen here: */ 2431 /* (1) Original mblk is copied (duplicated). */ 2432 /* */ 2433 /* (2) ICMP header is created. */ 2434 /* */ 2435 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */ 2436 /* data ready then. */ 2437 /* */ 2438 /* (4) Swap IP addresses in original mblk and adjust IP header data. */ 2439 /* */ 2440 /* (5) The mblk containing original packet is trimmed to contain IP */ 2441 /* header only and ICMP chksum is computed. */ 2442 /* */ 2443 /* (6) The ICMP header we have from (3) is linked to original mblk, */ 2444 /* which now contains new IP header. If original packet was spread */ 2445 /* over several mblks, only the first mblk is kept. */ 2446 /* ------------------------------------------------------------------------ */ 2447 static int fr_make_icmp_v4(fin) 2448 fr_info_t *fin; 2449 { 2450 struct in_addr tmp_src; 2451 tcphdr_t *tcp; 2452 struct icmp *icmp; 2453 mblk_t *mblk_icmp; 2454 mblk_t *mblk_ip; 2455 size_t icmp_pld_len; /* octets to append to ICMP header */ 2456 size_t orig_iphdr_len; /* length of IP header only */ 2457 uint32_t sum; 2458 uint16_t *buf; 2459 int len; 2460 2461 2462 if (fin->fin_v != 4) 2463 return (-1); 2464 2465 /* 2466 * If we are dealing with TCP, then packet must be SYN/FIN to be routed 2467 * by IP stack. If it is not SYN/FIN, then we must drop it silently. 2468 */ 2469 tcp = (tcphdr_t *) fin->fin_dp; 2470 2471 if ((fin->fin_p == IPPROTO_TCP) && 2472 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2473 return (-1); 2474 2475 /* 2476 * Step (1) 2477 * 2478 * Make copy of original mblk. 2479 * 2480 * We want to copy as much data as necessary, not less, not more. The 2481 * ICMPv4 payload length for unreachable messages is: 2482 * original IP header + 8 bytes of L4 (if there are any). 2483 * 2484 * We determine if there are at least 8 bytes of L4 data following IP 2485 * header first. 2486 */ 2487 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ? 2488 ICMPERR_ICMPHLEN : fin->fin_dlen; 2489 /* 2490 * Since we don't want to copy more data than necessary, we must trim 2491 * the original mblk here. The right way (STREAMish) would be to use 2492 * adjmsg() to trim it. However we would have to calculate the length 2493 * argument for adjmsg() from pointers we already have here. 2494 * 2495 * Since we have pointers and offsets, it's faster and easier for 2496 * us to just adjust pointers by hand instead of using adjmsg(). 2497 */ 2498 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp; 2499 fin->fin_m->b_wptr += icmp_pld_len; 2500 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip; 2501 2502 /* 2503 * Also we don't want to copy any L2 stuff, which might precede IP 2504 * header, so we have have to set b_rptr to point to the start of IP 2505 * header. 2506 */ 2507 fin->fin_m->b_rptr += fin->fin_ipoff; 2508 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2509 return (-1); 2510 fin->fin_m->b_rptr -= fin->fin_ipoff; 2511 2512 /* 2513 * Step (2) 2514 * 2515 * Create an ICMP header, which will be appened to original mblk later. 2516 * ICMP header is just another mblk. 2517 */ 2518 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI); 2519 if (mblk_icmp == NULL) { 2520 FREE_MB_T(mblk_ip); 2521 return (-1); 2522 } 2523 2524 MTYPE(mblk_icmp) = M_DATA; 2525 icmp = (struct icmp *) mblk_icmp->b_wptr; 2526 icmp->icmp_type = ICMP_UNREACH; 2527 icmp->icmp_code = fin->fin_icode & 0xFF; 2528 icmp->icmp_void = 0; 2529 icmp->icmp_cksum = 0; 2530 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN; 2531 2532 /* 2533 * Step (3) 2534 * 2535 * Complete ICMP packet - link ICMP header with L4 data from original 2536 * IP packet. 2537 */ 2538 linkb(mblk_icmp, mblk_ip); 2539 2540 /* 2541 * Step (4) 2542 * 2543 * Swap IP addresses and change IP header fields accordingly in 2544 * original IP packet. 2545 * 2546 * There is a rule option return-icmp as a dest for physical 2547 * interfaces. This option becomes useless for loopback, since IPF box 2548 * uses same address as a loopback destination. We ignore the option 2549 * here, the ICMP packet will always look like as it would have been 2550 * sent from the original destination host. 2551 */ 2552 tmp_src = fin->fin_ip->ip_src; 2553 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2554 fin->fin_ip->ip_dst = tmp_src; 2555 fin->fin_ip->ip_p = IPPROTO_ICMP; 2556 fin->fin_ip->ip_sum = 0; 2557 2558 /* 2559 * Step (5) 2560 * 2561 * We trim the orignal mblk to hold IP header only. 2562 */ 2563 fin->fin_m->b_wptr = fin->fin_dp; 2564 orig_iphdr_len = fin->fin_m->b_wptr - 2565 (fin->fin_m->b_rptr + fin->fin_ipoff); 2566 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN + 2567 orig_iphdr_len); 2568 2569 /* 2570 * ICMP chksum calculation. The data we are calculating chksum for are 2571 * spread over two mblks, therefore we have to use two for loops. 2572 * 2573 * First for loop computes chksum part for ICMP header. 2574 */ 2575 buf = (uint16_t *) icmp; 2576 len = ICMPERR_ICMPHLEN; 2577 for (sum = 0; len > 1; len -= 2) 2578 sum += *buf++; 2579 2580 /* 2581 * Here we add chksum part for ICMP payload. 2582 */ 2583 len = icmp_pld_len; 2584 buf = (uint16_t *) mblk_ip->b_rptr; 2585 for (; len > 1; len -= 2) 2586 sum += *buf++; 2587 2588 /* 2589 * Chksum is done. 2590 */ 2591 sum = (sum >> 16) + (sum & 0xffff); 2592 sum += (sum >> 16); 2593 icmp->icmp_cksum = ~sum; 2594 2595 /* 2596 * Step (6) 2597 * 2598 * Release all packet mblks, except the first one. 2599 */ 2600 if (fin->fin_m->b_cont != NULL) { 2601 FREE_MB_T(fin->fin_m->b_cont); 2602 } 2603 2604 /* 2605 * Append ICMP payload to first mblk, which already contains new IP 2606 * header. 2607 */ 2608 linkb(fin->fin_m, mblk_icmp); 2609 2610 return (0); 2611 } 2612 2613 #ifdef USE_INET6 2614 /* ------------------------------------------------------------------------ */ 2615 /* Function: fr_make_icmp_v6 */ 2616 /* Returns: int - 0 on success, -1 on failure */ 2617 /* Parameters: fin(I) - pointer to packet information */ 2618 /* */ 2619 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2620 /* what and why is going to happen here. Once you read the comment there, */ 2621 /* continue here with next paragraph. */ 2622 /* */ 2623 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */ 2624 /* The algorithm is fairly simple: */ 2625 /* 1) We need to get copy of complete mblk. */ 2626 /* */ 2627 /* 2) New ICMPv6 header is created. */ 2628 /* */ 2629 /* 3) The copy of original mblk with packet is linked to ICMPv6 */ 2630 /* header. */ 2631 /* */ 2632 /* 4) The checksum must be adjusted. */ 2633 /* */ 2634 /* 5) IP addresses in original mblk are swapped and IP header data */ 2635 /* are adjusted (protocol number). */ 2636 /* */ 2637 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */ 2638 /* linked with the ICMPv6 data we got from (3). */ 2639 /* ------------------------------------------------------------------------ */ 2640 static int fr_make_icmp_v6(fin) 2641 fr_info_t *fin; 2642 { 2643 struct icmp6_hdr *icmp6; 2644 tcphdr_t *tcp; 2645 struct in6_addr tmp_src6; 2646 size_t icmp_pld_len; 2647 mblk_t *mblk_ip, *mblk_icmp; 2648 2649 if (fin->fin_v != 6) 2650 return (-1); 2651 2652 /* 2653 * If we are dealing with TCP, then packet must SYN/FIN to be routed by 2654 * IP stack. If it is not SYN/FIN, then we must drop it silently. 2655 */ 2656 tcp = (tcphdr_t *) fin->fin_dp; 2657 2658 if ((fin->fin_p == IPPROTO_TCP) && 2659 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2660 return (-1); 2661 2662 /* 2663 * Step (1) 2664 * 2665 * We need to copy complete packet in case of IPv6, no trimming is 2666 * needed (except the L2 headers). 2667 */ 2668 icmp_pld_len = M_LEN(fin->fin_m); 2669 fin->fin_m->b_rptr += fin->fin_ipoff; 2670 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2671 return (-1); 2672 fin->fin_m->b_rptr -= fin->fin_ipoff; 2673 2674 /* 2675 * Step (2) 2676 * 2677 * Allocate and create ICMP header. 2678 */ 2679 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr), 2680 BPRI_HI); 2681 2682 if (mblk_icmp == NULL) 2683 return (-1); 2684 2685 MTYPE(mblk_icmp) = M_DATA; 2686 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr; 2687 icmp6->icmp6_type = ICMP6_DST_UNREACH; 2688 icmp6->icmp6_code = fin->fin_icode & 0xFF; 2689 icmp6->icmp6_data32[0] = 0; 2690 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr); 2691 2692 /* 2693 * Step (3) 2694 * 2695 * Link the copy of IP packet to ICMP header. 2696 */ 2697 linkb(mblk_icmp, mblk_ip); 2698 2699 /* 2700 * Step (4) 2701 * 2702 * Calculate chksum - this is much more easier task than in case of 2703 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length. 2704 * We are making compensation just for change of packet length. 2705 */ 2706 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr); 2707 2708 /* 2709 * Step (5) 2710 * 2711 * Swap IP addresses. 2712 */ 2713 tmp_src6 = fin->fin_ip6->ip6_src; 2714 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2715 fin->fin_ip6->ip6_dst = tmp_src6; 2716 2717 /* 2718 * and adjust IP header data. 2719 */ 2720 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6; 2721 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr)); 2722 2723 /* 2724 * Step (6) 2725 * 2726 * We must release all linked mblks from original packet and keep only 2727 * the first mblk with IP header to link ICMP data. 2728 */ 2729 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t); 2730 2731 if (fin->fin_m->b_cont != NULL) { 2732 FREE_MB_T(fin->fin_m->b_cont); 2733 } 2734 2735 /* 2736 * Append ICMP payload to IP header. 2737 */ 2738 linkb(fin->fin_m, mblk_icmp); 2739 2740 return (0); 2741 } 2742 #endif /* USE_INET6 */ 2743 2744 /* ------------------------------------------------------------------------ */ 2745 /* Function: fr_make_icmp */ 2746 /* Returns: int - 0 on success, -1 on failure */ 2747 /* Parameters: fin(I) - pointer to packet information */ 2748 /* */ 2749 /* We must alter the original mblks passed to IPF from IP stack via */ 2750 /* FW_HOOKS. The reasons why we must alter packet are discussed within */ 2751 /* comment at fr_make_rst() function. */ 2752 /* */ 2753 /* The fr_make_icmp() function acts as a wrapper, which passes the code */ 2754 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */ 2755 /* protocol version. However there are some details, which are common to */ 2756 /* both IP versions. The details are going to be explained here. */ 2757 /* */ 2758 /* The packet looks as follows: */ 2759 /* xxx | IP hdr | IP payload ... | */ 2760 /* ^ ^ ^ ^ */ 2761 /* | | | | */ 2762 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */ 2763 /* | | | */ 2764 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */ 2765 /* | | */ 2766 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */ 2767 /* | of loopback) */ 2768 /* | */ 2769 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */ 2770 /* */ 2771 /* All relevant IP headers are pulled up into the first mblk. It happened */ 2772 /* well in advance before the matching rule was found (the rule, which took */ 2773 /* us here, to fr_make_icmp() function). */ 2774 /* */ 2775 /* Both functions will turn packet passed in fin->fin_m mblk into a new */ 2776 /* packet. New packet will be represented as chain of mblks. */ 2777 /* orig mblk |- b_cont ---. */ 2778 /* ^ `-> ICMP hdr |- b_cont--. */ 2779 /* | ^ `-> duped orig mblk */ 2780 /* | | ^ */ 2781 /* `- The original mblk | | */ 2782 /* will be trimmed to | | */ 2783 /* to contain IP header | | */ 2784 /* only | | */ 2785 /* | | */ 2786 /* `- This is newly | */ 2787 /* allocated mblk to | */ 2788 /* hold ICMPv6 data. | */ 2789 /* | */ 2790 /* | */ 2791 /* | */ 2792 /* This is the copy of original mblk, it will contain -' */ 2793 /* orignal IP packet in case of ICMPv6. In case of */ 2794 /* ICMPv4 it will contain up to 8 bytes of IP payload */ 2795 /* (TCP/UDP/L4) data from original packet. */ 2796 /* ------------------------------------------------------------------------ */ 2797 int fr_make_icmp(fin) 2798 fr_info_t *fin; 2799 { 2800 int rv; 2801 2802 if (fin->fin_v == 4) 2803 rv = fr_make_icmp_v4(fin); 2804 #ifdef USE_INET6 2805 else if (fin->fin_v == 6) 2806 rv = fr_make_icmp_v6(fin); 2807 #endif 2808 else 2809 rv = -1; 2810 2811 return (rv); 2812 } 2813 2814 /* ------------------------------------------------------------------------ */ 2815 /* Function: fr_buf_sum */ 2816 /* Returns: unsigned int - sum of buffer buf */ 2817 /* Parameters: buf - pointer to buf we want to sum up */ 2818 /* len - length of buffer buf */ 2819 /* */ 2820 /* Sums buffer buf. The result is used for chksum calculation. The buf */ 2821 /* argument must be aligned. */ 2822 /* ------------------------------------------------------------------------ */ 2823 static uint32_t fr_buf_sum(buf, len) 2824 const void *buf; 2825 unsigned int len; 2826 { 2827 uint32_t sum = 0; 2828 uint16_t *b = (uint16_t *)buf; 2829 2830 while (len > 1) { 2831 sum += *b++; 2832 len -= 2; 2833 } 2834 2835 if (len == 1) 2836 sum += htons((*(unsigned char *)b) << 8); 2837 2838 return (sum); 2839 } 2840 2841 /* ------------------------------------------------------------------------ */ 2842 /* Function: fr_calc_chksum */ 2843 /* Returns: void */ 2844 /* Parameters: fin - pointer to fr_info_t instance with packet data */ 2845 /* pkt - pointer to duplicated packet */ 2846 /* */ 2847 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */ 2848 /* versions. */ 2849 /* ------------------------------------------------------------------------ */ 2850 void fr_calc_chksum(fin, pkt) 2851 fr_info_t *fin; 2852 mb_t *pkt; 2853 { 2854 struct pseudo_hdr { 2855 union { 2856 struct in_addr in4; 2857 #ifdef USE_INET6 2858 struct in6_addr in6; 2859 #endif 2860 } src_addr; 2861 union { 2862 struct in_addr in4; 2863 #ifdef USE_INET6 2864 struct in6_addr in6; 2865 #endif 2866 } dst_addr; 2867 char zero; 2868 char proto; 2869 uint16_t len; 2870 } phdr; 2871 uint32_t sum, ip_sum; 2872 void *buf; 2873 uint16_t *l4_csum_p; 2874 tcphdr_t *tcp; 2875 udphdr_t *udp; 2876 icmphdr_t *icmp; 2877 #ifdef USE_INET6 2878 struct icmp6_hdr *icmp6; 2879 #endif 2880 ip_t *ip; 2881 unsigned int len; 2882 int pld_len; 2883 2884 /* 2885 * We need to pullup the packet to the single continuous buffer to avoid 2886 * potential misaligment of b_rptr member in mblk chain. 2887 */ 2888 if (pullupmsg(pkt, -1) == 0) { 2889 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum" 2890 " will not be computed by IPF"); 2891 return; 2892 } 2893 2894 /* 2895 * It is guaranteed IP header starts right at b_rptr, because we are 2896 * working with a copy of the original packet. 2897 * 2898 * Compute pseudo header chksum for TCP and UDP. 2899 */ 2900 if ((fin->fin_p == IPPROTO_UDP) || 2901 (fin->fin_p == IPPROTO_TCP)) { 2902 bzero(&phdr, sizeof (phdr)); 2903 #ifdef USE_INET6 2904 if (fin->fin_v == 6) { 2905 phdr.src_addr.in6 = fin->fin_srcip6; 2906 phdr.dst_addr.in6 = fin->fin_dstip6; 2907 } else { 2908 phdr.src_addr.in4 = fin->fin_src; 2909 phdr.dst_addr.in4 = fin->fin_dst; 2910 } 2911 #else 2912 phdr.src_addr.in4 = fin->fin_src; 2913 phdr.dst_addr.in4 = fin->fin_dst; 2914 #endif 2915 phdr.zero = (char) 0; 2916 phdr.proto = fin->fin_p; 2917 phdr.len = htons((uint16_t)fin->fin_dlen); 2918 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr)); 2919 } else { 2920 sum = 0; 2921 } 2922 2923 /* 2924 * Set pointer to the L4 chksum field in the packet, set buf pointer to 2925 * the L4 header start. 2926 */ 2927 switch (fin->fin_p) { 2928 case IPPROTO_UDP: 2929 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2930 l4_csum_p = &udp->uh_sum; 2931 buf = udp; 2932 break; 2933 case IPPROTO_TCP: 2934 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2935 l4_csum_p = &tcp->th_sum; 2936 buf = tcp; 2937 break; 2938 case IPPROTO_ICMP: 2939 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2940 l4_csum_p = &icmp->icmp_cksum; 2941 buf = icmp; 2942 break; 2943 #ifdef USE_INET6 2944 case IPPROTO_ICMPV6: 2945 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen); 2946 l4_csum_p = &icmp6->icmp6_cksum; 2947 buf = icmp6; 2948 break; 2949 #endif 2950 default: 2951 l4_csum_p = NULL; 2952 } 2953 2954 /* 2955 * Compute L4 chksum if needed. 2956 */ 2957 if (l4_csum_p != NULL) { 2958 *l4_csum_p = (uint16_t)0; 2959 pld_len = fin->fin_dlen; 2960 len = pkt->b_wptr - (unsigned char *)buf; 2961 ASSERT(len == pld_len); 2962 /* 2963 * Add payload sum to pseudoheader sum. 2964 */ 2965 sum += fr_buf_sum(buf, len); 2966 while (sum >> 16) 2967 sum = (sum & 0xFFFF) + (sum >> 16); 2968 2969 *l4_csum_p = ~((uint16_t)sum); 2970 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p); 2971 } 2972 2973 /* 2974 * The IP header chksum is needed just for IPv4. 2975 */ 2976 if (fin->fin_v == 4) { 2977 /* 2978 * Compute IPv4 header chksum. 2979 */ 2980 ip = (ip_t *)pkt->b_rptr; 2981 ip->ip_sum = (uint16_t)0; 2982 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen); 2983 while (ip_sum >> 16) 2984 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16); 2985 2986 ip->ip_sum = ~((uint16_t)ip_sum); 2987 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum); 2988 } 2989 2990 return; 2991 } 2992 2993 #endif /* _KERNEL && SOLARIS2 >= 10 */ 2994