1 /* 2 * Copyright (C) 1993-2001, 2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 */ 8 9 #if !defined(lint) 10 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed"; 11 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $"; 12 #endif 13 14 #include <sys/types.h> 15 #include <sys/errno.h> 16 #include <sys/param.h> 17 #include <sys/cpuvar.h> 18 #include <sys/open.h> 19 #include <sys/ioctl.h> 20 #include <sys/filio.h> 21 #include <sys/systm.h> 22 #include <sys/strsubr.h> 23 #include <sys/cred.h> 24 #include <sys/ddi.h> 25 #include <sys/sunddi.h> 26 #include <sys/ksynch.h> 27 #include <sys/kmem.h> 28 #include <sys/mkdev.h> 29 #include <sys/protosw.h> 30 #include <sys/socket.h> 31 #include <sys/dditypes.h> 32 #include <sys/cmn_err.h> 33 #include <sys/zone.h> 34 #include <net/if.h> 35 #include <net/af.h> 36 #include <net/route.h> 37 #include <netinet/in.h> 38 #include <netinet/in_systm.h> 39 #include <netinet/ip.h> 40 #include <netinet/ip_var.h> 41 #include <netinet/tcp.h> 42 #include <netinet/udp.h> 43 #include <netinet/tcpip.h> 44 #include <netinet/ip_icmp.h> 45 #include "netinet/ip_compat.h" 46 #ifdef USE_INET6 47 # include <netinet/icmp6.h> 48 #endif 49 #include "netinet/ip_fil.h" 50 #include "netinet/ip_nat.h" 51 #include "netinet/ip_frag.h" 52 #include "netinet/ip_state.h" 53 #include "netinet/ip_auth.h" 54 #include "netinet/ip_proxy.h" 55 #include "netinet/ipf_stack.h" 56 #ifdef IPFILTER_LOOKUP 57 # include "netinet/ip_lookup.h" 58 #endif 59 #include <inet/ip_ire.h> 60 61 #include <sys/md5.h> 62 #include <sys/neti.h> 63 64 static int frzerostats __P((caddr_t, ipf_stack_t *)); 65 static int fr_setipfloopback __P((int, ipf_stack_t *)); 66 static int fr_enableipf __P((ipf_stack_t *, int)); 67 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp)); 68 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *)); 69 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *)); 70 static int ipf_hook __P((hook_data_t, int, int, void *)); 71 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *)); 72 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *)); 73 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t, 74 void *)); 75 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *)); 76 static int ipf_hook4 __P((hook_data_t, int, int, void *)); 77 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *)); 78 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *)); 79 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t, 80 void *)); 81 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t, 82 void *)); 83 static int ipf_hook6 __P((hook_data_t, int, int, void *)); 84 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 85 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *)); 86 87 #if SOLARIS2 < 10 88 #if SOLARIS2 >= 7 89 u_int *ip_ttl_ptr = NULL; 90 u_int *ip_mtudisc = NULL; 91 # if SOLARIS2 >= 8 92 int *ip_forwarding = NULL; 93 u_int *ip6_forwarding = NULL; 94 # else 95 u_int *ip_forwarding = NULL; 96 # endif 97 #else 98 u_long *ip_ttl_ptr = NULL; 99 u_long *ip_mtudisc = NULL; 100 u_long *ip_forwarding = NULL; 101 #endif 102 #endif 103 104 105 /* ------------------------------------------------------------------------ */ 106 /* Function: ipldetach */ 107 /* Returns: int - 0 == success, else error. */ 108 /* Parameters: Nil */ 109 /* */ 110 /* This function is responsible for undoing anything that might have been */ 111 /* done in a call to iplattach(). It must be able to clean up from a call */ 112 /* to iplattach() that did not succeed. Why might that happen? Someone */ 113 /* configures a table to be so large that we cannot allocate enough memory */ 114 /* for it. */ 115 /* ------------------------------------------------------------------------ */ 116 int ipldetach(ifs) 117 ipf_stack_t *ifs; 118 { 119 120 ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); 121 122 #if SOLARIS2 < 10 123 124 if (ifs->ifs_fr_control_forwarding & 2) { 125 if (ip_forwarding != NULL) 126 *ip_forwarding = 0; 127 #if SOLARIS2 >= 8 128 if (ip6_forwarding != NULL) 129 *ip6_forwarding = 0; 130 #endif 131 } 132 #endif 133 134 /* 135 * This lock needs to be dropped around the net_hook_unregister calls 136 * because we can deadlock here with: 137 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 138 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running) 139 */ 140 RWLOCK_EXIT(&ifs->ifs_ipf_global); 141 142 #define UNDO_HOOK(_f, _b, _e, _h) \ 143 do { \ 144 if (ifs->_f != NULL) { \ 145 if (ifs->_b) { \ 146 ifs->_b = (net_hook_unregister(ifs->_f, \ 147 _e, ifs->_h) != 0); \ 148 if (!ifs->_b) { \ 149 hook_free(ifs->_h); \ 150 ifs->_h = NULL; \ 151 } \ 152 } else if (ifs->_h != NULL) { \ 153 hook_free(ifs->_h); \ 154 ifs->_h = NULL; \ 155 } \ 156 } \ 157 _NOTE(CONSTCOND) \ 158 } while (0) 159 160 /* 161 * Remove IPv6 Hooks 162 */ 163 if (ifs->ifs_ipf_ipv6 != NULL) { 164 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in, 165 NH_PHYSICAL_IN, ifs_ipfhook6_in); 166 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out, 167 NH_PHYSICAL_OUT, ifs_ipfhook6_out); 168 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events, 169 NH_NIC_EVENTS, ifs_ipfhook6_nicevents); 170 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in, 171 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in); 172 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out, 173 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out); 174 175 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0) 176 goto detach_failed; 177 ifs->ifs_ipf_ipv6 = NULL; 178 } 179 180 /* 181 * Remove IPv4 Hooks 182 */ 183 if (ifs->ifs_ipf_ipv4 != NULL) { 184 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in, 185 NH_PHYSICAL_IN, ifs_ipfhook4_in); 186 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out, 187 NH_PHYSICAL_OUT, ifs_ipfhook4_out); 188 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events, 189 NH_NIC_EVENTS, ifs_ipfhook4_nicevents); 190 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in, 191 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in); 192 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out, 193 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out); 194 195 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0) 196 goto detach_failed; 197 ifs->ifs_ipf_ipv4 = NULL; 198 } 199 200 #undef UNDO_HOOK 201 202 #ifdef IPFDEBUG 203 cmn_err(CE_CONT, "ipldetach()\n"); 204 #endif 205 206 WRITE_ENTER(&ifs->ifs_ipf_global); 207 fr_deinitialise(ifs); 208 209 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); 210 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs); 211 212 if (ifs->ifs_ipf_locks_done == 1) { 213 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock); 214 MUTEX_DESTROY(&ifs->ifs_ipf_rw); 215 RW_DESTROY(&ifs->ifs_ipf_tokens); 216 RW_DESTROY(&ifs->ifs_ipf_ipidfrag); 217 ifs->ifs_ipf_locks_done = 0; 218 } 219 220 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out || 221 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in || 222 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events || 223 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out || 224 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out) 225 return -1; 226 227 return 0; 228 229 detach_failed: 230 WRITE_ENTER(&ifs->ifs_ipf_global); 231 return -1; 232 } 233 234 int iplattach(ifs) 235 ipf_stack_t *ifs; 236 { 237 #if SOLARIS2 < 10 238 int i; 239 #endif 240 netid_t id = ifs->ifs_netid; 241 242 #ifdef IPFDEBUG 243 cmn_err(CE_CONT, "iplattach()\n"); 244 #endif 245 246 ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); 247 ifs->ifs_fr_flags = IPF_LOGGING; 248 #ifdef _KERNEL 249 ifs->ifs_fr_update_ipid = 0; 250 #else 251 ifs->ifs_fr_update_ipid = 1; 252 #endif 253 ifs->ifs_fr_minttl = 4; 254 ifs->ifs_fr_icmpminfragmtu = 68; 255 #if defined(IPFILTER_DEFAULT_BLOCK) 256 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH; 257 #else 258 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; 259 #endif 260 261 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache)); 262 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex"); 263 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex"); 264 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); 265 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock"); 266 ifs->ifs_ipf_locks_done = 1; 267 268 if (fr_initialise(ifs) < 0) 269 return -1; 270 271 HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4, 272 "ipfilter_hook4_nicevents", ifs); 273 HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in, 274 "ipfilter_hook4_in", ifs); 275 HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out, 276 "ipfilter_hook4_out", ifs); 277 HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in, 278 "ipfilter_hook4_loop_in", ifs); 279 HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out, 280 "ipfilter_hook4_loop_out", ifs); 281 282 /* 283 * If we hold this lock over all of the net_hook_register calls, we 284 * can cause a deadlock to occur with the following lock ordering: 285 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 286 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path) 287 */ 288 RWLOCK_EXIT(&ifs->ifs_ipf_global); 289 290 /* 291 * Add IPv4 hooks 292 */ 293 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET); 294 if (ifs->ifs_ipf_ipv4 == NULL) 295 goto hookup_failed; 296 297 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4, 298 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0); 299 if (!ifs->ifs_hook4_nic_events) 300 goto hookup_failed; 301 302 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4, 303 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0); 304 if (!ifs->ifs_hook4_physical_in) 305 goto hookup_failed; 306 307 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4, 308 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0); 309 if (!ifs->ifs_hook4_physical_out) 310 goto hookup_failed; 311 312 if (ifs->ifs_ipf_loopback) { 313 ifs->ifs_hook4_loopback_in = (net_hook_register( 314 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 315 ifs->ifs_ipfhook4_loop_in) == 0); 316 if (!ifs->ifs_hook4_loopback_in) 317 goto hookup_failed; 318 319 ifs->ifs_hook4_loopback_out = (net_hook_register( 320 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 321 ifs->ifs_ipfhook4_loop_out) == 0); 322 if (!ifs->ifs_hook4_loopback_out) 323 goto hookup_failed; 324 } 325 /* 326 * Add IPv6 hooks 327 */ 328 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6); 329 if (ifs->ifs_ipf_ipv6 == NULL) 330 goto hookup_failed; 331 332 HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6, 333 "ipfilter_hook6_nicevents", ifs); 334 HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in, 335 "ipfilter_hook6_in", ifs); 336 HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out, 337 "ipfilter_hook6_out", ifs); 338 HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in, 339 "ipfilter_hook6_loop_in", ifs); 340 HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out, 341 "ipfilter_hook6_loop_out", ifs); 342 343 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6, 344 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0); 345 if (!ifs->ifs_hook6_nic_events) 346 goto hookup_failed; 347 348 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6, 349 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0); 350 if (!ifs->ifs_hook6_physical_in) 351 goto hookup_failed; 352 353 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6, 354 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0); 355 if (!ifs->ifs_hook6_physical_out) 356 goto hookup_failed; 357 358 if (ifs->ifs_ipf_loopback) { 359 ifs->ifs_hook6_loopback_in = (net_hook_register( 360 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 361 ifs->ifs_ipfhook6_loop_in) == 0); 362 if (!ifs->ifs_hook6_loopback_in) 363 goto hookup_failed; 364 365 ifs->ifs_hook6_loopback_out = (net_hook_register( 366 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 367 ifs->ifs_ipfhook6_loop_out) == 0); 368 if (!ifs->ifs_hook6_loopback_out) 369 goto hookup_failed; 370 } 371 372 /* 373 * Reacquire ipf_global, now it is safe. 374 */ 375 WRITE_ENTER(&ifs->ifs_ipf_global); 376 377 /* Do not use private interface ip_params_arr[] in Solaris 10 */ 378 #if SOLARIS2 < 10 379 380 #if SOLARIS2 >= 8 381 ip_forwarding = &ip_g_forward; 382 #endif 383 /* 384 * XXX - There is no terminator for this array, so it is not possible 385 * to tell if what we are looking for is missing and go off the end 386 * of the array. 387 */ 388 389 #if SOLARIS2 <= 8 390 for (i = 0; ; i++) { 391 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) { 392 ip_ttl_ptr = &ip_param_arr[i].ip_param_value; 393 } else if (!strcmp(ip_param_arr[i].ip_param_name, 394 "ip_path_mtu_discovery")) { 395 ip_mtudisc = &ip_param_arr[i].ip_param_value; 396 } 397 #if SOLARIS2 < 8 398 else if (!strcmp(ip_param_arr[i].ip_param_name, 399 "ip_forwarding")) { 400 ip_forwarding = &ip_param_arr[i].ip_param_value; 401 } 402 #else 403 else if (!strcmp(ip_param_arr[i].ip_param_name, 404 "ip6_forwarding")) { 405 ip6_forwarding = &ip_param_arr[i].ip_param_value; 406 } 407 #endif 408 409 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL && 410 #if SOLARIS2 >= 8 411 ip6_forwarding != NULL && 412 #endif 413 ip_forwarding != NULL) 414 break; 415 } 416 #endif 417 418 if (ifs->ifs_fr_control_forwarding & 1) { 419 if (ip_forwarding != NULL) 420 *ip_forwarding = 1; 421 #if SOLARIS2 >= 8 422 if (ip6_forwarding != NULL) 423 *ip6_forwarding = 1; 424 #endif 425 } 426 427 #endif 428 429 return 0; 430 hookup_failed: 431 WRITE_ENTER(&ifs->ifs_ipf_global); 432 return -1; 433 } 434 435 static int fr_setipfloopback(set, ifs) 436 int set; 437 ipf_stack_t *ifs; 438 { 439 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL) 440 return EFAULT; 441 442 if (set && !ifs->ifs_ipf_loopback) { 443 ifs->ifs_ipf_loopback = 1; 444 445 ifs->ifs_hook4_loopback_in = (net_hook_register( 446 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 447 ifs->ifs_ipfhook4_loop_in) == 0); 448 if (!ifs->ifs_hook4_loopback_in) 449 return EINVAL; 450 451 ifs->ifs_hook4_loopback_out = (net_hook_register( 452 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 453 ifs->ifs_ipfhook4_loop_out) == 0); 454 if (!ifs->ifs_hook4_loopback_out) 455 return EINVAL; 456 457 ifs->ifs_hook6_loopback_in = (net_hook_register( 458 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 459 ifs->ifs_ipfhook6_loop_in) == 0); 460 if (!ifs->ifs_hook6_loopback_in) 461 return EINVAL; 462 463 ifs->ifs_hook6_loopback_out = (net_hook_register( 464 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 465 ifs->ifs_ipfhook6_loop_out) == 0); 466 if (!ifs->ifs_hook6_loopback_out) 467 return EINVAL; 468 469 } else if (!set && ifs->ifs_ipf_loopback) { 470 ifs->ifs_ipf_loopback = 0; 471 472 ifs->ifs_hook4_loopback_in = 473 (net_hook_unregister(ifs->ifs_ipf_ipv4, 474 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 475 if (ifs->ifs_hook4_loopback_in) 476 return EBUSY; 477 478 ifs->ifs_hook4_loopback_out = 479 (net_hook_unregister(ifs->ifs_ipf_ipv4, 480 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0); 481 if (ifs->ifs_hook4_loopback_out) 482 return EBUSY; 483 484 ifs->ifs_hook6_loopback_in = 485 (net_hook_unregister(ifs->ifs_ipf_ipv6, 486 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 487 if (ifs->ifs_hook6_loopback_in) 488 return EBUSY; 489 490 ifs->ifs_hook6_loopback_out = 491 (net_hook_unregister(ifs->ifs_ipf_ipv6, 492 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0); 493 if (ifs->ifs_hook6_loopback_out) 494 return EBUSY; 495 } 496 return 0; 497 } 498 499 500 /* 501 * Filter ioctl interface. 502 */ 503 /*ARGSUSED*/ 504 int iplioctl(dev, cmd, data, mode, cp, rp) 505 dev_t dev; 506 int cmd; 507 #if SOLARIS2 >= 7 508 intptr_t data; 509 #else 510 int *data; 511 #endif 512 int mode; 513 cred_t *cp; 514 int *rp; 515 { 516 int error = 0, tmp; 517 friostat_t fio; 518 minor_t unit; 519 u_int enable; 520 ipf_stack_t *ifs; 521 522 #ifdef IPFDEBUG 523 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n", 524 dev, cmd, data, mode, cp, rp); 525 #endif 526 unit = getminor(dev); 527 if (IPL_LOGMAX < unit) 528 return ENXIO; 529 530 /* 531 * As we're calling ipf_find_stack in user space, from a given zone 532 * to find the stack pointer for this zone, there is no need to have 533 * a hold/refence count here. 534 */ 535 ifs = ipf_find_stack(crgetzoneid(cp)); 536 ASSERT(ifs != NULL); 537 538 if (ifs->ifs_fr_running <= 0) { 539 if (unit != IPL_LOGIPF) { 540 return EIO; 541 } 542 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && 543 cmd != SIOCIPFSET && cmd != SIOCFRENB && 544 cmd != SIOCGETFS && cmd != SIOCGETFF) { 545 return EIO; 546 } 547 } 548 549 READ_ENTER(&ifs->ifs_ipf_global); 550 if (ifs->ifs_fr_enable_active != 0) { 551 RWLOCK_EXIT(&ifs->ifs_ipf_global); 552 return EBUSY; 553 } 554 555 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp), 556 curproc, ifs); 557 if (error != -1) { 558 RWLOCK_EXIT(&ifs->ifs_ipf_global); 559 return error; 560 } 561 error = 0; 562 563 switch (cmd) 564 { 565 case SIOCFRENB : 566 if (!(mode & FWRITE)) 567 error = EPERM; 568 else { 569 error = COPYIN((caddr_t)data, (caddr_t)&enable, 570 sizeof(enable)); 571 if (error != 0) { 572 error = EFAULT; 573 break; 574 } 575 576 RWLOCK_EXIT(&ifs->ifs_ipf_global); 577 WRITE_ENTER(&ifs->ifs_ipf_global); 578 579 /* 580 * We must recheck fr_enable_active here, since we've 581 * dropped ifs_ipf_global from R in order to get it 582 * exclusively. 583 */ 584 if (ifs->ifs_fr_enable_active == 0) { 585 ifs->ifs_fr_enable_active = 1; 586 error = fr_enableipf(ifs, enable); 587 ifs->ifs_fr_enable_active = 0; 588 } 589 } 590 break; 591 case SIOCIPFSET : 592 if (!(mode & FWRITE)) { 593 error = EPERM; 594 break; 595 } 596 /* FALLTHRU */ 597 case SIOCIPFGETNEXT : 598 case SIOCIPFGET : 599 error = fr_ipftune(cmd, (void *)data, ifs); 600 break; 601 case SIOCSETFF : 602 if (!(mode & FWRITE)) 603 error = EPERM; 604 else { 605 error = COPYIN((caddr_t)data, 606 (caddr_t)&ifs->ifs_fr_flags, 607 sizeof(ifs->ifs_fr_flags)); 608 if (error != 0) 609 error = EFAULT; 610 } 611 break; 612 case SIOCIPFLP : 613 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 614 sizeof(tmp)); 615 if (error != 0) 616 error = EFAULT; 617 else 618 error = fr_setipfloopback(tmp, ifs); 619 break; 620 case SIOCGETFF : 621 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data, 622 sizeof(ifs->ifs_fr_flags)); 623 if (error != 0) 624 error = EFAULT; 625 break; 626 case SIOCFUNCL : 627 error = fr_resolvefunc((void *)data); 628 break; 629 case SIOCINAFR : 630 case SIOCRMAFR : 631 case SIOCADAFR : 632 case SIOCZRLST : 633 if (!(mode & FWRITE)) 634 error = EPERM; 635 else 636 error = frrequest(unit, cmd, (caddr_t)data, 637 ifs->ifs_fr_active, 1, ifs); 638 break; 639 case SIOCINIFR : 640 case SIOCRMIFR : 641 case SIOCADIFR : 642 if (!(mode & FWRITE)) 643 error = EPERM; 644 else 645 error = frrequest(unit, cmd, (caddr_t)data, 646 1 - ifs->ifs_fr_active, 1, ifs); 647 break; 648 case SIOCSWAPA : 649 if (!(mode & FWRITE)) 650 error = EPERM; 651 else { 652 WRITE_ENTER(&ifs->ifs_ipf_mutex); 653 bzero((char *)ifs->ifs_frcache, 654 sizeof (ifs->ifs_frcache)); 655 error = COPYOUT((caddr_t)&ifs->ifs_fr_active, 656 (caddr_t)data, 657 sizeof(ifs->ifs_fr_active)); 658 if (error != 0) 659 error = EFAULT; 660 else 661 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active; 662 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 663 } 664 break; 665 case SIOCGETFS : 666 fr_getstat(&fio, ifs); 667 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT); 668 break; 669 case SIOCFRZST : 670 if (!(mode & FWRITE)) 671 error = EPERM; 672 else 673 error = fr_zerostats((caddr_t)data, ifs); 674 break; 675 case SIOCIPFFL : 676 if (!(mode & FWRITE)) 677 error = EPERM; 678 else { 679 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 680 sizeof(tmp)); 681 if (!error) { 682 tmp = frflush(unit, 4, tmp, ifs); 683 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 684 sizeof(tmp)); 685 if (error != 0) 686 error = EFAULT; 687 } else 688 error = EFAULT; 689 } 690 break; 691 #ifdef USE_INET6 692 case SIOCIPFL6 : 693 if (!(mode & FWRITE)) 694 error = EPERM; 695 else { 696 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 697 sizeof(tmp)); 698 if (!error) { 699 tmp = frflush(unit, 6, tmp, ifs); 700 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 701 sizeof(tmp)); 702 if (error != 0) 703 error = EFAULT; 704 } else 705 error = EFAULT; 706 } 707 break; 708 #endif 709 case SIOCSTLCK : 710 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 711 if (error == 0) { 712 ifs->ifs_fr_state_lock = tmp; 713 ifs->ifs_fr_nat_lock = tmp; 714 ifs->ifs_fr_frag_lock = tmp; 715 ifs->ifs_fr_auth_lock = tmp; 716 } else 717 error = EFAULT; 718 break; 719 #ifdef IPFILTER_LOG 720 case SIOCIPFFB : 721 if (!(mode & FWRITE)) 722 error = EPERM; 723 else { 724 tmp = ipflog_clear(unit, ifs); 725 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 726 sizeof(tmp)); 727 if (error) 728 error = EFAULT; 729 } 730 break; 731 #endif /* IPFILTER_LOG */ 732 case SIOCFRSYN : 733 if (!(mode & FWRITE)) 734 error = EPERM; 735 else { 736 RWLOCK_EXIT(&ifs->ifs_ipf_global); 737 WRITE_ENTER(&ifs->ifs_ipf_global); 738 739 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 740 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 741 fr_nataddrsync(0, NULL, NULL, ifs); 742 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 743 error = 0; 744 } 745 break; 746 case SIOCGFRST : 747 error = fr_outobj((void *)data, fr_fragstats(ifs), 748 IPFOBJ_FRAGSTAT); 749 break; 750 case FIONREAD : 751 #ifdef IPFILTER_LOG 752 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF]; 753 754 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); 755 if (error != 0) 756 error = EFAULT; 757 #endif 758 break; 759 case SIOCIPFITER : 760 error = ipf_frruleiter((caddr_t)data, crgetuid(cp), 761 curproc, ifs); 762 break; 763 764 case SIOCGENITER : 765 error = ipf_genericiter((caddr_t)data, crgetuid(cp), 766 curproc, ifs); 767 break; 768 769 case SIOCIPFDELTOK : 770 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 771 if (error != 0) { 772 error = EFAULT; 773 } else { 774 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs); 775 } 776 break; 777 778 default : 779 #ifdef IPFDEBUG 780 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p", 781 cmd, (void *)data); 782 #endif 783 error = EINVAL; 784 break; 785 } 786 RWLOCK_EXIT(&ifs->ifs_ipf_global); 787 return error; 788 } 789 790 791 static int fr_enableipf(ifs, enable) 792 ipf_stack_t *ifs; 793 int enable; 794 { 795 int error; 796 797 if (!enable) { 798 error = ipldetach(ifs); 799 if (error == 0) 800 ifs->ifs_fr_running = -1; 801 return error; 802 } 803 804 if (ifs->ifs_fr_running > 0) 805 return 0; 806 807 error = iplattach(ifs); 808 if (error == 0) { 809 if (ifs->ifs_fr_timer_id == NULL) { 810 int hz = drv_usectohz(500000); 811 812 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, 813 (void *)ifs, 814 hz); 815 } 816 ifs->ifs_fr_running = 1; 817 } else { 818 (void) ipldetach(ifs); 819 } 820 return error; 821 } 822 823 824 phy_if_t get_unit(name, v, ifs) 825 char *name; 826 int v; 827 ipf_stack_t *ifs; 828 { 829 net_handle_t nif; 830 831 if (v == 4) 832 nif = ifs->ifs_ipf_ipv4; 833 else if (v == 6) 834 nif = ifs->ifs_ipf_ipv6; 835 else 836 return 0; 837 838 return (net_phylookup(nif, name)); 839 } 840 841 /* 842 * routines below for saving IP headers to buffer 843 */ 844 /*ARGSUSED*/ 845 int iplopen(devp, flags, otype, cred) 846 dev_t *devp; 847 int flags, otype; 848 cred_t *cred; 849 { 850 minor_t min = getminor(*devp); 851 852 #ifdef IPFDEBUG 853 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred); 854 #endif 855 if (!(otype & OTYP_CHR)) 856 return ENXIO; 857 858 min = (IPL_LOGMAX < min) ? ENXIO : 0; 859 return min; 860 } 861 862 863 /*ARGSUSED*/ 864 int iplclose(dev, flags, otype, cred) 865 dev_t dev; 866 int flags, otype; 867 cred_t *cred; 868 { 869 minor_t min = getminor(dev); 870 871 #ifdef IPFDEBUG 872 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred); 873 #endif 874 875 min = (IPL_LOGMAX < min) ? ENXIO : 0; 876 return min; 877 } 878 879 #ifdef IPFILTER_LOG 880 /* 881 * iplread/ipllog 882 * both of these must operate with at least splnet() lest they be 883 * called during packet processing and cause an inconsistancy to appear in 884 * the filter lists. 885 */ 886 /*ARGSUSED*/ 887 int iplread(dev, uio, cp) 888 dev_t dev; 889 register struct uio *uio; 890 cred_t *cp; 891 { 892 ipf_stack_t *ifs; 893 int ret; 894 895 /* 896 * As we're calling ipf_find_stack in user space, from a given zone 897 * to find the stack pointer for this zone, there is no need to have 898 * a hold/refence count here. 899 */ 900 ifs = ipf_find_stack(crgetzoneid(cp)); 901 ASSERT(ifs != NULL); 902 903 # ifdef IPFDEBUG 904 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp); 905 # endif 906 907 if (ifs->ifs_fr_running < 1) { 908 return EIO; 909 } 910 911 # ifdef IPFILTER_SYNC 912 if (getminor(dev) == IPL_LOGSYNC) { 913 return ipfsync_read(uio); 914 } 915 # endif 916 917 ret = ipflog_read(getminor(dev), uio, ifs); 918 return ret; 919 } 920 #endif /* IPFILTER_LOG */ 921 922 923 /* 924 * iplread/ipllog 925 * both of these must operate with at least splnet() lest they be 926 * called during packet processing and cause an inconsistancy to appear in 927 * the filter lists. 928 */ 929 int iplwrite(dev, uio, cp) 930 dev_t dev; 931 register struct uio *uio; 932 cred_t *cp; 933 { 934 ipf_stack_t *ifs; 935 936 /* 937 * As we're calling ipf_find_stack in user space, from a given zone 938 * to find the stack pointer for this zone, there is no need to have 939 * a hold/refence count here. 940 */ 941 ifs = ipf_find_stack(crgetzoneid(cp)); 942 ASSERT(ifs != NULL); 943 944 #ifdef IPFDEBUG 945 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp); 946 #endif 947 948 if (ifs->ifs_fr_running < 1) { 949 return EIO; 950 } 951 952 #ifdef IPFILTER_SYNC 953 if (getminor(dev) == IPL_LOGSYNC) 954 return ipfsync_write(uio); 955 #endif /* IPFILTER_SYNC */ 956 dev = dev; /* LINT */ 957 uio = uio; /* LINT */ 958 cp = cp; /* LINT */ 959 return ENXIO; 960 } 961 962 963 /* 964 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that 965 * requires a large amount of setting up and isn't any more efficient. 966 */ 967 int fr_send_reset(fin) 968 fr_info_t *fin; 969 { 970 tcphdr_t *tcp, *tcp2; 971 int tlen, hlen; 972 mblk_t *m; 973 #ifdef USE_INET6 974 ip6_t *ip6; 975 #endif 976 ip_t *ip; 977 978 tcp = fin->fin_dp; 979 if (tcp->th_flags & TH_RST) 980 return -1; 981 982 #ifndef IPFILTER_CKSUM 983 if (fr_checkl4sum(fin) == -1) 984 return -1; 985 #endif 986 987 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0; 988 #ifdef USE_INET6 989 if (fin->fin_v == 6) 990 hlen = sizeof(ip6_t); 991 else 992 #endif 993 hlen = sizeof(ip_t); 994 hlen += sizeof(*tcp2); 995 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL) 996 return -1; 997 998 m->b_rptr += 64; 999 MTYPE(m) = M_DATA; 1000 m->b_wptr = m->b_rptr + hlen; 1001 ip = (ip_t *)m->b_rptr; 1002 bzero((char *)ip, hlen); 1003 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2)); 1004 tcp2->th_dport = tcp->th_sport; 1005 tcp2->th_sport = tcp->th_dport; 1006 if (tcp->th_flags & TH_ACK) { 1007 tcp2->th_seq = tcp->th_ack; 1008 tcp2->th_flags = TH_RST; 1009 } else { 1010 tcp2->th_ack = ntohl(tcp->th_seq); 1011 tcp2->th_ack += tlen; 1012 tcp2->th_ack = htonl(tcp2->th_ack); 1013 tcp2->th_flags = TH_RST|TH_ACK; 1014 } 1015 tcp2->th_off = sizeof(struct tcphdr) >> 2; 1016 1017 ip->ip_v = fin->fin_v; 1018 #ifdef USE_INET6 1019 if (fin->fin_v == 6) { 1020 ip6 = (ip6_t *)m->b_rptr; 1021 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1022 ip6->ip6_src = fin->fin_dst6.in6; 1023 ip6->ip6_dst = fin->fin_src6.in6; 1024 ip6->ip6_plen = htons(sizeof(*tcp)); 1025 ip6->ip6_nxt = IPPROTO_TCP; 1026 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2); 1027 } else 1028 #endif 1029 { 1030 ip->ip_src.s_addr = fin->fin_daddr; 1031 ip->ip_dst.s_addr = fin->fin_saddr; 1032 ip->ip_id = fr_nextipid(fin); 1033 ip->ip_hl = sizeof(*ip) >> 2; 1034 ip->ip_p = IPPROTO_TCP; 1035 ip->ip_len = sizeof(*ip) + sizeof(*tcp); 1036 ip->ip_tos = fin->fin_ip->ip_tos; 1037 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2); 1038 } 1039 return fr_send_ip(fin, m, &m); 1040 } 1041 1042 /* 1043 * Function: fr_send_ip 1044 * Returns: 0: success 1045 * -1: failed 1046 * Parameters: 1047 * fin: packet information 1048 * m: the message block where ip head starts 1049 * 1050 * Send a new packet through the IP stack. 1051 * 1052 * For IPv4 packets, ip_len must be in host byte order, and ip_v, 1053 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this 1054 * function). 1055 * 1056 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled 1057 * in by this function. 1058 * 1059 * All other portions of the packet must be in on-the-wire format. 1060 */ 1061 /*ARGSUSED*/ 1062 static int fr_send_ip(fin, m, mpp) 1063 fr_info_t *fin; 1064 mblk_t *m, **mpp; 1065 { 1066 qpktinfo_t qpi, *qpip; 1067 fr_info_t fnew; 1068 ip_t *ip; 1069 int i, hlen; 1070 ipf_stack_t *ifs = fin->fin_ifs; 1071 1072 ip = (ip_t *)m->b_rptr; 1073 bzero((char *)&fnew, sizeof(fnew)); 1074 1075 #ifdef USE_INET6 1076 if (fin->fin_v == 6) { 1077 ip6_t *ip6; 1078 1079 ip6 = (ip6_t *)ip; 1080 ip6->ip6_vfc = 0x60; 1081 ip6->ip6_hlim = 127; 1082 fnew.fin_v = 6; 1083 hlen = sizeof(*ip6); 1084 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; 1085 } else 1086 #endif 1087 { 1088 fnew.fin_v = 4; 1089 #if SOLARIS2 >= 10 1090 ip->ip_ttl = 255; 1091 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1) 1092 ip->ip_off = htons(IP_DF); 1093 #else 1094 if (ip_ttl_ptr != NULL) 1095 ip->ip_ttl = (u_char)(*ip_ttl_ptr); 1096 else 1097 ip->ip_ttl = 63; 1098 if (ip_mtudisc != NULL) 1099 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0); 1100 else 1101 ip->ip_off = htons(IP_DF); 1102 #endif 1103 /* 1104 * The dance with byte order and ip_len/ip_off is because in 1105 * fr_fastroute, it expects them to be in host byte order but 1106 * ipf_cksum expects them to be in network byte order. 1107 */ 1108 ip->ip_len = htons(ip->ip_len); 1109 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip)); 1110 ip->ip_len = ntohs(ip->ip_len); 1111 ip->ip_off = ntohs(ip->ip_off); 1112 hlen = sizeof(*ip); 1113 fnew.fin_plen = ip->ip_len; 1114 } 1115 1116 qpip = fin->fin_qpi; 1117 qpi.qpi_off = 0; 1118 qpi.qpi_ill = qpip->qpi_ill; 1119 qpi.qpi_m = m; 1120 qpi.qpi_data = ip; 1121 fnew.fin_qpi = &qpi; 1122 fnew.fin_ifp = fin->fin_ifp; 1123 fnew.fin_flx = FI_NOCKSUM; 1124 fnew.fin_m = m; 1125 fnew.fin_qfm = m; 1126 fnew.fin_ip = ip; 1127 fnew.fin_mp = mpp; 1128 fnew.fin_hlen = hlen; 1129 fnew.fin_dp = (char *)ip + hlen; 1130 fnew.fin_ifs = fin->fin_ifs; 1131 (void) fr_makefrip(hlen, ip, &fnew); 1132 1133 i = fr_fastroute(m, mpp, &fnew, NULL); 1134 return i; 1135 } 1136 1137 1138 int fr_send_icmp_err(type, fin, dst) 1139 int type; 1140 fr_info_t *fin; 1141 int dst; 1142 { 1143 struct in_addr dst4; 1144 struct icmp *icmp; 1145 qpktinfo_t *qpi; 1146 int hlen, code; 1147 phy_if_t phy; 1148 u_short sz; 1149 #ifdef USE_INET6 1150 mblk_t *mb; 1151 #endif 1152 mblk_t *m; 1153 #ifdef USE_INET6 1154 ip6_t *ip6; 1155 #endif 1156 ip_t *ip; 1157 ipf_stack_t *ifs = fin->fin_ifs; 1158 1159 if ((type < 0) || (type > ICMP_MAXTYPE)) 1160 return -1; 1161 1162 code = fin->fin_icode; 1163 #ifdef USE_INET6 1164 if ((code < 0) || (code >= ICMP_MAX_UNREACH)) 1165 return -1; 1166 #endif 1167 1168 #ifndef IPFILTER_CKSUM 1169 if (fr_checkl4sum(fin) == -1) 1170 return -1; 1171 #endif 1172 1173 qpi = fin->fin_qpi; 1174 1175 #ifdef USE_INET6 1176 mb = fin->fin_qfm; 1177 1178 if (fin->fin_v == 6) { 1179 sz = sizeof(ip6_t); 1180 sz += MIN(mb->b_wptr - mb->b_rptr, 512); 1181 hlen = sizeof(ip6_t); 1182 type = icmptoicmp6types[type]; 1183 if (type == ICMP6_DST_UNREACH) 1184 code = icmptoicmp6unreach[code]; 1185 } else 1186 #endif 1187 { 1188 if ((fin->fin_p == IPPROTO_ICMP) && 1189 !(fin->fin_flx & FI_SHORT)) 1190 switch (ntohs(fin->fin_data[0]) >> 8) 1191 { 1192 case ICMP_ECHO : 1193 case ICMP_TSTAMP : 1194 case ICMP_IREQ : 1195 case ICMP_MASKREQ : 1196 break; 1197 default : 1198 return 0; 1199 } 1200 1201 sz = sizeof(ip_t) * 2; 1202 sz += 8; /* 64 bits of data */ 1203 hlen = sizeof(ip_t); 1204 } 1205 1206 sz += offsetof(struct icmp, icmp_ip); 1207 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL) 1208 return -1; 1209 MTYPE(m) = M_DATA; 1210 m->b_rptr += 64; 1211 m->b_wptr = m->b_rptr + sz; 1212 bzero((char *)m->b_rptr, (size_t)sz); 1213 ip = (ip_t *)m->b_rptr; 1214 ip->ip_v = fin->fin_v; 1215 icmp = (struct icmp *)(m->b_rptr + hlen); 1216 icmp->icmp_type = type & 0xff; 1217 icmp->icmp_code = code & 0xff; 1218 phy = (phy_if_t)qpi->qpi_ill; 1219 if (type == ICMP_UNREACH && (phy != 0) && 1220 fin->fin_icode == ICMP_UNREACH_NEEDFRAG) 1221 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 ); 1222 1223 #ifdef USE_INET6 1224 if (fin->fin_v == 6) { 1225 struct in6_addr dst6; 1226 int csz; 1227 1228 if (dst == 0) { 1229 ipf_stack_t *ifs = fin->fin_ifs; 1230 1231 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy, 1232 (void *)&dst6, NULL, ifs) == -1) { 1233 FREE_MB_T(m); 1234 return -1; 1235 } 1236 } else 1237 dst6 = fin->fin_dst6.in6; 1238 1239 csz = sz; 1240 sz -= sizeof(ip6_t); 1241 ip6 = (ip6_t *)m->b_rptr; 1242 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1243 ip6->ip6_plen = htons((u_short)sz); 1244 ip6->ip6_nxt = IPPROTO_ICMPV6; 1245 ip6->ip6_src = dst6; 1246 ip6->ip6_dst = fin->fin_src6.in6; 1247 sz -= offsetof(struct icmp, icmp_ip); 1248 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz); 1249 icmp->icmp_cksum = csz - sizeof(ip6_t); 1250 } else 1251 #endif 1252 { 1253 ip->ip_hl = sizeof(*ip) >> 2; 1254 ip->ip_p = IPPROTO_ICMP; 1255 ip->ip_id = fin->fin_ip->ip_id; 1256 ip->ip_tos = fin->fin_ip->ip_tos; 1257 ip->ip_len = (u_short)sz; 1258 if (dst == 0) { 1259 ipf_stack_t *ifs = fin->fin_ifs; 1260 1261 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy, 1262 (void *)&dst4, NULL, ifs) == -1) { 1263 FREE_MB_T(m); 1264 return -1; 1265 } 1266 } else { 1267 dst4 = fin->fin_dst; 1268 } 1269 ip->ip_src = dst4; 1270 ip->ip_dst = fin->fin_src; 1271 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip, 1272 sizeof(*fin->fin_ip)); 1273 bcopy((char *)fin->fin_ip + fin->fin_hlen, 1274 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8); 1275 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len); 1276 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off); 1277 icmp->icmp_cksum = ipf_cksum((u_short *)icmp, 1278 sz - sizeof(ip_t)); 1279 } 1280 1281 /* 1282 * Need to exit out of these so we don't recursively call rw_enter 1283 * from fr_qout. 1284 */ 1285 return fr_send_ip(fin, m, &m); 1286 } 1287 1288 #include <sys/time.h> 1289 #include <sys/varargs.h> 1290 1291 #ifndef _KERNEL 1292 #include <stdio.h> 1293 #endif 1294 1295 #define NULLADDR_RATE_LIMIT 10 /* 10 seconds */ 1296 1297 1298 /* 1299 * Print out warning message at rate-limited speed. 1300 */ 1301 static void rate_limit_message(ipf_stack_t *ifs, 1302 int rate, const char *message, ...) 1303 { 1304 static time_t last_time = 0; 1305 time_t now; 1306 va_list args; 1307 char msg_buf[256]; 1308 int need_printed = 0; 1309 1310 now = ddi_get_time(); 1311 1312 /* make sure, no multiple entries */ 1313 ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk))); 1314 MUTEX_ENTER(&ifs->ifs_ipf_rw); 1315 if (now - last_time >= rate) { 1316 need_printed = 1; 1317 last_time = now; 1318 } 1319 MUTEX_EXIT(&ifs->ifs_ipf_rw); 1320 1321 if (need_printed) { 1322 va_start(args, message); 1323 (void)vsnprintf(msg_buf, 255, message, args); 1324 va_end(args); 1325 #ifdef _KERNEL 1326 cmn_err(CE_WARN, msg_buf); 1327 #else 1328 fprintf(std_err, msg_buf); 1329 #endif 1330 } 1331 } 1332 1333 /* 1334 * Return the first IP Address associated with an interface 1335 * For IPv6, we walk through the list of logical interfaces and return 1336 * the address of the first one that isn't a link-local interface. 1337 * We can't assume that it is :1 because another link-local address 1338 * may have been assigned there. 1339 */ 1340 /*ARGSUSED*/ 1341 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs) 1342 int v, atype; 1343 void *ifptr; 1344 struct in_addr *inp, *inpmask; 1345 ipf_stack_t *ifs; 1346 { 1347 struct sockaddr_in6 v6addr[2]; 1348 struct sockaddr_in v4addr[2]; 1349 net_ifaddr_t type[2]; 1350 net_handle_t net_data; 1351 phy_if_t phyif; 1352 void *array; 1353 1354 switch (v) 1355 { 1356 case 4: 1357 net_data = ifs->ifs_ipf_ipv4; 1358 array = v4addr; 1359 break; 1360 case 6: 1361 net_data = ifs->ifs_ipf_ipv6; 1362 array = v6addr; 1363 break; 1364 default: 1365 net_data = NULL; 1366 break; 1367 } 1368 1369 if (net_data == NULL) 1370 return -1; 1371 1372 phyif = (phy_if_t)ifptr; 1373 1374 switch (atype) 1375 { 1376 case FRI_PEERADDR : 1377 type[0] = NA_PEER; 1378 break; 1379 1380 case FRI_BROADCAST : 1381 type[0] = NA_BROADCAST; 1382 break; 1383 1384 default : 1385 type[0] = NA_ADDRESS; 1386 break; 1387 } 1388 1389 type[1] = NA_NETMASK; 1390 1391 if (v == 6) { 1392 lif_if_t idx = 0; 1393 1394 do { 1395 idx = net_lifgetnext(net_data, phyif, idx); 1396 if (net_getlifaddr(net_data, phyif, idx, 2, type, 1397 array) < 0) 1398 return -1; 1399 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) && 1400 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr)) 1401 break; 1402 } while (idx != 0); 1403 1404 if (idx == 0) 1405 return -1; 1406 1407 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1], 1408 inp, inpmask); 1409 } 1410 1411 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0) 1412 return -1; 1413 1414 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask); 1415 } 1416 1417 1418 u_32_t fr_newisn(fin) 1419 fr_info_t *fin; 1420 { 1421 static int iss_seq_off = 0; 1422 u_char hash[16]; 1423 u_32_t newiss; 1424 MD5_CTX ctx; 1425 ipf_stack_t *ifs = fin->fin_ifs; 1426 1427 /* 1428 * Compute the base value of the ISS. It is a hash 1429 * of (saddr, sport, daddr, dport, secret). 1430 */ 1431 MD5Init(&ctx); 1432 1433 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src, 1434 sizeof(fin->fin_fi.fi_src)); 1435 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst, 1436 sizeof(fin->fin_fi.fi_dst)); 1437 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat)); 1438 1439 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret)); 1440 1441 MD5Final(hash, &ctx); 1442 1443 bcopy(hash, &newiss, sizeof(newiss)); 1444 1445 /* 1446 * Now increment our "timer", and add it in to 1447 * the computed value. 1448 * 1449 * XXX Use `addin'? 1450 * XXX TCP_ISSINCR too large to use? 1451 */ 1452 iss_seq_off += 0x00010000; 1453 newiss += iss_seq_off; 1454 return newiss; 1455 } 1456 1457 1458 /* ------------------------------------------------------------------------ */ 1459 /* Function: fr_nextipid */ 1460 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */ 1461 /* Parameters: fin(I) - pointer to packet information */ 1462 /* */ 1463 /* Returns the next IPv4 ID to use for this packet. */ 1464 /* ------------------------------------------------------------------------ */ 1465 u_short fr_nextipid(fin) 1466 fr_info_t *fin; 1467 { 1468 static u_short ipid = 0; 1469 u_short id; 1470 ipf_stack_t *ifs = fin->fin_ifs; 1471 1472 MUTEX_ENTER(&ifs->ifs_ipf_rw); 1473 if (fin->fin_pktnum != 0) { 1474 id = fin->fin_pktnum & 0xffff; 1475 } else { 1476 id = ipid++; 1477 } 1478 MUTEX_EXIT(&ifs->ifs_ipf_rw); 1479 1480 return id; 1481 } 1482 1483 1484 #ifndef IPFILTER_CKSUM 1485 /* ARGSUSED */ 1486 #endif 1487 INLINE void fr_checkv4sum(fin) 1488 fr_info_t *fin; 1489 { 1490 #ifdef IPFILTER_CKSUM 1491 if (fr_checkl4sum(fin) == -1) 1492 fin->fin_flx |= FI_BAD; 1493 #endif 1494 } 1495 1496 1497 #ifdef USE_INET6 1498 # ifndef IPFILTER_CKSUM 1499 /* ARGSUSED */ 1500 # endif 1501 INLINE void fr_checkv6sum(fin) 1502 fr_info_t *fin; 1503 { 1504 # ifdef IPFILTER_CKSUM 1505 if (fr_checkl4sum(fin) == -1) 1506 fin->fin_flx |= FI_BAD; 1507 # endif 1508 } 1509 #endif /* USE_INET6 */ 1510 1511 1512 #if (SOLARIS2 < 7) 1513 void fr_slowtimer() 1514 #else 1515 /*ARGSUSED*/ 1516 void fr_slowtimer __P((void *arg)) 1517 #endif 1518 { 1519 ipf_stack_t *ifs = arg; 1520 1521 READ_ENTER(&ifs->ifs_ipf_global); 1522 if (ifs->ifs_fr_running != 1) { 1523 ifs->ifs_fr_timer_id = NULL; 1524 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1525 return; 1526 } 1527 ipf_expiretokens(ifs); 1528 fr_fragexpire(ifs); 1529 fr_timeoutstate(ifs); 1530 fr_natexpire(ifs); 1531 fr_authexpire(ifs); 1532 ifs->ifs_fr_ticks++; 1533 if (ifs->ifs_fr_running == 1) 1534 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg, 1535 drv_usectohz(500000)); 1536 else 1537 ifs->ifs_fr_timer_id = NULL; 1538 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1539 } 1540 1541 1542 /* ------------------------------------------------------------------------ */ 1543 /* Function: fr_pullup */ 1544 /* Returns: NULL == pullup failed, else pointer to protocol header */ 1545 /* Parameters: m(I) - pointer to buffer where data packet starts */ 1546 /* fin(I) - pointer to packet information */ 1547 /* len(I) - number of bytes to pullup */ 1548 /* */ 1549 /* Attempt to move at least len bytes (from the start of the buffer) into a */ 1550 /* single buffer for ease of access. Operating system native functions are */ 1551 /* used to manage buffers - if necessary. If the entire packet ends up in */ 1552 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */ 1553 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ 1554 /* and ONLY if the pullup succeeds. */ 1555 /* */ 1556 /* We assume that 'min' is a pointer to a buffer that is part of the chain */ 1557 /* of buffers that starts at *fin->fin_mp. */ 1558 /* ------------------------------------------------------------------------ */ 1559 void *fr_pullup(min, fin, len) 1560 mb_t *min; 1561 fr_info_t *fin; 1562 int len; 1563 { 1564 qpktinfo_t *qpi = fin->fin_qpi; 1565 int out = fin->fin_out, dpoff, ipoff; 1566 mb_t *m = min, *m1, *m2; 1567 char *ip; 1568 uint32_t start, stuff, end, value, flags; 1569 ipf_stack_t *ifs = fin->fin_ifs; 1570 1571 if (m == NULL) 1572 return NULL; 1573 1574 ip = (char *)fin->fin_ip; 1575 if ((fin->fin_flx & FI_COALESCE) != 0) 1576 return ip; 1577 1578 ipoff = fin->fin_ipoff; 1579 if (fin->fin_dp != NULL) 1580 dpoff = (char *)fin->fin_dp - (char *)ip; 1581 else 1582 dpoff = 0; 1583 1584 if (M_LEN(m) < len + ipoff) { 1585 1586 /* 1587 * pfil_precheck ensures the IP header is on a 32bit 1588 * aligned address so simply fail if that isn't currently 1589 * the case (should never happen). 1590 */ 1591 int inc = 0; 1592 1593 if (ipoff > 0) { 1594 if ((ipoff & 3) != 0) { 1595 inc = 4 - (ipoff & 3); 1596 if (m->b_rptr - inc >= m->b_datap->db_base) 1597 m->b_rptr -= inc; 1598 else 1599 inc = 0; 1600 } 1601 } 1602 1603 /* 1604 * XXX This is here as a work around for a bug with DEBUG 1605 * XXX Solaris kernels. The problem is b_prev is used by IP 1606 * XXX code as a way to stash the phyint_index for a packet, 1607 * XXX this doesn't get reset by IP but freeb does an ASSERT() 1608 * XXX for both of these to be NULL. See 6442390. 1609 */ 1610 m1 = m; 1611 m2 = m->b_prev; 1612 1613 do { 1614 m1->b_next = NULL; 1615 m1->b_prev = NULL; 1616 m1 = m1->b_cont; 1617 } while (m1); 1618 1619 /* 1620 * Need to preserve checksum information by copying them 1621 * to newmp which heads the pulluped message. 1622 */ 1623 hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end, 1624 &value, &flags); 1625 1626 if (pullupmsg(m, len + ipoff + inc) == 0) { 1627 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]); 1628 FREE_MB_T(*fin->fin_mp); 1629 *fin->fin_mp = NULL; 1630 fin->fin_m = NULL; 1631 fin->fin_ip = NULL; 1632 fin->fin_dp = NULL; 1633 qpi->qpi_data = NULL; 1634 return NULL; 1635 } 1636 1637 (void) hcksum_assoc(m, NULL, NULL, start, stuff, end, 1638 value, flags, 0); 1639 1640 m->b_prev = m2; 1641 m->b_rptr += inc; 1642 fin->fin_m = m; 1643 ip = MTOD(m, char *) + ipoff; 1644 qpi->qpi_data = ip; 1645 } 1646 1647 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]); 1648 fin->fin_ip = (ip_t *)ip; 1649 if (fin->fin_dp != NULL) 1650 fin->fin_dp = (char *)fin->fin_ip + dpoff; 1651 1652 if (len == fin->fin_plen) 1653 fin->fin_flx |= FI_COALESCE; 1654 return ip; 1655 } 1656 1657 1658 /* 1659 * Function: fr_verifysrc 1660 * Returns: int (really boolean) 1661 * Parameters: fin - packet information 1662 * 1663 * Check whether the packet has a valid source address for the interface on 1664 * which the packet arrived, implementing the "fr_chksrc" feature. 1665 * Returns true iff the packet's source address is valid. 1666 */ 1667 int fr_verifysrc(fin) 1668 fr_info_t *fin; 1669 { 1670 net_handle_t net_data_p; 1671 phy_if_t phy_ifdata_routeto; 1672 struct sockaddr sin; 1673 ipf_stack_t *ifs = fin->fin_ifs; 1674 1675 if (fin->fin_v == 4) { 1676 net_data_p = ifs->ifs_ipf_ipv4; 1677 } else if (fin->fin_v == 6) { 1678 net_data_p = ifs->ifs_ipf_ipv6; 1679 } else { 1680 return (0); 1681 } 1682 1683 /* Get the index corresponding to the if name */ 1684 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1685 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr)); 1686 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL); 1687 1688 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 1689 } 1690 1691 1692 /* 1693 * Function: fr_fastroute 1694 * Returns: 0: success; 1695 * -1: failed 1696 * Parameters: 1697 * mb: the message block where ip head starts 1698 * mpp: the pointer to the pointer of the orignal 1699 * packet message 1700 * fin: packet information 1701 * fdp: destination interface information 1702 * if it is NULL, no interface information provided. 1703 * 1704 * This function is for fastroute/to/dup-to rules. It calls 1705 * pfil_make_lay2_packet to search route, make lay-2 header 1706 * ,and identify output queue for the IP packet. 1707 * The destination address depends on the following conditions: 1708 * 1: for fastroute rule, fdp is passed in as NULL, so the 1709 * destination address is the IP Packet's destination address 1710 * 2: for to/dup-to rule, if an ip address is specified after 1711 * the interface name, this address is the as destination 1712 * address. Otherwise IP Packet's destination address is used 1713 */ 1714 int fr_fastroute(mb, mpp, fin, fdp) 1715 mblk_t *mb, **mpp; 1716 fr_info_t *fin; 1717 frdest_t *fdp; 1718 { 1719 net_handle_t net_data_p; 1720 net_inject_t *inj; 1721 mblk_t *mp = NULL; 1722 frentry_t *fr = fin->fin_fr; 1723 qpktinfo_t *qpi; 1724 ip_t *ip; 1725 1726 struct sockaddr_in *sin; 1727 struct sockaddr_in6 *sin6; 1728 struct sockaddr *sinp; 1729 ipf_stack_t *ifs = fin->fin_ifs; 1730 #ifndef sparc 1731 u_short __iplen, __ipoff; 1732 #endif 1733 1734 if (fin->fin_v == 4) { 1735 net_data_p = ifs->ifs_ipf_ipv4; 1736 } else if (fin->fin_v == 6) { 1737 net_data_p = ifs->ifs_ipf_ipv6; 1738 } else { 1739 return (-1); 1740 } 1741 1742 inj = net_inject_alloc(NETINFO_VERSION); 1743 if (inj == NULL) 1744 return -1; 1745 1746 ip = fin->fin_ip; 1747 qpi = fin->fin_qpi; 1748 1749 /* 1750 * If this is a duplicate mblk then we want ip to point at that 1751 * data, not the original, if and only if it is already pointing at 1752 * the current mblk data. 1753 * 1754 * Otherwise, if it's not a duplicate, and we're not already pointing 1755 * at the current mblk data, then we want to ensure that the data 1756 * points at ip. 1757 */ 1758 1759 if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) { 1760 ip = (ip_t *)mb->b_rptr; 1761 } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) { 1762 qpi->qpi_m->b_rptr = (uchar_t *)ip; 1763 qpi->qpi_off = 0; 1764 } 1765 1766 /* 1767 * If there is another M_PROTO, we don't want it 1768 */ 1769 if (*mpp != mb) { 1770 mp = unlinkb(*mpp); 1771 freeb(*mpp); 1772 *mpp = mp; 1773 } 1774 1775 sinp = (struct sockaddr *)&inj->ni_addr; 1776 sin = (struct sockaddr_in *)sinp; 1777 sin6 = (struct sockaddr_in6 *)sinp; 1778 bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr)); 1779 inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1780 inj->ni_packet = mb; 1781 1782 /* 1783 * In case we're here due to "to <if>" being used with 1784 * "keep state", check that we're going in the correct 1785 * direction. 1786 */ 1787 if (fdp != NULL) { 1788 if ((fr != NULL) && (fdp->fd_ifp != NULL) && 1789 (fin->fin_rev != 0) && (fdp == &fr->fr_tif)) 1790 goto bad_fastroute; 1791 inj->ni_physical = (phy_if_t)fdp->fd_ifp; 1792 if (fin->fin_v == 4) { 1793 sin->sin_addr = fdp->fd_ip; 1794 } else { 1795 sin6->sin6_addr = fdp->fd_ip6.in6; 1796 } 1797 } else { 1798 if (fin->fin_v == 4) { 1799 sin->sin_addr = ip->ip_dst; 1800 } else { 1801 sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst; 1802 } 1803 inj->ni_physical = net_routeto(net_data_p, sinp, NULL); 1804 } 1805 1806 /* 1807 * Clear the hardware checksum flags from packets that we are doing 1808 * input processing on as leaving them set will cause the outgoing 1809 * NIC (if it supports hardware checksum) to calculate them anew, 1810 * using the old (correct) checksums as the pseudo value to start 1811 * from. 1812 */ 1813 if (fin->fin_out == 0) { 1814 DB_CKSUMFLAGS(mb) = 0; 1815 } 1816 1817 *mpp = mb; 1818 1819 if (fin->fin_out == 0) { 1820 void *saveifp; 1821 u_32_t pass; 1822 1823 saveifp = fin->fin_ifp; 1824 fin->fin_ifp = (void *)inj->ni_physical; 1825 fin->fin_flx &= ~FI_STATE; 1826 fin->fin_out = 1; 1827 (void) fr_acctpkt(fin, &pass); 1828 fin->fin_fr = NULL; 1829 if (!fr || !(fr->fr_flags & FR_RETMASK)) 1830 (void) fr_checkstate(fin, &pass); 1831 if (fr_checknatout(fin, NULL) == -1) 1832 goto bad_fastroute; 1833 fin->fin_out = 0; 1834 fin->fin_ifp = saveifp; 1835 } 1836 #ifndef sparc 1837 if (fin->fin_v == 4) { 1838 __iplen = (u_short)ip->ip_len, 1839 __ipoff = (u_short)ip->ip_off; 1840 1841 ip->ip_len = htons(__iplen); 1842 ip->ip_off = htons(__ipoff); 1843 } 1844 #endif 1845 1846 if (net_data_p) { 1847 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) { 1848 net_inject_free(inj); 1849 return (-1); 1850 } 1851 } 1852 1853 ifs->ifs_fr_frouteok[0]++; 1854 net_inject_free(inj); 1855 return 0; 1856 bad_fastroute: 1857 net_inject_free(inj); 1858 freemsg(mb); 1859 ifs->ifs_fr_frouteok[1]++; 1860 return -1; 1861 } 1862 1863 1864 /* ------------------------------------------------------------------------ */ 1865 /* Function: ipf_hook4_out */ 1866 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1867 /* Parameters: event(I) - pointer to event */ 1868 /* info(I) - pointer to hook information for firewalling */ 1869 /* */ 1870 /* Calling ipf_hook. */ 1871 /* ------------------------------------------------------------------------ */ 1872 /*ARGSUSED*/ 1873 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg) 1874 { 1875 return ipf_hook(info, 1, 0, arg); 1876 } 1877 /*ARGSUSED*/ 1878 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg) 1879 { 1880 return ipf_hook6(info, 1, 0, arg); 1881 } 1882 1883 /* ------------------------------------------------------------------------ */ 1884 /* Function: ipf_hook4_in */ 1885 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1886 /* Parameters: event(I) - pointer to event */ 1887 /* info(I) - pointer to hook information for firewalling */ 1888 /* */ 1889 /* Calling ipf_hook. */ 1890 /* ------------------------------------------------------------------------ */ 1891 /*ARGSUSED*/ 1892 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg) 1893 { 1894 return ipf_hook(info, 0, 0, arg); 1895 } 1896 /*ARGSUSED*/ 1897 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg) 1898 { 1899 return ipf_hook6(info, 0, 0, arg); 1900 } 1901 1902 1903 /* ------------------------------------------------------------------------ */ 1904 /* Function: ipf_hook4_loop_out */ 1905 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1906 /* Parameters: event(I) - pointer to event */ 1907 /* info(I) - pointer to hook information for firewalling */ 1908 /* */ 1909 /* Calling ipf_hook. */ 1910 /* ------------------------------------------------------------------------ */ 1911 /*ARGSUSED*/ 1912 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 1913 { 1914 return ipf_hook(info, 1, FI_NOCKSUM, arg); 1915 } 1916 /*ARGSUSED*/ 1917 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 1918 { 1919 return ipf_hook6(info, 1, FI_NOCKSUM, arg); 1920 } 1921 1922 /* ------------------------------------------------------------------------ */ 1923 /* Function: ipf_hook4_loop_in */ 1924 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1925 /* Parameters: event(I) - pointer to event */ 1926 /* info(I) - pointer to hook information for firewalling */ 1927 /* */ 1928 /* Calling ipf_hook. */ 1929 /* ------------------------------------------------------------------------ */ 1930 /*ARGSUSED*/ 1931 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 1932 { 1933 return ipf_hook(info, 0, FI_NOCKSUM, arg); 1934 } 1935 /*ARGSUSED*/ 1936 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 1937 { 1938 return ipf_hook6(info, 0, FI_NOCKSUM, arg); 1939 } 1940 1941 /* ------------------------------------------------------------------------ */ 1942 /* Function: ipf_hook */ 1943 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1944 /* Parameters: info(I) - pointer to hook information for firewalling */ 1945 /* out(I) - whether packet is going in or out */ 1946 /* loopback(I) - whether packet is a loopback packet or not */ 1947 /* */ 1948 /* Stepping stone function between the IP mainline and IPFilter. Extracts */ 1949 /* parameters out of the info structure and forms them up to be useful for */ 1950 /* calling ipfilter. */ 1951 /* ------------------------------------------------------------------------ */ 1952 int ipf_hook(hook_data_t info, int out, int loopback, void *arg) 1953 { 1954 hook_pkt_event_t *fw; 1955 ipf_stack_t *ifs; 1956 qpktinfo_t qpi; 1957 int rval, hlen; 1958 u_short swap; 1959 phy_if_t phy; 1960 ip_t *ip; 1961 1962 ifs = arg; 1963 fw = (hook_pkt_event_t *)info; 1964 1965 ASSERT(fw != NULL); 1966 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp; 1967 1968 ip = fw->hpe_hdr; 1969 swap = ntohs(ip->ip_len); 1970 ip->ip_len = swap; 1971 swap = ntohs(ip->ip_off); 1972 ip->ip_off = swap; 1973 hlen = IPH_HDR_LENGTH(ip); 1974 1975 qpi.qpi_m = fw->hpe_mb; 1976 qpi.qpi_data = fw->hpe_hdr; 1977 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 1978 qpi.qpi_ill = (void *)phy; 1979 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 1980 if (qpi.qpi_flags) 1981 qpi.qpi_flags |= FI_MBCAST; 1982 qpi.qpi_flags |= loopback; 1983 1984 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 1985 &qpi, fw->hpe_mp, ifs); 1986 1987 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 1988 if (rval == 0 && *(fw->hpe_mp) == NULL) 1989 rval = 1; 1990 1991 /* Notify IP the packet mblk_t and IP header pointers. */ 1992 fw->hpe_mb = qpi.qpi_m; 1993 fw->hpe_hdr = qpi.qpi_data; 1994 if (rval == 0) { 1995 ip = qpi.qpi_data; 1996 swap = ntohs(ip->ip_len); 1997 ip->ip_len = swap; 1998 swap = ntohs(ip->ip_off); 1999 ip->ip_off = swap; 2000 } 2001 return rval; 2002 2003 } 2004 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg) 2005 { 2006 hook_pkt_event_t *fw; 2007 int rval, hlen; 2008 qpktinfo_t qpi; 2009 phy_if_t phy; 2010 2011 fw = (hook_pkt_event_t *)info; 2012 2013 ASSERT(fw != NULL); 2014 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp; 2015 2016 hlen = sizeof (ip6_t); 2017 2018 qpi.qpi_m = fw->hpe_mb; 2019 qpi.qpi_data = fw->hpe_hdr; 2020 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2021 qpi.qpi_ill = (void *)phy; 2022 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2023 if (qpi.qpi_flags) 2024 qpi.qpi_flags |= FI_MBCAST; 2025 qpi.qpi_flags |= loopback; 2026 2027 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2028 &qpi, fw->hpe_mp, arg); 2029 2030 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2031 if (rval == 0 && *(fw->hpe_mp) == NULL) 2032 rval = 1; 2033 2034 /* Notify IP the packet mblk_t and IP header pointers. */ 2035 fw->hpe_mb = qpi.qpi_m; 2036 fw->hpe_hdr = qpi.qpi_data; 2037 return rval; 2038 2039 } 2040 2041 2042 /* ------------------------------------------------------------------------ */ 2043 /* Function: ipf_nic_event_v4 */ 2044 /* Returns: int - 0 == no problems encountered */ 2045 /* Parameters: event(I) - pointer to event */ 2046 /* info(I) - pointer to information about a NIC event */ 2047 /* */ 2048 /* Function to receive asynchronous NIC events from IP */ 2049 /* ------------------------------------------------------------------------ */ 2050 /*ARGSUSED*/ 2051 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg) 2052 { 2053 struct sockaddr_in *sin; 2054 hook_nic_event_t *hn; 2055 ipf_stack_t *ifs = arg; 2056 void *new_ifp = NULL; 2057 2058 if (ifs->ifs_fr_running <= 0) 2059 return (0); 2060 2061 hn = (hook_nic_event_t *)info; 2062 2063 switch (hn->hne_event) 2064 { 2065 case NE_PLUMB : 2066 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data, 2067 ifs); 2068 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2069 hn->hne_data, ifs); 2070 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2071 hn->hne_data, ifs); 2072 break; 2073 2074 case NE_UNPLUMB : 2075 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2076 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, 2077 ifs); 2078 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2079 break; 2080 2081 case NE_ADDRESS_CHANGE : 2082 /* 2083 * We only respond to events for logical interface 0 because 2084 * IPFilter only uses the first address given to a network 2085 * interface. We check for hne_lif==1 because the netinfo 2086 * code maps adds 1 to the lif number so that it can return 2087 * 0 to indicate "no more lifs" when walking them. 2088 */ 2089 if (hn->hne_lif == 1) { 2090 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL, 2091 ifs); 2092 sin = hn->hne_data; 2093 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr, 2094 ifs); 2095 } 2096 break; 2097 2098 #if SOLARIS2 >= 10 2099 case NE_IFINDEX_CHANGE : 2100 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2101 2102 if (hn->hne_data != NULL) { 2103 /* 2104 * The netinfo passes interface index as int (hne_data should be 2105 * handled as a pointer to int), which is always 32bit. We need to 2106 * convert it to void pointer here, since interfaces are 2107 * represented as pointers to void in IPF. The pointers are 64 bits 2108 * long on 64bit platforms. Doing something like 2109 * (void *)((int) x) 2110 * will throw warning: 2111 * "cast to pointer from integer of different size" 2112 * during 64bit compilation. 2113 * 2114 * The line below uses (size_t) to typecast int to 2115 * size_t, which might be 64bit/32bit (depending 2116 * on architecture). Once we have proper 64bit/32bit 2117 * type (size_t), we can safely convert it to void pointer. 2118 */ 2119 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2120 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2121 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2122 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2123 } 2124 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2125 break; 2126 #endif 2127 2128 default : 2129 break; 2130 } 2131 2132 return 0; 2133 } 2134 2135 2136 /* ------------------------------------------------------------------------ */ 2137 /* Function: ipf_nic_event_v6 */ 2138 /* Returns: int - 0 == no problems encountered */ 2139 /* Parameters: event(I) - pointer to event */ 2140 /* info(I) - pointer to information about a NIC event */ 2141 /* */ 2142 /* Function to receive asynchronous NIC events from IP */ 2143 /* ------------------------------------------------------------------------ */ 2144 /*ARGSUSED*/ 2145 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg) 2146 { 2147 struct sockaddr_in6 *sin6; 2148 hook_nic_event_t *hn; 2149 ipf_stack_t *ifs = arg; 2150 void *new_ifp = NULL; 2151 2152 if (ifs->ifs_fr_running <= 0) 2153 return (0); 2154 2155 hn = (hook_nic_event_t *)info; 2156 2157 switch (hn->hne_event) 2158 { 2159 case NE_PLUMB : 2160 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2161 hn->hne_data, ifs); 2162 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2163 hn->hne_data, ifs); 2164 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2165 hn->hne_data, ifs); 2166 break; 2167 2168 case NE_UNPLUMB : 2169 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2170 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, 2171 ifs); 2172 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2173 break; 2174 2175 case NE_ADDRESS_CHANGE : 2176 if (hn->hne_lif == 1) { 2177 sin6 = hn->hne_data; 2178 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr, 2179 ifs); 2180 } 2181 break; 2182 2183 #if SOLARIS2 >= 10 2184 case NE_IFINDEX_CHANGE : 2185 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2186 if (hn->hne_data != NULL) { 2187 /* 2188 * The netinfo passes interface index as int (hne_data should be 2189 * handled as a pointer to int), which is always 32bit. We need to 2190 * convert it to void pointer here, since interfaces are 2191 * represented as pointers to void in IPF. The pointers are 64 bits 2192 * long on 64bit platforms. Doing something like 2193 * (void *)((int) x) 2194 * will throw warning: 2195 * "cast to pointer from integer of different size" 2196 * during 64bit compilation. 2197 * 2198 * The line below uses (size_t) to typecast int to 2199 * size_t, which might be 64bit/32bit (depending 2200 * on architecture). Once we have proper 64bit/32bit 2201 * type (size_t), we can safely convert it to void pointer. 2202 */ 2203 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2204 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2205 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2206 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2207 } 2208 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2209 break; 2210 #endif 2211 2212 default : 2213 break; 2214 } 2215 2216 return 0; 2217 } 2218 2219 /* 2220 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6() 2221 * are needed in Solaris kernel only. We don't need them in 2222 * ipftest to pretend the ICMP/RST packet was sent as a response. 2223 */ 2224 #if defined(_KERNEL) && (SOLARIS2 >= 10) 2225 /* ------------------------------------------------------------------------ */ 2226 /* Function: fr_make_rst */ 2227 /* Returns: int - 0 on success, -1 on failure */ 2228 /* Parameters: fin(I) - pointer to packet information */ 2229 /* */ 2230 /* We must alter the original mblks passed to IPF from IP stack via */ 2231 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. */ 2232 /* IPF can basicaly do only these things with mblk representing the packet: */ 2233 /* leave it as it is (pass the packet) */ 2234 /* */ 2235 /* discard it (block the packet) */ 2236 /* */ 2237 /* alter it (i.e. NAT) */ 2238 /* */ 2239 /* As you can see IPF can not simply discard the mblk and supply a new one */ 2240 /* instead to IP stack via FW_HOOKS. */ 2241 /* */ 2242 /* The return-rst action for packets coming via NIC is handled as follows: */ 2243 /* mblk with packet is discarded */ 2244 /* */ 2245 /* new mblk with RST response is constructed and injected to network */ 2246 /* */ 2247 /* IPF can't inject packets to loopback interface, this is just another */ 2248 /* limitation we have to deal with here. The only option to send RST */ 2249 /* response to offending TCP packet coming via loopback is to alter it. */ 2250 /* */ 2251 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */ 2252 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */ 2253 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */ 2254 /* ------------------------------------------------------------------------ */ 2255 int fr_make_rst(fin) 2256 fr_info_t *fin; 2257 { 2258 uint16_t tmp_port; 2259 int rv = -1; 2260 uint32_t old_ack; 2261 tcphdr_t *tcp = NULL; 2262 struct in_addr tmp_src; 2263 #ifdef USE_INET6 2264 struct in6_addr tmp_src6; 2265 #endif 2266 2267 ASSERT(fin->fin_p == IPPROTO_TCP); 2268 2269 /* 2270 * We do not need to adjust chksum, since it is not being checked by 2271 * Solaris IP stack for loopback clients. 2272 */ 2273 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) && 2274 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2275 2276 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2277 /* Swap IPv4 addresses. */ 2278 tmp_src = fin->fin_ip->ip_src; 2279 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2280 fin->fin_ip->ip_dst = tmp_src; 2281 2282 rv = 0; 2283 } 2284 else 2285 tcp = NULL; 2286 } 2287 #ifdef USE_INET6 2288 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) && 2289 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2290 /* 2291 * We are relying on fact the next header is TCP, which is true 2292 * for regular TCP packets coming in over loopback. 2293 */ 2294 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2295 /* Swap IPv6 addresses. */ 2296 tmp_src6 = fin->fin_ip6->ip6_src; 2297 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2298 fin->fin_ip6->ip6_dst = tmp_src6; 2299 2300 rv = 0; 2301 } 2302 else 2303 tcp = NULL; 2304 } 2305 #endif 2306 2307 if (tcp != NULL) { 2308 /* 2309 * Adjust TCP header: 2310 * swap ports, 2311 * set flags, 2312 * set correct ACK number 2313 */ 2314 tmp_port = tcp->th_sport; 2315 tcp->th_sport = tcp->th_dport; 2316 tcp->th_dport = tmp_port; 2317 old_ack = tcp->th_ack; 2318 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1); 2319 tcp->th_seq = old_ack; 2320 tcp->th_flags = TH_RST | TH_ACK; 2321 } 2322 2323 return (rv); 2324 } 2325 2326 /* ------------------------------------------------------------------------ */ 2327 /* Function: fr_make_icmp_v4 */ 2328 /* Returns: int - 0 on success, -1 on failure */ 2329 /* Parameters: fin(I) - pointer to packet information */ 2330 /* */ 2331 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2332 /* what is going to happen here and why. Once you read the comment there, */ 2333 /* continue here with next paragraph. */ 2334 /* */ 2335 /* To turn IPv4 packet into ICMPv4 response packet, these things must */ 2336 /* happen here: */ 2337 /* (1) Original mblk is copied (duplicated). */ 2338 /* */ 2339 /* (2) ICMP header is created. */ 2340 /* */ 2341 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */ 2342 /* data ready then. */ 2343 /* */ 2344 /* (4) Swap IP addresses in original mblk and adjust IP header data. */ 2345 /* */ 2346 /* (5) The mblk containing original packet is trimmed to contain IP */ 2347 /* header only and ICMP chksum is computed. */ 2348 /* */ 2349 /* (6) The ICMP header we have from (3) is linked to original mblk, */ 2350 /* which now contains new IP header. If original packet was spread */ 2351 /* over several mblks, only the first mblk is kept. */ 2352 /* ------------------------------------------------------------------------ */ 2353 static int fr_make_icmp_v4(fin) 2354 fr_info_t *fin; 2355 { 2356 struct in_addr tmp_src; 2357 tcphdr_t *tcp; 2358 struct icmp *icmp; 2359 mblk_t *mblk_icmp; 2360 mblk_t *mblk_ip; 2361 size_t icmp_pld_len; /* octets to append to ICMP header */ 2362 size_t orig_iphdr_len; /* length of IP header only */ 2363 uint32_t sum; 2364 uint16_t *buf; 2365 int len; 2366 2367 2368 if (fin->fin_v != 4) 2369 return (-1); 2370 2371 /* 2372 * If we are dealing with TCP, then packet must be SYN/FIN to be routed 2373 * by IP stack. If it is not SYN/FIN, then we must drop it silently. 2374 */ 2375 tcp = (tcphdr_t *) fin->fin_dp; 2376 2377 if ((fin->fin_p == IPPROTO_TCP) && 2378 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2379 return (-1); 2380 2381 /* 2382 * Step (1) 2383 * 2384 * Make copy of original mblk. 2385 * 2386 * We want to copy as much data as necessary, not less, not more. The 2387 * ICMPv4 payload length for unreachable messages is: 2388 * original IP header + 8 bytes of L4 (if there are any). 2389 * 2390 * We determine if there are at least 8 bytes of L4 data following IP 2391 * header first. 2392 */ 2393 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ? 2394 ICMPERR_ICMPHLEN : fin->fin_dlen; 2395 /* 2396 * Since we don't want to copy more data than necessary, we must trim 2397 * the original mblk here. The right way (STREAMish) would be to use 2398 * adjmsg() to trim it. However we would have to calculate the length 2399 * argument for adjmsg() from pointers we already have here. 2400 * 2401 * Since we have pointers and offsets, it's faster and easier for 2402 * us to just adjust pointers by hand instead of using adjmsg(). 2403 */ 2404 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp; 2405 fin->fin_m->b_wptr += icmp_pld_len; 2406 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip; 2407 2408 /* 2409 * Also we don't want to copy any L2 stuff, which might precede IP 2410 * header, so we have have to set b_rptr to point to the start of IP 2411 * header. 2412 */ 2413 fin->fin_m->b_rptr += fin->fin_ipoff; 2414 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2415 return (-1); 2416 fin->fin_m->b_rptr -= fin->fin_ipoff; 2417 2418 /* 2419 * Step (2) 2420 * 2421 * Create an ICMP header, which will be appened to original mblk later. 2422 * ICMP header is just another mblk. 2423 */ 2424 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI); 2425 if (mblk_icmp == NULL) { 2426 FREE_MB_T(mblk_ip); 2427 return (-1); 2428 } 2429 2430 MTYPE(mblk_icmp) = M_DATA; 2431 icmp = (struct icmp *) mblk_icmp->b_wptr; 2432 icmp->icmp_type = ICMP_UNREACH; 2433 icmp->icmp_code = fin->fin_icode & 0xFF; 2434 icmp->icmp_void = 0; 2435 icmp->icmp_cksum = 0; 2436 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN; 2437 2438 /* 2439 * Step (3) 2440 * 2441 * Complete ICMP packet - link ICMP header with L4 data from original 2442 * IP packet. 2443 */ 2444 linkb(mblk_icmp, mblk_ip); 2445 2446 /* 2447 * Step (4) 2448 * 2449 * Swap IP addresses and change IP header fields accordingly in 2450 * original IP packet. 2451 * 2452 * There is a rule option return-icmp as a dest for physical 2453 * interfaces. This option becomes useless for loopback, since IPF box 2454 * uses same address as a loopback destination. We ignore the option 2455 * here, the ICMP packet will always look like as it would have been 2456 * sent from the original destination host. 2457 */ 2458 tmp_src = fin->fin_ip->ip_src; 2459 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2460 fin->fin_ip->ip_dst = tmp_src; 2461 fin->fin_ip->ip_p = IPPROTO_ICMP; 2462 fin->fin_ip->ip_sum = 0; 2463 2464 /* 2465 * Step (5) 2466 * 2467 * We trim the orignal mblk to hold IP header only. 2468 */ 2469 fin->fin_m->b_wptr = fin->fin_dp; 2470 orig_iphdr_len = fin->fin_m->b_wptr - 2471 (fin->fin_m->b_rptr + fin->fin_ipoff); 2472 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN + 2473 orig_iphdr_len); 2474 2475 /* 2476 * ICMP chksum calculation. The data we are calculating chksum for are 2477 * spread over two mblks, therefore we have to use two for loops. 2478 * 2479 * First for loop computes chksum part for ICMP header. 2480 */ 2481 buf = (uint16_t *) icmp; 2482 len = ICMPERR_ICMPHLEN; 2483 for (sum = 0; len > 1; len -= 2) 2484 sum += *buf++; 2485 2486 /* 2487 * Here we add chksum part for ICMP payload. 2488 */ 2489 len = icmp_pld_len; 2490 buf = (uint16_t *) mblk_ip->b_rptr; 2491 for (; len > 1; len -= 2) 2492 sum += *buf++; 2493 2494 /* 2495 * Chksum is done. 2496 */ 2497 sum = (sum >> 16) + (sum & 0xffff); 2498 sum += (sum >> 16); 2499 icmp->icmp_cksum = ~sum; 2500 2501 /* 2502 * Step (6) 2503 * 2504 * Release all packet mblks, except the first one. 2505 */ 2506 if (fin->fin_m->b_cont != NULL) { 2507 FREE_MB_T(fin->fin_m->b_cont); 2508 } 2509 2510 /* 2511 * Append ICMP payload to first mblk, which already contains new IP 2512 * header. 2513 */ 2514 linkb(fin->fin_m, mblk_icmp); 2515 2516 return (0); 2517 } 2518 2519 #ifdef USE_INET6 2520 /* ------------------------------------------------------------------------ */ 2521 /* Function: fr_make_icmp_v6 */ 2522 /* Returns: int - 0 on success, -1 on failure */ 2523 /* Parameters: fin(I) - pointer to packet information */ 2524 /* */ 2525 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2526 /* what and why is going to happen here. Once you read the comment there, */ 2527 /* continue here with next paragraph. */ 2528 /* */ 2529 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */ 2530 /* The algorithm is fairly simple: */ 2531 /* 1) We need to get copy of complete mblk. */ 2532 /* */ 2533 /* 2) New ICMPv6 header is created. */ 2534 /* */ 2535 /* 3) The copy of original mblk with packet is linked to ICMPv6 */ 2536 /* header. */ 2537 /* */ 2538 /* 4) The checksum must be adjusted. */ 2539 /* */ 2540 /* 5) IP addresses in original mblk are swapped and IP header data */ 2541 /* are adjusted (protocol number). */ 2542 /* */ 2543 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */ 2544 /* linked with the ICMPv6 data we got from (3). */ 2545 /* ------------------------------------------------------------------------ */ 2546 static int fr_make_icmp_v6(fin) 2547 fr_info_t *fin; 2548 { 2549 struct icmp6_hdr *icmp6; 2550 tcphdr_t *tcp; 2551 struct in6_addr tmp_src6; 2552 size_t icmp_pld_len; 2553 mblk_t *mblk_ip, *mblk_icmp; 2554 2555 if (fin->fin_v != 6) 2556 return (-1); 2557 2558 /* 2559 * If we are dealing with TCP, then packet must SYN/FIN to be routed by 2560 * IP stack. If it is not SYN/FIN, then we must drop it silently. 2561 */ 2562 tcp = (tcphdr_t *) fin->fin_dp; 2563 2564 if ((fin->fin_p == IPPROTO_TCP) && 2565 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2566 return (-1); 2567 2568 /* 2569 * Step (1) 2570 * 2571 * We need to copy complete packet in case of IPv6, no trimming is 2572 * needed (except the L2 headers). 2573 */ 2574 icmp_pld_len = M_LEN(fin->fin_m); 2575 fin->fin_m->b_rptr += fin->fin_ipoff; 2576 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2577 return (-1); 2578 fin->fin_m->b_rptr -= fin->fin_ipoff; 2579 2580 /* 2581 * Step (2) 2582 * 2583 * Allocate and create ICMP header. 2584 */ 2585 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr), 2586 BPRI_HI); 2587 2588 if (mblk_icmp == NULL) 2589 return (-1); 2590 2591 MTYPE(mblk_icmp) = M_DATA; 2592 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr; 2593 icmp6->icmp6_type = ICMP6_DST_UNREACH; 2594 icmp6->icmp6_code = fin->fin_icode & 0xFF; 2595 icmp6->icmp6_data32[0] = 0; 2596 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr); 2597 2598 /* 2599 * Step (3) 2600 * 2601 * Link the copy of IP packet to ICMP header. 2602 */ 2603 linkb(mblk_icmp, mblk_ip); 2604 2605 /* 2606 * Step (4) 2607 * 2608 * Calculate chksum - this is much more easier task than in case of 2609 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length. 2610 * We are making compensation just for change of packet length. 2611 */ 2612 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr); 2613 2614 /* 2615 * Step (5) 2616 * 2617 * Swap IP addresses. 2618 */ 2619 tmp_src6 = fin->fin_ip6->ip6_src; 2620 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2621 fin->fin_ip6->ip6_dst = tmp_src6; 2622 2623 /* 2624 * and adjust IP header data. 2625 */ 2626 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6; 2627 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr)); 2628 2629 /* 2630 * Step (6) 2631 * 2632 * We must release all linked mblks from original packet and keep only 2633 * the first mblk with IP header to link ICMP data. 2634 */ 2635 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t); 2636 2637 if (fin->fin_m->b_cont != NULL) { 2638 FREE_MB_T(fin->fin_m->b_cont); 2639 } 2640 2641 /* 2642 * Append ICMP payload to IP header. 2643 */ 2644 linkb(fin->fin_m, mblk_icmp); 2645 2646 return (0); 2647 } 2648 #endif /* USE_INET6 */ 2649 2650 /* ------------------------------------------------------------------------ */ 2651 /* Function: fr_make_icmp */ 2652 /* Returns: int - 0 on success, -1 on failure */ 2653 /* Parameters: fin(I) - pointer to packet information */ 2654 /* */ 2655 /* We must alter the original mblks passed to IPF from IP stack via */ 2656 /* FW_HOOKS. The reasons why we must alter packet are discussed within */ 2657 /* comment at fr_make_rst() function. */ 2658 /* */ 2659 /* The fr_make_icmp() function acts as a wrapper, which passes the code */ 2660 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */ 2661 /* protocol version. However there are some details, which are common to */ 2662 /* both IP versions. The details are going to be explained here. */ 2663 /* */ 2664 /* The packet looks as follows: */ 2665 /* xxx | IP hdr | IP payload ... | */ 2666 /* ^ ^ ^ ^ */ 2667 /* | | | | */ 2668 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */ 2669 /* | | | */ 2670 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */ 2671 /* | | */ 2672 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */ 2673 /* | of loopback) */ 2674 /* | */ 2675 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */ 2676 /* */ 2677 /* All relevant IP headers are pulled up into the first mblk. It happened */ 2678 /* well in advance before the matching rule was found (the rule, which took */ 2679 /* us here, to fr_make_icmp() function). */ 2680 /* */ 2681 /* Both functions will turn packet passed in fin->fin_m mblk into a new */ 2682 /* packet. New packet will be represented as chain of mblks. */ 2683 /* orig mblk |- b_cont ---. */ 2684 /* ^ `-> ICMP hdr |- b_cont--. */ 2685 /* | ^ `-> duped orig mblk */ 2686 /* | | ^ */ 2687 /* `- The original mblk | | */ 2688 /* will be trimmed to | | */ 2689 /* to contain IP header | | */ 2690 /* only | | */ 2691 /* | | */ 2692 /* `- This is newly | */ 2693 /* allocated mblk to | */ 2694 /* hold ICMPv6 data. | */ 2695 /* | */ 2696 /* | */ 2697 /* | */ 2698 /* This is the copy of original mblk, it will contain -' */ 2699 /* orignal IP packet in case of ICMPv6. In case of */ 2700 /* ICMPv4 it will contain up to 8 bytes of IP payload */ 2701 /* (TCP/UDP/L4) data from original packet. */ 2702 /* ------------------------------------------------------------------------ */ 2703 int fr_make_icmp(fin) 2704 fr_info_t *fin; 2705 { 2706 int rv; 2707 2708 if (fin->fin_v == 4) 2709 rv = fr_make_icmp_v4(fin); 2710 #ifdef USE_INET6 2711 else if (fin->fin_v == 6) 2712 rv = fr_make_icmp_v6(fin); 2713 #endif 2714 else 2715 rv = -1; 2716 2717 return (rv); 2718 } 2719 2720 /* ------------------------------------------------------------------------ */ 2721 /* Function: fr_buf_sum */ 2722 /* Returns: unsigned int - sum of buffer buf */ 2723 /* Parameters: buf - pointer to buf we want to sum up */ 2724 /* len - length of buffer buf */ 2725 /* */ 2726 /* Sums buffer buf. The result is used for chksum calculation. The buf */ 2727 /* argument must be aligned. */ 2728 /* ------------------------------------------------------------------------ */ 2729 static uint32_t fr_buf_sum(buf, len) 2730 const void *buf; 2731 unsigned int len; 2732 { 2733 uint32_t sum = 0; 2734 uint16_t *b = (uint16_t *)buf; 2735 2736 while (len > 1) { 2737 sum += *b++; 2738 len -= 2; 2739 } 2740 2741 if (len == 1) 2742 sum += htons((*(unsigned char *)b) << 8); 2743 2744 return (sum); 2745 } 2746 2747 /* ------------------------------------------------------------------------ */ 2748 /* Function: fr_calc_chksum */ 2749 /* Returns: void */ 2750 /* Parameters: fin - pointer to fr_info_t instance with packet data */ 2751 /* pkt - pointer to duplicated packet */ 2752 /* */ 2753 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */ 2754 /* versions. */ 2755 /* ------------------------------------------------------------------------ */ 2756 void fr_calc_chksum(fin, pkt) 2757 fr_info_t *fin; 2758 mb_t *pkt; 2759 { 2760 struct pseudo_hdr { 2761 union { 2762 struct in_addr in4; 2763 #ifdef USE_INET6 2764 struct in6_addr in6; 2765 #endif 2766 } src_addr; 2767 union { 2768 struct in_addr in4; 2769 #ifdef USE_INET6 2770 struct in6_addr in6; 2771 #endif 2772 } dst_addr; 2773 char zero; 2774 char proto; 2775 uint16_t len; 2776 } phdr; 2777 uint32_t sum, ip_sum; 2778 void *buf; 2779 uint16_t *l4_csum_p; 2780 tcphdr_t *tcp; 2781 udphdr_t *udp; 2782 icmphdr_t *icmp; 2783 #ifdef USE_INET6 2784 struct icmp6_hdr *icmp6; 2785 #endif 2786 ip_t *ip; 2787 unsigned int len; 2788 int pld_len; 2789 2790 /* 2791 * We need to pullup the packet to the single continuous buffer to avoid 2792 * potential misaligment of b_rptr member in mblk chain. 2793 */ 2794 if (pullupmsg(pkt, -1) == 0) { 2795 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum" 2796 " will not be computed by IPF"); 2797 return; 2798 } 2799 2800 /* 2801 * It is guaranteed IP header starts right at b_rptr, because we are 2802 * working with a copy of the original packet. 2803 * 2804 * Compute pseudo header chksum for TCP and UDP. 2805 */ 2806 if ((fin->fin_p == IPPROTO_UDP) || 2807 (fin->fin_p == IPPROTO_TCP)) { 2808 bzero(&phdr, sizeof (phdr)); 2809 #ifdef USE_INET6 2810 if (fin->fin_v == 6) { 2811 phdr.src_addr.in6 = fin->fin_srcip6; 2812 phdr.dst_addr.in6 = fin->fin_dstip6; 2813 } else { 2814 phdr.src_addr.in4 = fin->fin_src; 2815 phdr.dst_addr.in4 = fin->fin_dst; 2816 } 2817 #else 2818 phdr.src_addr.in4 = fin->fin_src; 2819 phdr.dst_addr.in4 = fin->fin_dst; 2820 #endif 2821 phdr.zero = (char) 0; 2822 phdr.proto = fin->fin_p; 2823 phdr.len = htons((uint16_t)fin->fin_dlen); 2824 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr)); 2825 } else { 2826 sum = 0; 2827 } 2828 2829 /* 2830 * Set pointer to the L4 chksum field in the packet, set buf pointer to 2831 * the L4 header start. 2832 */ 2833 switch (fin->fin_p) { 2834 case IPPROTO_UDP: 2835 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2836 l4_csum_p = &udp->uh_sum; 2837 buf = udp; 2838 break; 2839 case IPPROTO_TCP: 2840 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2841 l4_csum_p = &tcp->th_sum; 2842 buf = tcp; 2843 break; 2844 case IPPROTO_ICMP: 2845 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2846 l4_csum_p = &icmp->icmp_cksum; 2847 buf = icmp; 2848 break; 2849 #ifdef USE_INET6 2850 case IPPROTO_ICMPV6: 2851 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen); 2852 l4_csum_p = &icmp6->icmp6_cksum; 2853 buf = icmp6; 2854 break; 2855 #endif 2856 default: 2857 l4_csum_p = NULL; 2858 } 2859 2860 /* 2861 * Compute L4 chksum if needed. 2862 */ 2863 if (l4_csum_p != NULL) { 2864 *l4_csum_p = (uint16_t)0; 2865 pld_len = fin->fin_dlen; 2866 len = pkt->b_wptr - (unsigned char *)buf; 2867 ASSERT(len == pld_len); 2868 /* 2869 * Add payload sum to pseudoheader sum. 2870 */ 2871 sum += fr_buf_sum(buf, len); 2872 while (sum >> 16) 2873 sum = (sum & 0xFFFF) + (sum >> 16); 2874 2875 *l4_csum_p = ~((uint16_t)sum); 2876 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p); 2877 } 2878 2879 /* 2880 * The IP header chksum is needed just for IPv4. 2881 */ 2882 if (fin->fin_v == 4) { 2883 /* 2884 * Compute IPv4 header chksum. 2885 */ 2886 ip = (ip_t *)pkt->b_rptr; 2887 ip->ip_sum = (uint16_t)0; 2888 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen); 2889 while (ip_sum >> 16) 2890 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16); 2891 2892 ip->ip_sum = ~((uint16_t)ip_sum); 2893 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum); 2894 } 2895 2896 return; 2897 } 2898 2899 #endif /* _KERNEL && SOLARIS2 >= 10 */ 2900