/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"@(#)spd.c	1.61	08/07/15 SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t **, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, netstack_t *);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(netstack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004

/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG}
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

uint32_t ipsec_spd_hashsize;
uint32_t tun_spd_hashsize;

#define	IPSEC_SEL_NOHASH ((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int,
    ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)
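
/*
 * For example, with align = 64: ALGBITS_ROUND_DOWN(129, 64) == 128 and
 * ALGBITS_ROUND_UP(129, 64) == 192; a value already on the boundary,
 * such as 128, is left unchanged by both macros.
 */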

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2) 						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv4 Fragments
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)

/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries..
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * the in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsl_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}
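
/*
 * For example: installed rules with indices { 5, 5, 7 } sort by index
 * first; the two index-5 entries are then ordered by their node
 * addresses, and a NULL-selector template carrying index 5 compares as
 * less than both of them, which is the placeholder behavior the
 * avl_find() walk in ipsec_policy_delete() depends on.
 */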

/*
 * Free what ipsec_alloc_table allocated.
 */
void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;
	int i;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		for (i = 0; i < ipr->ipr_nchains; i++) {
			ASSERT(ipr->ipr_hash[i].hash_head == NULL);
		}
		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
		ipr->ipr_hash = NULL;
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));

	}
	ipsec_polhead_free_table(iph);
}

/*
 * Free the IPsec stack instance.
 */
/* ARGSUSED */
static void
ipsec_stack_fini(netstackid_t stackid, void *arg)
{
	ipsec_stack_t	*ipss = (ipsec_stack_t *)arg;
	void		*cookie;
	ipsec_tun_pol_t	*node;
	netstack_t	*ns = ipss->ipsec_netstack;
	int		i;
	ipsec_algtype_t	algtype;

	ipsec_loader_destroy(ipss);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&ipss->ipsec_tunnel_policies,
	    &cookie)) != NULL) {
		ITP_REFRELE(node, ns);
	}
	avl_destroy(&ipss->ipsec_tunnel_policies);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	rw_destroy(&ipss->ipsec_tunnel_policy_lock);

	ipsec_config_flush(ns);

	ipsec_kstat_destroy(ipss);

	ip_drop_unregister(&ipss->ipsec_dropper);

	ip_drop_unregister(&ipss->ipsec_spd_dropper);
	ip_drop_destroy(ipss);
	/*
	 * Globals start with ref == 1 to prevent IPPH_REFRELE() from
	 * attempting to free them, hence they should have 1 now.
	 */
	ipsec_polhead_destroy(&ipss->ipsec_system_policy);
	ASSERT(ipss->ipsec_system_policy.iph_refs == 1);
	ipsec_polhead_destroy(&ipss->ipsec_inactive_policy);
	ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
		ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head);
		ipss->ipsec_action_hash[i].hash_head = NULL;
		mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock));
	}

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
		ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock));
	}

	mutex_enter(&ipss->ipsec_alg_lock);
	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype ++) {
		int nalgs = ipss->ipsec_nalgs[algtype];

		for (i = 0; i < nalgs; i++) {
			if (ipss->ipsec_alglists[algtype][i] != NULL)
				ipsec_alg_unreg(algtype, i, ns);
		}
	}
	mutex_exit(&ipss->ipsec_alg_lock);
	mutex_destroy(&ipss->ipsec_alg_lock);

	ipsid_gc(ns);
	ipsid_fini(ns);

	(void) ipsec_free_tables(ipss);
	kmem_free(ipss, sizeof (*ipss));
}

void
ipsec_policy_g_destroy(void)
{
	kmem_cache_destroy(ipsec_action_cache);
	kmem_cache_destroy(ipsec_sel_cache);
	kmem_cache_destroy(ipsec_pol_cache);
	kmem_cache_destroy(ipsec_info_cache);

	ipsec_unregister_prov_update();

	netstack_unregister(NS_IPSEC);
}

/*
 * Free what ipsec_alloc_tables allocated.
 * Called when table allocation fails to free the table.
 */
static int
ipsec_free_tables(ipsec_stack_t *ipss)
{
	int i;

	if (ipss->ipsec_sel_hash != NULL) {
		for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
			ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		}
		kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize *
		    sizeof (*ipss->ipsec_sel_hash));
		ipss->ipsec_sel_hash = NULL;
		ipss->ipsec_spd_hashsize = 0;
	}
	ipsec_polhead_free_table(&ipss->ipsec_system_policy);
	ipsec_polhead_free_table(&ipss->ipsec_inactive_policy);

	return (ENOMEM);
}

/*
 * Attempt to allocate the tables in a single policy head.
 * Return nonzero on failure after cleaning up any work in progress.
 */
int
ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
    boolean_t global_cleanup, netstack_t *ns)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;
		ipr->ipr_hash = kmem_zalloc(nchains *
		    sizeof (ipsec_policy_hash_t), kmflag);
		if (ipr->ipr_hash == NULL)
			return (global_cleanup ?
			    ipsec_free_tables(ns->netstack_ipsec) :
			    ENOMEM);
	}
	return (0);
}
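
/*
 * Sketch of the expected calling pattern (illustrative; see
 * ipsec_alloc_tables() below for the global case):
 *
 *	if (ipsec_alloc_table(iph, nchains, KM_NOSLEEP, B_FALSE, ns) != 0)
 *		return (ENOMEM);
 *	ipsec_polhead_init(iph, nchains);
 *
 * global_cleanup is B_TRUE only for the two global policy heads, where
 * an allocation failure must also unwind the rest of the global tables
 * via ipsec_free_tables().
 */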

/*
 * Attempt to allocate the various tables.  Return nonzero on failure
 * after cleaning up any work in progress.
 */
static int
ipsec_alloc_tables(int kmflag, netstack_t *ns)
{
	int error;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	error = ipsec_alloc_table(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize *
	    sizeof (*ipss->ipsec_sel_hash), kmflag);

	if (ipss->ipsec_sel_hash == NULL)
		return (ipsec_free_tables(ipss));

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}

static boolean_t
ipsec_kstat_init(ipsec_stack_t *ipss)
{
	ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net",
	    KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid);

	if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL)
		return (B_FALSE);

	ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data;

#define	KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64)
	KI(esp_stat_in_requests);
	KI(esp_stat_in_discards);
	KI(esp_stat_lookup_failure);
	KI(ah_stat_in_requests);
	KI(ah_stat_in_discards);
	KI(ah_stat_lookup_failure);
	KI(sadb_acquire_maxpackets);
	KI(sadb_acquire_qhiwater);
#undef KI

	kstat_install(ipss->ipsec_ksp);
	return (B_TRUE);
}

static void
ipsec_kstat_destroy(ipsec_stack_t *ipss)
{
	kstat_delete_netstack(ipss->ipsec_ksp,
	    ipss->ipsec_netstack->netstack_stackid);
	ipss->ipsec_kstats = NULL;
}

/*
 * Initialize the IPsec stack instance.
 */
/* ARGSUSED */
static void *
ipsec_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsec_stack_t	*ipss;
	int		i;

	ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP);
	ipss->ipsec_netstack = ns;

	/*
	 * FIXME: netstack_ipsec is used by some of the routines we call
	 * below, but it isn't set until this routine returns.
	 * Either we introduce optional xxx_stack_alloc() functions
	 * that will be called by the netstack framework before xxx_stack_init,
	 * or we switch spd.c and sadb.c to operate on ipsec_stack_t
	 * (latter has some include file order issues for sadb.h, but makes
	 * sense if we merge some of the ipsec related stack_t's together.)
	 */
	ns->netstack_ipsec = ipss;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ?
	    IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize;

	if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipss->ipsec_spd_hashsize);
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipss->ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP, ns);
	}

	/* Just set a default for tunnels. */
	ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ?
	    TUN_SPDHASH_DEFAULT : tun_spd_hashsize;

	ipsid_init(ns);
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	ipss->ipsec_system_policy.iph_refs = 1;
	ipss->ipsec_inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize);
	ipsec_polhead_init(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize);
	rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), 0);

	ipss->ipsec_next_policy_index = 1;

	rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipss->ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++)
		mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL);
	for (i = 0; i < IPSEC_NALGTYPES; i++) {
		ipss->ipsec_nalgs[i] = 0;
	}

	ip_drop_init(ipss);
	ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD");

	/* Set function to dummy until tun is loaded */
	rw_init(&ipss->ipsec_itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
	rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER);
	ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy;
	rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);

	/* IP's IPsec code calls the packet dropper */
	ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing");

	(void) ipsec_kstat_init(ipss);

	ipsec_loader_init(ipss);
	ipsec_loader_start(ipss);

	return (ipss);
}

/* Global across all stack instances */
void
ipsec_policy_g_init(void)
{
	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_info_cache = kmem_cache_create("ipsec_info",
	    sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsec_stack_t's.
	 */
	netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini);
}

/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_alginfo_t	*ai = ipss->ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipss->ipsec_nalgs[at];
	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipss->ipsec_sortlist[at][i];
			ipss->ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		} /* Else just continue. */
	}

	/* Store holder in last slot. */
	ipss->ipsec_sortlist[at][i] = holder;
}
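
/*
 * For example: if the sortlist currently holds algorithms with minimum
 * key sizes { 256, 128 } and an algorithm with a 192-bit minimum is
 * registered, the loop above yields { 256, 192, 128 }; with the strict
 * ">" comparison, an equal-minbits newcomer lands after the algorithms
 * already installed.
 */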

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	boolean_t copyback = B_FALSE;
	int i;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	int newcount = ipss->ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback) {
			ipss->ipsec_sortlist[at][i-1] =
			    ipss->ipsec_sortlist[at][i];
		} else if (ipss->ipsec_sortlist[at][i] == algid) {
			copyback = B_TRUE;
		}
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype, ns);
	ipss->ipsec_alglists[algtype][alg->alg_id] = alg;

	ipss->ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id, ns);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]);
	ipss->ipsec_alglists[algtype][algid] = NULL;

	ipss->ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid, ns);
}

/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_system_policy;

	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy;

	IPPH_REFHOLD(h);
	return (h);
}

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive,
    netstack_t *ns)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}

		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags(ns->netstack_ipsec);
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ipsec_swap_policy(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns);
}

/*
 * Clone one policy rule..
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}


static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);
		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}


/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph,
    netstack_t *ns)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph, ns);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];
		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph, ns);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	return (ipsec_copy_polhead(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns));
}
"secure" : "not secure"), s, d); 1106 } 1107 1108 /* 1109 * Rate-limiting front-end to strlog() for AH and ESP. Uses the ndd variables 1110 * in /dev/ip and the same rate-limiting clock so that there's a single 1111 * knob to turn to throttle the rate of messages. 1112 */ 1113 void 1114 ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl, 1115 char *fmt, ...) 1116 { 1117 va_list adx; 1118 hrtime_t current = gethrtime(); 1119 ip_stack_t *ipst = ns->netstack_ip; 1120 ipsec_stack_t *ipss = ns->netstack_ipsec; 1121 1122 sl |= SL_CONSOLE; 1123 /* 1124 * Throttle logging to stop syslog from being swamped. If variable 1125 * 'ipsec_policy_log_interval' is zero, don't log any messages at 1126 * all, otherwise log only one message every 'ipsec_policy_log_interval' 1127 * msec. Convert interval (in msec) to hrtime (in nsec). 1128 */ 1129 1130 if (ipst->ips_ipsec_policy_log_interval) { 1131 if (ipss->ipsec_policy_failure_last + 1132 ((hrtime_t)ipst->ips_ipsec_policy_log_interval * 1133 (hrtime_t)1000000) <= current) { 1134 va_start(adx, fmt); 1135 (void) vstrlog(mid, sid, level, sl, fmt, adx); 1136 va_end(adx); 1137 ipss->ipsec_policy_failure_last = current; 1138 } 1139 } 1140 } 1141 1142 void 1143 ipsec_config_flush(netstack_t *ns) 1144 { 1145 ipsec_stack_t *ipss = ns->netstack_ipsec; 1146 1147 rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER); 1148 ipsec_polhead_flush(&ipss->ipsec_system_policy, ns); 1149 ipss->ipsec_next_policy_index = 1; 1150 rw_exit(&ipss->ipsec_system_policy.iph_lock); 1151 ipsec_action_reclaim_stack(ns); 1152 } 1153 1154 /* 1155 * Clip a policy's min/max keybits vs. the capabilities of the 1156 * algorithm. 1157 */ 1158 static void 1159 act_alg_adjust(uint_t algtype, uint_t algid, 1160 uint16_t *minbits, uint16_t *maxbits, netstack_t *ns) 1161 { 1162 ipsec_stack_t *ipss = ns->netstack_ipsec; 1163 ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid]; 1164 1165 if (algp != NULL) { 1166 /* 1167 * If passed-in minbits is zero, we assume the caller trusts 1168 * us with setting the minimum key size. We pick the 1169 * algorithms DEFAULT key size for the minimum in this case. 1170 */ 1171 if (*minbits == 0) { 1172 *minbits = algp->alg_default_bits; 1173 ASSERT(*minbits >= algp->alg_minbits); 1174 } else { 1175 *minbits = MAX(MIN(*minbits, algp->alg_maxbits), 1176 algp->alg_minbits); 1177 } 1178 if (*maxbits == 0) 1179 *maxbits = algp->alg_maxbits; 1180 else 1181 *maxbits = MIN(MAX(*maxbits, algp->alg_minbits), 1182 algp->alg_maxbits); 1183 ASSERT(*minbits <= *maxbits); 1184 } else { 1185 *minbits = 0; 1186 *maxbits = 0; 1187 } 1188 } 1189 1190 /* 1191 * Check an action's requested algorithms against the algorithms currently 1192 * loaded in the system. 

void
ipsec_config_flush(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER);
	ipsec_polhead_flush(&ipss->ipsec_system_policy, ns);
	ipss->ipsec_next_policy_index = 1;
	rw_exit(&ipss->ipsec_system_policy.iph_lock);
	ipsec_action_reclaim_stack(ns);
}

/*
 * Clip a policy's min/max keybits vs. the capabilities of the
 * algorithm.
 */
static void
act_alg_adjust(uint_t algtype, uint_t algid,
    uint16_t *minbits, uint16_t *maxbits, netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_alginfo_t	*algp = ipss->ipsec_alglists[algtype][algid];

	if (algp != NULL) {
		/*
		 * If passed-in minbits is zero, we assume the caller trusts
		 * us with setting the minimum key size.  We pick the
		 * algorithm's DEFAULT key size for the minimum in this case.
		 */
		if (*minbits == 0) {
			*minbits = algp->alg_default_bits;
			ASSERT(*minbits >= algp->alg_minbits);
		} else {
			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
			    algp->alg_minbits);
		}
		if (*maxbits == 0)
			*maxbits = algp->alg_maxbits;
		else
			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
			    algp->alg_maxbits);
		ASSERT(*minbits <= *maxbits);
	} else {
		*minbits = 0;
		*maxbits = 0;
	}
}
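
/*
 * For example, given an algorithm with alg_minbits = 128,
 * alg_default_bits = 128, and alg_maxbits = 256: a request of
 * (minbits = 0, maxbits = 512) is adjusted to (128, 256), and a
 * request of (64, 192) is clamped to (128, 192).
 */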

/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss) \
	((wild)?(ipss->ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}
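
/*
 * For example: an IPSEC_ACT_APPLY action with wildcard (zero) AH
 * authentication and ESP encryption algorithms, with two authentication
 * and three encryption algorithms loaded (not counting any "none"
 * placeholder entries), expands to 2 * 3 == 6 concrete actions, one per
 * algorithm combination.
 */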

/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg);
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	/*
	 * SADB_AALG_ANY is a placeholder to distinguish "any" from
	 * "none" above.  If auth is required, as determined above,
	 * SADB_AALG_ANY becomes 0, which is the representation
	 * of "any" and "none" in PF_KEY v2.
	 */
	ipp->ipp_auth_alg = (req->ipsr_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_auth_alg : 0;
	ipp->ipp_esp_auth_alg = (req->ipsr_esp_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_esp_auth_alg : 0;
}
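
/*
 * For example, (req->ipsr_ah_req & IPSEC_PREF_REQUIRED) may yield any
 * nonzero bit pattern; "!!" maps nonzero to exactly 1 and zero to 0,
 * the only values a one-bit bitfield such as ipp_use_ah can store.
 */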

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp,
    netstack_t *ns)
{
	struct ipsec_act act;

	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp, ns);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}

/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	mutex_enter(&connp->conn_lock);
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (ipl->ipl_in_action != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
			goto done;
		}
		if (ipl->ipl_in_policy != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
			    req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	mutex_exit(&connp->conn_lock);
	return (rv);
}

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * When outbound policy is not cached, look it up the hard way and attach
 * an ipsec_out_t to the packet..
 */
static mblk_t *
ipsec_attach_global_policy(mblk_t **mp, conn_t *connp, ipsec_selector_t *sel,
    netstack_t *ns)
{
	ipsec_policy_t *p;

	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns);

	if (p == NULL)
		return (NULL);
	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns));
}

/*
 * We have an ipsec_out already, but don't have cached policy; fill it in
 * with the right actions.
 */
static mblk_t *
ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
    ipsec_selector_t *sel, netstack_t *ns)
{
	ipsec_out_t *io;
	ipsec_policy_t *p;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	if (io->ipsec_out_policy == NULL) {
		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns);
		io->ipsec_out_policy = p;
	}
	return (ipsec_mp);
}


/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present,
    ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;
	netstack_t *ns;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ns = ii->ipsec_in_ns;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp, ns);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (first_mp);
}

/*
 * Check that packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(ii->ipsec_in_secure);
	ASSERT(!ii->ipsec_in_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */

	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_ah_innermismatch);
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_esp_innermismatch);
		return (B_FALSE);
	}
	return (B_TRUE);
}

static boolean_t
ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ii->ipsec_in_loopback) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * icmp_loopback is initialized in ip.c and ip6.c.  (Look for
		 * ipsec_out_icmp_loopback.)
		 */
		if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback)
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = DROPPER(ipss, ipds_spd_loopback_mismatch);
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!ii->ipsec_in_icmp_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;

	decaps = ii->ipsec_in_decaps;

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = DROPPER(ipss, ipds_spd_explicit);
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = DROPPER(ipss, ipds_spd_got_secure);
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_ahalg);
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_ah);
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay. But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
}

/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fallback to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}
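
/*
 * The unique id built above (remote port, local port, protocol; the
 * final 0 argument presumably marks the packet as not tunneled) is what
 * ipsec_check_ipsecin_unique() later compares, under each SA's
 * ipsa_unique_mask, against that SA's ipsa_unique_id.
 */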

/*
 * Called to check policy on a latched connection, both from this file
 * and from tcp.c
 */
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    conn_t *connp)
{
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(ipl->ipl_ids_latched == B_TRUE);

	if (!ii->ipsec_in_loopback) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if ((ii->ipsec_in_ah_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
			*counter = DROPPER(ipss, ipds_spd_ah_badid);
			*reason = "AH identity mismatch";
			return (B_FALSE);
		}

		if ((ii->ipsec_in_esp_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
			*counter = DROPPER(ipss, ipds_spd_esp_badid);
			*reason = "ESP identity mismatch";
			return (B_FALSE);
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h))) {
			return (B_FALSE);
		}
	}

	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
	    ipha, ip6h, reason, counter));
}
1953 */ 1954 static mblk_t * 1955 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1956 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) 1957 { 1958 ipsec_in_t *ii; 1959 ipsec_action_t *ap; 1960 const char *reason = "no policy actions found"; 1961 mblk_t *data_mp, *ipsec_mp; 1962 ipsec_stack_t *ipss = ns->netstack_ipsec; 1963 ip_stack_t *ipst = ns->netstack_ip; 1964 kstat_named_t *counter; 1965 1966 counter = DROPPER(ipss, ipds_spd_got_secure); 1967 1968 data_mp = first_mp->b_cont; 1969 ipsec_mp = first_mp; 1970 1971 ASSERT(ipsp != NULL); 1972 1973 ASSERT((ipha == NULL && ip6h != NULL) || 1974 (ip6h == NULL && ipha != NULL)); 1975 1976 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1977 1978 if (ii->ipsec_in_loopback) 1979 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1980 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1981 ASSERT(ii->ipsec_in_secure); 1982 1983 if (ii->ipsec_in_action != NULL) { 1984 /* 1985 * this can happen if we do a double policy-check on a packet 1986 * Would be nice to be able to delete this test.. 1987 */ 1988 IPACT_REFRELE(ii->ipsec_in_action); 1989 } 1990 ASSERT(ii->ipsec_in_action == NULL); 1991 1992 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1993 reason = "inbound AH and ESP identities differ"; 1994 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1995 goto drop; 1996 } 1997 1998 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1999 goto drop; 2000 2001 /* 2002 * Ok, now loop through the possible actions and see if any 2003 * of them work for us. 2004 */ 2005 2006 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 2007 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 2008 ipha, ip6h, &reason, &counter)) { 2009 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2010 IPPOL_REFRELE(ipsp, ns); 2011 return (first_mp); 2012 } 2013 } 2014 drop: 2015 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 2016 "ipsec inbound policy mismatch: %s, packet dropped\n", 2017 reason); 2018 IPPOL_REFRELE(ipsp, ns); 2019 ASSERT(ii->ipsec_in_action == NULL); 2020 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2021 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2022 &ipss->ipsec_spd_dropper); 2023 return (NULL); 2024 } 2025 2026 /* 2027 * sleazy prefix-length-based compare. 2028 * another inlining candidate.. 2029 */ 2030 boolean_t 2031 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 2032 { 2033 int offset = pfxlen>>3; 2034 int bitsleft = pfxlen & 7; 2035 uint8_t *addr2 = (uint8_t *)addr2p; 2036 2037 /* 2038 * and there was much evil.. 2039 * XXX should inline-expand the bcmp here and do this 32 bits 2040 * or 64 bits at a time.. 2041 */ 2042 return ((bcmp(addr1, addr2, offset) == 0) && 2043 ((bitsleft == 0) || 2044 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0))); 2045 } 2046 2047 static ipsec_policy_t * 2048 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 2049 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 2050 { 2051 ipsec_selkey_t *isel; 2052 ipsec_policy_t *p; 2053 int bpri = best ? 
best->ipsp_prio : 0; 2054 2055 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 2056 uint32_t valid; 2057 2058 if (p->ipsp_prio <= bpri) 2059 continue; 2060 isel = &p->ipsp_sel->ipsl_key; 2061 valid = isel->ipsl_valid; 2062 2063 if ((valid & IPSL_PROTOCOL) && 2064 (isel->ipsl_proto != sel->ips_protocol)) 2065 continue; 2066 2067 if ((valid & IPSL_REMOTE_ADDR) && 2068 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 2069 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6)) 2070 continue; 2071 2072 if ((valid & IPSL_LOCAL_ADDR) && 2073 !ip_addr_match((uint8_t *)&isel->ipsl_local, 2074 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6)) 2075 continue; 2076 2077 if ((valid & IPSL_REMOTE_PORT) && 2078 isel->ipsl_rport != sel->ips_remote_port) 2079 continue; 2080 2081 if ((valid & IPSL_LOCAL_PORT) && 2082 isel->ipsl_lport != sel->ips_local_port) 2083 continue; 2084 2085 if (!is_icmp_inv_acq) { 2086 if ((valid & IPSL_ICMP_TYPE) && 2087 (isel->ipsl_icmp_type > sel->ips_icmp_type || 2088 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 2089 continue; 2090 } 2091 2092 if ((valid & IPSL_ICMP_CODE) && 2093 (isel->ipsl_icmp_code > sel->ips_icmp_code || 2094 isel->ipsl_icmp_code_end < 2095 sel->ips_icmp_code)) { 2096 continue; 2097 } 2098 } else { 2099 /* 2100 * special case for icmp inverse acquire 2101 * we only want policies that aren't drop/pass 2102 */ 2103 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2104 continue; 2105 } 2106 2107 /* we matched all the packet-port-field selectors! */ 2108 best = p; 2109 bpri = p->ipsp_prio; 2110 } 2111 2112 return (best); 2113 } 2114 2115 /* 2116 * Try to find and return the best policy entry under a given policy 2117 * root for a given set of selectors; the first parameter "best" is 2118 * the current best policy so far. If "best" is non-null, we have a 2119 * reference to it. We return a reference to a policy; if that policy 2120 * is not the original "best", we need to release that reference 2121 * before returning. 2122 */ 2123 ipsec_policy_t * 2124 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2125 int direction, ipsec_selector_t *sel, netstack_t *ns) 2126 { 2127 ipsec_policy_t *curbest; 2128 ipsec_policy_root_t *root; 2129 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2130 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2131 2132 curbest = best; 2133 root = &head->iph_root[direction]; 2134 2135 #ifdef DEBUG 2136 if (is_icmp_inv_acq) { 2137 if (sel->ips_isv4) { 2138 if (sel->ips_protocol != IPPROTO_ICMP) { 2139 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2140 " expecting icmp, got %d", 2141 sel->ips_protocol); 2142 } 2143 } else { 2144 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2145 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2146 " expecting icmpv6, got %d", 2147 sel->ips_protocol); 2148 } 2149 } 2150 } 2151 #endif 2152 2153 rw_enter(&head->iph_lock, RW_READER); 2154 2155 if (root->ipr_nchains > 0) { 2156 curbest = ipsec_find_policy_chain(curbest, 2157 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2158 is_icmp_inv_acq); 2159 } 2160 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2161 is_icmp_inv_acq); 2162 2163 /* 2164 * Adjust reference counts if we found anything new. 
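 * The caller always owns exactly one reference to whatever pointer we hand back: a new winner picks up an IPPOL_REFHOLD() below, and the superseded "best" (if any) has the caller's reference dropped via IPPOL_REFRELE().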
2165 */ 2166 if (curbest != best) { 2167 ASSERT(curbest != NULL); 2168 IPPOL_REFHOLD(curbest); 2169 2170 if (best != NULL) { 2171 IPPOL_REFRELE(best, ns); 2172 } 2173 } 2174 2175 rw_exit(&head->iph_lock); 2176 2177 return (curbest); 2178 } 2179 2180 /* 2181 * Find the best system policy (either global or per-interface) which 2182 * applies to the given selector; look in all the relevant policy roots 2183 * to figure out which policy wins. 2184 * 2185 * Returns a reference to a policy; caller must release this 2186 * reference when done. 2187 */ 2188 ipsec_policy_t * 2189 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2190 ipsec_selector_t *sel, netstack_t *ns) 2191 { 2192 ipsec_policy_t *p; 2193 ipsec_stack_t *ipss = ns->netstack_ipsec; 2194 2195 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2196 direction, sel, ns); 2197 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2198 p = ipsec_find_policy_head(p, connp->conn_policy, 2199 direction, sel, ns); 2200 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2201 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2202 direction, sel, ns); 2203 } 2204 2205 return (p); 2206 } 2207 2208 /* 2209 * Check with global policy and see whether this inbound 2210 * packet meets the policy constraints. 2211 * 2212 * Locate appropriate policy from global policy, supplemented by the 2213 * conn's configured and/or cached policy if the conn is supplied. 2214 * 2215 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2216 * encrypted packet to see if they match. 2217 * 2218 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2219 * floor. 2220 */ 2221 mblk_t * 2222 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2223 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) 2224 { 2225 ipsec_policy_t *p; 2226 ipsec_selector_t sel; 2227 mblk_t *data_mp, *ipsec_mp; 2228 boolean_t policy_present; 2229 kstat_named_t *counter; 2230 ipsec_in_t *ii = NULL; 2231 uint64_t pkt_unique; 2232 ipsec_stack_t *ipss = ns->netstack_ipsec; 2233 ip_stack_t *ipst = ns->netstack_ip; 2234 2235 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2236 ipsec_mp = mctl_present ? first_mp : NULL; 2237 2238 sel.ips_is_icmp_inv_acq = 0; 2239 2240 ASSERT((ipha == NULL && ip6h != NULL) || 2241 (ip6h == NULL && ipha != NULL)); 2242 2243 if (ipha != NULL) 2244 policy_present = ipss->ipsec_inbound_v4_policy_present; 2245 else 2246 policy_present = ipss->ipsec_inbound_v6_policy_present; 2247 2248 if (!policy_present && connp == NULL) { 2249 /* 2250 * No global policy and no per-socket policy; 2251 * just pass it back (but we shouldn't get here in that case) 2252 */ 2253 return (first_mp); 2254 } 2255 2256 if (ipsec_mp != NULL) { 2257 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2258 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2259 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2260 } 2261 2262 /* 2263 * If we have cached policy, use it. 2264 * Otherwise consult system policy. 2265 */ 2266 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2267 p = connp->conn_latch->ipl_in_policy; 2268 if (p != NULL) { 2269 IPPOL_REFHOLD(p); 2270 } 2271 /* 2272 * Fudge sel for UNIQUE_ID setting below. 2273 */ 2274 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2275 } else { 2276 /* Initialize the ports in the selector */ 2277 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2278 SEL_NONE) == SELRET_NOMEM) { 2279 /* 2280 * Technically not a policy mismatch, but it is 2281 * an internal failure. 
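 * (SELRET_NOMEM means the msgpullup() inside ipsec_init_inbound_sel() failed, so this is charged to the nomem dropper rather than to a policy counter.)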
2282 */ 2283 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2284 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE, ns); 2285 counter = DROPPER(ipss, ipds_spd_nomem); 2286 goto fail; 2287 } 2288 2289 /* 2290 * Find the policy which best applies. 2291 * 2292 * If we find global policy, we should look at both 2293 * local policy and global policy and see which is 2294 * stronger and match accordingly. 2295 * 2296 * If we don't find a global policy, check with 2297 * local policy alone. 2298 */ 2299 2300 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 2301 ns); 2302 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2303 sel.ips_local_port, sel.ips_protocol, 0); 2304 } 2305 2306 if (p == NULL) { 2307 if (ipsec_mp == NULL) { 2308 /* 2309 * We have no policy; default to succeeding. 2310 * XXX paranoid system design doesn't do this. 2311 */ 2312 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2313 return (first_mp); 2314 } else { 2315 counter = DROPPER(ipss, ipds_spd_got_secure); 2316 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2317 "ipsec_check_global_policy", ipha, ip6h, B_TRUE, 2318 ns); 2319 goto fail; 2320 } 2321 } 2322 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2323 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2324 pkt_unique, ns)); 2325 } 2326 if (p->ipsp_act->ipa_allow_clear) { 2327 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2328 IPPOL_REFRELE(p, ns); 2329 return (first_mp); 2330 } 2331 IPPOL_REFRELE(p, ns); 2332 /* 2333 * If we reach here, we will drop the packet because it failed the 2334 * global policy check because the packet was cleartext, and it 2335 * should not have been. 2336 */ 2337 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2338 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns); 2339 counter = DROPPER(ipss, ipds_spd_got_clear); 2340 2341 fail: 2342 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2343 &ipss->ipsec_spd_dropper); 2344 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2345 return (NULL); 2346 } 2347 2348 /* 2349 * We check whether an inbound datagram is a valid one 2350 * to accept in clear. If it is secure, it is the job 2351 * of IPSEC to log information appropriately if it 2352 * suspects that it may not be the real one. 2353 * 2354 * It is called only while fanning out to the ULP 2355 * where ULP accepts only secure data and the incoming 2356 * is clear. Usually we never accept clear datagrams in 2357 * such cases. ICMP is the only exception. 2358 * 2359 * NOTE : We don't call this function if the client (ULP) 2360 * is willing to accept things in clear. 2361 */ 2362 boolean_t 2363 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2364 { 2365 ushort_t iph_hdr_length; 2366 icmph_t *icmph; 2367 icmp6_t *icmp6; 2368 uint8_t *nexthdrp; 2369 2370 ASSERT((ipha != NULL && ip6h == NULL) || 2371 (ipha == NULL && ip6h != NULL)); 2372 2373 if (ip6h != NULL) { 2374 iph_hdr_length = ip_hdr_length_v6(mp, ip6h); 2375 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, 2376 &nexthdrp)) { 2377 return (B_FALSE); 2378 } 2379 if (*nexthdrp != IPPROTO_ICMPV6) 2380 return (B_FALSE); 2381 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]); 2382 /* Match IPv6 ICMP policy as closely as IPv4 as possible. */ 2383 switch (icmp6->icmp6_type) { 2384 case ICMP6_PARAM_PROB: 2385 /* Corresponds to port/proto unreach in IPv4. */ 2386 case ICMP6_ECHO_REQUEST: 2387 /* Just like IPv4. 
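 * (Refused in the clear for the same chosen-plaintext concern as the IPv4 ICMP_ECHO_REQUEST case below.)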
*/ 2388 return (B_FALSE); 2389 2390 case MLD_LISTENER_QUERY: 2391 case MLD_LISTENER_REPORT: 2392 case MLD_LISTENER_REDUCTION: 2393 /* 2394 * XXX Separate NDD in IPv4 -- what about here? 2395 * Plus, mcast is important to ND. 2396 */ 2397 case ICMP6_DST_UNREACH: 2398 /* Corresponds to HOST/NET unreachable in IPv4. */ 2399 case ICMP6_PACKET_TOO_BIG: 2400 case ICMP6_ECHO_REPLY: 2401 /* These are trusted in IPv4. */ 2402 case ND_ROUTER_SOLICIT: 2403 case ND_ROUTER_ADVERT: 2404 case ND_NEIGHBOR_SOLICIT: 2405 case ND_NEIGHBOR_ADVERT: 2406 case ND_REDIRECT: 2407 /* Trust ND messages for now. */ 2408 case ICMP6_TIME_EXCEEDED: 2409 default: 2410 return (B_TRUE); 2411 } 2412 } else { 2413 /* 2414 * If it is not ICMP, fail this request. 2415 */ 2416 if (ipha->ipha_protocol != IPPROTO_ICMP) { 2417 #ifdef FRAGCACHE_DEBUG 2418 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n", 2419 ipha->ipha_protocol); 2420 #endif 2421 return (B_FALSE); 2422 } 2423 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2424 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2425 /* 2426 * It is an insecure icmp message. Check to see whether we are 2427 * willing to accept this one. 2428 */ 2429 2430 switch (icmph->icmph_type) { 2431 case ICMP_ECHO_REPLY: 2432 case ICMP_TIME_STAMP_REPLY: 2433 case ICMP_INFO_REPLY: 2434 case ICMP_ROUTER_ADVERTISEMENT: 2435 /* 2436 * We should not encourage clear replies if this 2437 * client expects secure. If somebody is replying 2438 * in clear, some malicious user watching both the 2439 * request and reply can do chosen-plaintext attacks. 2440 * With global policy we might be just expecting secure 2441 * but sending out clear. We don't know what the right 2442 * thing is. We can't do much here as we can't control 2443 * the sender here. Till we are sure of what to do, 2444 * accept them. 2445 */ 2446 return (B_TRUE); 2447 case ICMP_ECHO_REQUEST: 2448 case ICMP_TIME_STAMP_REQUEST: 2449 case ICMP_INFO_REQUEST: 2450 case ICMP_ADDRESS_MASK_REQUEST: 2451 case ICMP_ROUTER_SOLICITATION: 2452 case ICMP_ADDRESS_MASK_REPLY: 2453 /* 2454 * Don't accept this as somebody could be sending 2455 * us plain text to get encrypted data. If we reply, 2456 * it will lead to a chosen-plaintext attack. 2457 */ 2458 return (B_FALSE); 2459 case ICMP_DEST_UNREACHABLE: 2460 switch (icmph->icmph_code) { 2461 case ICMP_FRAGMENTATION_NEEDED: 2462 /* 2463 * Be in sync with icmp_inbound, where we have 2464 * already set ire_max_frag. 2465 */ 2466 #ifdef FRAGCACHE_DEBUG 2467 cmn_err(CE_WARN, "ICMP frag needed\n"); 2468 #endif 2469 return (B_TRUE); 2470 case ICMP_HOST_UNREACHABLE: 2471 case ICMP_NET_UNREACHABLE: 2472 /* 2473 * By accepting, we could reset a connection. 2474 * How do we solve the problem of some 2475 * intermediate router sending insecure ICMP 2476 * messages? 2477 */ 2478 return (B_TRUE); 2479 case ICMP_PORT_UNREACHABLE: 2480 case ICMP_PROTOCOL_UNREACHABLE: 2481 default: 2482 return (B_FALSE); 2483 } 2484 case ICMP_SOURCE_QUENCH: 2485 /* 2486 * If this is an attack, TCP will slow start 2487 * because of this. Is it very harmful?
2488 */ 2489 return (B_TRUE); 2490 case ICMP_PARAM_PROBLEM: 2491 return (B_FALSE); 2492 case ICMP_TIME_EXCEEDED: 2493 return (B_TRUE); 2494 case ICMP_REDIRECT: 2495 return (B_FALSE); 2496 default : 2497 return (B_FALSE); 2498 } 2499 } 2500 } 2501 2502 void 2503 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2504 { 2505 mutex_enter(&ipl->ipl_lock); 2506 2507 if (ipl->ipl_ids_latched) { 2508 /* I lost, someone else got here before me */ 2509 mutex_exit(&ipl->ipl_lock); 2510 return; 2511 } 2512 2513 if (local != NULL) 2514 IPSID_REFHOLD(local); 2515 if (remote != NULL) 2516 IPSID_REFHOLD(remote); 2517 2518 ipl->ipl_local_cid = local; 2519 ipl->ipl_remote_cid = remote; 2520 ipl->ipl_ids_latched = B_TRUE; 2521 mutex_exit(&ipl->ipl_lock); 2522 } 2523 2524 void 2525 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii) 2526 { 2527 ipsa_t *sa; 2528 2529 if (!ipl->ipl_ids_latched) { 2530 ipsid_t *local = NULL; 2531 ipsid_t *remote = NULL; 2532 2533 if (!ii->ipsec_in_loopback) { 2534 if (ii->ipsec_in_esp_sa != NULL) 2535 sa = ii->ipsec_in_esp_sa; 2536 else 2537 sa = ii->ipsec_in_ah_sa; 2538 ASSERT(sa != NULL); 2539 local = sa->ipsa_dst_cid; 2540 remote = sa->ipsa_src_cid; 2541 } 2542 ipsec_latch_ids(ipl, local, remote); 2543 } 2544 ipl->ipl_in_action = ii->ipsec_in_action; 2545 IPACT_REFHOLD(ipl->ipl_in_action); 2546 } 2547 2548 /* 2549 * Check whether the policy constraints are met either for an 2550 * inbound datagram; called from IP in numerous places. 2551 * 2552 * Note that this is not a chokepoint for inbound policy checks; 2553 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy() 2554 */ 2555 mblk_t * 2556 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, 2557 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2558 { 2559 ipsec_in_t *ii; 2560 boolean_t ret; 2561 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp; 2562 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL; 2563 ipsec_latch_t *ipl; 2564 uint64_t unique_id; 2565 ipsec_stack_t *ipss; 2566 ip_stack_t *ipst; 2567 netstack_t *ns; 2568 2569 ASSERT(connp != NULL); 2570 ns = connp->conn_netstack; 2571 ipss = ns->netstack_ipsec; 2572 ipst = ns->netstack_ip; 2573 2574 if (ipsec_mp == NULL) { 2575 clear: 2576 /* 2577 * This is the case where the incoming datagram is 2578 * cleartext and we need to see whether this client 2579 * would like to receive such untrustworthy things from 2580 * the wire. 2581 */ 2582 ASSERT(mp != NULL); 2583 2584 mutex_enter(&connp->conn_lock); 2585 if (connp->conn_state_flags & CONN_CONDEMNED) { 2586 mutex_exit(&connp->conn_lock); 2587 ip_drop_packet(first_mp, B_TRUE, NULL, 2588 NULL, DROPPER(ipss, ipds_spd_got_clear), 2589 &ipss->ipsec_spd_dropper); 2590 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2591 return (NULL); 2592 } 2593 if ((ipl = connp->conn_latch) != NULL) { 2594 /* Hold a reference in case the conn is closing */ 2595 IPLATCH_REFHOLD(ipl); 2596 mutex_exit(&connp->conn_lock); 2597 /* 2598 * Policy is cached in the conn. 
*/ 2600 if ((ipl->ipl_in_policy != NULL) && 2601 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) { 2602 ret = ipsec_inbound_accept_clear(mp, 2603 ipha, ip6h); 2604 if (ret) { 2605 BUMP_MIB(&ipst->ips_ip_mib, 2606 ipsecInSucceeded); 2607 IPLATCH_REFRELE(ipl, ns); 2608 return (first_mp); 2609 } else { 2610 ipsec_log_policy_failure( 2611 IPSEC_POLICY_MISMATCH, 2612 "ipsec_check_inbound_policy", ipha, 2613 ip6h, B_FALSE, ns); 2614 ip_drop_packet(first_mp, B_TRUE, NULL, 2615 NULL, 2616 DROPPER(ipss, ipds_spd_got_clear), 2617 &ipss->ipsec_spd_dropper); 2618 BUMP_MIB(&ipst->ips_ip_mib, 2619 ipsecInFailed); 2620 IPLATCH_REFRELE(ipl, ns); 2621 return (NULL); 2622 } 2623 } else { 2624 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2625 IPLATCH_REFRELE(ipl, ns); 2626 return (first_mp); 2627 } 2628 } else { 2629 uchar_t db_type; 2630 2631 mutex_exit(&connp->conn_lock); 2632 /* 2633 * As this is a non-hardbound connection we need 2634 * to look at both per-socket policy and global 2635 * policy. As this is cleartext, mark the mp as 2636 * M_DATA in case it is an ICMP error being 2637 * reported before calling ipsec_check_global_policy, 2638 * so that it does not mistake it for IPSEC_IN. 2639 */ 2640 db_type = mp->b_datap->db_type; 2641 mp->b_datap->db_type = M_DATA; 2642 first_mp = ipsec_check_global_policy(first_mp, connp, 2643 ipha, ip6h, mctl_present, ns); 2644 if (first_mp != NULL) 2645 mp->b_datap->db_type = db_type; 2646 return (first_mp); 2647 } 2648 } 2649 /* 2650 * If it is inbound, check whether the attached message 2651 * is secure or not. We have a special case for ICMP, 2652 * where we have an IPSEC_IN message and the attached 2653 * message is not secure. See icmp_inbound_error_fanout 2654 * for details. 2655 */ 2656 ASSERT(ipsec_mp != NULL); 2657 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2658 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2659 2660 if (!ii->ipsec_in_secure) 2661 goto clear; 2662 2663 /* 2664 * mp->b_cont could be either a M_CTL message 2665 * for icmp errors being sent up or a M_DATA message. 2666 */ 2667 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA); 2668 2669 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2670 2671 mutex_enter(&connp->conn_lock); 2672 /* Connection is closing */ 2673 if (connp->conn_state_flags & CONN_CONDEMNED) { 2674 mutex_exit(&connp->conn_lock); 2675 ip_drop_packet(first_mp, B_TRUE, NULL, 2676 NULL, DROPPER(ipss, ipds_spd_got_clear), 2677 &ipss->ipsec_spd_dropper); 2678 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2679 return (NULL); 2680 } 2681 2682 /* 2683 * Once a connection is latched it remains so for life; the conn_latch 2684 * pointer on the conn has not changed. We simply initialize ipl here, 2685 * as the earlier initialization was done only in the cleartext case. 2686 */ 2687 if ((ipl = connp->conn_latch) == NULL) { 2688 mutex_exit(&connp->conn_lock); 2689 /* 2690 * We don't have policies cached in the conn 2691 * for this stream. So, look at the global 2692 * policy. It will check against conn or global 2693 * depending on whichever is stronger.
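 * (ipsec_check_global_policy() picks up the conn's cached latch itself when connp is non-NULL, so both sources are considered.)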
2694 */ 2695 return (ipsec_check_global_policy(first_mp, connp, 2696 ipha, ip6h, mctl_present, ns)); 2697 } 2698 2699 IPLATCH_REFHOLD(ipl); 2700 mutex_exit(&connp->conn_lock); 2701 2702 if (ipl->ipl_in_action != NULL) { 2703 /* Policy is cached & latched; fast(er) path */ 2704 const char *reason; 2705 kstat_named_t *counter; 2706 2707 if (ipsec_check_ipsecin_latch(ii, mp, ipl, 2708 ipha, ip6h, &reason, &counter, connp)) { 2709 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2710 IPLATCH_REFRELE(ipl, ns); 2711 return (first_mp); 2712 } 2713 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, 2714 SL_ERROR|SL_WARN|SL_CONSOLE, 2715 "ipsec inbound policy mismatch: %s, packet dropped\n", 2716 reason); 2717 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2718 &ipss->ipsec_spd_dropper); 2719 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2720 IPLATCH_REFRELE(ipl, ns); 2721 return (NULL); 2722 } else if (ipl->ipl_in_policy == NULL) { 2723 ipsec_weird_null_inbound_policy++; 2724 IPLATCH_REFRELE(ipl, ns); 2725 return (first_mp); 2726 } 2727 2728 unique_id = conn_to_unique(connp, mp, ipha, ip6h); 2729 IPPOL_REFHOLD(ipl->ipl_in_policy); 2730 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy, 2731 ipha, ip6h, unique_id, ns); 2732 /* 2733 * NOTE: ipsecIn{Failed,Succeeeded} bumped by 2734 * ipsec_check_ipsecin_policy(). 2735 */ 2736 if (first_mp != NULL) 2737 ipsec_latch_inbound(ipl, ii); 2738 IPLATCH_REFRELE(ipl, ns); 2739 return (first_mp); 2740 } 2741 2742 /* 2743 * Returns: 2744 * 2745 * SELRET_NOMEM --> msgpullup() needed to gather things failed. 2746 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2747 * gathering, the initial fragment is too short for 2748 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2749 * set. 2750 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2751 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2752 * should put this packet in a fragment-gathering queue. 2753 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2754 * is set. 2755 */ 2756 static selret_t 2757 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2758 ip6_t *ip6h, uint8_t sel_flags) 2759 { 2760 uint16_t *ports; 2761 ushort_t hdr_len; 2762 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases... */ 2763 mblk_t *spare_mp = NULL; 2764 uint8_t *nexthdrp; 2765 uint8_t nexthdr; 2766 uint8_t *typecode; 2767 uint8_t check_proto; 2768 ip6_pkt_t ipp; 2769 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2770 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2771 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2772 2773 ASSERT((ipha == NULL && ip6h != NULL) || 2774 (ipha != NULL && ip6h == NULL)); 2775 2776 if (ip6h != NULL) { 2777 if (is_icmp) 2778 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2779 2780 check_proto = IPPROTO_ICMPV6; 2781 sel->ips_isv4 = B_FALSE; 2782 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2783 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2784 2785 bzero(&ipp, sizeof (ipp)); 2786 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2787 2788 nexthdr = ip6h->ip6_nxt; 2789 switch (nexthdr) { 2790 case IPPROTO_HOPOPTS: 2791 case IPPROTO_ROUTING: 2792 case IPPROTO_DSTOPTS: 2793 case IPPROTO_FRAGMENT: 2794 /* 2795 * Use ip_hdr_length_nexthdr_v6(). 
And have a spare 2796 * mblk that's contiguous to feed it 2797 */ 2798 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2799 return (SELRET_NOMEM); 2800 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2801 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2802 &hdr_len, &nexthdrp)) { 2803 /* Malformed packet - caller frees. */ 2804 ipsec_freemsg_chain(spare_mp); 2805 return (SELRET_BADPKT); 2806 } 2807 nexthdr = *nexthdrp; 2808 /* We can just extract based on hdr_len now. */ 2809 break; 2810 default: 2811 hdr_len = IPV6_HDR_LEN; 2812 break; 2813 } 2814 2815 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2816 /* IPv6 Fragment */ 2817 ipsec_freemsg_chain(spare_mp); 2818 return (SELRET_TUNFRAG); 2819 } 2820 } else { 2821 if (is_icmp) 2822 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2823 check_proto = IPPROTO_ICMP; 2824 sel->ips_isv4 = B_TRUE; 2825 sel->ips_local_addr_v4 = ipha->ipha_dst; 2826 sel->ips_remote_addr_v4 = ipha->ipha_src; 2827 nexthdr = ipha->ipha_protocol; 2828 hdr_len = IPH_HDR_LENGTH(ipha); 2829 2830 if (port_policy_present && 2831 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2832 !is_icmp) { 2833 /* IPv4 Fragment */ 2834 ipsec_freemsg_chain(spare_mp); 2835 return (SELRET_TUNFRAG); 2836 } 2837 2838 } 2839 sel->ips_protocol = nexthdr; 2840 2841 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2842 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2843 (!port_policy_present && tunnel_mode)) { 2844 sel->ips_remote_port = sel->ips_local_port = 0; 2845 ipsec_freemsg_chain(spare_mp); 2846 return (SELRET_SUCCESS); 2847 } 2848 2849 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2850 /* If we didn't pullup a copy already, do so now. */ 2851 /* 2852 * XXX performance, will upper-layers frequently split TCP/UDP 2853 * apart from IP or options? If so, perhaps we should revisit 2854 * the spare_mp strategy. 2855 */ 2856 ipsec_hdr_pullup_needed++; 2857 if (spare_mp == NULL && 2858 (spare_mp = msgpullup(mp, -1)) == NULL) { 2859 return (SELRET_NOMEM); 2860 } 2861 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2862 } else { 2863 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2864 } 2865 2866 if (nexthdr == check_proto) { 2867 typecode = (uint8_t *)ports; 2868 sel->ips_icmp_type = *typecode++; 2869 sel->ips_icmp_code = *typecode; 2870 sel->ips_remote_port = sel->ips_local_port = 0; 2871 } else { 2872 sel->ips_remote_port = *ports++; 2873 sel->ips_local_port = *ports; 2874 } 2875 ipsec_freemsg_chain(spare_mp); 2876 return (SELRET_SUCCESS); 2877 } 2878 2879 static boolean_t 2880 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2881 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2882 { 2883 /* 2884 * XXX cut&paste shared with ipsec_init_inbound_sel 2885 */ 2886 uint16_t *ports; 2887 ushort_t hdr_len; 2888 mblk_t *spare_mp = NULL; 2889 uint8_t *nexthdrp; 2890 uint8_t nexthdr; 2891 uint8_t *typecode; 2892 uint8_t check_proto; 2893 2894 ASSERT((ipha == NULL && ip6h != NULL) || 2895 (ipha != NULL && ip6h == NULL)); 2896 2897 if (ip6h != NULL) { 2898 check_proto = IPPROTO_ICMPV6; 2899 nexthdr = ip6h->ip6_nxt; 2900 switch (nexthdr) { 2901 case IPPROTO_HOPOPTS: 2902 case IPPROTO_ROUTING: 2903 case IPPROTO_DSTOPTS: 2904 case IPPROTO_FRAGMENT: 2905 /* 2906 * Use ip_hdr_length_nexthdr_v6(). 
And have a spare 2907 * mblk that's contiguous to feed it 2908 */ 2909 spare_mp = msgpullup(mp, -1); 2910 if (spare_mp == NULL || 2911 !ip_hdr_length_nexthdr_v6(spare_mp, 2912 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2913 &hdr_len, &nexthdrp)) { 2914 /* Always works, even if NULL. */ 2915 ipsec_freemsg_chain(spare_mp); 2916 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2917 DROPPER(ipss, ipds_spd_nomem), 2918 &ipss->ipsec_spd_dropper); 2919 return (B_FALSE); 2920 } else { 2921 nexthdr = *nexthdrp; 2922 /* We can just extract based on hdr_len now. */ 2923 } 2924 break; 2925 default: 2926 hdr_len = IPV6_HDR_LEN; 2927 break; 2928 } 2929 } else { 2930 check_proto = IPPROTO_ICMP; 2931 hdr_len = IPH_HDR_LENGTH(ipha); 2932 nexthdr = ipha->ipha_protocol; 2933 } 2934 2935 sel->ips_protocol = nexthdr; 2936 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2937 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2938 sel->ips_local_port = sel->ips_remote_port = 0; 2939 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2940 return (B_TRUE); 2941 } 2942 2943 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2944 /* If we didn't pullup a copy already, do so now. */ 2945 /* 2946 * XXX performance, will upper-layers frequently split TCP/UDP 2947 * apart from IP or options? If so, perhaps we should revisit 2948 * the spare_mp strategy. 2949 * 2950 * XXX should this be msgpullup(mp, hdr_len+4) ??? 2951 */ 2952 if (spare_mp == NULL && 2953 (spare_mp = msgpullup(mp, -1)) == NULL) { 2954 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2955 DROPPER(ipss, ipds_spd_nomem), 2956 &ipss->ipsec_spd_dropper); 2957 return (B_FALSE); 2958 } 2959 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2960 } else { 2961 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2962 } 2963 2964 if (nexthdr == check_proto) { 2965 typecode = (uint8_t *)ports; 2966 sel->ips_icmp_type = *typecode++; 2967 sel->ips_icmp_code = *typecode; 2968 sel->ips_remote_port = sel->ips_local_port = 0; 2969 } else { 2970 sel->ips_local_port = *ports++; 2971 sel->ips_remote_port = *ports; 2972 } 2973 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2974 return (B_TRUE); 2975 } 2976 2977 /* 2978 * Create an ipsec_action_t based on the way an inbound packet was protected. 2979 * Used to reflect traffic back to a sender. 2980 * 2981 * We don't bother interning the action into the hash table. 2982 */ 2983 ipsec_action_t * 2984 ipsec_in_to_out_action(ipsec_in_t *ii) 2985 { 2986 ipsa_t *ah_assoc, *esp_assoc; 2987 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2988 ipsec_action_t *ap; 2989 boolean_t unique; 2990 2991 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2992 2993 if (ap == NULL) 2994 return (NULL); 2995 2996 bzero(ap, sizeof (*ap)); 2997 HASH_NULL(ap, ipa_hash); 2998 ap->ipa_next = NULL; 2999 ap->ipa_refs = 1; 3000 3001 /* 3002 * Get the algorithms that were used for this packet. 
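 * The reflected action mirrors the inbound SAs exactly: algorithm identifiers are copied from the SAs, and the min/max key-size bounds below are both pinned to the key sizes actually in use, so the reply is protected precisely as strongly as the request was.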
3003 */ 3004 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 3005 ap->ipa_act.ipa_log = 0; 3006 ah_assoc = ii->ipsec_in_ah_sa; 3007 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 3008 3009 esp_assoc = ii->ipsec_in_esp_sa; 3010 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 3011 3012 if (esp_assoc != NULL) { 3013 encr_alg = esp_assoc->ipsa_encr_alg; 3014 espa_alg = esp_assoc->ipsa_auth_alg; 3015 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 3016 } 3017 if (ah_assoc != NULL) 3018 auth_alg = ah_assoc->ipsa_auth_alg; 3019 3020 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 3021 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 3022 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 3023 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 3024 unique = B_FALSE; 3025 3026 if (esp_assoc != NULL) { 3027 ap->ipa_act.ipa_apply.ipp_espa_minbits = 3028 esp_assoc->ipsa_authkeybits; 3029 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 3030 esp_assoc->ipsa_authkeybits; 3031 ap->ipa_act.ipa_apply.ipp_espe_minbits = 3032 esp_assoc->ipsa_encrkeybits; 3033 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 3034 esp_assoc->ipsa_encrkeybits; 3035 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 3036 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 3037 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 3038 unique = B_TRUE; 3039 } 3040 if (ah_assoc != NULL) { 3041 ap->ipa_act.ipa_apply.ipp_ah_minbits = 3042 ah_assoc->ipsa_authkeybits; 3043 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 3044 ah_assoc->ipsa_authkeybits; 3045 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 3046 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 3047 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 3048 unique = B_TRUE; 3049 } 3050 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3051 ap->ipa_want_unique = unique; 3052 ap->ipa_allow_clear = B_FALSE; 3053 ap->ipa_want_se = ii->ipsec_in_decaps; 3054 ap->ipa_want_ah = (ah_assoc != NULL); 3055 ap->ipa_want_esp = (esp_assoc != NULL); 3056 3057 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 3058 3059 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3060 3061 return (ap); 3062 } 3063 3064 3065 /* 3066 * Compute the worst-case amount of extra space required by an action. 3067 * Note that, because of the ESP considerations listed below, this is 3068 * actually not the same as the best-case reduction in the MTU; in the 3069 * future, we should pass additional information to this function to 3070 * allow the actual MTU impact to be computed. 3071 * 3072 * AH: Revisit this if we implement algorithms with 3073 * a verifier size of more than 12 bytes. 3074 * 3075 * ESP: A more exact but more messy computation would take into 3076 * account the interaction between the cipher block size and the 3077 * effective MTU, yielding the inner payload size which reflects a 3078 * packet with *minimum* ESP padding.. 3079 */ 3080 int32_t 3081 ipsec_act_ovhd(const ipsec_act_t *act) 3082 { 3083 int32_t overhead = 0; 3084 3085 if (act->ipa_type == IPSEC_ACT_APPLY) { 3086 const ipsec_prot_t *ipp = &act->ipa_apply; 3087 3088 if (ipp->ipp_use_ah) 3089 overhead += IPSEC_MAX_AH_HDR_SIZE; 3090 if (ipp->ipp_use_esp) { 3091 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3092 overhead += sizeof (struct udphdr); 3093 } 3094 if (ipp->ipp_use_se) 3095 overhead += IP_SIMPLE_HDR_LENGTH; 3096 } 3097 return (overhead); 3098 } 3099 3100 /* 3101 * This hash function is used only when creating policies and thus is not 3102 * performance-critical for packet flows. 
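 * (It currently hashes everything to bucket zero, i.e. a single chain; that is correct, merely slow if very many distinct actions exist.)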
3103 * 3104 * Future work: canonicalize the structures hashed with this (i.e., 3105 * zeroize padding) so the hash works correctly. 3106 */ 3107 /* ARGSUSED */ 3108 static uint32_t 3109 policy_hash(int size, const void *start, const void *end) 3110 { 3111 return (0); 3112 } 3113 3114 3115 /* 3116 * Hash function macros for each address type. 3117 * 3118 * The IPV6 hash function assumes that the low order 32-bits of the 3119 * address (typically containing the low order 24 bits of the mac 3120 * address) are reasonably well-distributed. Revisit this if we run 3121 * into trouble from lots of collisions on ::1 addresses and the like 3122 * (seems unlikely). 3123 */ 3124 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3125 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3126 3127 /* 3128 * These two hash functions should produce coordinated values 3129 * but have slightly different roles. 3130 */ 3131 static uint32_t 3132 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3133 { 3134 uint32_t valid = selkey->ipsl_valid; 3135 ipsec_stack_t *ipss = ns->netstack_ipsec; 3136 3137 if (!(valid & IPSL_REMOTE_ADDR)) 3138 return (IPSEC_SEL_NOHASH); 3139 3140 if (valid & IPSL_IPV4) { 3141 if (selkey->ipsl_remote_pfxlen == 32) { 3142 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3143 ipss->ipsec_spd_hashsize)); 3144 } 3145 } 3146 if (valid & IPSL_IPV6) { 3147 if (selkey->ipsl_remote_pfxlen == 128) { 3148 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3149 ipss->ipsec_spd_hashsize)); 3150 } 3151 } 3152 return (IPSEC_SEL_NOHASH); 3153 } 3154 3155 static uint32_t 3156 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3157 { 3158 if (sel->ips_isv4) { 3159 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3160 root->ipr_nchains)); 3161 } 3162 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3163 } 3164 3165 /* 3166 * Intern actions into the action hash table. 3167 */ 3168 ipsec_action_t * 3169 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3170 { 3171 int i; 3172 uint32_t hval; 3173 ipsec_action_t *ap; 3174 ipsec_action_t *prev = NULL; 3175 int32_t overhead, maxovhd = 0; 3176 boolean_t allow_clear = B_FALSE; 3177 boolean_t want_ah = B_FALSE; 3178 boolean_t want_esp = B_FALSE; 3179 boolean_t want_se = B_FALSE; 3180 boolean_t want_unique = B_FALSE; 3181 ipsec_stack_t *ipss = ns->netstack_ipsec; 3182 3183 /* 3184 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3185 * so we can use a non-trivial policy_hash function. 3186 */ 3187 for (i = n-1; i >= 0; i--) { 3188 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3189 3190 HASH_LOCK(ipss->ipsec_action_hash, hval); 3191 3192 for (HASH_ITERATE(ap, ipa_hash, 3193 ipss->ipsec_action_hash, hval)) { 3194 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3195 continue; 3196 if (ap->ipa_next != prev) 3197 continue; 3198 break; 3199 } 3200 if (ap != NULL) { 3201 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3202 prev = ap; 3203 continue; 3204 } 3205 /* 3206 * need to allocate a new one.. 
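 * Because the array is walked from i = n-1 down to 0, every ipa_next suffix is itself an interned chain, which is what lets the "ap->ipa_next != prev" test above find a shared tail.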
3207 */ 3208 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3209 if (ap == NULL) { 3210 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3211 if (prev != NULL) 3212 ipsec_action_free(prev); 3213 return (NULL); 3214 } 3215 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3216 3217 ap->ipa_next = prev; 3218 ap->ipa_act = a[i]; 3219 3220 overhead = ipsec_act_ovhd(&a[i]); 3221 if (maxovhd < overhead) 3222 maxovhd = overhead; 3223 3224 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3225 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3226 allow_clear = B_TRUE; 3227 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3228 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3229 3230 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3231 want_ah |= ipp->ipp_use_ah; 3232 want_esp |= ipp->ipp_use_esp; 3233 want_se |= ipp->ipp_use_se; 3234 want_unique |= ipp->ipp_use_unique; 3235 } 3236 ap->ipa_allow_clear = allow_clear; 3237 ap->ipa_want_ah = want_ah; 3238 ap->ipa_want_esp = want_esp; 3239 ap->ipa_want_se = want_se; 3240 ap->ipa_want_unique = want_unique; 3241 ap->ipa_refs = 1; /* from the hash table */ 3242 ap->ipa_ovhd = maxovhd; 3243 if (prev) 3244 prev->ipa_refs++; 3245 prev = ap; 3246 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3247 } 3248 3249 ap->ipa_refs++; /* caller's reference */ 3250 3251 return (ap); 3252 } 3253 3254 /* 3255 * Called when refcount goes to 0, indicating that all references to this 3256 * node are gone. 3257 * 3258 * This does not unchain the action from the hash table. 3259 */ 3260 void 3261 ipsec_action_free(ipsec_action_t *ap) 3262 { 3263 for (;;) { 3264 ipsec_action_t *np = ap->ipa_next; 3265 ASSERT(ap->ipa_refs == 0); 3266 ASSERT(ap->ipa_hash.hash_pp == NULL); 3267 kmem_cache_free(ipsec_action_cache, ap); 3268 ap = np; 3269 /* Inlined IPACT_REFRELE -- avoid recursion */ 3270 if (ap == NULL) 3271 break; 3272 membar_exit(); 3273 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3274 break; 3275 /* End inlined IPACT_REFRELE */ 3276 } 3277 } 3278 3279 /* 3280 * Called when the action hash table goes away. 3281 * 3282 * The actions can be queued on an mblk with ipsec_in or 3283 * ipsec_out, hence the actions might still be around. 3284 * But we decrement ipa_refs here since we no longer have 3285 * a reference to the action from the hash table. 3286 */ 3287 static void 3288 ipsec_action_free_table(ipsec_action_t *ap) 3289 { 3290 while (ap != NULL) { 3291 ipsec_action_t *np = ap->ipa_next; 3292 3293 /* FIXME: remove? */ 3294 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3295 (void *)ap, ap->ipa_refs); 3296 ASSERT(ap->ipa_refs > 0); 3297 IPACT_REFRELE(ap); 3298 ap = np; 3299 } 3300 } 3301 3302 /* 3303 * Need to walk all stack instances since the reclaim function 3304 * is global for all instances 3305 */ 3306 /* ARGSUSED */ 3307 static void 3308 ipsec_action_reclaim(void *arg) 3309 { 3310 netstack_handle_t nh; 3311 netstack_t *ns; 3312 3313 netstack_next_init(&nh); 3314 while ((ns = netstack_next(&nh)) != NULL) { 3315 ipsec_action_reclaim_stack(ns); 3316 netstack_rele(ns); 3317 } 3318 netstack_next_fini(&nh); 3319 } 3320 3321 /* 3322 * Periodically sweep action hash table for actions with refcount==1, and 3323 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3324 * because we can't close the race between another thread finding the action 3325 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3326 * Instead, we run this function sporadically to clean up after ourselves; 3327 * we also set it as the "reclaim" function for the action kmem_cache. 
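 * An action whose ipa_refs has fallen to 1 is referenced only by the hash table itself, so unchaining it and dropping that last hold (the IPACT_REFRELE() in the loop below) frees it.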
3328 * 3329 * Note that it may take several passes of ipsec_action_gc() to free all 3330 * "stale" actions. 3331 */ 3332 static void 3333 ipsec_action_reclaim_stack(netstack_t *ns) 3334 { 3335 int i; 3336 ipsec_stack_t *ipss = ns->netstack_ipsec; 3337 3338 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3339 ipsec_action_t *ap, *np; 3340 3341 /* skip the lock if nobody home */ 3342 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3343 continue; 3344 3345 HASH_LOCK(ipss->ipsec_action_hash, i); 3346 for (ap = ipss->ipsec_action_hash[i].hash_head; 3347 ap != NULL; ap = np) { 3348 ASSERT(ap->ipa_refs > 0); 3349 np = ap->ipa_hash.hash_next; 3350 if (ap->ipa_refs > 1) 3351 continue; 3352 HASH_UNCHAIN(ap, ipa_hash, 3353 ipss->ipsec_action_hash, i); 3354 IPACT_REFRELE(ap); 3355 } 3356 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3357 } 3358 } 3359 3360 /* 3361 * Intern a selector set into the selector set hash table. 3362 * This is simpler than the actions case.. 3363 */ 3364 static ipsec_sel_t * 3365 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3366 { 3367 ipsec_sel_t *sp; 3368 uint32_t hval, bucket; 3369 ipsec_stack_t *ipss = ns->netstack_ipsec; 3370 3371 /* 3372 * Exactly one AF bit should be set in selkey. 3373 */ 3374 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3375 !(selkey->ipsl_valid & IPSL_IPV6)); 3376 3377 hval = selkey_hash(selkey, ns); 3378 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3379 selkey->ipsl_sel_hval = hval; 3380 3381 bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval; 3382 3383 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); 3384 HASH_LOCK(ipss->ipsec_sel_hash, bucket); 3385 3386 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { 3387 if (bcmp(&sp->ipsl_key, selkey, 3388 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3389 break; 3390 } 3391 if (sp != NULL) { 3392 sp->ipsl_refs++; 3393 3394 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3395 return (sp); 3396 } 3397 3398 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3399 if (sp == NULL) { 3400 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3401 return (NULL); 3402 } 3403 3404 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); 3405 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3406 sp->ipsl_key = *selkey; 3407 /* Set to uninitialized and have insertion into polhead fix things. */ 3408 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3409 sp->ipsl_key.ipsl_pol_hval = 0; 3410 else 3411 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3412 3413 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3414 3415 return (sp); 3416 } 3417 3418 static void 3419 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) 3420 { 3421 ipsec_sel_t *sp = *spp; 3422 int hval = sp->ipsl_key.ipsl_sel_hval; 3423 ipsec_stack_t *ipss = ns->netstack_ipsec; 3424 3425 *spp = NULL; 3426 3427 if (hval == IPSEC_SEL_NOHASH) 3428 hval = 0; 3429 3430 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); 3431 HASH_LOCK(ipss->ipsec_sel_hash, hval); 3432 if (--sp->ipsl_refs == 1) { 3433 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); 3434 sp->ipsl_refs--; 3435 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3436 ASSERT(sp->ipsl_refs == 0); 3437 kmem_cache_free(ipsec_sel_cache, sp); 3438 /* Caller unlocks */ 3439 return; 3440 } 3441 3442 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3443 } 3444 3445 /* 3446 * Free a policy rule which we know is no longer being referenced.
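 * Releases the rule's interned selector and its action chain before returning the ipsec_policy_t itself to the kmem cache.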
3447 */ 3448 void 3449 ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns) 3450 { 3451 ASSERT(ipp->ipsp_refs == 0); 3452 ASSERT(ipp->ipsp_sel != NULL); 3453 ASSERT(ipp->ipsp_act != NULL); 3454 3455 ipsec_sel_rel(&ipp->ipsp_sel, ns); 3456 IPACT_REFRELE(ipp->ipsp_act); 3457 kmem_cache_free(ipsec_pol_cache, ipp); 3458 } 3459 3460 /* 3461 * Construction of new policy rules; construct a policy, and add it to 3462 * the appropriate tables. 3463 */ 3464 ipsec_policy_t * 3465 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3466 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3467 { 3468 ipsec_action_t *ap; 3469 ipsec_sel_t *sp; 3470 ipsec_policy_t *ipp; 3471 ipsec_stack_t *ipss = ns->netstack_ipsec; 3472 3473 if (index_ptr == NULL) 3474 index_ptr = &ipss->ipsec_next_policy_index; 3475 3476 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3477 ap = ipsec_act_find(a, nacts, ns); 3478 sp = ipsec_find_sel(keys, ns); 3479 3480 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3481 if (ap != NULL) { 3482 IPACT_REFRELE(ap); 3483 } 3484 if (sp != NULL) 3485 ipsec_sel_rel(&sp, ns); 3486 if (ipp != NULL) 3487 kmem_cache_free(ipsec_pol_cache, ipp); 3488 return (NULL); 3489 } 3490 3491 HASH_NULL(ipp, ipsp_hash); 3492 3493 ipp->ipsp_refs = 1; /* caller's reference */ 3494 ipp->ipsp_sel = sp; 3495 ipp->ipsp_act = ap; 3496 ipp->ipsp_prio = prio; /* rule priority */ 3497 ipp->ipsp_index = *index_ptr; 3498 (*index_ptr)++; 3499 3500 return (ipp); 3501 } 3502 3503 static void 3504 ipsec_update_present_flags(ipsec_stack_t *ipss) 3505 { 3506 boolean_t hashpol; 3507 3508 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3509 3510 if (hashpol) { 3511 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3512 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3513 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3514 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3515 return; 3516 } 3517 3518 ipss->ipsec_outbound_v4_policy_present = (NULL != 3519 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3520 ipr_nonhash[IPSEC_AF_V4]); 3521 ipss->ipsec_outbound_v6_policy_present = (NULL != 3522 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3523 ipr_nonhash[IPSEC_AF_V6]); 3524 ipss->ipsec_inbound_v4_policy_present = (NULL != 3525 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3526 ipr_nonhash[IPSEC_AF_V4]); 3527 ipss->ipsec_inbound_v6_policy_present = (NULL != 3528 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3529 ipr_nonhash[IPSEC_AF_V6]); 3530 } 3531 3532 boolean_t 3533 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3534 netstack_t *ns) 3535 { 3536 ipsec_sel_t *sp; 3537 ipsec_policy_t *ip, *nip, *head; 3538 int af; 3539 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3540 3541 sp = ipsec_find_sel(keys, ns); 3542 3543 if (sp == NULL) 3544 return (B_FALSE); 3545 3546 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6; 3547 3548 rw_enter(&php->iph_lock, RW_WRITER); 3549 3550 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) { 3551 head = pr->ipr_nonhash[af]; 3552 } else { 3553 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3554 } 3555 3556 for (ip = head; ip != NULL; ip = nip) { 3557 nip = ip->ipsp_hash.hash_next; 3558 if (ip->ipsp_sel != sp) { 3559 continue; 3560 } 3561 3562 IPPOL_UNCHAIN(php, ip, ns); 3563 3564 php->iph_gen++; 3565 ipsec_update_present_flags(ns->netstack_ipsec); 3566 3567 rw_exit(&php->iph_lock); 3568 3569 ipsec_sel_rel(&sp, ns); 3570 3571 return (B_TRUE); 3572 } 3573 3574 rw_exit(&php->iph_lock); 3575 ipsec_sel_rel(&sp, ns); 3576 return (B_FALSE); 3577 } 3578 3579 int 3580 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index, 3581 netstack_t *ns) 3582 { 3583 boolean_t found = B_FALSE; 3584 ipsec_policy_t ipkey; 3585 ipsec_policy_t *ip; 3586 avl_index_t where; 3587 3588 (void) memset(&ipkey, 0, sizeof (ipkey)); 3589 ipkey.ipsp_index = policy_index; 3590 3591 rw_enter(&php->iph_lock, RW_WRITER); 3592 3593 /* 3594 * We could be cleverer here about the walk, 3595 * but well, (k+1)*log(N) will do for now (k == number of matches, 3596 * N == number of table entries). 3597 */ 3598 for (;;) { 3599 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid, 3600 (void *)&ipkey, &where); 3601 ASSERT(ip == NULL); 3602 3603 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER); 3604 3605 if (ip == NULL) 3606 break; 3607 3608 if (ip->ipsp_index != policy_index) { 3609 ASSERT(ip->ipsp_index > policy_index); 3610 break; 3611 } 3612 3613 IPPOL_UNCHAIN(php, ip, ns); 3614 found = B_TRUE; 3615 } 3616 3617 if (found) { 3618 php->iph_gen++; 3619 ipsec_update_present_flags(ns->netstack_ipsec); 3620 } 3621 3622 rw_exit(&php->iph_lock); 3623 3624 return (found ? 0 : ENOENT); 3625 } 3626 3627 /* 3628 * Given a constructed ipsec_policy_t policy rule, see if it can be entered 3629 * into the correct policy ruleset. As a side-effect, it sets the hash 3630 * entries on "ipp"'s ipsp_pol_hval. 3631 * 3632 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a 3633 * duplicate policy exists with exactly the same selectors, or an icmp 3634 * rule exists with a different encryption/authentication action). 3635 */ 3636 boolean_t 3637 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3638 { 3639 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3640 int af = -1; 3641 ipsec_policy_t *p2, *head; 3642 uint8_t check_proto; 3643 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3644 uint32_t valid = selkey->ipsl_valid; 3645 3646 if (valid & IPSL_IPV6) { 3647 ASSERT(!(valid & IPSL_IPV4)); 3648 af = IPSEC_AF_V6; 3649 check_proto = IPPROTO_ICMPV6; 3650 } else { 3651 ASSERT(valid & IPSL_IPV4); 3652 af = IPSEC_AF_V4; 3653 check_proto = IPPROTO_ICMP; 3654 } 3655 3656 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3657 3658 /* 3659 * Double-check that we don't have any duplicate selectors here. 3660 * Because selectors are interned below, we need only compare pointers 3661 * for equality. 3662 */ 3663 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) { 3664 head = pr->ipr_nonhash[af]; 3665 } else { 3666 selkey->ipsl_pol_hval = 3667 (selkey->ipsl_valid & IPSL_IPV4) ?
3668 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3669 pr->ipr_nchains) : 3670 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3671 pr->ipr_nchains); 3672 3673 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3674 } 3675 3676 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3677 if (p2->ipsp_sel == ipp->ipsp_sel) 3678 return (B_FALSE); 3679 } 3680 3681 /* 3682 * If it's ICMP and not a drop or pass rule, run through the ICMP 3683 * rules and make sure the action is either new or the same as any 3684 * other actions. We don't have to check the full chain because 3685 * discard and bypass will override all other actions 3686 */ 3687 3688 if (valid & IPSL_PROTOCOL && 3689 selkey->ipsl_proto == check_proto && 3690 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3691 3692 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3693 3694 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3695 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3696 (p2->ipsp_act->ipa_act.ipa_type == 3697 IPSEC_ACT_APPLY)) { 3698 return (ipsec_compare_action(p2, ipp)); 3699 } 3700 } 3701 } 3702 3703 return (B_TRUE); 3704 } 3705 3706 /* 3707 * compare the action chains of two policies for equality 3708 * B_TRUE -> effective equality 3709 */ 3710 3711 static boolean_t 3712 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3713 { 3714 3715 ipsec_action_t *act1, *act2; 3716 3717 /* We have a valid rule. Let's compare the actions */ 3718 if (p1->ipsp_act == p2->ipsp_act) { 3719 /* same action. We are good */ 3720 return (B_TRUE); 3721 } 3722 3723 /* we have to walk the chain */ 3724 3725 act1 = p1->ipsp_act; 3726 act2 = p2->ipsp_act; 3727 3728 while (act1 != NULL && act2 != NULL) { 3729 3730 /* otherwise, Are we close enough? */ 3731 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3732 act1->ipa_want_ah != act2->ipa_want_ah || 3733 act1->ipa_want_esp != act2->ipa_want_esp || 3734 act1->ipa_want_se != act2->ipa_want_se) { 3735 /* Nope, we aren't */ 3736 return (B_FALSE); 3737 } 3738 3739 if (act1->ipa_want_ah) { 3740 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3741 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3742 return (B_FALSE); 3743 } 3744 3745 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3746 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3747 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3748 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3749 return (B_FALSE); 3750 } 3751 } 3752 3753 if (act1->ipa_want_esp) { 3754 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3755 act2->ipa_act.ipa_apply.ipp_use_esp || 3756 act1->ipa_act.ipa_apply.ipp_use_espa != 3757 act2->ipa_act.ipa_apply.ipp_use_espa) { 3758 return (B_FALSE); 3759 } 3760 3761 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3762 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3763 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3764 return (B_FALSE); 3765 } 3766 3767 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3768 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3769 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3770 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3771 return (B_FALSE); 3772 } 3773 } 3774 3775 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3776 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3777 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3778 return (B_FALSE); 3779 } 3780 3781 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3782 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3783 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3784 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3785 return (B_FALSE); 3786 } 3787 } 3788 3789 } 3790 3791 act1 = 
act1->ipa_next; 3792 act2 = act2->ipa_next; 3793 } 3794 3795 if (act1 != NULL || act2 != NULL) { 3796 return (B_FALSE); 3797 } 3798 3799 return (B_TRUE); 3800 } 3801 3802 3803 /* 3804 * Given a constructed ipsec_policy_t policy rule, enter it into 3805 * the correct policy ruleset. 3806 * 3807 * ipsec_check_policy() is assumed to have succeeded first (to check for 3808 * duplicates). 3809 */ 3810 void 3811 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3812 netstack_t *ns) 3813 { 3814 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3815 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3816 uint32_t valid = selkey->ipsl_valid; 3817 uint32_t hval = selkey->ipsl_pol_hval; 3818 int af = -1; 3819 3820 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3821 3822 if (valid & IPSL_IPV6) { 3823 ASSERT(!(valid & IPSL_IPV4)); 3824 af = IPSEC_AF_V6; 3825 } else { 3826 ASSERT(valid & IPSL_IPV4); 3827 af = IPSEC_AF_V4; 3828 } 3829 3830 php->iph_gen++; 3831 3832 if (hval == IPSEC_SEL_NOHASH) { 3833 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3834 } else { 3835 HASH_LOCK(pr->ipr_hash, hval); 3836 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3837 HASH_UNLOCK(pr->ipr_hash, hval); 3838 } 3839 3840 ipsec_insert_always(&php->iph_rulebyid, ipp); 3841 3842 ipsec_update_present_flags(ns->netstack_ipsec); 3843 } 3844 3845 static void 3846 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, 3847 netstack_t *ns) 3848 { 3849 ipsec_policy_t *ip, *nip; 3850 int af, chain, nchain; 3851 3852 for (af = 0; af < IPSEC_NAF; af++) { 3853 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3854 nip = ip->ipsp_hash.hash_next; 3855 IPPOL_UNCHAIN(php, ip, ns); 3856 } 3857 ipr->ipr_nonhash[af] = NULL; 3858 } 3859 nchain = ipr->ipr_nchains; 3860 3861 for (chain = 0; chain < nchain; chain++) { 3862 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3863 ip = nip) { 3864 nip = ip->ipsp_hash.hash_next; 3865 IPPOL_UNCHAIN(php, ip, ns); 3866 } 3867 ipr->ipr_hash[chain].hash_head = NULL; 3868 } 3869 } 3870 3871 void 3872 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3873 { 3874 int dir; 3875 3876 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3877 3878 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3879 ipsec_ipr_flush(php, &php->iph_root[dir], ns); 3880 3881 ipsec_update_present_flags(ns->netstack_ipsec); 3882 } 3883 3884 void 3885 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3886 { 3887 int dir; 3888 3889 ASSERT(php->iph_refs == 0); 3890 3891 rw_enter(&php->iph_lock, RW_WRITER); 3892 ipsec_polhead_flush(php, ns); 3893 rw_exit(&php->iph_lock); 3894 rw_destroy(&php->iph_lock); 3895 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3896 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3897 int chain; 3898 3899 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3900 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3901 3902 } 3903 ipsec_polhead_free_table(php); 3904 kmem_free(php, sizeof (*php)); 3905 } 3906 3907 static void 3908 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3909 { 3910 int af; 3911 3912 ipr->ipr_nchains = 0; 3913 ipr->ipr_hash = NULL; 3914 3915 for (af = 0; af < IPSEC_NAF; af++) { 3916 ipr->ipr_nonhash[af] = NULL; 3917 } 3918 } 3919 3920 ipsec_policy_head_t * 3921 ipsec_polhead_create(void) 3922 { 3923 ipsec_policy_head_t *php; 3924 3925 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3926 if (php == NULL) 3927 return (php); 3928 3929 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3930 php->iph_refs = 1; 3931 php->iph_gen = 0; 3932 3933 
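/* Both direction roots start out empty and unhashed; see ipsec_ipr_init() above. */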
ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3934 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3935 3936 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3937 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3938 3939 return (php); 3940 } 3941 3942 /* 3943 * Clone the policy head into a new polhead; release one reference to the 3944 * old one and return the only reference to the new one. 3945 * If the old one had a refcount of 1, just return it. 3946 */ 3947 ipsec_policy_head_t * 3948 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns) 3949 { 3950 ipsec_policy_head_t *nphp; 3951 3952 if (php == NULL) 3953 return (ipsec_polhead_create()); 3954 else if (php->iph_refs == 1) 3955 return (php); 3956 3957 nphp = ipsec_polhead_create(); 3958 if (nphp == NULL) 3959 return (NULL); 3960 3961 if (ipsec_copy_polhead(php, nphp, ns) != 0) { 3962 ipsec_polhead_free(nphp, ns); 3963 return (NULL); 3964 } 3965 IPPH_REFRELE(php, ns); 3966 return (nphp); 3967 } 3968 3969 /* 3970 * When sending a response to an ICMP request or generating a RST 3971 * in the TCP case, the outbound packets need to go out at the same level 3972 * of protection as the incoming ones, i.e., we associate our outbound 3973 * policy with how the packet came in. We call this after we have 3974 * accepted the incoming packet which may or may not have been in 3975 * the clear and hence we are sending the reply back with the policy 3976 * matching the incoming datagram's policy. 3977 * 3978 * NOTE : This technique serves two purposes : 3979 * 3980 * 1) If we have multiple outbound policies, we send out a reply 3981 * matching with how it came in rather than matching the outbound 3982 * policy. 3983 * 3984 * 2) For asymmetric policies, we want to make sure that incoming 3985 * and outgoing have the same level of protection. Asymmetric 3986 * policies exist only with global policy where we may not have 3987 * both outbound and inbound at the same time. 3988 * 3989 * NOTE2: This function is called by cleartext cases, so it needs to be 3990 * in IP proper. 3991 */ 3992 boolean_t 3993 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) 3994 { 3995 ipsec_in_t *ii; 3996 ipsec_out_t *io; 3997 boolean_t v4; 3998 mblk_t *mp; 3999 boolean_t secure, attach_if; 4000 uint_t ifindex; 4001 ipsec_selector_t sel; 4002 ipsec_action_t *reflect_action = NULL; 4003 zoneid_t zoneid; 4004 netstack_t *ns; 4005 4006 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4007 4008 bzero((void*)&sel, sizeof (sel)); 4009 4010 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4011 4012 mp = ipsec_mp->b_cont; 4013 ASSERT(mp != NULL); 4014 4015 if (ii->ipsec_in_action != NULL) { 4016 /* Transfer the reference. */ 4017 reflect_action = ii->ipsec_in_action; 4018 ii->ipsec_in_action = NULL; 4019 } else if (!ii->ipsec_in_loopback) 4020 reflect_action = ipsec_in_to_out_action(ii); 4021 secure = ii->ipsec_in_secure; 4022 attach_if = ii->ipsec_in_attach_if; 4023 ifindex = ii->ipsec_in_ill_index; 4024 zoneid = ii->ipsec_in_zoneid; 4025 ASSERT(zoneid != ALL_ZONES); 4026 ns = ii->ipsec_in_ns; 4027 v4 = ii->ipsec_in_v4; 4028 4029 ipsec_in_release_refs(ii); /* No netstack_rele/hold needed */ 4030 4031 /* 4032 * The caller is going to send the datagram out, which might 4033 * go out on the wire or be delivered locally through ip_wput_local. 4034 * 4035 * 1) If it goes out on the wire, new associations will be 4036 * obtained. 4037 * 2) If it is delivered locally, ip_wput_local will convert 4038 * this IPSEC_OUT to an IPSEC_IN looking at the requests. 
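 *
 * Hedged sketch of a caller (the shape below is an assumption, not
 * lifted from the actual icmp/tcp call sites):
 *
 *	if (ipsec_in_to_out(ipsec_mp, ipha, ip6h)) {
 *		// send the reply; ipsec_mp is now an IPSEC_OUT
 *		// carrying the reflected, inbound-matching policy
 *	} else {
 *		// B_FALSE: ipsec_init_outbound_ports() failed; see
 *		// that routine for the drop semantics
 *	}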
4039 */ 4040 4041 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4042 bzero(io, sizeof (ipsec_out_t)); 4043 io->ipsec_out_type = IPSEC_OUT; 4044 io->ipsec_out_len = sizeof (ipsec_out_t); 4045 io->ipsec_out_frtn.free_func = ipsec_out_free; 4046 io->ipsec_out_frtn.free_arg = (char *)io; 4047 io->ipsec_out_act = reflect_action; 4048 4049 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, 4050 ns->netstack_ipsec)) 4051 return (B_FALSE); 4052 4053 io->ipsec_out_src_port = sel.ips_local_port; 4054 io->ipsec_out_dst_port = sel.ips_remote_port; 4055 io->ipsec_out_proto = sel.ips_protocol; 4056 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4057 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4058 4059 /* 4060 * Don't use global policy for this, as we want 4061 * to use the same protection that was applied to the inbound packet. 4062 */ 4063 io->ipsec_out_use_global_policy = B_FALSE; 4064 io->ipsec_out_proc_begin = B_FALSE; 4065 io->ipsec_out_secure = secure; 4066 io->ipsec_out_v4 = v4; 4067 io->ipsec_out_attach_if = attach_if; 4068 io->ipsec_out_ill_index = ifindex; 4069 io->ipsec_out_zoneid = zoneid; 4070 io->ipsec_out_ns = ns; /* No netstack_hold */ 4071 4072 return (B_TRUE); 4073 } 4074 4075 mblk_t * 4076 ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4077 { 4078 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 4079 ipsec_in_t *nii; 4080 mblk_t *nmp; 4081 frtn_t nfrtn; 4082 ipsec_stack_t *ipss = ns->netstack_ipsec; 4083 4084 ASSERT(ii->ipsec_in_type == IPSEC_IN); 4085 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 4086 4087 nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); 4088 if (nmp == NULL) { 4089 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4090 DROPPER(ipss, ipds_spd_nomem), 4091 &ipss->ipsec_spd_dropper); 4092 return (NULL); 4093 } 4094 4095 ASSERT(nmp->b_datap->db_type == M_CTL); 4096 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4097 4098 /* 4099 * Bump refcounts. 4100 */ 4101 if (ii->ipsec_in_ah_sa != NULL) 4102 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 4103 if (ii->ipsec_in_esp_sa != NULL) 4104 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 4105 if (ii->ipsec_in_policy != NULL) 4106 IPPH_REFHOLD(ii->ipsec_in_policy); 4107 4108 /* 4109 * Copy everything, but preserve the free routine provided by 4110 * ipsec_in_alloc(). 4111 */ 4112 nii = (ipsec_in_t *)nmp->b_rptr; 4113 nfrtn = nii->ipsec_in_frtn; 4114 bcopy(ii, nii, sizeof (*ii)); 4115 nii->ipsec_in_frtn = nfrtn; 4116 4117 nmp->b_cont = cont; 4118 4119 return (nmp); 4120 } 4121 4122 mblk_t * 4123 ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4124 { 4125 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 4126 ipsec_out_t *nio; 4127 mblk_t *nmp; 4128 frtn_t nfrtn; 4129 ipsec_stack_t *ipss = ns->netstack_ipsec; 4130 4131 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4132 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4133 4134 nmp = ipsec_alloc_ipsec_out(ns); 4135 if (nmp == NULL) { 4136 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4137 DROPPER(ipss, ipds_spd_nomem), 4138 &ipss->ipsec_spd_dropper); 4139 return (NULL); 4140 } 4141 ASSERT(nmp->b_datap->db_type == M_CTL); 4142 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4143 4144 /* 4145 * Bump refcounts. 
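 *
 * (Hedged note: every SA/policy/action/latch/cred pointer that the
 * bcopy() below duplicates must be held here first, so the tagged
 * copy and the original can release their references independently
 * when freed.)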
4146 */ 4147 if (io->ipsec_out_ah_sa != NULL) 4148 IPSA_REFHOLD(io->ipsec_out_ah_sa); 4149 if (io->ipsec_out_esp_sa != NULL) 4150 IPSA_REFHOLD(io->ipsec_out_esp_sa); 4151 if (io->ipsec_out_polhead != NULL) 4152 IPPH_REFHOLD(io->ipsec_out_polhead); 4153 if (io->ipsec_out_policy != NULL) 4154 IPPOL_REFHOLD(io->ipsec_out_policy); 4155 if (io->ipsec_out_act != NULL) 4156 IPACT_REFHOLD(io->ipsec_out_act); 4157 if (io->ipsec_out_latch != NULL) 4158 IPLATCH_REFHOLD(io->ipsec_out_latch); 4159 if (io->ipsec_out_cred != NULL) 4160 crhold(io->ipsec_out_cred); 4161 4162 /* 4163 * Copy everything, but preserve the free routine provided by 4164 * ipsec_alloc_ipsec_out(). 4165 */ 4166 nio = (ipsec_out_t *)nmp->b_rptr; 4167 nfrtn = nio->ipsec_out_frtn; 4168 bcopy(io, nio, sizeof (*io)); 4169 nio->ipsec_out_frtn = nfrtn; 4170 4171 nmp->b_cont = cont; 4172 4173 return (nmp); 4174 } 4175 4176 static void 4177 ipsec_out_release_refs(ipsec_out_t *io) 4178 { 4179 netstack_t *ns = io->ipsec_out_ns; 4180 4181 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4182 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4183 ASSERT(io->ipsec_out_ns != NULL); 4184 4185 /* Note: IPSA_REFRELE is multi-line macro */ 4186 if (io->ipsec_out_ah_sa != NULL) 4187 IPSA_REFRELE(io->ipsec_out_ah_sa); 4188 if (io->ipsec_out_esp_sa != NULL) 4189 IPSA_REFRELE(io->ipsec_out_esp_sa); 4190 if (io->ipsec_out_polhead != NULL) 4191 IPPH_REFRELE(io->ipsec_out_polhead, ns); 4192 if (io->ipsec_out_policy != NULL) 4193 IPPOL_REFRELE(io->ipsec_out_policy, ns); 4194 if (io->ipsec_out_act != NULL) 4195 IPACT_REFRELE(io->ipsec_out_act); 4196 if (io->ipsec_out_cred != NULL) { 4197 crfree(io->ipsec_out_cred); 4198 io->ipsec_out_cred = NULL; 4199 } 4200 if (io->ipsec_out_latch) { 4201 IPLATCH_REFRELE(io->ipsec_out_latch, ns); 4202 io->ipsec_out_latch = NULL; 4203 } 4204 } 4205 4206 static void 4207 ipsec_out_free(void *arg) 4208 { 4209 ipsec_out_t *io = (ipsec_out_t *)arg; 4210 ipsec_out_release_refs(io); 4211 kmem_cache_free(ipsec_info_cache, arg); 4212 } 4213 4214 static void 4215 ipsec_in_release_refs(ipsec_in_t *ii) 4216 { 4217 netstack_t *ns = ii->ipsec_in_ns; 4218 4219 ASSERT(ii->ipsec_in_ns != NULL); 4220 4221 /* Note: IPSA_REFRELE is multi-line macro */ 4222 if (ii->ipsec_in_ah_sa != NULL) 4223 IPSA_REFRELE(ii->ipsec_in_ah_sa); 4224 if (ii->ipsec_in_esp_sa != NULL) 4225 IPSA_REFRELE(ii->ipsec_in_esp_sa); 4226 if (ii->ipsec_in_policy != NULL) 4227 IPPH_REFRELE(ii->ipsec_in_policy, ns); 4228 if (ii->ipsec_in_da != NULL) { 4229 freeb(ii->ipsec_in_da); 4230 ii->ipsec_in_da = NULL; 4231 } 4232 } 4233 4234 static void 4235 ipsec_in_free(void *arg) 4236 { 4237 ipsec_in_t *ii = (ipsec_in_t *)arg; 4238 ipsec_in_release_refs(ii); 4239 kmem_cache_free(ipsec_info_cache, arg); 4240 } 4241 4242 /* 4243 * This is called only for outbound datagrams if the datagram needs to 4244 * go out secure. A NULL mp can be passed to get an ipsec_out. This 4245 * facility is used by ip_unbind. 4246 * 4247 * NOTE : o As the data part could be modified by ipsec_out_process etc. 4248 * we can't make it fast by calling a dup. 
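 *
 * Hedged usage sketch (this mirrors ipsec_attach_ipsec_out()
 * below; the variable names are illustrative):
 *
 *	ipsec_mp = ipsec_alloc_ipsec_out(ns);
 *	if (ipsec_mp == NULL)
 *		return (NULL);		// caller handles the drop
 *	ipsec_mp->b_cont = data_mp;	// M_CTL now precedes the data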
4249 */ 4250 mblk_t * 4251 ipsec_alloc_ipsec_out(netstack_t *ns) 4252 { 4253 mblk_t *ipsec_mp; 4254 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4255 4256 if (io == NULL) 4257 return (NULL); 4258 4259 bzero(io, sizeof (ipsec_out_t)); 4260 4261 io->ipsec_out_type = IPSEC_OUT; 4262 io->ipsec_out_len = sizeof (ipsec_out_t); 4263 io->ipsec_out_frtn.free_func = ipsec_out_free; 4264 io->ipsec_out_frtn.free_arg = (char *)io; 4265 4266 /* 4267 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 4268 * using ipsec_out_zoneid should assert that the zoneid has been set to 4269 * a sane value. 4270 */ 4271 io->ipsec_out_zoneid = ALL_ZONES; 4272 io->ipsec_out_ns = ns; /* No netstack_hold */ 4273 4274 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 4275 &io->ipsec_out_frtn); 4276 if (ipsec_mp == NULL) { 4277 ipsec_out_free(io); 4278 4279 return (NULL); 4280 } 4281 ipsec_mp->b_datap->db_type = M_CTL; 4282 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 4283 4284 return (ipsec_mp); 4285 } 4286 4287 /* 4288 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4289 * Otherwise initialize using conn. 4290 * 4291 * If pol is non-null, we consume a reference to it. 4292 */ 4293 mblk_t * 4294 ipsec_attach_ipsec_out(mblk_t **mp, conn_t *connp, ipsec_policy_t *pol, 4295 uint8_t proto, netstack_t *ns) 4296 { 4297 mblk_t *ipsec_mp; 4298 ipsec_stack_t *ipss = ns->netstack_ipsec; 4299 4300 ASSERT((pol != NULL) || (connp != NULL)); 4301 4302 ipsec_mp = ipsec_alloc_ipsec_out(ns); 4303 if (ipsec_mp == NULL) { 4304 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4305 "ipsec_attach_ipsec_out: Allocation failure\n"); 4306 ip_drop_packet(*mp, B_FALSE, NULL, NULL, 4307 DROPPER(ipss, ipds_spd_nomem), 4308 &ipss->ipsec_spd_dropper); 4309 *mp = NULL; 4310 return (NULL); 4311 } 4312 ipsec_mp->b_cont = *mp; 4313 /* 4314 * If *mp is NULL, ipsec_init_ipsec_out() won't/should not be using it. 4315 */ 4316 return (ipsec_init_ipsec_out(ipsec_mp, mp, connp, pol, proto, ns)); 4317 } 4318 4319 /* 4320 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4321 * Otherwise initialize using conn. 4322 * 4323 * If pol is non-null, we consume a reference to it. 4324 */ 4325 mblk_t * 4326 ipsec_init_ipsec_out(mblk_t *ipsec_mp, mblk_t **mp, conn_t *connp, 4327 ipsec_policy_t *pol, uint8_t proto, netstack_t *ns) 4328 { 4329 ipsec_out_t *io; 4330 ipsec_policy_t *p; 4331 ipha_t *ipha; 4332 ip6_t *ip6h; 4333 ipsec_stack_t *ipss = ns->netstack_ipsec; 4334 4335 ASSERT(ipsec_mp->b_cont == *mp); 4336 4337 ASSERT((pol != NULL) || (connp != NULL)); 4338 4339 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4340 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4341 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4342 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4343 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4344 io->ipsec_out_latch = NULL; 4345 /* 4346 * Set the zoneid when we have the connp. 4347 * Otherwise, we're called from ip_wput_attach_policy() who will take 4348 * care of setting the zoneid. 
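 *
 * (ipsec_alloc_ipsec_out() preset ipsec_out_zoneid to the ALL_ZONES
 * sentinel, so consumers can assert that one of these two paths
 * eventually filled in a sane zoneid.)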
4349 */ 4350 if (connp != NULL) 4351 io->ipsec_out_zoneid = connp->conn_zoneid; 4352 4353 io->ipsec_out_ns = ns; /* No netstack_hold */ 4354 4355 if (*mp != NULL) { 4356 ipha = (ipha_t *)(*mp)->b_rptr; 4357 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4358 io->ipsec_out_v4 = B_TRUE; 4359 ip6h = NULL; 4360 } else { 4361 io->ipsec_out_v4 = B_FALSE; 4362 ip6h = (ip6_t *)ipha; 4363 ipha = NULL; 4364 } 4365 } else { 4366 ASSERT(connp != NULL && connp->conn_policy_cached); 4367 ip6h = NULL; 4368 ipha = NULL; 4369 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4370 } 4371 4372 p = NULL; 4373 4374 /* 4375 * Take latched policies over global policy. Check here again for 4376 * this, in case we had conn_latch set while the packet was flying 4377 * around in IP. 4378 */ 4379 if (connp != NULL && connp->conn_latch != NULL) { 4380 ASSERT(ns == connp->conn_netstack); 4381 p = connp->conn_latch->ipl_out_policy; 4382 io->ipsec_out_latch = connp->conn_latch; 4383 IPLATCH_REFHOLD(connp->conn_latch); 4384 if (p != NULL) { 4385 IPPOL_REFHOLD(p); 4386 } 4387 io->ipsec_out_src_port = connp->conn_lport; 4388 io->ipsec_out_dst_port = connp->conn_fport; 4389 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4390 if (pol != NULL) 4391 IPPOL_REFRELE(pol, ns); 4392 } else if (pol != NULL) { 4393 ipsec_selector_t sel; 4394 4395 bzero((void*)&sel, sizeof (sel)); 4396 4397 p = pol; 4398 /* 4399 * conn does not have the port information. Get 4400 * it from the packet. 4401 */ 4402 4403 if (!ipsec_init_outbound_ports(&sel, *mp, ipha, ip6h, 0, 4404 ns->netstack_ipsec)) { 4405 /* Callee did ip_drop_packet() on *mp. */ 4406 *mp = NULL; 4407 freeb(ipsec_mp); 4408 return (NULL); 4409 } 4410 io->ipsec_out_src_port = sel.ips_local_port; 4411 io->ipsec_out_dst_port = sel.ips_remote_port; 4412 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4413 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4414 } 4415 4416 io->ipsec_out_proto = proto; 4417 io->ipsec_out_use_global_policy = B_TRUE; 4418 io->ipsec_out_secure = (p != NULL); 4419 io->ipsec_out_policy = p; 4420 4421 if (p == NULL) { 4422 if (connp->conn_policy != NULL) { 4423 io->ipsec_out_secure = B_TRUE; 4424 ASSERT(io->ipsec_out_latch == NULL); 4425 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4426 io->ipsec_out_need_policy = B_TRUE; 4427 ASSERT(io->ipsec_out_polhead == NULL); 4428 IPPH_REFHOLD(connp->conn_policy); 4429 io->ipsec_out_polhead = connp->conn_policy; 4430 } 4431 } else { 4432 /* Handle explicit drop action. */ 4433 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4434 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4435 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4436 DROPPER(ipss, ipds_spd_explicit), 4437 &ipss->ipsec_spd_dropper); 4438 *mp = NULL; 4439 ipsec_mp = NULL; 4440 } 4441 } 4442 4443 return (ipsec_mp); 4444 } 4445 4446 /* 4447 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4448 * and keep track of what-if-any IPsec processing will be applied to the 4449 * datagram. 
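 *
 * A hedged sketch of the usual pattern (compare ipsec_in_tag()
 * above; names are illustrative):
 *
 *	mblk_t *ipsec_mp = ipsec_in_alloc(B_TRUE, ns);	// v4 packet
 *	if (ipsec_mp == NULL)
 *		return;			// allocation failure path
 *	ipsec_mp->b_cont = data_mp;	// M_CTL precedes the data
 *
 * Freeing the M_CTL runs ipsec_in_free() via the desballoc() free
 * routine, which releases any SA/policy references held.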
4450 */ 4451 mblk_t * 4452 ipsec_in_alloc(boolean_t isv4, netstack_t *ns) 4453 { 4454 mblk_t *ipsec_in; 4455 ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4456 4457 if (ii == NULL) 4458 return (NULL); 4459 4460 bzero(ii, sizeof (ipsec_info_t)); 4461 ii->ipsec_in_type = IPSEC_IN; 4462 ii->ipsec_in_len = sizeof (ipsec_in_t); 4463 4464 ii->ipsec_in_v4 = isv4; 4465 ii->ipsec_in_secure = B_TRUE; 4466 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4467 4468 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4469 ii->ipsec_in_frtn.free_arg = (char *)ii; 4470 4471 ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI, 4472 &ii->ipsec_in_frtn); 4473 if (ipsec_in == NULL) { 4474 ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n")); 4475 ipsec_in_free(ii); 4476 return (NULL); 4477 } 4478 4479 ipsec_in->b_datap->db_type = M_CTL; 4480 ipsec_in->b_wptr += sizeof (ipsec_info_t); 4481 4482 return (ipsec_in); 4483 } 4484 4485 /* 4486 * This is called from ip_wput_local when a packet which needs 4487 * security is looped back, to convert the IPSEC_OUT to an IPSEC_IN 4488 * before fanout, where the policy check happens. In most of the 4489 * cases, IPSEC processing has *never* been done. There is one case 4490 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where 4491 * the packet is destined for localhost and IPSEC processing has already 4492 * been done. 4493 * 4494 * Future: This could happen after SA selection has occurred for 4495 * outbound, which will tell us who the src and dst identities are. 4496 * Then it's just a matter of splicing the ah/esp SA pointers from the 4497 * ipsec_out_t to the ipsec_in_t. 4498 */ 4499 void 4500 ipsec_out_to_in(mblk_t *ipsec_mp) 4501 { 4502 ipsec_in_t *ii; 4503 ipsec_out_t *io; 4504 ipsec_policy_t *pol; 4505 ipsec_action_t *act; 4506 boolean_t v4, icmp_loopback; 4507 zoneid_t zoneid; 4508 netstack_t *ns; 4509 4510 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4511 4512 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4513 4514 v4 = io->ipsec_out_v4; 4515 zoneid = io->ipsec_out_zoneid; 4516 icmp_loopback = io->ipsec_out_icmp_loopback; 4517 ns = io->ipsec_out_ns; 4518 4519 act = io->ipsec_out_act; 4520 if (act == NULL) { 4521 pol = io->ipsec_out_policy; 4522 if (pol != NULL) { 4523 act = pol->ipsp_act; 4524 IPACT_REFHOLD(act); 4525 } 4526 } 4527 io->ipsec_out_act = NULL; 4528 4529 ipsec_out_release_refs(io); /* No netstack_rele/hold needed */ 4530 4531 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4532 bzero(ii, sizeof (ipsec_in_t)); 4533 ii->ipsec_in_type = IPSEC_IN; 4534 ii->ipsec_in_len = sizeof (ipsec_in_t); 4535 ii->ipsec_in_loopback = B_TRUE; 4536 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4537 4538 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4539 ii->ipsec_in_frtn.free_arg = (char *)ii; 4540 ii->ipsec_in_action = act; 4541 ii->ipsec_in_zoneid = zoneid; 4542 4543 /* 4544 * In most of the cases, we can't look at the ipsec_out_XXX_sa 4545 * because this never went through IPSEC processing. So, look at 4546 * the requests and infer whether it would have gone through 4547 * IPSEC processing or not. Initialize the "done" fields with 4548 * the requests. The possible values for the "done" fields are: 4549 * 4550 * 1) zero, indicates that a particular preference was never 4551 * requested. 4552 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/ 4553 * IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that 4554 * IPSEC processing has been completed. 
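 *
 * Worked example (illustrative): a looped-back packet whose
 * original request was ESP with IPSEC_PREF_REQUIRED, but which
 * never went through ESP processing, carries a non-zero esp "done"
 * field that lacks IPSEC_REQ_DONE (wanted, but not yet applied),
 * while a protocol that was never requested stays zero.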
4555 */ 4556 ii->ipsec_in_secure = B_TRUE; 4557 ii->ipsec_in_v4 = v4; 4558 ii->ipsec_in_icmp_loopback = icmp_loopback; 4559 ii->ipsec_in_attach_if = B_FALSE; 4560 } 4561 4562 /* 4563 * Consults global policy to see whether this datagram should 4564 * go out secure. If so, it attaches an ipsec_mp in front and 4565 * returns. 4566 */ 4567 mblk_t * 4568 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, 4569 conn_t *connp, boolean_t unspec_src, zoneid_t zoneid) 4570 { 4571 mblk_t *mp; 4572 ipsec_out_t *io = NULL; 4573 ipsec_selector_t sel; 4574 uint_t ill_index; 4575 boolean_t conn_dontroutex; 4576 boolean_t conn_multicast_loopx; 4577 boolean_t policy_present; 4578 ip_stack_t *ipst = ire->ire_ipst; 4579 netstack_t *ns = ipst->ips_netstack; 4580 ipsec_stack_t *ipss = ns->netstack_ipsec; 4581 4582 ASSERT((ipha != NULL && ip6h == NULL) || 4583 (ip6h != NULL && ipha == NULL)); 4584 4585 bzero((void*)&sel, sizeof (sel)); 4586 4587 if (ipha != NULL) 4588 policy_present = ipss->ipsec_outbound_v4_policy_present; 4589 else 4590 policy_present = ipss->ipsec_outbound_v6_policy_present; 4591 /* 4592 * Fast Path to see if there is any policy. 4593 */ 4594 if (!policy_present) { 4595 if (ipsec_mp->b_datap->db_type == M_CTL) { 4596 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4597 if (!io->ipsec_out_secure) { 4598 /* 4599 * If there is no global policy and ip_wput 4600 * or ip_wput_multicast has attached this mp 4601 * for the multicast case, free the ipsec_mp and 4602 * return the original mp. 4603 */ 4604 mp = ipsec_mp->b_cont; 4605 freeb(ipsec_mp); 4606 ipsec_mp = mp; 4607 io = NULL; 4608 } 4609 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4610 } 4611 if (((io == NULL) || (io->ipsec_out_polhead == NULL)) && 4612 ((connp == NULL) || (connp->conn_policy == NULL))) 4613 return (ipsec_mp); 4614 } 4615 4616 ill_index = 0; 4617 conn_multicast_loopx = conn_dontroutex = B_FALSE; 4618 mp = ipsec_mp; 4619 if (ipsec_mp->b_datap->db_type == M_CTL) { 4620 mp = ipsec_mp->b_cont; 4621 /* 4622 * This is a connection where we have some per-socket 4623 * policy or ip_wput has attached an ipsec_mp for 4624 * the multicast datagram. 4625 */ 4626 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4627 if (!io->ipsec_out_secure) { 4628 /* 4629 * This ipsec_mp was allocated in ip_wput or 4630 * ip_wput_multicast so that we will know the 4631 * value of ill_index, conn_dontroute, 4632 * conn_multicast_loop in the multicast case if 4633 * we inherit global policy here. 4634 */ 4635 ill_index = io->ipsec_out_ill_index; 4636 conn_dontroutex = io->ipsec_out_dontroute; 4637 conn_multicast_loopx = io->ipsec_out_multicast_loop; 4638 freeb(ipsec_mp); 4639 ipsec_mp = mp; 4640 io = NULL; 4641 } 4642 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4643 } 4644 4645 if (ipha != NULL) { 4646 sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ? 
4647 ipha->ipha_src : ire->ire_src_addr); 4648 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4649 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4650 sel.ips_isv4 = B_TRUE; 4651 } else { 4652 ushort_t hdr_len; 4653 uint8_t *nexthdrp; 4654 boolean_t is_fragment; 4655 4656 sel.ips_isv4 = B_FALSE; 4657 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4658 if (!unspec_src) 4659 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4660 } else { 4661 sel.ips_local_addr_v6 = ip6h->ip6_src; 4662 } 4663 4664 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment); 4665 if (is_fragment) { 4666 /* 4667 * It's a packet fragment for a packet that 4668 * we have already processed (since IPsec processing 4669 * is done before fragmentation), so we don't 4670 * have to do policy checks again. Fragments can 4671 * come back to us for processing if they have 4672 * been queued up due to flow control. 4673 */ 4674 if (ipsec_mp->b_datap->db_type == M_CTL) { 4675 mp = ipsec_mp->b_cont; 4676 freeb(ipsec_mp); 4677 ipsec_mp = mp; 4678 } 4679 return (ipsec_mp); 4680 } 4681 4682 /* IPv6 common-case. */ 4683 sel.ips_protocol = ip6h->ip6_nxt; 4684 switch (ip6h->ip6_nxt) { 4685 case IPPROTO_TCP: 4686 case IPPROTO_UDP: 4687 case IPPROTO_SCTP: 4688 case IPPROTO_ICMPV6: 4689 break; 4690 default: 4691 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4692 &hdr_len, &nexthdrp)) { 4693 BUMP_MIB(&ipst->ips_ip6_mib, 4694 ipIfStatsOutDiscards); 4695 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4696 return (NULL); 4697 } 4698 sel.ips_protocol = *nexthdrp; 4699 break; 4700 } 4701 } 4702 4703 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4704 if (ipha != NULL) { 4705 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4706 } else { 4707 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4708 } 4709 4710 /* Callee dropped the packet. */ 4711 return (NULL); 4712 } 4713 4714 if (io != NULL) { 4715 /* 4716 * We seem to have some local policy (we already have 4717 * an ipsec_out). Look at global policy and see 4718 * whether we have to inherit or not. 4719 */ 4720 io->ipsec_out_need_policy = B_FALSE; 4721 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, 4722 &sel, ns); 4723 ASSERT((io->ipsec_out_policy != NULL) || 4724 (io->ipsec_out_act != NULL)); 4725 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4726 return (ipsec_mp); 4727 } 4728 /* 4729 * We pass in a pointer to a pointer because mp can become 4730 * NULL due to allocation failures or explicit drops. Callers 4731 * of this function should assume a NULL mp means the packet 4732 * was dropped. 4733 */ 4734 ipsec_mp = ipsec_attach_global_policy(&mp, connp, &sel, ns); 4735 if (ipsec_mp == NULL) 4736 return (mp); 4737 4738 /* 4739 * Copy the right port information. 4740 */ 4741 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4742 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4743 4744 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4745 ASSERT((io->ipsec_out_policy != NULL) || 4746 (io->ipsec_out_act != NULL)); 4747 io->ipsec_out_src_port = sel.ips_local_port; 4748 io->ipsec_out_dst_port = sel.ips_remote_port; 4749 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4750 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4751 /* 4752 * Set ill_index, conn_dontroute and conn_multicast_loop 4753 * for multicast datagrams. 
4754 */ 4755 io->ipsec_out_ill_index = ill_index; 4756 io->ipsec_out_dontroute = conn_dontroutex; 4757 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4758 4759 if (zoneid == ALL_ZONES) 4760 zoneid = GLOBAL_ZONEID; 4761 io->ipsec_out_zoneid = zoneid; 4762 return (ipsec_mp); 4763 } 4764 4765 /* 4766 * When appropriate, this function caches inbound and outbound policy 4767 * for this connection. 4768 * 4769 * XXX need to work out more details about per-interface policy and 4770 * caching here! 4771 * 4772 * XXX may want to split inbound and outbound caching for ill.. 4773 */ 4774 int 4775 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4776 { 4777 boolean_t global_policy_present; 4778 netstack_t *ns = connp->conn_netstack; 4779 ipsec_stack_t *ipss = ns->netstack_ipsec; 4780 4781 /* 4782 * There is no policy latching for ICMP sockets because we can't 4783 * decide on which policy to use until we see the packet and get 4784 * type/code selectors. 4785 */ 4786 if (connp->conn_ulp == IPPROTO_ICMP || 4787 connp->conn_ulp == IPPROTO_ICMPV6) { 4788 connp->conn_in_enforce_policy = 4789 connp->conn_out_enforce_policy = B_TRUE; 4790 if (connp->conn_latch != NULL) { 4791 IPLATCH_REFRELE(connp->conn_latch, ns); 4792 connp->conn_latch = NULL; 4793 } 4794 connp->conn_flags |= IPCL_CHECK_POLICY; 4795 return (0); 4796 } 4797 4798 global_policy_present = isv4 ? 4799 (ipss->ipsec_outbound_v4_policy_present || 4800 ipss->ipsec_inbound_v4_policy_present) : 4801 (ipss->ipsec_outbound_v6_policy_present || 4802 ipss->ipsec_inbound_v6_policy_present); 4803 4804 if ((connp->conn_policy != NULL) || global_policy_present) { 4805 ipsec_selector_t sel; 4806 ipsec_policy_t *p; 4807 4808 if (connp->conn_latch == NULL && 4809 (connp->conn_latch = iplatch_create()) == NULL) { 4810 return (ENOMEM); 4811 } 4812 4813 sel.ips_protocol = connp->conn_ulp; 4814 sel.ips_local_port = connp->conn_lport; 4815 sel.ips_remote_port = connp->conn_fport; 4816 sel.ips_is_icmp_inv_acq = 0; 4817 sel.ips_isv4 = isv4; 4818 if (isv4) { 4819 sel.ips_local_addr_v4 = connp->conn_src; 4820 sel.ips_remote_addr_v4 = connp->conn_rem; 4821 } else { 4822 sel.ips_local_addr_v6 = connp->conn_srcv6; 4823 sel.ips_remote_addr_v6 = connp->conn_remv6; 4824 } 4825 4826 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 4827 ns); 4828 if (connp->conn_latch->ipl_in_policy != NULL) 4829 IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns); 4830 connp->conn_latch->ipl_in_policy = p; 4831 connp->conn_in_enforce_policy = (p != NULL); 4832 4833 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel, 4834 ns); 4835 if (connp->conn_latch->ipl_out_policy != NULL) 4836 IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns); 4837 connp->conn_latch->ipl_out_policy = p; 4838 connp->conn_out_enforce_policy = (p != NULL); 4839 4840 /* Clear the latched actions too, in case we're recaching. */ 4841 if (connp->conn_latch->ipl_out_action != NULL) 4842 IPACT_REFRELE(connp->conn_latch->ipl_out_action); 4843 if (connp->conn_latch->ipl_in_action != NULL) 4844 IPACT_REFRELE(connp->conn_latch->ipl_in_action); 4845 } 4846 4847 /* 4848 * We may or may not have policy for this endpoint. We still set 4849 * conn_policy_cached so that inbound datagrams don't have to look 4850 * at global policy as policy is considered latched for these 4851 * endpoints. We should not set conn_policy_cached until the conn 4852 * reflects the actual policy. 
If we *set* this before inheriting 4853 * the policy, there is a window where the 4854 * CONN_INBOUND_POLICY_PRESENT check will neither check with the policy 4855 * on the conn (because we have not yet copied the policy onto the 4856 * conn and hence not set conn_in_enforce_policy) nor with the 4857 * global policy (because conn_policy_cached is already set). 4858 */ 4859 connp->conn_policy_cached = B_TRUE; 4860 if (connp->conn_in_enforce_policy) 4861 connp->conn_flags |= IPCL_CHECK_POLICY; 4862 return (0); 4863 } 4864 4865 void 4866 iplatch_free(ipsec_latch_t *ipl, netstack_t *ns) 4867 { 4868 if (ipl->ipl_out_policy != NULL) 4869 IPPOL_REFRELE(ipl->ipl_out_policy, ns); 4870 if (ipl->ipl_in_policy != NULL) 4871 IPPOL_REFRELE(ipl->ipl_in_policy, ns); 4872 if (ipl->ipl_in_action != NULL) 4873 IPACT_REFRELE(ipl->ipl_in_action); 4874 if (ipl->ipl_out_action != NULL) 4875 IPACT_REFRELE(ipl->ipl_out_action); 4876 if (ipl->ipl_local_cid != NULL) 4877 IPSID_REFRELE(ipl->ipl_local_cid); 4878 if (ipl->ipl_remote_cid != NULL) 4879 IPSID_REFRELE(ipl->ipl_remote_cid); 4880 if (ipl->ipl_local_id != NULL) 4881 crfree(ipl->ipl_local_id); 4882 mutex_destroy(&ipl->ipl_lock); 4883 kmem_free(ipl, sizeof (*ipl)); 4884 } 4885 4886 ipsec_latch_t * 4887 iplatch_create() 4888 { 4889 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4890 if (ipl == NULL) 4891 return (ipl); 4892 bzero(ipl, sizeof (*ipl)); 4893 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4894 ipl->ipl_refcnt = 1; 4895 return (ipl); 4896 } 4897 4898 /* 4899 * Hash function for ID hash table. 4900 */ 4901 static uint32_t 4902 ipsid_hash(int idtype, char *idstring) 4903 { 4904 uint32_t hval = idtype; 4905 unsigned char c; 4906 4907 while ((c = *idstring++) != 0) { 4908 hval = (hval << 4) | (hval >> 28); 4909 hval ^= c; 4910 } 4911 hval = hval ^ (hval >> 16); 4912 return (hval & (IPSID_HASHSIZE-1)); 4913 } 4914 4915 /* 4916 * Look up identity string in hash table. Return identity object 4917 * corresponding to the name -- either preexisting, or newly allocated. 4918 * 4919 * Return NULL if we need to allocate a new one and can't get memory. 
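 *
 * Hedged usage sketch (identities are interned, so equal strings
 * of the same idtype yield the same refheld object):
 *
 *	ipsid_t *id = ipsid_lookup(idtype, idstring, ns);
 *	if (id == NULL)
 *		return (ENOMEM);	// allocation failure
 *	...
 *	IPSID_REFRELE(id);	// ipsid_gc() later reaps refcnt==0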
4920 */ 4921 ipsid_t * 4922 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4923 { 4924 ipsid_t *retval; 4925 char *nstr; 4926 int idlen = strlen(idstring) + 1; 4927 ipsec_stack_t *ipss = ns->netstack_ipsec; 4928 ipsif_t *bucket; 4929 4930 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4931 4932 mutex_enter(&bucket->ipsif_lock); 4933 4934 for (retval = bucket->ipsif_head; retval != NULL; 4935 retval = retval->ipsid_next) { 4936 if (idtype != retval->ipsid_type) 4937 continue; 4938 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4939 continue; 4940 4941 IPSID_REFHOLD(retval); 4942 mutex_exit(&bucket->ipsif_lock); 4943 return (retval); 4944 } 4945 4946 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4947 if (!retval) { 4948 mutex_exit(&bucket->ipsif_lock); 4949 return (NULL); 4950 } 4951 4952 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4953 if (!nstr) { 4954 mutex_exit(&bucket->ipsif_lock); 4955 kmem_free(retval, sizeof (*retval)); 4956 return (NULL); 4957 } 4958 4959 retval->ipsid_refcnt = 1; 4960 retval->ipsid_next = bucket->ipsif_head; 4961 if (retval->ipsid_next != NULL) 4962 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4963 retval->ipsid_ptpn = &bucket->ipsif_head; 4964 retval->ipsid_type = idtype; 4965 retval->ipsid_cid = nstr; 4966 bucket->ipsif_head = retval; 4967 bcopy(idstring, nstr, idlen); 4968 mutex_exit(&bucket->ipsif_lock); 4969 4970 return (retval); 4971 } 4972 4973 /* 4974 * Garbage collect the identity hash table. 4975 */ 4976 void 4977 ipsid_gc(netstack_t *ns) 4978 { 4979 int i, len; 4980 ipsid_t *id, *nid; 4981 ipsif_t *bucket; 4982 ipsec_stack_t *ipss = ns->netstack_ipsec; 4983 4984 for (i = 0; i < IPSID_HASHSIZE; i++) { 4985 bucket = &ipss->ipsec_ipsid_buckets[i]; 4986 mutex_enter(&bucket->ipsif_lock); 4987 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4988 nid = id->ipsid_next; 4989 if (id->ipsid_refcnt == 0) { 4990 *id->ipsid_ptpn = nid; 4991 if (nid != NULL) 4992 nid->ipsid_ptpn = id->ipsid_ptpn; 4993 len = strlen(id->ipsid_cid) + 1; 4994 kmem_free(id->ipsid_cid, len); 4995 kmem_free(id, sizeof (*id)); 4996 } 4997 } 4998 mutex_exit(&bucket->ipsif_lock); 4999 } 5000 } 5001 5002 /* 5003 * Return true if two identities are the same. 5004 */ 5005 boolean_t 5006 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 5007 { 5008 if (id1 == id2) 5009 return (B_TRUE); 5010 #ifdef DEBUG 5011 if ((id1 == NULL) || (id2 == NULL)) 5012 return (B_FALSE); 5013 /* 5014 * Test that we're interning IDs correctly. 5015 */ 5016 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 5017 (id1->ipsid_type != id2->ipsid_type)); 5018 #endif 5019 return (B_FALSE); 5020 } 5021 5022 /* 5023 * Initialize identity table; called during module initialization. 
5024 */ 5025 static void 5026 ipsid_init(netstack_t *ns) 5027 { 5028 ipsif_t *bucket; 5029 int i; 5030 ipsec_stack_t *ipss = ns->netstack_ipsec; 5031 5032 for (i = 0; i < IPSID_HASHSIZE; i++) { 5033 bucket = &ipss->ipsec_ipsid_buckets[i]; 5034 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 5035 } 5036 } 5037 5038 /* 5039 * Free identity table (preparatory to module unload) 5040 */ 5041 static void 5042 ipsid_fini(netstack_t *ns) 5043 { 5044 ipsif_t *bucket; 5045 int i; 5046 ipsec_stack_t *ipss = ns->netstack_ipsec; 5047 5048 for (i = 0; i < IPSID_HASHSIZE; i++) { 5049 bucket = &ipss->ipsec_ipsid_buckets[i]; 5050 ASSERT(bucket->ipsif_head == NULL); 5051 mutex_destroy(&bucket->ipsif_lock); 5052 } 5053 } 5054 5055 /* 5056 * Update the minimum and maximum supported key sizes for the 5057 * specified algorithm. Must be called while holding the algorithms lock. 5058 */ 5059 void 5060 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 5061 netstack_t *ns) 5062 { 5063 size_t crypto_min = (size_t)-1, crypto_max = 0; 5064 size_t cur_crypto_min, cur_crypto_max; 5065 boolean_t is_valid; 5066 crypto_mechanism_info_t *mech_infos; 5067 uint_t nmech_infos; 5068 int crypto_rc, i; 5069 crypto_mech_usage_t mask; 5070 ipsec_stack_t *ipss = ns->netstack_ipsec; 5071 5072 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 5073 5074 /* 5075 * Compute the min, max, and default key sizes (in number of 5076 * increments to the default key size in bits) as defined 5077 * by the algorithm mappings. This range of key sizes is used 5078 * for policy related operations. The effective key sizes 5079 * supported by the framework could be more limited than 5080 * those defined for an algorithm. 5081 */ 5082 alg->alg_default_bits = alg->alg_key_sizes[0]; 5083 if (alg->alg_increment != 0) { 5084 /* key sizes are defined by range & increment */ 5085 alg->alg_minbits = alg->alg_key_sizes[1]; 5086 alg->alg_maxbits = alg->alg_key_sizes[2]; 5087 5088 alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits, 5089 alg->alg_increment, alg->alg_default_bits); 5090 } else if (alg->alg_nkey_sizes == 0) { 5091 /* no specified key size for algorithm */ 5092 alg->alg_minbits = alg->alg_maxbits = 0; 5093 } else { 5094 /* key sizes are defined by enumeration */ 5095 alg->alg_minbits = (uint16_t)-1; 5096 alg->alg_maxbits = 0; 5097 5098 for (i = 0; i < alg->alg_nkey_sizes; i++) { 5099 if (alg->alg_key_sizes[i] < alg->alg_minbits) 5100 alg->alg_minbits = alg->alg_key_sizes[i]; 5101 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 5102 alg->alg_maxbits = alg->alg_key_sizes[i]; 5103 } 5104 alg->alg_default = 0; 5105 } 5106 5107 if (!(alg->alg_flags & ALG_FLAG_VALID)) 5108 return; 5109 5110 /* 5111 * Mechanisms do not apply to the NULL encryption 5112 * algorithm, so simply return for this case. 5113 */ 5114 if (alg->alg_id == SADB_EALG_NULL) 5115 return; 5116 5117 /* 5118 * Find the min and max key sizes supported by the cryptographic 5119 * framework providers. 5120 */ 5121 5122 /* get the key sizes supported by the framework */ 5123 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 5124 &mech_infos, &nmech_infos, KM_SLEEP); 5125 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 5126 alg->alg_flags &= ~ALG_FLAG_VALID; 5127 return; 5128 } 5129 5130 /* min and max key sizes supported by framework */ 5131 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 5132 int unit_bits; 5133 5134 /* 5135 * Ignore entries that do not support the operations 5136 * needed for the algorithm type. 
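 * For example (illustrative): an IPSEC_ALG_AUTH entry only needs
 * CRYPTO_MECH_USAGE_MAC, while an encryption entry must offer both
 * ENCRYPT and DECRYPT; a provider advertising ENCRYPT alone fails
 * the (mi_usage & mask) != mask test below and is skipped.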
5137 */ 5138 if (alg_type == IPSEC_ALG_AUTH) { 5139 mask = CRYPTO_MECH_USAGE_MAC; 5140 } else { 5141 mask = CRYPTO_MECH_USAGE_ENCRYPT | 5142 CRYPTO_MECH_USAGE_DECRYPT; 5143 } 5144 if ((mech_infos[i].mi_usage & mask) != mask) 5145 continue; 5146 5147 unit_bits = (mech_infos[i].mi_keysize_unit == 5148 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 5149 /* adjust min/max supported by framework */ 5150 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 5151 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 5152 5153 if (cur_crypto_min < crypto_min) 5154 crypto_min = cur_crypto_min; 5155 5156 /* 5157 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 5158 * the crypto framework which means "no upper limit". 5159 */ 5160 if (mech_infos[i].mi_max_key_size == 5161 CRYPTO_EFFECTIVELY_INFINITE) { 5162 crypto_max = (size_t)-1; 5163 } else if (cur_crypto_max > crypto_max) { 5164 crypto_max = cur_crypto_max; 5165 } 5166 5167 is_valid = B_TRUE; 5168 } 5169 5170 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 5171 nmech_infos); 5172 5173 if (!is_valid) { 5174 /* no key sizes supported by framework */ 5175 alg->alg_flags &= ~ALG_FLAG_VALID; 5176 return; 5177 } 5178 5179 /* 5180 * Determine min and max key sizes from alg_key_sizes[] 5181 * defined for the algorithm entry. Adjust key sizes based on 5182 * those supported by the framework. 5183 */ 5184 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 5185 if (alg->alg_increment != 0) { 5186 /* supported key sizes are defined by range & increment */ 5187 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 5188 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 5189 5190 alg->alg_ef_minbits = MAX(alg->alg_minbits, 5191 (uint16_t)crypto_min); 5192 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 5193 (uint16_t)crypto_max); 5194 5195 /* 5196 * If the sizes supported by the framework are outside 5197 * the range of sizes defined by the algorithm mappings, 5198 * the algorithm cannot be used. Check for this 5199 * condition here. 5200 */ 5201 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 5202 alg->alg_flags &= ~ALG_FLAG_VALID; 5203 return; 5204 } 5205 5206 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 5207 alg->alg_ef_default_bits = alg->alg_ef_minbits; 5208 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 5209 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 5210 5211 alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits, 5212 alg->alg_increment, alg->alg_ef_default_bits); 5213 } else if (alg->alg_nkey_sizes == 0) { 5214 /* no specified key size for algorithm */ 5215 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 5216 } else { 5217 /* supported key sizes are defined by enumeration */ 5218 alg->alg_ef_minbits = (uint16_t)-1; 5219 alg->alg_ef_maxbits = 0; 5220 5221 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 5222 /* 5223 * Ignore the current key size if it is not in the 5224 * range of sizes supported by the framework. 5225 */ 5226 if (alg->alg_key_sizes[i] < crypto_min || 5227 alg->alg_key_sizes[i] > crypto_max) 5228 continue; 5229 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 5230 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 5231 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 5232 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 5233 is_valid = B_TRUE; 5234 } 5235 5236 if (!is_valid) { 5237 alg->alg_flags &= ~ALG_FLAG_VALID; 5238 return; 5239 } 5240 alg->alg_ef_default = 0; 5241 } 5242 } 5243 5244 /* 5245 * Free the memory used by the specified algorithm. 
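 *
 * (Hedged note: the kmem_free() sizes below allow one extra
 * uint16_t slot per array, i.e. "alg_nkey_sizes + 1" entries; this
 * is assumed to match how the arrays were originally allocated.)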
5246 */ 5247 void 5248 ipsec_alg_free(ipsec_alginfo_t *alg) 5249 { 5250 if (alg == NULL) 5251 return; 5252 5253 if (alg->alg_key_sizes != NULL) { 5254 kmem_free(alg->alg_key_sizes, 5255 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 5256 alg->alg_key_sizes = NULL; 5257 } 5258 if (alg->alg_block_sizes != NULL) { 5259 kmem_free(alg->alg_block_sizes, 5260 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 5261 alg->alg_block_sizes = NULL; 5262 } 5263 kmem_free(alg, sizeof (*alg)); 5264 } 5265 5266 /* 5267 * Check the validity of the specified key size for an algorithm. 5268 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 5269 */ 5270 boolean_t 5271 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 5272 { 5273 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 5274 return (B_FALSE); 5275 5276 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 5277 /* 5278 * If the key sizes are defined by enumeration, the new 5279 * key size must be equal to one of the supported values. 5280 */ 5281 int i; 5282 5283 for (i = 0; i < alg->alg_nkey_sizes; i++) 5284 if (key_size == alg->alg_key_sizes[i]) 5285 break; 5286 if (i == alg->alg_nkey_sizes) 5287 return (B_FALSE); 5288 } 5289 5290 return (B_TRUE); 5291 } 5292 5293 /* 5294 * Callback function invoked by the crypto framework when a provider 5295 * registers or unregisters. This callback updates the algorithm 5296 * tables when a crypto algorithm is no longer available or becomes 5297 * available, and triggers the freeing/creation of context templates 5298 * associated with existing SAs, if needed. 5299 * 5300 * Need to walk all stack instances since the callback is global 5301 * for all instances. 5302 */ 5303 void 5304 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5305 { 5306 netstack_handle_t nh; 5307 netstack_t *ns; 5308 5309 netstack_next_init(&nh); 5310 while ((ns = netstack_next(&nh)) != NULL) { 5311 ipsec_prov_update_callback_stack(event, event_arg, ns); 5312 netstack_rele(ns); 5313 } 5314 netstack_next_fini(&nh); 5315 } 5316 5317 static void 5318 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5319 netstack_t *ns) 5320 { 5321 crypto_notify_event_change_t *prov_change = 5322 (crypto_notify_event_change_t *)event_arg; 5323 uint_t algidx, algid, algtype, mech_count, mech_idx; 5324 ipsec_alginfo_t *alg; 5325 ipsec_alginfo_t oalg; 5326 crypto_mech_name_t *mechs; 5327 boolean_t alg_changed = B_FALSE; 5328 ipsec_stack_t *ipss = ns->netstack_ipsec; 5329 5330 /* ignore events for which we didn't register */ 5331 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5332 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5333 "received from crypto framework\n", event)); 5334 return; 5335 } 5336 5337 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5338 if (mechs == NULL) 5339 return; 5340 5341 /* 5342 * Walk the list of currently defined IPsec algorithms. Update 5343 * the algorithm valid flag and trigger an update of the 5344 * SAs that depend on that algorithm. 5345 */ 5346 mutex_enter(&ipss->ipsec_alg_lock); 5347 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5348 for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; 5349 algidx++) { 5350 5351 algid = ipss->ipsec_sortlist[algtype][algidx]; 5352 alg = ipss->ipsec_alglists[algtype][algid]; 5353 ASSERT(alg != NULL); 5354 5355 /* 5356 * Skip the algorithms which do not map to the 5357 * crypto framework provider being added or removed. 
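 * For example (illustrative), a change notification naming an AES
 * mechanism leaves algorithm entries whose alg_mech_name maps to
 * other mechanisms completely untouched.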
5358 */ 5359 if (strncmp(alg->alg_mech_name, 5360 prov_change->ec_mech_name, 5361 CRYPTO_MAX_MECH_NAME) != 0) 5362 continue; 5363 5364 /* 5365 * Determine if the mechanism is valid. If it 5366 * is not, mark the algorithm as being invalid. If 5367 * it is, mark the algorithm as being valid. 5368 */ 5369 for (mech_idx = 0; mech_idx < mech_count; mech_idx++) 5370 if (strncmp(alg->alg_mech_name, 5371 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0) 5372 break; 5373 if (mech_idx == mech_count && 5374 alg->alg_flags & ALG_FLAG_VALID) { 5375 alg->alg_flags &= ~ALG_FLAG_VALID; 5376 alg_changed = B_TRUE; 5377 } else if (mech_idx < mech_count && 5378 !(alg->alg_flags & ALG_FLAG_VALID)) { 5379 alg->alg_flags |= ALG_FLAG_VALID; 5380 alg_changed = B_TRUE; 5381 } 5382 5383 /* 5384 * Update the supported key sizes, regardless 5385 * of whether a crypto provider was added or 5386 * removed. 5387 */ 5388 oalg = *alg; 5389 ipsec_alg_fix_min_max(alg, algtype, ns); 5390 if (!alg_changed && 5391 (alg->alg_ef_minbits != oalg.alg_ef_minbits || 5392 alg->alg_ef_maxbits != oalg.alg_ef_maxbits || 5393 alg->alg_ef_default != oalg.alg_ef_default || 5394 alg->alg_ef_default_bits != 5395 oalg.alg_ef_default_bits)) 5396 alg_changed = B_TRUE; 5397 5398 /* 5399 * Update the affected SAs if a software provider is 5400 * being added or removed. 5401 */ 5402 if (prov_change->ec_provider_type == 5403 CRYPTO_SW_PROVIDER) 5404 sadb_alg_update(algtype, alg->alg_id, 5405 prov_change->ec_change == 5406 CRYPTO_MECH_ADDED, ns); 5407 } 5408 } 5409 mutex_exit(&ipss->ipsec_alg_lock); 5410 crypto_free_mech_list(mechs, mech_count); 5411 5412 if (alg_changed) { 5413 /* 5414 * An algorithm has changed, i.e. it became valid or 5415 * invalid, or its supported key sizes have changed. 5416 * Notify ipsecah and ipsecesp of this change so 5417 * that they can send a SADB_REGISTER to their consumers. 5418 */ 5419 ipsecah_algs_changed(ns); 5420 ipsecesp_algs_changed(ns); 5421 } 5422 } 5423 5424 /* 5425 * Registers with the crypto framework to be notified of crypto 5426 * provider changes. Used to update the algorithm tables and 5427 * to free or create context templates if needed. Invoked after IPsec 5428 * is loaded successfully. 5429 * 5430 * This is called separately for each IP instance, so we ensure we only 5431 * register once. 5432 */ 5433 void 5434 ipsec_register_prov_update(void) 5435 { 5436 if (prov_update_handle != NULL) 5437 return; 5438 5439 prov_update_handle = crypto_notify_events( 5440 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); 5441 } 5442 5443 /* 5444 * Unregisters from the framework to be notified of crypto provider 5445 * changes. Called from ipsec_policy_g_destroy(). 5446 */ 5447 static void 5448 ipsec_unregister_prov_update(void) 5449 { 5450 if (prov_update_handle != NULL) 5451 crypto_unnotify_events(prov_update_handle); 5452 } 5453 5454 /* 5455 * Tunnel-mode support routines. 5456 */ 5457 5458 /* 5459 * Returns an mblk chain suitable for putnext() if policies match and IPsec 5460 * SAs are available. If there's no per-tunnel policy, or the lookup comes 5461 * back with no match, then still return the packet and have global policy 5462 * take a crack at it in IP. 5463 * 5464 * Remember -> we can be forwarding packets. Keep that in mind w.r.t. 5465 * inner-packet contents. 
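 *
 * Hedged sketch of the contract (the caller shape is an
 * assumption): a NULL return means the chain was consumed, either
 * dropped or cached while awaiting more fragments, so a caller
 * would do roughly
 *
 *	nmp = ipsec_tun_outbound(mp, atp, iiv4, iiv6, oiv4, oiv6,
 *	    hdrlen, ns);
 *	if (nmp == NULL)
 *		return;			// consumed: dropped or cached
 *	putnext(q, nmp);		// IPSEC_OUT-tagged chain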
5466 */ 5467 mblk_t * 5468 ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, 5469 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, 5470 netstack_t *ns) 5471 { 5472 ipsec_tun_pol_t *itp = atp->tun_itp; 5473 ipsec_policy_head_t *polhead; 5474 ipsec_selector_t sel; 5475 mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; 5476 mblk_t *spare_mp = NULL; 5477 ipsec_out_t *io; 5478 boolean_t is_fragment; 5479 ipsec_policy_t *pol; 5480 ipsec_stack_t *ipss = ns->netstack_ipsec; 5481 5482 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5483 outer_ipv4 != NULL && outer_ipv6 == NULL); 5484 /* We take care of inners in a bit. */ 5485 5486 /* No policy on this tunnel - let global policy have at it. */ 5487 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) 5488 return (mp); 5489 polhead = itp->itp_policy; 5490 5491 bzero(&sel, sizeof (sel)); 5492 if (inner_ipv4 != NULL) { 5493 ASSERT(inner_ipv6 == NULL); 5494 sel.ips_isv4 = B_TRUE; 5495 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5496 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5497 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5498 is_fragment = 5499 IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags); 5500 } else { 5501 ASSERT(inner_ipv6 != NULL); 5502 sel.ips_isv4 = B_FALSE; 5503 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5504 /* Use ip_get_dst_v6() just for the fragment bit. */ 5505 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, 5506 &is_fragment); 5507 /* 5508 * Reset, because we don't care about routing-header dests 5509 * in the forwarding/tunnel path. 5510 */ 5511 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5512 } 5513 5514 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5515 if (is_fragment) { 5516 ipha_t *oiph; 5517 ipha_t *iph = NULL; 5518 ip6_t *ip6h = NULL; 5519 int hdr_len; 5520 uint16_t ip6_hdr_length; 5521 uint8_t v6_proto; 5522 uint8_t *v6_proto_p; 5523 5524 /* 5525 * We have a fragment we need to track! 5526 */ 5527 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5528 outer_hdr_len, ipss); 5529 if (mp == NULL) 5530 return (NULL); 5531 5532 /* 5533 * If we get here, we have a full 5534 * fragment chain 5535 */ 5536 5537 oiph = (ipha_t *)mp->b_rptr; 5538 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5539 hdr_len = ((outer_hdr_len != 0) ? 
IPH_HDR_LENGTH(oiph) : 0); 5541 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5542 } else { 5543 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5544 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 5545 ip_drop_packet_chain(mp, B_FALSE, 5546 NULL, NULL, 5547 DROPPER(ipss, ipds_spd_nomem), 5548 &ipss->ipsec_spd_dropper); return (NULL); 5549 } 5550 ip6h = (ip6_t *)spare_mp->b_rptr; 5551 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 5552 &ip6_hdr_length, &v6_proto_p); 5553 hdr_len = ip6_hdr_length; 5554 } 5555 outer_hdr_len = hdr_len; 5556 5557 if (sel.ips_isv4) { 5558 if (iph == NULL) { 5559 /* Was v6 outer */ 5560 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5561 } 5562 inner_ipv4 = iph; 5563 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5564 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5565 sel.ips_protocol = 5566 (uint8_t)inner_ipv4->ipha_protocol; 5567 } else { 5568 if ((spare_mp == NULL) && 5569 ((spare_mp = msgpullup(mp, -1)) == NULL)) { 5570 ip_drop_packet_chain(mp, B_FALSE, 5571 NULL, NULL, 5572 DROPPER(ipss, ipds_spd_nomem), 5573 &ipss->ipsec_spd_dropper); return (NULL); 5574 } 5575 inner_ipv6 = (ip6_t *)(spare_mp->b_rptr + 5576 hdr_len); 5577 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5578 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5579 (void) ip_hdr_length_nexthdr_v6(spare_mp, 5580 inner_ipv6, &ip6_hdr_length, 5581 &v6_proto_p); 5582 v6_proto = *v6_proto_p; 5583 sel.ips_protocol = v6_proto; 5584 #ifdef FRAGCACHE_DEBUG 5585 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5586 sel.ips_protocol); 5587 #endif 5588 } 5589 /* Ports are extracted below */ 5590 } 5591 5592 /* Get ports... */ 5593 if (spare_mp != NULL) { 5594 if (!ipsec_init_outbound_ports(&sel, spare_mp, 5595 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5596 /* 5597 * callee did ip_drop_packet_chain() on 5598 * spare_mp 5599 */ 5600 ipsec_freemsg_chain(mp); 5601 return (NULL); 5602 } 5603 } else { 5604 if (!ipsec_init_outbound_ports(&sel, mp, 5605 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5606 /* callee did ip_drop_packet_chain() on mp. */ 5607 return (NULL); 5608 } 5609 } 5610 #ifdef FRAGCACHE_DEBUG 5611 if (inner_ipv4 != NULL) 5612 cmn_err(CE_WARN, 5613 "(v4) sel.ips_protocol = %d, " 5614 "sel.ips_local_port = %d, " 5615 "sel.ips_remote_port = %d\n", 5616 sel.ips_protocol, ntohs(sel.ips_local_port), 5617 ntohs(sel.ips_remote_port)); 5618 if (inner_ipv6 != NULL) 5619 cmn_err(CE_WARN, 5620 "(v6) sel.ips_protocol = %d, " 5621 "sel.ips_local_port = %d, " 5622 "sel.ips_remote_port = %d\n", 5623 sel.ips_protocol, ntohs(sel.ips_local_port), 5624 ntohs(sel.ips_remote_port)); 5625 #endif 5626 /* Success so far - done with spare_mp */ 5627 ipsec_freemsg_chain(spare_mp); 5628 } 5629 rw_enter(&polhead->iph_lock, RW_READER); 5630 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, 5631 &sel, ns); 5632 rw_exit(&polhead->iph_lock); 5633 if (pol == NULL) { 5634 /* 5635 * No matching policy on this tunnel, drop the packet. 5636 * 5637 * NOTE: Tunnel-mode tunnels are different from the 5638 * IP global transport mode policy head. For a tunnel-mode 5639 * tunnel, we drop the packet in lieu of passing it 5640 * along accepted, as a global-policy miss would. 5641 * 5642 * NOTE2: "negotiate transport" tunnels should match ALL 5643 * inbound packets, but we do not uncomment the ASSERT() 5644 * below because if/when we open PF_POLICY, a user can 5645 * shoot him/her-self in the foot with a 0 priority. 
5646 */ 5647 5648 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5649 #ifdef FRAGCACHE_DEBUG 5650 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5651 "per-port policy\n"); 5652 #endif 5653 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5654 DROPPER(ipss, ipds_spd_explicit), 5655 &ipss->ipsec_spd_dropper); 5656 return (NULL); 5657 } 5658 5659 #ifdef FRAGCACHE_DEBUG 5660 cmn_err(CE_WARN, "Found matching tunnel per-port policy\n"); 5661 #endif 5662 5663 /* Construct an IPSEC_OUT message. */ 5664 ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns); 5665 if (ipsec_mp == NULL) { 5666 IPPOL_REFRELE(pol, ns); 5667 ip_drop_packet(mp, B_FALSE, NULL, NULL, 5668 DROPPER(ipss, ipds_spd_nomem), 5669 &ipss->ipsec_spd_dropper); 5670 return (NULL); 5671 } 5672 ipsec_mp->b_cont = mp; 5673 io = (ipsec_out_t *)ipsec_mp->b_rptr; 5674 IPPH_REFHOLD(polhead); 5675 /* 5676 * NOTE: the free() function of the ipsec_out mblk will release the 5677 * polhead and pol references. 5678 */ 5679 io->ipsec_out_polhead = polhead; 5680 io->ipsec_out_policy = pol; 5681 io->ipsec_out_zoneid = atp->tun_zoneid; 5682 io->ipsec_out_v4 = (outer_ipv4 != NULL); 5683 io->ipsec_out_secure = B_TRUE; 5684 5685 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5686 /* Set up transport mode for tunnelled packets. */ 5687 io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5688 IPPROTO_IPV6; 5689 return (ipsec_mp); 5690 } 5691 5692 /* Fill in tunnel-mode goodies here. */ 5693 io->ipsec_out_tunnel = B_TRUE; 5694 /* XXX Do I need to fill in all of the goodies here? */ 5695 if (inner_ipv4) { 5696 io->ipsec_out_inaf = AF_INET; 5697 io->ipsec_out_insrc[0] = 5698 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5699 io->ipsec_out_indst[0] = 5700 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5701 } else { 5702 io->ipsec_out_inaf = AF_INET6; 5703 io->ipsec_out_insrc[0] = 5704 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5705 io->ipsec_out_insrc[1] = 5706 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5707 io->ipsec_out_insrc[2] = 5708 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5709 io->ipsec_out_insrc[3] = 5710 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5711 io->ipsec_out_indst[0] = 5712 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5713 io->ipsec_out_indst[1] = 5714 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5715 io->ipsec_out_indst[2] = 5716 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5717 io->ipsec_out_indst[3] = 5718 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5719 } 5720 io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5721 io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5722 /* NOTE: These are used for transport mode too. */ 5723 io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5724 io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5725 io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5726 5727 /* 5728 * The mp pointer is still valid. 5729 * Add an ipsec_out to each fragment. 
5730 * The fragment head already has one. 5731 */ 5732 nmp = mp->b_next; 5733 mp->b_next = NULL; 5734 mp = nmp; 5735 ASSERT(ipsec_mp != NULL); 5736 while (mp != NULL) { 5737 nmp = mp->b_next; 5738 ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns); 5739 if (ipsec_mp->b_next == NULL) { 5740 ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL, 5741 DROPPER(ipss, ipds_spd_nomem), 5742 &ipss->ipsec_spd_dropper); 5743 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5744 DROPPER(ipss, ipds_spd_nomem), 5745 &ipss->ipsec_spd_dropper); 5746 return (NULL); 5747 } 5748 ipsec_mp = ipsec_mp->b_next; 5749 mp->b_next = NULL; 5750 mp = nmp; 5751 } 5752 return (ipsec_mp_head); 5753 } 5754 5755 /* 5756 * NOTE: The following releases pol's reference and 5757 * calls ip_drop_packet() for me on NULL returns. 5758 */ 5759 mblk_t * 5760 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, 5761 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns) 5762 { 5763 /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */ 5764 mblk_t *data_chain = NULL, *data_tail = NULL; 5765 mblk_t *ii_next; 5766 5767 while (ipsec_mp != NULL) { 5768 ii_next = ipsec_mp->b_next; 5769 ipsec_mp->b_next = NULL; /* No tripping asserts. */ 5770 5771 /* 5772 * Need IPPOL_REFHOLD(pol) for extras because 5773 * ipsecin_policy does the refrele. 5774 */ 5775 IPPOL_REFHOLD(pol); 5776 5777 if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4, 5778 inner_ipv6, pkt_unique, ns) != NULL) { 5779 if (data_tail == NULL) { 5780 /* First one */ 5781 data_chain = data_tail = ipsec_mp->b_cont; 5782 } else { 5783 data_tail->b_next = ipsec_mp->b_cont; 5784 data_tail = data_tail->b_next; 5785 } 5786 freeb(ipsec_mp); 5787 } else { 5788 /* 5789 * ipsec_check_ipsecin_policy() freed ipsec_mp 5790 * already. Need to get rid of any extra pol 5791 * references, and any remaining bits as well. 5792 */ 5793 IPPOL_REFRELE(pol, ns); 5794 ipsec_freemsg_chain(data_chain); 5795 ipsec_freemsg_chain(ii_next); /* ipdrop stats? */ 5796 return (NULL); 5797 } 5798 ipsec_mp = ii_next; 5799 } 5800 /* 5801 * One last release because either the loop bumped it up, or we never 5802 * called ipsec_check_ipsecin_policy(). 5803 */ 5804 IPPOL_REFRELE(pol, ns); 5805 5806 /* data_chain is ready for return to tun module. */ 5807 return (data_chain); 5808 } 5809 5810 5811 /* 5812 * Returns B_TRUE if the inbound packet passed an IPsec policy check. Returns 5813 * B_FALSE if it failed or if it is a fragment needing its friends before a 5814 * policy check can be performed. 5815 * 5816 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead. 5817 * data_mp may be reassigned with a b_next chain of packets if fragments 5818 * needed to be collected for a proper policy check. 5819 * 5820 * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE. This 5821 * function calls ip_drop_packet() on data_mp if need be. 5822 * 5823 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller 5824 * is inspecting an ICMP packet. 5825 */ 5826 boolean_t 5827 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, 5828 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, 5829 ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns) 5830 { 5831 ipsec_policy_head_t *polhead; 5832 ipsec_selector_t sel; 5833 mblk_t *message = (ipsec_mp == NULL) ? 
*data_mp : ipsec_mp; 5834 ipsec_policy_t *pol; 5835 uint16_t tmpport; 5836 selret_t rc; 5837 boolean_t retval, port_policy_present, is_icmp, global_present; 5838 in6_addr_t tmpaddr; 5839 ipaddr_t tmp4; 5840 ipsec_stack_t *ipss = ns->netstack_ipsec; 5841 uint8_t flags, *holder, *outer_hdr; 5842 5843 sel.ips_is_icmp_inv_acq = 0; 5844 5845 if (outer_ipv4 != NULL) { 5846 ASSERT(outer_ipv6 == NULL); 5847 outer_hdr = (uint8_t *)outer_ipv4; 5848 global_present = ipss->ipsec_inbound_v4_policy_present; 5849 } else { 5850 outer_hdr = (uint8_t *)outer_ipv6; 5851 global_present = ipss->ipsec_inbound_v6_policy_present; 5852 } 5853 ASSERT(outer_hdr != NULL); 5854 5855 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || 5856 inner_ipv4 == NULL && inner_ipv6 != NULL); 5857 ASSERT(message == *data_mp || message->b_cont == *data_mp); 5858 5859 if (outer_hdr_len < 0) { 5860 outer_hdr_len = (-outer_hdr_len); 5861 is_icmp = B_TRUE; 5862 } else { 5863 is_icmp = B_FALSE; 5864 } 5865 5866 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5867 polhead = itp->itp_policy; 5868 /* 5869 * We need to perform full Tunnel-Mode enforcement, 5870 * and we need to have inner-header data for such enforcement. 5871 * 5872 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5873 * and on return. 5874 */ 5875 5876 port_policy_present = ((itp->itp_flags & 5877 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5878 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5879 (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5880 5881 rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4, 5882 inner_ipv6, flags); 5883 5884 switch (rc) { 5885 case SELRET_NOMEM: 5886 ip_drop_packet(message, B_TRUE, NULL, NULL, 5887 DROPPER(ipss, ipds_spd_nomem), 5888 &ipss->ipsec_spd_dropper); 5889 return (B_FALSE); 5890 case SELRET_TUNFRAG: 5891 /* 5892 * At this point, if we're cleartext, we don't want 5893 * to go there. 5894 */ 5895 if (ipsec_mp == NULL) { 5896 ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, 5897 DROPPER(ipss, ipds_spd_got_clear), 5898 &ipss->ipsec_spd_dropper); 5899 *data_mp = NULL; 5900 return (B_FALSE); 5901 } 5902 ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> 5903 ipsec_in_secure); 5904 message = ipsec_fragcache_add(&itp->itp_fragcache, 5905 ipsec_mp, *data_mp, outer_hdr_len, ipss); 5906 5907 if (message == NULL) { 5908 /* 5909 * Data is cached, fragment chain is not 5910 * complete. I consume ipsec_mp and data_mp 5911 */ 5912 return (B_FALSE); 5913 } 5914 5915 /* 5916 * If we get here, we have a full fragment chain. 5917 * Reacquire headers and selectors from first fragment. 5918 */ 5919 if (inner_ipv4 != NULL) { 5920 inner_ipv4 = (ipha_t *)message->b_cont->b_rptr; 5921 ASSERT(message->b_cont->b_wptr - 5922 message->b_cont->b_rptr > sizeof (ipha_t)); 5923 } else { 5924 inner_ipv6 = (ip6_t *)message->b_cont->b_rptr; 5925 ASSERT(message->b_cont->b_wptr - 5926 message->b_cont->b_rptr > sizeof (ip6_t)); 5927 } 5928 /* Use SEL_NONE so we always get ports! */ 5929 rc = ipsec_init_inbound_sel(&sel, message->b_cont, 5930 inner_ipv4, inner_ipv6, SEL_NONE); 5931 switch (rc) { 5932 case SELRET_SUCCESS: 5933 /* 5934 * Get to same place as first caller's 5935 * SELRET_SUCCESS case. 
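 * (On success, the reassembled chain simply falls out of this inner
 * switch and continues with the policy lookup below.)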
5936 */ 5937 break; 5938 case SELRET_NOMEM: 5939 ip_drop_packet_chain(message, B_TRUE, 5940 NULL, NULL, 5941 DROPPER(ipss, ipds_spd_nomem), 5942 &ipss->ipsec_spd_dropper); 5943 return (B_FALSE); 5944 case SELRET_BADPKT: 5945 ip_drop_packet_chain(message, B_TRUE, 5946 NULL, NULL, 5947 DROPPER(ipss, ipds_spd_malformed_frag), 5948 &ipss->ipsec_spd_dropper); 5949 return (B_FALSE); 5950 case SELRET_TUNFRAG: 5951 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5952 /* FALLTHRU */ 5953 default: 5954 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5955 " returns bizarro 0x%x", rc); 5956 /* Guaranteed panic! */ 5957 ASSERT(rc == SELRET_NOMEM); 5958 return (B_FALSE); 5959 } 5960 /* FALLTHRU */ 5961 case SELRET_SUCCESS: 5962 /* 5963 * Common case: 5964 * No per-port policy or a non-fragment. Keep going. 5965 */ 5966 break; 5967 case SELRET_BADPKT: 5968 /* 5969 * We may receive ICMP (with IPv6 inner) packets that 5970 * trigger this return value. Send 'em in for 5971 * enforcement checking. 5972 */ 5973 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5974 "sending 'bad packet' in for enforcement"); 5975 break; 5976 default: 5977 cmn_err(CE_WARN, 5978 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5979 rc); 5980 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5981 return (B_FALSE); 5982 } 5983 5984 if (is_icmp) { 5985 /* 5986 * Swap local/remote because this is an ICMP packet. 5987 */ 5988 tmpaddr = sel.ips_local_addr_v6; 5989 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5990 sel.ips_remote_addr_v6 = tmpaddr; 5991 tmpport = sel.ips_local_port; 5992 sel.ips_local_port = sel.ips_remote_port; 5993 sel.ips_remote_port = tmpport; 5994 } 5995 5996 /* find_policy_head() */ 5997 rw_enter(&polhead->iph_lock, RW_READER); 5998 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5999 &sel, ns); 6000 rw_exit(&polhead->iph_lock); 6001 if (pol != NULL) { 6002 if (ipsec_mp == NULL || 6003 !((ipsec_in_t *)ipsec_mp->b_rptr)-> 6004 ipsec_in_secure) { 6005 retval = pol->ipsp_act->ipa_allow_clear; 6006 if (!retval) { 6007 /* 6008 * XXX should never get here with 6009 * tunnel reassembled fragments? 6010 */ 6011 ASSERT(message->b_next == NULL); 6012 ip_drop_packet(message, B_TRUE, NULL, 6013 NULL, 6014 DROPPER(ipss, ipds_spd_got_clear), 6015 &ipss->ipsec_spd_dropper); 6016 } else if (ipsec_mp != NULL) { 6017 freeb(ipsec_mp); 6018 } 6019 6020 IPPOL_REFRELE(pol, ns); 6021 return (retval); 6022 } 6023 /* 6024 * NOTE: The following releases pol's reference and 6025 * calls ip_drop_packet() for me on NULL returns. 6026 * 6027 * "sel" is still good here, so let's use it! 6028 */ 6029 *data_mp = ipsec_check_ipsecin_policy_reasm(message, 6030 pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( 6031 sel.ips_remote_port, sel.ips_local_port, 6032 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 6033 IPPROTO_ENCAP, sel.ips_protocol), ns); 6034 return (*data_mp != NULL); 6035 } 6036 6037 /* 6038 * Else fallthru and check the global policy on the outer 6039 * header(s) if this tunnel is an old-style transport-mode 6040 * one. Drop the packet explicitly (no policy entry) for 6041 * a new-style tunnel-mode tunnel. 
6042 */ 6043 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 6044 ip_drop_packet_chain(message, B_TRUE, NULL, 6045 NULL, 6046 DROPPER(ipss, ipds_spd_explicit), 6047 &ipss->ipsec_spd_dropper); 6048 return (B_FALSE); 6049 } 6050 } 6051 6052 /* 6053 * NOTE: If we reach here, we will not have packet chains from 6054 * fragcache_add(), because the only way I get chains is on a 6055 * tunnel-mode tunnel, which either returns with a pass, or gets 6056 * hit by the ip_drop_packet_chain() call right above here. 6057 */ 6058 6059 /* If no per-tunnel security, check global policy now. */ 6060 if (ipsec_mp != NULL && !global_present) { 6061 if (((ipsec_in_t *)(ipsec_mp->b_rptr))-> 6062 ipsec_in_icmp_loopback) { 6063 /* 6064 * This is an ICMP message with an ipsec_mp 6065 * attached. We should accept it. 6066 */ 6067 freeb(ipsec_mp); /* ipsec_mp is known non-NULL here. */ 6069 return (B_TRUE); 6070 } 6071 6072 ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, 6073 DROPPER(ipss, ipds_spd_got_secure), 6074 &ipss->ipsec_spd_dropper); 6075 return (B_FALSE); 6076 } 6077 6078 /* 6079 * The following assertion is valid because only the tun module alters 6080 * the mblk chain - stripping the outer header by advancing mp->b_rptr. 6081 */ 6082 ASSERT(is_icmp || ((*data_mp)->b_datap->db_base <= outer_hdr && 6083 outer_hdr < (*data_mp)->b_rptr)); 6084 holder = (*data_mp)->b_rptr; 6085 (*data_mp)->b_rptr = outer_hdr; 6086 6087 if (is_icmp) { 6088 /* 6089 * For ICMP packets, "outer_ipvN" is set to the outer header 6090 * that is *INSIDE* the ICMP payload. For global policy 6091 * checking, we need to reverse src/dst on the payload in 6092 * order to construct selectors appropriately. See "ripha" 6093 * constructions in ip.c. To avoid a bug like 6478464 (see 6094 * earlier in this file), we will actually exchange src/dst 6095 * in the packet, and reverse it after the call to 6096 * ipsec_check_global_policy(). 6097 */ 6098 if (outer_ipv4 != NULL) { 6099 tmp4 = outer_ipv4->ipha_src; 6100 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6101 outer_ipv4->ipha_dst = tmp4; 6102 } else { 6103 ASSERT(outer_ipv6 != NULL); 6104 tmpaddr = outer_ipv6->ip6_src; 6105 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6106 outer_ipv6->ip6_dst = tmpaddr; 6107 } 6108 } 6109 6110 /* NOTE: Frees message if it returns NULL. */ 6111 if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6, 6112 (ipsec_mp != NULL), ns) == NULL) { 6113 return (B_FALSE); 6114 } 6115 6116 if (is_icmp) { 6117 /* Set things back to normal. */ 6118 if (outer_ipv4 != NULL) { 6119 tmp4 = outer_ipv4->ipha_src; 6120 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6121 outer_ipv4->ipha_dst = tmp4; 6122 } else { 6123 /* No need for ASSERT()s now. */ 6124 tmpaddr = outer_ipv6->ip6_src; 6125 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6126 outer_ipv6->ip6_dst = tmpaddr; 6127 } 6128 } 6129 6130 (*data_mp)->b_rptr = holder; 6131 6132 if (ipsec_mp != NULL) 6133 freeb(ipsec_mp); 6134 6135 /* 6136 * At this point, we pretend it's a cleartext accepted 6137 * packet. 6138 */ 6139 return (B_TRUE); 6140 } 6141 6142 /* 6143 * AVL comparison routine for our list of tunnel polheads. 6144 */ 6145 static int 6146 tunnel_compare(const void *arg1, const void *arg2) 6147 { 6148 ipsec_tun_pol_t *left, *right; 6149 int rc; 6150 6151 left = (ipsec_tun_pol_t *)arg1; 6152 right = (ipsec_tun_pol_t *)arg2; 6153 6154 rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ); 6155 return (rc == 0 ? rc : (rc > 0 ? 1 : -1)); 6156 } 6157 6158 /* 6159 * Free a tunnel policy node.
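 * Callers normally do not call this directly; they drop their hold with
 * ITP_REFRELE(), which lands here once itp_refcnt reaches zero.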
6160 */ 6161 void 6162 itp_free(ipsec_tun_pol_t *node, netstack_t *ns) 6163 { 6164 IPPH_REFRELE(node->itp_policy, ns); 6165 IPPH_REFRELE(node->itp_inactive, ns); 6166 mutex_destroy(&node->itp_lock); 6167 kmem_free(node, sizeof (*node)); 6168 } 6169 6170 void 6171 itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns) 6172 { 6173 ipsec_stack_t *ipss = ns->netstack_ipsec; 6174 6175 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6176 ipss->ipsec_tunnel_policy_gen++; 6177 ipsec_fragcache_uninit(&node->itp_fragcache); 6178 avl_remove(&ipss->ipsec_tunnel_policies, node); 6179 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6180 ITP_REFRELE(node, ns); 6181 } 6182 6183 /* 6184 * Public interface to look up a tunnel security policy by name. Used by 6185 * spdsock mostly. Returns "node" with a bumped refcnt. 6186 */ 6187 ipsec_tun_pol_t * 6188 get_tunnel_policy(char *name, netstack_t *ns) 6189 { 6190 ipsec_tun_pol_t *node, lookup; 6191 ipsec_stack_t *ipss = ns->netstack_ipsec; 6192 6193 (void) strncpy(lookup.itp_name, name, LIFNAMSIZ); 6194 6195 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6196 node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6197 &lookup, NULL); 6198 if (node != NULL) { 6199 ITP_REFHOLD(node); 6200 } 6201 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6202 6203 return (node); 6204 } 6205 6206 /* 6207 * Public interface to walk all tunnel security policies. Useful for spdsock 6208 * DUMP operations. iterator() will not consume a reference. 6209 */ 6210 void 6211 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *), 6212 void *arg, netstack_t *ns) 6213 { 6214 ipsec_tun_pol_t *node; 6215 ipsec_stack_t *ipss = ns->netstack_ipsec; 6216 6217 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6218 for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL; 6219 node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) { 6220 iterator(node, arg, ns); 6221 } 6222 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6223 } 6224 6225 /* 6226 * Initialize policy head. This can only fail if there's a memory problem. 6227 */ 6228 static boolean_t 6229 tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns) 6230 { 6231 ipsec_stack_t *ipss = ns->netstack_ipsec; 6232 6233 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 6234 iph->iph_refs = 1; 6235 iph->iph_gen = 0; 6236 if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize, 6237 KM_SLEEP, B_FALSE, ns) != 0) { 6238 ipsec_polhead_free_table(iph); 6239 return (B_FALSE); 6240 } 6241 ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize); 6242 return (B_TRUE); 6243 } 6244 6245 /* 6246 * Create a tunnel policy node with "name". Sets *errno to 6247 * ENOMEM if there's a memory problem, and to EEXIST if there's an existing 6248 * node.
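 *
 * Illustrative caller sketch (hypothetical; the tunnel name and the
 * local variables below are for exposition only):
 *
 *	uint64_t gen;
 *	int rc;
 *	ipsec_tun_pol_t *itp;
 *
 *	itp = create_tunnel_policy("ip.tun0", &rc, &gen, ns);
 *	if (itp == NULL)
 *		return (rc);		(rc holds ENOMEM or EEXIST)
 *	...
 *	ITP_REFRELE(itp, ns);		(drop the caller's hold when done)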
6249 */ 6250 ipsec_tun_pol_t * 6251 create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns) 6252 { 6253 ipsec_tun_pol_t *newbie, *existing; 6254 avl_index_t where; 6255 ipsec_stack_t *ipss = ns->netstack_ipsec; 6256 6257 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP); 6258 if (newbie == NULL) { 6259 *errno = ENOMEM; 6260 return (NULL); 6261 } 6262 if (!ipsec_fragcache_init(&newbie->itp_fragcache)) { 6263 kmem_free(newbie, sizeof (*newbie)); 6264 *errno = ENOMEM; 6265 return (NULL); 6266 } 6267 6268 (void) strncpy(newbie->itp_name, name, LIFNAMSIZ); 6269 6270 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6271 existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6272 newbie, &where); 6273 if (existing != NULL) { /* newbie's polheads and mutex aren't set up yet, so itp_free() would trip on them; tear newbie down by hand instead. */ rw_exit(&ipss->ipsec_tunnel_policy_lock); ipsec_fragcache_uninit(&newbie->itp_fragcache); kmem_free(newbie, sizeof (*newbie)); *errno = EEXIST; return (NULL); 6278 } 6279 ipss->ipsec_tunnel_policy_gen++; 6280 *gen = ipss->ipsec_tunnel_policy_gen; 6281 newbie->itp_refcnt = 2; /* One for the caller, one for the tree. */ 6282 newbie->itp_next_policy_index = 1; 6283 avl_insert(&ipss->ipsec_tunnel_policies, newbie, where); 6284 mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL); 6285 newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t), 6286 KM_NOSLEEP); 6287 if (newbie->itp_policy == NULL) 6288 goto nomem; 6289 newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t), 6290 KM_NOSLEEP); 6291 if (newbie->itp_inactive == NULL) { 6292 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6293 goto nomem; 6294 } 6295 6296 if (!tunnel_polhead_init(newbie->itp_policy, ns)) { 6297 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6298 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6299 goto nomem; 6300 } else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) { 6301 IPPH_REFRELE(newbie->itp_policy, ns); 6302 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6303 goto nomem; 6304 } 6305 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6306 6307 return (newbie); 6308 nomem: 6309 *errno = ENOMEM; /* Unwind the AVL insertion and write lock from above before freeing. */ avl_remove(&ipss->ipsec_tunnel_policies, newbie); rw_exit(&ipss->ipsec_tunnel_policy_lock); mutex_destroy(&newbie->itp_lock); ipsec_fragcache_uninit(&newbie->itp_fragcache); 6310 kmem_free(newbie, sizeof (*newbie)); 6311 return (NULL); 6312 } 6313 6314 /* 6315 * We can't call the tun_t lookup function until tun is 6316 * loaded, so create a dummy function to avoid symbol 6317 * lookup errors on boot. 6318 */ 6319 /* ARGSUSED */ 6320 ipsec_tun_pol_t * 6321 itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af, netstack_t *ns) 6322 { 6323 return (NULL); /* Always return NULL. */ 6324 } 6325 6326 /* 6327 * Frag cache code, based on SunScreen 3.2 source 6328 * screen/kernel/common/screen_fragcache.c 6329 */ 6330 6331 #define IPSEC_FRAG_TTL_MAX 5 6332 /* 6333 * Note that the following parameters create 256 hash buckets 6334 * with 1024 free entries to be distributed. Entries are cleaned 6335 * periodically, and a cleaning pass is also attempted when there is no 6336 * free space, but this system errs on the side of dropping packets 6337 * over exhausting memory. We may decide to make the hash 6338 * factor a tunable if this proves to be a bad decision. 6339 */ 6340 #define IPSEC_FRAG_HASH_SLOTS (1<<8) 6341 #define IPSEC_FRAG_HASH_FACTOR 4 6342 #define IPSEC_FRAG_HASH_SIZE (IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR) 6343 6344 #define IPSEC_FRAG_HASH_MASK (IPSEC_FRAG_HASH_SLOTS - 1) 6345 #define IPSEC_FRAG_HASH_FUNC(id) (((id) & IPSEC_FRAG_HASH_MASK) ^ \ 6346 (((id) / \ 6347 (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \ 6348 IPSEC_FRAG_HASH_MASK)) 6349
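/*
 * Worked example of the hash above (illustrative): for an IP ident of
 * 0x1234, IPSEC_FRAG_HASH_FUNC() folds the low byte with the byte above
 * it, so the bucket is (0x34 ^ 0x12) == 0x26 of the 256 slots.
 */

6350 /* Maximum fragments per packet.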
48 bytes payload x 1366 packets > 64KB */ 6351 #define IPSEC_MAX_FRAGS 1366 6352 6353 #define V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6354 IPH_OFFSET) << 3) 6355 #define V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6356 IPH_MF) 6357 6358 /* 6359 * Initialize an ipsec fragcache instance. 6360 * Returns B_FALSE if memory allocation fails. 6361 */ 6362 boolean_t 6363 ipsec_fragcache_init(ipsec_fragcache_t *frag) 6364 { 6365 ipsec_fragcache_entry_t *ftemp; 6366 int i; 6367 6368 mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL); 6369 frag->itpf_ptr = (ipsec_fragcache_entry_t **) 6370 kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) * 6371 IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP); 6372 if (frag->itpf_ptr == NULL) 6373 return (B_FALSE); 6374 6375 ftemp = (ipsec_fragcache_entry_t *) 6376 kmem_zalloc(sizeof (ipsec_fragcache_entry_t) * 6377 IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP); 6378 if (ftemp == NULL) { 6379 kmem_free(frag->itpf_ptr, sizeof (ipsec_fragcache_entry_t *) * 6380 IPSEC_FRAG_HASH_SLOTS); 6381 return (B_FALSE); 6382 } 6383 6384 frag->itpf_freelist = NULL; 6385 6386 for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) { 6387 ftemp->itpfe_next = frag->itpf_freelist; 6388 frag->itpf_freelist = ftemp; 6389 ftemp++; 6390 } 6391 6392 frag->itpf_expire_hint = 0; 6393 6394 return (B_TRUE); 6395 } 6396 6397 void 6398 ipsec_fragcache_uninit(ipsec_fragcache_t *frag) 6399 { 6400 ipsec_fragcache_entry_t *fep; 6401 int i; 6402 6403 mutex_enter(&frag->itpf_lock); 6404 if (frag->itpf_ptr) { 6405 /* Delete any existing fragcache entry chains */ 6406 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6407 fep = (frag->itpf_ptr)[i]; 6408 while (fep != NULL) { 6409 /* Returned fep is next in chain or NULL */ 6410 fep = fragcache_delentry(i, fep, frag); 6411 } 6412 } 6413 /* 6414 * Chase the pointers back to the beginning 6415 * of the memory allocation and then 6416 * get rid of the allocated freelist 6417 */ 6418 while (frag->itpf_freelist->itpfe_next != NULL) 6419 frag->itpf_freelist = frag->itpf_freelist->itpfe_next; 6420 /* 6421 * XXX - If we ever dynamically grow the freelist 6422 * then we'll have to free entries individually 6423 * or determine how many entries or chunks we have 6424 * grown since the initial allocation. 6425 */ 6426 kmem_free(frag->itpf_freelist, 6427 sizeof (ipsec_fragcache_entry_t) * 6428 IPSEC_FRAG_HASH_SIZE); 6429 /* Free the fragcache structure */ 6430 kmem_free(frag->itpf_ptr, 6431 sizeof (ipsec_fragcache_entry_t *) * 6432 IPSEC_FRAG_HASH_SLOTS); 6433 } 6434 mutex_exit(&frag->itpf_lock); 6435 mutex_destroy(&frag->itpf_lock); 6436 } 6437 6438 /* 6439 * Add a fragment to the fragment cache. Consumes mp if NULL is returned. 6440 * Returns mp if a whole packet has been assembled, NULL otherwise. 6441 */ 6442 6443 mblk_t * 6444 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, 6445 int outer_hdr_len, ipsec_stack_t *ipss) 6446 { 6447 boolean_t is_v4; 6448 time_t itpf_time; 6449 ipha_t *iph; 6450 ipha_t *oiph; 6451 ip6_t *ip6h = NULL; 6452 uint8_t v6_proto; 6453 uint8_t *v6_proto_p; 6454 uint16_t ip6_hdr_length; 6455 ip6_pkt_t ipp; 6456 ip6_frag_t *fraghdr; 6457 ipsec_fragcache_entry_t *fep; 6458 int i; 6459 mblk_t *nmp, *prevmp, *spare_mp = NULL; 6460 int firstbyte, lastbyte; 6461 int offset; 6462 int last; 6463 boolean_t inbound = (ipsec_mp != NULL); 6464 mblk_t *first_mp = inbound ?
ipsec_mp : mp; 6465 6466 mutex_enter(&frag->itpf_lock); 6467 6468 oiph = (ipha_t *)mp->b_rptr; 6469 iph = (ipha_t *)(mp->b_rptr + outer_hdr_len); 6470 if (IPH_HDR_VERSION(iph) == IPV4_VERSION) { 6471 is_v4 = B_TRUE; 6472 } else { 6473 ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION); 6474 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 6475 mutex_exit(&frag->itpf_lock); 6476 ip_drop_packet(first_mp, inbound, NULL, NULL, 6477 DROPPER(ipss, ipds_spd_nomem), 6478 &ipss->ipsec_spd_dropper); 6479 return (NULL); 6480 } 6481 ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len); 6482 6483 if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length, 6484 &v6_proto_p)) { 6485 /* 6486 * Couldn't find the upper-layer protocol, so 6487 * this is a malformed packet. 6488 */ 6489 mutex_exit(&frag->itpf_lock); 6490 ip_drop_packet(first_mp, inbound, NULL, NULL, 6491 DROPPER(ipss, ipds_spd_malformed_packet), 6492 &ipss->ipsec_spd_dropper); 6493 freemsg(spare_mp); 6494 return (NULL); 6495 } else { 6496 v6_proto = *v6_proto_p; 6497 } 6498 6499 6500 bzero(&ipp, sizeof (ipp)); 6501 (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL); 6502 if (!(ipp.ipp_fields & IPPF_FRAGHDR)) { 6503 /* 6504 * We think this is a fragment, but didn't find 6505 * a fragment header. Something is wrong. 6506 */ 6507 mutex_exit(&frag->itpf_lock); 6508 ip_drop_packet(first_mp, inbound, NULL, NULL, 6509 DROPPER(ipss, ipds_spd_malformed_frag), 6510 &ipss->ipsec_spd_dropper); 6511 freemsg(spare_mp); 6512 return (NULL); 6513 } 6514 fraghdr = ipp.ipp_fraghdr; 6515 is_v4 = B_FALSE; 6516 } 6517 6518 /* Anything to clean up? */ 6519 6520 /* 6521 * This cleanup call could be put in a timer loop, but it may 6522 * actually be just as reasonable to 6523 * leave it here. The disadvantage is that this only gets called when 6524 * frags are added. The advantage is that it is not 6525 * susceptible to race conditions the way a time-based cleanup 6526 * may be.
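 *
 * The itpf_expire_hint check below keeps the common case cheap: no
 * cleaning pass actually runs until the earliest cached entry could
 * have expired.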
6527 */ 6528 itpf_time = gethrestime_sec(); 6529 if (itpf_time >= frag->itpf_expire_hint) 6530 ipsec_fragcache_clean(frag); 6531 6532 /* Lookup to see if there is an existing entry */ 6533 6534 if (is_v4) 6535 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6536 else 6537 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6538 6539 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6540 if (is_v4) { 6541 ASSERT(iph != NULL); 6542 if ((fep->itpfe_id == iph->ipha_ident) && 6543 (fep->itpfe_src == iph->ipha_src) && 6544 (fep->itpfe_dst == iph->ipha_dst) && 6545 (fep->itpfe_proto == iph->ipha_protocol)) 6546 break; 6547 } else { 6548 ASSERT(fraghdr != NULL); 6549 ASSERT(fep != NULL); 6550 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6551 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6552 &ip6h->ip6_src) && 6553 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6554 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6555 break; 6556 } 6557 } 6558 6559 if (is_v4) { 6560 firstbyte = V4_FRAG_OFFSET(iph); 6561 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6562 IPH_HDR_LENGTH(iph); 6563 last = (V4_MORE_FRAGS(iph) == 0); 6564 #ifdef FRAGCACHE_DEBUG 6565 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6566 "last = %d, id = %d\n", firstbyte, lastbyte, last, 6567 iph->ipha_ident); 6568 #endif 6569 } else { 6570 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6571 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6572 sizeof (ip6_t) - ip6_hdr_length; 6573 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6574 #ifdef FRAGCACHE_DEBUG 6575 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6576 "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n", 6577 firstbyte, lastbyte, last, fraghdr->ip6f_ident, 6578 fraghdr, spare_mp); 6579 #endif 6580 } 6581 6582 /* check for bogus fragments and delete the entry */ 6583 if (firstbyte > 0 && firstbyte <= 8) { 6584 if (fep != NULL) 6585 (void) fragcache_delentry(i, fep, frag); 6586 mutex_exit(&frag->itpf_lock); 6587 ip_drop_packet(first_mp, inbound, NULL, NULL, 6588 DROPPER(ipss, ipds_spd_malformed_frag), 6589 &ipss->ipsec_spd_dropper); 6590 freemsg(spare_mp); 6591 return (NULL); 6592 } 6593 6594 /* Not found, allocate a new entry */ 6595 if (fep == NULL) { 6596 if (frag->itpf_freelist == NULL) { 6597 /* see if there is some space */ 6598 ipsec_fragcache_clean(frag); 6599 if (frag->itpf_freelist == NULL) { 6600 mutex_exit(&frag->itpf_lock); 6601 ip_drop_packet(first_mp, inbound, NULL, NULL, 6602 DROPPER(ipss, ipds_spd_nomem), 6603 &ipss->ipsec_spd_dropper); 6604 freemsg(spare_mp); 6605 return (NULL); 6606 } 6607 } 6608 6609 fep = frag->itpf_freelist; 6610 frag->itpf_freelist = fep->itpfe_next; 6611 6612 if (is_v4) { 6613 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6614 sizeof (struct in_addr)); 6615 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6616 sizeof (struct in_addr)); 6617 fep->itpfe_id = iph->ipha_ident; 6618 fep->itpfe_proto = iph->ipha_protocol; 6619 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6620 } else { 6621 bcopy((in6_addr_t *)&ip6h->ip6_src, 6622 (in6_addr_t *)&fep->itpfe_src6, 6623 sizeof (struct in6_addr)); 6624 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6625 (in6_addr_t *)&fep->itpfe_dst6, 6626 sizeof (struct in6_addr)); 6627 fep->itpfe_id = fraghdr->ip6f_ident; 6628 fep->itpfe_proto = v6_proto; 6629 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6630 } 6631 itpf_time = gethrestime_sec(); 6632 fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1; 6633 fep->itpfe_last = 0; 6634 fep->itpfe_fraglist = NULL; 6635 
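/* Finish initializing the fresh entry, then link it onto hash bucket i. */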
fep->itpfe_depth = 0; 6636 fep->itpfe_next = (frag->itpf_ptr)[i]; 6637 (frag->itpf_ptr)[i] = fep; 6638 6639 if (frag->itpf_expire_hint > fep->itpfe_exp) 6640 frag->itpf_expire_hint = fep->itpfe_exp; 6641 6642 } 6643 freemsg(spare_mp); 6644 6645 /* Insert it in the frag list */ 6646 /* List is in order by starting offset of fragments */ 6647 6648 prevmp = NULL; 6649 for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) { 6650 ipha_t *niph; 6651 ipha_t *oniph; 6652 ip6_t *nip6h; 6653 ip6_pkt_t nipp; 6654 ip6_frag_t *nfraghdr; 6655 uint16_t nip6_hdr_length; 6656 uint8_t *nv6_proto_p; 6657 int nfirstbyte, nlastbyte; 6658 char *data, *ndata; 6659 mblk_t *nspare_mp = NULL; 6660 mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp); 6661 int hdr_len; 6662 6663 oniph = (ipha_t *)mp->b_rptr; 6664 nip6h = NULL; 6665 niph = NULL; 6666 6667 /* 6668 * Determine outer header type and length and set 6669 * pointers appropriately 6670 */ 6671 6672 if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) { 6673 hdr_len = ((outer_hdr_len != 0) ? 6674 IPH_HDR_LENGTH(oiph) : 0); 6675 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6676 } else { 6677 ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION); 6678 if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) { 6679 mutex_exit(&frag->itpf_lock); 6680 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6681 DROPPER(ipss, ipds_spd_nomem), 6682 &ipss->ipsec_spd_dropper); 6683 return (NULL); 6684 } 6685 nip6h = (ip6_t *)nspare_mp->b_rptr; 6686 (void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6687 &nip6_hdr_length, &v6_proto_p); 6688 hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0); 6689 } 6690 6691 /* 6692 * Determine inner header type and length and set 6693 * pointers appropriately 6694 */ 6695 6696 if (is_v4) { 6697 if (niph == NULL) { 6698 /* Was v6 outer */ 6699 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6700 } 6701 nfirstbyte = V4_FRAG_OFFSET(niph); 6702 nlastbyte = nfirstbyte + ntohs(niph->ipha_length) - 6703 IPH_HDR_LENGTH(niph); 6704 } else { 6705 if ((nspare_mp == NULL) && 6706 ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) { 6707 mutex_exit(&frag->itpf_lock); 6708 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6709 DROPPER(ipss, ipds_spd_nomem), 6710 &ipss->ipsec_spd_dropper); 6711 return (NULL); 6712 } 6713 nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len); 6714 if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6715 &nip6_hdr_length, &nv6_proto_p)) { 6716 mutex_exit(&frag->itpf_lock); 6717 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6718 DROPPER(ipss, ipds_spd_malformed_frag), 6719 &ipss->ipsec_spd_dropper); 6720 ipsec_freemsg_chain(nspare_mp); 6721 return (NULL); 6722 } 6723 bzero(&nipp, sizeof (nipp)); 6724 (void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL); 6725 nfraghdr = nipp.ipp_fraghdr; 6726 nfirstbyte = ntohs(nfraghdr->ip6f_offlg & 6727 IP6F_OFF_MASK); 6728 nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) + 6729 sizeof (ip6_t) - nip6_hdr_length; 6730 } 6731 ipsec_freemsg_chain(nspare_mp); 6732 6733 /* Check for overlapping fragments */ 6734 if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) { 6735 /* 6736 * Overlap Check: 6737 * ~~~~--------- # Check if the newly 6738 * ~ ndata_mp| # received fragment 6739 * ~~~~--------- # overlaps with the 6740 * ---------~~~~~~ # current fragment. 
6741 * | mp ~ 6742 * ---------~~~~~~ 6743 */ 6744 if (is_v4) { 6745 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6746 firstbyte - nfirstbyte; 6747 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6748 } else { 6749 data = (char *)ip6h + 6750 nip6_hdr_length + firstbyte - 6751 nfirstbyte; 6752 ndata = (char *)nip6h + nip6_hdr_length; 6753 } 6754 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) - 6755 firstbyte)) { 6756 /* Overlapping data does not match */ 6757 (void) fragcache_delentry(i, fep, frag); 6758 mutex_exit(&frag->itpf_lock); 6759 ip_drop_packet(first_mp, inbound, NULL, NULL, 6760 DROPPER(ipss, ipds_spd_overlap_frag), 6761 &ipss->ipsec_spd_dropper); 6762 return (NULL); 6763 } 6764 /* Part of defense for jolt2.c fragmentation attack */ 6765 if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) { 6766 /* 6767 * Check for identical or subset fragments: 6768 * ---------- ~~~~--------~~~~~ 6769 * | nmp | or ~ nmp ~ 6770 * ---------- ~~~~--------~~~~~ 6771 * ---------- ------ 6772 * | mp | | mp | 6773 * ---------- ------ 6774 */ 6775 mutex_exit(&frag->itpf_lock); 6776 ip_drop_packet(first_mp, inbound, NULL, NULL, 6777 DROPPER(ipss, ipds_spd_evil_frag), 6778 &ipss->ipsec_spd_dropper); 6779 return (NULL); 6780 } 6781 6782 } 6783 6784 /* Correct location for this fragment? */ 6785 if (firstbyte <= nfirstbyte) { 6786 /* 6787 * Check if the tail end of the new fragment overlaps 6788 * with the head of the current fragment. 6789 * --------~~~~~~~ 6790 * | nmp ~ 6791 * --------~~~~~~~ 6792 * ~~~~~-------- 6793 * ~ mp | 6794 * ~~~~~-------- 6795 */ 6796 if (lastbyte > nfirstbyte) { 6797 /* Fragments overlap */ 6798 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6799 firstbyte - nfirstbyte; 6800 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6801 if (is_v4) { 6802 data = (char *)iph + 6803 IPH_HDR_LENGTH(iph) + firstbyte - 6804 nfirstbyte; 6805 ndata = (char *)niph + 6806 IPH_HDR_LENGTH(niph); 6807 } else { 6808 data = (char *)ip6h + 6809 nip6_hdr_length + firstbyte - 6810 nfirstbyte; 6811 ndata = (char *)nip6h + nip6_hdr_length; 6812 } 6813 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) 6814 - nfirstbyte)) { 6815 /* Overlap mismatch */ 6816 (void) fragcache_delentry(i, fep, frag); 6817 mutex_exit(&frag->itpf_lock); 6818 ip_drop_packet(first_mp, inbound, NULL, 6819 NULL, DROPPER(ipss, 6820 ipds_spd_overlap_frag), 6821 &ipss->ipsec_spd_dropper); 6822 return (NULL); 6823 } 6824 } 6825 6826 /* 6827 * Fragment does not illegally overlap and can now 6828 * be inserted into the chain 6829 */ 6830 break; 6831 } 6832 6833 prevmp = nmp; 6834 } 6835 first_mp->b_next = nmp; 6836 6837 if (prevmp == NULL) { 6838 fep->itpfe_fraglist = first_mp; 6839 } else { 6840 prevmp->b_next = first_mp; 6841 } 6842 if (last) 6843 fep->itpfe_last = 1; 6844 6845 /* Part of defense for jolt2.c fragmentation attack */ 6846 if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) { 6847 (void) fragcache_delentry(i, fep, frag); 6848 mutex_exit(&frag->itpf_lock); 6849 ip_drop_packet(first_mp, inbound, NULL, NULL, 6850 DROPPER(ipss, ipds_spd_max_frags), 6851 &ipss->ipsec_spd_dropper); 6852 return (NULL); 6853 } 6854 6855 /* Check for complete packet */ 6856 6857 if (!fep->itpfe_last) { 6858 mutex_exit(&frag->itpf_lock); 6859 #ifdef FRAGCACHE_DEBUG 6860 cmn_err(CE_WARN, "Fragment cached, not last.\n"); 6861 #endif 6862 return (NULL); 6863 } 6864 6865 #ifdef FRAGCACHE_DEBUG 6866 cmn_err(CE_WARN, "Last fragment cached.\n"); 6867 cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp); 6868 #endif 6869 6870 offset = 0; 6871 for (mp = fep->itpfe_fraglist; 
mp; mp = mp->b_next) { 6872 mblk_t *data_mp = (inbound ? mp->b_cont : mp); 6873 int hdr_len; 6874 6875 oiph = (ipha_t *)data_mp->b_rptr; 6876 ip6h = NULL; 6877 iph = NULL; 6878 6879 spare_mp = NULL; 6880 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 6881 hdr_len = ((outer_hdr_len != 0) ? 6882 IPH_HDR_LENGTH(oiph) : 0); 6883 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6884 } else { 6885 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 6886 if ((spare_mp = msgpullup(data_mp, -1)) == NULL) { 6887 mutex_exit(&frag->itpf_lock); 6888 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6889 DROPPER(ipss, ipds_spd_nomem), 6890 &ipss->ipsec_spd_dropper); 6891 return (NULL); 6892 } 6893 ip6h = (ip6_t *)spare_mp->b_rptr; 6894 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 6895 &ip6_hdr_length, &v6_proto_p); 6896 hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0); 6897 } 6898 6899 /* Calculate current fragment start/end */ 6900 if (is_v4) { 6901 if (iph == NULL) { 6902 /* Was v6 outer */ 6903 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6904 } 6905 firstbyte = V4_FRAG_OFFSET(iph); 6906 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6907 IPH_HDR_LENGTH(iph); 6908 } else { 6909 if ((spare_mp == NULL) && 6910 ((spare_mp = msgpullup(data_mp, -1)) == NULL)) { 6911 mutex_exit(&frag->itpf_lock); 6912 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6913 DROPPER(ipss, ipds_spd_nomem), 6914 &ipss->ipsec_spd_dropper); 6915 return (NULL); 6916 } 6917 ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len); 6918 if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 6919 &ip6_hdr_length, &v6_proto_p)) { 6920 mutex_exit(&frag->itpf_lock); 6921 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6922 DROPPER(ipss, ipds_spd_malformed_frag), 6923 &ipss->ipsec_spd_dropper); 6924 ipsec_freemsg_chain(spare_mp); 6925 return (NULL); 6926 } 6927 v6_proto = *v6_proto_p; 6928 bzero(&ipp, sizeof (ipp)); 6929 (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL); 6930 fraghdr = ipp.ipp_fraghdr; 6931 firstbyte = ntohs(fraghdr->ip6f_offlg & 6932 IP6F_OFF_MASK); 6933 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6934 sizeof (ip6_t) - ip6_hdr_length; 6935 } 6936 6937 /* 6938 * If this fragment is greater than current offset, 6939 * we have a missing fragment so return NULL 6940 */ 6941 if (firstbyte > offset) { 6942 mutex_exit(&frag->itpf_lock); 6943 #ifdef FRAGCACHE_DEBUG 6944 /* 6945 * Note, this can happen when the last frag 6946 * gets sent through because it is smaller 6947 * than the MTU. It is not necessarily an 6948 * error condition. 6949 */ 6950 cmn_err(CE_WARN, "Frag greater than offset! 
: " 6951 "missing fragment: firstbyte = %d, offset = %d, " 6952 "mp = %p\n", firstbyte, offset, mp); 6953 #endif 6954 ipsec_freemsg_chain(spare_mp); 6955 return (NULL); 6956 } 6957 6958 /* 6959 * If we are at the last fragment, we have the complete 6960 * packet, so rechain things and return it to caller 6961 * for processing 6962 */ 6963 6964 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6965 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6966 mp = fep->itpfe_fraglist; 6967 fep->itpfe_fraglist = NULL; 6968 (void) fragcache_delentry(i, fep, frag); 6969 mutex_exit(&frag->itpf_lock); 6970 6971 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6972 65535)) || (!is_v4 && (firstbyte + 6973 ntohs(ip6h->ip6_plen) > 65535))) { 6974 /* It is an invalid "ping-o-death" packet */ 6975 /* Discard it */ 6976 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6977 DROPPER(ipss, ipds_spd_evil_frag), 6978 &ipss->ipsec_spd_dropper); 6979 ipsec_freemsg_chain(spare_mp); 6980 return (NULL); 6981 } 6982 #ifdef FRAGCACHE_DEBUG 6983 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6984 "mp->b_next = %p", mp, mp->b_next); 6985 #endif 6986 ipsec_freemsg_chain(spare_mp); 6987 /* 6988 * For inbound case, mp has ipsec_in b_next'd chain 6989 * For outbound case, it is just data mp chain 6990 */ 6991 return (mp); 6992 } 6993 ipsec_freemsg_chain(spare_mp); 6994 6995 /* 6996 * Update new ending offset if this 6997 * fragment extends the packet 6998 */ 6999 if (offset < lastbyte) 7000 offset = lastbyte; 7001 } 7002 7003 mutex_exit(&frag->itpf_lock); 7004 7005 /* Didn't find last fragment, so return NULL */ 7006 return (NULL); 7007 } 7008 7009 static void 7010 ipsec_fragcache_clean(ipsec_fragcache_t *frag) 7011 { 7012 ipsec_fragcache_entry_t *fep; 7013 int i; 7014 ipsec_fragcache_entry_t *earlyfep = NULL; 7015 time_t itpf_time; 7016 int earlyexp; 7017 int earlyi = 0; 7018 7019 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7020 7021 itpf_time = gethrestime_sec(); 7022 earlyexp = itpf_time + 10000; 7023 7024 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 7025 fep = (frag->itpf_ptr)[i]; 7026 while (fep) { 7027 if (fep->itpfe_exp < itpf_time) { 7028 /* found */ 7029 fep = fragcache_delentry(i, fep, frag); 7030 } else { 7031 if (fep->itpfe_exp < earlyexp) { 7032 earlyfep = fep; 7033 earlyexp = fep->itpfe_exp; 7034 earlyi = i; 7035 } 7036 fep = fep->itpfe_next; 7037 } 7038 } 7039 } 7040 7041 frag->itpf_expire_hint = earlyexp; 7042 7043 /* if (!found) */ 7044 if (frag->itpf_freelist == NULL) 7045 (void) fragcache_delentry(earlyi, earlyfep, frag); 7046 } 7047 7048 static ipsec_fragcache_entry_t * 7049 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 7050 ipsec_fragcache_t *frag) 7051 { 7052 ipsec_fragcache_entry_t *targp; 7053 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 7054 7055 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7056 7057 /* Free up any fragment list still in cache entry */ 7058 ipsec_freemsg_chain(fep->itpfe_fraglist); 7059 7060 targp = (frag->itpf_ptr)[slot]; 7061 ASSERT(targp != 0); 7062 7063 if (targp == fep) { 7064 /* unlink from head of hash chain */ 7065 (frag->itpf_ptr)[slot] = nextp; 7066 /* link into free list */ 7067 fep->itpfe_next = frag->itpf_freelist; 7068 frag->itpf_freelist = fep; 7069 return (nextp); 7070 } 7071 7072 /* maybe should use double linked list to make update faster */ 7073 /* must be past front of chain */ 7074 while (targp) { 7075 if (targp->itpfe_next == fep) { 7076 /* unlink from hash chain */ 7077 targp->itpfe_next = nextp; 7078 /* link into free list */ 7079 fep->itpfe_next = 
frag->itpf_freelist; 7080 frag->itpf_freelist = fep; 7081 return (nextp); 7082 } 7083 targp = targp->itpfe_next; 7084 ASSERT(targp != 0); 7085 } 7086 /* NOTREACHED */ 7087 return (NULL); 7088 } 7089
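/*
 * Illustrative usage sketch of the fragcache above (hypothetical caller,
 * for exposition only; in this file the real consumer is
 * ipsec_tun_inbound(), which passes an IPSEC_IN-tagged chain, while
 * outbound callers pass bare data mblks):
 *
 *	ipsec_fragcache_t fc;
 *	mblk_t *whole;
 *
 *	if (!ipsec_fragcache_init(&fc))
 *		return (B_FALSE);
 *	whole = ipsec_fragcache_add(&fc, ipsec_mp, data_mp,
 *	    outer_hdr_len, ipss);
 *	if (whole != NULL) {
 *		... process the complete b_next-linked chain ...
 *	}
 *	ipsec_fragcache_uninit(&fc);
 */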