/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t **, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, netstack_t *);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(netstack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
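/*
 * Illustrative note (not in the original source): the sel_flags values
 * above form a bit-mask, so a hypothetical tunnel-mode ICMP lookup would
 * pass (SEL_TUNNEL_MODE | SEL_IS_ICMP) to ipsec_init_inbound_sel(), while
 * SEL_NONE requests the default behavior.
 */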
/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG }
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * The selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

#define	IPSEC_SEL_NOHASH	((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int,
    ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2)						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv4 Fragments
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)
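/*
 * Illustrative note: IS_V4_FRAGMENT() is true for any fragment, including
 * the first (offset zero with IPH_MF set) and the last (nonzero offset
 * with IPH_MF clear); it is false only for unfragmented datagrams.
 * Similarly, ALGBITS_ROUND_UP(13, 8) yields 16 and
 * ALGBITS_ROUND_DOWN(13, 8) yields 8.
 */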
/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a fairly complex
 *	fashion and are not expected to change on a regular basis, so
 *	this should not cause scaling/contention problems.  As a result,
 *	policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries.
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}
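/*
 * Illustrative sketch (hypothetical caller, not from the original code):
 * code that accumulates b_next-linked datagrams can dispose of the whole
 * chain at once:
 *
 *	mp->b_next = chain;
 *	chain = mp;			(link each deferred message)
 *	...
 *	ipsec_freemsg_chain(chain);	(free every message in the chain)
 */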
/*
 * AVL tree comparison function.
 * The in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsl_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}

/*
 * Free what ipsec_alloc_table allocated.
 */
void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;
	int i;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		for (i = 0; i < ipr->ipr_nchains; i++) {
			ASSERT(ipr->ipr_hash[i].hash_head == NULL);
		}
		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
		ipr->ipr_hash = NULL;
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));

	}
	ipsec_polhead_free_table(iph);
}
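/*
 * Note: ipsec_polhead_free_table() ASSERTs that every hash chain is
 * already empty, so a policy head must be flushed before it is freed
 * or destroyed.
 */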
/*
 * Free the IPsec stack instance.
 */
/* ARGSUSED */
static void
ipsec_stack_fini(netstackid_t stackid, void *arg)
{
	ipsec_stack_t *ipss = (ipsec_stack_t *)arg;
	void *cookie;
	ipsec_tun_pol_t *node;
	netstack_t *ns = ipss->ipsec_netstack;
	int i;
	ipsec_algtype_t algtype;

	ipsec_loader_destroy(ipss);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&ipss->ipsec_tunnel_policies,
	    &cookie)) != NULL) {
		ITP_REFRELE(node, ns);
	}
	avl_destroy(&ipss->ipsec_tunnel_policies);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	rw_destroy(&ipss->ipsec_tunnel_policy_lock);

	ipsec_config_flush(ns);

	ipsec_kstat_destroy(ipss);

	ip_drop_unregister(&ipss->ipsec_dropper);

	ip_drop_unregister(&ipss->ipsec_spd_dropper);
	ip_drop_destroy(ipss);
	/*
	 * Globals start with ref == 1 to prevent IPPH_REFRELE() from
	 * attempting to free them, hence they should have 1 now.
	 */
	ipsec_polhead_destroy(&ipss->ipsec_system_policy);
	ASSERT(ipss->ipsec_system_policy.iph_refs == 1);
	ipsec_polhead_destroy(&ipss->ipsec_inactive_policy);
	ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
		ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head);
		ipss->ipsec_action_hash[i].hash_head = NULL;
		mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock));
	}

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
		ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock));
	}

	mutex_enter(&ipss->ipsec_alg_lock);
	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		int nalgs = ipss->ipsec_nalgs[algtype];

		for (i = 0; i < nalgs; i++) {
			if (ipss->ipsec_alglists[algtype][i] != NULL)
				ipsec_alg_unreg(algtype, i, ns);
		}
	}
	mutex_exit(&ipss->ipsec_alg_lock);
	mutex_destroy(&ipss->ipsec_alg_lock);

	ipsid_gc(ns);
	ipsid_fini(ns);

	(void) ipsec_free_tables(ipss);
	kmem_free(ipss, sizeof (*ipss));
}

void
ipsec_policy_g_destroy(void)
{
	kmem_cache_destroy(ipsec_action_cache);
	kmem_cache_destroy(ipsec_sel_cache);
	kmem_cache_destroy(ipsec_pol_cache);
	kmem_cache_destroy(ipsec_info_cache);

	ipsec_unregister_prov_update();

	netstack_unregister(NS_IPSEC);
}


/*
 * Free what ipsec_alloc_tables allocated.
 * Called when table allocation fails, to free any partial work.
 */
static int
ipsec_free_tables(ipsec_stack_t *ipss)
{
	int i;

	if (ipss->ipsec_sel_hash != NULL) {
		for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
			ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		}
		kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize *
		    sizeof (*ipss->ipsec_sel_hash));
		ipss->ipsec_sel_hash = NULL;
		ipss->ipsec_spd_hashsize = 0;
	}
	ipsec_polhead_free_table(&ipss->ipsec_system_policy);
	ipsec_polhead_free_table(&ipss->ipsec_inactive_policy);

	return (ENOMEM);
}

/*
 * Attempt to allocate the tables in a single policy head.
 * Return nonzero on failure after cleaning up any work in progress.
 */
int
ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
    boolean_t global_cleanup, netstack_t *ns)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;
		ipr->ipr_hash = kmem_zalloc(nchains *
		    sizeof (ipsec_policy_hash_t), kmflag);
		if (ipr->ipr_hash == NULL)
			return (global_cleanup ?
			    ipsec_free_tables(ns->netstack_ipsec) :
			    ENOMEM);
	}
	return (0);
}
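/*
 * Note: ipsec_free_tables() returns ENOMEM so that failed allocation
 * paths can clean up and report the error in one step, e.g.
 * "return (ipsec_free_tables(ipss));" as the callers above and below do.
 */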
/*
 * Attempt to allocate the various tables.  Return nonzero on failure
 * after cleaning up any work in progress.
 */
static int
ipsec_alloc_tables(int kmflag, netstack_t *ns)
{
	int error;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	error = ipsec_alloc_table(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize *
	    sizeof (*ipss->ipsec_sel_hash), kmflag);

	if (ipss->ipsec_sel_hash == NULL)
		return (ipsec_free_tables(ipss));

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}

static boolean_t
ipsec_kstat_init(ipsec_stack_t *ipss)
{
	ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net",
	    KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid);

	if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL)
		return (B_FALSE);

	ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data;

#define	KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64)
	KI(esp_stat_in_requests);
	KI(esp_stat_in_discards);
	KI(esp_stat_lookup_failure);
	KI(ah_stat_in_requests);
	KI(ah_stat_in_discards);
	KI(ah_stat_lookup_failure);
	KI(sadb_acquire_maxpackets);
	KI(sadb_acquire_qhiwater);
#undef KI

	kstat_install(ipss->ipsec_ksp);
	return (B_TRUE);
}

static void
ipsec_kstat_destroy(ipsec_stack_t *ipss)
{
	kstat_delete_netstack(ipss->ipsec_ksp,
	    ipss->ipsec_netstack->netstack_stackid);
	ipss->ipsec_kstats = NULL;
}
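/*
 * Illustrative expansion of the KI() macro used above:
 * KI(esp_stat_in_requests) becomes
 * kstat_named_init(&ipss->ipsec_kstats->esp_stat_in_requests,
 *     "esp_stat_in_requests", KSTAT_DATA_UINT64);
 */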
/*
 * Initialize the IPsec stack instance.
 */
/* ARGSUSED */
static void *
ipsec_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsec_stack_t *ipss;
	int i;

	ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP);
	ipss->ipsec_netstack = ns;

	/*
	 * FIXME: netstack_ipsec is used by some of the routines we call
	 * below, but it isn't set until this routine returns.
	 * Either we introduce optional xxx_stack_alloc() functions
	 * that will be called by the netstack framework before xxx_stack_init,
	 * or we switch spd.c and sadb.c to operate on ipsec_stack_t
	 * (the latter has some include file order issues for sadb.h, but makes
	 * sense if we merge some of the ipsec related stack_t's together.)
	 */
	ns->netstack_ipsec = ipss;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	if (ipss->ipsec_spd_hashsize == 0)
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;

	if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipss->ipsec_spd_hashsize);
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipss->ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP, ns);
	}

	/* Just set a default for tunnels. */
	if (ipss->ipsec_tun_spd_hashsize == 0)
		ipss->ipsec_tun_spd_hashsize = TUN_SPDHASH_DEFAULT;

	ipsid_init(ns);
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	ipss->ipsec_system_policy.iph_refs = 1;
	ipss->ipsec_inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize);
	ipsec_polhead_init(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize);
	rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), 0);

	ipss->ipsec_next_policy_index = 1;

	rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipss->ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++)
		mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL);
	for (i = 0; i < IPSEC_NALGTYPES; i++) {
		ipss->ipsec_nalgs[i] = 0;
	}

	ip_drop_init(ipss);
	ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD");

	/* Set function to dummy until tun is loaded */
	rw_init(&ipss->ipsec_itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
	rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER);
	ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy;
	rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);

	/* IP's IPsec code calls the packet dropper */
	ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing");

	(void) ipsec_kstat_init(ipss);

	ipsec_loader_init(ipss);
	ipsec_loader_start(ipss);

	return (ipss);
}

/* Global across all stack instances */
void
ipsec_policy_g_init(void)
{
	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_info_cache = kmem_cache_create("ipsec_info",
	    sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsec_stack_t's.
	 */
	netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini);
}
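/*
 * Note: the netstack_register() call above passes NULL for the shutdown
 * callback, so per-stack IPsec state is torn down only by
 * ipsec_stack_fini() when the stack itself is destroyed.
 */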
/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipss->ipsec_nalgs[at];
	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipss->ipsec_sortlist[at][i];
			ipss->ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		}	/* Else just continue. */
	}

	/* Store holder in last slot. */
	ipss->ipsec_sortlist[at][i] = holder;
}

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	boolean_t copyback = B_FALSE;
	int i;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	int newcount = ipss->ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback) {
			ipss->ipsec_sortlist[at][i-1] =
			    ipss->ipsec_sortlist[at][i];
		} else if (ipss->ipsec_sortlist[at][i] == algid) {
			copyback = B_TRUE;
		}
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype, ns);
	ipss->ipsec_alglists[algtype][alg->alg_id] = alg;

	ipss->ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id, ns);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]);
	ipss->ipsec_alglists[algtype][algid] = NULL;

	ipss->ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid, ns);
}
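/*
 * Note: both ipsec_alg_reg() and ipsec_alg_unreg() ASSERT that
 * ipsec_alg_lock is held; ipsec_stack_fini(), for example, takes the
 * mutex around its unregistration loop.
 */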
/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_system_policy;

	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy;

	IPPH_REFHOLD(h);
	return (h);
}

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive,
    netstack_t *ns)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}

		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags(ns->netstack_ipsec);
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipsec_swap_policy(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns);
}

/*
 * Clone one policy rule.
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}


static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);
		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}
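/*
 * Note: ipsec_insert_always() can ASSERT that avl_find() misses because
 * ipsec_policy_cmpbyid() breaks index ties on the node address itself,
 * so a not-yet-inserted node never compares equal to an in-tree one.
 */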
/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph,
    netstack_t *ns)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph, ns);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];
		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph, ns);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	return (ipsec_copy_polhead(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns));
}

/*
 * Generic "do we have IPvN policy" answer.
 */
boolean_t
iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
{
	int i, hval;
	uint32_t valbit;
	ipsec_policy_root_t *ipr;
	ipsec_policy_t *ipp;

	if (v6) {
		valbit = IPSL_IPV6;
		hval = IPSEC_AF_V6;
	} else {
		valbit = IPSL_IPV4;
		hval = IPSEC_AF_V4;
	}

	ASSERT(RW_LOCK_HELD(&iph->iph_lock));
	for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
		if (ipr->ipr_nonhash[hval] != NULL)
			return (B_TRUE);
		for (i = 0; i < ipr->ipr_nchains; i++) {
			for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
			    ipp = ipp->ipsp_hash.hash_next) {
				if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
					return (B_TRUE);
			}
		}
	}

	return (B_FALSE);
}
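/*
 * Illustrative use of iph_ipvN() (hypothetical caller); the head's lock
 * must be held, as the ASSERT above enforces:
 *
 *	rw_enter(&iph->iph_lock, RW_READER);
 *	have_v6 = iph_ipvN(iph, B_TRUE);
 *	rw_exit(&iph->iph_lock);
 */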
"secure" : "not secure"), s, d); 1104 } 1105 1106 /* 1107 * Rate-limiting front-end to strlog() for AH and ESP. Uses the ndd variables 1108 * in /dev/ip and the same rate-limiting clock so that there's a single 1109 * knob to turn to throttle the rate of messages. 1110 */ 1111 void 1112 ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl, 1113 char *fmt, ...) 1114 { 1115 va_list adx; 1116 hrtime_t current = gethrtime(); 1117 ip_stack_t *ipst = ns->netstack_ip; 1118 ipsec_stack_t *ipss = ns->netstack_ipsec; 1119 1120 sl |= SL_CONSOLE; 1121 /* 1122 * Throttle logging to stop syslog from being swamped. If variable 1123 * 'ipsec_policy_log_interval' is zero, don't log any messages at 1124 * all, otherwise log only one message every 'ipsec_policy_log_interval' 1125 * msec. Convert interval (in msec) to hrtime (in nsec). 1126 */ 1127 1128 if (ipst->ips_ipsec_policy_log_interval) { 1129 if (ipss->ipsec_policy_failure_last + 1130 ((hrtime_t)ipst->ips_ipsec_policy_log_interval * 1131 (hrtime_t)1000000) <= current) { 1132 va_start(adx, fmt); 1133 (void) vstrlog(mid, sid, level, sl, fmt, adx); 1134 va_end(adx); 1135 ipss->ipsec_policy_failure_last = current; 1136 } 1137 } 1138 } 1139 1140 void 1141 ipsec_config_flush(netstack_t *ns) 1142 { 1143 ipsec_stack_t *ipss = ns->netstack_ipsec; 1144 1145 rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER); 1146 ipsec_polhead_flush(&ipss->ipsec_system_policy, ns); 1147 ipss->ipsec_next_policy_index = 1; 1148 rw_exit(&ipss->ipsec_system_policy.iph_lock); 1149 ipsec_action_reclaim_stack(ns); 1150 } 1151 1152 /* 1153 * Clip a policy's min/max keybits vs. the capabilities of the 1154 * algorithm. 1155 */ 1156 static void 1157 act_alg_adjust(uint_t algtype, uint_t algid, 1158 uint16_t *minbits, uint16_t *maxbits, netstack_t *ns) 1159 { 1160 ipsec_stack_t *ipss = ns->netstack_ipsec; 1161 ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid]; 1162 1163 if (algp != NULL) { 1164 /* 1165 * If passed-in minbits is zero, we assume the caller trusts 1166 * us with setting the minimum key size. We pick the 1167 * algorithms DEFAULT key size for the minimum in this case. 1168 */ 1169 if (*minbits == 0) { 1170 *minbits = algp->alg_default_bits; 1171 ASSERT(*minbits >= algp->alg_minbits); 1172 } else { 1173 *minbits = MAX(MIN(*minbits, algp->alg_maxbits), 1174 algp->alg_minbits); 1175 } 1176 if (*maxbits == 0) 1177 *maxbits = algp->alg_maxbits; 1178 else 1179 *maxbits = MIN(MAX(*maxbits, algp->alg_minbits), 1180 algp->alg_maxbits); 1181 ASSERT(*minbits <= *maxbits); 1182 } else { 1183 *minbits = 0; 1184 *maxbits = 0; 1185 } 1186 } 1187 1188 /* 1189 * Check an action's requested algorithms against the algorithms currently 1190 * loaded in the system. 
/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion.
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}
/*
 * Combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * Compute the combinatoric explosion.
	 *
	 * We assume a request for encr if esp_req is PREF_REQUIRED.
	 * We assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * We assume a request for esp auth if !ah and esp_req is
	 * PREF_REQUIRED.
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * Set up for explosion: for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX

	/*
	 * OK, allocate the whole mess.
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss)			\
	((wild) ? (ipss->ipsec_sortlist[type][idx]) : (idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef	WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}
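/*
 * Worked example (illustrative): if encryption is wildcarded with three
 * loaded encryption algorithms (one of them SADB_EALG_NONE) and ESP
 * authentication is wildcarded with four loaded auth algorithms (one of
 * them SADB_AALG_NONE), the expansion above yields (3 - 1) * (4 - 1) = 6
 * actions.
 */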
/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg) || !ipp->ipp_use_ah;
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	ipp->ipp_auth_alg = req->ipsr_auth_alg;
	ipp->ipp_esp_auth_alg = req->ipsr_esp_auth_alg;
}

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp,
    netstack_t *ns)
{
	struct ipsec_act act;

	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp, ns);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}
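/*
 * Note (illustrative): the conversions above round-trip for the simple
 * cases; e.g. an ipsec_req_t with both ipsr_ah_req and ipsr_esp_req set
 * to IPSEC_PREF_NEVER becomes IPSEC_ACT_BYPASS in ipsec_actvec_from_req(),
 * which ipsec_req_from_act() maps back to IPSEC_PREF_NEVER on both fields.
 */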
/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	mutex_enter(&connp->conn_lock);
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (ipl->ipl_in_action != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
			goto done;
		}
		if (ipl->ipl_in_policy != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
			    req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	mutex_exit(&connp->conn_lock);
	return (rv);
}

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * When outbound policy is not cached, look it up the hard way and attach
 * an ipsec_out_t to the packet.
 */
static mblk_t *
ipsec_attach_global_policy(mblk_t **mp, conn_t *connp, ipsec_selector_t *sel,
    netstack_t *ns)
{
	ipsec_policy_t *p;

	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns);

	if (p == NULL)
		return (NULL);
	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns));
}

/*
 * We have an ipsec_out already, but don't have cached policy; fill it in
 * with the right actions.
 */
static mblk_t *
ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
    ipsec_selector_t *sel, netstack_t *ns)
{
	ipsec_out_t *io;
	ipsec_policy_t *p;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	if (io->ipsec_out_policy == NULL) {
		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns);
		io->ipsec_out_policy = p;
	}
	return (ipsec_mp);
}


/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present,
    ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;
	netstack_t *ns;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ns = ii->ipsec_in_ns;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp, ns);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (first_mp);
}
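/*
 * Note (illustrative): the pkt_unique values checked below are built with
 * SA_UNIQUE_ID() from ports and protocol (see conn_to_unique() later in
 * this file); each SA's ipsa_unique_mask selects which of those fields
 * the SA latched and therefore which must match.
 */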
"Be liberal in what you receive" 1642 * should not apply in this case. ;) 1643 */ 1644 1645 if (ah_mask != 0 && 1646 ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) { 1647 *reason = "AH inner header mismatch"; 1648 *counter = DROPPER(ipss, ipds_spd_ah_innermismatch); 1649 return (B_FALSE); 1650 } 1651 if (esp_mask != 0 && 1652 esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) { 1653 *reason = "ESP inner header mismatch"; 1654 *counter = DROPPER(ipss, ipds_spd_esp_innermismatch); 1655 return (B_FALSE); 1656 } 1657 return (B_TRUE); 1658 } 1659 1660 static boolean_t 1661 ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, 1662 ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter) 1663 { 1664 boolean_t ret = B_TRUE; 1665 ipsec_prot_t *ipp; 1666 ipsa_t *ah_assoc; 1667 ipsa_t *esp_assoc; 1668 boolean_t decaps; 1669 netstack_t *ns = ii->ipsec_in_ns; 1670 ipsec_stack_t *ipss = ns->netstack_ipsec; 1671 1672 ASSERT((ipha == NULL && ip6h != NULL) || 1673 (ip6h == NULL && ipha != NULL)); 1674 1675 if (ii->ipsec_in_loopback) { 1676 /* 1677 * Besides accepting pointer-equivalent actions, we also 1678 * accept any ICMP errors we generated for ourselves, 1679 * regardless of policy. If we do not wish to make this 1680 * assumption in the future, check here, and where 1681 * icmp_loopback is initialized in ip.c and ip6.c. (Look for 1682 * ipsec_out_icmp_loopback.) 1683 */ 1684 if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback) 1685 return (B_TRUE); 1686 1687 /* Deep compare necessary here?? */ 1688 *counter = DROPPER(ipss, ipds_spd_loopback_mismatch); 1689 *reason = "loopback policy mismatch"; 1690 return (B_FALSE); 1691 } 1692 ASSERT(!ii->ipsec_in_icmp_loopback); 1693 1694 ah_assoc = ii->ipsec_in_ah_sa; 1695 esp_assoc = ii->ipsec_in_esp_sa; 1696 1697 decaps = ii->ipsec_in_decaps; 1698 1699 switch (ap->ipa_act.ipa_type) { 1700 case IPSEC_ACT_DISCARD: 1701 case IPSEC_ACT_REJECT: 1702 /* Should "fail hard" */ 1703 *counter = DROPPER(ipss, ipds_spd_explicit); 1704 *reason = "blocked by policy"; 1705 return (B_FALSE); 1706 1707 case IPSEC_ACT_BYPASS: 1708 case IPSEC_ACT_CLEAR: 1709 *counter = DROPPER(ipss, ipds_spd_got_secure); 1710 *reason = "expected clear, got protected"; 1711 return (B_FALSE); 1712 1713 case IPSEC_ACT_APPLY: 1714 ipp = &ap->ipa_act.ipa_apply; 1715 /* 1716 * As of now we do the simple checks of whether 1717 * the datagram has gone through the required IPSEC 1718 * protocol constraints or not. We might have more 1719 * in the future like sensitive levels, key bits, etc. 1720 * If it fails the constraints, check whether we would 1721 * have accepted this if it had come in clear. 1722 */ 1723 if (ipp->ipp_use_ah) { 1724 if (ah_assoc == NULL) { 1725 ret = ipsec_inbound_accept_clear(mp, ipha, 1726 ip6h); 1727 *counter = DROPPER(ipss, ipds_spd_got_clear); 1728 *reason = "unprotected not accepted"; 1729 break; 1730 } 1731 ASSERT(ah_assoc != NULL); 1732 ASSERT(ipp->ipp_auth_alg != 0); 1733 1734 if (ah_assoc->ipsa_auth_alg != 1735 ipp->ipp_auth_alg) { 1736 *counter = DROPPER(ipss, ipds_spd_bad_ahalg); 1737 *reason = "unacceptable ah alg"; 1738 ret = B_FALSE; 1739 break; 1740 } 1741 } else if (ah_assoc != NULL) { 1742 /* 1743 * Don't allow this. Check IPSEC NOTE above 1744 * ip_fanout_proto(). 
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay.  But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet.
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
}
/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fallback to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}

/*
 * Called to check policy on a latched connection, both from this file
 * and from tcp.c
 */
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    conn_t *connp)
{
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(ipl->ipl_ids_latched == B_TRUE);

	if (!ii->ipsec_in_loopback) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if ((ii->ipsec_in_ah_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
			*counter = DROPPER(ipss, ipds_spd_ah_badid);
			*reason = "AH identity mismatch";
			return (B_FALSE);
		}

		if ((ii->ipsec_in_esp_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
			*counter = DROPPER(ipss, ipds_spd_esp_badid);
			*reason = "ESP identity mismatch";
			return (B_FALSE);
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h))) {
			return (B_FALSE);
		}
	}

	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
	    ipha, ip6h, reason, counter));
}
1943 */ 1944 static mblk_t * 1945 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1946 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) 1947 { 1948 ipsec_in_t *ii; 1949 ipsec_action_t *ap; 1950 const char *reason = "no policy actions found"; 1951 mblk_t *data_mp, *ipsec_mp; 1952 ipsec_stack_t *ipss = ns->netstack_ipsec; 1953 ip_stack_t *ipst = ns->netstack_ip; 1954 kstat_named_t *counter; 1955 1956 counter = DROPPER(ipss, ipds_spd_got_secure); 1957 1958 data_mp = first_mp->b_cont; 1959 ipsec_mp = first_mp; 1960 1961 ASSERT(ipsp != NULL); 1962 1963 ASSERT((ipha == NULL && ip6h != NULL) || 1964 (ip6h == NULL && ipha != NULL)); 1965 1966 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1967 1968 if (ii->ipsec_in_loopback) 1969 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1970 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1971 ASSERT(ii->ipsec_in_secure); 1972 1973 if (ii->ipsec_in_action != NULL) { 1974 /* 1975 * this can happen if we do a double policy-check on a packet 1976 * Would be nice to be able to delete this test.. 1977 */ 1978 IPACT_REFRELE(ii->ipsec_in_action); 1979 } 1980 ASSERT(ii->ipsec_in_action == NULL); 1981 1982 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1983 reason = "inbound AH and ESP identities differ"; 1984 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1985 goto drop; 1986 } 1987 1988 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1989 goto drop; 1990 1991 /* 1992 * Ok, now loop through the possible actions and see if any 1993 * of them work for us. 1994 */ 1995 1996 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 1997 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 1998 ipha, ip6h, &reason, &counter)) { 1999 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2000 IPPOL_REFRELE(ipsp, ns); 2001 return (first_mp); 2002 } 2003 } 2004 drop: 2005 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 2006 "ipsec inbound policy mismatch: %s, packet dropped\n", 2007 reason); 2008 IPPOL_REFRELE(ipsp, ns); 2009 ASSERT(ii->ipsec_in_action == NULL); 2010 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2011 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2012 &ipss->ipsec_spd_dropper); 2013 return (NULL); 2014 } 2015 2016 /* 2017 * sleazy prefix-length-based compare. 2018 * another inlining candidate.. 2019 */ 2020 boolean_t 2021 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 2022 { 2023 int offset = pfxlen>>3; 2024 int bitsleft = pfxlen & 7; 2025 uint8_t *addr2 = (uint8_t *)addr2p; 2026 2027 /* 2028 * and there was much evil.. 2029 * XXX should inline-expand the bcmp here and do this 32 bits 2030 * or 64 bits at a time.. 2031 */ 2032 return ((bcmp(addr1, addr2, offset) == 0) && 2033 ((bitsleft == 0) || 2034 (((addr1[offset] ^ addr2[offset]) & 2035 (0xff<<(8-bitsleft))) == 0))); 2036 } 2037 2038 static ipsec_policy_t * 2039 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 2040 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 2041 { 2042 ipsec_selkey_t *isel; 2043 ipsec_policy_t *p; 2044 int bpri = best ? 
best->ipsp_prio : 0; 2045 2046 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 2047 uint32_t valid; 2048 2049 if (p->ipsp_prio <= bpri) 2050 continue; 2051 isel = &p->ipsp_sel->ipsl_key; 2052 valid = isel->ipsl_valid; 2053 2054 if ((valid & IPSL_PROTOCOL) && 2055 (isel->ipsl_proto != sel->ips_protocol)) 2056 continue; 2057 2058 if ((valid & IPSL_REMOTE_ADDR) && 2059 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 2060 isel->ipsl_remote_pfxlen, 2061 &sel->ips_remote_addr_v6)) 2062 continue; 2063 2064 if ((valid & IPSL_LOCAL_ADDR) && 2065 !ip_addr_match((uint8_t *)&isel->ipsl_local, 2066 isel->ipsl_local_pfxlen, 2067 &sel->ips_local_addr_v6)) 2068 continue; 2069 2070 if ((valid & IPSL_REMOTE_PORT) && 2071 isel->ipsl_rport != sel->ips_remote_port) 2072 continue; 2073 2074 if ((valid & IPSL_LOCAL_PORT) && 2075 isel->ipsl_lport != sel->ips_local_port) 2076 continue; 2077 2078 if (!is_icmp_inv_acq) { 2079 if ((valid & IPSL_ICMP_TYPE) && 2080 (isel->ipsl_icmp_type > sel->ips_icmp_type || 2081 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 2082 continue; 2083 } 2084 2085 if ((valid & IPSL_ICMP_CODE) && 2086 (isel->ipsl_icmp_code > sel->ips_icmp_code || 2087 isel->ipsl_icmp_code_end < 2088 sel->ips_icmp_code)) { 2089 continue; 2090 } 2091 } else { 2092 /* 2093 * special case for icmp inverse acquire 2094 * we only want policies that aren't drop/pass 2095 */ 2096 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2097 continue; 2098 } 2099 2100 /* we matched all the packet-port-field selectors! */ 2101 best = p; 2102 bpri = p->ipsp_prio; 2103 } 2104 2105 return (best); 2106 } 2107 2108 /* 2109 * Try to find and return the best policy entry under a given policy 2110 * root for a given set of selectors; the first parameter "best" is 2111 * the current best policy so far. If "best" is non-null, we have a 2112 * reference to it. We return a reference to a policy; if that policy 2113 * is not the original "best", we need to release that reference 2114 * before returning. 2115 */ 2116 ipsec_policy_t * 2117 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2118 int direction, ipsec_selector_t *sel, netstack_t *ns) 2119 { 2120 ipsec_policy_t *curbest; 2121 ipsec_policy_root_t *root; 2122 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2123 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2124 2125 curbest = best; 2126 root = &head->iph_root[direction]; 2127 2128 #ifdef DEBUG 2129 if (is_icmp_inv_acq) { 2130 if (sel->ips_isv4) { 2131 if (sel->ips_protocol != IPPROTO_ICMP) { 2132 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2133 " expecting icmp, got %d", sel->ips_protocol); 2134 } 2135 } else { 2136 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2137 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2138 " expecting icmpv6, got %d", sel->ips_protocol); 2139 } 2140 } 2141 } 2142 #endif 2143 2144 rw_enter(&head->iph_lock, RW_READER); 2145 2146 if (root->ipr_nchains > 0) { 2147 curbest = ipsec_find_policy_chain(curbest, 2148 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2149 is_icmp_inv_acq); 2150 } 2151 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2152 is_icmp_inv_acq); 2153 2154 /* 2155 * Adjust reference counts if we found anything new. 
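 * The new winner gains a hold here; if it displaced the "best" we were
 * passed, that one loses the reference our caller held on it.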
2156 */ 2157 if (curbest != best) { 2158 ASSERT(curbest != NULL); 2159 IPPOL_REFHOLD(curbest); 2160 2161 if (best != NULL) { 2162 IPPOL_REFRELE(best, ns); 2163 } 2164 } 2165 2166 rw_exit(&head->iph_lock); 2167 2168 return (curbest); 2169 } 2170 2171 /* 2172 * Find the best system policy (either global or per-interface) which 2173 * applies to the given selector; look in all the relevant policy roots 2174 * to figure out which policy wins. 2175 * 2176 * Returns a reference to a policy; caller must release this 2177 * reference when done. 2178 */ 2179 ipsec_policy_t * 2180 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2181 ipsec_selector_t *sel, netstack_t *ns) 2182 { 2183 ipsec_policy_t *p; 2184 ipsec_stack_t *ipss = ns->netstack_ipsec; 2185 2186 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2187 direction, sel, ns); 2188 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2189 p = ipsec_find_policy_head(p, connp->conn_policy, 2190 direction, sel, ns); 2191 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2192 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2193 direction, sel, ns); 2194 } 2195 2196 return (p); 2197 } 2198 2199 /* 2200 * Check with global policy and see whether this inbound 2201 * packet meets the policy constraints. 2202 * 2203 * Locate appropriate policy from global policy, supplemented by the 2204 * conn's configured and/or cached policy if the conn is supplied. 2205 * 2206 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2207 * encrypted packet to see if they match. 2208 * 2209 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2210 * floor. 2211 */ 2212 mblk_t * 2213 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2214 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) 2215 { 2216 ipsec_policy_t *p; 2217 ipsec_selector_t sel; 2218 mblk_t *data_mp, *ipsec_mp; 2219 boolean_t policy_present; 2220 kstat_named_t *counter; 2221 ipsec_in_t *ii = NULL; 2222 uint64_t pkt_unique; 2223 ipsec_stack_t *ipss = ns->netstack_ipsec; 2224 ip_stack_t *ipst = ns->netstack_ip; 2225 2226 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2227 ipsec_mp = mctl_present ? first_mp : NULL; 2228 2229 sel.ips_is_icmp_inv_acq = 0; 2230 2231 ASSERT((ipha == NULL && ip6h != NULL) || 2232 (ip6h == NULL && ipha != NULL)); 2233 2234 if (ipha != NULL) 2235 policy_present = ipss->ipsec_inbound_v4_policy_present; 2236 else 2237 policy_present = ipss->ipsec_inbound_v6_policy_present; 2238 2239 if (!policy_present && connp == NULL) { 2240 /* 2241 * No global policy and no per-socket policy; 2242 * just pass it back (but we shouldn't get here in that case) 2243 */ 2244 return (first_mp); 2245 } 2246 2247 if (ipsec_mp != NULL) { 2248 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2249 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2250 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2251 } 2252 2253 /* 2254 * If we have cached policy, use it. 2255 * Otherwise consult system policy. 2256 */ 2257 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2258 p = connp->conn_latch->ipl_in_policy; 2259 if (p != NULL) { 2260 IPPOL_REFHOLD(p); 2261 } 2262 /* 2263 * Fudge sel for UNIQUE_ID setting below. 2264 */ 2265 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2266 } else { 2267 /* Initialize the ports in the selector */ 2268 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2269 SEL_NONE) == SELRET_NOMEM) { 2270 /* 2271 * Technically not a policy mismatch, but it is 2272 * an internal failure. 
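 * (With SEL_NONE, SELRET_NOMEM is the only failure we expect from
 * ipsec_init_inbound_sel(), so the drop is charged to the nomem
 * counter even though it is logged through the mismatch path.)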
2273 */ 2274 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2275 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE, ns); 2276 counter = DROPPER(ipss, ipds_spd_nomem); 2277 goto fail; 2278 } 2279 2280 /* 2281 * Find the policy which best applies. 2282 * 2283 * If we find global policy, we should look at both 2284 * local policy and global policy and see which is 2285 * stronger and match accordingly. 2286 * 2287 * If we don't find a global policy, check with 2288 * local policy alone. 2289 */ 2290 2291 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 2292 ns); 2293 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2294 sel.ips_local_port, sel.ips_protocol, 0); 2295 } 2296 2297 if (p == NULL) { 2298 if (ipsec_mp == NULL) { 2299 /* 2300 * We have no policy; default to succeeding. 2301 * XXX paranoid system design doesn't do this. 2302 */ 2303 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2304 return (first_mp); 2305 } else { 2306 counter = DROPPER(ipss, ipds_spd_got_secure); 2307 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2308 "ipsec_check_global_policy", ipha, ip6h, B_TRUE, 2309 ns); 2310 goto fail; 2311 } 2312 } 2313 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2314 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2315 pkt_unique, ns)); 2316 } 2317 if (p->ipsp_act->ipa_allow_clear) { 2318 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2319 IPPOL_REFRELE(p, ns); 2320 return (first_mp); 2321 } 2322 IPPOL_REFRELE(p, ns); 2323 /* 2324 * If we reach here, we will drop the packet because it failed the 2325 * global policy check because the packet was cleartext, and it 2326 * should not have been. 2327 */ 2328 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2329 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns); 2330 counter = DROPPER(ipss, ipds_spd_got_clear); 2331 2332 fail: 2333 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2334 &ipss->ipsec_spd_dropper); 2335 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2336 return (NULL); 2337 } 2338 2339 /* 2340 * We check whether an inbound datagram is a valid one 2341 * to accept in clear. If it is secure, it is the job 2342 * of IPSEC to log information appropriately if it 2343 * suspects that it may not be the real one. 2344 * 2345 * It is called only while fanning out to the ULP 2346 * where ULP accepts only secure data and the incoming 2347 * is clear. Usually we never accept clear datagrams in 2348 * such cases. ICMP is the only exception. 2349 * 2350 * NOTE : We don't call this function if the client (ULP) 2351 * is willing to accept things in clear. 2352 */ 2353 boolean_t 2354 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2355 { 2356 ushort_t iph_hdr_length; 2357 icmph_t *icmph; 2358 icmp6_t *icmp6; 2359 uint8_t *nexthdrp; 2360 2361 ASSERT((ipha != NULL && ip6h == NULL) || 2362 (ipha == NULL && ip6h != NULL)); 2363 2364 if (ip6h != NULL) { 2365 iph_hdr_length = ip_hdr_length_v6(mp, ip6h); 2366 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, 2367 &nexthdrp)) { 2368 return (B_FALSE); 2369 } 2370 if (*nexthdrp != IPPROTO_ICMPV6) 2371 return (B_FALSE); 2372 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]); 2373 /* Match IPv6 ICMP policy as closely as IPv4 as possible. */ 2374 switch (icmp6->icmp6_type) { 2375 case ICMP6_PARAM_PROB: 2376 /* Corresponds to port/proto unreach in IPv4. */ 2377 case ICMP6_ECHO_REQUEST: 2378 /* Just like IPv4. 
*/ 2379 return (B_FALSE); 2380 2381 case MLD_LISTENER_QUERY: 2382 case MLD_LISTENER_REPORT: 2383 case MLD_LISTENER_REDUCTION: 2384 /* 2385 * XXX Separate NDD in IPv4; what about here? 2386 * Plus, mcast is important to ND. 2387 */ 2388 case ICMP6_DST_UNREACH: 2389 /* Corresponds to HOST/NET unreachable in IPv4. */ 2390 case ICMP6_PACKET_TOO_BIG: 2391 case ICMP6_ECHO_REPLY: 2392 /* These are trusted in IPv4. */ 2393 case ND_ROUTER_SOLICIT: 2394 case ND_ROUTER_ADVERT: 2395 case ND_NEIGHBOR_SOLICIT: 2396 case ND_NEIGHBOR_ADVERT: 2397 case ND_REDIRECT: 2398 /* Trust ND messages for now. */ 2399 case ICMP6_TIME_EXCEEDED: 2400 default: 2401 return (B_TRUE); 2402 } 2403 } else { 2404 /* 2405 * If it is not ICMP, fail this request. 2406 */ 2407 if (ipha->ipha_protocol != IPPROTO_ICMP) { 2408 #ifdef FRAGCACHE_DEBUG 2409 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n", 2410 ipha->ipha_protocol); 2411 #endif 2412 return (B_FALSE); 2413 } 2414 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2415 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2416 /* 2417 * It is an insecure ICMP message. Check to see whether we are 2418 * willing to accept this one. 2419 */ 2420 2421 switch (icmph->icmph_type) { 2422 case ICMP_ECHO_REPLY: 2423 case ICMP_TIME_STAMP_REPLY: 2424 case ICMP_INFO_REPLY: 2425 case ICMP_ROUTER_ADVERTISEMENT: 2426 /* 2427 * We should not encourage clear replies if this 2428 * client expects secure. If somebody is replying 2429 * in clear, some malicious user watching both the 2430 * request and reply can do chosen-plaintext attacks. 2431 * With global policy we might be just expecting secure 2432 * but sending out clear. We don't know what the right 2433 * thing is. We can't do much here as we can't control 2434 * the sender. Till we are sure of what to do, 2435 * accept them. 2436 */ 2437 return (B_TRUE); 2438 case ICMP_ECHO_REQUEST: 2439 case ICMP_TIME_STAMP_REQUEST: 2440 case ICMP_INFO_REQUEST: 2441 case ICMP_ADDRESS_MASK_REQUEST: 2442 case ICMP_ROUTER_SOLICITATION: 2443 case ICMP_ADDRESS_MASK_REPLY: 2444 /* 2445 * Don't accept this as somebody could be sending 2446 * us plain text to get encrypted data. If we reply, 2447 * it will lead to a chosen-plaintext attack. 2448 */ 2449 return (B_FALSE); 2450 case ICMP_DEST_UNREACHABLE: 2451 switch (icmph->icmph_code) { 2452 case ICMP_FRAGMENTATION_NEEDED: 2453 /* 2454 * Be in sync with icmp_inbound, where we have 2455 * already set ire_max_frag. 2456 */ 2457 #ifdef FRAGCACHE_DEBUG 2458 cmn_err(CE_WARN, "ICMP frag needed\n"); 2459 #endif 2460 return (B_TRUE); 2461 case ICMP_HOST_UNREACHABLE: 2462 case ICMP_NET_UNREACHABLE: 2463 /* 2464 * By accepting, we could reset a connection. 2465 * How do we solve the problem of some 2466 * intermediate router sending insecure ICMP 2467 * messages? 2468 */ 2469 return (B_TRUE); 2470 case ICMP_PORT_UNREACHABLE: 2471 case ICMP_PROTOCOL_UNREACHABLE: 2472 default: 2473 return (B_FALSE); 2474 } 2475 case ICMP_SOURCE_QUENCH: 2476 /* 2477 * If this is an attack, TCP will slow start 2478 * because of this. Is it very harmful? 2479
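 * (Accepting bounds the damage to a smaller congestion window; no
 * data is disclosed either way.)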
2479 */ 2480 return (B_TRUE); 2481 case ICMP_PARAM_PROBLEM: 2482 return (B_FALSE); 2483 case ICMP_TIME_EXCEEDED: 2484 return (B_TRUE); 2485 case ICMP_REDIRECT: 2486 return (B_FALSE); 2487 default : 2488 return (B_FALSE); 2489 } 2490 } 2491 } 2492 2493 void 2494 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2495 { 2496 mutex_enter(&ipl->ipl_lock); 2497 2498 if (ipl->ipl_ids_latched) { 2499 /* I lost, someone else got here before me */ 2500 mutex_exit(&ipl->ipl_lock); 2501 return; 2502 } 2503 2504 if (local != NULL) 2505 IPSID_REFHOLD(local); 2506 if (remote != NULL) 2507 IPSID_REFHOLD(remote); 2508 2509 ipl->ipl_local_cid = local; 2510 ipl->ipl_remote_cid = remote; 2511 ipl->ipl_ids_latched = B_TRUE; 2512 mutex_exit(&ipl->ipl_lock); 2513 } 2514 2515 void 2516 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii) 2517 { 2518 ipsa_t *sa; 2519 2520 if (!ipl->ipl_ids_latched) { 2521 ipsid_t *local = NULL; 2522 ipsid_t *remote = NULL; 2523 2524 if (!ii->ipsec_in_loopback) { 2525 if (ii->ipsec_in_esp_sa != NULL) 2526 sa = ii->ipsec_in_esp_sa; 2527 else 2528 sa = ii->ipsec_in_ah_sa; 2529 ASSERT(sa != NULL); 2530 local = sa->ipsa_dst_cid; 2531 remote = sa->ipsa_src_cid; 2532 } 2533 ipsec_latch_ids(ipl, local, remote); 2534 } 2535 ipl->ipl_in_action = ii->ipsec_in_action; 2536 IPACT_REFHOLD(ipl->ipl_in_action); 2537 } 2538 2539 /* 2540 * Check whether the policy constraints are met either for an 2541 * inbound datagram; called from IP in numerous places. 2542 * 2543 * Note that this is not a chokepoint for inbound policy checks; 2544 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy() 2545 */ 2546 mblk_t * 2547 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, 2548 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2549 { 2550 ipsec_in_t *ii; 2551 boolean_t ret; 2552 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp; 2553 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL; 2554 ipsec_latch_t *ipl; 2555 uint64_t unique_id; 2556 ipsec_stack_t *ipss; 2557 ip_stack_t *ipst; 2558 netstack_t *ns; 2559 2560 ASSERT(connp != NULL); 2561 ipl = connp->conn_latch; 2562 ns = connp->conn_netstack; 2563 ipss = ns->netstack_ipsec; 2564 ipst = ns->netstack_ip; 2565 2566 if (ipsec_mp == NULL) { 2567 clear: 2568 /* 2569 * This is the case where the incoming datagram is 2570 * cleartext and we need to see whether this client 2571 * would like to receive such untrustworthy things from 2572 * the wire. 2573 */ 2574 ASSERT(mp != NULL); 2575 2576 if (ipl != NULL) { 2577 /* 2578 * Policy is cached in the conn. 2579 */ 2580 if ((ipl->ipl_in_policy != NULL) && 2581 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) { 2582 ret = ipsec_inbound_accept_clear(mp, 2583 ipha, ip6h); 2584 if (ret) { 2585 BUMP_MIB(&ipst->ips_ip_mib, 2586 ipsecInSucceeded); 2587 return (first_mp); 2588 } else { 2589 ipsec_log_policy_failure( 2590 IPSEC_POLICY_MISMATCH, 2591 "ipsec_check_inbound_policy", ipha, 2592 ip6h, B_FALSE, ns); 2593 ip_drop_packet(first_mp, B_TRUE, NULL, 2594 NULL, 2595 DROPPER(ipss, ipds_spd_got_clear), 2596 &ipss->ipsec_spd_dropper); 2597 BUMP_MIB(&ipst->ips_ip_mib, 2598 ipsecInFailed); 2599 return (NULL); 2600 } 2601 } else { 2602 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2603 return (first_mp); 2604 } 2605 } else { 2606 /* 2607 * As this is a non-hardbound connection we need 2608 * to look at both per-socket policy and global 2609 * policy. 
As this is cleartext, mark the mp as 2610 * M_DATA in case it is an ICMP error being 2611 * reported before calling ipsec_check_global_policy 2612 * so that it does not mistake it for IPSEC_IN. 2613 */ 2614 uchar_t db_type = mp->b_datap->db_type; 2615 mp->b_datap->db_type = M_DATA; 2616 first_mp = ipsec_check_global_policy(first_mp, connp, 2617 ipha, ip6h, mctl_present, ns); 2618 if (first_mp != NULL) 2619 mp->b_datap->db_type = db_type; 2620 return (first_mp); 2621 } 2622 } 2623 /* 2624 * If it is inbound, check whether the attached message 2625 * is secure or not. We have a special case for ICMP, 2626 * where we have an IPSEC_IN message and the attached 2627 * message is not secure. See icmp_inbound_error_fanout 2628 * for details. 2629 */ 2630 ASSERT(ipsec_mp != NULL); 2631 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2632 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2633 2634 if (!ii->ipsec_in_secure) 2635 goto clear; 2636 2637 /* 2638 * mp->b_cont could be either an M_CTL message 2639 * for ICMP errors being sent up or an M_DATA message. 2640 */ 2641 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA); 2642 2643 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2644 2645 if (ipl == NULL) { 2646 /* 2647 * We don't have policies cached in the conn 2648 * for this stream. So, look at the global 2649 * policy. It will check against conn or global 2650 * depending on whichever is stronger. 2651 */ 2652 return (ipsec_check_global_policy(first_mp, connp, 2653 ipha, ip6h, mctl_present, ns)); 2654 } 2655 2656 if (ipl->ipl_in_action != NULL) { 2657 /* Policy is cached & latched; fast(er) path */ 2658 const char *reason; 2659 kstat_named_t *counter; 2660 2661 if (ipsec_check_ipsecin_latch(ii, mp, ipl, 2662 ipha, ip6h, &reason, &counter, connp)) { 2663 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2664 return (first_mp); 2665 } 2666 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, 2667 SL_ERROR|SL_WARN|SL_CONSOLE, 2668 "ipsec inbound policy mismatch: %s, packet dropped\n", 2669 reason); 2670 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2671 &ipss->ipsec_spd_dropper); 2672 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2673 return (NULL); 2674 } else if (ipl->ipl_in_policy == NULL) { 2675 ipsec_weird_null_inbound_policy++; 2676 return (first_mp); 2677 } 2678 2679 unique_id = conn_to_unique(connp, mp, ipha, ip6h); 2680 IPPOL_REFHOLD(ipl->ipl_in_policy); 2681 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy, 2682 ipha, ip6h, unique_id, ns); 2683 /* 2684 * NOTE: ipsecIn{Failed,Succeeded} bumped by 2685 * ipsec_check_ipsecin_policy(). 2686 */ 2687 if (first_mp != NULL) 2688 ipsec_latch_inbound(ipl, ii); 2689 return (first_mp); 2690 } 2691 2692 /* 2693 * Returns: 2694 * 2695 * SELRET_NOMEM --> msgpullup() needed to gather things failed. 2696 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2697 * gathering, the initial fragment is too short for 2698 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2699 * set. 2700 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2701 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2702 * should put this packet in a fragment-gathering queue. 2703 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2704 * are set. 2705 */ 2706 static selret_t 2707 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2708 ip6_t *ip6h, uint8_t sel_flags) 2709 { 2710 uint16_t *ports; 2711 ushort_t hdr_len; 2712 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases...
*/ 2713 mblk_t *spare_mp = NULL; 2714 uint8_t *nexthdrp; 2715 uint8_t nexthdr; 2716 uint8_t *typecode; 2717 uint8_t check_proto; 2718 ip6_pkt_t ipp; 2719 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2720 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2721 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2722 2723 ASSERT((ipha == NULL && ip6h != NULL) || 2724 (ipha != NULL && ip6h == NULL)); 2725 2726 if (ip6h != NULL) { 2727 if (is_icmp) 2728 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2729 2730 check_proto = IPPROTO_ICMPV6; 2731 sel->ips_isv4 = B_FALSE; 2732 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2733 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2734 2735 bzero(&ipp, sizeof (ipp)); 2736 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2737 2738 nexthdr = ip6h->ip6_nxt; 2739 switch (nexthdr) { 2740 case IPPROTO_HOPOPTS: 2741 case IPPROTO_ROUTING: 2742 case IPPROTO_DSTOPTS: 2743 case IPPROTO_FRAGMENT: 2744 /* 2745 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2746 * mblk that's contiguous to feed it 2747 */ 2748 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2749 return (SELRET_NOMEM); 2750 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2751 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2752 &hdr_len, &nexthdrp)) { 2753 /* Malformed packet - caller frees. */ 2754 ipsec_freemsg_chain(spare_mp); 2755 return (SELRET_BADPKT); 2756 } 2757 nexthdr = *nexthdrp; 2758 /* We can just extract based on hdr_len now. */ 2759 break; 2760 default: 2761 hdr_len = IPV6_HDR_LEN; 2762 break; 2763 } 2764 2765 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2766 /* IPv6 Fragment */ 2767 ipsec_freemsg_chain(spare_mp); 2768 return (SELRET_TUNFRAG); 2769 } 2770 } else { 2771 if (is_icmp) 2772 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2773 check_proto = IPPROTO_ICMP; 2774 sel->ips_isv4 = B_TRUE; 2775 sel->ips_local_addr_v4 = ipha->ipha_dst; 2776 sel->ips_remote_addr_v4 = ipha->ipha_src; 2777 nexthdr = ipha->ipha_protocol; 2778 hdr_len = IPH_HDR_LENGTH(ipha); 2779 2780 if (port_policy_present && 2781 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2782 !is_icmp) { 2783 /* IPv4 Fragment */ 2784 ipsec_freemsg_chain(spare_mp); 2785 return (SELRET_TUNFRAG); 2786 } 2787 2788 } 2789 sel->ips_protocol = nexthdr; 2790 2791 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2792 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2793 (!port_policy_present && tunnel_mode)) { 2794 sel->ips_remote_port = sel->ips_local_port = 0; 2795 ipsec_freemsg_chain(spare_mp); 2796 return (SELRET_SUCCESS); 2797 } 2798 2799 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2800 /* If we didn't pullup a copy already, do so now. */ 2801 /* 2802 * XXX performance, will upper-layers frequently split TCP/UDP 2803 * apart from IP or options? If so, perhaps we should revisit 2804 * the spare_mp strategy. 
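 *
 * Note that we only need 4 contiguous bytes past the transport header
 * here: either both 16-bit ports, or the one-byte ICMP type and code.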
2805 */ 2806 ipsec_hdr_pullup_needed++; 2807 if (spare_mp == NULL && 2808 (spare_mp = msgpullup(mp, -1)) == NULL) { 2809 return (SELRET_NOMEM); 2810 } 2811 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2812 } else { 2813 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2814 } 2815 2816 if (nexthdr == check_proto) { 2817 typecode = (uint8_t *)ports; 2818 sel->ips_icmp_type = *typecode++; 2819 sel->ips_icmp_code = *typecode; 2820 sel->ips_remote_port = sel->ips_local_port = 0; 2821 } else { 2822 sel->ips_remote_port = *ports++; 2823 sel->ips_local_port = *ports; 2824 } 2825 ipsec_freemsg_chain(spare_mp); 2826 return (SELRET_SUCCESS); 2827 } 2828 2829 static boolean_t 2830 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2831 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2832 { 2833 /* 2834 * XXX cut&paste shared with ipsec_init_inbound_sel 2835 */ 2836 uint16_t *ports; 2837 ushort_t hdr_len; 2838 mblk_t *spare_mp = NULL; 2839 uint8_t *nexthdrp; 2840 uint8_t nexthdr; 2841 uint8_t *typecode; 2842 uint8_t check_proto; 2843 2844 ASSERT((ipha == NULL && ip6h != NULL) || 2845 (ipha != NULL && ip6h == NULL)); 2846 2847 if (ip6h != NULL) { 2848 check_proto = IPPROTO_ICMPV6; 2849 nexthdr = ip6h->ip6_nxt; 2850 switch (nexthdr) { 2851 case IPPROTO_HOPOPTS: 2852 case IPPROTO_ROUTING: 2853 case IPPROTO_DSTOPTS: 2854 case IPPROTO_FRAGMENT: 2855 /* 2856 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2857 * mblk that's contiguous to feed it 2858 */ 2859 spare_mp = msgpullup(mp, -1); 2860 if (spare_mp == NULL || 2861 !ip_hdr_length_nexthdr_v6(spare_mp, 2862 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2863 &hdr_len, &nexthdrp)) { 2864 /* Always works, even if NULL. */ 2865 ipsec_freemsg_chain(spare_mp); 2866 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2867 DROPPER(ipss, ipds_spd_nomem), 2868 &ipss->ipsec_spd_dropper); 2869 return (B_FALSE); 2870 } else { 2871 nexthdr = *nexthdrp; 2872 /* We can just extract based on hdr_len now. */ 2873 } 2874 break; 2875 default: 2876 hdr_len = IPV6_HDR_LEN; 2877 break; 2878 } 2879 } else { 2880 check_proto = IPPROTO_ICMP; 2881 hdr_len = IPH_HDR_LENGTH(ipha); 2882 nexthdr = ipha->ipha_protocol; 2883 } 2884 2885 sel->ips_protocol = nexthdr; 2886 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2887 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2888 sel->ips_local_port = sel->ips_remote_port = 0; 2889 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2890 return (B_TRUE); 2891 } 2892 2893 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2894 /* If we didn't pullup a copy already, do so now. */ 2895 /* 2896 * XXX performance, will upper-layers frequently split TCP/UDP 2897 * apart from IP or options? If so, perhaps we should revisit 2898 * the spare_mp strategy. 2899 * 2900 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2901 */ 2902 if (spare_mp == NULL && 2903 (spare_mp = msgpullup(mp, -1)) == NULL) { 2904 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2905 DROPPER(ipss, ipds_spd_nomem), 2906 &ipss->ipsec_spd_dropper); 2907 return (B_FALSE); 2908 } 2909 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2910 } else { 2911 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2912 } 2913 2914 if (nexthdr == check_proto) { 2915 typecode = (uint8_t *)ports; 2916 sel->ips_icmp_type = *typecode++; 2917 sel->ips_icmp_code = *typecode; 2918 sel->ips_remote_port = sel->ips_local_port = 0; 2919 } else { 2920 sel->ips_local_port = *ports++; 2921 sel->ips_remote_port = *ports; 2922 } 2923 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2924 return (B_TRUE); 2925 } 2926 2927 /* 2928 * Create an ipsec_action_t based on the way an inbound packet was protected. 2929 * Used to reflect traffic back to a sender. 2930 * 2931 * We don't bother interning the action into the hash table. 2932 */ 2933 ipsec_action_t * 2934 ipsec_in_to_out_action(ipsec_in_t *ii) 2935 { 2936 ipsa_t *ah_assoc, *esp_assoc; 2937 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2938 ipsec_action_t *ap; 2939 boolean_t unique; 2940 2941 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2942 2943 if (ap == NULL) 2944 return (NULL); 2945 2946 bzero(ap, sizeof (*ap)); 2947 HASH_NULL(ap, ipa_hash); 2948 ap->ipa_next = NULL; 2949 ap->ipa_refs = 1; 2950 2951 /* 2952 * Get the algorithms that were used for this packet. 2953 */ 2954 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2955 ap->ipa_act.ipa_log = 0; 2956 ah_assoc = ii->ipsec_in_ah_sa; 2957 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 2958 2959 esp_assoc = ii->ipsec_in_esp_sa; 2960 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 2961 2962 if (esp_assoc != NULL) { 2963 encr_alg = esp_assoc->ipsa_encr_alg; 2964 espa_alg = esp_assoc->ipsa_auth_alg; 2965 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 2966 } 2967 if (ah_assoc != NULL) 2968 auth_alg = ah_assoc->ipsa_auth_alg; 2969 2970 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 2971 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 2972 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 2973 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 2974 unique = B_FALSE; 2975 2976 if (esp_assoc != NULL) { 2977 ap->ipa_act.ipa_apply.ipp_espa_minbits = 2978 esp_assoc->ipsa_authkeybits; 2979 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 2980 esp_assoc->ipsa_authkeybits; 2981 ap->ipa_act.ipa_apply.ipp_espe_minbits = 2982 esp_assoc->ipsa_encrkeybits; 2983 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 2984 esp_assoc->ipsa_encrkeybits; 2985 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 2986 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 2987 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 2988 unique = B_TRUE; 2989 } 2990 if (ah_assoc != NULL) { 2991 ap->ipa_act.ipa_apply.ipp_ah_minbits = 2992 ah_assoc->ipsa_authkeybits; 2993 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 2994 ah_assoc->ipsa_authkeybits; 2995 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 2996 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 2997 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 2998 unique = B_TRUE; 2999 } 3000 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3001 ap->ipa_want_unique = unique; 3002 ap->ipa_allow_clear = B_FALSE; 3003 ap->ipa_want_se = ii->ipsec_in_decaps; 3004 ap->ipa_want_ah = (ah_assoc != NULL); 3005 ap->ipa_want_esp = (esp_assoc != NULL); 3006 3007 ap->ipa_ovhd = 
ipsec_act_ovhd(&ap->ipa_act); 3008 3009 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3010 3011 return (ap); 3012 } 3013 3014 3015 /* 3016 * Compute the worst-case amount of extra space required by an action. 3017 * Note that, because of the ESP considerations listed below, this is 3018 * actually not the same as the best-case reduction in the MTU; in the 3019 * future, we should pass additional information to this function to 3020 * allow the actual MTU impact to be computed. 3021 * 3022 * AH: Revisit this if we implement algorithms with 3023 * a verifier size of more than 12 bytes. 3024 * 3025 * ESP: A more exact but more messy computation would take into 3026 * account the interaction between the cipher block size and the 3027 * effective MTU, yielding the inner payload size which reflects a 3028 * packet with *minimum* ESP padding.. 3029 */ 3030 int32_t 3031 ipsec_act_ovhd(const ipsec_act_t *act) 3032 { 3033 int32_t overhead = 0; 3034 3035 if (act->ipa_type == IPSEC_ACT_APPLY) { 3036 const ipsec_prot_t *ipp = &act->ipa_apply; 3037 3038 if (ipp->ipp_use_ah) 3039 overhead += IPSEC_MAX_AH_HDR_SIZE; 3040 if (ipp->ipp_use_esp) { 3041 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3042 overhead += sizeof (struct udphdr); 3043 } 3044 if (ipp->ipp_use_se) 3045 overhead += IP_SIMPLE_HDR_LENGTH; 3046 } 3047 return (overhead); 3048 } 3049 3050 /* 3051 * This hash function is used only when creating policies and thus is not 3052 * performance-critical for packet flows. 3053 * 3054 * Future work: canonicalize the structures hashed with this (i.e., 3055 * zeroize padding) so the hash works correctly. 3056 */ 3057 /* ARGSUSED */ 3058 static uint32_t 3059 policy_hash(int size, const void *start, const void *end) 3060 { 3061 return (0); 3062 } 3063 3064 3065 /* 3066 * Hash function macros for each address type. 3067 * 3068 * The IPV6 hash function assumes that the low order 32-bits of the 3069 * address (typically containing the low order 24 bits of the mac 3070 * address) are reasonably well-distributed. Revisit this if we run 3071 * into trouble from lots of collisions on ::1 addresses and the like 3072 * (seems unlikely). 3073 */ 3074 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3075 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3076 3077 /* 3078 * These two hash functions should produce coordinated values 3079 * but have slightly different roles. 3080 */ 3081 static uint32_t 3082 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3083 { 3084 uint32_t valid = selkey->ipsl_valid; 3085 ipsec_stack_t *ipss = ns->netstack_ipsec; 3086 3087 if (!(valid & IPSL_REMOTE_ADDR)) 3088 return (IPSEC_SEL_NOHASH); 3089 3090 if (valid & IPSL_IPV4) { 3091 if (selkey->ipsl_remote_pfxlen == 32) { 3092 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3093 ipss->ipsec_spd_hashsize)); 3094 } 3095 } 3096 if (valid & IPSL_IPV6) { 3097 if (selkey->ipsl_remote_pfxlen == 128) { 3098 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3099 ipss->ipsec_spd_hashsize)); 3100 } 3101 } 3102 return (IPSEC_SEL_NOHASH); 3103 } 3104 3105 static uint32_t 3106 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3107 { 3108 if (sel->ips_isv4) { 3109 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3110 root->ipr_nchains)); 3111 } 3112 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3113 } 3114 3115 /* 3116 * Intern actions into the action hash table. 
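 *
 * On success the caller gets back the head of the interned chain with
 * its own reference; on allocation failure NULL is returned instead.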
3117 */ 3118 ipsec_action_t * 3119 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3120 { 3121 int i; 3122 uint32_t hval; 3123 ipsec_action_t *ap; 3124 ipsec_action_t *prev = NULL; 3125 int32_t overhead, maxovhd = 0; 3126 boolean_t allow_clear = B_FALSE; 3127 boolean_t want_ah = B_FALSE; 3128 boolean_t want_esp = B_FALSE; 3129 boolean_t want_se = B_FALSE; 3130 boolean_t want_unique = B_FALSE; 3131 ipsec_stack_t *ipss = ns->netstack_ipsec; 3132 3133 /* 3134 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3135 * so we can use a non-trivial policy_hash function. 3136 */ 3137 for (i = n-1; i >= 0; i--) { 3138 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3139 3140 HASH_LOCK(ipss->ipsec_action_hash, hval); 3141 3142 for (HASH_ITERATE(ap, ipa_hash, 3143 ipss->ipsec_action_hash, hval)) { 3144 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3145 continue; 3146 if (ap->ipa_next != prev) 3147 continue; 3148 break; 3149 } 3150 if (ap != NULL) { 3151 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3152 prev = ap; 3153 continue; 3154 } 3155 /* 3156 * need to allocate a new one.. 3157 */ 3158 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3159 if (ap == NULL) { 3160 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3161 if (prev != NULL) 3162 ipsec_action_free(prev); 3163 return (NULL); 3164 } 3165 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3166 3167 ap->ipa_next = prev; 3168 ap->ipa_act = a[i]; 3169 3170 overhead = ipsec_act_ovhd(&a[i]); 3171 if (maxovhd < overhead) 3172 maxovhd = overhead; 3173 3174 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3175 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3176 allow_clear = B_TRUE; 3177 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3178 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3179 3180 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3181 want_ah |= ipp->ipp_use_ah; 3182 want_esp |= ipp->ipp_use_esp; 3183 want_se |= ipp->ipp_use_se; 3184 want_unique |= ipp->ipp_use_unique; 3185 } 3186 ap->ipa_allow_clear = allow_clear; 3187 ap->ipa_want_ah = want_ah; 3188 ap->ipa_want_esp = want_esp; 3189 ap->ipa_want_se = want_se; 3190 ap->ipa_want_unique = want_unique; 3191 ap->ipa_refs = 1; /* from the hash table */ 3192 ap->ipa_ovhd = maxovhd; 3193 if (prev) 3194 prev->ipa_refs++; 3195 prev = ap; 3196 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3197 } 3198 3199 ap->ipa_refs++; /* caller's reference */ 3200 3201 return (ap); 3202 } 3203 3204 /* 3205 * Called when refcount goes to 0, indicating that all references to this 3206 * node are gone. 3207 * 3208 * This does not unchain the action from the hash table. 3209 */ 3210 void 3211 ipsec_action_free(ipsec_action_t *ap) 3212 { 3213 for (;;) { 3214 ipsec_action_t *np = ap->ipa_next; 3215 ASSERT(ap->ipa_refs == 0); 3216 ASSERT(ap->ipa_hash.hash_pp == NULL); 3217 kmem_cache_free(ipsec_action_cache, ap); 3218 ap = np; 3219 /* Inlined IPACT_REFRELE -- avoid recursion */ 3220 if (ap == NULL) 3221 break; 3222 membar_exit(); 3223 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3224 break; 3225 /* End inlined IPACT_REFRELE */ 3226 } 3227 } 3228 3229 /* 3230 * Called when the action hash table goes away. 3231 * 3232 * The actions can be queued on an mblk with ipsec_in or 3233 * ipsec_out, hence the actions might still be around. 3234 * But we decrement ipa_refs here since we no longer have 3235 * a reference to the action from the hash table. 
3236 */ 3237 static void 3238 ipsec_action_free_table(ipsec_action_t *ap) 3239 { 3240 while (ap != NULL) { 3241 ipsec_action_t *np = ap->ipa_next; 3242 3243 /* FIXME: remove? */ 3244 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3245 (void *)ap, ap->ipa_refs); 3246 ASSERT(ap->ipa_refs > 0); 3247 IPACT_REFRELE(ap); 3248 ap = np; 3249 } 3250 } 3251 3252 /* 3253 * Need to walk all stack instances since the reclaim function 3254 * is global for all instances. 3255 */ 3256 /* ARGSUSED */ 3257 static void 3258 ipsec_action_reclaim(void *arg) 3259 { 3260 netstack_handle_t nh; 3261 netstack_t *ns; 3262 3263 netstack_next_init(&nh); 3264 while ((ns = netstack_next(&nh)) != NULL) { 3265 ipsec_action_reclaim_stack(ns); 3266 netstack_rele(ns); 3267 } 3268 netstack_next_fini(&nh); 3269 } 3270 3271 /* 3272 * Periodically sweep action hash table for actions with refcount==1, and 3273 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3274 * because we can't close the race against another thread finding the action 3275 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3276 * Instead, we run this function sporadically to clean up after ourselves; 3277 * we also set it as the "reclaim" function for the action kmem_cache. 3278 * 3279 * Note that it may take several passes of ipsec_action_reclaim_stack() 3280 * to free all "stale" actions. 3281 */ 3282 static void 3283 ipsec_action_reclaim_stack(netstack_t *ns) 3284 { 3285 int i; 3286 ipsec_stack_t *ipss = ns->netstack_ipsec; 3287 3288 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3289 ipsec_action_t *ap, *np; 3290 3291 /* skip the lock if nobody home */ 3292 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3293 continue; 3294 3295 HASH_LOCK(ipss->ipsec_action_hash, i); 3296 for (ap = ipss->ipsec_action_hash[i].hash_head; 3297 ap != NULL; ap = np) { 3298 ASSERT(ap->ipa_refs > 0); 3299 np = ap->ipa_hash.hash_next; 3300 if (ap->ipa_refs > 1) 3301 continue; 3302 HASH_UNCHAIN(ap, ipa_hash, 3303 ipss->ipsec_action_hash, i); 3304 IPACT_REFRELE(ap); 3305 } 3306 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3307 } 3308 } 3309 3310 /* 3311 * Intern a selector set into the selector set hash table. 3312 * This is simpler than the actions case.. 3313 */ 3314 static ipsec_sel_t * 3315 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3316 { 3317 ipsec_sel_t *sp; 3318 uint32_t hval, bucket; 3319 ipsec_stack_t *ipss = ns->netstack_ipsec; 3320 3321 /* 3322 * Exactly one AF bit should be set in selkey. 3323 */ 3324 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3325 !(selkey->ipsl_valid & IPSL_IPV6)); 3326 3327 hval = selkey_hash(selkey, ns); 3328 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3329 selkey->ipsl_sel_hval = hval; 3330 3331 bucket = (hval == IPSEC_SEL_NOHASH) ?
0 : hval; 3332 3333 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); 3334 HASH_LOCK(ipss->ipsec_sel_hash, bucket); 3335 3336 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { 3337 if (bcmp(&sp->ipsl_key, selkey, 3338 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3339 break; 3340 } 3341 if (sp != NULL) { 3342 sp->ipsl_refs++; 3343 3344 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3345 return (sp); 3346 } 3347 3348 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3349 if (sp == NULL) { 3350 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3351 return (NULL); 3352 } 3353 3354 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); 3355 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3356 sp->ipsl_key = *selkey; 3357 /* Set to uninitialized and have insertion into polhead fix things. */ 3358 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3359 sp->ipsl_key.ipsl_pol_hval = 0; 3360 else 3361 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3362 3363 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3364 3365 return (sp); 3366 } 3367 3368 static void 3369 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) 3370 { 3371 ipsec_sel_t *sp = *spp; 3372 int hval = sp->ipsl_key.ipsl_sel_hval; 3373 ipsec_stack_t *ipss = ns->netstack_ipsec; 3374 3375 *spp = NULL; 3376 3377 if (hval == IPSEC_SEL_NOHASH) 3378 hval = 0; 3379 3380 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); 3381 HASH_LOCK(ipss->ipsec_sel_hash, hval); 3382 if (--sp->ipsl_refs == 1) { 3383 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); 3384 sp->ipsl_refs--; 3385 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3386 ASSERT(sp->ipsl_refs == 0); 3387 kmem_cache_free(ipsec_sel_cache, sp); 3388 /* Caller unlocks */ 3389 return; 3390 } 3391 3392 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3393 } 3394 3395 /* 3396 * Free a policy rule which we know is no longer being referenced. 3397 */ 3398 void 3399 ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns) 3400 { 3401 ASSERT(ipp->ipsp_refs == 0); 3402 ASSERT(ipp->ipsp_sel != NULL); 3403 ASSERT(ipp->ipsp_act != NULL); 3404 3405 ipsec_sel_rel(&ipp->ipsp_sel, ns); 3406 IPACT_REFRELE(ipp->ipsp_act); 3407 kmem_cache_free(ipsec_pol_cache, ipp); 3408 } 3409 3410 /* 3411 * Construction of new policy rules; construct a policy, and add it to 3412 * the appropriate tables.
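 *
 * If any of the three pieces (policy node, interned action chain,
 * interned selector) cannot be allocated, whatever was already obtained
 * is released and NULL is returned.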
3413 */ 3414 ipsec_policy_t * 3415 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3416 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3417 { 3418 ipsec_action_t *ap; 3419 ipsec_sel_t *sp; 3420 ipsec_policy_t *ipp; 3421 ipsec_stack_t *ipss = ns->netstack_ipsec; 3422 3423 if (index_ptr == NULL) 3424 index_ptr = &ipss->ipsec_next_policy_index; 3425 3426 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3427 ap = ipsec_act_find(a, nacts, ns); 3428 sp = ipsec_find_sel(keys, ns); 3429 3430 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3431 if (ap != NULL) { 3432 IPACT_REFRELE(ap); 3433 } 3434 if (sp != NULL) 3435 ipsec_sel_rel(&sp, ns); 3436 if (ipp != NULL) 3437 kmem_cache_free(ipsec_pol_cache, ipp); 3438 return (NULL); 3439 } 3440 3441 HASH_NULL(ipp, ipsp_hash); 3442 3443 ipp->ipsp_refs = 1; /* caller's reference */ 3444 ipp->ipsp_sel = sp; 3445 ipp->ipsp_act = ap; 3446 ipp->ipsp_prio = prio; /* rule priority */ 3447 ipp->ipsp_index = *index_ptr; 3448 (*index_ptr)++; 3449 3450 return (ipp); 3451 } 3452 3453 static void 3454 ipsec_update_present_flags(ipsec_stack_t *ipss) 3455 { 3456 boolean_t hashpol; 3457 3458 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3459 3460 if (hashpol) { 3461 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3462 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3463 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3464 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3465 return; 3466 } 3467 3468 ipss->ipsec_outbound_v4_policy_present = (NULL != 3469 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3470 ipr_nonhash[IPSEC_AF_V4]); 3471 ipss->ipsec_outbound_v6_policy_present = (NULL != 3472 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3473 ipr_nonhash[IPSEC_AF_V6]); 3474 ipss->ipsec_inbound_v4_policy_present = (NULL != 3475 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3476 ipr_nonhash[IPSEC_AF_V4]); 3477 ipss->ipsec_inbound_v6_policy_present = (NULL != 3478 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3479 ipr_nonhash[IPSEC_AF_V6]); 3480 } 3481 3482 boolean_t 3483 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3484 netstack_t *ns) 3485 { 3486 ipsec_sel_t *sp; 3487 ipsec_policy_t *ip, *nip, *head; 3488 int af; 3489 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3490 3491 sp = ipsec_find_sel(keys, ns); 3492 3493 if (sp == NULL) 3494 return (B_FALSE); 3495 3496 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6; 3497 3498 rw_enter(&php->iph_lock, RW_WRITER); 3499 3500 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) { 3501 head = pr->ipr_nonhash[af]; 3502 } else { 3503 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3504 } 3505 3506 for (ip = head; ip != NULL; ip = nip) { 3507 nip = ip->ipsp_hash.hash_next; 3508 if (ip->ipsp_sel != sp) { 3509 continue; 3510 } 3511 3512 IPPOL_UNCHAIN(php, ip, ns); 3513 3514 php->iph_gen++; 3515 ipsec_update_present_flags(ns->netstack_ipsec); 3516 3517 rw_exit(&php->iph_lock); 3518 3519 ipsec_sel_rel(&sp, ns); 3520 3521 return (B_TRUE); 3522 } 3523 3524 rw_exit(&php->iph_lock); 3525 ipsec_sel_rel(&sp, ns); 3526 return (B_FALSE); 3527 } 3528 3529 int 3530 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index, 3531 netstack_t *ns) 3532 { 3533 boolean_t found = B_FALSE; 3534 ipsec_policy_t ipkey; 3535 ipsec_policy_t *ip; 3536 avl_index_t where; 3537 3538 (void) memset(&ipkey, 0, sizeof (ipkey)); 3539 ipkey.ipsp_index = policy_index; 3540 3541 rw_enter(&php->iph_lock, RW_WRITER); 3542 3543 /* 3544 * We could be cleverer here about the walk, 3545 * but well, (k+1)*log(N) will do for now (k == number of matches, 3546 * N == number of table entries). 3547 */ 3548 for (;;) { 3549 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid, 3550 (void *)&ipkey, &where); 3551 ASSERT(ip == NULL); 3552 3553 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER); 3554 3555 if (ip == NULL) 3556 break; 3557 3558 if (ip->ipsp_index != policy_index) { 3559 ASSERT(ip->ipsp_index > policy_index); 3560 break; 3561 } 3562 3563 IPPOL_UNCHAIN(php, ip, ns); 3564 found = B_TRUE; 3565 } 3566 3567 if (found) { 3568 php->iph_gen++; 3569 ipsec_update_present_flags(ns->netstack_ipsec); 3570 } 3571 3572 rw_exit(&php->iph_lock); 3573 3574 return (found ? 0 : ENOENT); 3575 } 3576 3577 /* 3578 * Given a constructed ipsec_policy_t policy rule, see if it can be entered 3579 * into the correct policy ruleset. As a side-effect, it sets the hash 3580 * entries on "ipp"'s ipsp_pol_hval. 3581 * 3582 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a 3583 * duplicate policy exists with exactly the same selectors, or because an 3584 * ICMP rule exists with a different encryption/authentication action). 3585 */ 3586 boolean_t 3587 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3588 { 3589 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3590 int af = -1; 3591 ipsec_policy_t *p2, *head; 3592 uint8_t check_proto; 3593 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3594 uint32_t valid = selkey->ipsl_valid; 3595 3596 if (valid & IPSL_IPV6) { 3597 ASSERT(!(valid & IPSL_IPV4)); 3598 af = IPSEC_AF_V6; 3599 check_proto = IPPROTO_ICMPV6; 3600 } else { 3601 ASSERT(valid & IPSL_IPV4); 3602 af = IPSEC_AF_V4; 3603 check_proto = IPPROTO_ICMP; 3604 } 3605 3606 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3607 3608 /* 3609 * Double-check that we don't have any duplicate selectors here. 3610 * Because selectors are interned below, we need only compare pointers 3611 * for equality. 3612 */ 3613 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) { 3614 head = pr->ipr_nonhash[af]; 3615 } else { 3616 selkey->ipsl_pol_hval = 3617 (selkey->ipsl_valid & IPSL_IPV4) ?
3618 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3619 pr->ipr_nchains) : 3620 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3621 pr->ipr_nchains); 3622 3623 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3624 } 3625 3626 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3627 if (p2->ipsp_sel == ipp->ipsp_sel) 3628 return (B_FALSE); 3629 } 3630 3631 /* 3632 * If it's ICMP and not a drop or pass rule, run through the ICMP 3633 * rules and make sure the action is either new or the same as any 3634 * other actions. We don't have to check the full chain because 3635 * discard and bypass will override all other actions 3636 */ 3637 3638 if (valid & IPSL_PROTOCOL && 3639 selkey->ipsl_proto == check_proto && 3640 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3641 3642 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3643 3644 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3645 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3646 (p2->ipsp_act->ipa_act.ipa_type == 3647 IPSEC_ACT_APPLY)) { 3648 return (ipsec_compare_action(p2, ipp)); 3649 } 3650 } 3651 } 3652 3653 return (B_TRUE); 3654 } 3655 3656 /* 3657 * compare the action chains of two policies for equality 3658 * B_TRUE -> effective equality 3659 */ 3660 3661 static boolean_t 3662 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3663 { 3664 3665 ipsec_action_t *act1, *act2; 3666 3667 /* We have a valid rule. Let's compare the actions */ 3668 if (p1->ipsp_act == p2->ipsp_act) { 3669 /* same action. We are good */ 3670 return (B_TRUE); 3671 } 3672 3673 /* we have to walk the chain */ 3674 3675 act1 = p1->ipsp_act; 3676 act2 = p2->ipsp_act; 3677 3678 while (act1 != NULL && act2 != NULL) { 3679 3680 /* otherwise, Are we close enough? */ 3681 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3682 act1->ipa_want_ah != act2->ipa_want_ah || 3683 act1->ipa_want_esp != act2->ipa_want_esp || 3684 act1->ipa_want_se != act2->ipa_want_se) { 3685 /* Nope, we aren't */ 3686 return (B_FALSE); 3687 } 3688 3689 if (act1->ipa_want_ah) { 3690 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3691 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3692 return (B_FALSE); 3693 } 3694 3695 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3696 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3697 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3698 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3699 return (B_FALSE); 3700 } 3701 } 3702 3703 if (act1->ipa_want_esp) { 3704 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3705 act2->ipa_act.ipa_apply.ipp_use_esp || 3706 act1->ipa_act.ipa_apply.ipp_use_espa != 3707 act2->ipa_act.ipa_apply.ipp_use_espa) { 3708 return (B_FALSE); 3709 } 3710 3711 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3712 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3713 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3714 return (B_FALSE); 3715 } 3716 3717 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3718 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3719 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3720 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3721 return (B_FALSE); 3722 } 3723 } 3724 3725 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3726 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3727 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3728 return (B_FALSE); 3729 } 3730 3731 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3732 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3733 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3734 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3735 return (B_FALSE); 3736 } 3737 } 3738 3739 } 3740 3741 act1 = 
act1->ipa_next; 3742 act2 = act2->ipa_next; 3743 } 3744 3745 if (act1 != NULL || act2 != NULL) { 3746 return (B_FALSE); 3747 } 3748 3749 return (B_TRUE); 3750 } 3751 3752 3753 /* 3754 * Given a constructed ipsec_policy_t policy rule, enter it into 3755 * the correct policy ruleset. 3756 * 3757 * ipsec_check_policy() is assumed to have succeeded first (to check for 3758 * duplicates). 3759 */ 3760 void 3761 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3762 netstack_t *ns) 3763 { 3764 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3765 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3766 uint32_t valid = selkey->ipsl_valid; 3767 uint32_t hval = selkey->ipsl_pol_hval; 3768 int af = -1; 3769 3770 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3771 3772 if (valid & IPSL_IPV6) { 3773 ASSERT(!(valid & IPSL_IPV4)); 3774 af = IPSEC_AF_V6; 3775 } else { 3776 ASSERT(valid & IPSL_IPV4); 3777 af = IPSEC_AF_V4; 3778 } 3779 3780 php->iph_gen++; 3781 3782 if (hval == IPSEC_SEL_NOHASH) { 3783 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3784 } else { 3785 HASH_LOCK(pr->ipr_hash, hval); 3786 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3787 HASH_UNLOCK(pr->ipr_hash, hval); 3788 } 3789 3790 ipsec_insert_always(&php->iph_rulebyid, ipp); 3791 3792 ipsec_update_present_flags(ns->netstack_ipsec); 3793 } 3794 3795 static void 3796 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, 3797 netstack_t *ns) 3798 { 3799 ipsec_policy_t *ip, *nip; 3800 int af, chain, nchain; 3801 3802 for (af = 0; af < IPSEC_NAF; af++) { 3803 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3804 nip = ip->ipsp_hash.hash_next; 3805 IPPOL_UNCHAIN(php, ip, ns); 3806 } 3807 ipr->ipr_nonhash[af] = NULL; 3808 } 3809 nchain = ipr->ipr_nchains; 3810 3811 for (chain = 0; chain < nchain; chain++) { 3812 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3813 ip = nip) { 3814 nip = ip->ipsp_hash.hash_next; 3815 IPPOL_UNCHAIN(php, ip, ns); 3816 } 3817 ipr->ipr_hash[chain].hash_head = NULL; 3818 } 3819 } 3820 3821 void 3822 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3823 { 3824 int dir; 3825 3826 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3827 3828 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3829 ipsec_ipr_flush(php, &php->iph_root[dir], ns); 3830 3831 ipsec_update_present_flags(ns->netstack_ipsec); 3832 } 3833 3834 void 3835 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3836 { 3837 int dir; 3838 3839 ASSERT(php->iph_refs == 0); 3840 3841 rw_enter(&php->iph_lock, RW_WRITER); 3842 ipsec_polhead_flush(php, ns); 3843 rw_exit(&php->iph_lock); 3844 rw_destroy(&php->iph_lock); 3845 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3846 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3847 int chain; 3848 3849 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3850 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3851 3852 } 3853 ipsec_polhead_free_table(php); 3854 kmem_free(php, sizeof (*php)); 3855 } 3856 3857 static void 3858 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3859 { 3860 int af; 3861 3862 ipr->ipr_nchains = 0; 3863 ipr->ipr_hash = NULL; 3864 3865 for (af = 0; af < IPSEC_NAF; af++) { 3866 ipr->ipr_nonhash[af] = NULL; 3867 } 3868 } 3869 3870 ipsec_policy_head_t * 3871 ipsec_polhead_create(void) 3872 { 3873 ipsec_policy_head_t *php; 3874 3875 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3876 if (php == NULL) 3877 return (php); 3878 3879 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3880 php->iph_refs = 1; 3881 php->iph_gen = 0; 3882 3883 
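/* Start with both roots empty; the hash chains are allocated separately. */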
ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]);
3884 	ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]);
3885
3886 	avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid,
3887 	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));
3888
3889 	return (php);
3890 }
3891
3892 /*
3893  * Clone the policy head into a new polhead; release one reference to the
3894  * old one and return the only reference to the new one.
3895  * If the old one had a refcount of 1, just return it.
3896  */
3897 ipsec_policy_head_t *
3898 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns)
3899 {
3900 	ipsec_policy_head_t *nphp;
3901
3902 	if (php == NULL)
3903 		return (ipsec_polhead_create());
3904 	else if (php->iph_refs == 1)
3905 		return (php);
3906
3907 	nphp = ipsec_polhead_create();
3908 	if (nphp == NULL)
3909 		return (NULL);
3910
3911 	if (ipsec_copy_polhead(php, nphp, ns) != 0) {
3912 		ipsec_polhead_free(nphp, ns);
3913 		return (NULL);
3914 	}
3915 	IPPH_REFRELE(php, ns);
3916 	return (nphp);
3917 }
3918
3919 /*
3920  * When sending a response to an ICMP request or generating a RST
3921  * in the TCP case, the outbound packets need to go out at the same level
3922  * of protection as the incoming ones, i.e., we associate our outbound
3923  * policy with how the packet came in.  We call this after we have
3924  * accepted the incoming packet, which may or may not have been in
3925  * the clear, and hence we are sending the reply back with the policy
3926  * matching the incoming datagram's policy.
3927  *
3928  * NOTE: This serves two purposes:
3929  *
3930  * 1) If we have multiple outbound policies, we send out a reply
3931  *    matching how it came in rather than matching the outbound
3932  *    policy.
3933  *
3934  * 2) For asymmetric policies, we want to make sure that incoming
3935  *    and outgoing traffic have the same level of protection.  Asymmetric
3936  *    policies exist only with global policy where we may not have
3937  *    both outbound and inbound at the same time.
3938  *
3939  * NOTE2: This function is called by cleartext cases, so it needs to be
3940  * in IP proper.
3941  */
3942 boolean_t
3943 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h)
3944 {
3945 	ipsec_in_t *ii;
3946 	ipsec_out_t *io;
3947 	boolean_t v4;
3948 	mblk_t *mp;
3949 	boolean_t secure, attach_if;
3950 	uint_t ifindex;
3951 	ipsec_selector_t sel;
3952 	ipsec_action_t *reflect_action = NULL;
3953 	zoneid_t zoneid;
3954 	netstack_t *ns;
3955
3956 	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
3957
3958 	bzero((void*)&sel, sizeof (sel));
3959
3960 	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
3961
3962 	mp = ipsec_mp->b_cont;
3963 	ASSERT(mp != NULL);
3964
3965 	if (ii->ipsec_in_action != NULL) {
3966 		/* transfer reference.. */
3967 		reflect_action = ii->ipsec_in_action;
3968 		ii->ipsec_in_action = NULL;
3969 	} else if (!ii->ipsec_in_loopback)
3970 		reflect_action = ipsec_in_to_out_action(ii);
3971 	secure = ii->ipsec_in_secure;
3972 	attach_if = ii->ipsec_in_attach_if;
3973 	ifindex = ii->ipsec_in_ill_index;
3974 	zoneid = ii->ipsec_in_zoneid;
3975 	ASSERT(zoneid != ALL_ZONES);
3976 	ns = ii->ipsec_in_ns;
3977 	v4 = ii->ipsec_in_v4;
3978
3979 	ipsec_in_release_refs(ii);	/* No netstack_rele/hold needed */
3980
3981 	/*
3982 	 * The caller is going to send the datagram out, which might
3983 	 * go on the wire or be delivered locally through ip_wput_local.
3984 	 *
3985 	 * 1) If it goes out on the wire, new associations will be
3986 	 *    obtained.
3987 	 * 2) If it is delivered locally, ip_wput_local will convert
3988 	 *    this IPSEC_OUT to an IPSEC_IN, looking at the requests.
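	 *    In either case the ipsec_in_t is recycled in place below into
	 *    an ipsec_out_t carrying the reflected action.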
3989 */ 3990 3991 io = (ipsec_out_t *)ipsec_mp->b_rptr; 3992 bzero(io, sizeof (ipsec_out_t)); 3993 io->ipsec_out_type = IPSEC_OUT; 3994 io->ipsec_out_len = sizeof (ipsec_out_t); 3995 io->ipsec_out_frtn.free_func = ipsec_out_free; 3996 io->ipsec_out_frtn.free_arg = (char *)io; 3997 io->ipsec_out_act = reflect_action; 3998 3999 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, 4000 ns->netstack_ipsec)) 4001 return (B_FALSE); 4002 4003 io->ipsec_out_src_port = sel.ips_local_port; 4004 io->ipsec_out_dst_port = sel.ips_remote_port; 4005 io->ipsec_out_proto = sel.ips_protocol; 4006 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4007 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4008 4009 /* 4010 * Don't use global policy for this, as we want 4011 * to use the same protection that was applied to the inbound packet. 4012 */ 4013 io->ipsec_out_use_global_policy = B_FALSE; 4014 io->ipsec_out_proc_begin = B_FALSE; 4015 io->ipsec_out_secure = secure; 4016 io->ipsec_out_v4 = v4; 4017 io->ipsec_out_attach_if = attach_if; 4018 io->ipsec_out_ill_index = ifindex; 4019 io->ipsec_out_zoneid = zoneid; 4020 io->ipsec_out_ns = ns; /* No netstack_hold */ 4021 4022 return (B_TRUE); 4023 } 4024 4025 mblk_t * 4026 ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4027 { 4028 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 4029 ipsec_in_t *nii; 4030 mblk_t *nmp; 4031 frtn_t nfrtn; 4032 ipsec_stack_t *ipss = ns->netstack_ipsec; 4033 4034 ASSERT(ii->ipsec_in_type == IPSEC_IN); 4035 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 4036 4037 nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); 4038 if (nmp == NULL) { 4039 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4040 DROPPER(ipss, ipds_spd_nomem), 4041 &ipss->ipsec_spd_dropper); 4042 return (NULL); 4043 } 4044 4045 ASSERT(nmp->b_datap->db_type == M_CTL); 4046 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4047 4048 /* 4049 * Bump refcounts. 4050 */ 4051 if (ii->ipsec_in_ah_sa != NULL) 4052 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 4053 if (ii->ipsec_in_esp_sa != NULL) 4054 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 4055 if (ii->ipsec_in_policy != NULL) 4056 IPPH_REFHOLD(ii->ipsec_in_policy); 4057 4058 /* 4059 * Copy everything, but preserve the free routine provided by 4060 * ipsec_in_alloc(). 4061 */ 4062 nii = (ipsec_in_t *)nmp->b_rptr; 4063 nfrtn = nii->ipsec_in_frtn; 4064 bcopy(ii, nii, sizeof (*ii)); 4065 nii->ipsec_in_frtn = nfrtn; 4066 4067 nmp->b_cont = cont; 4068 4069 return (nmp); 4070 } 4071 4072 mblk_t * 4073 ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4074 { 4075 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 4076 ipsec_out_t *nio; 4077 mblk_t *nmp; 4078 frtn_t nfrtn; 4079 ipsec_stack_t *ipss = ns->netstack_ipsec; 4080 4081 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4082 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4083 4084 nmp = ipsec_alloc_ipsec_out(ns); 4085 if (nmp == NULL) { 4086 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4087 DROPPER(ipss, ipds_spd_nomem), 4088 &ipss->ipsec_spd_dropper); 4089 return (NULL); 4090 } 4091 ASSERT(nmp->b_datap->db_type == M_CTL); 4092 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4093 4094 /* 4095 * Bump refcounts. 
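	 * Every object the new ipsec_out_t will point at (AH/ESP SAs,
	 * polhead, policy, action, latch, cred) gets an extra hold here
	 * so the copy owns its own references; ipsec_out_release_refs()
	 * drops them when the tagged mblk is freed.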
4096 */ 4097 if (io->ipsec_out_ah_sa != NULL) 4098 IPSA_REFHOLD(io->ipsec_out_ah_sa); 4099 if (io->ipsec_out_esp_sa != NULL) 4100 IPSA_REFHOLD(io->ipsec_out_esp_sa); 4101 if (io->ipsec_out_polhead != NULL) 4102 IPPH_REFHOLD(io->ipsec_out_polhead); 4103 if (io->ipsec_out_policy != NULL) 4104 IPPOL_REFHOLD(io->ipsec_out_policy); 4105 if (io->ipsec_out_act != NULL) 4106 IPACT_REFHOLD(io->ipsec_out_act); 4107 if (io->ipsec_out_latch != NULL) 4108 IPLATCH_REFHOLD(io->ipsec_out_latch); 4109 if (io->ipsec_out_cred != NULL) 4110 crhold(io->ipsec_out_cred); 4111 4112 /* 4113 * Copy everything, but preserve the free routine provided by 4114 * ipsec_alloc_ipsec_out(). 4115 */ 4116 nio = (ipsec_out_t *)nmp->b_rptr; 4117 nfrtn = nio->ipsec_out_frtn; 4118 bcopy(io, nio, sizeof (*io)); 4119 nio->ipsec_out_frtn = nfrtn; 4120 4121 nmp->b_cont = cont; 4122 4123 return (nmp); 4124 } 4125 4126 static void 4127 ipsec_out_release_refs(ipsec_out_t *io) 4128 { 4129 netstack_t *ns = io->ipsec_out_ns; 4130 4131 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4132 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4133 ASSERT(io->ipsec_out_ns != NULL); 4134 4135 /* Note: IPSA_REFRELE is multi-line macro */ 4136 if (io->ipsec_out_ah_sa != NULL) 4137 IPSA_REFRELE(io->ipsec_out_ah_sa); 4138 if (io->ipsec_out_esp_sa != NULL) 4139 IPSA_REFRELE(io->ipsec_out_esp_sa); 4140 if (io->ipsec_out_polhead != NULL) 4141 IPPH_REFRELE(io->ipsec_out_polhead, ns); 4142 if (io->ipsec_out_policy != NULL) 4143 IPPOL_REFRELE(io->ipsec_out_policy, ns); 4144 if (io->ipsec_out_act != NULL) 4145 IPACT_REFRELE(io->ipsec_out_act); 4146 if (io->ipsec_out_cred != NULL) { 4147 crfree(io->ipsec_out_cred); 4148 io->ipsec_out_cred = NULL; 4149 } 4150 if (io->ipsec_out_latch) { 4151 IPLATCH_REFRELE(io->ipsec_out_latch, ns); 4152 io->ipsec_out_latch = NULL; 4153 } 4154 } 4155 4156 static void 4157 ipsec_out_free(void *arg) 4158 { 4159 ipsec_out_t *io = (ipsec_out_t *)arg; 4160 ipsec_out_release_refs(io); 4161 kmem_cache_free(ipsec_info_cache, arg); 4162 } 4163 4164 static void 4165 ipsec_in_release_refs(ipsec_in_t *ii) 4166 { 4167 netstack_t *ns = ii->ipsec_in_ns; 4168 4169 ASSERT(ii->ipsec_in_ns != NULL); 4170 4171 /* Note: IPSA_REFRELE is multi-line macro */ 4172 if (ii->ipsec_in_ah_sa != NULL) 4173 IPSA_REFRELE(ii->ipsec_in_ah_sa); 4174 if (ii->ipsec_in_esp_sa != NULL) 4175 IPSA_REFRELE(ii->ipsec_in_esp_sa); 4176 if (ii->ipsec_in_policy != NULL) 4177 IPPH_REFRELE(ii->ipsec_in_policy, ns); 4178 if (ii->ipsec_in_da != NULL) { 4179 freeb(ii->ipsec_in_da); 4180 ii->ipsec_in_da = NULL; 4181 } 4182 } 4183 4184 static void 4185 ipsec_in_free(void *arg) 4186 { 4187 ipsec_in_t *ii = (ipsec_in_t *)arg; 4188 ipsec_in_release_refs(ii); 4189 kmem_cache_free(ipsec_info_cache, arg); 4190 } 4191 4192 /* 4193 * This is called only for outbound datagrams if the datagram needs to 4194 * go out secure. A NULL mp can be passed to get an ipsec_out. This 4195 * facility is used by ip_unbind. 4196 * 4197 * NOTE : o As the data part could be modified by ipsec_out_process etc. 4198 * we can't make it fast by calling a dup. 
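 *	  (A dup would share the underlying data blocks; since
 *	  ipsec_out_process() and friends may rewrite them, each caller
 *	  needs its own copy.)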
4199 */ 4200 mblk_t * 4201 ipsec_alloc_ipsec_out(netstack_t *ns) 4202 { 4203 mblk_t *ipsec_mp; 4204 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4205 4206 if (io == NULL) 4207 return (NULL); 4208 4209 bzero(io, sizeof (ipsec_out_t)); 4210 4211 io->ipsec_out_type = IPSEC_OUT; 4212 io->ipsec_out_len = sizeof (ipsec_out_t); 4213 io->ipsec_out_frtn.free_func = ipsec_out_free; 4214 io->ipsec_out_frtn.free_arg = (char *)io; 4215 4216 /* 4217 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 4218 * using ipsec_out_zoneid should assert that the zoneid has been set to 4219 * a sane value. 4220 */ 4221 io->ipsec_out_zoneid = ALL_ZONES; 4222 io->ipsec_out_ns = ns; /* No netstack_hold */ 4223 4224 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 4225 &io->ipsec_out_frtn); 4226 if (ipsec_mp == NULL) { 4227 ipsec_out_free(io); 4228 4229 return (NULL); 4230 } 4231 ipsec_mp->b_datap->db_type = M_CTL; 4232 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 4233 4234 return (ipsec_mp); 4235 } 4236 4237 /* 4238 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4239 * Otherwise initialize using conn. 4240 * 4241 * If pol is non-null, we consume a reference to it. 4242 */ 4243 mblk_t * 4244 ipsec_attach_ipsec_out(mblk_t **mp, conn_t *connp, ipsec_policy_t *pol, 4245 uint8_t proto, netstack_t *ns) 4246 { 4247 mblk_t *ipsec_mp; 4248 ipsec_stack_t *ipss = ns->netstack_ipsec; 4249 4250 ASSERT((pol != NULL) || (connp != NULL)); 4251 4252 ipsec_mp = ipsec_alloc_ipsec_out(ns); 4253 if (ipsec_mp == NULL) { 4254 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4255 "ipsec_attach_ipsec_out: Allocation failure\n"); 4256 ip_drop_packet(*mp, B_FALSE, NULL, NULL, 4257 DROPPER(ipss, ipds_spd_nomem), 4258 &ipss->ipsec_spd_dropper); 4259 *mp = NULL; 4260 return (NULL); 4261 } 4262 ipsec_mp->b_cont = *mp; 4263 /* 4264 * If *mp is NULL, ipsec_init_ipsec_out() won't/should not be using it. 4265 */ 4266 return (ipsec_init_ipsec_out(ipsec_mp, mp, connp, pol, proto, ns)); 4267 } 4268 4269 /* 4270 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4271 * Otherwise initialize using conn. 4272 * 4273 * If pol is non-null, we consume a reference to it. 4274 */ 4275 mblk_t * 4276 ipsec_init_ipsec_out(mblk_t *ipsec_mp, mblk_t **mp, conn_t *connp, 4277 ipsec_policy_t *pol, uint8_t proto, netstack_t *ns) 4278 { 4279 ipsec_out_t *io; 4280 ipsec_policy_t *p; 4281 ipha_t *ipha; 4282 ip6_t *ip6h; 4283 ipsec_stack_t *ipss = ns->netstack_ipsec; 4284 4285 ASSERT(ipsec_mp->b_cont == *mp); 4286 4287 ASSERT((pol != NULL) || (connp != NULL)); 4288 4289 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4290 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4291 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4292 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4293 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4294 io->ipsec_out_latch = NULL; 4295 /* 4296 * Set the zoneid when we have the connp. 4297 * Otherwise, we're called from ip_wput_attach_policy() who will take 4298 * care of setting the zoneid. 
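	 * (ipsec_alloc_ipsec_out() initialized the zoneid to the ALL_ZONES
	 * sentinel, so a missed assignment is detectable by ASSERT.)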
4299 */ 4300 if (connp != NULL) 4301 io->ipsec_out_zoneid = connp->conn_zoneid; 4302 4303 io->ipsec_out_ns = ns; /* No netstack_hold */ 4304 4305 if (*mp != NULL) { 4306 ipha = (ipha_t *)(*mp)->b_rptr; 4307 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4308 io->ipsec_out_v4 = B_TRUE; 4309 ip6h = NULL; 4310 } else { 4311 io->ipsec_out_v4 = B_FALSE; 4312 ip6h = (ip6_t *)ipha; 4313 ipha = NULL; 4314 } 4315 } else { 4316 ASSERT(connp != NULL && connp->conn_policy_cached); 4317 ip6h = NULL; 4318 ipha = NULL; 4319 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4320 } 4321 4322 p = NULL; 4323 4324 /* 4325 * Take latched policies over global policy. Check here again for 4326 * this, in case we had conn_latch set while the packet was flying 4327 * around in IP. 4328 */ 4329 if (connp != NULL && connp->conn_latch != NULL) { 4330 ASSERT(ns == connp->conn_netstack); 4331 p = connp->conn_latch->ipl_out_policy; 4332 io->ipsec_out_latch = connp->conn_latch; 4333 IPLATCH_REFHOLD(connp->conn_latch); 4334 if (p != NULL) { 4335 IPPOL_REFHOLD(p); 4336 } 4337 io->ipsec_out_src_port = connp->conn_lport; 4338 io->ipsec_out_dst_port = connp->conn_fport; 4339 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4340 if (pol != NULL) 4341 IPPOL_REFRELE(pol, ns); 4342 } else if (pol != NULL) { 4343 ipsec_selector_t sel; 4344 4345 bzero((void*)&sel, sizeof (sel)); 4346 4347 p = pol; 4348 /* 4349 * conn does not have the port information. Get 4350 * it from the packet. 4351 */ 4352 4353 if (!ipsec_init_outbound_ports(&sel, *mp, ipha, ip6h, 0, 4354 ns->netstack_ipsec)) { 4355 /* Callee did ip_drop_packet() on *mp. */ 4356 *mp = NULL; 4357 freeb(ipsec_mp); 4358 return (NULL); 4359 } 4360 io->ipsec_out_src_port = sel.ips_local_port; 4361 io->ipsec_out_dst_port = sel.ips_remote_port; 4362 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4363 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4364 } 4365 4366 io->ipsec_out_proto = proto; 4367 io->ipsec_out_use_global_policy = B_TRUE; 4368 io->ipsec_out_secure = (p != NULL); 4369 io->ipsec_out_policy = p; 4370 4371 if (p == NULL) { 4372 if (connp->conn_policy != NULL) { 4373 io->ipsec_out_secure = B_TRUE; 4374 ASSERT(io->ipsec_out_latch == NULL); 4375 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4376 io->ipsec_out_need_policy = B_TRUE; 4377 ASSERT(io->ipsec_out_polhead == NULL); 4378 IPPH_REFHOLD(connp->conn_policy); 4379 io->ipsec_out_polhead = connp->conn_policy; 4380 } 4381 } else { 4382 /* Handle explicit drop action. */ 4383 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4384 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4385 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4386 DROPPER(ipss, ipds_spd_explicit), 4387 &ipss->ipsec_spd_dropper); 4388 *mp = NULL; 4389 ipsec_mp = NULL; 4390 } 4391 } 4392 4393 return (ipsec_mp); 4394 } 4395 4396 /* 4397 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4398 * and keep track of what-if-any IPsec processing will be applied to the 4399 * datagram. 
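 * Layout note: the ipsec_in_t itself is the M_CTL data, attached with
 * desballoc() so that ipsec_in_free() runs when the mblk is freed; the
 * caller then links the actual datagram at b_cont.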
4400  */
4401 mblk_t *
4402 ipsec_in_alloc(boolean_t isv4, netstack_t *ns)
4403 {
4404 	mblk_t *ipsec_in;
4405 	ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP);
4406
4407 	if (ii == NULL)
4408 		return (NULL);
4409
4410 	bzero(ii, sizeof (ipsec_info_t));
4411 	ii->ipsec_in_type = IPSEC_IN;
4412 	ii->ipsec_in_len = sizeof (ipsec_in_t);
4413
4414 	ii->ipsec_in_v4 = isv4;
4415 	ii->ipsec_in_secure = B_TRUE;
4416 	ii->ipsec_in_ns = ns;		/* No netstack_hold */
4417
4418 	ii->ipsec_in_frtn.free_func = ipsec_in_free;
4419 	ii->ipsec_in_frtn.free_arg = (char *)ii;
4420
4421 	ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI,
4422 	    &ii->ipsec_in_frtn);
4423 	if (ipsec_in == NULL) {
4424 		ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n"));
4425 		ipsec_in_free(ii);
4426 		return (NULL);
4427 	}
4428
4429 	ipsec_in->b_datap->db_type = M_CTL;
4430 	ipsec_in->b_wptr += sizeof (ipsec_info_t);
4431
4432 	return (ipsec_in);
4433 }
4434
4435 /*
4436  * This is called from ip_wput_local when a packet which needs
4437  * security is looped back, to convert the IPSEC_OUT to an IPSEC_IN
4438  * before fanout, where the policy check happens.  In most cases,
4439  * IPSEC processing has *never* been done.  There is one case
4440  * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where
4441  * the packet is destined for localhost and IPSEC processing has
4442  * already been done.
4443  *
4444  * Future: This could happen after SA selection has occurred for
4445  * outbound, which will tell us who the src and dst identities are.
4446  * Then it's just a matter of splicing the ah/esp SA pointers from the
4447  * ipsec_out_t to the ipsec_in_t.
4448  */
4449 void
4450 ipsec_out_to_in(mblk_t *ipsec_mp)
4451 {
4452 	ipsec_in_t *ii;
4453 	ipsec_out_t *io;
4454 	ipsec_policy_t *pol;
4455 	ipsec_action_t *act;
4456 	boolean_t v4, icmp_loopback;
4457 	zoneid_t zoneid;
4458 	netstack_t *ns;
4459
4460 	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
4461
4462 	io = (ipsec_out_t *)ipsec_mp->b_rptr;
4463
4464 	v4 = io->ipsec_out_v4;
4465 	zoneid = io->ipsec_out_zoneid;
4466 	icmp_loopback = io->ipsec_out_icmp_loopback;
4467 	ns = io->ipsec_out_ns;
4468
4469 	act = io->ipsec_out_act;
4470 	if (act == NULL) {
4471 		pol = io->ipsec_out_policy;
4472 		if (pol != NULL) {
4473 			act = pol->ipsp_act;
4474 			IPACT_REFHOLD(act);
4475 		}
4476 	}
4477 	io->ipsec_out_act = NULL;
4478
4479 	ipsec_out_release_refs(io);	/* No netstack_rele/hold needed */
4480
4481 	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
4482 	bzero(ii, sizeof (ipsec_in_t));
4483 	ii->ipsec_in_type = IPSEC_IN;
4484 	ii->ipsec_in_len = sizeof (ipsec_in_t);
4485 	ii->ipsec_in_loopback = B_TRUE;
4486 	ii->ipsec_in_ns = ns;		/* No netstack_hold */
4487
4488 	ii->ipsec_in_frtn.free_func = ipsec_in_free;
4489 	ii->ipsec_in_frtn.free_arg = (char *)ii;
4490 	ii->ipsec_in_action = act;
4491 	ii->ipsec_in_zoneid = zoneid;
4492
4493 	/*
4494 	 * In most cases, we can't look at the ipsec_out_XXX_sa
4495 	 * because this never went through IPSEC processing.  So, look at
4496 	 * the requests and infer whether it would have gone through
4497 	 * IPSEC processing or not.  Initialize the "done" fields with
4498 	 * the requests.  The possible values for the "done" fields are:
4499 	 *
4500 	 * 1) zero, indicates that a particular preference was never
4501 	 *    requested.
4502 	 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/
4503 	 *    IPSEC_PREF_NEVER.  If IPSEC_REQ_DONE is set, it means that
4504 	 *    IPSEC processing has been completed.
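	 * Consistent with that, ipsec_in_secure is simply set to B_TRUE
	 * below: a looped-back packet that asked for protection is treated
	 * as if it had been protected.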
4505 */ 4506 ii->ipsec_in_secure = B_TRUE; 4507 ii->ipsec_in_v4 = v4; 4508 ii->ipsec_in_icmp_loopback = icmp_loopback; 4509 ii->ipsec_in_attach_if = B_FALSE; 4510 } 4511 4512 /* 4513 * Consults global policy to see whether this datagram should 4514 * go out secure. If so it attaches a ipsec_mp in front and 4515 * returns. 4516 */ 4517 mblk_t * 4518 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, 4519 conn_t *connp, boolean_t unspec_src, zoneid_t zoneid) 4520 { 4521 mblk_t *mp; 4522 ipsec_out_t *io = NULL; 4523 ipsec_selector_t sel; 4524 uint_t ill_index; 4525 boolean_t conn_dontroutex; 4526 boolean_t conn_multicast_loopx; 4527 boolean_t policy_present; 4528 ip_stack_t *ipst = ire->ire_ipst; 4529 netstack_t *ns = ipst->ips_netstack; 4530 ipsec_stack_t *ipss = ns->netstack_ipsec; 4531 4532 ASSERT((ipha != NULL && ip6h == NULL) || 4533 (ip6h != NULL && ipha == NULL)); 4534 4535 bzero((void*)&sel, sizeof (sel)); 4536 4537 if (ipha != NULL) 4538 policy_present = ipss->ipsec_outbound_v4_policy_present; 4539 else 4540 policy_present = ipss->ipsec_outbound_v6_policy_present; 4541 /* 4542 * Fast Path to see if there is any policy. 4543 */ 4544 if (!policy_present) { 4545 if (ipsec_mp->b_datap->db_type == M_CTL) { 4546 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4547 if (!io->ipsec_out_secure) { 4548 /* 4549 * If there is no global policy and ip_wput 4550 * or ip_wput_multicast has attached this mp 4551 * for multicast case, free the ipsec_mp and 4552 * return the original mp. 4553 */ 4554 mp = ipsec_mp->b_cont; 4555 freeb(ipsec_mp); 4556 ipsec_mp = mp; 4557 io = NULL; 4558 } 4559 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4560 } 4561 if (((io == NULL) || (io->ipsec_out_polhead == NULL)) && 4562 ((connp == NULL) || (connp->conn_policy == NULL))) 4563 return (ipsec_mp); 4564 } 4565 4566 ill_index = 0; 4567 conn_multicast_loopx = conn_dontroutex = B_FALSE; 4568 mp = ipsec_mp; 4569 if (ipsec_mp->b_datap->db_type == M_CTL) { 4570 mp = ipsec_mp->b_cont; 4571 /* 4572 * This is a connection where we have some per-socket 4573 * policy or ip_wput has attached an ipsec_mp for 4574 * the multicast datagram. 4575 */ 4576 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4577 if (!io->ipsec_out_secure) { 4578 /* 4579 * This ipsec_mp was allocated in ip_wput or 4580 * ip_wput_multicast so that we will know the 4581 * value of ill_index, conn_dontroute, 4582 * conn_multicast_loop in the multicast case if 4583 * we inherit global policy here. 4584 */ 4585 ill_index = io->ipsec_out_ill_index; 4586 conn_dontroutex = io->ipsec_out_dontroute; 4587 conn_multicast_loopx = io->ipsec_out_multicast_loop; 4588 freeb(ipsec_mp); 4589 ipsec_mp = mp; 4590 io = NULL; 4591 } 4592 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4593 } 4594 4595 if (ipha != NULL) { 4596 sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ? 
4597 ipha->ipha_src : ire->ire_src_addr); 4598 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4599 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4600 sel.ips_isv4 = B_TRUE; 4601 } else { 4602 ushort_t hdr_len; 4603 uint8_t *nexthdrp; 4604 boolean_t is_fragment; 4605 4606 sel.ips_isv4 = B_FALSE; 4607 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4608 if (!unspec_src) 4609 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4610 } else { 4611 sel.ips_local_addr_v6 = ip6h->ip6_src; 4612 } 4613 4614 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment); 4615 if (is_fragment) { 4616 /* 4617 * It's a packet fragment for a packet that 4618 * we have already processed (since IPsec processing 4619 * is done before fragmentation), so we don't 4620 * have to do policy checks again. Fragments can 4621 * come back to us for processing if they have 4622 * been queued up due to flow control. 4623 */ 4624 if (ipsec_mp->b_datap->db_type == M_CTL) { 4625 mp = ipsec_mp->b_cont; 4626 freeb(ipsec_mp); 4627 ipsec_mp = mp; 4628 } 4629 return (ipsec_mp); 4630 } 4631 4632 /* IPv6 common-case. */ 4633 sel.ips_protocol = ip6h->ip6_nxt; 4634 switch (ip6h->ip6_nxt) { 4635 case IPPROTO_TCP: 4636 case IPPROTO_UDP: 4637 case IPPROTO_SCTP: 4638 case IPPROTO_ICMPV6: 4639 break; 4640 default: 4641 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4642 &hdr_len, &nexthdrp)) { 4643 BUMP_MIB(&ipst->ips_ip6_mib, 4644 ipIfStatsOutDiscards); 4645 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4646 return (NULL); 4647 } 4648 sel.ips_protocol = *nexthdrp; 4649 break; 4650 } 4651 } 4652 4653 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4654 if (ipha != NULL) { 4655 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4656 } else { 4657 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4658 } 4659 4660 /* Callee dropped the packet. */ 4661 return (NULL); 4662 } 4663 4664 if (io != NULL) { 4665 /* 4666 * We seem to have some local policy (we already have 4667 * an ipsec_out). Look at global policy and see 4668 * whether we have to inherit or not. 4669 */ 4670 io->ipsec_out_need_policy = B_FALSE; 4671 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, 4672 &sel, ns); 4673 ASSERT((io->ipsec_out_policy != NULL) || 4674 (io->ipsec_out_act != NULL)); 4675 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4676 return (ipsec_mp); 4677 } 4678 /* 4679 * We pass in a pointer to a pointer because mp can become 4680 * NULL due to allocation failures or explicit drops. Callers 4681 * of this function should assume a NULL mp means the packet 4682 * was dropped. 4683 */ 4684 ipsec_mp = ipsec_attach_global_policy(&mp, connp, &sel, ns); 4685 if (ipsec_mp == NULL) 4686 return (mp); 4687 4688 /* 4689 * Copy the right port information. 4690 */ 4691 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4692 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4693 4694 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4695 ASSERT((io->ipsec_out_policy != NULL) || 4696 (io->ipsec_out_act != NULL)); 4697 io->ipsec_out_src_port = sel.ips_local_port; 4698 io->ipsec_out_dst_port = sel.ips_remote_port; 4699 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4700 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4701 /* 4702 * Set ill_index, conn_dontroute and conn_multicast_loop 4703 * for multicast datagrams. 
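	 * These values were salvaged from the ipsec_out that ip_wput or
	 * ip_wput_multicast attached, just before that mblk was freed
	 * above.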
4704 */ 4705 io->ipsec_out_ill_index = ill_index; 4706 io->ipsec_out_dontroute = conn_dontroutex; 4707 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4708 4709 if (zoneid == ALL_ZONES) 4710 zoneid = GLOBAL_ZONEID; 4711 io->ipsec_out_zoneid = zoneid; 4712 return (ipsec_mp); 4713 } 4714 4715 /* 4716 * When appropriate, this function caches inbound and outbound policy 4717 * for this connection. 4718 * 4719 * XXX need to work out more details about per-interface policy and 4720 * caching here! 4721 * 4722 * XXX may want to split inbound and outbound caching for ill.. 4723 */ 4724 int 4725 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4726 { 4727 boolean_t global_policy_present; 4728 netstack_t *ns = connp->conn_netstack; 4729 ipsec_stack_t *ipss = ns->netstack_ipsec; 4730 4731 /* 4732 * There is no policy latching for ICMP sockets because we can't 4733 * decide on which policy to use until we see the packet and get 4734 * type/code selectors. 4735 */ 4736 if (connp->conn_ulp == IPPROTO_ICMP || 4737 connp->conn_ulp == IPPROTO_ICMPV6) { 4738 connp->conn_in_enforce_policy = 4739 connp->conn_out_enforce_policy = B_TRUE; 4740 if (connp->conn_latch != NULL) { 4741 IPLATCH_REFRELE(connp->conn_latch, ns); 4742 connp->conn_latch = NULL; 4743 } 4744 connp->conn_flags |= IPCL_CHECK_POLICY; 4745 return (0); 4746 } 4747 4748 global_policy_present = isv4 ? 4749 (ipss->ipsec_outbound_v4_policy_present || 4750 ipss->ipsec_inbound_v4_policy_present) : 4751 (ipss->ipsec_outbound_v6_policy_present || 4752 ipss->ipsec_inbound_v6_policy_present); 4753 4754 if ((connp->conn_policy != NULL) || global_policy_present) { 4755 ipsec_selector_t sel; 4756 ipsec_policy_t *p; 4757 4758 if (connp->conn_latch == NULL && 4759 (connp->conn_latch = iplatch_create()) == NULL) { 4760 return (ENOMEM); 4761 } 4762 4763 sel.ips_protocol = connp->conn_ulp; 4764 sel.ips_local_port = connp->conn_lport; 4765 sel.ips_remote_port = connp->conn_fport; 4766 sel.ips_is_icmp_inv_acq = 0; 4767 sel.ips_isv4 = isv4; 4768 if (isv4) { 4769 sel.ips_local_addr_v4 = connp->conn_src; 4770 sel.ips_remote_addr_v4 = connp->conn_rem; 4771 } else { 4772 sel.ips_local_addr_v6 = connp->conn_srcv6; 4773 sel.ips_remote_addr_v6 = connp->conn_remv6; 4774 } 4775 4776 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 4777 ns); 4778 if (connp->conn_latch->ipl_in_policy != NULL) 4779 IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns); 4780 connp->conn_latch->ipl_in_policy = p; 4781 connp->conn_in_enforce_policy = (p != NULL); 4782 4783 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel, 4784 ns); 4785 if (connp->conn_latch->ipl_out_policy != NULL) 4786 IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns); 4787 connp->conn_latch->ipl_out_policy = p; 4788 connp->conn_out_enforce_policy = (p != NULL); 4789 4790 /* Clear the latched actions too, in case we're recaching. */ 4791 if (connp->conn_latch->ipl_out_action != NULL) 4792 IPACT_REFRELE(connp->conn_latch->ipl_out_action); 4793 if (connp->conn_latch->ipl_in_action != NULL) 4794 IPACT_REFRELE(connp->conn_latch->ipl_in_action); 4795 } 4796 4797 /* 4798 * We may or may not have policy for this endpoint. We still set 4799 * conn_policy_cached so that inbound datagrams don't have to look 4800 * at global policy as policy is considered latched for these 4801 * endpoints. We should not set conn_policy_cached until the conn 4802 * reflects the actual policy. 
If we *set* this before inheriting
4803  * the policy there is a window where the check
4804  * CONN_INBOUND_POLICY_PRESENT will neither check with the policy
4805  * on the conn (because we have not yet copied the policy onto the
4806  * conn and hence not set conn_in_enforce_policy) nor with the
4807  * global policy (because conn_policy_cached is already set).
4808  */
4809 	connp->conn_policy_cached = B_TRUE;
4810 	if (connp->conn_in_enforce_policy)
4811 		connp->conn_flags |= IPCL_CHECK_POLICY;
4812 	return (0);
4813 }
4814
4815 void
4816 iplatch_free(ipsec_latch_t *ipl, netstack_t *ns)
4817 {
4818 	if (ipl->ipl_out_policy != NULL)
4819 		IPPOL_REFRELE(ipl->ipl_out_policy, ns);
4820 	if (ipl->ipl_in_policy != NULL)
4821 		IPPOL_REFRELE(ipl->ipl_in_policy, ns);
4822 	if (ipl->ipl_in_action != NULL)
4823 		IPACT_REFRELE(ipl->ipl_in_action);
4824 	if (ipl->ipl_out_action != NULL)
4825 		IPACT_REFRELE(ipl->ipl_out_action);
4826 	if (ipl->ipl_local_cid != NULL)
4827 		IPSID_REFRELE(ipl->ipl_local_cid);
4828 	if (ipl->ipl_remote_cid != NULL)
4829 		IPSID_REFRELE(ipl->ipl_remote_cid);
4830 	if (ipl->ipl_local_id != NULL)
4831 		crfree(ipl->ipl_local_id);
4832 	mutex_destroy(&ipl->ipl_lock);
4833 	kmem_free(ipl, sizeof (*ipl));
4834 }
4835
4836 ipsec_latch_t *
4837 iplatch_create()
4838 {
4839 	ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP);
4840 	if (ipl == NULL)
4841 		return (ipl);
4842 	bzero(ipl, sizeof (*ipl));
4843 	mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL);
4844 	ipl->ipl_refcnt = 1;
4845 	return (ipl);
4846 }
4847
4848 /*
4849  * Hash function for ID hash table.
4850  */
4851 static uint32_t
4852 ipsid_hash(int idtype, char *idstring)
4853 {
4854 	uint32_t hval = idtype;
4855 	unsigned char c;
4856
4857 	while ((c = *idstring++) != 0) {
4858 		hval = (hval << 4) | (hval >> 28);
4859 		hval ^= c;
4860 	}
4861 	hval = hval ^ (hval >> 16);
4862 	return (hval & (IPSID_HASHSIZE-1));
4863 }
4864
4865 /*
4866  * Look up identity string in hash table.  Return identity object
4867  * corresponding to the name -- either preexisting, or newly allocated.
4868  *
4869  * Return NULL if we need to allocate a new one and can't get memory.
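 *
 * Identities are interned: each unique (idtype, string) pair exists at
 * most once, refcounted, which is why ipsid_equal() can compare by
 * pointer.  A minimal usage sketch (the FQDN value is illustrative,
 * not from this file):
 *
 *	ipsid_t *cid;
 *
 *	cid = ipsid_lookup(SADB_IDENTTYPE_FQDN, "gw.example.com", ns);
 *	if (cid == NULL)
 *		... fail with ENOMEM ...
 *	... use cid, e.g., latch it into ipl_remote_cid ...
 *	IPSID_REFRELE(cid);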
4870 */ 4871 ipsid_t * 4872 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4873 { 4874 ipsid_t *retval; 4875 char *nstr; 4876 int idlen = strlen(idstring) + 1; 4877 ipsec_stack_t *ipss = ns->netstack_ipsec; 4878 ipsif_t *bucket; 4879 4880 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4881 4882 mutex_enter(&bucket->ipsif_lock); 4883 4884 for (retval = bucket->ipsif_head; retval != NULL; 4885 retval = retval->ipsid_next) { 4886 if (idtype != retval->ipsid_type) 4887 continue; 4888 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4889 continue; 4890 4891 IPSID_REFHOLD(retval); 4892 mutex_exit(&bucket->ipsif_lock); 4893 return (retval); 4894 } 4895 4896 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4897 if (!retval) { 4898 mutex_exit(&bucket->ipsif_lock); 4899 return (NULL); 4900 } 4901 4902 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4903 if (!nstr) { 4904 mutex_exit(&bucket->ipsif_lock); 4905 kmem_free(retval, sizeof (*retval)); 4906 return (NULL); 4907 } 4908 4909 retval->ipsid_refcnt = 1; 4910 retval->ipsid_next = bucket->ipsif_head; 4911 if (retval->ipsid_next != NULL) 4912 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4913 retval->ipsid_ptpn = &bucket->ipsif_head; 4914 retval->ipsid_type = idtype; 4915 retval->ipsid_cid = nstr; 4916 bucket->ipsif_head = retval; 4917 bcopy(idstring, nstr, idlen); 4918 mutex_exit(&bucket->ipsif_lock); 4919 4920 return (retval); 4921 } 4922 4923 /* 4924 * Garbage collect the identity hash table. 4925 */ 4926 void 4927 ipsid_gc(netstack_t *ns) 4928 { 4929 int i, len; 4930 ipsid_t *id, *nid; 4931 ipsif_t *bucket; 4932 ipsec_stack_t *ipss = ns->netstack_ipsec; 4933 4934 for (i = 0; i < IPSID_HASHSIZE; i++) { 4935 bucket = &ipss->ipsec_ipsid_buckets[i]; 4936 mutex_enter(&bucket->ipsif_lock); 4937 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4938 nid = id->ipsid_next; 4939 if (id->ipsid_refcnt == 0) { 4940 *id->ipsid_ptpn = nid; 4941 if (nid != NULL) 4942 nid->ipsid_ptpn = id->ipsid_ptpn; 4943 len = strlen(id->ipsid_cid) + 1; 4944 kmem_free(id->ipsid_cid, len); 4945 kmem_free(id, sizeof (*id)); 4946 } 4947 } 4948 mutex_exit(&bucket->ipsif_lock); 4949 } 4950 } 4951 4952 /* 4953 * Return true if two identities are the same. 4954 */ 4955 boolean_t 4956 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4957 { 4958 if (id1 == id2) 4959 return (B_TRUE); 4960 #ifdef DEBUG 4961 if ((id1 == NULL) || (id2 == NULL)) 4962 return (B_FALSE); 4963 /* 4964 * test that we're interning id's correctly.. 4965 */ 4966 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4967 (id1->ipsid_type != id2->ipsid_type)); 4968 #endif 4969 return (B_FALSE); 4970 } 4971 4972 /* 4973 * Initialize identity table; called during module initialization. 
4974 */ 4975 static void 4976 ipsid_init(netstack_t *ns) 4977 { 4978 ipsif_t *bucket; 4979 int i; 4980 ipsec_stack_t *ipss = ns->netstack_ipsec; 4981 4982 for (i = 0; i < IPSID_HASHSIZE; i++) { 4983 bucket = &ipss->ipsec_ipsid_buckets[i]; 4984 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4985 } 4986 } 4987 4988 /* 4989 * Free identity table (preparatory to module unload) 4990 */ 4991 static void 4992 ipsid_fini(netstack_t *ns) 4993 { 4994 ipsif_t *bucket; 4995 int i; 4996 ipsec_stack_t *ipss = ns->netstack_ipsec; 4997 4998 for (i = 0; i < IPSID_HASHSIZE; i++) { 4999 bucket = &ipss->ipsec_ipsid_buckets[i]; 5000 ASSERT(bucket->ipsif_head == NULL); 5001 mutex_destroy(&bucket->ipsif_lock); 5002 } 5003 } 5004 5005 /* 5006 * Update the minimum and maximum supported key sizes for the 5007 * specified algorithm. Must be called while holding the algorithms lock. 5008 */ 5009 void 5010 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 5011 netstack_t *ns) 5012 { 5013 size_t crypto_min = (size_t)-1, crypto_max = 0; 5014 size_t cur_crypto_min, cur_crypto_max; 5015 boolean_t is_valid; 5016 crypto_mechanism_info_t *mech_infos; 5017 uint_t nmech_infos; 5018 int crypto_rc, i; 5019 crypto_mech_usage_t mask; 5020 ipsec_stack_t *ipss = ns->netstack_ipsec; 5021 5022 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 5023 5024 /* 5025 * Compute the min, max, and default key sizes (in number of 5026 * increments to the default key size in bits) as defined 5027 * by the algorithm mappings. This range of key sizes is used 5028 * for policy related operations. The effective key sizes 5029 * supported by the framework could be more limited than 5030 * those defined for an algorithm. 5031 */ 5032 alg->alg_default_bits = alg->alg_key_sizes[0]; 5033 if (alg->alg_increment != 0) { 5034 /* key sizes are defined by range & increment */ 5035 alg->alg_minbits = alg->alg_key_sizes[1]; 5036 alg->alg_maxbits = alg->alg_key_sizes[2]; 5037 5038 alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits, 5039 alg->alg_increment, alg->alg_default_bits); 5040 } else if (alg->alg_nkey_sizes == 0) { 5041 /* no specified key size for algorithm */ 5042 alg->alg_minbits = alg->alg_maxbits = 0; 5043 } else { 5044 /* key sizes are defined by enumeration */ 5045 alg->alg_minbits = (uint16_t)-1; 5046 alg->alg_maxbits = 0; 5047 5048 for (i = 0; i < alg->alg_nkey_sizes; i++) { 5049 if (alg->alg_key_sizes[i] < alg->alg_minbits) 5050 alg->alg_minbits = alg->alg_key_sizes[i]; 5051 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 5052 alg->alg_maxbits = alg->alg_key_sizes[i]; 5053 } 5054 alg->alg_default = 0; 5055 } 5056 5057 if (!(alg->alg_flags & ALG_FLAG_VALID)) 5058 return; 5059 5060 /* 5061 * Mechanisms do not apply to the NULL encryption 5062 * algorithm, so simply return for this case. 5063 */ 5064 if (alg->alg_id == SADB_EALG_NULL) 5065 return; 5066 5067 /* 5068 * Find the min and max key sizes supported by the cryptographic 5069 * framework providers. 5070 */ 5071 5072 /* get the key sizes supported by the framework */ 5073 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 5074 &mech_infos, &nmech_infos, KM_SLEEP); 5075 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 5076 alg->alg_flags &= ~ALG_FLAG_VALID; 5077 return; 5078 } 5079 5080 /* min and max key sizes supported by framework */ 5081 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 5082 int unit_bits; 5083 5084 /* 5085 * Ignore entries that do not support the operations 5086 * needed for the algorithm type. 
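		 * (AUTH algorithms need CRYPTO_MECH_USAGE_MAC; encryption
		 * algorithms need both the ENCRYPT and DECRYPT usages.)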
5087 */ 5088 if (alg_type == IPSEC_ALG_AUTH) 5089 mask = CRYPTO_MECH_USAGE_MAC; 5090 else 5091 mask = CRYPTO_MECH_USAGE_ENCRYPT | 5092 CRYPTO_MECH_USAGE_DECRYPT; 5093 if ((mech_infos[i].mi_usage & mask) != mask) 5094 continue; 5095 5096 unit_bits = (mech_infos[i].mi_keysize_unit == 5097 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 5098 /* adjust min/max supported by framework */ 5099 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 5100 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 5101 5102 if (cur_crypto_min < crypto_min) 5103 crypto_min = cur_crypto_min; 5104 5105 /* 5106 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 5107 * the crypto framework which means "no upper limit". 5108 */ 5109 if (mech_infos[i].mi_max_key_size == 5110 CRYPTO_EFFECTIVELY_INFINITE) 5111 crypto_max = (size_t)-1; 5112 else if (cur_crypto_max > crypto_max) 5113 crypto_max = cur_crypto_max; 5114 5115 is_valid = B_TRUE; 5116 } 5117 5118 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 5119 nmech_infos); 5120 5121 if (!is_valid) { 5122 /* no key sizes supported by framework */ 5123 alg->alg_flags &= ~ALG_FLAG_VALID; 5124 return; 5125 } 5126 5127 /* 5128 * Determine min and max key sizes from alg_key_sizes[]. 5129 * defined for the algorithm entry. Adjust key sizes based on 5130 * those supported by the framework. 5131 */ 5132 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 5133 if (alg->alg_increment != 0) { 5134 /* supported key sizes are defined by range & increment */ 5135 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 5136 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 5137 5138 alg->alg_ef_minbits = MAX(alg->alg_minbits, 5139 (uint16_t)crypto_min); 5140 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 5141 (uint16_t)crypto_max); 5142 5143 /* 5144 * If the sizes supported by the framework are outside 5145 * the range of sizes defined by the algorithm mappings, 5146 * the algorithm cannot be used. Check for this 5147 * condition here. 5148 */ 5149 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 5150 alg->alg_flags &= ~ALG_FLAG_VALID; 5151 return; 5152 } 5153 5154 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 5155 alg->alg_ef_default_bits = alg->alg_ef_minbits; 5156 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 5157 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 5158 5159 alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits, 5160 alg->alg_increment, alg->alg_ef_default_bits); 5161 } else if (alg->alg_nkey_sizes == 0) { 5162 /* no specified key size for algorithm */ 5163 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 5164 } else { 5165 /* supported key sizes are defined by enumeration */ 5166 alg->alg_ef_minbits = (uint16_t)-1; 5167 alg->alg_ef_maxbits = 0; 5168 5169 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 5170 /* 5171 * Ignore the current key size if it is not in the 5172 * range of sizes supported by the framework. 5173 */ 5174 if (alg->alg_key_sizes[i] < crypto_min || 5175 alg->alg_key_sizes[i] > crypto_max) 5176 continue; 5177 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 5178 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 5179 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 5180 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 5181 is_valid = B_TRUE; 5182 } 5183 5184 if (!is_valid) { 5185 alg->alg_flags &= ~ALG_FLAG_VALID; 5186 return; 5187 } 5188 alg->alg_ef_default = 0; 5189 } 5190 } 5191 5192 /* 5193 * Free the memory used by the specified algorithm. 
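 * (The key- and block-size arrays apparently carry one extra slot,
 * hence the "+ 1" in the kmem_free() calls below.)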
5194 */ 5195 void 5196 ipsec_alg_free(ipsec_alginfo_t *alg) 5197 { 5198 if (alg == NULL) 5199 return; 5200 5201 if (alg->alg_key_sizes != NULL) { 5202 kmem_free(alg->alg_key_sizes, 5203 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 5204 alg->alg_key_sizes = NULL; 5205 } 5206 if (alg->alg_block_sizes != NULL) { 5207 kmem_free(alg->alg_block_sizes, 5208 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 5209 alg->alg_block_sizes = NULL; 5210 } 5211 kmem_free(alg, sizeof (*alg)); 5212 } 5213 5214 /* 5215 * Check the validity of the specified key size for an algorithm. 5216 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 5217 */ 5218 boolean_t 5219 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 5220 { 5221 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 5222 return (B_FALSE); 5223 5224 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 5225 /* 5226 * If the key sizes are defined by enumeration, the new 5227 * key size must be equal to one of the supported values. 5228 */ 5229 int i; 5230 5231 for (i = 0; i < alg->alg_nkey_sizes; i++) 5232 if (key_size == alg->alg_key_sizes[i]) 5233 break; 5234 if (i == alg->alg_nkey_sizes) 5235 return (B_FALSE); 5236 } 5237 5238 return (B_TRUE); 5239 } 5240 5241 /* 5242 * Callback function invoked by the crypto framework when a provider 5243 * registers or unregisters. This callback updates the algorithms 5244 * tables when a crypto algorithm is no longer available or becomes 5245 * available, and triggers the freeing/creation of context templates 5246 * associated with existing SAs, if needed. 5247 * 5248 * Need to walk all stack instances since the callback is global 5249 * for all instances 5250 */ 5251 void 5252 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5253 { 5254 netstack_handle_t nh; 5255 netstack_t *ns; 5256 5257 netstack_next_init(&nh); 5258 while ((ns = netstack_next(&nh)) != NULL) { 5259 ipsec_prov_update_callback_stack(event, event_arg, ns); 5260 netstack_rele(ns); 5261 } 5262 netstack_next_fini(&nh); 5263 } 5264 5265 static void 5266 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5267 netstack_t *ns) 5268 { 5269 crypto_notify_event_change_t *prov_change = 5270 (crypto_notify_event_change_t *)event_arg; 5271 uint_t algidx, algid, algtype, mech_count, mech_idx; 5272 ipsec_alginfo_t *alg; 5273 ipsec_alginfo_t oalg; 5274 crypto_mech_name_t *mechs; 5275 boolean_t alg_changed = B_FALSE; 5276 ipsec_stack_t *ipss = ns->netstack_ipsec; 5277 5278 /* ignore events for which we didn't register */ 5279 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5280 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5281 " received from crypto framework\n", event)); 5282 return; 5283 } 5284 5285 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5286 if (mechs == NULL) 5287 return; 5288 5289 /* 5290 * Walk the list of currently defined IPsec algorithm. Update 5291 * the algorithm valid flag and trigger an update of the 5292 * SAs that depend on that algorithm. 5293 */ 5294 mutex_enter(&ipss->ipsec_alg_lock); 5295 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5296 for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; 5297 algidx++) { 5298 5299 algid = ipss->ipsec_sortlist[algtype][algidx]; 5300 alg = ipss->ipsec_alglists[algtype][algid]; 5301 ASSERT(alg != NULL); 5302 5303 /* 5304 * Skip the algorithms which do not map to the 5305 * crypto framework provider being added or removed. 
5306 		 */
5307 			if (strncmp(alg->alg_mech_name,
5308 			    prov_change->ec_mech_name,
5309 			    CRYPTO_MAX_MECH_NAME) != 0)
5310 				continue;
5311
5312 			/*
5313 			 * Determine if the mechanism is valid.  If it
5314 			 * is not, mark the algorithm as being invalid.  If
5315 			 * it is, mark the algorithm as being valid.
5316 			 */
5317 			for (mech_idx = 0; mech_idx < mech_count; mech_idx++)
5318 				if (strncmp(alg->alg_mech_name,
5319 				    mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0)
5320 					break;
5321 			if (mech_idx == mech_count &&
5322 			    alg->alg_flags & ALG_FLAG_VALID) {
5323 				alg->alg_flags &= ~ALG_FLAG_VALID;
5324 				alg_changed = B_TRUE;
5325 			} else if (mech_idx < mech_count &&
5326 			    !(alg->alg_flags & ALG_FLAG_VALID)) {
5327 				alg->alg_flags |= ALG_FLAG_VALID;
5328 				alg_changed = B_TRUE;
5329 			}
5330
5331 			/*
5332 			 * Update the supported key sizes, regardless
5333 			 * of whether a crypto provider was added or
5334 			 * removed.
5335 			 */
5336 			oalg = *alg;
5337 			ipsec_alg_fix_min_max(alg, algtype, ns);
5338 			if (!alg_changed &&
5339 			    (alg->alg_ef_minbits != oalg.alg_ef_minbits ||
5340 			    alg->alg_ef_maxbits != oalg.alg_ef_maxbits ||
5341 			    alg->alg_ef_default != oalg.alg_ef_default ||
5342 			    alg->alg_ef_default_bits !=
5343 			    oalg.alg_ef_default_bits))
5344 				alg_changed = B_TRUE;
5345
5346 			/*
5347 			 * Update the affected SAs if a software provider is
5348 			 * being added or removed.
5349 			 */
5350 			if (prov_change->ec_provider_type ==
5351 			    CRYPTO_SW_PROVIDER)
5352 				sadb_alg_update(algtype, alg->alg_id,
5353 				    prov_change->ec_change ==
5354 				    CRYPTO_MECH_ADDED, ns);
5355 		}
5356 	}
5357 	mutex_exit(&ipss->ipsec_alg_lock);
5358 	crypto_free_mech_list(mechs, mech_count);
5359
5360 	if (alg_changed) {
5361 		/*
5362 		 * An algorithm has changed, i.e. it became valid or
5363 		 * invalid, or its supported key sizes have changed.
5364 		 * Notify ipsecah and ipsecesp of this change so
5365 		 * that they can send a SADB_REGISTER to their consumers.
5366 		 */
5367 		ipsecah_algs_changed(ns);
5368 		ipsecesp_algs_changed(ns);
5369 	}
5370 }
5371
5372 /*
5373  * Registers with the crypto framework to be notified of crypto
5374  * provider changes.  Used to update the algorithm tables and
5375  * to free or create context templates if needed.  Invoked after IPsec
5376  * is loaded successfully.
5377  *
5378  * This is called separately for each IP instance, so we ensure we only
5379  * register once.
5380  */
5381 void
5382 ipsec_register_prov_update(void)
5383 {
5384 	if (prov_update_handle != NULL)
5385 		return;
5386
5387 	prov_update_handle = crypto_notify_events(
5388 	    ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED);
5389 }
5390
5391 /*
5392  * Unregisters from the framework to be notified of crypto provider
5393  * changes.  Called from ipsec_policy_g_destroy().
5394  */
5395 static void
5396 ipsec_unregister_prov_update(void)
5397 {
5398 	if (prov_update_handle != NULL)
5399 		crypto_unnotify_events(prov_update_handle);
5400 }
5401
5402 /*
5403  * Tunnel-mode support routines.
5404  */
5405
5406 /*
5407  * Returns an mblk chain suitable for putnext() if policies match and IPsec
5408  * SAs are available.  If there's no per-tunnel policy, or the lookup comes
5409  * back with no match, then still return the packet and have global policy
5410  * take a crack at it in IP.
5411  *
5412  * Remember -> we can be forwarding packets.  Keep that in mind w.r.t.
5413  * inner-packet contents.
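 *
 * For fragment chains the result is one IPSEC_OUT-tagged M_CTL per
 * fragment, linked by b_next (see the ipsec_out_tag() loop at the end
 * of this function).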
5414 */ 5415 mblk_t * 5416 ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, 5417 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, 5418 netstack_t *ns) 5419 { 5420 ipsec_tun_pol_t *itp = atp->tun_itp; 5421 ipsec_policy_head_t *polhead; 5422 ipsec_selector_t sel; 5423 mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; 5424 mblk_t *spare_mp = NULL; 5425 ipsec_out_t *io; 5426 boolean_t is_fragment; 5427 ipsec_policy_t *pol; 5428 ipsec_stack_t *ipss = ns->netstack_ipsec; 5429 5430 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5431 outer_ipv4 != NULL && outer_ipv6 == NULL); 5432 /* We take care of inners in a bit. */ 5433 5434 /* No policy on this tunnel - let global policy have at it. */ 5435 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) 5436 return (mp); 5437 polhead = itp->itp_policy; 5438 5439 bzero(&sel, sizeof (sel)); 5440 if (inner_ipv4 != NULL) { 5441 ASSERT(inner_ipv6 == NULL); 5442 sel.ips_isv4 = B_TRUE; 5443 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5444 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5445 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5446 is_fragment = 5447 IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags); 5448 } else { 5449 ASSERT(inner_ipv6 != NULL); 5450 sel.ips_isv4 = B_FALSE; 5451 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5452 /* Use ip_get_dst_v6() just for the fragment bit. */ 5453 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, 5454 &is_fragment); 5455 /* 5456 * Reset, because we don't care about routing-header dests 5457 * in the forwarding/tunnel path. 5458 */ 5459 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5460 } 5461 5462 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5463 if (is_fragment) { 5464 ipha_t *oiph; 5465 ipha_t *iph = NULL; 5466 ip6_t *ip6h = NULL; 5467 int hdr_len; 5468 uint16_t ip6_hdr_length; 5469 uint8_t v6_proto; 5470 uint8_t *v6_proto_p; 5471 5472 /* 5473 * We have a fragment we need to track! 5474 */ 5475 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5476 outer_hdr_len, ipss); 5477 if (mp == NULL) 5478 return (NULL); 5479 5480 /* 5481 * If we get here, we have a full 5482 * fragment chain 5483 */ 5484 5485 oiph = (ipha_t *)mp->b_rptr; 5486 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5487 hdr_len = ((outer_hdr_len != 0) ? 
5488 IPH_HDR_LENGTH(oiph) : 0); 5489 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5490 } else { 5491 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5492 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 5493 ip_drop_packet_chain(mp, B_FALSE, 5494 NULL, NULL, 5495 DROPPER(ipss, ipds_spd_nomem), 5496 &ipss->ipsec_spd_dropper); 5497 } 5498 ip6h = (ip6_t *)spare_mp->b_rptr; 5499 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 5500 &ip6_hdr_length, &v6_proto_p); 5501 hdr_len = ip6_hdr_length; 5502 } 5503 outer_hdr_len = hdr_len; 5504 5505 if (sel.ips_isv4) { 5506 if (iph == NULL) { 5507 /* Was v6 outer */ 5508 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5509 } 5510 inner_ipv4 = iph; 5511 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5512 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5513 sel.ips_protocol = 5514 (uint8_t)inner_ipv4->ipha_protocol; 5515 } else { 5516 if ((spare_mp == NULL) && 5517 ((spare_mp = msgpullup(mp, -1)) == NULL)) { 5518 ip_drop_packet_chain(mp, B_FALSE, 5519 NULL, NULL, 5520 DROPPER(ipss, ipds_spd_nomem), 5521 &ipss->ipsec_spd_dropper); 5522 } 5523 inner_ipv6 = (ip6_t *)(spare_mp->b_rptr + 5524 hdr_len); 5525 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5526 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5527 (void) ip_hdr_length_nexthdr_v6(spare_mp, 5528 inner_ipv6, &ip6_hdr_length, 5529 &v6_proto_p); 5530 v6_proto = *v6_proto_p; 5531 sel.ips_protocol = v6_proto; 5532 #ifdef FRAGCACHE_DEBUG 5533 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5534 sel.ips_protocol); 5535 #endif 5536 } 5537 /* Ports are extracted below */ 5538 } 5539 5540 /* Get ports... */ 5541 if (spare_mp != NULL) { 5542 if (!ipsec_init_outbound_ports(&sel, spare_mp, 5543 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5544 /* 5545 * callee did ip_drop_packet_chain() on 5546 * spare_mp 5547 */ 5548 ipsec_freemsg_chain(mp); 5549 return (NULL); 5550 } 5551 } else { 5552 if (!ipsec_init_outbound_ports(&sel, mp, 5553 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5554 /* callee did ip_drop_packet_chain() on mp. */ 5555 return (NULL); 5556 } 5557 } 5558 #ifdef FRAGCACHE_DEBUG 5559 if (inner_ipv4 != NULL) 5560 cmn_err(CE_WARN, 5561 "(v4) sel.ips_protocol = %d, " 5562 "sel.ips_local_port = %d, " 5563 "sel.ips_remote_port = %d\n", 5564 sel.ips_protocol, ntohs(sel.ips_local_port), 5565 ntohs(sel.ips_remote_port)); 5566 if (inner_ipv6 != NULL) 5567 cmn_err(CE_WARN, 5568 "(v6) sel.ips_protocol = %d, " 5569 "sel.ips_local_port = %d, " 5570 "sel.ips_remote_port = %d\n", 5571 sel.ips_protocol, ntohs(sel.ips_local_port), 5572 ntohs(sel.ips_remote_port)); 5573 #endif 5574 /* Success so far - done with spare_mp */ 5575 ipsec_freemsg_chain(spare_mp); 5576 } 5577 rw_enter(&polhead->iph_lock, RW_READER); 5578 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, 5579 &sel, ns); 5580 rw_exit(&polhead->iph_lock); 5581 if (pol == NULL) { 5582 /* 5583 * No matching policy on this tunnel, drop the packet. 5584 * 5585 * NOTE: Tunnel-mode tunnels are different from the 5586 * IP global transport mode policy head. For a tunnel-mode 5587 * tunnel, we drop the packet in lieu of passing it 5588 * along accepted the way a global-policy miss would. 5589 * 5590 * NOTE2: "negotiate transport" tunnels should match ALL 5591 * inbound packets, but we do not uncomment the ASSERT() 5592 * below because if/when we open PF_POLICY, a user can 5593 * shoot him/her-self in the foot with a 0 priority. 
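	 * (That is, a 0-priority rule created via PF_POLICY could make a
	 * "negotiate transport" tunnel legitimately fail to match, so the
	 * ASSERT() would fire on a configuration that is merely unwise.)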
5594 */ 5595 5596 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5597 #ifdef FRAGCACHE_DEBUG 5598 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5599 "per-port policy\n"); 5600 #endif 5601 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5602 DROPPER(ipss, ipds_spd_explicit), 5603 &ipss->ipsec_spd_dropper); 5604 return (NULL); 5605 } 5606 5607 #ifdef FRAGCACHE_DEBUG 5608 cmn_err(CE_WARN, "Having matching tunnel per-port policy\n"); 5609 #endif 5610 5611 /* Construct an IPSEC_OUT message. */ 5612 ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns); 5613 if (ipsec_mp == NULL) { 5614 IPPOL_REFRELE(pol, ns); 5615 ip_drop_packet(mp, B_FALSE, NULL, NULL, 5616 DROPPER(ipss, ipds_spd_nomem), 5617 &ipss->ipsec_spd_dropper); 5618 return (NULL); 5619 } 5620 ipsec_mp->b_cont = mp; 5621 io = (ipsec_out_t *)ipsec_mp->b_rptr; 5622 IPPH_REFHOLD(polhead); 5623 /* 5624 * NOTE: free() function of ipsec_out mblk will release polhead and 5625 * pol references. 5626 */ 5627 io->ipsec_out_polhead = polhead; 5628 io->ipsec_out_policy = pol; 5629 io->ipsec_out_zoneid = atp->tun_zoneid; 5630 io->ipsec_out_v4 = (outer_ipv4 != NULL); 5631 io->ipsec_out_secure = B_TRUE; 5632 5633 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5634 /* Set up transport mode for tunnelled packets. */ 5635 io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5636 IPPROTO_IPV6; 5637 return (ipsec_mp); 5638 } 5639 5640 /* Fill in tunnel-mode goodies here. */ 5641 io->ipsec_out_tunnel = B_TRUE; 5642 /* XXX Do I need to fill in all of the goodies here? */ 5643 if (inner_ipv4) { 5644 io->ipsec_out_inaf = AF_INET; 5645 io->ipsec_out_insrc[0] = 5646 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5647 io->ipsec_out_indst[0] = 5648 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5649 } else { 5650 io->ipsec_out_inaf = AF_INET6; 5651 io->ipsec_out_insrc[0] = 5652 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5653 io->ipsec_out_insrc[1] = 5654 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5655 io->ipsec_out_insrc[2] = 5656 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5657 io->ipsec_out_insrc[3] = 5658 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5659 io->ipsec_out_indst[0] = 5660 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5661 io->ipsec_out_indst[1] = 5662 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5663 io->ipsec_out_indst[2] = 5664 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5665 io->ipsec_out_indst[3] = 5666 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5667 } 5668 io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5669 io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5670 /* NOTE: These are used for transport mode too. */ 5671 io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5672 io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5673 io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5674 5675 /* 5676 * The mp pointer still valid 5677 * Add ipsec_out to each fragment. 
The fragment head already has one.
5679 	 */
5680 	nmp = mp->b_next;
5681 	mp->b_next = NULL;
5682 	mp = nmp;
5683 	ASSERT(ipsec_mp != NULL);
5684 	while (mp != NULL) {
5685 		nmp = mp->b_next;
5686 		ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns);
5687 		if (ipsec_mp->b_next == NULL) {
5688 			ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL,
5689 			    DROPPER(ipss, ipds_spd_nomem),
5690 			    &ipss->ipsec_spd_dropper);
5691 			ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
5692 			    DROPPER(ipss, ipds_spd_nomem),
5693 			    &ipss->ipsec_spd_dropper);
5694 			return (NULL);
5695 		}
5696 		ipsec_mp = ipsec_mp->b_next;
5697 		mp->b_next = NULL;
5698 		mp = nmp;
5699 	}
5700 	return (ipsec_mp_head);
5701 }
5702
5703 /*
5704  * NOTE: The following releases pol's reference and
5705  * calls ip_drop_packet() for me on NULL returns.
5706  */
5707 mblk_t *
5708 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol,
5709     ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns)
5710 {
5711 	/* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */
5712 	mblk_t *data_chain = NULL, *data_tail = NULL;
5713 	mblk_t *ii_next;
5714
5715 	while (ipsec_mp != NULL) {
5716 		ii_next = ipsec_mp->b_next;
5717 		ipsec_mp->b_next = NULL;	/* No tripping asserts. */
5718
5719 		/*
5720 		 * Need IPPOL_REFHOLD(pol) for extras because
5721 		 * ipsecin_policy does the refrele.
5722 		 */
5723 		IPPOL_REFHOLD(pol);
5724
5725 		if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4,
5726 		    inner_ipv6, pkt_unique, ns) != NULL) {
5727 			if (data_tail == NULL) {
5728 				/* First one */
5729 				data_chain = data_tail = ipsec_mp->b_cont;
5730 			} else {
5731 				data_tail->b_next = ipsec_mp->b_cont;
5732 				data_tail = data_tail->b_next;
5733 			}
5734 			freeb(ipsec_mp);
5735 		} else {
5736 			/*
5737 			 * ipsec_check_ipsecin_policy() freed ipsec_mp
5738 			 * already.  Need to get rid of any extra pol
5739 			 * references, and any remaining bits as well.
5740 			 */
5741 			IPPOL_REFRELE(pol, ns);
5742 			ipsec_freemsg_chain(data_chain);
5743 			ipsec_freemsg_chain(ii_next);	/* ipdrop stats? */
5744 			return (NULL);
5745 		}
5746 		ipsec_mp = ii_next;
5747 	}
5748 	/*
5749 	 * One last release because either the loop bumped it up, or we never
5750 	 * called ipsec_check_ipsecin_policy().
5751 	 */
5752 	IPPOL_REFRELE(pol, ns);
5753
5754 	/* data_chain is ready for return to tun module. */
5755 	return (data_chain);
5756 }
5757
5758
5759 /*
5760  * Returns B_TRUE if the inbound packet passed an IPsec policy check.  Returns
5761  * B_FALSE if it failed or if it is a fragment needing its friends before a
5762  * policy check can be performed.
5763  *
5764  * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead.
5765  * data_mp may be reassigned with a b_next chain of packets if fragments
5766  * needed to be collected for a proper policy check.
5767  *
5768  * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE.  This
5769  * function calls ip_drop_packet() on data_mp if need be.
5770  *
5771  * NOTE: outer_hdr_len is signed.  If it's a negative value, the caller
5772  * is inspecting an ICMP packet.
5773  */
5774 boolean_t
5775 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp,
5776     ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
5777     ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns)
5778 {
5779 	ipsec_policy_head_t *polhead;
5780 	ipsec_selector_t sel;
5781 	mblk_t *message = (ipsec_mp == NULL) ?
/*
 * Returns B_TRUE if the inbound packet passed an IPsec policy check.  Returns
 * B_FALSE if it failed or if it is a fragment needing its friends before a
 * policy check can be performed.
 *
 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead.
 * data_mp may be reassigned with a b_next chain of packets if fragments
 * needed to be collected for a proper policy check.
 *
 * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE.  This
 * function calls ip_drop_packet() on data_mp if need be.
 *
 * NOTE: outer_hdr_len is signed.  If it's a negative value, the caller
 * is inspecting an ICMP packet.
 */
boolean_t
ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp,
    ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
    ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns)
{
	ipsec_policy_head_t *polhead;
	ipsec_selector_t sel;
	mblk_t *message = (ipsec_mp == NULL) ? *data_mp : ipsec_mp;
	ipsec_policy_t *pol;
	uint16_t tmpport;
	selret_t rc;
	boolean_t retval, port_policy_present, is_icmp, global_present;
	in6_addr_t tmpaddr;
	ipaddr_t tmp4;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	uint8_t flags, *holder, *outer_hdr;

	sel.ips_is_icmp_inv_acq = 0;

	if (outer_ipv4 != NULL) {
		ASSERT(outer_ipv6 == NULL);
		outer_hdr = (uint8_t *)outer_ipv4;
		global_present = ipss->ipsec_inbound_v4_policy_present;
	} else {
		outer_hdr = (uint8_t *)outer_ipv6;
		global_present = ipss->ipsec_inbound_v6_policy_present;
	}
	ASSERT(outer_hdr != NULL);

	ASSERT((inner_ipv4 != NULL && inner_ipv6 == NULL) ||
	    (inner_ipv4 == NULL && inner_ipv6 != NULL));
	ASSERT(message == *data_mp || message->b_cont == *data_mp);

	if (outer_hdr_len < 0) {
		outer_hdr_len = (-outer_hdr_len);
		is_icmp = B_TRUE;
	} else {
		is_icmp = B_FALSE;
	}

	if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
		polhead = itp->itp_policy;
		/*
		 * We need to perform full Tunnel-Mode enforcement,
		 * and we need to have inner-header data for such enforcement.
		 *
		 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound
		 * and on return.
		 */

		port_policy_present = ((itp->itp_flags &
		    ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE);
		flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) |
		    (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE);

		rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4,
		    inner_ipv6, flags);

		switch (rc) {
		case SELRET_NOMEM:
			ip_drop_packet(message, B_TRUE, NULL, NULL,
			    DROPPER(ipss, ipds_spd_nomem),
			    &ipss->ipsec_spd_dropper);
			return (B_FALSE);
		case SELRET_TUNFRAG:
			/*
			 * At this point, if we're cleartext, we don't want
			 * to go there.
			 */
			if (ipsec_mp == NULL) {
				ip_drop_packet(*data_mp, B_TRUE, NULL, NULL,
				    DROPPER(ipss, ipds_spd_got_clear),
				    &ipss->ipsec_spd_dropper);
				*data_mp = NULL;
				return (B_FALSE);
			}
			ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)->
			    ipsec_in_secure);
			message = ipsec_fragcache_add(&itp->itp_fragcache,
			    ipsec_mp, *data_mp, outer_hdr_len, ipss);

			if (message == NULL) {
				/*
				 * Data is cached, fragment chain is not
				 * complete.  I consume ipsec_mp and data_mp.
				 */
				return (B_FALSE);
			}

			/*
			 * If we get here, we have a full fragment chain.
			 * Reacquire headers and selectors from first
			 * fragment.
			 */
			if (inner_ipv4 != NULL) {
				inner_ipv4 = (ipha_t *)message->b_cont->b_rptr;
				ASSERT(message->b_cont->b_wptr -
				    message->b_cont->b_rptr > sizeof (ipha_t));
			} else {
				inner_ipv6 = (ip6_t *)message->b_cont->b_rptr;
				ASSERT(message->b_cont->b_wptr -
				    message->b_cont->b_rptr > sizeof (ip6_t));
			}
			/* Use SEL_NONE so we always get ports! */
			rc = ipsec_init_inbound_sel(&sel, message->b_cont,
			    inner_ipv4, inner_ipv6, SEL_NONE);
			switch (rc) {
			case SELRET_SUCCESS:
				/*
				 * Get to same place as first caller's
				 * SELRET_SUCCESS case.
				 */
				break;
			case SELRET_NOMEM:
				ip_drop_packet_chain(message, B_TRUE,
				    NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (B_FALSE);
			case SELRET_BADPKT:
				ip_drop_packet_chain(message, B_TRUE,
				    NULL, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				return (B_FALSE);
			case SELRET_TUNFRAG:
				cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)");
				/* FALLTHRU */
			default:
				cmn_err(CE_WARN,
				    "ipsec_init_inbound_sel(mark2)"
				    " returns bizarro 0x%x", rc);
				/* Guaranteed panic! */
				ASSERT(rc == SELRET_NOMEM);
				return (B_FALSE);
			}
			/* FALLTHRU */
		case SELRET_SUCCESS:
			/*
			 * Common case:
			 * No per-port policy or a non-fragment.  Keep going.
			 */
			break;
		case SELRET_BADPKT:
			/*
			 * We may receive ICMP (with IPv6 inner) packets that
			 * trigger this return value.  Send 'em in for
			 * enforcement checking.
			 */
			cmn_err(CE_NOTE, "ipsec_tun_inbound(): "
			    "sending 'bad packet' in for enforcement");
			break;
		default:
			cmn_err(CE_WARN,
			    "ipsec_init_inbound_sel() returns bizarro 0x%x",
			    rc);
			ASSERT(rc == SELRET_NOMEM);	/* Guaranteed panic! */
			return (B_FALSE);
		}

		if (is_icmp) {
			/*
			 * Swap local/remote because this is an ICMP packet.
			 */
			tmpaddr = sel.ips_local_addr_v6;
			sel.ips_local_addr_v6 = sel.ips_remote_addr_v6;
			sel.ips_remote_addr_v6 = tmpaddr;
			tmpport = sel.ips_local_port;
			sel.ips_local_port = sel.ips_remote_port;
			sel.ips_remote_port = tmpport;
		}

		/* find_policy_head() */
		rw_enter(&polhead->iph_lock, RW_READER);
		pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND,
		    &sel, ns);
		rw_exit(&polhead->iph_lock);
		if (pol != NULL) {
			if (ipsec_mp == NULL ||
			    !((ipsec_in_t *)ipsec_mp->b_rptr)->
			    ipsec_in_secure) {
				retval = pol->ipsp_act->ipa_allow_clear;
				if (!retval) {
					/*
					 * XXX should never get here with
					 * tunnel reassembled fragments?
					 */
					ASSERT(message->b_next == NULL);
					ip_drop_packet(message, B_TRUE, NULL,
					    NULL,
					    DROPPER(ipss, ipds_spd_got_clear),
					    &ipss->ipsec_spd_dropper);
				} else if (ipsec_mp != NULL) {
					freeb(ipsec_mp);
				}

				IPPOL_REFRELE(pol, ns);
				return (retval);
			}
			/*
			 * NOTE: The following releases pol's reference and
			 * calls ip_drop_packet() for me on NULL returns.
			 *
			 * "sel" is still good here, so let's use it!
			 */
			*data_mp = ipsec_check_ipsecin_policy_reasm(message,
			    pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID(
			    sel.ips_remote_port, sel.ips_local_port,
			    (inner_ipv4 == NULL) ? IPPROTO_IPV6 :
			    IPPROTO_ENCAP, sel.ips_protocol), ns);
			return (*data_mp != NULL);
		}
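
		/*
		 * Note on the pkt_unique argument above: SA_UNIQUE_ID()
		 * (see sadb.h for the exact bit layout) folds the two
		 * ports and the two protocol numbers (inner selector
		 * protocol plus IPPROTO_ENCAP or IPPROTO_IPV6 for the
		 * encapsulation) into one 64-bit value, so reassembled
		 * tunnel traffic can be matched against SAs that were
		 * bound to a specific flow rather than to the whole
		 * tunnel.
		 */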
		/*
		 * Else fallthru and check the global policy on the outer
		 * header(s) if this tunnel is an old-style transport-mode
		 * one.  Drop the packet explicitly (no policy entry) for
		 * a new-style tunnel-mode tunnel.
		 */
		if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) {
			ip_drop_packet_chain(message, B_TRUE, NULL, NULL,
			    DROPPER(ipss, ipds_spd_explicit),
			    &ipss->ipsec_spd_dropper);
			return (B_FALSE);
		}
	}

	/*
	 * NOTE: If we reach here, we will not have packet chains from
	 * fragcache_add(), because the only way I get chains is on a
	 * tunnel-mode tunnel, which either returns with a pass, or gets
	 * hit by the ip_drop_packet_chain() call right above here.
	 */

	/* If no per-tunnel security, check global policy now. */
	if (ipsec_mp != NULL && !global_present) {
		if (((ipsec_in_t *)(ipsec_mp->b_rptr))->
		    ipsec_in_icmp_loopback) {
			/*
			 * This is an ICMP message with an ipsec_mp
			 * attached.  We should accept it.
			 */
			freeb(ipsec_mp);
			return (B_TRUE);
		}

		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
		    DROPPER(ipss, ipds_spd_got_secure),
		    &ipss->ipsec_spd_dropper);
		return (B_FALSE);
	}

	/*
	 * The following assertion is valid because only the tun module alters
	 * the mblk chain - stripping the outer header by advancing mp->b_rptr.
	 */
	ASSERT(is_icmp ||
	    ((*data_mp)->b_datap->db_base <= outer_hdr &&
	    outer_hdr < (*data_mp)->b_rptr));
	holder = (*data_mp)->b_rptr;
	(*data_mp)->b_rptr = outer_hdr;

	if (is_icmp) {
		/*
		 * For ICMP packets, "outer_ipvN" is set to the outer header
		 * that is *INSIDE* the ICMP payload.  For global policy
		 * checking, we need to reverse src/dst on the payload in
		 * order to construct selectors appropriately.  See "ripha"
		 * constructions in ip.c.  To avoid a bug like 6478464 (see
		 * earlier in this file), we will actually exchange src/dst
		 * in the packet, and reverse it after the call to
		 * ipsec_check_global_policy().
		 */
		if (outer_ipv4 != NULL) {
			tmp4 = outer_ipv4->ipha_src;
			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
			outer_ipv4->ipha_dst = tmp4;
		} else {
			ASSERT(outer_ipv6 != NULL);
			tmpaddr = outer_ipv6->ip6_src;
			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
			outer_ipv6->ip6_dst = tmpaddr;
		}
	}

	/* NOTE: Frees message if it returns NULL. */
	if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6,
	    (ipsec_mp != NULL), ns) == NULL) {
		return (B_FALSE);
	}

	if (is_icmp) {
		/* Set things back to normal. */
		if (outer_ipv4 != NULL) {
			tmp4 = outer_ipv4->ipha_src;
			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
			outer_ipv4->ipha_dst = tmp4;
		} else {
			/* No need for ASSERT()s now. */
			tmpaddr = outer_ipv6->ip6_src;
			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
			outer_ipv6->ip6_dst = tmpaddr;
		}
	}

	(*data_mp)->b_rptr = holder;

	if (ipsec_mp != NULL)
		freeb(ipsec_mp);

	/*
	 * At this point, we pretend it's a cleartext accepted
	 * packet.
	 */
	return (B_TRUE);
}

/*
 * AVL comparison routine for our list of tunnel polheads.
 */
static int
tunnel_compare(const void *arg1, const void *arg2)
{
	ipsec_tun_pol_t *left, *right;
	int rc;

	left = (ipsec_tun_pol_t *)arg1;
	right = (ipsec_tun_pol_t *)arg2;

	rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ);
	return (rc == 0 ? rc : (rc > 0 ? 1 : -1));
}
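
/*
 * Why the normalization above: strncmp() only promises a negative, zero,
 * or positive result, e.g. strncmp("ip.tun3", "ip.tun1", LIFNAMSIZ)
 * returns some positive value of unspecified magnitude, while the AVL
 * code expects a comparator to return exactly -1, 0, or 1.
 */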
/*
 * Free a tunnel policy node.
 */
void
itp_free(ipsec_tun_pol_t *node, netstack_t *ns)
{
	IPPH_REFRELE(node->itp_policy, ns);
	IPPH_REFRELE(node->itp_inactive, ns);
	mutex_destroy(&node->itp_lock);
	kmem_free(node, sizeof (*node));
}

void
itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	ipss->ipsec_tunnel_policy_gen++;
	ipsec_fragcache_uninit(&node->itp_fragcache);
	avl_remove(&ipss->ipsec_tunnel_policies, node);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	ITP_REFRELE(node, ns);
}

/*
 * Public interface to look up a tunnel security policy by name.  Used by
 * spdsock mostly.  Returns "node" with a bumped refcnt.
 */
ipsec_tun_pol_t *
get_tunnel_policy(char *name, netstack_t *ns)
{
	ipsec_tun_pol_t *node, lookup;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	(void) strncpy(lookup.itp_name, name, LIFNAMSIZ);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
	node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
	    &lookup, NULL);
	if (node != NULL) {
		ITP_REFHOLD(node);
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);

	return (node);
}

/*
 * Public interface to walk all tunnel security policies.  Useful for spdsock
 * DUMP operations.  iterator() will not consume a reference.
 */
void
itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *),
    void *arg, netstack_t *ns)
{
	ipsec_tun_pol_t *node;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
	for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL;
	    node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) {
		iterator(node, arg, ns);
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
}
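
/*
 * Usage sketch for itp_walk() (the callback here is hypothetical;
 * spdsock's DUMP code follows the same shape).  The walk holds the
 * tunnel policy lock as reader, so the callback must not sleep or try
 * to take that lock itself:
 *
 *	static void
 *	count_itp(ipsec_tun_pol_t *itp, void *arg, netstack_t *ns)
 *	{
 *		(*(uint_t *)arg)++;
 *	}
 *
 *	uint_t count = 0;
 *	itp_walk(count_itp, &count, ns);
 */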
/*
 * Initialize policy head.  This can only fail if there's a memory problem.
 */
static boolean_t
tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	iph->iph_refs = 1;
	iph->iph_gen = 0;
	if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize,
	    KM_SLEEP, B_FALSE, ns) != 0) {
		ipsec_polhead_free_table(iph);
		return (B_FALSE);
	}
	ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize);
	return (B_TRUE);
}

/*
 * Create a tunnel policy node with "name".  Set errno with
 * ENOMEM if there's a memory problem, and EEXIST if there's an existing
 * node.
 */
ipsec_tun_pol_t *
create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns)
{
	ipsec_tun_pol_t *newbie, *existing;
	avl_index_t where;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
	if (newbie == NULL) {
		*errno = ENOMEM;
		return (NULL);
	}
	if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
		kmem_free(newbie, sizeof (*newbie));
		*errno = ENOMEM;
		return (NULL);
	}

	(void) strncpy(newbie->itp_name, name, LIFNAMSIZ);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
	    newbie, &where);
	if (existing != NULL) {
		/*
		 * Can't use itp_free() here; itp_policy and itp_inactive
		 * haven't been allocated yet, so IPPH_REFRELE() would
		 * dereference NULL.
		 */
		ipsec_fragcache_uninit(&newbie->itp_fragcache);
		kmem_free(newbie, sizeof (*newbie));
		*errno = EEXIST;
		rw_exit(&ipss->ipsec_tunnel_policy_lock);
		return (NULL);
	}
	ipss->ipsec_tunnel_policy_gen++;
	*gen = ipss->ipsec_tunnel_policy_gen;
	newbie->itp_refcnt = 2;	/* One for the caller, one for the tree. */
	newbie->itp_next_policy_index = 1;
	avl_insert(&ipss->ipsec_tunnel_policies, newbie, where);
	mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
	newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_policy == NULL)
		goto nomem;
	newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_inactive == NULL) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		goto nomem;
	}

	if (!tunnel_polhead_init(newbie->itp_policy, ns)) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	} else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) {
		IPPH_REFRELE(newbie->itp_policy, ns);
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);

	return (newbie);
nomem:
	*errno = ENOMEM;
	/*
	 * Unwind everything set up since the avl_insert():  the node is in
	 * the tree with the writer lock still held, and its mutex and
	 * fragcache have been initialized.
	 */
	avl_remove(&ipss->ipsec_tunnel_policies, newbie);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	mutex_destroy(&newbie->itp_lock);
	ipsec_fragcache_uninit(&newbie->itp_fragcache);
	kmem_free(newbie, sizeof (*newbie));
	return (NULL);
}

/*
 * We can't call the tun_t lookup function until tun is
 * loaded, so create a dummy function to avoid symbol
 * lookup errors on boot.
 */
/* ARGSUSED */
ipsec_tun_pol_t *
itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af, netstack_t *ns)
{
	return (NULL);	/* Always return NULL. */
}

/*
 * Frag cache code, based on SunScreen 3.2 source
 * screen/kernel/common/screen_fragcache.c
 */

#define	IPSEC_FRAG_TTL_MAX	5
/*
 * Note that the following parameters create 256 hash buckets
 * with 1024 free entries to be distributed.  Things are cleaned
 * periodically and are attempted to be cleaned when there is no
 * free space, but this system errs on the side of dropping packets
 * over creating memory exhaustion.  We may decide to make hash
 * factor a tunable if this proves to be a bad decision.
 */
#define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
#define	IPSEC_FRAG_HASH_FACTOR	4
#define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)

#define	IPSEC_FRAG_HASH_MASK		(IPSEC_FRAG_HASH_SLOTS - 1)
#define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
					    (((id) / \
					    (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
					    IPSEC_FRAG_HASH_MASK))
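
/*
 * Worked example for the hash above: an IPv4 ident of 0x1234 hashes to
 * (0x34 ^ 0x12) = 0x26, i.e. the low byte of the ident XORed with the
 * next eight bits.  Only the low sixteen bits participate, so 32-bit
 * IPv6 idents differing only above bit 15 share a bucket; the per-entry
 * ident/src/dst/proto comparison in the lookup loop below tells them
 * apart.
 */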
/* Maximum fragments per packet.  48 bytes payload x 1366 packets > 64KB */
#define	IPSEC_MAX_FRAGS		1366

#define	V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
				    IPH_OFFSET) << 3)
#define	V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
				    IPH_MF)

/*
 * Initialize an ipsec fragcache instance.
 * Returns B_FALSE if memory allocation fails.
 */
boolean_t
ipsec_fragcache_init(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *ftemp;
	int i;

	mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL);
	frag->itpf_ptr = (ipsec_fragcache_entry_t **)
	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) *
	    IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP);
	if (frag->itpf_ptr == NULL)
		return (B_FALSE);

	ftemp = (ipsec_fragcache_entry_t *)
	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t) *
	    IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP);
	if (ftemp == NULL) {
		kmem_free(frag->itpf_ptr,
		    sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
		return (B_FALSE);
	}

	frag->itpf_freelist = NULL;

	for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) {
		ftemp->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = ftemp;
		ftemp++;
	}

	frag->itpf_expire_hint = 0;

	return (B_TRUE);
}

void
ipsec_fragcache_uninit(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *fep;
	int i;

	mutex_enter(&frag->itpf_lock);
	if (frag->itpf_ptr) {
		/* Delete any existing fragcache entry chains */
		for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
			fep = (frag->itpf_ptr)[i];
			while (fep != NULL) {
				/* Returned fep is next in chain or NULL */
				fep = fragcache_delentry(i, fep, frag);
			}
		}
		/*
		 * Chase the pointers back to the beginning
		 * of the memory allocation and then
		 * get rid of the allocated freelist
		 */
		while (frag->itpf_freelist->itpfe_next != NULL)
			frag->itpf_freelist = frag->itpf_freelist->itpfe_next;
		/*
		 * XXX - If we ever dynamically grow the freelist
		 * then we'll have to free entries individually
		 * or determine how many entries or chunks we have
		 * grown since the initial allocation.
		 */
		kmem_free(frag->itpf_freelist,
		    sizeof (ipsec_fragcache_entry_t) *
		    IPSEC_FRAG_HASH_SIZE);
		/* Free the fragcache structure */
		kmem_free(frag->itpf_ptr,
		    sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
	}
	mutex_exit(&frag->itpf_lock);
	mutex_destroy(&frag->itpf_lock);
}
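
/*
 * Why the pointer-chase in ipsec_fragcache_uninit() can find the
 * allocation base: the init loop pushed &ftemp[0] .. &ftemp[N-1] onto
 * the freelist in ascending order, leaving &ftemp[0] (the address
 * kmem_zalloc() returned) as the tail with a NULL itpfe_next:
 *
 *	freelist -> ftemp[N-1] -> ... -> ftemp[1] -> ftemp[0] -> NULL
 *
 * This relies on the tail staying put for the life of the cache; see
 * the XXX comment in the function above about what breaks if the
 * freelist is ever grown dynamically.
 */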
/*
 * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
 * Returns mp if a whole fragment has been assembled, NULL otherwise
 */

mblk_t *
ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
    int outer_hdr_len, ipsec_stack_t *ipss)
{
	boolean_t is_v4;
	time_t itpf_time;
	ipha_t *iph;
	ipha_t *oiph;
	ip6_t *ip6h = NULL;
	uint8_t v6_proto;
	uint8_t *v6_proto_p;
	uint16_t ip6_hdr_length;
	ip6_pkt_t ipp;
	ip6_frag_t *fraghdr;
	ipsec_fragcache_entry_t *fep;
	int i;
	mblk_t *nmp, *prevmp, *spare_mp = NULL;
	int firstbyte, lastbyte;
	int offset;
	int last;
	boolean_t inbound = (ipsec_mp != NULL);
	mblk_t *first_mp = inbound ? ipsec_mp : mp;

	mutex_enter(&frag->itpf_lock);

	oiph = (ipha_t *)mp->b_rptr;
	iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);
	if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
		is_v4 = B_TRUE;
	} else {
		ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
		if ((spare_mp = msgpullup(mp, -1)) == NULL) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    DROPPER(ipss, ipds_spd_nomem),
			    &ipss->ipsec_spd_dropper);
			return (NULL);
		}
		ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len);

		if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length,
		    &v6_proto_p)) {
			/*
			 * Find upper layer protocol.
			 * If it fails we have a malformed packet
			 */
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    DROPPER(ipss, ipds_spd_malformed_packet),
			    &ipss->ipsec_spd_dropper);
			freemsg(spare_mp);
			return (NULL);
		} else {
			v6_proto = *v6_proto_p;
		}

		bzero(&ipp, sizeof (ipp));
		(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
		if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
			/*
			 * We think this is a fragment, but didn't find
			 * a fragment header.  Something is wrong.
			 */
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    DROPPER(ipss, ipds_spd_malformed_frag),
			    &ipss->ipsec_spd_dropper);
			freemsg(spare_mp);
			return (NULL);
		}
		fraghdr = ipp.ipp_fraghdr;
		is_v4 = B_FALSE;
	}
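
	/*
	 * A note on the msgpullup(mp, -1) calls in this function: a
	 * length of -1 concatenates the entire b_cont chain into one
	 * freshly allocated mblk, so the IPv6 extension-header walks can
	 * assume contiguous headers.  The original chain is left alone;
	 * only the spare copy is parsed, and it is freed once the
	 * offsets and protocol have been extracted.
	 */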
	/* Anything to cleanup? */

	/*
	 * This cleanup call could be put in a timer loop
	 * but it may actually be just as reasonable a decision to
	 * leave it here.  The disadvantage is this only gets called when
	 * frags are added.  The advantage is that it is not
	 * susceptible to race conditions like a time-based cleanup
	 * may be.
	 */
	itpf_time = gethrestime_sec();
	if (itpf_time >= frag->itpf_expire_hint)
		ipsec_fragcache_clean(frag);

	/* Lookup to see if there is an existing entry */

	if (is_v4)
		i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident);
	else
		i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident);

	for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) {
		if (is_v4) {
			ASSERT(iph != NULL);
			if ((fep->itpfe_id == iph->ipha_ident) &&
			    (fep->itpfe_src == iph->ipha_src) &&
			    (fep->itpfe_dst == iph->ipha_dst) &&
			    (fep->itpfe_proto == iph->ipha_protocol))
				break;
		} else {
			ASSERT(fraghdr != NULL);
			ASSERT(fep != NULL);
			if ((fep->itpfe_id == fraghdr->ip6f_ident) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6,
			    &ip6h->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6,
			    &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto))
				break;
		}
	}

	if (is_v4) {
		firstbyte = V4_FRAG_OFFSET(iph);
		lastbyte = firstbyte + ntohs(iph->ipha_length) -
		    IPH_HDR_LENGTH(iph);
		last = (V4_MORE_FRAGS(iph) == 0);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, "
		    "lastbyte = %d, last = %d, id = %d\n", firstbyte,
		    lastbyte, last, iph->ipha_ident);
#endif
	} else {
		firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
		lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
		    sizeof (ip6_t) - ip6_hdr_length;
		last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0;
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, "
		    "lastbyte = %d, last = %d, id = %d, fraghdr = %p, "
		    "spare_mp = %p\n", firstbyte, lastbyte, last,
		    fraghdr->ip6f_ident, fraghdr, spare_mp);
#endif
	}

	/* check for bogus fragments and delete the entry */
	if (firstbyte > 0 && firstbyte <= 8) {
		if (fep != NULL)
			(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    DROPPER(ipss, ipds_spd_malformed_frag),
		    &ipss->ipsec_spd_dropper);
		freemsg(spare_mp);
		return (NULL);
	}

	/* Not found, allocate a new entry */
	if (fep == NULL) {
		if (frag->itpf_freelist == NULL) {
			/* see if there is some space */
			ipsec_fragcache_clean(frag);
			if (frag->itpf_freelist == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				freemsg(spare_mp);
				return (NULL);
			}
		}

		fep = frag->itpf_freelist;
		frag->itpf_freelist = fep->itpfe_next;

		if (is_v4) {
			bcopy((caddr_t)&iph->ipha_src,
			    (caddr_t)&fep->itpfe_src,
			    sizeof (struct in_addr));
			bcopy((caddr_t)&iph->ipha_dst,
			    (caddr_t)&fep->itpfe_dst,
			    sizeof (struct in_addr));
			fep->itpfe_id = iph->ipha_ident;
			fep->itpfe_proto = iph->ipha_protocol;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		} else {
			bcopy((in6_addr_t *)&ip6h->ip6_src,
			    (in6_addr_t *)&fep->itpfe_src6,
			    sizeof (struct in6_addr));
			bcopy((in6_addr_t *)&ip6h->ip6_dst,
			    (in6_addr_t *)&fep->itpfe_dst6,
			    sizeof (struct in6_addr));
			fep->itpfe_id = fraghdr->ip6f_ident;
			fep->itpfe_proto = v6_proto;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		}
		itpf_time = gethrestime_sec();
		fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1;
		fep->itpfe_last = 0;
		fep->itpfe_fraglist = NULL;
		fep->itpfe_depth = 0;
		fep->itpfe_next = (frag->itpf_ptr)[i];
		(frag->itpf_ptr)[i] = fep;

		if (frag->itpf_expire_hint > fep->itpfe_exp)
			frag->itpf_expire_hint = fep->itpfe_exp;
	}
	freemsg(spare_mp);
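
	/*
	 * Shape of the cache maintained above (256 slots; 1024
	 * pre-allocated entries shared between the hash chains and the
	 * freelist):
	 *
	 *	itpf_ptr[hash(id)] -> entry -> entry -> NULL
	 *				|
	 *			itpfe_fraglist (fragments, b_next-linked,
	 *					sorted by starting offset)
	 *
	 *	itpf_freelist -> entry -> entry -> ... -> NULL
	 *
	 * A freshly allocated entry lives IPSEC_FRAG_TTL_MAX + 1 seconds
	 * before ipsec_fragcache_clean() may reclaim it.
	 */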
	/* Insert it in the frag list */
	/* List is in order by starting offset of fragments */

	prevmp = NULL;
	for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) {
		ipha_t *niph;
		ipha_t *oniph;
		ip6_t *nip6h;
		ip6_pkt_t nipp;
		ip6_frag_t *nfraghdr;
		uint16_t nip6_hdr_length;
		uint8_t *nv6_proto_p;
		int nfirstbyte, nlastbyte;
		char *data, *ndata;
		mblk_t *nspare_mp = NULL;
		mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp);
		int hdr_len;

		oniph = (ipha_t *)mp->b_rptr;
		nip6h = NULL;
		niph = NULL;

		/*
		 * Determine outer header type and length and set
		 * pointers appropriately
		 */

		if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION);
			if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			nip6h = (ip6_t *)nspare_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
			    &nip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0);
		}

		/*
		 * Determine inner header type and length and set
		 * pointers appropriately
		 */

		if (is_v4) {
			if (niph == NULL) {
				/* Was v6 outer */
				niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
			}
			nfirstbyte = V4_FRAG_OFFSET(niph);
			nlastbyte = nfirstbyte + ntohs(niph->ipha_length) -
			    IPH_HDR_LENGTH(niph);
		} else {
			if ((nspare_mp == NULL) &&
			    ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
			    &nip6_hdr_length, &nv6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				ipsec_freemsg_chain(nspare_mp);
				return (NULL);
			}
			bzero(&nipp, sizeof (nipp));
			(void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL);
			nfraghdr = nipp.ipp_fraghdr;
			nfirstbyte = ntohs(nfraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) +
			    sizeof (ip6_t) - nip6_hdr_length;
		}
		ipsec_freemsg_chain(nspare_mp);
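
		/*
		 * The two overlap checks below compare actual payload
		 * bytes, not just offset ranges.  For example, with an
		 * existing fragment covering [1480, 2960) and a new one
		 * covering [2000, 3000), the first check bcmp()s the
		 * MIN(3000, 2960) - 2000 = 960 shared bytes and drops
		 * both packets if they differ.
		 */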
		/* Check for overlapping fragments */
		if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) {
			/*
			 * Overlap Check:
			 *  ~~~~---------		# Check if the newly
			 *  ~   ndata_mp|		# received fragment
			 *  ~~~~---------		# overlaps with the
			 *  ---------~~~~~~		# current fragment.
			 *  |	mp	~
			 *  ---------~~~~~~
			 */
			if (is_v4) {
				data = (char *)iph + IPH_HDR_LENGTH(iph) +
				    firstbyte - nfirstbyte;
				ndata = (char *)niph + IPH_HDR_LENGTH(niph);
			} else {
				data = (char *)ip6h +
				    nip6_hdr_length + firstbyte -
				    nfirstbyte;
				ndata = (char *)nip6h + nip6_hdr_length;
			}
			if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
			    - firstbyte)) {
				/* Overlapping data does not match */
				(void) fragcache_delentry(i, fep, frag);
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_overlap_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			/* Part of defense for jolt2.c fragmentation attack */
			if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
				/*
				 * Check for identical or subset fragments:
				 *  ----------	    ~~~~--------~~~~~
				 *  |	nmp   | or  ~	 nmp	    ~
				 *  ----------	    ~~~~--------~~~~~
				 *  ----------		  ------
				 *  |	mp    |		  | mp |
				 *  ----------		  ------
				 */
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_evil_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}

		}

		/* Correct location for this fragment? */
		if (firstbyte <= nfirstbyte) {
			/*
			 * Check if the tail end of the new fragment overlaps
			 * with the head of the current fragment.
			 *  --------~~~~~~~
			 *  |	nmp	  ~
			 *  --------~~~~~~~
			 *  ~~~~~--------
			 *  ~	 mp	|
			 *  ~~~~~--------
			 */
			if (lastbyte > nfirstbyte) {
				/* Fragments overlap */
				if (is_v4) {
					data = (char *)iph +
					    IPH_HDR_LENGTH(iph) + firstbyte -
					    nfirstbyte;
					ndata = (char *)niph +
					    IPH_HDR_LENGTH(niph);
				} else {
					data = (char *)ip6h +
					    nip6_hdr_length + firstbyte -
					    nfirstbyte;
					ndata = (char *)nip6h +
					    nip6_hdr_length;
				}
				if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
				    - nfirstbyte)) {
					/* Overlap mismatch */
					(void) fragcache_delentry(i, fep,
					    frag);
					mutex_exit(&frag->itpf_lock);
					ip_drop_packet(first_mp, inbound, NULL,
					    NULL,
					    DROPPER(ipss,
					    ipds_spd_overlap_frag),
					    &ipss->ipsec_spd_dropper);
					return (NULL);
				}
			}

			/*
			 * Fragment does not illegally overlap and can now
			 * be inserted into the chain
			 */
			break;
		}

		prevmp = nmp;
	}
	first_mp->b_next = nmp;

	if (prevmp == NULL) {
		fep->itpfe_fraglist = first_mp;
	} else {
		prevmp->b_next = first_mp;
	}
	if (last)
		fep->itpfe_last = 1;

	/* Part of defense for jolt2.c fragmentation attack */
	if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
		(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    DROPPER(ipss, ipds_spd_max_frags),
		    &ipss->ipsec_spd_dropper);
		return (NULL);
	}

	/* Check for complete packet */

	if (!fep->itpfe_last) {
		mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "Fragment cached, not last.\n");
#endif
		return (NULL);
	}

#ifdef FRAGCACHE_DEBUG
	cmn_err(CE_WARN, "Last fragment cached.\n");
	cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp);
#endif
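
	/*
	 * Completeness check below: walk the offset-sorted fraglist,
	 * tracking how far contiguous coverage extends.  For example,
	 * fragments covering [0, 1480), [1480, 2960), [2960, 3000) with
	 * MF clear on the last one advance offset 0 -> 1480 -> 2960 and
	 * succeed, while a chain of [0, 1480), [2960, 3000) stops at the
	 * gap because 2960 > 1480.
	 */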
	offset = 0;
	for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
		mblk_t *data_mp = (inbound ? mp->b_cont : mp);
		int hdr_len;

		oiph = (ipha_t *)data_mp->b_rptr;
		ip6h = NULL;
		iph = NULL;

		spare_mp = NULL;
		if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
			if ((spare_mp = msgpullup(data_mp, -1)) == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			ip6h = (ip6_t *)spare_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
		}

		/* Calculate current fragment start/end */
		if (is_v4) {
			if (iph == NULL) {
				/* Was v6 outer */
				iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
			}
			firstbyte = V4_FRAG_OFFSET(iph);
			lastbyte = firstbyte + ntohs(iph->ipha_length) -
			    IPH_HDR_LENGTH(iph);
		} else {
			if ((spare_mp == NULL) &&
			    ((spare_mp = msgpullup(data_mp, -1)) == NULL)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				ipsec_freemsg_chain(spare_mp);
				return (NULL);
			}
			v6_proto = *v6_proto_p;
			bzero(&ipp, sizeof (ipp));
			(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
			fraghdr = ipp.ipp_fraghdr;
			firstbyte = ntohs(fraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
			    sizeof (ip6_t) - ip6_hdr_length;
		}
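
		/*
		 * Bounds arithmetic above, by example: a v4 fragment with
		 * a 20-byte header, an ipha_length of 1500, and a
		 * fragment-offset field of 185 covers firstbyte =
		 * 185 << 3 = 1480 through lastbyte = 1480 + 1500 - 20 =
		 * 2960.  (IP offsets count 8-byte units, hence the << 3
		 * in V4_FRAG_OFFSET().)
		 */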
: " 6902 "missing fragment: firstbyte = %d, offset = %d, " 6903 "mp = %p\n", firstbyte, offset, mp); 6904 #endif 6905 ipsec_freemsg_chain(spare_mp); 6906 return (NULL); 6907 } 6908 6909 /* 6910 * If we are at the last fragment, we have the complete 6911 * packet, so rechain things and return it to caller 6912 * for processing 6913 */ 6914 6915 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6916 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6917 mp = fep->itpfe_fraglist; 6918 fep->itpfe_fraglist = NULL; 6919 (void) fragcache_delentry(i, fep, frag); 6920 mutex_exit(&frag->itpf_lock); 6921 6922 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6923 65535)) || (!is_v4 && (firstbyte + 6924 ntohs(ip6h->ip6_plen) > 65535))) { 6925 /* It is an invalid "ping-o-death" packet */ 6926 /* Discard it */ 6927 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6928 DROPPER(ipss, ipds_spd_evil_frag), 6929 &ipss->ipsec_spd_dropper); 6930 ipsec_freemsg_chain(spare_mp); 6931 return (NULL); 6932 } 6933 #ifdef FRAGCACHE_DEBUG 6934 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6935 "mp->b_next = %p", mp, mp->b_next); 6936 #endif 6937 ipsec_freemsg_chain(spare_mp); 6938 /* 6939 * For inbound case, mp has ipsec_in b_next'd chain 6940 * For outbound case, it is just data mp chain 6941 */ 6942 return (mp); 6943 } 6944 ipsec_freemsg_chain(spare_mp); 6945 6946 /* 6947 * Update new ending offset if this 6948 * fragment extends the packet 6949 */ 6950 if (offset < lastbyte) 6951 offset = lastbyte; 6952 } 6953 6954 mutex_exit(&frag->itpf_lock); 6955 6956 /* Didn't find last fragment, so return NULL */ 6957 return (NULL); 6958 } 6959 6960 static void 6961 ipsec_fragcache_clean(ipsec_fragcache_t *frag) 6962 { 6963 ipsec_fragcache_entry_t *fep; 6964 int i; 6965 ipsec_fragcache_entry_t *earlyfep = NULL; 6966 time_t itpf_time; 6967 int earlyexp; 6968 int earlyi = 0; 6969 6970 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6971 6972 itpf_time = gethrestime_sec(); 6973 earlyexp = itpf_time + 10000; 6974 6975 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6976 fep = (frag->itpf_ptr)[i]; 6977 while (fep) { 6978 if (fep->itpfe_exp < itpf_time) { 6979 /* found */ 6980 fep = fragcache_delentry(i, fep, frag); 6981 } else { 6982 if (fep->itpfe_exp < earlyexp) { 6983 earlyfep = fep; 6984 earlyexp = fep->itpfe_exp; 6985 earlyi = i; 6986 } 6987 fep = fep->itpfe_next; 6988 } 6989 } 6990 } 6991 6992 frag->itpf_expire_hint = earlyexp; 6993 6994 /* if (!found) */ 6995 if (frag->itpf_freelist == NULL) 6996 (void) fragcache_delentry(earlyi, earlyfep, frag); 6997 } 6998 6999 static ipsec_fragcache_entry_t * 7000 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 7001 ipsec_fragcache_t *frag) 7002 { 7003 ipsec_fragcache_entry_t *targp; 7004 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 7005 7006 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7007 7008 /* Free up any fragment list still in cache entry */ 7009 ipsec_freemsg_chain(fep->itpfe_fraglist); 7010 7011 targp = (frag->itpf_ptr)[slot]; 7012 ASSERT(targp != 0); 7013 7014 if (targp == fep) { 7015 /* unlink from head of hash chain */ 7016 (frag->itpf_ptr)[slot] = nextp; 7017 /* link into free list */ 7018 fep->itpfe_next = frag->itpf_freelist; 7019 frag->itpf_freelist = fep; 7020 return (nextp); 7021 } 7022 7023 /* maybe should use double linked list to make update faster */ 7024 /* must be past front of chain */ 7025 while (targp) { 7026 if (targp->itpfe_next == fep) { 7027 /* unlink from hash chain */ 7028 targp->itpfe_next = nextp; 7029 /* link into free list */ 7030 fep->itpfe_next = 
	/* maybe should use double linked list to make update faster */
	/* must be past front of chain */
	while (targp) {
		if (targp->itpfe_next == fep) {
			/* unlink from hash chain */
			targp->itpfe_next = nextp;
			/* link into free list */
			fep->itpfe_next = frag->itpf_freelist;
			frag->itpf_freelist = fep;
			return (nextp);
		}
		targp = targp->itpfe_next;
		ASSERT(targp != NULL);
	}
	/* NOTREACHED */
	return (NULL);
}
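
/*
 * Chain-walk sketch for fragcache_delentry()'s return value (this is
 * how ipsec_fragcache_uninit() and ipsec_fragcache_clean() above
 * consume it):
 *
 *	fep = frag->itpf_ptr[slot];
 *	while (fep != NULL)
 *		fep = fragcache_delentry(slot, fep, frag);
 *
 * Returning the successor lets callers delete while iterating without
 * re-reading a freed entry's itpfe_next.
 */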