/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t **, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, netstack_t *);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(netstack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
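/*
 * Illustrative note (not in the original source): the sel_flags values
 * are independent bits and may be OR'ed together by callers, e.g.
 * (SEL_TUNNEL_MODE | SEL_PORT_POLICY) for per-port policy on a
 * tunnel-mode packet; SEL_NONE asks for plain selector extraction.
 */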
/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG}
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

uint32_t ipsec_spd_hashsize;
uint32_t tun_spd_hashsize;

#define	IPSEC_SEL_NOHASH ((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int,
    ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2) 						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	    (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv4 Fragments
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)
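/*
 * Illustrative example (not in the original source): a first fragment
 * carries offset 0 with IPH_MF set, and a trailing fragment carries a
 * nonzero offset, so IS_V4_FRAGMENT() is true for both; only a complete
 * datagram has both IPH_OFFSET and IPH_MF clear at once.
 */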
/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries..
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * The in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsl_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
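	 *
	 * Illustrative example (not in the original comment): given three
	 * in-tree rules with ipsp_index == 5, an avl_find() on a template
	 * node { ipsp_index = 5, ipsp_sel = NULL } fails, but leaves the
	 * insertion point just before the first real index-5 rule, and
	 * walking forward from there visits all three.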
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}

/*
 * Free what ipsec_alloc_table allocated.
 */
void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;
	int i;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		for (i = 0; i < ipr->ipr_nchains; i++) {
			ASSERT(ipr->ipr_hash[i].hash_head == NULL);
		}
		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
		ipr->ipr_hash = NULL;
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));

	}
	ipsec_polhead_free_table(iph);
}

/*
 * Free the IPsec stack instance.
 */
/* ARGSUSED */
static void
ipsec_stack_fini(netstackid_t stackid, void *arg)
{
	ipsec_stack_t	*ipss = (ipsec_stack_t *)arg;
	void		*cookie;
	ipsec_tun_pol_t	*node;
	netstack_t	*ns = ipss->ipsec_netstack;
	int		i;
	ipsec_algtype_t	algtype;

	ipsec_loader_destroy(ipss);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&ipss->ipsec_tunnel_policies,
	    &cookie)) != NULL) {
		ITP_REFRELE(node, ns);
	}
	avl_destroy(&ipss->ipsec_tunnel_policies);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	rw_destroy(&ipss->ipsec_tunnel_policy_lock);

	ipsec_config_flush(ns);

	ipsec_kstat_destroy(ipss);

	ip_drop_unregister(&ipss->ipsec_dropper);

	ip_drop_unregister(&ipss->ipsec_spd_dropper);
	ip_drop_destroy(ipss);
	/*
	 * Globals start with ref == 1 to prevent IPPH_REFRELE() from
	 * attempting to free them, hence they should have 1 now.
	 */
	ipsec_polhead_destroy(&ipss->ipsec_system_policy);
	ASSERT(ipss->ipsec_system_policy.iph_refs == 1);
	ipsec_polhead_destroy(&ipss->ipsec_inactive_policy);
	ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
		ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head);
		ipss->ipsec_action_hash[i].hash_head = NULL;
		mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock));
	}

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
		ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock));
	}

	mutex_enter(&ipss->ipsec_alg_lock);
	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		int nalgs = ipss->ipsec_nalgs[algtype];

		for (i = 0; i < nalgs; i++) {
			if (ipss->ipsec_alglists[algtype][i] != NULL)
				ipsec_alg_unreg(algtype, i, ns);
		}
	}
	mutex_exit(&ipss->ipsec_alg_lock);
	mutex_destroy(&ipss->ipsec_alg_lock);

	ipsid_gc(ns);
	ipsid_fini(ns);

	(void) ipsec_free_tables(ipss);
	kmem_free(ipss, sizeof (*ipss));
}

void
ipsec_policy_g_destroy(void)
{
	kmem_cache_destroy(ipsec_action_cache);
	kmem_cache_destroy(ipsec_sel_cache);
	kmem_cache_destroy(ipsec_pol_cache);
	kmem_cache_destroy(ipsec_info_cache);

	ipsec_unregister_prov_update();

	netstack_unregister(NS_IPSEC);
}


/*
 * Free what ipsec_alloc_tables allocated.
 * Called when table allocation fails, to free any partially allocated
 * tables.
 */
static int
ipsec_free_tables(ipsec_stack_t *ipss)
{
	int i;

	if (ipss->ipsec_sel_hash != NULL) {
		for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
			ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		}
		kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize *
		    sizeof (*ipss->ipsec_sel_hash));
		ipss->ipsec_sel_hash = NULL;
		ipss->ipsec_spd_hashsize = 0;
	}
	ipsec_polhead_free_table(&ipss->ipsec_system_policy);
	ipsec_polhead_free_table(&ipss->ipsec_inactive_policy);

	return (ENOMEM);
}

/*
 * Attempt to allocate the tables in a single policy head.
 * Return nonzero on failure after cleaning up any work in progress.
 */
int
ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
    boolean_t global_cleanup, netstack_t *ns)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;
		ipr->ipr_hash = kmem_zalloc(nchains *
		    sizeof (ipsec_policy_hash_t), kmflag);
		if (ipr->ipr_hash == NULL)
			return (global_cleanup ?
			    ipsec_free_tables(ns->netstack_ipsec) :
			    ENOMEM);
	}
	return (0);
}
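/*
 * Design note (illustrative, not in the original source):
 * ipsec_free_tables() unconditionally returns ENOMEM, which lets
 * allocation-failure paths clean up and report the error in a single
 * statement, e.g. "return (ipsec_free_tables(ipss));" as in
 * ipsec_alloc_tables() below.
 */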
/*
 * Attempt to allocate the various tables.  Return nonzero on failure
 * after cleaning up any work in progress.
 */
static int
ipsec_alloc_tables(int kmflag, netstack_t *ns)
{
	int error;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	error = ipsec_alloc_table(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize *
	    sizeof (*ipss->ipsec_sel_hash), kmflag);

	if (ipss->ipsec_sel_hash == NULL)
		return (ipsec_free_tables(ipss));

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}

static boolean_t
ipsec_kstat_init(ipsec_stack_t *ipss)
{
	ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net",
	    KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid);

	if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL)
		return (B_FALSE);

	ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data;

#define	KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64)
	KI(esp_stat_in_requests);
	KI(esp_stat_in_discards);
	KI(esp_stat_lookup_failure);
	KI(ah_stat_in_requests);
	KI(ah_stat_in_discards);
	KI(ah_stat_lookup_failure);
	KI(sadb_acquire_maxpackets);
	KI(sadb_acquire_qhiwater);
#undef KI

	kstat_install(ipss->ipsec_ksp);
	return (B_TRUE);
}

static void
ipsec_kstat_destroy(ipsec_stack_t *ipss)
{
	kstat_delete_netstack(ipss->ipsec_ksp,
	    ipss->ipsec_netstack->netstack_stackid);
	ipss->ipsec_kstats = NULL;
}

/*
 * Initialize the IPsec stack instance.
 */
/* ARGSUSED */
static void *
ipsec_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsec_stack_t	*ipss;
	int i;

	ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP);
	ipss->ipsec_netstack = ns;

	/*
	 * FIXME: netstack_ipsec is used by some of the routines we call
	 * below, but it isn't set until this routine returns.
	 * Either we introduce optional xxx_stack_alloc() functions
	 * that will be called by the netstack framework before xxx_stack_init,
	 * or we switch spd.c and sadb.c to operate on ipsec_stack_t
	 * (latter has some include file order issues for sadb.h, but makes
	 * sense if we merge some of the ipsec related stack_t's together).
	 */
	ns->netstack_ipsec = ipss;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ?
	    IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize;
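	/*
	 * Illustrative note (not in the original source): the preferred
	 * size comes from the ipsec_spd_hashsize tunable above, e.g. an
	 * /etc/system line such as (assuming this file is built into the
	 * "ip" module):
	 *
	 *	set ip:ipsec_spd_hashsize = 1021
	 *
	 * A prime bucket count helps spread the selector hash evenly.
	 */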
	if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipss->ipsec_spd_hashsize);
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipss->ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP, ns);
	}

	/* Just set a default for tunnels. */
	ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ?
	    TUN_SPDHASH_DEFAULT : tun_spd_hashsize;

	ipsid_init(ns);
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	ipss->ipsec_system_policy.iph_refs = 1;
	ipss->ipsec_inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize);
	ipsec_polhead_init(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize);
	rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), 0);

	ipss->ipsec_next_policy_index = 1;

	rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipss->ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++)
		mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL);
	for (i = 0; i < IPSEC_NALGTYPES; i++) {
		ipss->ipsec_nalgs[i] = 0;
	}

	ip_drop_init(ipss);
	ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD");

	/* Set function to dummy until tun is loaded */
	rw_init(&ipss->ipsec_itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
	rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER);
	ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy;
	rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);

	/* IP's IPsec code calls the packet dropper */
	ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing");

	(void) ipsec_kstat_init(ipss);

	ipsec_loader_init(ipss);
	ipsec_loader_start(ipss);

	return (ipss);
}

/* Global across all stack instances */
void
ipsec_policy_g_init(void)
{
	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_info_cache = kmem_cache_create("ipsec_info",
	    sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsec_stack_t's.
	 */
	netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini);
}

/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
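 *
 * For example (illustrative, not in the original comment): with a
 * cipher whose minimum key size is 128 bits and one whose minimum is
 * 64 bits both loaded, alg_insert_sortlist() below places the 128-bit
 * algorithm ahead of the 64-bit one in ipsec_sortlist[].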
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_alginfo_t	*ai = ipss->ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipss->ipsec_nalgs[at];
	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipss->ipsec_sortlist[at][i];
			ipss->ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		} /* Else just continue. */
	}

	/* Store holder in last slot. */
	ipss->ipsec_sortlist[at][i] = holder;
}

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	boolean_t copyback = B_FALSE;
	int i;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	int newcount = ipss->ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback) {
			ipss->ipsec_sortlist[at][i-1] =
			    ipss->ipsec_sortlist[at][i];
		} else if (ipss->ipsec_sortlist[at][i] == algid) {
			copyback = B_TRUE;
		}
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype, ns);
	ipss->ipsec_alglists[algtype][alg->alg_id] = alg;

	ipss->ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id, ns);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]);
	ipss->ipsec_alglists[algtype][algid] = NULL;

	ipss->ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid, ns);
}
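/*
 * Illustrative usage sketch (not in the original source): callers are
 * expected to register and unregister algorithms with the table mutex
 * held, e.g.
 *
 *	mutex_enter(&ipss->ipsec_alg_lock);
 *	ipsec_alg_reg(IPSEC_ALG_ENCR, alg, ns);
 *	mutex_exit(&ipss->ipsec_alg_lock);
 *
 * which keeps ipsec_nalgs[] and the sort list consistent with the
 * algorithm tables.
 */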
/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_system_policy;

	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy;

	IPPH_REFHOLD(h);
	return (h);
}

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive,
    netstack_t *ns)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}

		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags(ns->netstack_ipsec);
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ipsec_swap_policy(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns);
}

/*
 * Clone one policy rule..
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}


static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);
		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}
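/*
 * Illustrative note (not in the original source): ipsec_copy_policy()
 * above gives the clone its own tree/hash linkage but shares the
 * selector and action with the original, bumping ipsl_refs and the
 * action refcount instead of deep-copying them.
 */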
/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph,
    netstack_t *ns)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph, ns);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];
		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph, ns);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	return (ipsec_copy_polhead(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns));
}

/*
 * Generic "do we have IPvN policy" answer.
 */
boolean_t
iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
{
	int i, hval;
	uint32_t valbit;
	ipsec_policy_root_t *ipr;
	ipsec_policy_t *ipp;

	if (v6) {
		valbit = IPSL_IPV6;
		hval = IPSEC_AF_V6;
	} else {
		valbit = IPSL_IPV4;
		hval = IPSEC_AF_V4;
	}

	ASSERT(RW_LOCK_HELD(&iph->iph_lock));
	for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
		if (ipr->ipr_nonhash[hval] != NULL)
			return (B_TRUE);
		for (i = 0; i < ipr->ipr_nchains; i++) {
			for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
			    ipp = ipp->ipsp_hash.hash_next) {
				if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
					return (B_TRUE);
			}
		}
	}

	return (B_FALSE);
}

/*
 * Extract the string from ipsec_policy_failure_msgs[type] and
 * log it.
 *
 */
void
ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h,
    boolean_t secure, netstack_t *ns)
{
	char	sbuf[INET6_ADDRSTRLEN];
	char	dbuf[INET6_ADDRSTRLEN];
	char	*s;
	char	*d;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL) {
		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
	} else {
		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));
	}

	/* Always bump the policy failure counter. */
	ipss->ipsec_policy_failure_count[type]++;

	ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    ipsec_policy_failure_msgs[type], func_name,
	    (secure ? "secure" : "not secure"), s, d);
}

/*
 * Rate-limiting front-end to strlog() for AH and ESP.  Uses the ndd variables
 * in /dev/ip and the same rate-limiting clock so that there's a single
 * knob to turn to throttle the rate of messages.
 */
void
ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl,
    char *fmt, ...)
{
	va_list adx;
	hrtime_t current = gethrtime();
	ip_stack_t	*ipst = ns->netstack_ip;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	sl |= SL_CONSOLE;
	/*
	 * Throttle logging to stop syslog from being swamped.  If variable
	 * 'ipsec_policy_log_interval' is zero, don't log any messages at
	 * all, otherwise log only one message every 'ipsec_policy_log_interval'
	 * msec.  Convert interval (in msec) to hrtime (in nsec).
	 */

	if (ipst->ips_ipsec_policy_log_interval) {
		if (ipss->ipsec_policy_failure_last +
		    ((hrtime_t)ipst->ips_ipsec_policy_log_interval *
		    (hrtime_t)1000000) <= current) {
			va_start(adx, fmt);
			(void) vstrlog(mid, sid, level, sl, fmt, adx);
			va_end(adx);
			ipss->ipsec_policy_failure_last = current;
		}
	}
}

void
ipsec_config_flush(netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER);
	ipsec_polhead_flush(&ipss->ipsec_system_policy, ns);
	ipss->ipsec_next_policy_index = 1;
	rw_exit(&ipss->ipsec_system_policy.iph_lock);
	ipsec_action_reclaim_stack(ns);
}

/*
 * Clip a policy's min/max keybits vs. the capabilities of the
 * algorithm.
 */
static void
act_alg_adjust(uint_t algtype, uint_t algid,
    uint16_t *minbits, uint16_t *maxbits, netstack_t *ns)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_alginfo_t	*algp = ipss->ipsec_alglists[algtype][algid];

	if (algp != NULL) {
		/*
		 * If passed-in minbits is zero, we assume the caller trusts
		 * us with setting the minimum key size.  We pick the
		 * algorithm's DEFAULT key size for the minimum in this case.
		 */
		if (*minbits == 0) {
			*minbits = algp->alg_default_bits;
			ASSERT(*minbits >= algp->alg_minbits);
		} else {
			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
			    algp->alg_minbits);
		}
		if (*maxbits == 0)
			*maxbits = algp->alg_maxbits;
		else
			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
			    algp->alg_maxbits);
		ASSERT(*minbits <= *maxbits);
	} else {
		*minbits = 0;
		*maxbits = 0;
	}
}
/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of non-wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss) \
	((wild)?(ipss->ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}

/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
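	 *
	 * For example (illustrative): !!(req->ipsr_ah_req &
	 * IPSEC_PREF_REQUIRED) collapses any nonzero result to exactly 1,
	 * which is safe to assign to a one-bit ipp_use_* bitfield.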
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg) || !ipp->ipp_use_ah;
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	ipp->ipp_auth_alg = req->ipsr_auth_alg;
	ipp->ipp_esp_auth_alg = req->ipsr_esp_auth_alg;
}

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp,
    netstack_t *ns)
{
	struct ipsec_act act;

	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp, ns);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a policy head's inbound policy back to an ipsec_req_t (more
 * backwards compat).  We assume caller has already zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}

/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	mutex_enter(&connp->conn_lock);
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (ipl->ipl_in_action != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
			goto done;
		}
		if (ipl->ipl_in_policy != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
			    req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	mutex_exit(&connp->conn_lock);
	return (rv);
}

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * When outbound policy is not cached, look it up the hard way and attach
 * an ipsec_out_t to the packet..
 */
static mblk_t *
ipsec_attach_global_policy(mblk_t **mp, conn_t *connp, ipsec_selector_t *sel,
    netstack_t *ns)
{
	ipsec_policy_t *p;

	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns);

	if (p == NULL)
		return (NULL);
	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns));
}

/*
 * We have an ipsec_out already, but don't have cached policy; fill it in
 * with the right actions.
 */
static mblk_t *
ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
    ipsec_selector_t *sel, netstack_t *ns)
{
	ipsec_out_t *io;
	ipsec_policy_t *p;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	if (io->ipsec_out_policy == NULL) {
		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns);
		io->ipsec_out_policy = p;
	}
	return (ipsec_mp);
}


/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present,
    ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;
	netstack_t *ns;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ns = ii->ipsec_in_ns;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp, ns);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (first_mp);
}

/*
 * Check that packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(ii->ipsec_in_secure);
	ASSERT(!ii->ipsec_in_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */

	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_ah_innermismatch);
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_esp_innermismatch);
		return (B_FALSE);
	}
	return (B_TRUE);
}

static boolean_t
ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ii->ipsec_in_loopback) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * icmp_loopback is initialized in ip.c and ip6.c.  (Look for
		 * ipsec_out_icmp_loopback.)
		 */
		if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback)
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = DROPPER(ipss, ipds_spd_loopback_mismatch);
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!ii->ipsec_in_icmp_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;

	decaps = ii->ipsec_in_decaps;

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = DROPPER(ipss, ipds_spd_explicit);
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = DROPPER(ipss, ipds_spd_got_secure);
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_ahalg);
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_ah);
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay.  But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet.
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
}

/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fall back to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
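 *
 * For example (illustrative, not in the original comment): a fully
 * specified latched TCP conn_t with nonzero conn_fport and conn_lport
 * short-circuits to SA_UNIQUE_ID(conn_fport, conn_lport, IPPROTO_TCP, 0);
 * only wildcard ports force the slow-path packet parse below.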
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}

/*
 * Called to check policy on a latched connection, both from this file
 * and from tcp.c
 */
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    conn_t *connp)
{
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(ipl->ipl_ids_latched == B_TRUE);

	if (!ii->ipsec_in_loopback) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if ((ii->ipsec_in_ah_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
			*counter = DROPPER(ipss, ipds_spd_ah_badid);
			*reason = "AH identity mismatch";
			return (B_FALSE);
		}

		if ((ii->ipsec_in_esp_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
			*counter = DROPPER(ipss, ipds_spd_esp_badid);
			*reason = "ESP identity mismatch";
			return (B_FALSE);
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h))) {
			return (B_FALSE);
		}
	}

	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
	    ipha, ip6h, reason, counter));
}

/*
 * Check to see whether this secured datagram meets the policy
 * constraints specified in ipsp.
 *
 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy.
 *
 * Consumes a reference to ipsp.
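 *
 * Returns first_mp untouched when one of the policy's actions accepts
 * the packet; otherwise the packet is logged, counted against the SPD
 * dropper, freed via ip_drop_packet(), and NULL is returned.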
1945 */ 1946 static mblk_t * 1947 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1948 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) 1949 { 1950 ipsec_in_t *ii; 1951 ipsec_action_t *ap; 1952 const char *reason = "no policy actions found"; 1953 mblk_t *data_mp, *ipsec_mp; 1954 ipsec_stack_t *ipss = ns->netstack_ipsec; 1955 ip_stack_t *ipst = ns->netstack_ip; 1956 kstat_named_t *counter; 1957 1958 counter = DROPPER(ipss, ipds_spd_got_secure); 1959 1960 data_mp = first_mp->b_cont; 1961 ipsec_mp = first_mp; 1962 1963 ASSERT(ipsp != NULL); 1964 1965 ASSERT((ipha == NULL && ip6h != NULL) || 1966 (ip6h == NULL && ipha != NULL)); 1967 1968 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1969 1970 if (ii->ipsec_in_loopback) 1971 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1972 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1973 ASSERT(ii->ipsec_in_secure); 1974 1975 if (ii->ipsec_in_action != NULL) { 1976 /* 1977 * this can happen if we do a double policy-check on a packet 1978 * Would be nice to be able to delete this test.. 1979 */ 1980 IPACT_REFRELE(ii->ipsec_in_action); 1981 } 1982 ASSERT(ii->ipsec_in_action == NULL); 1983 1984 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1985 reason = "inbound AH and ESP identities differ"; 1986 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1987 goto drop; 1988 } 1989 1990 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1991 goto drop; 1992 1993 /* 1994 * Ok, now loop through the possible actions and see if any 1995 * of them work for us. 1996 */ 1997 1998 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 1999 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 2000 ipha, ip6h, &reason, &counter)) { 2001 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2002 IPPOL_REFRELE(ipsp, ns); 2003 return (first_mp); 2004 } 2005 } 2006 drop: 2007 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 2008 "ipsec inbound policy mismatch: %s, packet dropped\n", 2009 reason); 2010 IPPOL_REFRELE(ipsp, ns); 2011 ASSERT(ii->ipsec_in_action == NULL); 2012 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2013 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2014 &ipss->ipsec_spd_dropper); 2015 return (NULL); 2016 } 2017 2018 /* 2019 * sleazy prefix-length-based compare. 2020 * another inlining candidate.. 2021 */ 2022 boolean_t 2023 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 2024 { 2025 int offset = pfxlen>>3; 2026 int bitsleft = pfxlen & 7; 2027 uint8_t *addr2 = (uint8_t *)addr2p; 2028 2029 /* 2030 * and there was much evil.. 2031 * XXX should inline-expand the bcmp here and do this 32 bits 2032 * or 64 bits at a time.. 2033 */ 2034 return ((bcmp(addr1, addr2, offset) == 0) && 2035 ((bitsleft == 0) || 2036 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0))); 2037 } 2038 2039 static ipsec_policy_t * 2040 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 2041 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 2042 { 2043 ipsec_selkey_t *isel; 2044 ipsec_policy_t *p; 2045 int bpri = best ? 
best->ipsp_prio : 0; 2046 2047 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 2048 uint32_t valid; 2049 2050 if (p->ipsp_prio <= bpri) 2051 continue; 2052 isel = &p->ipsp_sel->ipsl_key; 2053 valid = isel->ipsl_valid; 2054 2055 if ((valid & IPSL_PROTOCOL) && 2056 (isel->ipsl_proto != sel->ips_protocol)) 2057 continue; 2058 2059 if ((valid & IPSL_REMOTE_ADDR) && 2060 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 2061 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6)) 2062 continue; 2063 2064 if ((valid & IPSL_LOCAL_ADDR) && 2065 !ip_addr_match((uint8_t *)&isel->ipsl_local, 2066 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6)) 2067 continue; 2068 2069 if ((valid & IPSL_REMOTE_PORT) && 2070 isel->ipsl_rport != sel->ips_remote_port) 2071 continue; 2072 2073 if ((valid & IPSL_LOCAL_PORT) && 2074 isel->ipsl_lport != sel->ips_local_port) 2075 continue; 2076 2077 if (!is_icmp_inv_acq) { 2078 if ((valid & IPSL_ICMP_TYPE) && 2079 (isel->ipsl_icmp_type > sel->ips_icmp_type || 2080 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 2081 continue; 2082 } 2083 2084 if ((valid & IPSL_ICMP_CODE) && 2085 (isel->ipsl_icmp_code > sel->ips_icmp_code || 2086 isel->ipsl_icmp_code_end < 2087 sel->ips_icmp_code)) { 2088 continue; 2089 } 2090 } else { 2091 /* 2092 * special case for icmp inverse acquire 2093 * we only want policies that aren't drop/pass 2094 */ 2095 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2096 continue; 2097 } 2098 2099 /* we matched all the packet-port-field selectors! */ 2100 best = p; 2101 bpri = p->ipsp_prio; 2102 } 2103 2104 return (best); 2105 } 2106 2107 /* 2108 * Try to find and return the best policy entry under a given policy 2109 * root for a given set of selectors; the first parameter "best" is 2110 * the current best policy so far. If "best" is non-null, we have a 2111 * reference to it. We return a reference to a policy; if that policy 2112 * is not the original "best", we need to release that reference 2113 * before returning. 2114 */ 2115 ipsec_policy_t * 2116 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2117 int direction, ipsec_selector_t *sel, netstack_t *ns) 2118 { 2119 ipsec_policy_t *curbest; 2120 ipsec_policy_root_t *root; 2121 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2122 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2123 2124 curbest = best; 2125 root = &head->iph_root[direction]; 2126 2127 #ifdef DEBUG 2128 if (is_icmp_inv_acq) { 2129 if (sel->ips_isv4) { 2130 if (sel->ips_protocol != IPPROTO_ICMP) { 2131 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2132 " expecting icmp, got %d", 2133 sel->ips_protocol); 2134 } 2135 } else { 2136 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2137 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2138 " expecting icmpv6, got %d", 2139 sel->ips_protocol); 2140 } 2141 } 2142 } 2143 #endif 2144 2145 rw_enter(&head->iph_lock, RW_READER); 2146 2147 if (root->ipr_nchains > 0) { 2148 curbest = ipsec_find_policy_chain(curbest, 2149 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2150 is_icmp_inv_acq); 2151 } 2152 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2153 is_icmp_inv_acq); 2154 2155 /* 2156 * Adjust reference counts if we found anything new. 
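 *
 * (That is, hold the winning rule on behalf of the caller and release
 * the reference we were given on the superseded "best". Illustrative
 * caller contract, matching ipsec_find_policy() below:
 *
 *	p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy,
 *	    direction, sel, ns);
 *
 * any non-NULL return carries a reference that the caller eventually
 * drops with IPPOL_REFRELE(p, ns).)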
2157 */ 2158 if (curbest != best) { 2159 ASSERT(curbest != NULL); 2160 IPPOL_REFHOLD(curbest); 2161 2162 if (best != NULL) { 2163 IPPOL_REFRELE(best, ns); 2164 } 2165 } 2166 2167 rw_exit(&head->iph_lock); 2168 2169 return (curbest); 2170 } 2171 2172 /* 2173 * Find the best system policy (either global or per-interface) which 2174 * applies to the given selector; look in all the relevant policy roots 2175 * to figure out which policy wins. 2176 * 2177 * Returns a reference to a policy; caller must release this 2178 * reference when done. 2179 */ 2180 ipsec_policy_t * 2181 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2182 ipsec_selector_t *sel, netstack_t *ns) 2183 { 2184 ipsec_policy_t *p; 2185 ipsec_stack_t *ipss = ns->netstack_ipsec; 2186 2187 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2188 direction, sel, ns); 2189 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2190 p = ipsec_find_policy_head(p, connp->conn_policy, 2191 direction, sel, ns); 2192 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2193 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2194 direction, sel, ns); 2195 } 2196 2197 return (p); 2198 } 2199 2200 /* 2201 * Check with global policy and see whether this inbound 2202 * packet meets the policy constraints. 2203 * 2204 * Locate appropriate policy from global policy, supplemented by the 2205 * conn's configured and/or cached policy if the conn is supplied. 2206 * 2207 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2208 * encrypted packet to see if they match. 2209 * 2210 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2211 * floor. 2212 */ 2213 mblk_t * 2214 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2215 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) 2216 { 2217 ipsec_policy_t *p; 2218 ipsec_selector_t sel; 2219 mblk_t *data_mp, *ipsec_mp; 2220 boolean_t policy_present; 2221 kstat_named_t *counter; 2222 ipsec_in_t *ii = NULL; 2223 uint64_t pkt_unique; 2224 ipsec_stack_t *ipss = ns->netstack_ipsec; 2225 ip_stack_t *ipst = ns->netstack_ip; 2226 2227 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2228 ipsec_mp = mctl_present ? first_mp : NULL; 2229 2230 sel.ips_is_icmp_inv_acq = 0; 2231 2232 ASSERT((ipha == NULL && ip6h != NULL) || 2233 (ip6h == NULL && ipha != NULL)); 2234 2235 if (ipha != NULL) 2236 policy_present = ipss->ipsec_inbound_v4_policy_present; 2237 else 2238 policy_present = ipss->ipsec_inbound_v6_policy_present; 2239 2240 if (!policy_present && connp == NULL) { 2241 /* 2242 * No global policy and no per-socket policy; 2243 * just pass it back (but we shouldn't get here in that case) 2244 */ 2245 return (first_mp); 2246 } 2247 2248 if (ipsec_mp != NULL) { 2249 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2250 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2251 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2252 } 2253 2254 /* 2255 * If we have cached policy, use it. 2256 * Otherwise consult system policy. 2257 */ 2258 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2259 p = connp->conn_latch->ipl_in_policy; 2260 if (p != NULL) { 2261 IPPOL_REFHOLD(p); 2262 } 2263 /* 2264 * Fudge sel for UNIQUE_ID setting below. 2265 */ 2266 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2267 } else { 2268 /* Initialize the ports in the selector */ 2269 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2270 SEL_NONE) == SELRET_NOMEM) { 2271 /* 2272 * Technically not a policy mismatch, but it is 2273 * an internal failure. 
2274 */
2275 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH,
2276 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE, ns);
2277 counter = DROPPER(ipss, ipds_spd_nomem);
2278 goto fail;
2279 }
2280
2281 /*
2282 * Find the policy which best applies.
2283 *
2284 * If we find global policy, we should look at both
2285 * local policy and global policy and see which is
2286 * stronger and match accordingly.
2287 *
2288 * If we don't find a global policy, check with
2289 * local policy alone.
2290 */
2291
2292 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel,
2293 ns);
2294 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port,
2295 sel.ips_local_port, sel.ips_protocol, 0);
2296 }
2297
2298 if (p == NULL) {
2299 if (ipsec_mp == NULL) {
2300 /*
2301 * We have no policy; default to succeeding.
2302 * XXX paranoid system design doesn't do this.
2303 */
2304 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2305 return (first_mp);
2306 } else {
2307 counter = DROPPER(ipss, ipds_spd_got_secure);
2308 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED,
2309 "ipsec_check_global_policy", ipha, ip6h, B_TRUE,
2310 ns);
2311 goto fail;
2312 }
2313 }
2314 if ((ii != NULL) && (ii->ipsec_in_secure)) {
2315 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h,
2316 pkt_unique, ns));
2317 }
2318 if (p->ipsp_act->ipa_allow_clear) {
2319 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2320 IPPOL_REFRELE(p, ns);
2321 return (first_mp);
2322 }
2323 IPPOL_REFRELE(p, ns);
2324 /*
2325 * If we reach here, we will drop the packet because it failed the
2326 * global policy check: the packet was cleartext, and it
2327 * should not have been.
2328 */
2329 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH,
2330 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns);
2331 counter = DROPPER(ipss, ipds_spd_got_clear);
2332
2333 fail:
2334 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter,
2335 &ipss->ipsec_spd_dropper);
2336 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
2337 return (NULL);
2338 }
2339
2340 /*
2341 * Check whether an inbound datagram is a valid one
2342 * to accept in the clear. If it is secure, it is the job
2343 * of IPSEC to log information appropriately if it
2344 * suspects that it may not be the real one.
2345 *
2346 * It is called only while fanning out to a ULP
2347 * that accepts only secure data when the incoming datagram
2348 * is clear. Usually we never accept clear datagrams in
2349 * such cases; ICMP is the only exception.
2350 *
2351 * NOTE: We don't call this function if the client (ULP)
2352 * is willing to accept things in the clear.
2353 */
2354 boolean_t
2355 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
2356 {
2357 ushort_t iph_hdr_length;
2358 icmph_t *icmph;
2359 icmp6_t *icmp6;
2360 uint8_t *nexthdrp;
2361
2362 ASSERT((ipha != NULL && ip6h == NULL) ||
2363 (ipha == NULL && ip6h != NULL));
2364
2365 if (ip6h != NULL) {
2366 iph_hdr_length = ip_hdr_length_v6(mp, ip6h);
2367 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length,
2368 &nexthdrp)) {
2369 return (B_FALSE);
2370 }
2371 if (*nexthdrp != IPPROTO_ICMPV6)
2372 return (B_FALSE);
2373 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]);
2374 /* Match IPv6 ICMP policy to the IPv4 policy as closely as possible. */
2375 switch (icmp6->icmp6_type) {
2376 case ICMP6_PARAM_PROB:
2377 /* Corresponds to port/proto unreach in IPv4. */
2378 case ICMP6_ECHO_REQUEST:
2379 /* Just like IPv4.
*/
2380 return (B_FALSE);
2381
2382 case MLD_LISTENER_QUERY:
2383 case MLD_LISTENER_REPORT:
2384 case MLD_LISTENER_REDUCTION:
2385 /*
2386 * XXX Separate NDD in IPv4; what about here?
2387 * Plus, mcast is important to ND.
2388 */
2389 case ICMP6_DST_UNREACH:
2390 /* Corresponds to HOST/NET unreachable in IPv4. */
2391 case ICMP6_PACKET_TOO_BIG:
2392 case ICMP6_ECHO_REPLY:
2393 /* These are trusted in IPv4. */
2394 case ND_ROUTER_SOLICIT:
2395 case ND_ROUTER_ADVERT:
2396 case ND_NEIGHBOR_SOLICIT:
2397 case ND_NEIGHBOR_ADVERT:
2398 case ND_REDIRECT:
2399 /* Trust ND messages for now. */
2400 case ICMP6_TIME_EXCEEDED:
2401 default:
2402 return (B_TRUE);
2403 }
2404 } else {
2405 /*
2406 * If it is not ICMP, fail this request.
2407 */
2408 if (ipha->ipha_protocol != IPPROTO_ICMP) {
2409 #ifdef FRAGCACHE_DEBUG
2410 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
2411 ipha->ipha_protocol);
2412 #endif
2413 return (B_FALSE);
2414 }
2415 iph_hdr_length = IPH_HDR_LENGTH(ipha);
2416 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2417 /*
2418 * It is an insecure ICMP message. Check to see whether we are
2419 * willing to accept this one.
2420 */
2421
2422 switch (icmph->icmph_type) {
2423 case ICMP_ECHO_REPLY:
2424 case ICMP_TIME_STAMP_REPLY:
2425 case ICMP_INFO_REPLY:
2426 case ICMP_ROUTER_ADVERTISEMENT:
2427 /*
2428 * We should not encourage clear replies if this
2429 * client expects secure. If somebody replies
2430 * in clear, a malicious user watching both the
2431 * request and the reply can mount chosen-plaintext
2432 * attacks. With global policy we might just be
2433 * expecting secure but sending out clear. We don't
2434 * know what the right thing is; we can't do much
2435 * here as we can't control the sender. Till we are
2436 * sure of what to do, accept them.
2437 */
2438 return (B_TRUE);
2439 case ICMP_ECHO_REQUEST:
2440 case ICMP_TIME_STAMP_REQUEST:
2441 case ICMP_INFO_REQUEST:
2442 case ICMP_ADDRESS_MASK_REQUEST:
2443 case ICMP_ROUTER_SOLICITATION:
2444 case ICMP_ADDRESS_MASK_REPLY:
2445 /*
2446 * Don't accept these, as somebody could be sending
2447 * us plain text to get encrypted data. If we reply,
2448 * it will lead to a chosen-plaintext attack.
2449 */
2450 return (B_FALSE);
2451 case ICMP_DEST_UNREACHABLE:
2452 switch (icmph->icmph_code) {
2453 case ICMP_FRAGMENTATION_NEEDED:
2454 /*
2455 * Be in sync with icmp_inbound, where we have
2456 * already set ire_max_frag.
2457 */
2458 #ifdef FRAGCACHE_DEBUG
2459 cmn_err(CE_WARN, "ICMP frag needed\n");
2460 #endif
2461 return (B_TRUE);
2462 case ICMP_HOST_UNREACHABLE:
2463 case ICMP_NET_UNREACHABLE:
2464 /*
2465 * By accepting, we could reset a connection.
2466 * How do we solve the problem of some
2467 * intermediate router sending insecure ICMP
2468 * messages?
2469 */
2470 return (B_TRUE);
2471 case ICMP_PORT_UNREACHABLE:
2472 case ICMP_PROTOCOL_UNREACHABLE:
2473 default:
2474 return (B_FALSE);
2475 }
2476 case ICMP_SOURCE_QUENCH:
2477 /*
2478 * If this is an attack, TCP will slow start
2479 * because of this. Is it very harmful?
2480 */ 2481 return (B_TRUE); 2482 case ICMP_PARAM_PROBLEM: 2483 return (B_FALSE); 2484 case ICMP_TIME_EXCEEDED: 2485 return (B_TRUE); 2486 case ICMP_REDIRECT: 2487 return (B_FALSE); 2488 default : 2489 return (B_FALSE); 2490 } 2491 } 2492 } 2493 2494 void 2495 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2496 { 2497 mutex_enter(&ipl->ipl_lock); 2498 2499 if (ipl->ipl_ids_latched) { 2500 /* I lost, someone else got here before me */ 2501 mutex_exit(&ipl->ipl_lock); 2502 return; 2503 } 2504 2505 if (local != NULL) 2506 IPSID_REFHOLD(local); 2507 if (remote != NULL) 2508 IPSID_REFHOLD(remote); 2509 2510 ipl->ipl_local_cid = local; 2511 ipl->ipl_remote_cid = remote; 2512 ipl->ipl_ids_latched = B_TRUE; 2513 mutex_exit(&ipl->ipl_lock); 2514 } 2515 2516 void 2517 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii) 2518 { 2519 ipsa_t *sa; 2520 2521 if (!ipl->ipl_ids_latched) { 2522 ipsid_t *local = NULL; 2523 ipsid_t *remote = NULL; 2524 2525 if (!ii->ipsec_in_loopback) { 2526 if (ii->ipsec_in_esp_sa != NULL) 2527 sa = ii->ipsec_in_esp_sa; 2528 else 2529 sa = ii->ipsec_in_ah_sa; 2530 ASSERT(sa != NULL); 2531 local = sa->ipsa_dst_cid; 2532 remote = sa->ipsa_src_cid; 2533 } 2534 ipsec_latch_ids(ipl, local, remote); 2535 } 2536 ipl->ipl_in_action = ii->ipsec_in_action; 2537 IPACT_REFHOLD(ipl->ipl_in_action); 2538 } 2539 2540 /* 2541 * Check whether the policy constraints are met either for an 2542 * inbound datagram; called from IP in numerous places. 2543 * 2544 * Note that this is not a chokepoint for inbound policy checks; 2545 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy() 2546 */ 2547 mblk_t * 2548 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, 2549 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2550 { 2551 ipsec_in_t *ii; 2552 boolean_t ret; 2553 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp; 2554 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL; 2555 ipsec_latch_t *ipl; 2556 uint64_t unique_id; 2557 ipsec_stack_t *ipss; 2558 ip_stack_t *ipst; 2559 netstack_t *ns; 2560 2561 ASSERT(connp != NULL); 2562 ipl = connp->conn_latch; 2563 ns = connp->conn_netstack; 2564 ipss = ns->netstack_ipsec; 2565 ipst = ns->netstack_ip; 2566 2567 if (ipsec_mp == NULL) { 2568 clear: 2569 /* 2570 * This is the case where the incoming datagram is 2571 * cleartext and we need to see whether this client 2572 * would like to receive such untrustworthy things from 2573 * the wire. 2574 */ 2575 ASSERT(mp != NULL); 2576 2577 if (ipl != NULL) { 2578 /* 2579 * Policy is cached in the conn. 2580 */ 2581 if ((ipl->ipl_in_policy != NULL) && 2582 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) { 2583 ret = ipsec_inbound_accept_clear(mp, 2584 ipha, ip6h); 2585 if (ret) { 2586 BUMP_MIB(&ipst->ips_ip_mib, 2587 ipsecInSucceeded); 2588 return (first_mp); 2589 } else { 2590 ipsec_log_policy_failure( 2591 IPSEC_POLICY_MISMATCH, 2592 "ipsec_check_inbound_policy", ipha, 2593 ip6h, B_FALSE, ns); 2594 ip_drop_packet(first_mp, B_TRUE, NULL, 2595 NULL, 2596 DROPPER(ipss, ipds_spd_got_clear), 2597 &ipss->ipsec_spd_dropper); 2598 BUMP_MIB(&ipst->ips_ip_mib, 2599 ipsecInFailed); 2600 return (NULL); 2601 } 2602 } else { 2603 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2604 return (first_mp); 2605 } 2606 } else { 2607 /* 2608 * As this is a non-hardbound connection we need 2609 * to look at both per-socket policy and global 2610 * policy. 
As this is cleartext, mark the mp as
2611 * M_DATA in case it is an ICMP error being
2612 * reported, before calling ipsec_check_global_policy,
2613 * so that it does not mistake it for IPSEC_IN.
2614 */
2615 uchar_t db_type = mp->b_datap->db_type;
2616 mp->b_datap->db_type = M_DATA;
2617 first_mp = ipsec_check_global_policy(first_mp, connp,
2618 ipha, ip6h, mctl_present, ns);
2619 if (first_mp != NULL)
2620 mp->b_datap->db_type = db_type;
2621 return (first_mp);
2622 }
2623 }
2624 /*
2625 * If it is inbound, check whether the attached message
2626 * is secure or not. We have a special case for ICMP,
2627 * where we have an IPSEC_IN message and the attached
2628 * message is not secure. See icmp_inbound_error_fanout
2629 * for details.
2630 */
2631 ASSERT(ipsec_mp != NULL);
2632 ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
2633 ii = (ipsec_in_t *)ipsec_mp->b_rptr;
2634
2635 if (!ii->ipsec_in_secure)
2636 goto clear;
2637
2638 /*
2639 * mp->b_cont could be either an M_CTL message
2640 * for ICMP errors being sent up, or an M_DATA message.
2641 */
2642 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA);
2643
2644 ASSERT(ii->ipsec_in_type == IPSEC_IN);
2645
2646 if (ipl == NULL) {
2647 /*
2648 * We don't have policies cached in the conn
2649 * for this stream. So, look at the global
2650 * policy. It will check against conn or global
2651 * depending on whichever is stronger.
2652 */
2653 return (ipsec_check_global_policy(first_mp, connp,
2654 ipha, ip6h, mctl_present, ns));
2655 }
2656
2657 if (ipl->ipl_in_action != NULL) {
2658 /* Policy is cached & latched; fast(er) path */
2659 const char *reason;
2660 kstat_named_t *counter;
2661
2662 if (ipsec_check_ipsecin_latch(ii, mp, ipl,
2663 ipha, ip6h, &reason, &counter, connp)) {
2664 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2665 return (first_mp);
2666 }
2667 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0,
2668 SL_ERROR|SL_WARN|SL_CONSOLE,
2669 "ipsec inbound policy mismatch: %s, packet dropped\n",
2670 reason);
2671 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter,
2672 &ipss->ipsec_spd_dropper);
2673 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
2674 return (NULL);
2675 } else if (ipl->ipl_in_policy == NULL) {
2676 ipsec_weird_null_inbound_policy++;
2677 return (first_mp);
2678 }
2679
2680 unique_id = conn_to_unique(connp, mp, ipha, ip6h);
2681 IPPOL_REFHOLD(ipl->ipl_in_policy);
2682 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy,
2683 ipha, ip6h, unique_id, ns);
2684 /*
2685 * NOTE: ipsecIn{Failed,Succeeded} bumped by
2686 * ipsec_check_ipsecin_policy().
2687 */
2688 if (first_mp != NULL)
2689 ipsec_latch_inbound(ipl, ii);
2690 return (first_mp);
2691 }
2692
2693 /*
2694 * Returns:
2695 *
2696 * SELRET_NOMEM --> msgpullup() needed to gather things failed.
2697 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment
2698 * gathering, the initial fragment is too short for
2699 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is
2700 * set.
2701 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data.
2702 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller
2703 * should put this packet in a fragment-gathering queue.
2704 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY
2705 * are set.
2706 */
2707 static selret_t
2708 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
2709 ip6_t *ip6h, uint8_t sel_flags)
2710 {
2711 uint16_t *ports;
2712 ushort_t hdr_len;
2713 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases...
*/ 2714 mblk_t *spare_mp = NULL; 2715 uint8_t *nexthdrp; 2716 uint8_t nexthdr; 2717 uint8_t *typecode; 2718 uint8_t check_proto; 2719 ip6_pkt_t ipp; 2720 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2721 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2722 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2723 2724 ASSERT((ipha == NULL && ip6h != NULL) || 2725 (ipha != NULL && ip6h == NULL)); 2726 2727 if (ip6h != NULL) { 2728 if (is_icmp) 2729 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2730 2731 check_proto = IPPROTO_ICMPV6; 2732 sel->ips_isv4 = B_FALSE; 2733 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2734 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2735 2736 bzero(&ipp, sizeof (ipp)); 2737 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2738 2739 nexthdr = ip6h->ip6_nxt; 2740 switch (nexthdr) { 2741 case IPPROTO_HOPOPTS: 2742 case IPPROTO_ROUTING: 2743 case IPPROTO_DSTOPTS: 2744 case IPPROTO_FRAGMENT: 2745 /* 2746 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2747 * mblk that's contiguous to feed it 2748 */ 2749 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2750 return (SELRET_NOMEM); 2751 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2752 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2753 &hdr_len, &nexthdrp)) { 2754 /* Malformed packet - caller frees. */ 2755 ipsec_freemsg_chain(spare_mp); 2756 return (SELRET_BADPKT); 2757 } 2758 nexthdr = *nexthdrp; 2759 /* We can just extract based on hdr_len now. */ 2760 break; 2761 default: 2762 hdr_len = IPV6_HDR_LEN; 2763 break; 2764 } 2765 2766 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2767 /* IPv6 Fragment */ 2768 ipsec_freemsg_chain(spare_mp); 2769 return (SELRET_TUNFRAG); 2770 } 2771 } else { 2772 if (is_icmp) 2773 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2774 check_proto = IPPROTO_ICMP; 2775 sel->ips_isv4 = B_TRUE; 2776 sel->ips_local_addr_v4 = ipha->ipha_dst; 2777 sel->ips_remote_addr_v4 = ipha->ipha_src; 2778 nexthdr = ipha->ipha_protocol; 2779 hdr_len = IPH_HDR_LENGTH(ipha); 2780 2781 if (port_policy_present && 2782 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2783 !is_icmp) { 2784 /* IPv4 Fragment */ 2785 ipsec_freemsg_chain(spare_mp); 2786 return (SELRET_TUNFRAG); 2787 } 2788 2789 } 2790 sel->ips_protocol = nexthdr; 2791 2792 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2793 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2794 (!port_policy_present && tunnel_mode)) { 2795 sel->ips_remote_port = sel->ips_local_port = 0; 2796 ipsec_freemsg_chain(spare_mp); 2797 return (SELRET_SUCCESS); 2798 } 2799 2800 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2801 /* If we didn't pullup a copy already, do so now. */ 2802 /* 2803 * XXX performance, will upper-layers frequently split TCP/UDP 2804 * apart from IP or options? If so, perhaps we should revisit 2805 * the spare_mp strategy. 
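 *
 * (For reference: the 4 bytes in the bounds check above are all the
 * selector code reads, i.e., either the two 16-bit TCP/UDP/SCTP port
 * fields or the one-byte ICMP type and code.)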
2806 */ 2807 ipsec_hdr_pullup_needed++; 2808 if (spare_mp == NULL && 2809 (spare_mp = msgpullup(mp, -1)) == NULL) { 2810 return (SELRET_NOMEM); 2811 } 2812 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2813 } else { 2814 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2815 } 2816 2817 if (nexthdr == check_proto) { 2818 typecode = (uint8_t *)ports; 2819 sel->ips_icmp_type = *typecode++; 2820 sel->ips_icmp_code = *typecode; 2821 sel->ips_remote_port = sel->ips_local_port = 0; 2822 } else { 2823 sel->ips_remote_port = *ports++; 2824 sel->ips_local_port = *ports; 2825 } 2826 ipsec_freemsg_chain(spare_mp); 2827 return (SELRET_SUCCESS); 2828 } 2829 2830 static boolean_t 2831 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2832 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2833 { 2834 /* 2835 * XXX cut&paste shared with ipsec_init_inbound_sel 2836 */ 2837 uint16_t *ports; 2838 ushort_t hdr_len; 2839 mblk_t *spare_mp = NULL; 2840 uint8_t *nexthdrp; 2841 uint8_t nexthdr; 2842 uint8_t *typecode; 2843 uint8_t check_proto; 2844 2845 ASSERT((ipha == NULL && ip6h != NULL) || 2846 (ipha != NULL && ip6h == NULL)); 2847 2848 if (ip6h != NULL) { 2849 check_proto = IPPROTO_ICMPV6; 2850 nexthdr = ip6h->ip6_nxt; 2851 switch (nexthdr) { 2852 case IPPROTO_HOPOPTS: 2853 case IPPROTO_ROUTING: 2854 case IPPROTO_DSTOPTS: 2855 case IPPROTO_FRAGMENT: 2856 /* 2857 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2858 * mblk that's contiguous to feed it 2859 */ 2860 spare_mp = msgpullup(mp, -1); 2861 if (spare_mp == NULL || 2862 !ip_hdr_length_nexthdr_v6(spare_mp, 2863 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2864 &hdr_len, &nexthdrp)) { 2865 /* Always works, even if NULL. */ 2866 ipsec_freemsg_chain(spare_mp); 2867 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2868 DROPPER(ipss, ipds_spd_nomem), 2869 &ipss->ipsec_spd_dropper); 2870 return (B_FALSE); 2871 } else { 2872 nexthdr = *nexthdrp; 2873 /* We can just extract based on hdr_len now. */ 2874 } 2875 break; 2876 default: 2877 hdr_len = IPV6_HDR_LEN; 2878 break; 2879 } 2880 } else { 2881 check_proto = IPPROTO_ICMP; 2882 hdr_len = IPH_HDR_LENGTH(ipha); 2883 nexthdr = ipha->ipha_protocol; 2884 } 2885 2886 sel->ips_protocol = nexthdr; 2887 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2888 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2889 sel->ips_local_port = sel->ips_remote_port = 0; 2890 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2891 return (B_TRUE); 2892 } 2893 2894 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2895 /* If we didn't pullup a copy already, do so now. */ 2896 /* 2897 * XXX performance, will upper-layers frequently split TCP/UDP 2898 * apart from IP or options? If so, perhaps we should revisit 2899 * the spare_mp strategy. 2900 * 2901 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2902 */ 2903 if (spare_mp == NULL && 2904 (spare_mp = msgpullup(mp, -1)) == NULL) { 2905 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2906 DROPPER(ipss, ipds_spd_nomem), 2907 &ipss->ipsec_spd_dropper); 2908 return (B_FALSE); 2909 } 2910 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2911 } else { 2912 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2913 } 2914 2915 if (nexthdr == check_proto) { 2916 typecode = (uint8_t *)ports; 2917 sel->ips_icmp_type = *typecode++; 2918 sel->ips_icmp_code = *typecode; 2919 sel->ips_remote_port = sel->ips_local_port = 0; 2920 } else { 2921 sel->ips_local_port = *ports++; 2922 sel->ips_remote_port = *ports; 2923 } 2924 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2925 return (B_TRUE); 2926 } 2927 2928 /* 2929 * Create an ipsec_action_t based on the way an inbound packet was protected. 2930 * Used to reflect traffic back to a sender. 2931 * 2932 * We don't bother interning the action into the hash table. 2933 */ 2934 ipsec_action_t * 2935 ipsec_in_to_out_action(ipsec_in_t *ii) 2936 { 2937 ipsa_t *ah_assoc, *esp_assoc; 2938 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2939 ipsec_action_t *ap; 2940 boolean_t unique; 2941 2942 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2943 2944 if (ap == NULL) 2945 return (NULL); 2946 2947 bzero(ap, sizeof (*ap)); 2948 HASH_NULL(ap, ipa_hash); 2949 ap->ipa_next = NULL; 2950 ap->ipa_refs = 1; 2951 2952 /* 2953 * Get the algorithms that were used for this packet. 2954 */ 2955 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2956 ap->ipa_act.ipa_log = 0; 2957 ah_assoc = ii->ipsec_in_ah_sa; 2958 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 2959 2960 esp_assoc = ii->ipsec_in_esp_sa; 2961 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 2962 2963 if (esp_assoc != NULL) { 2964 encr_alg = esp_assoc->ipsa_encr_alg; 2965 espa_alg = esp_assoc->ipsa_auth_alg; 2966 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 2967 } 2968 if (ah_assoc != NULL) 2969 auth_alg = ah_assoc->ipsa_auth_alg; 2970 2971 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 2972 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 2973 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 2974 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 2975 unique = B_FALSE; 2976 2977 if (esp_assoc != NULL) { 2978 ap->ipa_act.ipa_apply.ipp_espa_minbits = 2979 esp_assoc->ipsa_authkeybits; 2980 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 2981 esp_assoc->ipsa_authkeybits; 2982 ap->ipa_act.ipa_apply.ipp_espe_minbits = 2983 esp_assoc->ipsa_encrkeybits; 2984 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 2985 esp_assoc->ipsa_encrkeybits; 2986 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 2987 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 2988 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 2989 unique = B_TRUE; 2990 } 2991 if (ah_assoc != NULL) { 2992 ap->ipa_act.ipa_apply.ipp_ah_minbits = 2993 ah_assoc->ipsa_authkeybits; 2994 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 2995 ah_assoc->ipsa_authkeybits; 2996 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 2997 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 2998 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 2999 unique = B_TRUE; 3000 } 3001 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3002 ap->ipa_want_unique = unique; 3003 ap->ipa_allow_clear = B_FALSE; 3004 ap->ipa_want_se = ii->ipsec_in_decaps; 3005 ap->ipa_want_ah = (ah_assoc != NULL); 3006 ap->ipa_want_esp = (esp_assoc != NULL); 3007 3008 ap->ipa_ovhd = 
ipsec_act_ovhd(&ap->ipa_act); 3009 3010 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3011 3012 return (ap); 3013 } 3014 3015 3016 /* 3017 * Compute the worst-case amount of extra space required by an action. 3018 * Note that, because of the ESP considerations listed below, this is 3019 * actually not the same as the best-case reduction in the MTU; in the 3020 * future, we should pass additional information to this function to 3021 * allow the actual MTU impact to be computed. 3022 * 3023 * AH: Revisit this if we implement algorithms with 3024 * a verifier size of more than 12 bytes. 3025 * 3026 * ESP: A more exact but more messy computation would take into 3027 * account the interaction between the cipher block size and the 3028 * effective MTU, yielding the inner payload size which reflects a 3029 * packet with *minimum* ESP padding.. 3030 */ 3031 int32_t 3032 ipsec_act_ovhd(const ipsec_act_t *act) 3033 { 3034 int32_t overhead = 0; 3035 3036 if (act->ipa_type == IPSEC_ACT_APPLY) { 3037 const ipsec_prot_t *ipp = &act->ipa_apply; 3038 3039 if (ipp->ipp_use_ah) 3040 overhead += IPSEC_MAX_AH_HDR_SIZE; 3041 if (ipp->ipp_use_esp) { 3042 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3043 overhead += sizeof (struct udphdr); 3044 } 3045 if (ipp->ipp_use_se) 3046 overhead += IP_SIMPLE_HDR_LENGTH; 3047 } 3048 return (overhead); 3049 } 3050 3051 /* 3052 * This hash function is used only when creating policies and thus is not 3053 * performance-critical for packet flows. 3054 * 3055 * Future work: canonicalize the structures hashed with this (i.e., 3056 * zeroize padding) so the hash works correctly. 3057 */ 3058 /* ARGSUSED */ 3059 static uint32_t 3060 policy_hash(int size, const void *start, const void *end) 3061 { 3062 return (0); 3063 } 3064 3065 3066 /* 3067 * Hash function macros for each address type. 3068 * 3069 * The IPV6 hash function assumes that the low order 32-bits of the 3070 * address (typically containing the low order 24 bits of the mac 3071 * address) are reasonably well-distributed. Revisit this if we run 3072 * into trouble from lots of collisions on ::1 addresses and the like 3073 * (seems unlikely). 3074 */ 3075 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3076 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3077 3078 /* 3079 * These two hash functions should produce coordinated values 3080 * but have slightly different roles. 3081 */ 3082 static uint32_t 3083 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3084 { 3085 uint32_t valid = selkey->ipsl_valid; 3086 ipsec_stack_t *ipss = ns->netstack_ipsec; 3087 3088 if (!(valid & IPSL_REMOTE_ADDR)) 3089 return (IPSEC_SEL_NOHASH); 3090 3091 if (valid & IPSL_IPV4) { 3092 if (selkey->ipsl_remote_pfxlen == 32) { 3093 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3094 ipss->ipsec_spd_hashsize)); 3095 } 3096 } 3097 if (valid & IPSL_IPV6) { 3098 if (selkey->ipsl_remote_pfxlen == 128) { 3099 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3100 ipss->ipsec_spd_hashsize)); 3101 } 3102 } 3103 return (IPSEC_SEL_NOHASH); 3104 } 3105 3106 static uint32_t 3107 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3108 { 3109 if (sel->ips_isv4) { 3110 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3111 root->ipr_nchains)); 3112 } 3113 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3114 } 3115 3116 /* 3117 * Intern actions into the action hash table. 
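 *
 * Usage sketch (illustrative; this is the pattern ipsec_policy_create()
 * below follows). The chain that comes back carries a caller reference:
 *
 *	ap = ipsec_act_find(a, nacts, ns);
 *	if (ap == NULL)
 *		return (NULL);	(allocation failure)
 *	...
 *	IPACT_REFRELE(ap);	(when the last user is done)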
3118 */ 3119 ipsec_action_t * 3120 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3121 { 3122 int i; 3123 uint32_t hval; 3124 ipsec_action_t *ap; 3125 ipsec_action_t *prev = NULL; 3126 int32_t overhead, maxovhd = 0; 3127 boolean_t allow_clear = B_FALSE; 3128 boolean_t want_ah = B_FALSE; 3129 boolean_t want_esp = B_FALSE; 3130 boolean_t want_se = B_FALSE; 3131 boolean_t want_unique = B_FALSE; 3132 ipsec_stack_t *ipss = ns->netstack_ipsec; 3133 3134 /* 3135 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3136 * so we can use a non-trivial policy_hash function. 3137 */ 3138 for (i = n-1; i >= 0; i--) { 3139 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3140 3141 HASH_LOCK(ipss->ipsec_action_hash, hval); 3142 3143 for (HASH_ITERATE(ap, ipa_hash, 3144 ipss->ipsec_action_hash, hval)) { 3145 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3146 continue; 3147 if (ap->ipa_next != prev) 3148 continue; 3149 break; 3150 } 3151 if (ap != NULL) { 3152 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3153 prev = ap; 3154 continue; 3155 } 3156 /* 3157 * need to allocate a new one.. 3158 */ 3159 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3160 if (ap == NULL) { 3161 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3162 if (prev != NULL) 3163 ipsec_action_free(prev); 3164 return (NULL); 3165 } 3166 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3167 3168 ap->ipa_next = prev; 3169 ap->ipa_act = a[i]; 3170 3171 overhead = ipsec_act_ovhd(&a[i]); 3172 if (maxovhd < overhead) 3173 maxovhd = overhead; 3174 3175 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3176 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3177 allow_clear = B_TRUE; 3178 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3179 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3180 3181 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3182 want_ah |= ipp->ipp_use_ah; 3183 want_esp |= ipp->ipp_use_esp; 3184 want_se |= ipp->ipp_use_se; 3185 want_unique |= ipp->ipp_use_unique; 3186 } 3187 ap->ipa_allow_clear = allow_clear; 3188 ap->ipa_want_ah = want_ah; 3189 ap->ipa_want_esp = want_esp; 3190 ap->ipa_want_se = want_se; 3191 ap->ipa_want_unique = want_unique; 3192 ap->ipa_refs = 1; /* from the hash table */ 3193 ap->ipa_ovhd = maxovhd; 3194 if (prev) 3195 prev->ipa_refs++; 3196 prev = ap; 3197 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3198 } 3199 3200 ap->ipa_refs++; /* caller's reference */ 3201 3202 return (ap); 3203 } 3204 3205 /* 3206 * Called when refcount goes to 0, indicating that all references to this 3207 * node are gone. 3208 * 3209 * This does not unchain the action from the hash table. 3210 */ 3211 void 3212 ipsec_action_free(ipsec_action_t *ap) 3213 { 3214 for (;;) { 3215 ipsec_action_t *np = ap->ipa_next; 3216 ASSERT(ap->ipa_refs == 0); 3217 ASSERT(ap->ipa_hash.hash_pp == NULL); 3218 kmem_cache_free(ipsec_action_cache, ap); 3219 ap = np; 3220 /* Inlined IPACT_REFRELE -- avoid recursion */ 3221 if (ap == NULL) 3222 break; 3223 membar_exit(); 3224 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3225 break; 3226 /* End inlined IPACT_REFRELE */ 3227 } 3228 } 3229 3230 /* 3231 * Called when the action hash table goes away. 3232 * 3233 * The actions can be queued on an mblk with ipsec_in or 3234 * ipsec_out, hence the actions might still be around. 3235 * But we decrement ipa_refs here since we no longer have 3236 * a reference to the action from the hash table. 
3237 */ 3238 static void 3239 ipsec_action_free_table(ipsec_action_t *ap) 3240 { 3241 while (ap != NULL) { 3242 ipsec_action_t *np = ap->ipa_next; 3243 3244 /* FIXME: remove? */ 3245 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3246 (void *)ap, ap->ipa_refs); 3247 ASSERT(ap->ipa_refs > 0); 3248 IPACT_REFRELE(ap); 3249 ap = np; 3250 } 3251 } 3252 3253 /* 3254 * Need to walk all stack instances since the reclaim function 3255 * is global for all instances 3256 */ 3257 /* ARGSUSED */ 3258 static void 3259 ipsec_action_reclaim(void *arg) 3260 { 3261 netstack_handle_t nh; 3262 netstack_t *ns; 3263 3264 netstack_next_init(&nh); 3265 while ((ns = netstack_next(&nh)) != NULL) { 3266 ipsec_action_reclaim_stack(ns); 3267 netstack_rele(ns); 3268 } 3269 netstack_next_fini(&nh); 3270 } 3271 3272 /* 3273 * Periodically sweep action hash table for actions with refcount==1, and 3274 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3275 * because we can't close the race between another thread finding the action 3276 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3277 * Instead, we run this function sporadically to clean up after ourselves; 3278 * we also set it as the "reclaim" function for the action kmem_cache. 3279 * 3280 * Note that it may take several passes of ipsec_action_gc() to free all 3281 * "stale" actions. 3282 */ 3283 static void 3284 ipsec_action_reclaim_stack(netstack_t *ns) 3285 { 3286 int i; 3287 ipsec_stack_t *ipss = ns->netstack_ipsec; 3288 3289 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3290 ipsec_action_t *ap, *np; 3291 3292 /* skip the lock if nobody home */ 3293 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3294 continue; 3295 3296 HASH_LOCK(ipss->ipsec_action_hash, i); 3297 for (ap = ipss->ipsec_action_hash[i].hash_head; 3298 ap != NULL; ap = np) { 3299 ASSERT(ap->ipa_refs > 0); 3300 np = ap->ipa_hash.hash_next; 3301 if (ap->ipa_refs > 1) 3302 continue; 3303 HASH_UNCHAIN(ap, ipa_hash, 3304 ipss->ipsec_action_hash, i); 3305 IPACT_REFRELE(ap); 3306 } 3307 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3308 } 3309 } 3310 3311 /* 3312 * Intern a selector set into the selector set hash table. 3313 * This is simpler than the actions case.. 3314 */ 3315 static ipsec_sel_t * 3316 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3317 { 3318 ipsec_sel_t *sp; 3319 uint32_t hval, bucket; 3320 ipsec_stack_t *ipss = ns->netstack_ipsec; 3321 3322 /* 3323 * Exactly one AF bit should be set in selkey. 3324 */ 3325 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3326 !(selkey->ipsl_valid & IPSL_IPV6)); 3327 3328 hval = selkey_hash(selkey, ns); 3329 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3330 selkey->ipsl_sel_hval = hval; 3331 3332 bucket = (hval == IPSEC_SEL_NOHASH) ? 
0 : hval;
3333
3334 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket));
3335 HASH_LOCK(ipss->ipsec_sel_hash, bucket);
3336
3337 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) {
3338 if (bcmp(&sp->ipsl_key, selkey,
3339 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0)
3340 break;
3341 }
3342 if (sp != NULL) {
3343 sp->ipsl_refs++;
3344
3345 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);
3346 return (sp);
3347 }
3348
3349 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP);
3350 if (sp == NULL) {
3351 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);
3352 return (NULL);
3353 }
3354
3355 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket);
3356 sp->ipsl_refs = 2; /* one for hash table, one for caller */
3357 sp->ipsl_key = *selkey;
3358 /* Set to uninitialized and have insertion into polhead fix things. */
3359 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH)
3360 sp->ipsl_key.ipsl_pol_hval = 0;
3361 else
3362 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH;
3363
3364 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);
3365
3366 return (sp);
3367 }
3368
3369 static void
3370 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns)
3371 {
3372 ipsec_sel_t *sp = *spp;
3373 int hval = sp->ipsl_key.ipsl_sel_hval;
3374 ipsec_stack_t *ipss = ns->netstack_ipsec;
3375
3376 *spp = NULL;
3377
3378 if (hval == IPSEC_SEL_NOHASH)
3379 hval = 0;
3380
3381 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval));
3382 HASH_LOCK(ipss->ipsec_sel_hash, hval);
3383 if (--sp->ipsl_refs == 1) {
3384 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval);
3385 sp->ipsl_refs--;
3386 HASH_UNLOCK(ipss->ipsec_sel_hash, hval);
3387 ASSERT(sp->ipsl_refs == 0);
3388 kmem_cache_free(ipsec_sel_cache, sp);
3389 /* Caller unlocks */
3390 return;
3391 }
3392
3393 HASH_UNLOCK(ipss->ipsec_sel_hash, hval);
3394 }
3395
3396 /*
3397 * Free a policy rule which we know is no longer being referenced.
3398 */
3399 void
3400 ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns)
3401 {
3402 ASSERT(ipp->ipsp_refs == 0);
3403 ASSERT(ipp->ipsp_sel != NULL);
3404 ASSERT(ipp->ipsp_act != NULL);
3405
3406 ipsec_sel_rel(&ipp->ipsp_sel, ns);
3407 IPACT_REFRELE(ipp->ipsp_act);
3408 kmem_cache_free(ipsec_pol_cache, ipp);
3409 }
3410
3411 /*
3412 * Construction of new policy rules; construct a policy, and add it to
3413 * the appropriate tables.
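 *
 * Illustrative call sequence (assembled from ipsec_check_policy() and
 * ipsec_enter_policy() below, both of which expect iph_lock to be
 * write-held):
 *
 *	ipp = ipsec_policy_create(&keys, acts, nacts, prio, NULL, ns);
 *	rw_enter(&php->iph_lock, RW_WRITER);
 *	if (ipp != NULL && ipsec_check_policy(php, ipp, dir))
 *		ipsec_enter_policy(php, ipp, dir, ns);
 *	rw_exit(&php->iph_lock);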
3414 */ 3415 ipsec_policy_t * 3416 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3417 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3418 { 3419 ipsec_action_t *ap; 3420 ipsec_sel_t *sp; 3421 ipsec_policy_t *ipp; 3422 ipsec_stack_t *ipss = ns->netstack_ipsec; 3423 3424 if (index_ptr == NULL) 3425 index_ptr = &ipss->ipsec_next_policy_index; 3426 3427 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3428 ap = ipsec_act_find(a, nacts, ns); 3429 sp = ipsec_find_sel(keys, ns); 3430 3431 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3432 if (ap != NULL) { 3433 IPACT_REFRELE(ap); 3434 } 3435 if (sp != NULL) 3436 ipsec_sel_rel(&sp, ns); 3437 if (ipp != NULL) 3438 kmem_cache_free(ipsec_pol_cache, ipp); 3439 return (NULL); 3440 } 3441 3442 HASH_NULL(ipp, ipsp_hash); 3443 3444 ipp->ipsp_refs = 1; /* caller's reference */ 3445 ipp->ipsp_sel = sp; 3446 ipp->ipsp_act = ap; 3447 ipp->ipsp_prio = prio; /* rule priority */ 3448 ipp->ipsp_index = *index_ptr; 3449 (*index_ptr)++; 3450 3451 return (ipp); 3452 } 3453 3454 static void 3455 ipsec_update_present_flags(ipsec_stack_t *ipss) 3456 { 3457 boolean_t hashpol; 3458 3459 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3460 3461 if (hashpol) { 3462 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3463 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3464 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3465 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3466 return; 3467 } 3468 3469 ipss->ipsec_outbound_v4_policy_present = (NULL != 3470 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3471 ipr_nonhash[IPSEC_AF_V4]); 3472 ipss->ipsec_outbound_v6_policy_present = (NULL != 3473 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3474 ipr_nonhash[IPSEC_AF_V6]); 3475 ipss->ipsec_inbound_v4_policy_present = (NULL != 3476 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3477 ipr_nonhash[IPSEC_AF_V4]); 3478 ipss->ipsec_inbound_v6_policy_present = (NULL != 3479 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3480 ipr_nonhash[IPSEC_AF_V6]); 3481 } 3482 3483 boolean_t 3484 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3485 netstack_t *ns) 3486 { 3487 ipsec_sel_t *sp; 3488 ipsec_policy_t *ip, *nip, *head; 3489 int af; 3490 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3491 3492 sp = ipsec_find_sel(keys, ns); 3493 3494 if (sp == NULL) 3495 return (B_FALSE); 3496 3497 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6;
3498
3499 rw_enter(&php->iph_lock, RW_WRITER);
3500
3501 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) {
3502 head = pr->ipr_nonhash[af];
3503 } else {
3504 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head;
3505 }
3506
3507 for (ip = head; ip != NULL; ip = nip) {
3508 nip = ip->ipsp_hash.hash_next;
3509 if (ip->ipsp_sel != sp) {
3510 continue;
3511 }
3512
3513 IPPOL_UNCHAIN(php, ip, ns);
3514
3515 php->iph_gen++;
3516 ipsec_update_present_flags(ns->netstack_ipsec);
3517
3518 rw_exit(&php->iph_lock);
3519
3520 ipsec_sel_rel(&sp, ns);
3521
3522 return (B_TRUE);
3523 }
3524
3525 rw_exit(&php->iph_lock);
3526 ipsec_sel_rel(&sp, ns);
3527 return (B_FALSE);
3528 }
3529
3530 int
3531 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index,
3532 netstack_t *ns)
3533 {
3534 boolean_t found = B_FALSE;
3535 ipsec_policy_t ipkey;
3536 ipsec_policy_t *ip;
3537 avl_index_t where;
3538
3539 (void) memset(&ipkey, 0, sizeof (ipkey));
3540 ipkey.ipsp_index = policy_index;
3541
3542 rw_enter(&php->iph_lock, RW_WRITER);
3543
3544 /*
3545 * We could be cleverer here about the walk;
3546 * (k+1)*log(N) will do for now (k == number of matches,
3547 * N == number of table entries).
3548 */
3549 for (;;) {
3550 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid,
3551 (void *)&ipkey, &where);
3552 ASSERT(ip == NULL);
3553
3554 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER);
3555
3556 if (ip == NULL)
3557 break;
3558
3559 if (ip->ipsp_index != policy_index) {
3560 ASSERT(ip->ipsp_index > policy_index);
3561 break;
3562 }
3563
3564 IPPOL_UNCHAIN(php, ip, ns);
3565 found = B_TRUE;
3566 }
3567
3568 if (found) {
3569 php->iph_gen++;
3570 ipsec_update_present_flags(ns->netstack_ipsec);
3571 }
3572
3573 rw_exit(&php->iph_lock);
3574
3575 return (found ? 0 : ENOENT);
3576 }
3577
3578 /*
3579 * Given a constructed ipsec_policy_t policy rule, see if it can be entered
3580 * into the correct policy ruleset. As a side-effect, it sets the hash
3581 * entries on "ipp"'s ipsp_pol_hval.
3582 *
3583 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because
3584 * a duplicate policy exists with exactly the same selectors, or an ICMP
3585 * rule exists with a different encryption/authentication action).
3586 */
3587 boolean_t
3588 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
3589 {
3590 ipsec_policy_root_t *pr = &php->iph_root[direction];
3591 int af = -1;
3592 ipsec_policy_t *p2, *head;
3593 uint8_t check_proto;
3594 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3595 uint32_t valid = selkey->ipsl_valid;
3596
3597 if (valid & IPSL_IPV6) {
3598 ASSERT(!(valid & IPSL_IPV4));
3599 af = IPSEC_AF_V6;
3600 check_proto = IPPROTO_ICMPV6;
3601 } else {
3602 ASSERT(valid & IPSL_IPV4);
3603 af = IPSEC_AF_V4;
3604 check_proto = IPPROTO_ICMP;
3605 }
3606
3607 ASSERT(RW_WRITE_HELD(&php->iph_lock));
3608
3609 /*
3610 * Double-check that we don't have any duplicate selectors here.
3611 * Because selectors are interned below, we need only compare pointers
3612 * for equality.
3613 */
3614 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
3615 head = pr->ipr_nonhash[af];
3616 } else {
3617 selkey->ipsl_pol_hval =
3618 (selkey->ipsl_valid & IPSL_IPV4) ?
3619 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3620 pr->ipr_nchains) : 3621 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3622 pr->ipr_nchains); 3623 3624 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3625 } 3626 3627 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3628 if (p2->ipsp_sel == ipp->ipsp_sel) 3629 return (B_FALSE); 3630 } 3631 3632 /* 3633 * If it's ICMP and not a drop or pass rule, run through the ICMP 3634 * rules and make sure the action is either new or the same as any 3635 * other actions. We don't have to check the full chain because 3636 * discard and bypass will override all other actions 3637 */ 3638 3639 if (valid & IPSL_PROTOCOL && 3640 selkey->ipsl_proto == check_proto && 3641 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3642 3643 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3644 3645 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3646 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3647 (p2->ipsp_act->ipa_act.ipa_type == 3648 IPSEC_ACT_APPLY)) { 3649 return (ipsec_compare_action(p2, ipp)); 3650 } 3651 } 3652 } 3653 3654 return (B_TRUE); 3655 } 3656 3657 /* 3658 * compare the action chains of two policies for equality 3659 * B_TRUE -> effective equality 3660 */ 3661 3662 static boolean_t 3663 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3664 { 3665 3666 ipsec_action_t *act1, *act2; 3667 3668 /* We have a valid rule. Let's compare the actions */ 3669 if (p1->ipsp_act == p2->ipsp_act) { 3670 /* same action. We are good */ 3671 return (B_TRUE); 3672 } 3673 3674 /* we have to walk the chain */ 3675 3676 act1 = p1->ipsp_act; 3677 act2 = p2->ipsp_act; 3678 3679 while (act1 != NULL && act2 != NULL) { 3680 3681 /* otherwise, Are we close enough? */ 3682 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3683 act1->ipa_want_ah != act2->ipa_want_ah || 3684 act1->ipa_want_esp != act2->ipa_want_esp || 3685 act1->ipa_want_se != act2->ipa_want_se) { 3686 /* Nope, we aren't */ 3687 return (B_FALSE); 3688 } 3689 3690 if (act1->ipa_want_ah) { 3691 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3692 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3693 return (B_FALSE); 3694 } 3695 3696 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3697 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3698 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3699 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3700 return (B_FALSE); 3701 } 3702 } 3703 3704 if (act1->ipa_want_esp) { 3705 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3706 act2->ipa_act.ipa_apply.ipp_use_esp || 3707 act1->ipa_act.ipa_apply.ipp_use_espa != 3708 act2->ipa_act.ipa_apply.ipp_use_espa) { 3709 return (B_FALSE); 3710 } 3711 3712 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3713 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3714 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3715 return (B_FALSE); 3716 } 3717 3718 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3719 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3720 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3721 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3722 return (B_FALSE); 3723 } 3724 } 3725 3726 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3727 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3728 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3729 return (B_FALSE); 3730 } 3731 3732 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3733 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3734 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3735 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3736 return (B_FALSE); 3737 } 3738 } 3739 3740 } 3741 3742 act1 = 
act1->ipa_next; 3743 act2 = act2->ipa_next; 3744 } 3745 3746 if (act1 != NULL || act2 != NULL) { 3747 return (B_FALSE); 3748 } 3749 3750 return (B_TRUE); 3751 } 3752 3753 3754 /* 3755 * Given a constructed ipsec_policy_t policy rule, enter it into 3756 * the correct policy ruleset. 3757 * 3758 * ipsec_check_policy() is assumed to have succeeded first (to check for 3759 * duplicates). 3760 */ 3761 void 3762 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3763 netstack_t *ns) 3764 { 3765 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3766 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3767 uint32_t valid = selkey->ipsl_valid; 3768 uint32_t hval = selkey->ipsl_pol_hval; 3769 int af = -1; 3770 3771 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3772 3773 if (valid & IPSL_IPV6) { 3774 ASSERT(!(valid & IPSL_IPV4)); 3775 af = IPSEC_AF_V6; 3776 } else { 3777 ASSERT(valid & IPSL_IPV4); 3778 af = IPSEC_AF_V4; 3779 } 3780 3781 php->iph_gen++; 3782 3783 if (hval == IPSEC_SEL_NOHASH) { 3784 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3785 } else { 3786 HASH_LOCK(pr->ipr_hash, hval); 3787 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3788 HASH_UNLOCK(pr->ipr_hash, hval); 3789 } 3790 3791 ipsec_insert_always(&php->iph_rulebyid, ipp); 3792 3793 ipsec_update_present_flags(ns->netstack_ipsec); 3794 } 3795 3796 static void 3797 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, 3798 netstack_t *ns) 3799 { 3800 ipsec_policy_t *ip, *nip; 3801 int af, chain, nchain; 3802 3803 for (af = 0; af < IPSEC_NAF; af++) { 3804 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3805 nip = ip->ipsp_hash.hash_next; 3806 IPPOL_UNCHAIN(php, ip, ns); 3807 } 3808 ipr->ipr_nonhash[af] = NULL; 3809 } 3810 nchain = ipr->ipr_nchains; 3811 3812 for (chain = 0; chain < nchain; chain++) { 3813 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3814 ip = nip) { 3815 nip = ip->ipsp_hash.hash_next; 3816 IPPOL_UNCHAIN(php, ip, ns); 3817 } 3818 ipr->ipr_hash[chain].hash_head = NULL; 3819 } 3820 } 3821 3822 void 3823 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3824 { 3825 int dir; 3826 3827 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3828 3829 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3830 ipsec_ipr_flush(php, &php->iph_root[dir], ns); 3831 3832 ipsec_update_present_flags(ns->netstack_ipsec); 3833 } 3834 3835 void 3836 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3837 { 3838 int dir; 3839 3840 ASSERT(php->iph_refs == 0); 3841 3842 rw_enter(&php->iph_lock, RW_WRITER); 3843 ipsec_polhead_flush(php, ns); 3844 rw_exit(&php->iph_lock); 3845 rw_destroy(&php->iph_lock); 3846 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3847 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3848 int chain; 3849 3850 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3851 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3852 3853 } 3854 ipsec_polhead_free_table(php); 3855 kmem_free(php, sizeof (*php)); 3856 } 3857 3858 static void 3859 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3860 { 3861 int af; 3862 3863 ipr->ipr_nchains = 0; 3864 ipr->ipr_hash = NULL; 3865 3866 for (af = 0; af < IPSEC_NAF; af++) { 3867 ipr->ipr_nonhash[af] = NULL; 3868 } 3869 } 3870 3871 ipsec_policy_head_t * 3872 ipsec_polhead_create(void) 3873 { 3874 ipsec_policy_head_t *php; 3875 3876 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3877 if (php == NULL) 3878 return (php); 3879 3880 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3881 php->iph_refs = 1; 3882 php->iph_gen = 0; 3883 3884 
ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3885 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3886 3887 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3888 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3889 3890 return (php); 3891 } 3892 3893 /* 3894 * Clone the policy head into a new polhead; release one reference to the 3895 * old one and return the only reference to the new one. 3896 * If the old one had a refcount of 1, just return it. 3897 */ 3898 ipsec_policy_head_t * 3899 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns) 3900 { 3901 ipsec_policy_head_t *nphp; 3902 3903 if (php == NULL) 3904 return (ipsec_polhead_create()); 3905 else if (php->iph_refs == 1) 3906 return (php); 3907 3908 nphp = ipsec_polhead_create(); 3909 if (nphp == NULL) 3910 return (NULL); 3911 3912 if (ipsec_copy_polhead(php, nphp, ns) != 0) { 3913 ipsec_polhead_free(nphp, ns); 3914 return (NULL); 3915 } 3916 IPPH_REFRELE(php, ns); 3917 return (nphp); 3918 } 3919 3920 /* 3921 * When sending a response to an ICMP request or generating a RST 3922 * in the TCP case, the outbound packets need to go out at the same level 3923 * of protection as the incoming ones, i.e., we associate our outbound 3924 * policy with how the packet came in. We call this after we have 3925 * accepted the incoming packet, which may or may not have been in the 3926 * clear, and hence we are sending the reply back with the policy 3927 * matching the incoming datagram's policy. 3928 * 3929 * NOTE : This technique serves two purposes : 3930 * 3931 * 1) If we have multiple outbound policies, we send out a reply 3932 * matching how it came in rather than matching the outbound 3933 * policy. 3934 * 3935 * 2) For asymmetric policies, we want to make sure that incoming 3936 * and outgoing traffic have the same level of protection. Asymmetric 3937 * policies exist only with global policy, where we may not have 3938 * both outbound and inbound policy at the same time. 3939 * 3940 * NOTE2: This function is called by cleartext cases, so it needs to be 3941 * in IP proper. 3942 */ 3943 boolean_t 3944 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) 3945 { 3946 ipsec_in_t *ii; 3947 ipsec_out_t *io; 3948 boolean_t v4; 3949 mblk_t *mp; 3950 boolean_t secure, attach_if; 3951 uint_t ifindex; 3952 ipsec_selector_t sel; 3953 ipsec_action_t *reflect_action = NULL; 3954 zoneid_t zoneid; 3955 netstack_t *ns; 3956 3957 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 3958 3959 bzero((void*)&sel, sizeof (sel)); 3960 3961 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 3962 3963 mp = ipsec_mp->b_cont; 3964 ASSERT(mp != NULL); 3965 3966 if (ii->ipsec_in_action != NULL) { 3967 /* transfer reference.. */ 3968 reflect_action = ii->ipsec_in_action; 3969 ii->ipsec_in_action = NULL; 3970 } else if (!ii->ipsec_in_loopback) 3971 reflect_action = ipsec_in_to_out_action(ii); 3972 secure = ii->ipsec_in_secure; 3973 attach_if = ii->ipsec_in_attach_if; 3974 ifindex = ii->ipsec_in_ill_index; 3975 zoneid = ii->ipsec_in_zoneid; 3976 ASSERT(zoneid != ALL_ZONES); 3977 ns = ii->ipsec_in_ns; 3978 v4 = ii->ipsec_in_v4; 3979 3980 ipsec_in_release_refs(ii); /* No netstack_rele/hold needed */ 3981 3982 /* 3983 * The caller is going to send the datagram out, where it might 3984 * go on the wire or be delivered locally through ip_wput_local. 3985 * 3986 * 1) If it goes out on the wire, new associations will be 3987 * obtained. 3988 * 2) If it is delivered locally, ip_wput_local will convert 3989 * this IPSEC_OUT to an IPSEC_IN by looking at the requests.
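 *
 * Illustrative sketch (hypothetical caller, error handling elided;
 * these names are ours, not from this file): a TCP RST or ICMP
 * reply path would do roughly
 *
 *	if (ipsec_in_to_out(ipsec_mp, ipha, ip6h))
 *		(send ipsec_mp on; its IPSEC_OUT now mirrors the
 *		 protection of the packet that elicited the reply)
 *	else
 *		(initialization failed; abandon the reply)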
3990 */ 3991 3992 io = (ipsec_out_t *)ipsec_mp->b_rptr; 3993 bzero(io, sizeof (ipsec_out_t)); 3994 io->ipsec_out_type = IPSEC_OUT; 3995 io->ipsec_out_len = sizeof (ipsec_out_t); 3996 io->ipsec_out_frtn.free_func = ipsec_out_free; 3997 io->ipsec_out_frtn.free_arg = (char *)io; 3998 io->ipsec_out_act = reflect_action; 3999 4000 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, 4001 ns->netstack_ipsec)) 4002 return (B_FALSE); 4003 4004 io->ipsec_out_src_port = sel.ips_local_port; 4005 io->ipsec_out_dst_port = sel.ips_remote_port; 4006 io->ipsec_out_proto = sel.ips_protocol; 4007 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4008 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4009 4010 /* 4011 * Don't use global policy for this, as we want 4012 * to use the same protection that was applied to the inbound packet. 4013 */ 4014 io->ipsec_out_use_global_policy = B_FALSE; 4015 io->ipsec_out_proc_begin = B_FALSE; 4016 io->ipsec_out_secure = secure; 4017 io->ipsec_out_v4 = v4; 4018 io->ipsec_out_attach_if = attach_if; 4019 io->ipsec_out_ill_index = ifindex; 4020 io->ipsec_out_zoneid = zoneid; 4021 io->ipsec_out_ns = ns; /* No netstack_hold */ 4022 4023 return (B_TRUE); 4024 } 4025 4026 mblk_t * 4027 ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4028 { 4029 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 4030 ipsec_in_t *nii; 4031 mblk_t *nmp; 4032 frtn_t nfrtn; 4033 ipsec_stack_t *ipss = ns->netstack_ipsec; 4034 4035 ASSERT(ii->ipsec_in_type == IPSEC_IN); 4036 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 4037 4038 nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); 4039 if (nmp == NULL) { 4040 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4041 DROPPER(ipss, ipds_spd_nomem), 4042 &ipss->ipsec_spd_dropper); 4043 return (NULL); 4044 } 4045 4046 ASSERT(nmp->b_datap->db_type == M_CTL); 4047 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4048 4049 /* 4050 * Bump refcounts. 4051 */ 4052 if (ii->ipsec_in_ah_sa != NULL) 4053 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 4054 if (ii->ipsec_in_esp_sa != NULL) 4055 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 4056 if (ii->ipsec_in_policy != NULL) 4057 IPPH_REFHOLD(ii->ipsec_in_policy); 4058 4059 /* 4060 * Copy everything, but preserve the free routine provided by 4061 * ipsec_in_alloc(). 4062 */ 4063 nii = (ipsec_in_t *)nmp->b_rptr; 4064 nfrtn = nii->ipsec_in_frtn; 4065 bcopy(ii, nii, sizeof (*ii)); 4066 nii->ipsec_in_frtn = nfrtn; 4067 4068 nmp->b_cont = cont; 4069 4070 return (nmp); 4071 } 4072 4073 mblk_t * 4074 ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4075 { 4076 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 4077 ipsec_out_t *nio; 4078 mblk_t *nmp; 4079 frtn_t nfrtn; 4080 ipsec_stack_t *ipss = ns->netstack_ipsec; 4081 4082 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4083 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4084 4085 nmp = ipsec_alloc_ipsec_out(ns); 4086 if (nmp == NULL) { 4087 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4088 DROPPER(ipss, ipds_spd_nomem), 4089 &ipss->ipsec_spd_dropper); 4090 return (NULL); 4091 } 4092 ASSERT(nmp->b_datap->db_type == M_CTL); 4093 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4094 4095 /* 4096 * Bump refcounts. 
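 *
 * (Every pointer duplicated by the bcopy() below must carry its own
 * reference, so each non-NULL SA, polhead, policy, action, latch and
 * cred is explicitly held here before the copy is made.)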
4097 */ 4098 if (io->ipsec_out_ah_sa != NULL) 4099 IPSA_REFHOLD(io->ipsec_out_ah_sa); 4100 if (io->ipsec_out_esp_sa != NULL) 4101 IPSA_REFHOLD(io->ipsec_out_esp_sa); 4102 if (io->ipsec_out_polhead != NULL) 4103 IPPH_REFHOLD(io->ipsec_out_polhead); 4104 if (io->ipsec_out_policy != NULL) 4105 IPPOL_REFHOLD(io->ipsec_out_policy); 4106 if (io->ipsec_out_act != NULL) 4107 IPACT_REFHOLD(io->ipsec_out_act); 4108 if (io->ipsec_out_latch != NULL) 4109 IPLATCH_REFHOLD(io->ipsec_out_latch); 4110 if (io->ipsec_out_cred != NULL) 4111 crhold(io->ipsec_out_cred); 4112 4113 /* 4114 * Copy everything, but preserve the free routine provided by 4115 * ipsec_alloc_ipsec_out(). 4116 */ 4117 nio = (ipsec_out_t *)nmp->b_rptr; 4118 nfrtn = nio->ipsec_out_frtn; 4119 bcopy(io, nio, sizeof (*io)); 4120 nio->ipsec_out_frtn = nfrtn; 4121 4122 nmp->b_cont = cont; 4123 4124 return (nmp); 4125 } 4126 4127 static void 4128 ipsec_out_release_refs(ipsec_out_t *io) 4129 { 4130 netstack_t *ns = io->ipsec_out_ns; 4131 4132 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4133 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4134 ASSERT(io->ipsec_out_ns != NULL); 4135 4136 /* Note: IPSA_REFRELE is multi-line macro */ 4137 if (io->ipsec_out_ah_sa != NULL) 4138 IPSA_REFRELE(io->ipsec_out_ah_sa); 4139 if (io->ipsec_out_esp_sa != NULL) 4140 IPSA_REFRELE(io->ipsec_out_esp_sa); 4141 if (io->ipsec_out_polhead != NULL) 4142 IPPH_REFRELE(io->ipsec_out_polhead, ns); 4143 if (io->ipsec_out_policy != NULL) 4144 IPPOL_REFRELE(io->ipsec_out_policy, ns); 4145 if (io->ipsec_out_act != NULL) 4146 IPACT_REFRELE(io->ipsec_out_act); 4147 if (io->ipsec_out_cred != NULL) { 4148 crfree(io->ipsec_out_cred); 4149 io->ipsec_out_cred = NULL; 4150 } 4151 if (io->ipsec_out_latch) { 4152 IPLATCH_REFRELE(io->ipsec_out_latch, ns); 4153 io->ipsec_out_latch = NULL; 4154 } 4155 } 4156 4157 static void 4158 ipsec_out_free(void *arg) 4159 { 4160 ipsec_out_t *io = (ipsec_out_t *)arg; 4161 ipsec_out_release_refs(io); 4162 kmem_cache_free(ipsec_info_cache, arg); 4163 } 4164 4165 static void 4166 ipsec_in_release_refs(ipsec_in_t *ii) 4167 { 4168 netstack_t *ns = ii->ipsec_in_ns; 4169 4170 ASSERT(ii->ipsec_in_ns != NULL); 4171 4172 /* Note: IPSA_REFRELE is multi-line macro */ 4173 if (ii->ipsec_in_ah_sa != NULL) 4174 IPSA_REFRELE(ii->ipsec_in_ah_sa); 4175 if (ii->ipsec_in_esp_sa != NULL) 4176 IPSA_REFRELE(ii->ipsec_in_esp_sa); 4177 if (ii->ipsec_in_policy != NULL) 4178 IPPH_REFRELE(ii->ipsec_in_policy, ns); 4179 if (ii->ipsec_in_da != NULL) { 4180 freeb(ii->ipsec_in_da); 4181 ii->ipsec_in_da = NULL; 4182 } 4183 } 4184 4185 static void 4186 ipsec_in_free(void *arg) 4187 { 4188 ipsec_in_t *ii = (ipsec_in_t *)arg; 4189 ipsec_in_release_refs(ii); 4190 kmem_cache_free(ipsec_info_cache, arg); 4191 } 4192 4193 /* 4194 * This is called only for outbound datagrams if the datagram needs to 4195 * go out secure. A NULL mp can be passed to get an ipsec_out. This 4196 * facility is used by ip_unbind. 4197 * 4198 * NOTE : o As the data part could be modified by ipsec_out_process etc. 4199 * we can't make it fast by calling a dup. 
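 *
 * Minimal usage sketch (illustrative only; failure handling is the
 * caller's business, and data_mp is a hypothetical datagram):
 *
 *	mblk_t *ipsec_mp = ipsec_alloc_ipsec_out(ns);
 *	if (ipsec_mp != NULL)
 *		ipsec_mp->b_cont = data_mp;	(M_CTL rides in front)
 *
 * The returned M_CTL's ipsec_out_t is zeroed except for its type and
 * length, its free routine, and an ALL_ZONES zoneid sentinel.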
4200 */ 4201 mblk_t * 4202 ipsec_alloc_ipsec_out(netstack_t *ns) 4203 { 4204 mblk_t *ipsec_mp; 4205 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4206 4207 if (io == NULL) 4208 return (NULL); 4209 4210 bzero(io, sizeof (ipsec_out_t)); 4211 4212 io->ipsec_out_type = IPSEC_OUT; 4213 io->ipsec_out_len = sizeof (ipsec_out_t); 4214 io->ipsec_out_frtn.free_func = ipsec_out_free; 4215 io->ipsec_out_frtn.free_arg = (char *)io; 4216 4217 /* 4218 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 4219 * using ipsec_out_zoneid should assert that the zoneid has been set to 4220 * a sane value. 4221 */ 4222 io->ipsec_out_zoneid = ALL_ZONES; 4223 io->ipsec_out_ns = ns; /* No netstack_hold */ 4224 4225 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 4226 &io->ipsec_out_frtn); 4227 if (ipsec_mp == NULL) { 4228 ipsec_out_free(io); 4229 4230 return (NULL); 4231 } 4232 ipsec_mp->b_datap->db_type = M_CTL; 4233 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 4234 4235 return (ipsec_mp); 4236 } 4237 4238 /* 4239 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4240 * Otherwise initialize using conn. 4241 * 4242 * If pol is non-null, we consume a reference to it. 4243 */ 4244 mblk_t * 4245 ipsec_attach_ipsec_out(mblk_t **mp, conn_t *connp, ipsec_policy_t *pol, 4246 uint8_t proto, netstack_t *ns) 4247 { 4248 mblk_t *ipsec_mp; 4249 ipsec_stack_t *ipss = ns->netstack_ipsec; 4250 4251 ASSERT((pol != NULL) || (connp != NULL)); 4252 4253 ipsec_mp = ipsec_alloc_ipsec_out(ns); 4254 if (ipsec_mp == NULL) { 4255 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4256 "ipsec_attach_ipsec_out: Allocation failure\n"); 4257 ip_drop_packet(*mp, B_FALSE, NULL, NULL, 4258 DROPPER(ipss, ipds_spd_nomem), 4259 &ipss->ipsec_spd_dropper); 4260 *mp = NULL; 4261 return (NULL); 4262 } 4263 ipsec_mp->b_cont = *mp; 4264 /* 4265 * If *mp is NULL, ipsec_init_ipsec_out() won't/should not be using it. 4266 */ 4267 return (ipsec_init_ipsec_out(ipsec_mp, mp, connp, pol, proto, ns)); 4268 } 4269 4270 /* 4271 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4272 * Otherwise initialize using conn. 4273 * 4274 * If pol is non-null, we consume a reference to it. 4275 */ 4276 mblk_t * 4277 ipsec_init_ipsec_out(mblk_t *ipsec_mp, mblk_t **mp, conn_t *connp, 4278 ipsec_policy_t *pol, uint8_t proto, netstack_t *ns) 4279 { 4280 ipsec_out_t *io; 4281 ipsec_policy_t *p; 4282 ipha_t *ipha; 4283 ip6_t *ip6h; 4284 ipsec_stack_t *ipss = ns->netstack_ipsec; 4285 4286 ASSERT(ipsec_mp->b_cont == *mp); 4287 4288 ASSERT((pol != NULL) || (connp != NULL)); 4289 4290 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4291 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4292 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4293 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4294 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4295 io->ipsec_out_latch = NULL; 4296 /* 4297 * Set the zoneid when we have the connp. 4298 * Otherwise, we're called from ip_wput_attach_policy() who will take 4299 * care of setting the zoneid. 
4300 */ 4301 if (connp != NULL) 4302 io->ipsec_out_zoneid = connp->conn_zoneid; 4303 4304 io->ipsec_out_ns = ns; /* No netstack_hold */ 4305 4306 if (*mp != NULL) { 4307 ipha = (ipha_t *)(*mp)->b_rptr; 4308 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4309 io->ipsec_out_v4 = B_TRUE; 4310 ip6h = NULL; 4311 } else { 4312 io->ipsec_out_v4 = B_FALSE; 4313 ip6h = (ip6_t *)ipha; 4314 ipha = NULL; 4315 } 4316 } else { 4317 ASSERT(connp != NULL && connp->conn_policy_cached); 4318 ip6h = NULL; 4319 ipha = NULL; 4320 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4321 } 4322 4323 p = NULL; 4324 4325 /* 4326 * Take latched policies over global policy. Check here again for 4327 * this, in case we had conn_latch set while the packet was flying 4328 * around in IP. 4329 */ 4330 if (connp != NULL && connp->conn_latch != NULL) { 4331 ASSERT(ns == connp->conn_netstack); 4332 p = connp->conn_latch->ipl_out_policy; 4333 io->ipsec_out_latch = connp->conn_latch; 4334 IPLATCH_REFHOLD(connp->conn_latch); 4335 if (p != NULL) { 4336 IPPOL_REFHOLD(p); 4337 } 4338 io->ipsec_out_src_port = connp->conn_lport; 4339 io->ipsec_out_dst_port = connp->conn_fport; 4340 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4341 if (pol != NULL) 4342 IPPOL_REFRELE(pol, ns); 4343 } else if (pol != NULL) { 4344 ipsec_selector_t sel; 4345 4346 bzero((void*)&sel, sizeof (sel)); 4347 4348 p = pol; 4349 /* 4350 * conn does not have the port information. Get 4351 * it from the packet. 4352 */ 4353 4354 if (!ipsec_init_outbound_ports(&sel, *mp, ipha, ip6h, 0, 4355 ns->netstack_ipsec)) { 4356 /* Callee did ip_drop_packet() on *mp. */ 4357 *mp = NULL; 4358 freeb(ipsec_mp); 4359 return (NULL); 4360 } 4361 io->ipsec_out_src_port = sel.ips_local_port; 4362 io->ipsec_out_dst_port = sel.ips_remote_port; 4363 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4364 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4365 } 4366 4367 io->ipsec_out_proto = proto; 4368 io->ipsec_out_use_global_policy = B_TRUE; 4369 io->ipsec_out_secure = (p != NULL); 4370 io->ipsec_out_policy = p; 4371 4372 if (p == NULL) { 4373 if (connp->conn_policy != NULL) { 4374 io->ipsec_out_secure = B_TRUE; 4375 ASSERT(io->ipsec_out_latch == NULL); 4376 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4377 io->ipsec_out_need_policy = B_TRUE; 4378 ASSERT(io->ipsec_out_polhead == NULL); 4379 IPPH_REFHOLD(connp->conn_policy); 4380 io->ipsec_out_polhead = connp->conn_policy; 4381 } 4382 } else { 4383 /* Handle explicit drop action. */ 4384 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4385 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4386 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4387 DROPPER(ipss, ipds_spd_explicit), 4388 &ipss->ipsec_spd_dropper); 4389 *mp = NULL; 4390 ipsec_mp = NULL; 4391 } 4392 } 4393 4394 return (ipsec_mp); 4395 } 4396 4397 /* 4398 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4399 * and keep track of what-if-any IPsec processing will be applied to the 4400 * datagram. 
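 *
 * Usage sketch (illustrative; data_mp is a hypothetical inbound
 * datagram):
 *
 *	mblk_t *ipsec_mp = ipsec_in_alloc(B_TRUE, ns);	(v4 packet)
 *	if (ipsec_mp != NULL)
 *		ipsec_mp->b_cont = data_mp;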
4401 */ 4402 mblk_t * 4403 ipsec_in_alloc(boolean_t isv4, netstack_t *ns) 4404 { 4405 mblk_t *ipsec_in; 4406 ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4407 4408 if (ii == NULL) 4409 return (NULL); 4410 4411 bzero(ii, sizeof (ipsec_info_t)); 4412 ii->ipsec_in_type = IPSEC_IN; 4413 ii->ipsec_in_len = sizeof (ipsec_in_t); 4414 4415 ii->ipsec_in_v4 = isv4; 4416 ii->ipsec_in_secure = B_TRUE; 4417 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4418 4419 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4420 ii->ipsec_in_frtn.free_arg = (char *)ii; 4421 4422 ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI, 4423 &ii->ipsec_in_frtn); 4424 if (ipsec_in == NULL) { 4425 ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n")); 4426 ipsec_in_free(ii); 4427 return (NULL); 4428 } 4429 4430 ipsec_in->b_datap->db_type = M_CTL; 4431 ipsec_in->b_wptr += sizeof (ipsec_info_t); 4432 4433 return (ipsec_in); 4434 } 4435 4436 /* 4437 * This is called from ip_wput_local when a packet which needs 4438 * security is looped back, to convert the IPSEC_OUT to a IPSEC_IN 4439 * before fanout, where the policy check happens. In most of the 4440 * cases, IPSEC processing has *never* been done. There is one case 4441 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where 4442 * the packet is destined for localhost, IPSEC processing has already 4443 * been done. 4444 * 4445 * Future: This could happen after SA selection has occurred for 4446 * outbound.. which will tell us who the src and dst identities are.. 4447 * Then it's just a matter of splicing the ah/esp SA pointers from the 4448 * ipsec_out_t to the ipsec_in_t. 4449 */ 4450 void 4451 ipsec_out_to_in(mblk_t *ipsec_mp) 4452 { 4453 ipsec_in_t *ii; 4454 ipsec_out_t *io; 4455 ipsec_policy_t *pol; 4456 ipsec_action_t *act; 4457 boolean_t v4, icmp_loopback; 4458 zoneid_t zoneid; 4459 netstack_t *ns; 4460 4461 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4462 4463 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4464 4465 v4 = io->ipsec_out_v4; 4466 zoneid = io->ipsec_out_zoneid; 4467 icmp_loopback = io->ipsec_out_icmp_loopback; 4468 ns = io->ipsec_out_ns; 4469 4470 act = io->ipsec_out_act; 4471 if (act == NULL) { 4472 pol = io->ipsec_out_policy; 4473 if (pol != NULL) { 4474 act = pol->ipsp_act; 4475 IPACT_REFHOLD(act); 4476 } 4477 } 4478 io->ipsec_out_act = NULL; 4479 4480 ipsec_out_release_refs(io); /* No netstack_rele/hold needed */ 4481 4482 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4483 bzero(ii, sizeof (ipsec_in_t)); 4484 ii->ipsec_in_type = IPSEC_IN; 4485 ii->ipsec_in_len = sizeof (ipsec_in_t); 4486 ii->ipsec_in_loopback = B_TRUE; 4487 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4488 4489 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4490 ii->ipsec_in_frtn.free_arg = (char *)ii; 4491 ii->ipsec_in_action = act; 4492 ii->ipsec_in_zoneid = zoneid; 4493 4494 /* 4495 * In most of the cases, we can't look at the ipsec_out_XXX_sa 4496 * because this never went through IPSEC processing. So, look at 4497 * the requests and infer whether it would have gone through 4498 * IPSEC processing or not. Initialize the "done" fields with 4499 * the requests. The possible values for "done" fields are : 4500 * 4501 * 1) zero, indicates that a particular preference was never 4502 * requested. 4503 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/ 4504 * IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that 4505 * IPSEC processing has been completed. 
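 *
 * For example (our illustration, using only the flags named above):
 * if AH was requested with IPSEC_PREF_REQUIRED, the AH "done" field
 * starts out as IPSEC_PREF_REQUIRED and has IPSEC_REQ_DONE or'ed in
 * only once AH processing has actually been performed.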
4506 */ 4507 ii->ipsec_in_secure = B_TRUE; 4508 ii->ipsec_in_v4 = v4; 4509 ii->ipsec_in_icmp_loopback = icmp_loopback; 4510 ii->ipsec_in_attach_if = B_FALSE; 4511 } 4512 4513 /* 4514 * Consults global policy to see whether this datagram should 4515 * go out secure. If so it attaches a ipsec_mp in front and 4516 * returns. 4517 */ 4518 mblk_t * 4519 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, 4520 conn_t *connp, boolean_t unspec_src, zoneid_t zoneid) 4521 { 4522 mblk_t *mp; 4523 ipsec_out_t *io = NULL; 4524 ipsec_selector_t sel; 4525 uint_t ill_index; 4526 boolean_t conn_dontroutex; 4527 boolean_t conn_multicast_loopx; 4528 boolean_t policy_present; 4529 ip_stack_t *ipst = ire->ire_ipst; 4530 netstack_t *ns = ipst->ips_netstack; 4531 ipsec_stack_t *ipss = ns->netstack_ipsec; 4532 4533 ASSERT((ipha != NULL && ip6h == NULL) || 4534 (ip6h != NULL && ipha == NULL)); 4535 4536 bzero((void*)&sel, sizeof (sel)); 4537 4538 if (ipha != NULL) 4539 policy_present = ipss->ipsec_outbound_v4_policy_present; 4540 else 4541 policy_present = ipss->ipsec_outbound_v6_policy_present; 4542 /* 4543 * Fast Path to see if there is any policy. 4544 */ 4545 if (!policy_present) { 4546 if (ipsec_mp->b_datap->db_type == M_CTL) { 4547 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4548 if (!io->ipsec_out_secure) { 4549 /* 4550 * If there is no global policy and ip_wput 4551 * or ip_wput_multicast has attached this mp 4552 * for multicast case, free the ipsec_mp and 4553 * return the original mp. 4554 */ 4555 mp = ipsec_mp->b_cont; 4556 freeb(ipsec_mp); 4557 ipsec_mp = mp; 4558 io = NULL; 4559 } 4560 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4561 } 4562 if (((io == NULL) || (io->ipsec_out_polhead == NULL)) && 4563 ((connp == NULL) || (connp->conn_policy == NULL))) 4564 return (ipsec_mp); 4565 } 4566 4567 ill_index = 0; 4568 conn_multicast_loopx = conn_dontroutex = B_FALSE; 4569 mp = ipsec_mp; 4570 if (ipsec_mp->b_datap->db_type == M_CTL) { 4571 mp = ipsec_mp->b_cont; 4572 /* 4573 * This is a connection where we have some per-socket 4574 * policy or ip_wput has attached an ipsec_mp for 4575 * the multicast datagram. 4576 */ 4577 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4578 if (!io->ipsec_out_secure) { 4579 /* 4580 * This ipsec_mp was allocated in ip_wput or 4581 * ip_wput_multicast so that we will know the 4582 * value of ill_index, conn_dontroute, 4583 * conn_multicast_loop in the multicast case if 4584 * we inherit global policy here. 4585 */ 4586 ill_index = io->ipsec_out_ill_index; 4587 conn_dontroutex = io->ipsec_out_dontroute; 4588 conn_multicast_loopx = io->ipsec_out_multicast_loop; 4589 freeb(ipsec_mp); 4590 ipsec_mp = mp; 4591 io = NULL; 4592 } 4593 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4594 } 4595 4596 if (ipha != NULL) { 4597 sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ? 
4598 ipha->ipha_src : ire->ire_src_addr); 4599 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4600 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4601 sel.ips_isv4 = B_TRUE; 4602 } else { 4603 ushort_t hdr_len; 4604 uint8_t *nexthdrp; 4605 boolean_t is_fragment; 4606 4607 sel.ips_isv4 = B_FALSE; 4608 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4609 if (!unspec_src) 4610 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4611 } else { 4612 sel.ips_local_addr_v6 = ip6h->ip6_src; 4613 } 4614 4615 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment); 4616 if (is_fragment) { 4617 /* 4618 * It's a packet fragment for a packet that 4619 * we have already processed (since IPsec processing 4620 * is done before fragmentation), so we don't 4621 * have to do policy checks again. Fragments can 4622 * come back to us for processing if they have 4623 * been queued up due to flow control. 4624 */ 4625 if (ipsec_mp->b_datap->db_type == M_CTL) { 4626 mp = ipsec_mp->b_cont; 4627 freeb(ipsec_mp); 4628 ipsec_mp = mp; 4629 } 4630 return (ipsec_mp); 4631 } 4632 4633 /* IPv6 common-case. */ 4634 sel.ips_protocol = ip6h->ip6_nxt; 4635 switch (ip6h->ip6_nxt) { 4636 case IPPROTO_TCP: 4637 case IPPROTO_UDP: 4638 case IPPROTO_SCTP: 4639 case IPPROTO_ICMPV6: 4640 break; 4641 default: 4642 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4643 &hdr_len, &nexthdrp)) { 4644 BUMP_MIB(&ipst->ips_ip6_mib, 4645 ipIfStatsOutDiscards); 4646 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4647 return (NULL); 4648 } 4649 sel.ips_protocol = *nexthdrp; 4650 break; 4651 } 4652 } 4653 4654 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4655 if (ipha != NULL) { 4656 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4657 } else { 4658 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4659 } 4660 4661 /* Callee dropped the packet. */ 4662 return (NULL); 4663 } 4664 4665 if (io != NULL) { 4666 /* 4667 * We seem to have some local policy (we already have 4668 * an ipsec_out). Look at global policy and see 4669 * whether we have to inherit or not. 4670 */ 4671 io->ipsec_out_need_policy = B_FALSE; 4672 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, 4673 &sel, ns); 4674 ASSERT((io->ipsec_out_policy != NULL) || 4675 (io->ipsec_out_act != NULL)); 4676 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4677 return (ipsec_mp); 4678 } 4679 /* 4680 * We pass in a pointer to a pointer because mp can become 4681 * NULL due to allocation failures or explicit drops. Callers 4682 * of this function should assume a NULL mp means the packet 4683 * was dropped. 4684 */ 4685 ipsec_mp = ipsec_attach_global_policy(&mp, connp, &sel, ns); 4686 if (ipsec_mp == NULL) 4687 return (mp); 4688 4689 /* 4690 * Copy the right port information. 4691 */ 4692 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4693 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4694 4695 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4696 ASSERT((io->ipsec_out_policy != NULL) || 4697 (io->ipsec_out_act != NULL)); 4698 io->ipsec_out_src_port = sel.ips_local_port; 4699 io->ipsec_out_dst_port = sel.ips_remote_port; 4700 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4701 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4702 /* 4703 * Set ill_index, conn_dontroute and conn_multicast_loop 4704 * for multicast datagrams. 
4705 */ 4706 io->ipsec_out_ill_index = ill_index; 4707 io->ipsec_out_dontroute = conn_dontroutex; 4708 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4709 4710 if (zoneid == ALL_ZONES) 4711 zoneid = GLOBAL_ZONEID; 4712 io->ipsec_out_zoneid = zoneid; 4713 return (ipsec_mp); 4714 } 4715 4716 /* 4717 * When appropriate, this function caches inbound and outbound policy 4718 * for this connection. 4719 * 4720 * XXX need to work out more details about per-interface policy and 4721 * caching here! 4722 * 4723 * XXX may want to split inbound and outbound caching for ill.. 4724 */ 4725 int 4726 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4727 { 4728 boolean_t global_policy_present; 4729 netstack_t *ns = connp->conn_netstack; 4730 ipsec_stack_t *ipss = ns->netstack_ipsec; 4731 4732 /* 4733 * There is no policy latching for ICMP sockets because we can't 4734 * decide on which policy to use until we see the packet and get 4735 * type/code selectors. 4736 */ 4737 if (connp->conn_ulp == IPPROTO_ICMP || 4738 connp->conn_ulp == IPPROTO_ICMPV6) { 4739 connp->conn_in_enforce_policy = 4740 connp->conn_out_enforce_policy = B_TRUE; 4741 if (connp->conn_latch != NULL) { 4742 IPLATCH_REFRELE(connp->conn_latch, ns); 4743 connp->conn_latch = NULL; 4744 } 4745 connp->conn_flags |= IPCL_CHECK_POLICY; 4746 return (0); 4747 } 4748 4749 global_policy_present = isv4 ? 4750 (ipss->ipsec_outbound_v4_policy_present || 4751 ipss->ipsec_inbound_v4_policy_present) : 4752 (ipss->ipsec_outbound_v6_policy_present || 4753 ipss->ipsec_inbound_v6_policy_present); 4754 4755 if ((connp->conn_policy != NULL) || global_policy_present) { 4756 ipsec_selector_t sel; 4757 ipsec_policy_t *p; 4758 4759 if (connp->conn_latch == NULL && 4760 (connp->conn_latch = iplatch_create()) == NULL) { 4761 return (ENOMEM); 4762 } 4763 4764 sel.ips_protocol = connp->conn_ulp; 4765 sel.ips_local_port = connp->conn_lport; 4766 sel.ips_remote_port = connp->conn_fport; 4767 sel.ips_is_icmp_inv_acq = 0; 4768 sel.ips_isv4 = isv4; 4769 if (isv4) { 4770 sel.ips_local_addr_v4 = connp->conn_src; 4771 sel.ips_remote_addr_v4 = connp->conn_rem; 4772 } else { 4773 sel.ips_local_addr_v6 = connp->conn_srcv6; 4774 sel.ips_remote_addr_v6 = connp->conn_remv6; 4775 } 4776 4777 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 4778 ns); 4779 if (connp->conn_latch->ipl_in_policy != NULL) 4780 IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns); 4781 connp->conn_latch->ipl_in_policy = p; 4782 connp->conn_in_enforce_policy = (p != NULL); 4783 4784 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel, 4785 ns); 4786 if (connp->conn_latch->ipl_out_policy != NULL) 4787 IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns); 4788 connp->conn_latch->ipl_out_policy = p; 4789 connp->conn_out_enforce_policy = (p != NULL); 4790 4791 /* Clear the latched actions too, in case we're recaching. */ 4792 if (connp->conn_latch->ipl_out_action != NULL) 4793 IPACT_REFRELE(connp->conn_latch->ipl_out_action); 4794 if (connp->conn_latch->ipl_in_action != NULL) 4795 IPACT_REFRELE(connp->conn_latch->ipl_in_action); 4796 } 4797 4798 /* 4799 * We may or may not have policy for this endpoint. We still set 4800 * conn_policy_cached so that inbound datagrams don't have to look 4801 * at global policy as policy is considered latched for these 4802 * endpoints. We should not set conn_policy_cached until the conn 4803 * reflects the actual policy. 
If we *set* this before inheriting 4804 * the policy there is a window where the check 4805 * CONN_INBOUND_POLICY_PRESENT, will neither check with the policy 4806 * on the conn (because we have not yet copied the policy on to 4807 * conn and hence not set conn_in_enforce_policy) nor with the 4808 * global policy (because conn_policy_cached is already set). 4809 */ 4810 connp->conn_policy_cached = B_TRUE; 4811 if (connp->conn_in_enforce_policy) 4812 connp->conn_flags |= IPCL_CHECK_POLICY; 4813 return (0); 4814 } 4815 4816 void 4817 iplatch_free(ipsec_latch_t *ipl, netstack_t *ns) 4818 { 4819 if (ipl->ipl_out_policy != NULL) 4820 IPPOL_REFRELE(ipl->ipl_out_policy, ns); 4821 if (ipl->ipl_in_policy != NULL) 4822 IPPOL_REFRELE(ipl->ipl_in_policy, ns); 4823 if (ipl->ipl_in_action != NULL) 4824 IPACT_REFRELE(ipl->ipl_in_action); 4825 if (ipl->ipl_out_action != NULL) 4826 IPACT_REFRELE(ipl->ipl_out_action); 4827 if (ipl->ipl_local_cid != NULL) 4828 IPSID_REFRELE(ipl->ipl_local_cid); 4829 if (ipl->ipl_remote_cid != NULL) 4830 IPSID_REFRELE(ipl->ipl_remote_cid); 4831 if (ipl->ipl_local_id != NULL) 4832 crfree(ipl->ipl_local_id); 4833 mutex_destroy(&ipl->ipl_lock); 4834 kmem_free(ipl, sizeof (*ipl)); 4835 } 4836 4837 ipsec_latch_t * 4838 iplatch_create() 4839 { 4840 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4841 if (ipl == NULL) 4842 return (ipl); 4843 bzero(ipl, sizeof (*ipl)); 4844 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4845 ipl->ipl_refcnt = 1; 4846 return (ipl); 4847 } 4848 4849 /* 4850 * Hash function for ID hash table. 4851 */ 4852 static uint32_t 4853 ipsid_hash(int idtype, char *idstring) 4854 { 4855 uint32_t hval = idtype; 4856 unsigned char c; 4857 4858 while ((c = *idstring++) != 0) { 4859 hval = (hval << 4) | (hval >> 28); 4860 hval ^= c; 4861 } 4862 hval = hval ^ (hval >> 16); 4863 return (hval & (IPSID_HASHSIZE-1)); 4864 } 4865 4866 /* 4867 * Look up identity string in hash table. Return identity object 4868 * corresponding to the name -- either preexisting, or newly allocated. 4869 * 4870 * Return NULL if we need to allocate a new one and can't get memory. 
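 *
 * Usage sketch (illustrative; the identity string is made up):
 *
 *	ipsid_t *id = ipsid_lookup(idtype, "gw.example.com", ns);
 *	if (id == NULL)
 *		(out of memory; caller must cope)
 *	...
 *	IPSID_REFRELE(id);	(drop the hold the lookup returned)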
4871 */ 4872 ipsid_t * 4873 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4874 { 4875 ipsid_t *retval; 4876 char *nstr; 4877 int idlen = strlen(idstring) + 1; 4878 ipsec_stack_t *ipss = ns->netstack_ipsec; 4879 ipsif_t *bucket; 4880 4881 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4882 4883 mutex_enter(&bucket->ipsif_lock); 4884 4885 for (retval = bucket->ipsif_head; retval != NULL; 4886 retval = retval->ipsid_next) { 4887 if (idtype != retval->ipsid_type) 4888 continue; 4889 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4890 continue; 4891 4892 IPSID_REFHOLD(retval); 4893 mutex_exit(&bucket->ipsif_lock); 4894 return (retval); 4895 } 4896 4897 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4898 if (!retval) { 4899 mutex_exit(&bucket->ipsif_lock); 4900 return (NULL); 4901 } 4902 4903 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4904 if (!nstr) { 4905 mutex_exit(&bucket->ipsif_lock); 4906 kmem_free(retval, sizeof (*retval)); 4907 return (NULL); 4908 } 4909 4910 retval->ipsid_refcnt = 1; 4911 retval->ipsid_next = bucket->ipsif_head; 4912 if (retval->ipsid_next != NULL) 4913 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4914 retval->ipsid_ptpn = &bucket->ipsif_head; 4915 retval->ipsid_type = idtype; 4916 retval->ipsid_cid = nstr; 4917 bucket->ipsif_head = retval; 4918 bcopy(idstring, nstr, idlen); 4919 mutex_exit(&bucket->ipsif_lock); 4920 4921 return (retval); 4922 } 4923 4924 /* 4925 * Garbage collect the identity hash table. 4926 */ 4927 void 4928 ipsid_gc(netstack_t *ns) 4929 { 4930 int i, len; 4931 ipsid_t *id, *nid; 4932 ipsif_t *bucket; 4933 ipsec_stack_t *ipss = ns->netstack_ipsec; 4934 4935 for (i = 0; i < IPSID_HASHSIZE; i++) { 4936 bucket = &ipss->ipsec_ipsid_buckets[i]; 4937 mutex_enter(&bucket->ipsif_lock); 4938 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4939 nid = id->ipsid_next; 4940 if (id->ipsid_refcnt == 0) { 4941 *id->ipsid_ptpn = nid; 4942 if (nid != NULL) 4943 nid->ipsid_ptpn = id->ipsid_ptpn; 4944 len = strlen(id->ipsid_cid) + 1; 4945 kmem_free(id->ipsid_cid, len); 4946 kmem_free(id, sizeof (*id)); 4947 } 4948 } 4949 mutex_exit(&bucket->ipsif_lock); 4950 } 4951 } 4952 4953 /* 4954 * Return true if two identities are the same. 4955 */ 4956 boolean_t 4957 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4958 { 4959 if (id1 == id2) 4960 return (B_TRUE); 4961 #ifdef DEBUG 4962 if ((id1 == NULL) || (id2 == NULL)) 4963 return (B_FALSE); 4964 /* 4965 * test that we're interning id's correctly.. 4966 */ 4967 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4968 (id1->ipsid_type != id2->ipsid_type)); 4969 #endif 4970 return (B_FALSE); 4971 } 4972 4973 /* 4974 * Initialize identity table; called during module initialization. 
4975 */ 4976 static void 4977 ipsid_init(netstack_t *ns) 4978 { 4979 ipsif_t *bucket; 4980 int i; 4981 ipsec_stack_t *ipss = ns->netstack_ipsec; 4982 4983 for (i = 0; i < IPSID_HASHSIZE; i++) { 4984 bucket = &ipss->ipsec_ipsid_buckets[i]; 4985 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4986 } 4987 } 4988 4989 /* 4990 * Free identity table (preparatory to module unload) 4991 */ 4992 static void 4993 ipsid_fini(netstack_t *ns) 4994 { 4995 ipsif_t *bucket; 4996 int i; 4997 ipsec_stack_t *ipss = ns->netstack_ipsec; 4998 4999 for (i = 0; i < IPSID_HASHSIZE; i++) { 5000 bucket = &ipss->ipsec_ipsid_buckets[i]; 5001 ASSERT(bucket->ipsif_head == NULL); 5002 mutex_destroy(&bucket->ipsif_lock); 5003 } 5004 } 5005 5006 /* 5007 * Update the minimum and maximum supported key sizes for the 5008 * specified algorithm. Must be called while holding the algorithms lock. 5009 */ 5010 void 5011 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 5012 netstack_t *ns) 5013 { 5014 size_t crypto_min = (size_t)-1, crypto_max = 0; 5015 size_t cur_crypto_min, cur_crypto_max; 5016 boolean_t is_valid; 5017 crypto_mechanism_info_t *mech_infos; 5018 uint_t nmech_infos; 5019 int crypto_rc, i; 5020 crypto_mech_usage_t mask; 5021 ipsec_stack_t *ipss = ns->netstack_ipsec; 5022 5023 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 5024 5025 /* 5026 * Compute the min, max, and default key sizes (in number of 5027 * increments to the default key size in bits) as defined 5028 * by the algorithm mappings. This range of key sizes is used 5029 * for policy related operations. The effective key sizes 5030 * supported by the framework could be more limited than 5031 * those defined for an algorithm. 5032 */ 5033 alg->alg_default_bits = alg->alg_key_sizes[0]; 5034 if (alg->alg_increment != 0) { 5035 /* key sizes are defined by range & increment */ 5036 alg->alg_minbits = alg->alg_key_sizes[1]; 5037 alg->alg_maxbits = alg->alg_key_sizes[2]; 5038 5039 alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits, 5040 alg->alg_increment, alg->alg_default_bits); 5041 } else if (alg->alg_nkey_sizes == 0) { 5042 /* no specified key size for algorithm */ 5043 alg->alg_minbits = alg->alg_maxbits = 0; 5044 } else { 5045 /* key sizes are defined by enumeration */ 5046 alg->alg_minbits = (uint16_t)-1; 5047 alg->alg_maxbits = 0; 5048 5049 for (i = 0; i < alg->alg_nkey_sizes; i++) { 5050 if (alg->alg_key_sizes[i] < alg->alg_minbits) 5051 alg->alg_minbits = alg->alg_key_sizes[i]; 5052 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 5053 alg->alg_maxbits = alg->alg_key_sizes[i]; 5054 } 5055 alg->alg_default = 0; 5056 } 5057 5058 if (!(alg->alg_flags & ALG_FLAG_VALID)) 5059 return; 5060 5061 /* 5062 * Mechanisms do not apply to the NULL encryption 5063 * algorithm, so simply return for this case. 5064 */ 5065 if (alg->alg_id == SADB_EALG_NULL) 5066 return; 5067 5068 /* 5069 * Find the min and max key sizes supported by the cryptographic 5070 * framework providers. 5071 */ 5072 5073 /* get the key sizes supported by the framework */ 5074 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 5075 &mech_infos, &nmech_infos, KM_SLEEP); 5076 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 5077 alg->alg_flags &= ~ALG_FLAG_VALID; 5078 return; 5079 } 5080 5081 /* min and max key sizes supported by framework */ 5082 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 5083 int unit_bits; 5084 5085 /* 5086 * Ignore entries that do not support the operations 5087 * needed for the algorithm type. 
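 *
 * E.g. a mechanism entry advertising only CRYPTO_MECH_USAGE_ENCRYPT
 * cannot back an IPsec encryption algorithm, which needs both
 * ENCRYPT and DECRYPT; the (mi_usage & mask) != mask test below
 * skips such entries.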
5088 */ 5089 if (alg_type == IPSEC_ALG_AUTH) { 5090 mask = CRYPTO_MECH_USAGE_MAC; 5091 } else { 5092 mask = CRYPTO_MECH_USAGE_ENCRYPT | 5093 CRYPTO_MECH_USAGE_DECRYPT; 5094 } 5095 if ((mech_infos[i].mi_usage & mask) != mask) 5096 continue; 5097 5098 unit_bits = (mech_infos[i].mi_keysize_unit == 5099 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 5100 /* adjust min/max supported by framework */ 5101 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 5102 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 5103 5104 if (cur_crypto_min < crypto_min) 5105 crypto_min = cur_crypto_min; 5106 5107 /* 5108 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 5109 * the crypto framework which means "no upper limit". 5110 */ 5111 if (mech_infos[i].mi_max_key_size == 5112 CRYPTO_EFFECTIVELY_INFINITE) { 5113 crypto_max = (size_t)-1; 5114 } else if (cur_crypto_max > crypto_max) { 5115 crypto_max = cur_crypto_max; 5116 } 5117 5118 is_valid = B_TRUE; 5119 } 5120 5121 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 5122 nmech_infos); 5123 5124 if (!is_valid) { 5125 /* no key sizes supported by framework */ 5126 alg->alg_flags &= ~ALG_FLAG_VALID; 5127 return; 5128 } 5129 5130 /* 5131 * Determine min and max key sizes from alg_key_sizes[]. 5132 * defined for the algorithm entry. Adjust key sizes based on 5133 * those supported by the framework. 5134 */ 5135 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 5136 if (alg->alg_increment != 0) { 5137 /* supported key sizes are defined by range & increment */ 5138 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 5139 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 5140 5141 alg->alg_ef_minbits = MAX(alg->alg_minbits, 5142 (uint16_t)crypto_min); 5143 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 5144 (uint16_t)crypto_max); 5145 5146 /* 5147 * If the sizes supported by the framework are outside 5148 * the range of sizes defined by the algorithm mappings, 5149 * the algorithm cannot be used. Check for this 5150 * condition here. 5151 */ 5152 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 5153 alg->alg_flags &= ~ALG_FLAG_VALID; 5154 return; 5155 } 5156 5157 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 5158 alg->alg_ef_default_bits = alg->alg_ef_minbits; 5159 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 5160 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 5161 5162 alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits, 5163 alg->alg_increment, alg->alg_ef_default_bits); 5164 } else if (alg->alg_nkey_sizes == 0) { 5165 /* no specified key size for algorithm */ 5166 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 5167 } else { 5168 /* supported key sizes are defined by enumeration */ 5169 alg->alg_ef_minbits = (uint16_t)-1; 5170 alg->alg_ef_maxbits = 0; 5171 5172 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 5173 /* 5174 * Ignore the current key size if it is not in the 5175 * range of sizes supported by the framework. 5176 */ 5177 if (alg->alg_key_sizes[i] < crypto_min || 5178 alg->alg_key_sizes[i] > crypto_max) 5179 continue; 5180 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 5181 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 5182 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 5183 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 5184 is_valid = B_TRUE; 5185 } 5186 5187 if (!is_valid) { 5188 alg->alg_flags &= ~ALG_FLAG_VALID; 5189 return; 5190 } 5191 alg->alg_ef_default = 0; 5192 } 5193 } 5194 5195 /* 5196 * Free the memory used by the specified algorithm. 
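 *
 * (Note the "+ 1" in the kmem_free() calls below: the alg_key_sizes
 * and alg_block_sizes arrays are sized with one slot beyond the
 * advertised count, and kmem_free() must match the allocated size.)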
5197 */ 5198 void 5199 ipsec_alg_free(ipsec_alginfo_t *alg) 5200 { 5201 if (alg == NULL) 5202 return; 5203 5204 if (alg->alg_key_sizes != NULL) { 5205 kmem_free(alg->alg_key_sizes, 5206 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 5207 alg->alg_key_sizes = NULL; 5208 } 5209 if (alg->alg_block_sizes != NULL) { 5210 kmem_free(alg->alg_block_sizes, 5211 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 5212 alg->alg_block_sizes = NULL; 5213 } 5214 kmem_free(alg, sizeof (*alg)); 5215 } 5216 5217 /* 5218 * Check the validity of the specified key size for an algorithm. 5219 * Returns B_TRUE if the key size is valid, B_FALSE otherwise. 5220 */ 5221 boolean_t 5222 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 5223 { 5224 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 5225 return (B_FALSE); 5226 5227 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 5228 /* 5229 * If the key sizes are defined by enumeration, the new 5230 * key size must be equal to one of the supported values. 5231 */ 5232 int i; 5233 5234 for (i = 0; i < alg->alg_nkey_sizes; i++) 5235 if (key_size == alg->alg_key_sizes[i]) 5236 break; 5237 if (i == alg->alg_nkey_sizes) 5238 return (B_FALSE); 5239 } 5240 5241 return (B_TRUE); 5242 } 5243 5244 /* 5245 * Callback function invoked by the crypto framework when a provider 5246 * registers or unregisters. This callback updates the algorithm 5247 * tables when a crypto algorithm is no longer available or becomes 5248 * available, and triggers the freeing/creation of context templates 5249 * associated with existing SAs, if needed. 5250 * 5251 * Need to walk all stack instances since the callback is global 5252 * for all instances. 5253 */ 5254 void 5255 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5256 { 5257 netstack_handle_t nh; 5258 netstack_t *ns; 5259 5260 netstack_next_init(&nh); 5261 while ((ns = netstack_next(&nh)) != NULL) { 5262 ipsec_prov_update_callback_stack(event, event_arg, ns); 5263 netstack_rele(ns); 5264 } 5265 netstack_next_fini(&nh); 5266 } 5267 5268 static void 5269 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5270 netstack_t *ns) 5271 { 5272 crypto_notify_event_change_t *prov_change = 5273 (crypto_notify_event_change_t *)event_arg; 5274 uint_t algidx, algid, algtype, mech_count, mech_idx; 5275 ipsec_alginfo_t *alg; 5276 ipsec_alginfo_t oalg; 5277 crypto_mech_name_t *mechs; 5278 boolean_t alg_changed = B_FALSE; 5279 ipsec_stack_t *ipss = ns->netstack_ipsec; 5280 5281 /* ignore events for which we didn't register */ 5282 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5283 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5284 "received from crypto framework\n", event)); 5285 return; 5286 } 5287 5288 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5289 if (mechs == NULL) 5290 return; 5291 5292 /* 5293 * Walk the list of currently defined IPsec algorithms. Update 5294 * the algorithm valid flag and trigger an update of the 5295 * SAs that depend on that algorithm. 5296 */ 5297 mutex_enter(&ipss->ipsec_alg_lock); 5298 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5299 for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; 5300 algidx++) { 5301 5302 algid = ipss->ipsec_sortlist[algtype][algidx]; 5303 alg = ipss->ipsec_alglists[algtype][algid]; 5304 ASSERT(alg != NULL); 5305 5306 /* 5307 * Skip the algorithms which do not map to the 5308 * crypto framework provider being added or removed.
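 *
 * E.g. (mechanism names purely illustrative): a change notification
 * for "CKM_AES_CBC" leaves an algorithm mapped to "CKM_DES3_CBC"
 * untouched; only entries whose alg_mech_name matches are
 * re-evaluated.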
5309 */ 5310 if (strncmp(alg->alg_mech_name, 5311 prov_change->ec_mech_name, 5312 CRYPTO_MAX_MECH_NAME) != 0) 5313 continue; 5314 5315 /* 5316 * Determine if the mechanism is valid. If it 5317 * is not, mark the algorithm as being invalid. If 5318 * it is, mark the algorithm as being valid. 5319 */ 5320 for (mech_idx = 0; mech_idx < mech_count; mech_idx++) 5321 if (strncmp(alg->alg_mech_name, 5322 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0) 5323 break; 5324 if (mech_idx == mech_count && 5325 alg->alg_flags & ALG_FLAG_VALID) { 5326 alg->alg_flags &= ~ALG_FLAG_VALID; 5327 alg_changed = B_TRUE; 5328 } else if (mech_idx < mech_count && 5329 !(alg->alg_flags & ALG_FLAG_VALID)) { 5330 alg->alg_flags |= ALG_FLAG_VALID; 5331 alg_changed = B_TRUE; 5332 } 5333 5334 /* 5335 * Update the supported key sizes, regardless 5336 * of whether a crypto provider was added or 5337 * removed. 5338 */ 5339 oalg = *alg; 5340 ipsec_alg_fix_min_max(alg, algtype, ns); 5341 if (!alg_changed && 5342 (alg->alg_ef_minbits != oalg.alg_ef_minbits || 5343 alg->alg_ef_maxbits != oalg.alg_ef_maxbits || 5344 alg->alg_ef_default != oalg.alg_ef_default || 5345 alg->alg_ef_default_bits != 5346 oalg.alg_ef_default_bits)) 5347 alg_changed = B_TRUE; 5348 5349 /* 5350 * Update the affected SAs if a software provider is 5351 * being added or removed. 5352 */ 5353 if (prov_change->ec_provider_type == 5354 CRYPTO_SW_PROVIDER) 5355 sadb_alg_update(algtype, alg->alg_id, 5356 prov_change->ec_change == 5357 CRYPTO_MECH_ADDED, ns); 5358 } 5359 } 5360 mutex_exit(&ipss->ipsec_alg_lock); 5361 crypto_free_mech_list(mechs, mech_count); 5362 5363 if (alg_changed) { 5364 /* 5365 * An algorithm has changed, i.e. it became valid or 5366 * invalid, or its supported key sizes have changed. 5367 * Notify ipsecah and ipsecesp of this change so 5368 * that they can send a SADB_REGISTER to their consumers. 5369 */ 5370 ipsecah_algs_changed(ns); 5371 ipsecesp_algs_changed(ns); 5372 } 5373 } 5374 5375 /* 5376 * Registers with the crypto framework to be notified of crypto 5377 * provider changes. Used to update the algorithm tables and 5378 * to free or create context templates if needed. Invoked after IPsec 5379 * is loaded successfully. 5380 * 5381 * This is called separately for each IP instance, so we ensure we only 5382 * register once. 5383 */ 5384 void 5385 ipsec_register_prov_update(void) 5386 { 5387 if (prov_update_handle != NULL) 5388 return; 5389 5390 prov_update_handle = crypto_notify_events( 5391 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); 5392 } 5393 5394 /* 5395 * Unregisters from the framework to be notified of crypto provider 5396 * changes. Called from ipsec_policy_g_destroy(). 5397 */ 5398 static void 5399 ipsec_unregister_prov_update(void) 5400 { 5401 if (prov_update_handle != NULL) 5402 crypto_unnotify_events(prov_update_handle); 5403 } 5404 5405 /* 5406 * Tunnel-mode support routines. 5407 */ 5408 5409 /* 5410 * Returns an mblk chain suitable for putnext() if policies match and IPsec 5411 * SAs are available. If there's no per-tunnel policy, or a lookup comes back 5412 * with no match, then still return the packet and have global policy take 5413 * a crack at it in IP. 5414 * 5415 * Remember -> we can be forwarding packets. Keep that in mind w.r.t. 5416 * inner-packet contents.
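 *
 * Caller sketch (illustrative; the real caller lives in the tun
 * module, and these names are ours):
 *
 *	nmp = ipsec_tun_outbound(mp, atp, inner4, inner6, outer4,
 *	    outer6, hdr_len, ns);
 *	if (nmp == NULL)
 *		return;		(dropped, or cached pending more fragments)
 *	putnext(q, nmp);	(chain may now lead with IPSEC_OUT M_CTLs)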
5417 */ 5418 mblk_t * 5419 ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, 5420 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, 5421 netstack_t *ns) 5422 { 5423 ipsec_tun_pol_t *itp = atp->tun_itp; 5424 ipsec_policy_head_t *polhead; 5425 ipsec_selector_t sel; 5426 mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; 5427 mblk_t *spare_mp = NULL; 5428 ipsec_out_t *io; 5429 boolean_t is_fragment; 5430 ipsec_policy_t *pol; 5431 ipsec_stack_t *ipss = ns->netstack_ipsec; 5432 5433 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5434 outer_ipv4 != NULL && outer_ipv6 == NULL); 5435 /* We take care of inners in a bit. */ 5436 5437 /* No policy on this tunnel - let global policy have at it. */ 5438 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) 5439 return (mp); 5440 polhead = itp->itp_policy; 5441 5442 bzero(&sel, sizeof (sel)); 5443 if (inner_ipv4 != NULL) { 5444 ASSERT(inner_ipv6 == NULL); 5445 sel.ips_isv4 = B_TRUE; 5446 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5447 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5448 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5449 is_fragment = 5450 IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags); 5451 } else { 5452 ASSERT(inner_ipv6 != NULL); 5453 sel.ips_isv4 = B_FALSE; 5454 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5455 /* Use ip_get_dst_v6() just for the fragment bit. */ 5456 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, 5457 &is_fragment); 5458 /* 5459 * Reset, because we don't care about routing-header dests 5460 * in the forwarding/tunnel path. 5461 */ 5462 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5463 } 5464 5465 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5466 if (is_fragment) { 5467 ipha_t *oiph; 5468 ipha_t *iph = NULL; 5469 ip6_t *ip6h = NULL; 5470 int hdr_len; 5471 uint16_t ip6_hdr_length; 5472 uint8_t v6_proto; 5473 uint8_t *v6_proto_p; 5474 5475 /* 5476 * We have a fragment we need to track! 5477 */ 5478 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5479 outer_hdr_len, ipss); 5480 if (mp == NULL) 5481 return (NULL); 5482 5483 /* 5484 * If we get here, we have a full 5485 * fragment chain 5486 */ 5487 5488 oiph = (ipha_t *)mp->b_rptr; 5489 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5490 hdr_len = ((outer_hdr_len != 0) ? 
5491 IPH_HDR_LENGTH(oiph) : 0); 5492 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5493 } else { 5494 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5495 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 5496 ip_drop_packet_chain(mp, B_FALSE, 5497 NULL, NULL, 5498 DROPPER(ipss, ipds_spd_nomem), 5499 &ipss->ipsec_spd_dropper); 5500 return (NULL); /* Don't deref NULL spare_mp. */ } 5501 ip6h = (ip6_t *)spare_mp->b_rptr; 5502 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 5503 &ip6_hdr_length, &v6_proto_p); 5504 hdr_len = ip6_hdr_length; 5505 } 5506 outer_hdr_len = hdr_len; 5507 5508 if (sel.ips_isv4) { 5509 if (iph == NULL) { 5510 /* Was v6 outer */ 5511 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5512 } 5513 inner_ipv4 = iph; 5514 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5515 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5516 sel.ips_protocol = 5517 (uint8_t)inner_ipv4->ipha_protocol; 5518 } else { 5519 if ((spare_mp == NULL) && 5520 ((spare_mp = msgpullup(mp, -1)) == NULL)) { 5521 ip_drop_packet_chain(mp, B_FALSE, 5522 NULL, NULL, 5523 DROPPER(ipss, ipds_spd_nomem), 5524 &ipss->ipsec_spd_dropper); 5525 return (NULL); /* Don't deref NULL spare_mp. */ } 5526 inner_ipv6 = (ip6_t *)(spare_mp->b_rptr + 5527 hdr_len); 5528 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5529 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5530 (void) ip_hdr_length_nexthdr_v6(spare_mp, 5531 inner_ipv6, &ip6_hdr_length, 5532 &v6_proto_p); 5533 v6_proto = *v6_proto_p; 5534 sel.ips_protocol = v6_proto; 5535 #ifdef FRAGCACHE_DEBUG 5536 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5537 sel.ips_protocol); 5538 #endif 5539 } 5540 /* Ports are extracted below */ 5541 } 5542 5543 /* Get ports... */ 5544 if (spare_mp != NULL) { 5545 if (!ipsec_init_outbound_ports(&sel, spare_mp, 5546 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5547 /* 5548 * callee did ip_drop_packet_chain() on 5549 * spare_mp 5550 */ 5551 ipsec_freemsg_chain(mp); 5552 return (NULL); 5553 } 5554 } else { 5555 if (!ipsec_init_outbound_ports(&sel, mp, 5556 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5557 /* callee did ip_drop_packet_chain() on mp. */ 5558 return (NULL); 5559 } 5560 } 5561 #ifdef FRAGCACHE_DEBUG 5562 if (inner_ipv4 != NULL) 5563 cmn_err(CE_WARN, 5564 "(v4) sel.ips_protocol = %d, " 5565 "sel.ips_local_port = %d, " 5566 "sel.ips_remote_port = %d\n", 5567 sel.ips_protocol, ntohs(sel.ips_local_port), 5568 ntohs(sel.ips_remote_port)); 5569 if (inner_ipv6 != NULL) 5570 cmn_err(CE_WARN, 5571 "(v6) sel.ips_protocol = %d, " 5572 "sel.ips_local_port = %d, " 5573 "sel.ips_remote_port = %d\n", 5574 sel.ips_protocol, ntohs(sel.ips_local_port), 5575 ntohs(sel.ips_remote_port)); 5576 #endif 5577 /* Success so far - done with spare_mp */ 5578 ipsec_freemsg_chain(spare_mp); 5579 } 5580 rw_enter(&polhead->iph_lock, RW_READER); 5581 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, 5582 &sel, ns); 5583 rw_exit(&polhead->iph_lock); 5584 if (pol == NULL) { 5585 /* 5586 * No matching policy on this tunnel, drop the packet. 5587 * 5588 * NOTE: Tunnel-mode tunnels are different from the 5589 * IP global transport mode policy head. For a tunnel-mode 5590 * tunnel, we drop the packet in lieu of passing it 5591 * along as accepted, the way a global-policy miss would. 5592 * 5593 * NOTE2: "negotiate transport" tunnels should match ALL 5594 * inbound packets, but we do not uncomment the ASSERT() 5595 * below because if/when we open PF_POLICY, a user can 5596 * shoot him/her-self in the foot with a 0 priority.
5597 */ 5598 5599 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5600 #ifdef FRAGCACHE_DEBUG 5601 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5602 "per-port policy\n"); 5603 #endif 5604 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5605 DROPPER(ipss, ipds_spd_explicit), 5606 &ipss->ipsec_spd_dropper); 5607 return (NULL); 5608 } 5609 5610 #ifdef FRAGCACHE_DEBUG 5611 cmn_err(CE_WARN, "Having matching tunnel per-port policy\n"); 5612 #endif 5613 5614 /* Construct an IPSEC_OUT message. */ 5615 ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns); 5616 if (ipsec_mp == NULL) { 5617 IPPOL_REFRELE(pol, ns); 5618 ip_drop_packet(mp, B_FALSE, NULL, NULL, 5619 DROPPER(ipss, ipds_spd_nomem), 5620 &ipss->ipsec_spd_dropper); 5621 return (NULL); 5622 } 5623 ipsec_mp->b_cont = mp; 5624 io = (ipsec_out_t *)ipsec_mp->b_rptr; 5625 IPPH_REFHOLD(polhead); 5626 /* 5627 * NOTE: free() function of ipsec_out mblk will release polhead and 5628 * pol references. 5629 */ 5630 io->ipsec_out_polhead = polhead; 5631 io->ipsec_out_policy = pol; 5632 io->ipsec_out_zoneid = atp->tun_zoneid; 5633 io->ipsec_out_v4 = (outer_ipv4 != NULL); 5634 io->ipsec_out_secure = B_TRUE; 5635 5636 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5637 /* Set up transport mode for tunnelled packets. */ 5638 io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5639 IPPROTO_IPV6; 5640 return (ipsec_mp); 5641 } 5642 5643 /* Fill in tunnel-mode goodies here. */ 5644 io->ipsec_out_tunnel = B_TRUE; 5645 /* XXX Do I need to fill in all of the goodies here? */ 5646 if (inner_ipv4) { 5647 io->ipsec_out_inaf = AF_INET; 5648 io->ipsec_out_insrc[0] = 5649 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5650 io->ipsec_out_indst[0] = 5651 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5652 } else { 5653 io->ipsec_out_inaf = AF_INET6; 5654 io->ipsec_out_insrc[0] = 5655 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5656 io->ipsec_out_insrc[1] = 5657 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5658 io->ipsec_out_insrc[2] = 5659 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5660 io->ipsec_out_insrc[3] = 5661 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5662 io->ipsec_out_indst[0] = 5663 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5664 io->ipsec_out_indst[1] = 5665 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5666 io->ipsec_out_indst[2] = 5667 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5668 io->ipsec_out_indst[3] = 5669 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5670 } 5671 io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5672 io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5673 /* NOTE: These are used for transport mode too. */ 5674 io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5675 io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5676 io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5677 5678 /* 5679 * The mp pointer still valid 5680 * Add ipsec_out to each fragment. 
The fragment head already has one. 5682 */ 5683 nmp = mp->b_next; 5684 mp->b_next = NULL; 5685 mp = nmp; 5686 ASSERT(ipsec_mp != NULL); 5687 while (mp != NULL) { 5688 nmp = mp->b_next; 5689 ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns); 5690 if (ipsec_mp->b_next == NULL) { 5691 ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL, 5692 DROPPER(ipss, ipds_spd_nomem), 5693 &ipss->ipsec_spd_dropper); 5694 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5695 DROPPER(ipss, ipds_spd_nomem), 5696 &ipss->ipsec_spd_dropper); 5697 return (NULL); 5698 } 5699 ipsec_mp = ipsec_mp->b_next; 5700 mp->b_next = NULL; 5701 mp = nmp; 5702 } 5703 return (ipsec_mp_head); 5704 } 5705 5706 /* 5707 * NOTE: The following releases pol's reference and 5708 * calls ip_drop_packet() for me on NULL returns. 5709 */ 5710 mblk_t * 5711 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, 5712 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns) 5713 { 5714 /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */ 5715 mblk_t *data_chain = NULL, *data_tail = NULL; 5716 mblk_t *ii_next; 5717 5718 while (ipsec_mp != NULL) { 5719 ii_next = ipsec_mp->b_next; 5720 ipsec_mp->b_next = NULL; /* No tripping asserts. */ 5721 5722 /* 5723 * Need IPPOL_REFHOLD(pol) for extras because 5724 * ipsec_check_ipsecin_policy() does the refrele. 5725 */ 5726 IPPOL_REFHOLD(pol); 5727 5728 if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4, 5729 inner_ipv6, pkt_unique, ns) != NULL) { 5730 if (data_tail == NULL) { 5731 /* First one */ 5732 data_chain = data_tail = ipsec_mp->b_cont; 5733 } else { 5734 data_tail->b_next = ipsec_mp->b_cont; 5735 data_tail = data_tail->b_next; 5736 } 5737 freeb(ipsec_mp); 5738 } else { 5739 /* 5740 * ipsec_check_ipsecin_policy() freed ipsec_mp 5741 * already. Need to get rid of any extra pol 5742 * references, and any remaining bits as well. 5743 */ 5744 IPPOL_REFRELE(pol, ns); 5745 ipsec_freemsg_chain(data_chain); 5746 ipsec_freemsg_chain(ii_next); /* ipdrop stats? */ 5747 return (NULL); 5748 } 5749 ipsec_mp = ii_next; 5750 } 5751 /* 5752 * One last release because either the loop bumped it up, or we never 5753 * called ipsec_check_ipsecin_policy(). 5754 */ 5755 IPPOL_REFRELE(pol, ns); 5756 5757 /* data_chain is ready for return to tun module. */ 5758 return (data_chain); 5759 } 5760 5761 5762 /* 5763 * Returns B_TRUE if the inbound packet passed an IPsec policy check. Returns 5764 * B_FALSE if it failed or if it is a fragment needing its friends before a 5765 * policy check can be performed. 5766 * 5767 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead. 5768 * data_mp may be reassigned with a b_next chain of packets if fragments 5769 * needed to be collected for a proper policy check. 5770 * 5771 * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE. This 5772 * function calls ip_drop_packet() on data_mp if need be. 5773 * 5774 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller 5775 * is inspecting an ICMP packet. 5776 */ 5777 boolean_t 5778 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, 5779 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, 5780 ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns) 5781 { 5782 ipsec_policy_head_t *polhead; 5783 ipsec_selector_t sel; 5784 mblk_t *message = (ipsec_mp == NULL) ?
*data_mp : ipsec_mp; 5785 ipsec_policy_t *pol; 5786 uint16_t tmpport; 5787 selret_t rc; 5788 boolean_t retval, port_policy_present, is_icmp, global_present; 5789 in6_addr_t tmpaddr; 5790 ipaddr_t tmp4; 5791 ipsec_stack_t *ipss = ns->netstack_ipsec; 5792 uint8_t flags, *holder, *outer_hdr; 5793 5794 sel.ips_is_icmp_inv_acq = 0; 5795 5796 if (outer_ipv4 != NULL) { 5797 ASSERT(outer_ipv6 == NULL); 5798 outer_hdr = (uint8_t *)outer_ipv4; 5799 global_present = ipss->ipsec_inbound_v4_policy_present; 5800 } else { 5801 outer_hdr = (uint8_t *)outer_ipv6; 5802 global_present = ipss->ipsec_inbound_v6_policy_present; 5803 } 5804 ASSERT(outer_hdr != NULL); 5805 5806 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || 5807 inner_ipv4 == NULL && inner_ipv6 != NULL); 5808 ASSERT(message == *data_mp || message->b_cont == *data_mp); 5809 5810 if (outer_hdr_len < 0) { 5811 outer_hdr_len = (-outer_hdr_len); 5812 is_icmp = B_TRUE; 5813 } else { 5814 is_icmp = B_FALSE; 5815 } 5816 5817 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5818 polhead = itp->itp_policy; 5819 /* 5820 * We need to perform full Tunnel-Mode enforcement, 5821 * and we need to have inner-header data for such enforcement. 5822 * 5823 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5824 * and on return. 5825 */ 5826 5827 port_policy_present = ((itp->itp_flags & 5828 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5829 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5830 (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5831 5832 rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4, 5833 inner_ipv6, flags); 5834 5835 switch (rc) { 5836 case SELRET_NOMEM: 5837 ip_drop_packet(message, B_TRUE, NULL, NULL, 5838 DROPPER(ipss, ipds_spd_nomem), 5839 &ipss->ipsec_spd_dropper); 5840 return (B_FALSE); 5841 case SELRET_TUNFRAG: 5842 /* 5843 * At this point, if we're cleartext, we don't want 5844 * to go there. 5845 */ 5846 if (ipsec_mp == NULL) { 5847 ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, 5848 DROPPER(ipss, ipds_spd_got_clear), 5849 &ipss->ipsec_spd_dropper); 5850 *data_mp = NULL; 5851 return (B_FALSE); 5852 } 5853 ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> 5854 ipsec_in_secure); 5855 message = ipsec_fragcache_add(&itp->itp_fragcache, 5856 ipsec_mp, *data_mp, outer_hdr_len, ipss); 5857 5858 if (message == NULL) { 5859 /* 5860 * Data is cached, fragment chain is not 5861 * complete. I consume ipsec_mp and data_mp 5862 */ 5863 return (B_FALSE); 5864 } 5865 5866 /* 5867 * If we get here, we have a full fragment chain. 5868 * Reacquire headers and selectors from first fragment. 5869 */ 5870 if (inner_ipv4 != NULL) { 5871 inner_ipv4 = (ipha_t *)message->b_cont->b_rptr; 5872 ASSERT(message->b_cont->b_wptr - 5873 message->b_cont->b_rptr > sizeof (ipha_t)); 5874 } else { 5875 inner_ipv6 = (ip6_t *)message->b_cont->b_rptr; 5876 ASSERT(message->b_cont->b_wptr - 5877 message->b_cont->b_rptr > sizeof (ip6_t)); 5878 } 5879 /* Use SEL_NONE so we always get ports! */ 5880 rc = ipsec_init_inbound_sel(&sel, message->b_cont, 5881 inner_ipv4, inner_ipv6, SEL_NONE); 5882 switch (rc) { 5883 case SELRET_SUCCESS: 5884 /* 5885 * Get to same place as first caller's 5886 * SELRET_SUCCESS case. 
5887 */ 5888 break; 5889 case SELRET_NOMEM: 5890 ip_drop_packet_chain(message, B_TRUE, 5891 NULL, NULL, 5892 DROPPER(ipss, ipds_spd_nomem), 5893 &ipss->ipsec_spd_dropper); 5894 return (B_FALSE); 5895 case SELRET_BADPKT: 5896 ip_drop_packet_chain(message, B_TRUE, 5897 NULL, NULL, 5898 DROPPER(ipss, ipds_spd_malformed_frag), 5899 &ipss->ipsec_spd_dropper); 5900 return (B_FALSE); 5901 case SELRET_TUNFRAG: 5902 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5903 /* FALLTHRU */ 5904 default: 5905 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5906 " returns bizarro 0x%x", rc); 5907 /* Guaranteed panic! */ 5908 ASSERT(rc == SELRET_NOMEM); 5909 return (B_FALSE); 5910 } 5911 /* FALLTHRU */ 5912 case SELRET_SUCCESS: 5913 /* 5914 * Common case: 5915 * No per-port policy or a non-fragment. Keep going. 5916 */ 5917 break; 5918 case SELRET_BADPKT: 5919 /* 5920 * We may receive ICMP (with IPv6 inner) packets that 5921 * trigger this return value. Send 'em in for 5922 * enforcement checking. 5923 */ 5924 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5925 "sending 'bad packet' in for enforcement"); 5926 break; 5927 default: 5928 cmn_err(CE_WARN, 5929 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5930 rc); 5931 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5932 return (B_FALSE); 5933 } 5934 5935 if (is_icmp) { 5936 /* 5937 * Swap local/remote because this is an ICMP packet. 5938 */ 5939 tmpaddr = sel.ips_local_addr_v6; 5940 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5941 sel.ips_remote_addr_v6 = tmpaddr; 5942 tmpport = sel.ips_local_port; 5943 sel.ips_local_port = sel.ips_remote_port; 5944 sel.ips_remote_port = tmpport; 5945 } 5946 5947 /* find_policy_head() */ 5948 rw_enter(&polhead->iph_lock, RW_READER); 5949 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5950 &sel, ns); 5951 rw_exit(&polhead->iph_lock); 5952 if (pol != NULL) { 5953 if (ipsec_mp == NULL || 5954 !((ipsec_in_t *)ipsec_mp->b_rptr)-> 5955 ipsec_in_secure) { 5956 retval = pol->ipsp_act->ipa_allow_clear; 5957 if (!retval) { 5958 /* 5959 * XXX should never get here with 5960 * tunnel reassembled fragments? 5961 */ 5962 ASSERT(message->b_next == NULL); 5963 ip_drop_packet(message, B_TRUE, NULL, 5964 NULL, 5965 DROPPER(ipss, ipds_spd_got_clear), 5966 &ipss->ipsec_spd_dropper); 5967 } else if (ipsec_mp != NULL) { 5968 freeb(ipsec_mp); 5969 } 5970 5971 IPPOL_REFRELE(pol, ns); 5972 return (retval); 5973 } 5974 /* 5975 * NOTE: The following releases pol's reference and 5976 * calls ip_drop_packet() for me on NULL returns. 5977 * 5978 * "sel" is still good here, so let's use it! 5979 */ 5980 *data_mp = ipsec_check_ipsecin_policy_reasm(message, 5981 pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( 5982 sel.ips_remote_port, sel.ips_local_port, 5983 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 5984 IPPROTO_ENCAP, sel.ips_protocol), ns); 5985 return (*data_mp != NULL); 5986 } 5987 5988 /* 5989 * Else fallthru and check the global policy on the outer 5990 * header(s) if this tunnel is an old-style transport-mode 5991 * one. Drop the packet explicitly (no policy entry) for 5992 * a new-style tunnel-mode tunnel. 
5993 */ 5994 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 5995 ip_drop_packet_chain(message, B_TRUE, NULL, 5996 NULL, 5997 DROPPER(ipss, ipds_spd_explicit), 5998 &ipss->ipsec_spd_dropper); 5999 return (B_FALSE); 6000 } 6001 } 6002 6003 /* 6004 * NOTE: If we reach here, we will not have packet chains from 6005 * fragcache_add(), because the only way I get chains is on a 6006 * tunnel-mode tunnel, which either returns with a pass, or gets 6007 * hit by the ip_drop_packet_chain() call right above here. 6008 */ 6009 6010 /* If no per-tunnel security, check global policy now. */ 6011 if (ipsec_mp != NULL && !global_present) { 6012 if (((ipsec_in_t *)(ipsec_mp->b_rptr))-> 6013 ipsec_in_icmp_loopback) { 6014 /* 6015 * This is an ICMP message with an ipsec_mp 6016 * attached. We should accept it. 6017 */ 6018 if (ipsec_mp != NULL) 6019 freeb(ipsec_mp); 6020 return (B_TRUE); 6021 } 6022 6023 ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, 6024 DROPPER(ipss, ipds_spd_got_secure), 6025 &ipss->ipsec_spd_dropper); 6026 return (B_FALSE); 6027 } 6028 6029 /* 6030 * The following assertion is valid because only the tun module alters 6031 * the mblk chain - stripping the outer header by advancing mp->b_rptr. 6032 */ 6033 ASSERT(is_icmp || ((*data_mp)->b_datap->db_base <= outer_hdr && 6034 outer_hdr < (*data_mp)->b_rptr)); 6035 holder = (*data_mp)->b_rptr; 6036 (*data_mp)->b_rptr = outer_hdr; 6037 6038 if (is_icmp) { 6039 /* 6040 * For ICMP packets, "outer_ipvN" is set to the outer header 6041 * that is *INSIDE* the ICMP payload. For global policy 6042 * checking, we need to reverse src/dst on the payload in 6043 * order to construct selectors appropriately. See "ripha" 6044 * constructions in ip.c. To avoid a bug like 6478464 (see 6045 * earlier in this file), we will actually exchange src/dst 6046 * in the packet, and reverse if after the call to 6047 * ipsec_check_global_policy(). 6048 */ 6049 if (outer_ipv4 != NULL) { 6050 tmp4 = outer_ipv4->ipha_src; 6051 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6052 outer_ipv4->ipha_dst = tmp4; 6053 } else { 6054 ASSERT(outer_ipv6 != NULL); 6055 tmpaddr = outer_ipv6->ip6_src; 6056 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6057 outer_ipv6->ip6_dst = tmpaddr; 6058 } 6059 } 6060 6061 /* NOTE: Frees message if it returns NULL. */ 6062 if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6, 6063 (ipsec_mp != NULL), ns) == NULL) { 6064 return (B_FALSE); 6065 } 6066 6067 if (is_icmp) { 6068 /* Set things back to normal. */ 6069 if (outer_ipv4 != NULL) { 6070 tmp4 = outer_ipv4->ipha_src; 6071 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6072 outer_ipv4->ipha_dst = tmp4; 6073 } else { 6074 /* No need for ASSERT()s now. */ 6075 tmpaddr = outer_ipv6->ip6_src; 6076 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6077 outer_ipv6->ip6_dst = tmpaddr; 6078 } 6079 } 6080 6081 (*data_mp)->b_rptr = holder; 6082 6083 if (ipsec_mp != NULL) 6084 freeb(ipsec_mp); 6085 6086 /* 6087 * At this point, we pretend it's a cleartext accepted 6088 * packet. 6089 */ 6090 return (B_TRUE); 6091 } 6092 6093 /* 6094 * AVL comparison routine for our list of tunnel polheads. 6095 */ 6096 static int 6097 tunnel_compare(const void *arg1, const void *arg2) 6098 { 6099 ipsec_tun_pol_t *left, *right; 6100 int rc; 6101 6102 left = (ipsec_tun_pol_t *)arg1; 6103 right = (ipsec_tun_pol_t *)arg2; 6104 6105 rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ); 6106 return (rc == 0 ? rc : (rc > 0 ? 1 : -1)); 6107 } 6108 6109 /* 6110 * Free a tunnel policy node. 
6112 void 6113 itp_free(ipsec_tun_pol_t *node, netstack_t *ns) 6114 { 6115 IPPH_REFRELE(node->itp_policy, ns); 6116 IPPH_REFRELE(node->itp_inactive, ns); 6117 mutex_destroy(&node->itp_lock); 6118 kmem_free(node, sizeof (*node)); 6119 } 6120 6121 void 6122 itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns) 6123 { 6124 ipsec_stack_t *ipss = ns->netstack_ipsec; 6125 6126 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6127 ipss->ipsec_tunnel_policy_gen++; 6128 ipsec_fragcache_uninit(&node->itp_fragcache); 6129 avl_remove(&ipss->ipsec_tunnel_policies, node); 6130 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6131 ITP_REFRELE(node, ns); 6132 } 6133 6134 /* 6135 * Public interface to look up a tunnel security policy by name. Used by 6136 * spdsock mostly. Returns "node" with a bumped refcnt. 6137 */ 6138 ipsec_tun_pol_t * 6139 get_tunnel_policy(char *name, netstack_t *ns) 6140 { 6141 ipsec_tun_pol_t *node, lookup; 6142 ipsec_stack_t *ipss = ns->netstack_ipsec; 6143 6144 (void) strncpy(lookup.itp_name, name, LIFNAMSIZ); 6145 6146 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6147 node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6148 &lookup, NULL); 6149 if (node != NULL) { 6150 ITP_REFHOLD(node); 6151 } 6152 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6153 6154 return (node); 6155 } 6156 6157 /* 6158 * Public interface to walk all tunnel security policies. Useful for spdsock 6159 * DUMP operations. iterator() will not consume a reference. 6160 */ 6161 void 6162 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *), 6163 void *arg, netstack_t *ns) 6164 { 6165 ipsec_tun_pol_t *node; 6166 ipsec_stack_t *ipss = ns->netstack_ipsec; 6167 6168 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6169 for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL; 6170 node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) { 6171 iterator(node, arg, ns); 6172 } 6173 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6174 } 6175 6176 /* 6177 * Initialize policy head. This can only fail if there's a memory problem. 6178 */ 6179 static boolean_t 6180 tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns) 6181 { 6182 ipsec_stack_t *ipss = ns->netstack_ipsec; 6183 6184 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 6185 iph->iph_refs = 1; 6186 iph->iph_gen = 0; 6187 if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize, 6188 KM_SLEEP, B_FALSE, ns) != 0) { 6189 ipsec_polhead_free_table(iph); 6190 return (B_FALSE); 6191 } 6192 ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize); 6193 return (B_TRUE); 6194 } 6195 6196 /* 6197 * Create a tunnel policy node with "name". Set *errno to 6198 * ENOMEM if there's a memory problem, and to EEXIST if there's an existing 6199 * node. 6200 */
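/*
 * Hypothetical caller sketch (illustrative only; spdsock is the usual
 * consumer and its real code differs):
 *
 *	int err;
 *	uint64_t gen;
 *	ipsec_tun_pol_t *itp;
 *
 *	itp = create_tunnel_policy("ip.tun0", &err, &gen, ns);
 *	if (itp == NULL && err == EEXIST)
 *		itp = get_tunnel_policy("ip.tun0", ns);
 *	if (itp != NULL) {
 *		... use itp->itp_policy / itp->itp_inactive ...
 *		ITP_REFRELE(itp, ns);
 *	}
 */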
6201 ipsec_tun_pol_t * 6202 create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns) 6203 { 6204 ipsec_tun_pol_t *newbie, *existing; 6205 avl_index_t where; 6206 ipsec_stack_t *ipss = ns->netstack_ipsec; 6207 6208 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP); 6209 if (newbie == NULL) { 6210 *errno = ENOMEM; 6211 return (NULL); 6212 } 6213 if (!ipsec_fragcache_init(&newbie->itp_fragcache)) { 6214 kmem_free(newbie, sizeof (*newbie)); 6215 *errno = ENOMEM; 6216 return (NULL); 6217 } 6218 6219 (void) strncpy(newbie->itp_name, name, LIFNAMSIZ); 6220 6221 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6222 existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6223 newbie, &where); 6224 if (existing != NULL) { 6225 itp_free(newbie, ns); 6226 *errno = EEXIST; 6227 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6228 return (NULL); 6229 } 6230 ipss->ipsec_tunnel_policy_gen++; 6231 *gen = ipss->ipsec_tunnel_policy_gen; 6232 newbie->itp_refcnt = 2; /* One for the caller, one for the tree. */ 6233 newbie->itp_next_policy_index = 1; 6234 avl_insert(&ipss->ipsec_tunnel_policies, newbie, where); 6235 mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL); 6236 newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t), 6237 KM_NOSLEEP); 6238 if (newbie->itp_policy == NULL) 6239 goto nomem; 6240 newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t), 6241 KM_NOSLEEP); 6242 if (newbie->itp_inactive == NULL) { 6243 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6244 goto nomem; 6245 } 6246 6247 if (!tunnel_polhead_init(newbie->itp_policy, ns)) { 6248 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6249 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6250 goto nomem; 6251 } else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) { 6252 IPPH_REFRELE(newbie->itp_policy, ns); 6253 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6254 goto nomem; 6255 } 6256 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6257 6258 return (newbie); 6259 nomem: 6260 *errno = ENOMEM; 6261 kmem_free(newbie, sizeof (*newbie)); 6262 return (NULL); 6263 } 6264 6265 /* 6266 * We can't call the tun_t lookup function until tun is 6267 * loaded, so create a dummy function to avoid symbol 6268 * lookup errors on boot. 6269 */ 6270 /* ARGSUSED */ 6271 ipsec_tun_pol_t * 6272 itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af, netstack_t *ns) 6273 { 6274 return (NULL); /* Always return NULL. */ 6275 } 6276 6277 /* 6278 * Frag cache code, based on SunScreen 3.2 source 6279 * screen/kernel/common/screen_fragcache.c 6280 */ 6281 6282 #define IPSEC_FRAG_TTL_MAX 5 6283 /* 6284 * Note that the following parameters create 256 hash buckets 6285 * with 1024 free entries to be distributed. Entries are cleaned 6286 * periodically, and a clean is also attempted when there is no 6287 * free space, but this system errs on the side of dropping packets 6288 * over creating memory exhaustion. We may decide to make hash 6289 * factor a tunable if this proves to be a bad decision. 6290 */ 6291 #define IPSEC_FRAG_HASH_SLOTS (1<<8) 6292 #define IPSEC_FRAG_HASH_FACTOR 4 6293 #define IPSEC_FRAG_HASH_SIZE (IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR) 6294 6295 #define IPSEC_FRAG_HASH_MASK (IPSEC_FRAG_HASH_SLOTS - 1) 6296 #define IPSEC_FRAG_HASH_FUNC(id) (((id) & IPSEC_FRAG_HASH_MASK) ^ \ 6297 (((id) / \ 6298 (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \ 6299 IPSEC_FRAG_HASH_MASK)) 6300 6301 /* Maximum fragments per packet: 48 bytes payload x 1366 packets > 64KB */
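/*
 * Arithmetic behind the numbers above (informal): with at least 48
 * bytes of payload per fragment, 1366 * 48 = 65568 > 65535, so no
 * legal datagram ever needs more than IPSEC_MAX_FRAGS fragments.
 * IPSEC_FRAG_HASH_FUNC() XORs the two low-order bytes of the IP
 * ident; e.g. for id 0x1234:
 *	(0x1234 & 0xff) ^ ((0x1234 / 256) & 0xff) = 0x34 ^ 0x12 = 0x26
 * which spreads sequentially assigned idents across all 256 slots.
 */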
6302 #define IPSEC_MAX_FRAGS 1366 6303 6304 #define V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6305 IPH_OFFSET) << 3) 6306 #define V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6307 IPH_MF) 6308 6309 /* 6310 * Initialize an ipsec fragcache instance. 6311 * Returns B_FALSE if memory allocation fails. 6312 */ 6313 boolean_t 6314 ipsec_fragcache_init(ipsec_fragcache_t *frag) 6315 { 6316 ipsec_fragcache_entry_t *ftemp; 6317 int i; 6318 6319 mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL); 6320 frag->itpf_ptr = (ipsec_fragcache_entry_t **) 6321 kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) * 6322 IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP); 6323 if (frag->itpf_ptr == NULL) 6324 return (B_FALSE); 6325 6326 ftemp = (ipsec_fragcache_entry_t *) 6327 kmem_zalloc(sizeof (ipsec_fragcache_entry_t) * 6328 IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP); 6329 if (ftemp == NULL) { 6330 kmem_free(frag->itpf_ptr, sizeof (ipsec_fragcache_entry_t *) * 6331 IPSEC_FRAG_HASH_SLOTS); 6332 return (B_FALSE); 6333 } 6334 6335 frag->itpf_freelist = NULL; 6336 6337 for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) { 6338 ftemp->itpfe_next = frag->itpf_freelist; 6339 frag->itpf_freelist = ftemp; 6340 ftemp++; 6341 } 6342 6343 frag->itpf_expire_hint = 0; 6344 6345 return (B_TRUE); 6346 } 6347 6348 void 6349 ipsec_fragcache_uninit(ipsec_fragcache_t *frag) 6350 { 6351 ipsec_fragcache_entry_t *fep; 6352 int i; 6353 6354 mutex_enter(&frag->itpf_lock); 6355 if (frag->itpf_ptr) { 6356 /* Delete any existing fragcache entry chains */ 6357 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6358 fep = (frag->itpf_ptr)[i]; 6359 while (fep != NULL) { 6360 /* Returned fep is next in chain or NULL */ 6361 fep = fragcache_delentry(i, fep, frag); 6362 } 6363 } 6364 /* 6365 * Chase the pointers back to the beginning 6366 * of the memory allocation and then 6367 * get rid of the allocated freelist 6368 */ 6369 while (frag->itpf_freelist->itpfe_next != NULL) 6370 frag->itpf_freelist = frag->itpf_freelist->itpfe_next; 6371 /* 6372 * XXX - If we ever dynamically grow the freelist 6373 * then we'll have to free entries individually 6374 * or determine how many entries or chunks we have 6375 * grown since the initial allocation. 6376 */ 6377 kmem_free(frag->itpf_freelist, 6378 sizeof (ipsec_fragcache_entry_t) * 6379 IPSEC_FRAG_HASH_SIZE); 6380 /* Free the fragcache structure */ 6381 kmem_free(frag->itpf_ptr, 6382 sizeof (ipsec_fragcache_entry_t *) * 6383 IPSEC_FRAG_HASH_SLOTS); 6384 } 6385 mutex_exit(&frag->itpf_lock); 6386 mutex_destroy(&frag->itpf_lock); 6387 } 6388 6389 /* 6390 * Add a fragment to the fragment cache. Consumes mp if NULL is returned. 6391 * Returns mp if a whole packet has been assembled, NULL otherwise 6392 */ 6393 6394 mblk_t * 6395 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, 6396 int outer_hdr_len, ipsec_stack_t *ipss) 6397 { 6398 boolean_t is_v4; 6399 time_t itpf_time; 6400 ipha_t *iph; 6401 ipha_t *oiph; 6402 ip6_t *ip6h = NULL; 6403 uint8_t v6_proto; 6404 uint8_t *v6_proto_p; 6405 uint16_t ip6_hdr_length; 6406 ip6_pkt_t ipp; 6407 ip6_frag_t *fraghdr; 6408 ipsec_fragcache_entry_t *fep; 6409 int i; 6410 mblk_t *nmp, *prevmp, *spare_mp = NULL; 6411 int firstbyte, lastbyte; 6412 int offset; 6413 int last; 6414 boolean_t inbound = (ipsec_mp != NULL); 6415 mblk_t *first_mp = inbound ?
ipsec_mp : mp; 6416 6417 mutex_enter(&frag->itpf_lock); 6418 6419 oiph = (ipha_t *)mp->b_rptr; 6420 iph = (ipha_t *)(mp->b_rptr + outer_hdr_len); 6421 if (IPH_HDR_VERSION(iph) == IPV4_VERSION) { 6422 is_v4 = B_TRUE; 6423 } else { 6424 ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION); 6425 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 6426 mutex_exit(&frag->itpf_lock); 6427 ip_drop_packet(first_mp, inbound, NULL, NULL, 6428 DROPPER(ipss, ipds_spd_nomem), 6429 &ipss->ipsec_spd_dropper); 6430 return (NULL); 6431 } 6432 ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len); 6433 6434 if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length, 6435 &v6_proto_p)) { 6436 /* 6437 * We couldn't find the upper layer protocol, 6438 * so the packet must be malformed. 6439 */ 6440 mutex_exit(&frag->itpf_lock); 6441 ip_drop_packet(first_mp, inbound, NULL, NULL, 6442 DROPPER(ipss, ipds_spd_malformed_packet), 6443 &ipss->ipsec_spd_dropper); 6444 freemsg(spare_mp); 6445 return (NULL); 6446 } else { 6447 v6_proto = *v6_proto_p; 6448 } 6449 6450 6451 bzero(&ipp, sizeof (ipp)); 6452 (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL); 6453 if (!(ipp.ipp_fields & IPPF_FRAGHDR)) { 6454 /* 6455 * We think this is a fragment, but didn't find 6456 * a fragment header. Something is wrong. 6457 */ 6458 mutex_exit(&frag->itpf_lock); 6459 ip_drop_packet(first_mp, inbound, NULL, NULL, 6460 DROPPER(ipss, ipds_spd_malformed_frag), 6461 &ipss->ipsec_spd_dropper); 6462 freemsg(spare_mp); 6463 return (NULL); 6464 } 6465 fraghdr = ipp.ipp_fraghdr; 6466 is_v4 = B_FALSE; 6467 } 6468 6469 /* Anything to cleanup? */ 6470 6471 /* 6472 * This cleanup call could be put in a timer loop, 6473 * but leaving it here is just as reasonable a decision. 6474 * The disadvantage is that it only gets called when 6475 * frags are added. The advantage is that it is not 6476 * susceptible to race conditions the way a time-based cleanup 6477 * may be. 6478 */
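/*
 * The comparison below is what keeps inline cleaning cheap:
 * itpf_expire_hint caches the earliest itpfe_exp in the cache (see
 * ipsec_fragcache_clean()), so most adds compare two timestamps and
 * skip the full hash-table sweep.
 */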
6479 itpf_time = gethrestime_sec(); 6480 if (itpf_time >= frag->itpf_expire_hint) 6481 ipsec_fragcache_clean(frag); 6482 6483 /* Lookup to see if there is an existing entry */ 6484 6485 if (is_v4) 6486 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6487 else 6488 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6489 6490 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6491 if (is_v4) { 6492 ASSERT(iph != NULL); 6493 if ((fep->itpfe_id == iph->ipha_ident) && 6494 (fep->itpfe_src == iph->ipha_src) && 6495 (fep->itpfe_dst == iph->ipha_dst) && 6496 (fep->itpfe_proto == iph->ipha_protocol)) 6497 break; 6498 } else { 6499 ASSERT(fraghdr != NULL); 6500 ASSERT(fep != NULL); 6501 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6502 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6503 &ip6h->ip6_src) && 6504 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6505 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6506 break; 6507 } 6508 } 6509 6510 if (is_v4) { 6511 firstbyte = V4_FRAG_OFFSET(iph); 6512 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6513 IPH_HDR_LENGTH(iph); 6514 last = (V4_MORE_FRAGS(iph) == 0); 6515 #ifdef FRAGCACHE_DEBUG 6516 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6517 "last = %d, id = %d\n", firstbyte, lastbyte, last, 6518 iph->ipha_ident); 6519 #endif 6520 } else { 6521 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6522 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6523 sizeof (ip6_t) - ip6_hdr_length; 6524 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6525 #ifdef FRAGCACHE_DEBUG 6526 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6527 "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n", 6528 firstbyte, lastbyte, last, fraghdr->ip6f_ident, 6529 fraghdr, spare_mp); 6530 #endif 6531 } 6532 6533 /* check for bogus fragments and delete the entry */ 6534 if (firstbyte > 0 && firstbyte <= 8) { 6535 if (fep != NULL) 6536 (void) fragcache_delentry(i, fep, frag); 6537 mutex_exit(&frag->itpf_lock); 6538 ip_drop_packet(first_mp, inbound, NULL, NULL, 6539 DROPPER(ipss, ipds_spd_malformed_frag), 6540 &ipss->ipsec_spd_dropper); 6541 freemsg(spare_mp); 6542 return (NULL); 6543 } 6544 6545 /* Not found, allocate a new entry */ 6546 if (fep == NULL) { 6547 if (frag->itpf_freelist == NULL) { 6548 /* see if there is some space */ 6549 ipsec_fragcache_clean(frag); 6550 if (frag->itpf_freelist == NULL) { 6551 mutex_exit(&frag->itpf_lock); 6552 ip_drop_packet(first_mp, inbound, NULL, NULL, 6553 DROPPER(ipss, ipds_spd_nomem), 6554 &ipss->ipsec_spd_dropper); 6555 freemsg(spare_mp); 6556 return (NULL); 6557 } 6558 } 6559 6560 fep = frag->itpf_freelist; 6561 frag->itpf_freelist = fep->itpfe_next; 6562 6563 if (is_v4) { 6564 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6565 sizeof (struct in_addr)); 6566 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6567 sizeof (struct in_addr)); 6568 fep->itpfe_id = iph->ipha_ident; 6569 fep->itpfe_proto = iph->ipha_protocol; 6570 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6571 } else { 6572 bcopy((in6_addr_t *)&ip6h->ip6_src, 6573 (in6_addr_t *)&fep->itpfe_src6, 6574 sizeof (struct in6_addr)); 6575 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6576 (in6_addr_t *)&fep->itpfe_dst6, 6577 sizeof (struct in6_addr)); 6578 fep->itpfe_id = fraghdr->ip6f_ident; 6579 fep->itpfe_proto = v6_proto; 6580 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6581 } 6582 itpf_time = gethrestime_sec(); 6583 fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1; 6584 fep->itpfe_last = 0; 6585 fep->itpfe_fraglist = NULL; 6586
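/*
 * itpfe_depth counts the fragments chained on this entry; once it
 * exceeds IPSEC_MAX_FRAGS the entire entry is discarded below as a
 * defense against fragment-flood (jolt2-style) attacks.
 */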
fep->itpfe_depth = 0; 6587 fep->itpfe_next = (frag->itpf_ptr)[i]; 6588 (frag->itpf_ptr)[i] = fep; 6589 6590 if (frag->itpf_expire_hint > fep->itpfe_exp) 6591 frag->itpf_expire_hint = fep->itpfe_exp; 6592 6593 } 6594 freemsg(spare_mp); 6595 6596 /* Insert it in the frag list */ 6597 /* List is in order by starting offset of fragments */ 6598 6599 prevmp = NULL; 6600 for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) { 6601 ipha_t *niph; 6602 ipha_t *oniph; 6603 ip6_t *nip6h; 6604 ip6_pkt_t nipp; 6605 ip6_frag_t *nfraghdr; 6606 uint16_t nip6_hdr_length; 6607 uint8_t *nv6_proto_p; 6608 int nfirstbyte, nlastbyte; 6609 char *data, *ndata; 6610 mblk_t *nspare_mp = NULL; 6611 mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp); 6612 int hdr_len; 6613 6614 oniph = (ipha_t *)mp->b_rptr; 6615 nip6h = NULL; 6616 niph = NULL; 6617 6618 /* 6619 * Determine outer header type and length and set 6620 * pointers appropriately 6621 */ 6622 6623 if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) { 6624 hdr_len = ((outer_hdr_len != 0) ? 6625 IPH_HDR_LENGTH(oiph) : 0); 6626 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6627 } else { 6628 ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION); 6629 if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) { 6630 mutex_exit(&frag->itpf_lock); 6631 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6632 DROPPER(ipss, ipds_spd_nomem), 6633 &ipss->ipsec_spd_dropper); 6634 return (NULL); 6635 } 6636 nip6h = (ip6_t *)nspare_mp->b_rptr; 6637 (void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6638 &nip6_hdr_length, &v6_proto_p); 6639 hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0); 6640 } 6641 6642 /* 6643 * Determine inner header type and length and set 6644 * pointers appropriately 6645 */ 6646 6647 if (is_v4) { 6648 if (niph == NULL) { 6649 /* Was v6 outer */ 6650 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6651 } 6652 nfirstbyte = V4_FRAG_OFFSET(niph); 6653 nlastbyte = nfirstbyte + ntohs(niph->ipha_length) - 6654 IPH_HDR_LENGTH(niph); 6655 } else { 6656 if ((nspare_mp == NULL) && 6657 ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) { 6658 mutex_exit(&frag->itpf_lock); 6659 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6660 DROPPER(ipss, ipds_spd_nomem), 6661 &ipss->ipsec_spd_dropper); 6662 return (NULL); 6663 } 6664 nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len); 6665 if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6666 &nip6_hdr_length, &nv6_proto_p)) { 6667 mutex_exit(&frag->itpf_lock); 6668 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6669 DROPPER(ipss, ipds_spd_malformed_frag), 6670 &ipss->ipsec_spd_dropper); 6671 ipsec_freemsg_chain(nspare_mp); 6672 return (NULL); 6673 } 6674 bzero(&nipp, sizeof (nipp)); 6675 (void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL); 6676 nfraghdr = nipp.ipp_fraghdr; 6677 nfirstbyte = ntohs(nfraghdr->ip6f_offlg & 6678 IP6F_OFF_MASK); 6679 nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) + 6680 sizeof (ip6_t) - nip6_hdr_length; 6681 } 6682 ipsec_freemsg_chain(nspare_mp); 6683 6684 /* Check for overlapping fragments */ 6685 if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) { 6686 /* 6687 * Overlap Check: 6688 * ~~~~--------- # Check if the newly 6689 * ~ ndata_mp| # received fragment 6690 * ~~~~--------- # overlaps with the 6691 * ---------~~~~~~ # current fragment. 
6692 * | mp ~ 6693 * ---------~~~~~~ 6694 */ 6695 if (is_v4) { 6696 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6697 firstbyte - nfirstbyte; 6698 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6699 } else { 6700 data = (char *)ip6h + 6701 nip6_hdr_length + firstbyte - 6702 nfirstbyte; 6703 ndata = (char *)nip6h + nip6_hdr_length; 6704 } 6705 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) - 6706 firstbyte)) { 6707 /* Overlapping data does not match */ 6708 (void) fragcache_delentry(i, fep, frag); 6709 mutex_exit(&frag->itpf_lock); 6710 ip_drop_packet(first_mp, inbound, NULL, NULL, 6711 DROPPER(ipss, ipds_spd_overlap_frag), 6712 &ipss->ipsec_spd_dropper); 6713 return (NULL); 6714 } 6715 /* Part of defense for jolt2.c fragmentation attack */ 6716 if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) { 6717 /* 6718 * Check for identical or subset fragments: 6719 * ---------- ~~~~--------~~~~~ 6720 * | nmp | or ~ nmp ~ 6721 * ---------- ~~~~--------~~~~~ 6722 * ---------- ------ 6723 * | mp | | mp | 6724 * ---------- ------ 6725 */ 6726 mutex_exit(&frag->itpf_lock); 6727 ip_drop_packet(first_mp, inbound, NULL, NULL, 6728 DROPPER(ipss, ipds_spd_evil_frag), 6729 &ipss->ipsec_spd_dropper); 6730 return (NULL); 6731 } 6732 6733 } 6734 6735 /* Correct location for this fragment? */ 6736 if (firstbyte <= nfirstbyte) { 6737 /* 6738 * Check if the tail end of the new fragment overlaps 6739 * with the head of the current fragment. 6740 * --------~~~~~~~ 6741 * | nmp ~ 6742 * --------~~~~~~~ 6743 * ~~~~~-------- 6744 * ~ mp | 6745 * ~~~~~-------- 6746 */ 6747 if (lastbyte > nfirstbyte) { 6748 /* Fragments overlap */ 6752 if (is_v4) { 6753 data = (char *)iph + 6754 IPH_HDR_LENGTH(iph) + firstbyte - 6755 nfirstbyte; 6756 ndata = (char *)niph + 6757 IPH_HDR_LENGTH(niph); 6758 } else { 6759 data = (char *)ip6h + 6760 nip6_hdr_length + firstbyte - 6761 nfirstbyte; 6762 ndata = (char *)nip6h + nip6_hdr_length; 6763 } 6764 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) 6765 - nfirstbyte)) { 6766 /* Overlap mismatch */ 6767 (void) fragcache_delentry(i, fep, frag); 6768 mutex_exit(&frag->itpf_lock); 6769 ip_drop_packet(first_mp, inbound, NULL, 6770 NULL, DROPPER(ipss, 6771 ipds_spd_overlap_frag), 6772 &ipss->ipsec_spd_dropper); 6773 return (NULL); 6774 } 6775 } 6776 6777 /* 6778 * Fragment does not illegally overlap and can now 6779 * be inserted into the chain 6780 */ 6781 break; 6782 } 6783 6784 prevmp = nmp; 6785 } 6786 first_mp->b_next = nmp; 6787 6788 if (prevmp == NULL) { 6789 fep->itpfe_fraglist = first_mp; 6790 } else { 6791 prevmp->b_next = first_mp; 6792 } 6793 if (last) 6794 fep->itpfe_last = 1; 6795 6796 /* Part of defense for jolt2.c fragmentation attack */ 6797 if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) { 6798 (void) fragcache_delentry(i, fep, frag); 6799 mutex_exit(&frag->itpf_lock); 6800 ip_drop_packet(first_mp, inbound, NULL, NULL, 6801 DROPPER(ipss, ipds_spd_max_frags), 6802 &ipss->ipsec_spd_dropper); 6803 return (NULL); 6804 } 6805 6806 /* Check for complete packet */ 6807 6808 if (!fep->itpfe_last) { 6809 mutex_exit(&frag->itpf_lock); 6810 #ifdef FRAGCACHE_DEBUG 6811 cmn_err(CE_WARN, "Fragment cached, not last.\n"); 6812 #endif 6813 return (NULL); 6814 } 6815 6816 #ifdef FRAGCACHE_DEBUG 6817 cmn_err(CE_WARN, "Last fragment cached.\n"); 6818 cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp); 6819 #endif 6820 6821 offset = 0; 6822 for (mp = fep->itpfe_fraglist;
mp; mp = mp->b_next) { 6823 mblk_t *data_mp = (inbound ? mp->b_cont : mp); 6824 int hdr_len; 6825 6826 oiph = (ipha_t *)data_mp->b_rptr; 6827 ip6h = NULL; 6828 iph = NULL; 6829 6830 spare_mp = NULL; 6831 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 6832 hdr_len = ((outer_hdr_len != 0) ? 6833 IPH_HDR_LENGTH(oiph) : 0); 6834 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6835 } else { 6836 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 6837 if ((spare_mp = msgpullup(data_mp, -1)) == NULL) { 6838 mutex_exit(&frag->itpf_lock); 6839 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6840 DROPPER(ipss, ipds_spd_nomem), 6841 &ipss->ipsec_spd_dropper); 6842 return (NULL); 6843 } 6844 ip6h = (ip6_t *)spare_mp->b_rptr; 6845 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 6846 &ip6_hdr_length, &v6_proto_p); 6847 hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0); 6848 } 6849 6850 /* Calculate current fragment start/end */ 6851 if (is_v4) { 6852 if (iph == NULL) { 6853 /* Was v6 outer */ 6854 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6855 } 6856 firstbyte = V4_FRAG_OFFSET(iph); 6857 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6858 IPH_HDR_LENGTH(iph); 6859 } else { 6860 if ((spare_mp == NULL) && 6861 ((spare_mp = msgpullup(data_mp, -1)) == NULL)) { 6862 mutex_exit(&frag->itpf_lock); 6863 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6864 DROPPER(ipss, ipds_spd_nomem), 6865 &ipss->ipsec_spd_dropper); 6866 return (NULL); 6867 } 6868 ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len); 6869 if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 6870 &ip6_hdr_length, &v6_proto_p)) { 6871 mutex_exit(&frag->itpf_lock); 6872 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6873 DROPPER(ipss, ipds_spd_malformed_frag), 6874 &ipss->ipsec_spd_dropper); 6875 ipsec_freemsg_chain(spare_mp); 6876 return (NULL); 6877 } 6878 v6_proto = *v6_proto_p; 6879 bzero(&ipp, sizeof (ipp)); 6880 (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL); 6881 fraghdr = ipp.ipp_fraghdr; 6882 firstbyte = ntohs(fraghdr->ip6f_offlg & 6883 IP6F_OFF_MASK); 6884 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6885 sizeof (ip6_t) - ip6_hdr_length; 6886 } 6887 6888 /* 6889 * If this fragment is greater than current offset, 6890 * we have a missing fragment so return NULL 6891 */ 6892 if (firstbyte > offset) { 6893 mutex_exit(&frag->itpf_lock); 6894 #ifdef FRAGCACHE_DEBUG 6895 /* 6896 * Note, this can happen when the last frag 6897 * gets sent through because it is smaller 6898 * than the MTU. It is not necessarily an 6899 * error condition. 6900 */ 6901 cmn_err(CE_WARN, "Frag greater than offset! 
: " 6902 "missing fragment: firstbyte = %d, offset = %d, " 6903 "mp = %p\n", firstbyte, offset, mp); 6904 #endif 6905 ipsec_freemsg_chain(spare_mp); 6906 return (NULL); 6907 } 6908 6909 /* 6910 * If we are at the last fragment, we have the complete 6911 * packet, so rechain things and return it to caller 6912 * for processing 6913 */ 6914 6915 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6916 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6917 mp = fep->itpfe_fraglist; 6918 fep->itpfe_fraglist = NULL; 6919 (void) fragcache_delentry(i, fep, frag); 6920 mutex_exit(&frag->itpf_lock); 6921 6922 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6923 65535)) || (!is_v4 && (firstbyte + 6924 ntohs(ip6h->ip6_plen) > 65535))) { 6925 /* It is an invalid "ping-o-death" packet */ 6926 /* Discard it */ 6927 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6928 DROPPER(ipss, ipds_spd_evil_frag), 6929 &ipss->ipsec_spd_dropper); 6930 ipsec_freemsg_chain(spare_mp); 6931 return (NULL); 6932 } 6933 #ifdef FRAGCACHE_DEBUG 6934 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6935 "mp->b_next = %p", mp, mp->b_next); 6936 #endif 6937 ipsec_freemsg_chain(spare_mp); 6938 /* 6939 * For inbound case, mp has ipsec_in b_next'd chain 6940 * For outbound case, it is just data mp chain 6941 */ 6942 return (mp); 6943 } 6944 ipsec_freemsg_chain(spare_mp); 6945 6946 /* 6947 * Update new ending offset if this 6948 * fragment extends the packet 6949 */ 6950 if (offset < lastbyte) 6951 offset = lastbyte; 6952 } 6953 6954 mutex_exit(&frag->itpf_lock); 6955 6956 /* Didn't find last fragment, so return NULL */ 6957 return (NULL); 6958 } 6959 6960 static void 6961 ipsec_fragcache_clean(ipsec_fragcache_t *frag) 6962 { 6963 ipsec_fragcache_entry_t *fep; 6964 int i; 6965 ipsec_fragcache_entry_t *earlyfep = NULL; 6966 time_t itpf_time; 6967 int earlyexp; 6968 int earlyi = 0; 6969 6970 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6971 6972 itpf_time = gethrestime_sec(); 6973 earlyexp = itpf_time + 10000; 6974 6975 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6976 fep = (frag->itpf_ptr)[i]; 6977 while (fep) { 6978 if (fep->itpfe_exp < itpf_time) { 6979 /* found */ 6980 fep = fragcache_delentry(i, fep, frag); 6981 } else { 6982 if (fep->itpfe_exp < earlyexp) { 6983 earlyfep = fep; 6984 earlyexp = fep->itpfe_exp; 6985 earlyi = i; 6986 } 6987 fep = fep->itpfe_next; 6988 } 6989 } 6990 } 6991 6992 frag->itpf_expire_hint = earlyexp; 6993 6994 /* if (!found) */ 6995 if (frag->itpf_freelist == NULL) 6996 (void) fragcache_delentry(earlyi, earlyfep, frag); 6997 } 6998 6999 static ipsec_fragcache_entry_t * 7000 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 7001 ipsec_fragcache_t *frag) 7002 { 7003 ipsec_fragcache_entry_t *targp; 7004 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 7005 7006 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7007 7008 /* Free up any fragment list still in cache entry */ 7009 ipsec_freemsg_chain(fep->itpfe_fraglist); 7010 7011 targp = (frag->itpf_ptr)[slot]; 7012 ASSERT(targp != 0); 7013 7014 if (targp == fep) { 7015 /* unlink from head of hash chain */ 7016 (frag->itpf_ptr)[slot] = nextp; 7017 /* link into free list */ 7018 fep->itpfe_next = frag->itpf_freelist; 7019 frag->itpf_freelist = fep; 7020 return (nextp); 7021 } 7022 7023 /* maybe should use double linked list to make update faster */ 7024 /* must be past front of chain */ 7025 while (targp) { 7026 if (targp->itpfe_next == fep) { 7027 /* unlink from hash chain */ 7028 targp->itpfe_next = nextp; 7029 /* link into free list */ 7030 fep->itpfe_next = 
frag->itpf_freelist; 7031 frag->itpf_freelist = fep; 7032 return (nextp); 7033 } 7034 targp = targp->itpfe_next; 7035 ASSERT(targp != 0); 7036 } 7037 /* NOTREACHED */ 7038 return (NULL); 7039 } 7040
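/*
 * Note on fragcache_delentry(): it returns the entry that followed the
 * one it unlinked, which is what lets ipsec_fragcache_uninit() and
 * ipsec_fragcache_clean() continue walking a hash chain while deleting
 * from it.
 */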