/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t **, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, netstack_t *);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(netstack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004

/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG }
    selret_t;
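/*
 * Illustrative sketch (not part of the original code): a caller of
 * ipsec_init_inbound_sel() is expected to dispatch on the selret_t it
 * gets back, roughly along these lines, where the SELRET_TUNFRAG case
 * is only meaningful when selecting in tunnel mode:
 *
 *	switch (ipsec_init_inbound_sel(&sel, mp, ipha, ip6h, SEL_NONE)) {
 *	case SELRET_NOMEM:
 *		(allocation failed; drop the packet)
 *	case SELRET_BADPKT:
 *		(malformed headers; drop and count)
 *	case SELRET_TUNFRAG:
 *		(fragment seen before selectors could be extracted)
 *	case SELRET_SUCCESS:
 *		(sel is now filled in and usable for a policy lookup)
 *	}
 */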
static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

uint32_t ipsec_spd_hashsize;
uint32_t tun_spd_hashsize;

#define	IPSEC_SEL_NOHASH	((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int,
    ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2)						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	    (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv4 Fragments
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)
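/*
 * Worked example (illustrative only): IS_V4_FRAGMENT() is true for every
 * piece of a fragmented datagram.  A middle or trailing fragment has a
 * non-zero offset, the first fragment has offset zero but IPH_MF (more
 * fragments) set, and only an unfragmented datagram has both offset == 0
 * and IPH_MF clear:
 *
 *	offset = 0,   MF = 0  ->  not a fragment
 *	offset = 0,   MF = 1  ->  first fragment
 *	offset = 185, MF = 1  ->  middle fragment
 *	offset = 370, MF = 0  ->  last fragment
 */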
/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries.
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;

	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;

	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}
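/*
 * Illustrative sketch (assumptions noted): a producer that accumulates
 * packets on a b_next chain can hand the whole chain to the helpers
 * above, along the lines of the following, where build_packet() is a
 * hypothetical allocator:
 *
 *	mblk_t *chain = NULL, **tail = &chain;
 *	mblk_t *mp;
 *
 *	while ((mp = build_packet()) != NULL) {
 *		*tail = mp;
 *		tail = &mp->b_next;
 *	}
 *	ipsec_freemsg_chain(chain);
 *
 * Note that both helpers null out b_next before handing each mblk on,
 * since freemsg() and ip_drop_packet() expect single messages.
 */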
/*
 * AVL tree comparison function.
 * the in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);

	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsp_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);

	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);

	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}

/*
 * Free what ipsec_alloc_table allocated.
 */
void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;
	int i;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		for (i = 0; i < ipr->ipr_nchains; i++) {
			ASSERT(ipr->ipr_hash[i].hash_head == NULL);
		}
		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
		ipr->ipr_hash = NULL;
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));

	}
	ipsec_polhead_free_table(iph);
}

/*
 * Free the IPsec stack instance.
 */
/* ARGSUSED */
static void
ipsec_stack_fini(netstackid_t stackid, void *arg)
{
	ipsec_stack_t *ipss = (ipsec_stack_t *)arg;
	void *cookie;
	ipsec_tun_pol_t *node;
	netstack_t *ns = ipss->ipsec_netstack;
	int i;
	ipsec_algtype_t algtype;

	ipsec_loader_destroy(ipss);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&ipss->ipsec_tunnel_policies,
	    &cookie)) != NULL) {
		ITP_REFRELE(node, ns);
	}
	avl_destroy(&ipss->ipsec_tunnel_policies);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	rw_destroy(&ipss->ipsec_tunnel_policy_lock);

	ipsec_config_flush(ns);

	ipsec_kstat_destroy(ipss);

	ip_drop_unregister(&ipss->ipsec_dropper);

	ip_drop_unregister(&ipss->ipsec_spd_dropper);
	ip_drop_destroy(ipss);

	/*
	 * Globals start with ref == 1 to prevent IPPH_REFRELE() from
	 * attempting to free them, hence they should have 1 now.
	 */
410 */ 411 ipsec_polhead_destroy(&ipss->ipsec_system_policy); 412 ASSERT(ipss->ipsec_system_policy.iph_refs == 1); 413 ipsec_polhead_destroy(&ipss->ipsec_inactive_policy); 414 ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1); 415 416 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 417 ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head); 418 ipss->ipsec_action_hash[i].hash_head = NULL; 419 mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock)); 420 } 421 422 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 423 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 424 mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock)); 425 } 426 427 mutex_enter(&ipss->ipsec_alg_lock); 428 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype ++) { 429 int nalgs = ipss->ipsec_nalgs[algtype]; 430 431 for (i = 0; i < nalgs; i++) { 432 if (ipss->ipsec_alglists[algtype][i] != NULL) 433 ipsec_alg_unreg(algtype, i, ns); 434 } 435 } 436 mutex_exit(&ipss->ipsec_alg_lock); 437 mutex_destroy(&ipss->ipsec_alg_lock); 438 439 ipsid_gc(ns); 440 ipsid_fini(ns); 441 442 (void) ipsec_free_tables(ipss); 443 kmem_free(ipss, sizeof (*ipss)); 444 } 445 446 void 447 ipsec_policy_g_destroy(void) 448 { 449 kmem_cache_destroy(ipsec_action_cache); 450 kmem_cache_destroy(ipsec_sel_cache); 451 kmem_cache_destroy(ipsec_pol_cache); 452 kmem_cache_destroy(ipsec_info_cache); 453 454 ipsec_unregister_prov_update(); 455 456 netstack_unregister(NS_IPSEC); 457 } 458 459 460 /* 461 * Free what ipsec_alloc_tables allocated. 462 * Called when table allocation fails to free the table. 463 */ 464 static int 465 ipsec_free_tables(ipsec_stack_t *ipss) 466 { 467 int i; 468 469 if (ipss->ipsec_sel_hash != NULL) { 470 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 471 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 472 } 473 kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize * 474 sizeof (*ipss->ipsec_sel_hash)); 475 ipss->ipsec_sel_hash = NULL; 476 ipss->ipsec_spd_hashsize = 0; 477 } 478 ipsec_polhead_free_table(&ipss->ipsec_system_policy); 479 ipsec_polhead_free_table(&ipss->ipsec_inactive_policy); 480 481 return (ENOMEM); 482 } 483 484 /* 485 * Attempt to allocate the tables in a single policy head. 486 * Return nonzero on failure after cleaning up any work in progress. 487 */ 488 int 489 ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, 490 boolean_t global_cleanup, netstack_t *ns) 491 { 492 int dir; 493 494 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 495 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 496 497 ipr->ipr_nchains = nchains; 498 ipr->ipr_hash = kmem_zalloc(nchains * 499 sizeof (ipsec_policy_hash_t), kmflag); 500 if (ipr->ipr_hash == NULL) 501 return (global_cleanup ? 502 ipsec_free_tables(ns->netstack_ipsec) : 503 ENOMEM); 504 } 505 return (0); 506 } 507 508 /* 509 * Attempt to allocate the various tables. Return nonzero on failure 510 * after cleaning up any work in progress. 
static int
ipsec_alloc_tables(int kmflag, netstack_t *ns)
{
	int error;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	error = ipsec_alloc_table(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize *
	    sizeof (*ipss->ipsec_sel_hash), kmflag);

	if (ipss->ipsec_sel_hash == NULL)
		return (ipsec_free_tables(ipss));

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}

static boolean_t
ipsec_kstat_init(ipsec_stack_t *ipss)
{
	ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net",
	    KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid);

	if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL)
		return (B_FALSE);

	ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data;

#define	KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64)
	KI(esp_stat_in_requests);
	KI(esp_stat_in_discards);
	KI(esp_stat_lookup_failure);
	KI(ah_stat_in_requests);
	KI(ah_stat_in_discards);
	KI(ah_stat_lookup_failure);
	KI(sadb_acquire_maxpackets);
	KI(sadb_acquire_qhiwater);
#undef KI

	kstat_install(ipss->ipsec_ksp);
	return (B_TRUE);
}

static void
ipsec_kstat_destroy(ipsec_stack_t *ipss)
{
	kstat_delete_netstack(ipss->ipsec_ksp,
	    ipss->ipsec_netstack->netstack_stackid);
	ipss->ipsec_kstats = NULL;
}
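/*
 * For illustration (an assumption about the userland view, not from the
 * original source): the named kstats initialized above are published
 * under module "ip", name "ipsec_stat", class "net", so they should be
 * visible from userland with kstat(1M), e.g.:
 *
 *	$ kstat -m ip -n ipsec_stat
 *	module: ip	instance: 0
 *	name:   ipsec_stat	class: net
 *		esp_stat_in_requests	1234
 *		...
 */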
/*
 * Initialize the IPsec stack instance.
 */
/* ARGSUSED */
static void *
ipsec_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsec_stack_t *ipss;
	int i;

	ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP);
	ipss->ipsec_netstack = ns;

	/*
	 * FIXME: netstack_ipsec is used by some of the routines we call
	 * below, but it isn't set until this routine returns.
	 * Either we introduce optional xxx_stack_alloc() functions
	 * that will be called by the netstack framework before xxx_stack_init,
	 * or we switch spd.c and sadb.c to operate on ipsec_stack_t
	 * (the latter has some include file order issues for sadb.h, but makes
	 * sense if we merge some of the ipsec related stack_t's together).
	 */
	ns->netstack_ipsec = ipss;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ?
	    IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize;

	if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipss->ipsec_spd_hashsize);
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipss->ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP, ns);
	}

	/* Just set a default for tunnels. */
	ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ?
	    TUN_SPDHASH_DEFAULT : tun_spd_hashsize;

	ipsid_init(ns);
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	ipss->ipsec_system_policy.iph_refs = 1;
	ipss->ipsec_inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize);
	ipsec_polhead_init(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize);
	rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), 0);

	ipss->ipsec_next_policy_index = 1;

	rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipss->ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++)
		mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL);
	for (i = 0; i < IPSEC_NALGTYPES; i++) {
		ipss->ipsec_nalgs[i] = 0;
	}

	ip_drop_init(ipss);
	ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD");

	/* Set function to dummy until tun is loaded */
	rw_init(&ipss->ipsec_itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
	rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER);
	ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy;
	rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);

	/* IP's IPsec code calls the packet dropper */
	ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing");

	(void) ipsec_kstat_init(ipss);

	ipsec_loader_init(ipss);
	ipsec_loader_start(ipss);

	return (ipss);
}

/* Global across all stack instances */
void
ipsec_policy_g_init(void)
{
	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_info_cache = kmem_cache_create("ipsec_info",
	    sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsec_stack_t's.
	 */
	netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini);
}
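/*
 * Illustrative note (not in the original source): since
 * ipsec_spd_hashsize and tun_spd_hashsize are plain module globals, the
 * "preferred" sizes mentioned above can be seeded from /etc/system
 * before the module loads, for example (assuming this file is built
 * into the ip module):
 *
 *	set ip:ipsec_spd_hashsize = 509
 *	set ip:tun_spd_hashsize = 11
 *
 * A prime bucket count helps selector_hash() spread entries evenly.
 */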
/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipss->ipsec_nalgs[at];

	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipss->ipsec_sortlist[at][i];
			ipss->ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		} /* Else just continue. */
	}

	/* Store holder in last slot. */
	ipss->ipsec_sortlist[at][i] = holder;
}

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	boolean_t copyback = B_FALSE;
	int i;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	int newcount = ipss->ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback) {
			ipss->ipsec_sortlist[at][i-1] =
			    ipss->ipsec_sortlist[at][i];
		} else if (ipss->ipsec_sortlist[at][i] == algid) {
			copyback = B_TRUE;
		}
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype, ns);
	ipss->ipsec_alglists[algtype][alg->alg_id] = alg;

	ipss->ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id, ns);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]);
	ipss->ipsec_alglists[algtype][algid] = NULL;

	ipss->ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid, ns);
}
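/*
 * Worked example (illustrative): suppose the encryption sortlist holds
 * algids whose minimum key sizes are
 *
 *	sortlist = [ A(256), B(128), C(56) ]
 *
 * and a new algid D with alg_minbits == 192 is registered.
 * alg_insert_sortlist() walks the list carrying D as the holder: D
 * loses to A (192 < 256) so A stays put; D beats B (192 > 128) so D is
 * stored at that slot and B becomes the holder; once the holder is no
 * longer the newly added alg, the remaining entries simply shift one
 * slot to the right, giving
 *
 *	sortlist = [ A(256), D(192), B(128), C(56) ]
 *
 * i.e., descending minimum key size, per the XXX note above.
 */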
/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_system_policy;

	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy;

	IPPH_REFHOLD(h);
	return (h);
}

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive,
    netstack_t *ns)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}

		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags(ns->netstack_ipsec);
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipsec_swap_policy(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns);
}

/*
 * Clone one policy rule..
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}


static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);

		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}
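/*
 * Illustrative sketch (hypothetical sequence, error handling elided):
 * the clone/swap pair is what gives userland an atomic "commit" of a
 * staged policy update:
 *
 *	(void) ipsec_clone_system_policy(ns);	(stage a copy)
 *	... install new rules in the inactive head ...
 *	ipsec_swap_global_policy(ns);		(commit atomically)
 *
 * Both heads are write-locked for the pointer exchange, and iph_gen is
 * bumped on each so that cached lookups revalidate afterwards.
 */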
/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph,
    netstack_t *ns)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph, ns);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];

		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph, ns);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	return (ipsec_copy_polhead(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns));
}

/*
 * Generic "do we have IPvN policy" answer.
 */
boolean_t
iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
{
	int i, hval;
	uint32_t valbit;
	ipsec_policy_root_t *ipr;
	ipsec_policy_t *ipp;

	if (v6) {
		valbit = IPSL_IPV6;
		hval = IPSEC_AF_V6;
	} else {
		valbit = IPSL_IPV4;
		hval = IPSEC_AF_V4;
	}

	ASSERT(RW_LOCK_HELD(&iph->iph_lock));
	for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
		if (ipr->ipr_nonhash[hval] != NULL)
			return (B_TRUE);
		for (i = 0; i < ipr->ipr_nchains; i++) {
			for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
			    ipp = ipp->ipsp_hash.hash_next) {
				if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
					return (B_TRUE);
			}
		}
	}

	return (B_FALSE);
}
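/*
 * Usage sketch (illustrative): iph_ipvN() asserts that the caller holds
 * iph_lock, so a hypothetical caller asking "is any IPv6 policy
 * installed?" brackets the call itself:
 *
 *	rw_enter(&iph->iph_lock, RW_READER);
 *	have_v6_policy = iph_ipvN(iph, B_TRUE);
 *	rw_exit(&iph->iph_lock);
 */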
"secure" : "not secure"), s, d); 1106 } 1107 1108 /* 1109 * Rate-limiting front-end to strlog() for AH and ESP. Uses the ndd variables 1110 * in /dev/ip and the same rate-limiting clock so that there's a single 1111 * knob to turn to throttle the rate of messages. 1112 */ 1113 void 1114 ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl, 1115 char *fmt, ...) 1116 { 1117 va_list adx; 1118 hrtime_t current = gethrtime(); 1119 ip_stack_t *ipst = ns->netstack_ip; 1120 ipsec_stack_t *ipss = ns->netstack_ipsec; 1121 1122 sl |= SL_CONSOLE; 1123 /* 1124 * Throttle logging to stop syslog from being swamped. If variable 1125 * 'ipsec_policy_log_interval' is zero, don't log any messages at 1126 * all, otherwise log only one message every 'ipsec_policy_log_interval' 1127 * msec. Convert interval (in msec) to hrtime (in nsec). 1128 */ 1129 1130 if (ipst->ips_ipsec_policy_log_interval) { 1131 if (ipss->ipsec_policy_failure_last + 1132 ((hrtime_t)ipst->ips_ipsec_policy_log_interval * 1133 (hrtime_t)1000000) <= current) { 1134 va_start(adx, fmt); 1135 (void) vstrlog(mid, sid, level, sl, fmt, adx); 1136 va_end(adx); 1137 ipss->ipsec_policy_failure_last = current; 1138 } 1139 } 1140 } 1141 1142 void 1143 ipsec_config_flush(netstack_t *ns) 1144 { 1145 ipsec_stack_t *ipss = ns->netstack_ipsec; 1146 1147 rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER); 1148 ipsec_polhead_flush(&ipss->ipsec_system_policy, ns); 1149 ipss->ipsec_next_policy_index = 1; 1150 rw_exit(&ipss->ipsec_system_policy.iph_lock); 1151 ipsec_action_reclaim_stack(ns); 1152 } 1153 1154 /* 1155 * Clip a policy's min/max keybits vs. the capabilities of the 1156 * algorithm. 1157 */ 1158 static void 1159 act_alg_adjust(uint_t algtype, uint_t algid, 1160 uint16_t *minbits, uint16_t *maxbits, netstack_t *ns) 1161 { 1162 ipsec_stack_t *ipss = ns->netstack_ipsec; 1163 ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid]; 1164 1165 if (algp != NULL) { 1166 /* 1167 * If passed-in minbits is zero, we assume the caller trusts 1168 * us with setting the minimum key size. We pick the 1169 * algorithms DEFAULT key size for the minimum in this case. 1170 */ 1171 if (*minbits == 0) { 1172 *minbits = algp->alg_default_bits; 1173 ASSERT(*minbits >= algp->alg_minbits); 1174 } else { 1175 *minbits = MAX(MIN(*minbits, algp->alg_maxbits), 1176 algp->alg_minbits); 1177 } 1178 if (*maxbits == 0) 1179 *maxbits = algp->alg_maxbits; 1180 else 1181 *maxbits = MIN(MAX(*maxbits, algp->alg_minbits), 1182 algp->alg_maxbits); 1183 ASSERT(*minbits <= *maxbits); 1184 } else { 1185 *minbits = 0; 1186 *maxbits = 0; 1187 } 1188 } 1189 1190 /* 1191 * Check an action's requested algorithms against the algorithms currently 1192 * loaded in the system. 
/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}
/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t auth_alg, auth_idx, auth_min, auth_max;
	uint_t eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t encr_alg, encr_idx, encr_min, encr_max;
	uint_t action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss) \
	((wild)?(ipss->ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}
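/*
 * Worked example (illustrative): with 3 loaded encryption algorithms
 * plus SADB_EALG_NONE and 2 loaded authentication algorithms plus
 * SADB_AALG_NONE, a rule of the form "apply esp with any encryption and
 * any esp authentication" has wild_encr and wild_eauth set, so
 * SET_EXP_MINMAX discounts the NONE placeholder in each dimension and
 * action_count becomes 3 * 2 = 6: one fully-specified action per
 * (encr, eauth) pair, with the NONE entries skipped again inside the
 * nested loops above.
 */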
/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg);
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	/*
	 * SADB_AALG_ANY is a placeholder to distinguish "any" from
	 * "none" above.  If auth is required, as determined above,
	 * SADB_AALG_ANY becomes 0, which is the representation
	 * of "any" and "none" in PF_KEY v2.
	 */
	ipp->ipp_auth_alg = (req->ipsr_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_auth_alg : 0;
	ipp->ipp_esp_auth_alg = (req->ipsr_esp_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_esp_auth_alg : 0;
}

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp,
    netstack_t *ns)
{
	struct ipsec_act act;

	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp, ns);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}
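/*
 * Usage sketch (illustrative, error handling elided): a per-socket
 * IP_SEC_OPT request typically flows through these converters as:
 *
 *	ipsec_act_t *actp;
 *	uint_t nact;
 *
 *	ipsec_actvec_from_req(req, &actp, &nact, ns);
 *	if (actp == NULL)
 *		return (ENOMEM);
 *	... attach the expanded action vector to policy ...
 *	ipsec_actvec_free(actp, nact);
 *
 * The reverse direction (ipsec_req_from_act and friends) exists so a
 * getsockopt() can reconstruct an old-style ipsec_req_t view of
 * whatever new-style action is in force.
 */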
/*
 * Convert the relevant part of a policy head back to an ipsec_req_t
 * (more backwards compat).  We assume caller has already zero'ed *req
 * for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}

/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	mutex_enter(&connp->conn_lock);
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (ipl->ipl_in_action != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
			goto done;
		}
		if (ipl->ipl_in_policy != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
			    req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	mutex_exit(&connp->conn_lock);
	return (rv);
}

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * When outbound policy is not cached, look it up the hard way and attach
 * an ipsec_out_t to the packet..
 */
static mblk_t *
ipsec_attach_global_policy(mblk_t **mp, conn_t *connp, ipsec_selector_t *sel,
    netstack_t *ns)
{
	ipsec_policy_t *p;

	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns);

	if (p == NULL)
		return (NULL);
	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns));
}

/*
 * We have an ipsec_out already, but don't have cached policy; fill it in
 * with the right actions.
 */
static mblk_t *
ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
    ipsec_selector_t *sel, netstack_t *ns)
{
	ipsec_out_t *io;
	ipsec_policy_t *p;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	if (io->ipsec_out_policy == NULL) {
		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns);
		io->ipsec_out_policy = p;
	}
	return (ipsec_mp);
}


/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present,
    ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;
	netstack_t *ns;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ns = ii->ipsec_in_ns;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp, ns);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (first_mp);
}
1625 */ 1626 static boolean_t 1627 ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason, 1628 kstat_named_t **counter, uint64_t pkt_unique) 1629 { 1630 uint64_t ah_mask, esp_mask; 1631 ipsa_t *ah_assoc; 1632 ipsa_t *esp_assoc; 1633 netstack_t *ns = ii->ipsec_in_ns; 1634 ipsec_stack_t *ipss = ns->netstack_ipsec; 1635 1636 ASSERT(ii->ipsec_in_secure); 1637 ASSERT(!ii->ipsec_in_loopback); 1638 1639 ah_assoc = ii->ipsec_in_ah_sa; 1640 esp_assoc = ii->ipsec_in_esp_sa; 1641 ASSERT((ah_assoc != NULL) || (esp_assoc != NULL)); 1642 1643 ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0; 1644 esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0; 1645 1646 if ((ah_mask == 0) && (esp_mask == 0)) 1647 return (B_TRUE); 1648 1649 /* 1650 * The pkt_unique check will also check for tunnel mode on the SA 1651 * vs. the tunneled_packet boolean. "Be liberal in what you receive" 1652 * should not apply in this case. ;) 1653 */ 1654 1655 if (ah_mask != 0 && 1656 ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) { 1657 *reason = "AH inner header mismatch"; 1658 *counter = DROPPER(ipss, ipds_spd_ah_innermismatch); 1659 return (B_FALSE); 1660 } 1661 if (esp_mask != 0 && 1662 esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) { 1663 *reason = "ESP inner header mismatch"; 1664 *counter = DROPPER(ipss, ipds_spd_esp_innermismatch); 1665 return (B_FALSE); 1666 } 1667 return (B_TRUE); 1668 } 1669 1670 static boolean_t 1671 ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, 1672 ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter) 1673 { 1674 boolean_t ret = B_TRUE; 1675 ipsec_prot_t *ipp; 1676 ipsa_t *ah_assoc; 1677 ipsa_t *esp_assoc; 1678 boolean_t decaps; 1679 netstack_t *ns = ii->ipsec_in_ns; 1680 ipsec_stack_t *ipss = ns->netstack_ipsec; 1681 1682 ASSERT((ipha == NULL && ip6h != NULL) || 1683 (ip6h == NULL && ipha != NULL)); 1684 1685 if (ii->ipsec_in_loopback) { 1686 /* 1687 * Besides accepting pointer-equivalent actions, we also 1688 * accept any ICMP errors we generated for ourselves, 1689 * regardless of policy. If we do not wish to make this 1690 * assumption in the future, check here, and where 1691 * icmp_loopback is initialized in ip.c and ip6.c. (Look for 1692 * ipsec_out_icmp_loopback.) 1693 */ 1694 if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback) 1695 return (B_TRUE); 1696 1697 /* Deep compare necessary here?? */ 1698 *counter = DROPPER(ipss, ipds_spd_loopback_mismatch); 1699 *reason = "loopback policy mismatch"; 1700 return (B_FALSE); 1701 } 1702 ASSERT(!ii->ipsec_in_icmp_loopback); 1703 1704 ah_assoc = ii->ipsec_in_ah_sa; 1705 esp_assoc = ii->ipsec_in_esp_sa; 1706 1707 decaps = ii->ipsec_in_decaps; 1708 1709 switch (ap->ipa_act.ipa_type) { 1710 case IPSEC_ACT_DISCARD: 1711 case IPSEC_ACT_REJECT: 1712 /* Should "fail hard" */ 1713 *counter = DROPPER(ipss, ipds_spd_explicit); 1714 *reason = "blocked by policy"; 1715 return (B_FALSE); 1716 1717 case IPSEC_ACT_BYPASS: 1718 case IPSEC_ACT_CLEAR: 1719 *counter = DROPPER(ipss, ipds_spd_got_secure); 1720 *reason = "expected clear, got protected"; 1721 return (B_FALSE); 1722 1723 case IPSEC_ACT_APPLY: 1724 ipp = &ap->ipa_act.ipa_apply; 1725 /* 1726 * As of now we do the simple checks of whether 1727 * the datagram has gone through the required IPSEC 1728 * protocol constraints or not. We might have more 1729 * in the future like sensitive levels, key bits, etc. 1730 * If it fails the constraints, check whether we would 1731 * have accepted this if it had come in clear. 
static boolean_t
ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ii->ipsec_in_loopback) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * icmp_loopback is initialized in ip.c and ip6.c.  (Look for
		 * ipsec_out_icmp_loopback.)
		 */
		if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback)
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = DROPPER(ipss, ipds_spd_loopback_mismatch);
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!ii->ipsec_in_icmp_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;

	decaps = ii->ipsec_in_decaps;

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = DROPPER(ipss, ipds_spd_explicit);
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = DROPPER(ipss, ipds_spd_got_secure);
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_ahalg);
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_ah);
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay.  But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet.
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
}
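/*
 * Illustrative note: the pkt_unique value checked in
 * ipsec_check_ipsecin_unique() packs the remote port, local port, and
 * protocol into a single 64-bit word with SA_UNIQUE_ID(), so a latched
 * TCP connection, for example, yields an id that any SA with a
 * non-zero ipsa_unique_mask must reproduce exactly under that mask:
 *
 *	pkt_unique = SA_UNIQUE_ID(remote_port, local_port,
 *	    IPPROTO_TCP, 0);
 *
 * conn_to_unique() below is the helper that builds this value from a
 * latched conn_t (or, for wildcard selectors, from the packet itself).
 */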
/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fallback to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}

/*
 * Called to check policy on a latched connection, both from this file
 * and from tcp.c
 */
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    conn_t *connp)
{
	netstack_t *ns = ii->ipsec_in_ns;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ipl->ipl_ids_latched == B_TRUE);

	if (!ii->ipsec_in_loopback) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if ((ii->ipsec_in_ah_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
			*counter = DROPPER(ipss, ipds_spd_ah_badid);
			*reason = "AH identity mismatch";
			return (B_FALSE);
		}

		if ((ii->ipsec_in_esp_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
			*counter = DROPPER(ipss, ipds_spd_esp_badid);
			*reason = "ESP identity mismatch";
			return (B_FALSE);
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h))) {
			return (B_FALSE);
		}
	}

	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
	    ipha, ip6h, reason, counter));
}

/*
 * Check to see whether this secured datagram meets the policy
 * constraints specified in ipsp.
 *
 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy.
 *
 * Consumes a reference to ipsp.
 */
1953 */ 1954 static mblk_t * 1955 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1956 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) 1957 { 1958 ipsec_in_t *ii; 1959 ipsec_action_t *ap; 1960 const char *reason = "no policy actions found"; 1961 mblk_t *data_mp, *ipsec_mp; 1962 ipsec_stack_t *ipss = ns->netstack_ipsec; 1963 ip_stack_t *ipst = ns->netstack_ip; 1964 kstat_named_t *counter; 1965 1966 counter = DROPPER(ipss, ipds_spd_got_secure); 1967 1968 data_mp = first_mp->b_cont; 1969 ipsec_mp = first_mp; 1970 1971 ASSERT(ipsp != NULL); 1972 1973 ASSERT((ipha == NULL && ip6h != NULL) || 1974 (ip6h == NULL && ipha != NULL)); 1975 1976 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1977 1978 if (ii->ipsec_in_loopback) 1979 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1980 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1981 ASSERT(ii->ipsec_in_secure); 1982 1983 if (ii->ipsec_in_action != NULL) { 1984 /* 1985 * this can happen if we do a double policy-check on a packet 1986 * Would be nice to be able to delete this test.. 1987 */ 1988 IPACT_REFRELE(ii->ipsec_in_action); 1989 } 1990 ASSERT(ii->ipsec_in_action == NULL); 1991 1992 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1993 reason = "inbound AH and ESP identities differ"; 1994 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1995 goto drop; 1996 } 1997 1998 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1999 goto drop; 2000 2001 /* 2002 * Ok, now loop through the possible actions and see if any 2003 * of them work for us. 2004 */ 2005 2006 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 2007 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 2008 ipha, ip6h, &reason, &counter)) { 2009 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2010 IPPOL_REFRELE(ipsp, ns); 2011 return (first_mp); 2012 } 2013 } 2014 drop: 2015 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 2016 "ipsec inbound policy mismatch: %s, packet dropped\n", 2017 reason); 2018 IPPOL_REFRELE(ipsp, ns); 2019 ASSERT(ii->ipsec_in_action == NULL); 2020 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2021 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2022 &ipss->ipsec_spd_dropper); 2023 return (NULL); 2024 } 2025 2026 /* 2027 * sleazy prefix-length-based compare. 2028 * another inlining candidate.. 2029 */ 2030 boolean_t 2031 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 2032 { 2033 int offset = pfxlen>>3; 2034 int bitsleft = pfxlen & 7; 2035 uint8_t *addr2 = (uint8_t *)addr2p; 2036 2037 /* 2038 * and there was much evil.. 2039 * XXX should inline-expand the bcmp here and do this 32 bits 2040 * or 64 bits at a time.. 2041 */ 2042 return ((bcmp(addr1, addr2, offset) == 0) && 2043 ((bitsleft == 0) || 2044 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0))); 2045 } 2046 2047 static ipsec_policy_t * 2048 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 2049 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 2050 { 2051 ipsec_selkey_t *isel; 2052 ipsec_policy_t *p; 2053 int bpri = best ? 
best->ipsp_prio : 0; 2054 2055 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 2056 uint32_t valid; 2057 2058 if (p->ipsp_prio <= bpri) 2059 continue; 2060 isel = &p->ipsp_sel->ipsl_key; 2061 valid = isel->ipsl_valid; 2062 2063 if ((valid & IPSL_PROTOCOL) && 2064 (isel->ipsl_proto != sel->ips_protocol)) 2065 continue; 2066 2067 if ((valid & IPSL_REMOTE_ADDR) && 2068 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 2069 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6)) 2070 continue; 2071 2072 if ((valid & IPSL_LOCAL_ADDR) && 2073 !ip_addr_match((uint8_t *)&isel->ipsl_local, 2074 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6)) 2075 continue; 2076 2077 if ((valid & IPSL_REMOTE_PORT) && 2078 isel->ipsl_rport != sel->ips_remote_port) 2079 continue; 2080 2081 if ((valid & IPSL_LOCAL_PORT) && 2082 isel->ipsl_lport != sel->ips_local_port) 2083 continue; 2084 2085 if (!is_icmp_inv_acq) { 2086 if ((valid & IPSL_ICMP_TYPE) && 2087 (isel->ipsl_icmp_type > sel->ips_icmp_type || 2088 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 2089 continue; 2090 } 2091 2092 if ((valid & IPSL_ICMP_CODE) && 2093 (isel->ipsl_icmp_code > sel->ips_icmp_code || 2094 isel->ipsl_icmp_code_end < 2095 sel->ips_icmp_code)) { 2096 continue; 2097 } 2098 } else { 2099 /* 2100 * special case for icmp inverse acquire 2101 * we only want policies that aren't drop/pass 2102 */ 2103 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2104 continue; 2105 } 2106 2107 /* we matched all the packet-port-field selectors! */ 2108 best = p; 2109 bpri = p->ipsp_prio; 2110 } 2111 2112 return (best); 2113 } 2114 2115 /* 2116 * Try to find and return the best policy entry under a given policy 2117 * root for a given set of selectors; the first parameter "best" is 2118 * the current best policy so far. If "best" is non-null, we have a 2119 * reference to it. We return a reference to a policy; if that policy 2120 * is not the original "best", we need to release that reference 2121 * before returning. 2122 */ 2123 ipsec_policy_t * 2124 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2125 int direction, ipsec_selector_t *sel, netstack_t *ns) 2126 { 2127 ipsec_policy_t *curbest; 2128 ipsec_policy_root_t *root; 2129 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2130 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2131 2132 curbest = best; 2133 root = &head->iph_root[direction]; 2134 2135 #ifdef DEBUG 2136 if (is_icmp_inv_acq) { 2137 if (sel->ips_isv4) { 2138 if (sel->ips_protocol != IPPROTO_ICMP) { 2139 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2140 " expecting icmp, got %d", 2141 sel->ips_protocol); 2142 } 2143 } else { 2144 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2145 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2146 " expecting icmpv6, got %d", 2147 sel->ips_protocol); 2148 } 2149 } 2150 } 2151 #endif 2152 2153 rw_enter(&head->iph_lock, RW_READER); 2154 2155 if (root->ipr_nchains > 0) { 2156 curbest = ipsec_find_policy_chain(curbest, 2157 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2158 is_icmp_inv_acq); 2159 } 2160 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2161 is_icmp_inv_acq); 2162 2163 /* 2164 * Adjust reference counts if we found anything new. 
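 * (We IPPOL_REFHOLD() the new winner before IPPOL_REFRELE()'ing the
 * previous best, so the pointer we hand back always carries a live
 * reference for the caller.)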
2165 */ 2166 if (curbest != best) { 2167 ASSERT(curbest != NULL); 2168 IPPOL_REFHOLD(curbest); 2169 2170 if (best != NULL) { 2171 IPPOL_REFRELE(best, ns); 2172 } 2173 } 2174 2175 rw_exit(&head->iph_lock); 2176 2177 return (curbest); 2178 } 2179 2180 /* 2181 * Find the best system policy (either global or per-interface) which 2182 * applies to the given selector; look in all the relevant policy roots 2183 * to figure out which policy wins. 2184 * 2185 * Returns a reference to a policy; caller must release this 2186 * reference when done. 2187 */ 2188 ipsec_policy_t * 2189 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2190 ipsec_selector_t *sel, netstack_t *ns) 2191 { 2192 ipsec_policy_t *p; 2193 ipsec_stack_t *ipss = ns->netstack_ipsec; 2194 2195 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2196 direction, sel, ns); 2197 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2198 p = ipsec_find_policy_head(p, connp->conn_policy, 2199 direction, sel, ns); 2200 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2201 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2202 direction, sel, ns); 2203 } 2204 2205 return (p); 2206 } 2207 2208 /* 2209 * Check with global policy and see whether this inbound 2210 * packet meets the policy constraints. 2211 * 2212 * Locate appropriate policy from global policy, supplemented by the 2213 * conn's configured and/or cached policy if the conn is supplied. 2214 * 2215 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2216 * encrypted packet to see if they match. 2217 * 2218 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2219 * floor. 2220 */ 2221 mblk_t * 2222 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2223 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) 2224 { 2225 ipsec_policy_t *p; 2226 ipsec_selector_t sel; 2227 mblk_t *data_mp, *ipsec_mp; 2228 boolean_t policy_present; 2229 kstat_named_t *counter; 2230 ipsec_in_t *ii = NULL; 2231 uint64_t pkt_unique; 2232 ipsec_stack_t *ipss = ns->netstack_ipsec; 2233 ip_stack_t *ipst = ns->netstack_ip; 2234 2235 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2236 ipsec_mp = mctl_present ? first_mp : NULL; 2237 2238 sel.ips_is_icmp_inv_acq = 0; 2239 2240 ASSERT((ipha == NULL && ip6h != NULL) || 2241 (ip6h == NULL && ipha != NULL)); 2242 2243 if (ipha != NULL) 2244 policy_present = ipss->ipsec_inbound_v4_policy_present; 2245 else 2246 policy_present = ipss->ipsec_inbound_v6_policy_present; 2247 2248 if (!policy_present && connp == NULL) { 2249 /* 2250 * No global policy and no per-socket policy; 2251 * just pass it back (but we shouldn't get here in that case) 2252 */ 2253 return (first_mp); 2254 } 2255 2256 if (ipsec_mp != NULL) { 2257 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2258 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2259 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2260 } 2261 2262 /* 2263 * If we have cached policy, use it. 2264 * Otherwise consult system policy. 2265 */ 2266 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2267 p = connp->conn_latch->ipl_in_policy; 2268 if (p != NULL) { 2269 IPPOL_REFHOLD(p); 2270 } 2271 /* 2272 * Fudge sel for UNIQUE_ID setting below. 2273 */ 2274 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2275 } else { 2276 /* Initialize the ports in the selector */ 2277 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2278 SEL_NONE) == SELRET_NOMEM) { 2279 /* 2280 * Technically not a policy mismatch, but it is 2281 * an internal failure. 
*/ 2283 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2284 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE, ns); 2285 counter = DROPPER(ipss, ipds_spd_nomem); 2286 goto fail; 2287 } 2288 2289 /* 2290 * Find the policy which best applies. 2291 * 2292 * If we find global policy, we should look at both 2293 * local policy and global policy and see which is 2294 * stronger and match accordingly. 2295 * 2296 * If we don't find a global policy, check with 2297 * local policy alone. 2298 */ 2299 2300 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 2301 ns); 2302 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2303 sel.ips_local_port, sel.ips_protocol, 0); 2304 } 2305 2306 if (p == NULL) { 2307 if (ipsec_mp == NULL) { 2308 /* 2309 * We have no policy; default to succeeding. 2310 * XXX paranoid system design doesn't do this. 2311 */ 2312 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2313 return (first_mp); 2314 } else { 2315 counter = DROPPER(ipss, ipds_spd_got_secure); 2316 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2317 "ipsec_check_global_policy", ipha, ip6h, B_TRUE, 2318 ns); 2319 goto fail; 2320 } 2321 } 2322 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2323 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2324 pkt_unique, ns)); 2325 } 2326 if (p->ipsp_act->ipa_allow_clear) { 2327 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2328 IPPOL_REFRELE(p, ns); 2329 return (first_mp); 2330 } 2331 IPPOL_REFRELE(p, ns); 2332 /* 2333 * If we reach here, we drop the packet: it failed the global 2334 * policy check because it arrived as cleartext when the policy 2335 * required protection. 2336 */ 2337 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2338 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns); 2339 counter = DROPPER(ipss, ipds_spd_got_clear); 2340 2341 fail: 2342 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2343 &ipss->ipsec_spd_dropper); 2344 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2345 return (NULL); 2346 } 2347 2348 /* 2349 * Check whether an inbound datagram is one we are willing to accept 2350 * in clear. If it arrived secured, it is the job of IPsec to log 2351 * information appropriately if it suspects that the datagram may not 2352 * be genuine. 2353 * 2354 * This is called only while fanning out to a ULP that accepts only 2355 * secure data, when the incoming datagram is clear. Usually we never 2356 * accept clear datagrams in such cases; ICMP is the only exception. 2357 * 2358 * 2359 * NOTE: We don't call this function if the client (ULP) 2360 * is willing to accept things in clear. 2361 */ 2362 boolean_t 2363 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2364 { 2365 ushort_t iph_hdr_length; 2366 icmph_t *icmph; 2367 icmp6_t *icmp6; 2368 uint8_t *nexthdrp; 2369 2370 ASSERT((ipha != NULL && ip6h == NULL) || 2371 (ipha == NULL && ip6h != NULL)); 2372 2373 if (ip6h != NULL) { 2374 iph_hdr_length = ip_hdr_length_v6(mp, ip6h); 2375 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, 2376 &nexthdrp)) { 2377 return (B_FALSE); 2378 } 2379 if (*nexthdrp != IPPROTO_ICMPV6) 2380 return (B_FALSE); 2381 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]); 2382 /* Match IPv6 ICMP policy as closely to IPv4 as possible. */ 2383 switch (icmp6->icmp6_type) { 2384 case ICMP6_PARAM_PROB: 2385 /* Corresponds to port/proto unreach in IPv4. */ 2386 case ICMP6_ECHO_REQUEST: 2387 /* Just like IPv4. 
*/ 2388 return (B_FALSE); 2389 2390 case MLD_LISTENER_QUERY: 2391 case MLD_LISTENER_REPORT: 2392 case MLD_LISTENER_REDUCTION: 2393 /* 2394 * XXX Separate NDD in IPv4; what about here? 2395 * Plus, mcast is important to ND. 2396 */ 2397 case ICMP6_DST_UNREACH: 2398 /* Corresponds to HOST/NET unreachable in IPv4. */ 2399 case ICMP6_PACKET_TOO_BIG: 2400 case ICMP6_ECHO_REPLY: 2401 /* These are trusted in IPv4. */ 2402 case ND_ROUTER_SOLICIT: 2403 case ND_ROUTER_ADVERT: 2404 case ND_NEIGHBOR_SOLICIT: 2405 case ND_NEIGHBOR_ADVERT: 2406 case ND_REDIRECT: 2407 /* Trust ND messages for now. */ 2408 case ICMP6_TIME_EXCEEDED: 2409 default: 2410 return (B_TRUE); 2411 } 2412 } else { 2413 /* 2414 * If it is not ICMP, fail this request. 2415 */ 2416 if (ipha->ipha_protocol != IPPROTO_ICMP) { 2417 #ifdef FRAGCACHE_DEBUG 2418 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n", 2419 ipha->ipha_protocol); 2420 #endif 2421 return (B_FALSE); 2422 } 2423 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2424 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2425 /* 2426 * It is an insecure ICMP message. Check to see whether we are 2427 * willing to accept this one. 2428 */ 2429 2430 switch (icmph->icmph_type) { 2431 case ICMP_ECHO_REPLY: 2432 case ICMP_TIME_STAMP_REPLY: 2433 case ICMP_INFO_REPLY: 2434 case ICMP_ROUTER_ADVERTISEMENT: 2435 /* 2436 * We should not encourage clear replies if this 2437 * client expects secure traffic. If somebody replies 2438 * in clear, a malicious user watching both the 2439 * request and the reply can mount chosen-plaintext 2440 * attacks. With global policy we might just be 2441 * expecting secure but sending out clear. We don't 2442 * know what the right thing is, and we can't control 2443 * the sender. Till we are sure of what to do, 2444 * accept them. 2445 */ 2446 return (B_TRUE); 2447 case ICMP_ECHO_REQUEST: 2448 case ICMP_TIME_STAMP_REQUEST: 2449 case ICMP_INFO_REQUEST: 2450 case ICMP_ADDRESS_MASK_REQUEST: 2451 case ICMP_ROUTER_SOLICITATION: 2452 case ICMP_ADDRESS_MASK_REPLY: 2453 /* 2454 * Don't accept this, as somebody could be sending 2455 * us plain text to get encrypted data. If we reply, 2456 * it will lead to a chosen-plaintext attack. 2457 */ 2458 return (B_FALSE); 2459 case ICMP_DEST_UNREACHABLE: 2460 switch (icmph->icmph_code) { 2461 case ICMP_FRAGMENTATION_NEEDED: 2462 /* 2463 * Be in sync with icmp_inbound, where we have 2464 * already set ire_max_frag. 2465 */ 2466 #ifdef FRAGCACHE_DEBUG 2467 cmn_err(CE_WARN, "ICMP frag needed\n"); 2468 #endif 2469 return (B_TRUE); 2470 case ICMP_HOST_UNREACHABLE: 2471 case ICMP_NET_UNREACHABLE: 2472 /* 2473 * By accepting, we could reset a connection. 2474 * How do we solve the problem of some 2475 * intermediate router sending insecure ICMP 2476 * messages? 2477 */ 2478 return (B_TRUE); 2479 case ICMP_PORT_UNREACHABLE: 2480 case ICMP_PROTOCOL_UNREACHABLE: 2481 default : 2482 return (B_FALSE); 2483 } 2484 case ICMP_SOURCE_QUENCH: 2485 /* 2486 * If this is an attack, TCP will slow start 2487 * because of this. Is it very harmful? 
*/ 2488 return (B_TRUE); 2490 case ICMP_PARAM_PROBLEM: 2491 return (B_FALSE); 2492 case ICMP_TIME_EXCEEDED: 2493 return (B_TRUE); 2494 case ICMP_REDIRECT: 2495 return (B_FALSE); 2496 default : 2497 return (B_FALSE); 2498 } 2499 } 2500 } 2501 2502 void 2503 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2504 { 2505 mutex_enter(&ipl->ipl_lock); 2506 2507 if (ipl->ipl_ids_latched) { 2508 /* I lost, someone else got here before me */ 2509 mutex_exit(&ipl->ipl_lock); 2510 return; 2511 } 2512 2513 if (local != NULL) 2514 IPSID_REFHOLD(local); 2515 if (remote != NULL) 2516 IPSID_REFHOLD(remote); 2517 2518 ipl->ipl_local_cid = local; 2519 ipl->ipl_remote_cid = remote; 2520 ipl->ipl_ids_latched = B_TRUE; 2521 mutex_exit(&ipl->ipl_lock); 2522 } 2523 2524 void 2525 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii) 2526 { 2527 ipsa_t *sa; 2528 2529 if (!ipl->ipl_ids_latched) { 2530 ipsid_t *local = NULL; 2531 ipsid_t *remote = NULL; 2532 2533 if (!ii->ipsec_in_loopback) { 2534 if (ii->ipsec_in_esp_sa != NULL) 2535 sa = ii->ipsec_in_esp_sa; 2536 else 2537 sa = ii->ipsec_in_ah_sa; 2538 ASSERT(sa != NULL); 2539 local = sa->ipsa_dst_cid; 2540 remote = sa->ipsa_src_cid; 2541 } 2542 ipsec_latch_ids(ipl, local, remote); 2543 } 2544 ipl->ipl_in_action = ii->ipsec_in_action; 2545 IPACT_REFHOLD(ipl->ipl_in_action); 2546 } 2547 2548 /* 2549 * Check whether the policy constraints are met for an 2550 * inbound datagram; called from IP in numerous places. 2551 * 2552 * Note that this is not a chokepoint for inbound policy checks; 2553 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy(). 2554 */ 2555 mblk_t * 2556 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, 2557 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2558 { 2559 ipsec_in_t *ii; 2560 boolean_t ret; 2561 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp; 2562 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL; 2563 ipsec_latch_t *ipl; 2564 uint64_t unique_id; 2565 ipsec_stack_t *ipss; 2566 ip_stack_t *ipst; 2567 netstack_t *ns; 2568 2569 ASSERT(connp != NULL); 2570 ipl = connp->conn_latch; 2571 ns = connp->conn_netstack; 2572 ipss = ns->netstack_ipsec; 2573 ipst = ns->netstack_ip; 2574 2575 if (ipsec_mp == NULL) { 2576 clear: 2577 /* 2578 * This is the case where the incoming datagram is 2579 * cleartext and we need to see whether this client 2580 * would like to receive such untrustworthy things from 2581 * the wire. 2582 */ 2583 ASSERT(mp != NULL); 2584 2585 if (ipl != NULL) { 2586 /* 2587 * Policy is cached in the conn. 2588 */ 2589 if ((ipl->ipl_in_policy != NULL) && 2590 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) { 2591 ret = ipsec_inbound_accept_clear(mp, 2592 ipha, ip6h); 2593 if (ret) { 2594 BUMP_MIB(&ipst->ips_ip_mib, 2595 ipsecInSucceeded); 2596 return (first_mp); 2597 } else { 2598 ipsec_log_policy_failure( 2599 IPSEC_POLICY_MISMATCH, 2600 "ipsec_check_inbound_policy", ipha, 2601 ip6h, B_FALSE, ns); 2602 ip_drop_packet(first_mp, B_TRUE, NULL, 2603 NULL, 2604 DROPPER(ipss, ipds_spd_got_clear), 2605 &ipss->ipsec_spd_dropper); 2606 BUMP_MIB(&ipst->ips_ip_mib, 2607 ipsecInFailed); 2608 return (NULL); 2609 } 2610 } else { 2611 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2612 return (first_mp); 2613 } 2614 } else { 2615 /* 2616 * As this is a non-hardbound connection, we need 2617 * to look at both per-socket policy and global 2618 * policy. 
As this is cleartext, mark the mp as 2619 * M_DATA in case it is an ICMP error being 2620 * reported, before calling ipsec_check_global_policy 2621 * so that it does not mistake it for an IPSEC_IN. 2622 */ 2623 uchar_t db_type = mp->b_datap->db_type; 2624 mp->b_datap->db_type = M_DATA; 2625 first_mp = ipsec_check_global_policy(first_mp, connp, 2626 ipha, ip6h, mctl_present, ns); 2627 if (first_mp != NULL) 2628 mp->b_datap->db_type = db_type; 2629 return (first_mp); 2630 } 2631 } 2632 /* 2633 * If it is inbound, check whether the attached message 2634 * is secure or not. We have a special case for ICMP, 2635 * where we have an IPSEC_IN message and the attached 2636 * message is not secure. See icmp_inbound_error_fanout 2637 * for details. 2638 */ 2639 ASSERT(ipsec_mp != NULL); 2640 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2641 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2642 2643 if (!ii->ipsec_in_secure) 2644 goto clear; 2645 2646 /* 2647 * mp->b_cont could be either an M_CTL message 2648 * for ICMP errors being sent up or an M_DATA message. 2649 */ 2650 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA); 2651 2652 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2653 2654 if (ipl == NULL) { 2655 /* 2656 * We don't have policies cached in the conn 2657 * for this stream. So, look at the global 2658 * policy. It will check against conn or global 2659 * depending on whichever is stronger. 2660 */ 2661 return (ipsec_check_global_policy(first_mp, connp, 2662 ipha, ip6h, mctl_present, ns)); 2663 } 2664 2665 if (ipl->ipl_in_action != NULL) { 2666 /* Policy is cached & latched; fast(er) path */ 2667 const char *reason; 2668 kstat_named_t *counter; 2669 2670 if (ipsec_check_ipsecin_latch(ii, mp, ipl, 2671 ipha, ip6h, &reason, &counter, connp)) { 2672 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2673 return (first_mp); 2674 } 2675 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, 2676 SL_ERROR|SL_WARN|SL_CONSOLE, 2677 "ipsec inbound policy mismatch: %s, packet dropped\n", 2678 reason); 2679 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2680 &ipss->ipsec_spd_dropper); 2681 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2682 return (NULL); 2683 } else if (ipl->ipl_in_policy == NULL) { 2684 ipsec_weird_null_inbound_policy++; 2685 return (first_mp); 2686 } 2687 2688 unique_id = conn_to_unique(connp, mp, ipha, ip6h); 2689 IPPOL_REFHOLD(ipl->ipl_in_policy); 2690 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy, 2691 ipha, ip6h, unique_id, ns); 2692 /* 2693 * NOTE: ipsecIn{Failed,Succeeded} bumped by 2694 * ipsec_check_ipsecin_policy(). 2695 */ 2696 if (first_mp != NULL) 2697 ipsec_latch_inbound(ipl, ii); 2698 return (first_mp); 2699 } 2700 2701 /* 2702 * Returns: 2703 * 2704 * SELRET_NOMEM --> the msgpullup() needed to gather things failed. 2705 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2706 * gathering, the initial fragment is too short for 2707 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2708 * set. 2709 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2710 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2711 * should put this packet in a fragment-gathering queue. 2712 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2713 * are set. 2714 */ 2715 static selret_t 2716 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2717 ip6_t *ip6h, uint8_t sel_flags) 2718 { 2719 uint16_t *ports; 2720 ushort_t hdr_len; 2721 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases... 
*/ 2722 mblk_t *spare_mp = NULL; 2723 uint8_t *nexthdrp; 2724 uint8_t nexthdr; 2725 uint8_t *typecode; 2726 uint8_t check_proto; 2727 ip6_pkt_t ipp; 2728 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2729 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2730 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2731 2732 ASSERT((ipha == NULL && ip6h != NULL) || 2733 (ipha != NULL && ip6h == NULL)); 2734 2735 if (ip6h != NULL) { 2736 if (is_icmp) 2737 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2738 2739 check_proto = IPPROTO_ICMPV6; 2740 sel->ips_isv4 = B_FALSE; 2741 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2742 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2743 2744 bzero(&ipp, sizeof (ipp)); 2745 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2746 2747 nexthdr = ip6h->ip6_nxt; 2748 switch (nexthdr) { 2749 case IPPROTO_HOPOPTS: 2750 case IPPROTO_ROUTING: 2751 case IPPROTO_DSTOPTS: 2752 case IPPROTO_FRAGMENT: 2753 /* 2754 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2755 * mblk that's contiguous to feed it 2756 */ 2757 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2758 return (SELRET_NOMEM); 2759 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2760 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2761 &hdr_len, &nexthdrp)) { 2762 /* Malformed packet - caller frees. */ 2763 ipsec_freemsg_chain(spare_mp); 2764 return (SELRET_BADPKT); 2765 } 2766 nexthdr = *nexthdrp; 2767 /* We can just extract based on hdr_len now. */ 2768 break; 2769 default: 2770 hdr_len = IPV6_HDR_LEN; 2771 break; 2772 } 2773 2774 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2775 /* IPv6 Fragment */ 2776 ipsec_freemsg_chain(spare_mp); 2777 return (SELRET_TUNFRAG); 2778 } 2779 } else { 2780 if (is_icmp) 2781 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2782 check_proto = IPPROTO_ICMP; 2783 sel->ips_isv4 = B_TRUE; 2784 sel->ips_local_addr_v4 = ipha->ipha_dst; 2785 sel->ips_remote_addr_v4 = ipha->ipha_src; 2786 nexthdr = ipha->ipha_protocol; 2787 hdr_len = IPH_HDR_LENGTH(ipha); 2788 2789 if (port_policy_present && 2790 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2791 !is_icmp) { 2792 /* IPv4 Fragment */ 2793 ipsec_freemsg_chain(spare_mp); 2794 return (SELRET_TUNFRAG); 2795 } 2796 2797 } 2798 sel->ips_protocol = nexthdr; 2799 2800 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2801 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2802 (!port_policy_present && tunnel_mode)) { 2803 sel->ips_remote_port = sel->ips_local_port = 0; 2804 ipsec_freemsg_chain(spare_mp); 2805 return (SELRET_SUCCESS); 2806 } 2807 2808 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2809 /* If we didn't pullup a copy already, do so now. */ 2810 /* 2811 * XXX performance, will upper-layers frequently split TCP/UDP 2812 * apart from IP or options? If so, perhaps we should revisit 2813 * the spare_mp strategy. 
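 *
 * The contiguity test above simply asks whether the four bytes
 * holding the two 16-bit ports (or the ICMP type/code pair) might
 * run past the first mblk; when they might, we linearize the whole
 * chain with msgpullup(mp, -1) below and read the fields from that
 * copy instead.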
2814 */ 2815 ipsec_hdr_pullup_needed++; 2816 if (spare_mp == NULL && 2817 (spare_mp = msgpullup(mp, -1)) == NULL) { 2818 return (SELRET_NOMEM); 2819 } 2820 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2821 } else { 2822 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2823 } 2824 2825 if (nexthdr == check_proto) { 2826 typecode = (uint8_t *)ports; 2827 sel->ips_icmp_type = *typecode++; 2828 sel->ips_icmp_code = *typecode; 2829 sel->ips_remote_port = sel->ips_local_port = 0; 2830 } else { 2831 sel->ips_remote_port = *ports++; 2832 sel->ips_local_port = *ports; 2833 } 2834 ipsec_freemsg_chain(spare_mp); 2835 return (SELRET_SUCCESS); 2836 } 2837 2838 static boolean_t 2839 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2840 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2841 { 2842 /* 2843 * XXX cut&paste shared with ipsec_init_inbound_sel 2844 */ 2845 uint16_t *ports; 2846 ushort_t hdr_len; 2847 mblk_t *spare_mp = NULL; 2848 uint8_t *nexthdrp; 2849 uint8_t nexthdr; 2850 uint8_t *typecode; 2851 uint8_t check_proto; 2852 2853 ASSERT((ipha == NULL && ip6h != NULL) || 2854 (ipha != NULL && ip6h == NULL)); 2855 2856 if (ip6h != NULL) { 2857 check_proto = IPPROTO_ICMPV6; 2858 nexthdr = ip6h->ip6_nxt; 2859 switch (nexthdr) { 2860 case IPPROTO_HOPOPTS: 2861 case IPPROTO_ROUTING: 2862 case IPPROTO_DSTOPTS: 2863 case IPPROTO_FRAGMENT: 2864 /* 2865 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2866 * mblk that's contiguous to feed it 2867 */ 2868 spare_mp = msgpullup(mp, -1); 2869 if (spare_mp == NULL || 2870 !ip_hdr_length_nexthdr_v6(spare_mp, 2871 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2872 &hdr_len, &nexthdrp)) { 2873 /* Always works, even if NULL. */ 2874 ipsec_freemsg_chain(spare_mp); 2875 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2876 DROPPER(ipss, ipds_spd_nomem), 2877 &ipss->ipsec_spd_dropper); 2878 return (B_FALSE); 2879 } else { 2880 nexthdr = *nexthdrp; 2881 /* We can just extract based on hdr_len now. */ 2882 } 2883 break; 2884 default: 2885 hdr_len = IPV6_HDR_LEN; 2886 break; 2887 } 2888 } else { 2889 check_proto = IPPROTO_ICMP; 2890 hdr_len = IPH_HDR_LENGTH(ipha); 2891 nexthdr = ipha->ipha_protocol; 2892 } 2893 2894 sel->ips_protocol = nexthdr; 2895 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2896 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2897 sel->ips_local_port = sel->ips_remote_port = 0; 2898 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2899 return (B_TRUE); 2900 } 2901 2902 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2903 /* If we didn't pullup a copy already, do so now. */ 2904 /* 2905 * XXX performance, will upper-layers frequently split TCP/UDP 2906 * apart from IP or options? If so, perhaps we should revisit 2907 * the spare_mp strategy. 2908 * 2909 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2910 */ 2911 if (spare_mp == NULL && 2912 (spare_mp = msgpullup(mp, -1)) == NULL) { 2913 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2914 DROPPER(ipss, ipds_spd_nomem), 2915 &ipss->ipsec_spd_dropper); 2916 return (B_FALSE); 2917 } 2918 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2919 } else { 2920 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2921 } 2922 2923 if (nexthdr == check_proto) { 2924 typecode = (uint8_t *)ports; 2925 sel->ips_icmp_type = *typecode++; 2926 sel->ips_icmp_code = *typecode; 2927 sel->ips_remote_port = sel->ips_local_port = 0; 2928 } else { 2929 sel->ips_local_port = *ports++; 2930 sel->ips_remote_port = *ports; 2931 } 2932 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2933 return (B_TRUE); 2934 } 2935 2936 /* 2937 * Create an ipsec_action_t based on the way an inbound packet was protected. 2938 * Used to reflect traffic back to a sender. 2939 * 2940 * We don't bother interning the action into the hash table. 2941 */ 2942 ipsec_action_t * 2943 ipsec_in_to_out_action(ipsec_in_t *ii) 2944 { 2945 ipsa_t *ah_assoc, *esp_assoc; 2946 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2947 ipsec_action_t *ap; 2948 boolean_t unique; 2949 2950 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2951 2952 if (ap == NULL) 2953 return (NULL); 2954 2955 bzero(ap, sizeof (*ap)); 2956 HASH_NULL(ap, ipa_hash); 2957 ap->ipa_next = NULL; 2958 ap->ipa_refs = 1; 2959 2960 /* 2961 * Get the algorithms that were used for this packet. 2962 */ 2963 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2964 ap->ipa_act.ipa_log = 0; 2965 ah_assoc = ii->ipsec_in_ah_sa; 2966 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 2967 2968 esp_assoc = ii->ipsec_in_esp_sa; 2969 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 2970 2971 if (esp_assoc != NULL) { 2972 encr_alg = esp_assoc->ipsa_encr_alg; 2973 espa_alg = esp_assoc->ipsa_auth_alg; 2974 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 2975 } 2976 if (ah_assoc != NULL) 2977 auth_alg = ah_assoc->ipsa_auth_alg; 2978 2979 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 2980 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 2981 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 2982 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 2983 unique = B_FALSE; 2984 2985 if (esp_assoc != NULL) { 2986 ap->ipa_act.ipa_apply.ipp_espa_minbits = 2987 esp_assoc->ipsa_authkeybits; 2988 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 2989 esp_assoc->ipsa_authkeybits; 2990 ap->ipa_act.ipa_apply.ipp_espe_minbits = 2991 esp_assoc->ipsa_encrkeybits; 2992 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 2993 esp_assoc->ipsa_encrkeybits; 2994 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 2995 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 2996 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 2997 unique = B_TRUE; 2998 } 2999 if (ah_assoc != NULL) { 3000 ap->ipa_act.ipa_apply.ipp_ah_minbits = 3001 ah_assoc->ipsa_authkeybits; 3002 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 3003 ah_assoc->ipsa_authkeybits; 3004 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 3005 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 3006 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 3007 unique = B_TRUE; 3008 } 3009 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3010 ap->ipa_want_unique = unique; 3011 ap->ipa_allow_clear = B_FALSE; 3012 ap->ipa_want_se = ii->ipsec_in_decaps; 3013 ap->ipa_want_ah = (ah_assoc != NULL); 3014 ap->ipa_want_esp = (esp_assoc != NULL); 3015 3016 ap->ipa_ovhd = 
ipsec_act_ovhd(&ap->ipa_act); 3017 3018 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3019 3020 return (ap); 3021 } 3022 3023 3024 /* 3025 * Compute the worst-case amount of extra space required by an action. 3026 * Note that, because of the ESP considerations listed below, this is 3027 * actually not the same as the best-case reduction in the MTU; in the 3028 * future, we should pass additional information to this function to 3029 * allow the actual MTU impact to be computed. 3030 * 3031 * AH: Revisit this if we implement algorithms with 3032 * a verifier size of more than 12 bytes. 3033 * 3034 * ESP: A more exact but more messy computation would take into 3035 * account the interaction between the cipher block size and the 3036 * effective MTU, yielding the inner payload size which reflects a 3037 * packet with *minimum* ESP padding.. 3038 */ 3039 int32_t 3040 ipsec_act_ovhd(const ipsec_act_t *act) 3041 { 3042 int32_t overhead = 0; 3043 3044 if (act->ipa_type == IPSEC_ACT_APPLY) { 3045 const ipsec_prot_t *ipp = &act->ipa_apply; 3046 3047 if (ipp->ipp_use_ah) 3048 overhead += IPSEC_MAX_AH_HDR_SIZE; 3049 if (ipp->ipp_use_esp) { 3050 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3051 overhead += sizeof (struct udphdr); 3052 } 3053 if (ipp->ipp_use_se) 3054 overhead += IP_SIMPLE_HDR_LENGTH; 3055 } 3056 return (overhead); 3057 } 3058 3059 /* 3060 * This hash function is used only when creating policies and thus is not 3061 * performance-critical for packet flows. 3062 * 3063 * Future work: canonicalize the structures hashed with this (i.e., 3064 * zeroize padding) so the hash works correctly. 3065 */ 3066 /* ARGSUSED */ 3067 static uint32_t 3068 policy_hash(int size, const void *start, const void *end) 3069 { 3070 return (0); 3071 } 3072 3073 3074 /* 3075 * Hash function macros for each address type. 3076 * 3077 * The IPV6 hash function assumes that the low order 32-bits of the 3078 * address (typically containing the low order 24 bits of the mac 3079 * address) are reasonably well-distributed. Revisit this if we run 3080 * into trouble from lots of collisions on ::1 addresses and the like 3081 * (seems unlikely). 3082 */ 3083 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3084 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3085 3086 /* 3087 * These two hash functions should produce coordinated values 3088 * but have slightly different roles. 3089 */ 3090 static uint32_t 3091 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3092 { 3093 uint32_t valid = selkey->ipsl_valid; 3094 ipsec_stack_t *ipss = ns->netstack_ipsec; 3095 3096 if (!(valid & IPSL_REMOTE_ADDR)) 3097 return (IPSEC_SEL_NOHASH); 3098 3099 if (valid & IPSL_IPV4) { 3100 if (selkey->ipsl_remote_pfxlen == 32) { 3101 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3102 ipss->ipsec_spd_hashsize)); 3103 } 3104 } 3105 if (valid & IPSL_IPV6) { 3106 if (selkey->ipsl_remote_pfxlen == 128) { 3107 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3108 ipss->ipsec_spd_hashsize)); 3109 } 3110 } 3111 return (IPSEC_SEL_NOHASH); 3112 } 3113 3114 static uint32_t 3115 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3116 { 3117 if (sel->ips_isv4) { 3118 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3119 root->ipr_nchains)); 3120 } 3121 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3122 } 3123 3124 /* 3125 * Intern actions into the action hash table. 
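 *
 * Because actions are interned, action arrays that compare equal
 * byte-for-byte end up sharing a single chain, which is what lets
 * ipsec_compare_action() short-circuit on pointer equality. A sketch
 * of typical use (field values are illustrative only):
 *
 *	ipsec_act_t act;
 *
 *	bzero(&act, sizeof (act));
 *	act.ipa_type = IPSEC_ACT_APPLY;
 *	act.ipa_apply.ipp_use_esp = B_TRUE;
 *	if ((ap = ipsec_act_find(&act, 1, ns)) != NULL) {
 *		... use ap ...
 *		IPACT_REFRELE(ap);
 *	}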
3126 */ 3127 ipsec_action_t * 3128 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3129 { 3130 int i; 3131 uint32_t hval; 3132 ipsec_action_t *ap; 3133 ipsec_action_t *prev = NULL; 3134 int32_t overhead, maxovhd = 0; 3135 boolean_t allow_clear = B_FALSE; 3136 boolean_t want_ah = B_FALSE; 3137 boolean_t want_esp = B_FALSE; 3138 boolean_t want_se = B_FALSE; 3139 boolean_t want_unique = B_FALSE; 3140 ipsec_stack_t *ipss = ns->netstack_ipsec; 3141 3142 /* 3143 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3144 * so we can use a non-trivial policy_hash function. 3145 */ 3146 for (i = n-1; i >= 0; i--) { 3147 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3148 3149 HASH_LOCK(ipss->ipsec_action_hash, hval); 3150 3151 for (HASH_ITERATE(ap, ipa_hash, 3152 ipss->ipsec_action_hash, hval)) { 3153 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3154 continue; 3155 if (ap->ipa_next != prev) 3156 continue; 3157 break; 3158 } 3159 if (ap != NULL) { 3160 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3161 prev = ap; 3162 continue; 3163 } 3164 /* 3165 * need to allocate a new one.. 3166 */ 3167 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3168 if (ap == NULL) { 3169 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3170 if (prev != NULL) 3171 ipsec_action_free(prev); 3172 return (NULL); 3173 } 3174 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3175 3176 ap->ipa_next = prev; 3177 ap->ipa_act = a[i]; 3178 3179 overhead = ipsec_act_ovhd(&a[i]); 3180 if (maxovhd < overhead) 3181 maxovhd = overhead; 3182 3183 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3184 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3185 allow_clear = B_TRUE; 3186 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3187 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3188 3189 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3190 want_ah |= ipp->ipp_use_ah; 3191 want_esp |= ipp->ipp_use_esp; 3192 want_se |= ipp->ipp_use_se; 3193 want_unique |= ipp->ipp_use_unique; 3194 } 3195 ap->ipa_allow_clear = allow_clear; 3196 ap->ipa_want_ah = want_ah; 3197 ap->ipa_want_esp = want_esp; 3198 ap->ipa_want_se = want_se; 3199 ap->ipa_want_unique = want_unique; 3200 ap->ipa_refs = 1; /* from the hash table */ 3201 ap->ipa_ovhd = maxovhd; 3202 if (prev) 3203 prev->ipa_refs++; 3204 prev = ap; 3205 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3206 } 3207 3208 ap->ipa_refs++; /* caller's reference */ 3209 3210 return (ap); 3211 } 3212 3213 /* 3214 * Called when refcount goes to 0, indicating that all references to this 3215 * node are gone. 3216 * 3217 * This does not unchain the action from the hash table. 3218 */ 3219 void 3220 ipsec_action_free(ipsec_action_t *ap) 3221 { 3222 for (;;) { 3223 ipsec_action_t *np = ap->ipa_next; 3224 ASSERT(ap->ipa_refs == 0); 3225 ASSERT(ap->ipa_hash.hash_pp == NULL); 3226 kmem_cache_free(ipsec_action_cache, ap); 3227 ap = np; 3228 /* Inlined IPACT_REFRELE -- avoid recursion */ 3229 if (ap == NULL) 3230 break; 3231 membar_exit(); 3232 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3233 break; 3234 /* End inlined IPACT_REFRELE */ 3235 } 3236 } 3237 3238 /* 3239 * Called when the action hash table goes away. 3240 * 3241 * The actions can be queued on an mblk with ipsec_in or 3242 * ipsec_out, hence the actions might still be around. 3243 * But we decrement ipa_refs here since we no longer have 3244 * a reference to the action from the hash table. 
3245 */ 3246 static void 3247 ipsec_action_free_table(ipsec_action_t *ap) 3248 { 3249 while (ap != NULL) { 3250 ipsec_action_t *np = ap->ipa_next; 3251 3252 /* FIXME: remove? */ 3253 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3254 (void *)ap, ap->ipa_refs); 3255 ASSERT(ap->ipa_refs > 0); 3256 IPACT_REFRELE(ap); 3257 ap = np; 3258 } 3259 } 3260 3261 /* 3262 * Need to walk all stack instances since the reclaim function 3263 * is global for all instances 3264 */ 3265 /* ARGSUSED */ 3266 static void 3267 ipsec_action_reclaim(void *arg) 3268 { 3269 netstack_handle_t nh; 3270 netstack_t *ns; 3271 3272 netstack_next_init(&nh); 3273 while ((ns = netstack_next(&nh)) != NULL) { 3274 ipsec_action_reclaim_stack(ns); 3275 netstack_rele(ns); 3276 } 3277 netstack_next_fini(&nh); 3278 } 3279 3280 /* 3281 * Periodically sweep action hash table for actions with refcount==1, and 3282 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3283 * because we can't close the race between another thread finding the action 3284 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3285 * Instead, we run this function sporadically to clean up after ourselves; 3286 * we also set it as the "reclaim" function for the action kmem_cache. 3287 * 3288 * Note that it may take several passes of ipsec_action_gc() to free all 3289 * "stale" actions. 3290 */ 3291 static void 3292 ipsec_action_reclaim_stack(netstack_t *ns) 3293 { 3294 int i; 3295 ipsec_stack_t *ipss = ns->netstack_ipsec; 3296 3297 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3298 ipsec_action_t *ap, *np; 3299 3300 /* skip the lock if nobody home */ 3301 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3302 continue; 3303 3304 HASH_LOCK(ipss->ipsec_action_hash, i); 3305 for (ap = ipss->ipsec_action_hash[i].hash_head; 3306 ap != NULL; ap = np) { 3307 ASSERT(ap->ipa_refs > 0); 3308 np = ap->ipa_hash.hash_next; 3309 if (ap->ipa_refs > 1) 3310 continue; 3311 HASH_UNCHAIN(ap, ipa_hash, 3312 ipss->ipsec_action_hash, i); 3313 IPACT_REFRELE(ap); 3314 } 3315 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3316 } 3317 } 3318 3319 /* 3320 * Intern a selector set into the selector set hash table. 3321 * This is simpler than the actions case.. 3322 */ 3323 static ipsec_sel_t * 3324 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3325 { 3326 ipsec_sel_t *sp; 3327 uint32_t hval, bucket; 3328 ipsec_stack_t *ipss = ns->netstack_ipsec; 3329 3330 /* 3331 * Exactly one AF bit should be set in selkey. 3332 */ 3333 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3334 !(selkey->ipsl_valid & IPSL_IPV6)); 3335 3336 hval = selkey_hash(selkey, ns); 3337 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3338 selkey->ipsl_sel_hval = hval; 3339 3340 bucket = (hval == IPSEC_SEL_NOHASH) ? 
0 : hval; 3341 3342 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); 3343 HASH_LOCK(ipss->ipsec_sel_hash, bucket); 3344 3345 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { 3346 if (bcmp(&sp->ipsl_key, selkey, 3347 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3348 break; 3349 } 3350 if (sp != NULL) { 3351 sp->ipsl_refs++; 3352 3353 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3354 return (sp); 3355 } 3356 3357 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3358 if (sp == NULL) { 3359 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3360 return (NULL); 3361 } 3362 3363 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); 3364 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3365 sp->ipsl_key = *selkey; 3366 /* Set to uninitialized and have insertion into polhead fix things. */ 3367 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3368 sp->ipsl_key.ipsl_pol_hval = 0; 3369 else 3370 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3371 3372 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3373 3374 return (sp); 3375 } 3376 3377 static void 3378 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) 3379 { 3380 ipsec_sel_t *sp = *spp; 3381 int hval = sp->ipsl_key.ipsl_sel_hval; 3382 ipsec_stack_t *ipss = ns->netstack_ipsec; 3383 3384 *spp = NULL; 3385 3386 if (hval == IPSEC_SEL_NOHASH) 3387 hval = 0; 3388 3389 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); 3390 HASH_LOCK(ipss->ipsec_sel_hash, hval); 3391 if (--sp->ipsl_refs == 1) { 3392 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); 3393 sp->ipsl_refs--; 3394 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3395 ASSERT(sp->ipsl_refs == 0); 3396 kmem_cache_free(ipsec_sel_cache, sp); 3397 /* Caller unlocks */ 3398 return; 3399 } 3400 3401 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3402 } 3403 3404 /* 3405 * Free a policy rule which we know is no longer being referenced. 3406 */ 3407 void 3408 ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns) 3409 { 3410 ASSERT(ipp->ipsp_refs == 0); 3411 ASSERT(ipp->ipsp_sel != NULL); 3412 ASSERT(ipp->ipsp_act != NULL); 3413 3414 ipsec_sel_rel(&ipp->ipsp_sel, ns); 3415 IPACT_REFRELE(ipp->ipsp_act); 3416 kmem_cache_free(ipsec_pol_cache, ipp); 3417 } 3418 3419 /* 3420 * Construction of new policy rules; construct a policy, and add it to 3421 * the appropriate tables. 
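 *
 * A sketch of how the routines here fit together when adding a rule
 * (names like "keys", "acts" and "dir" are placeholders, and error
 * handling is elided):
 *
 *	ipp = ipsec_policy_create(&keys, acts, nacts, prio, NULL, ns);
 *	rw_enter(&php->iph_lock, RW_WRITER);
 *	if (ipsec_check_policy(php, ipp, dir))
 *		ipsec_enter_policy(php, ipp, dir, ns);
 *	rw_exit(&php->iph_lock);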
3422 */ 3423 ipsec_policy_t * 3424 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3425 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3426 { 3427 ipsec_action_t *ap; 3428 ipsec_sel_t *sp; 3429 ipsec_policy_t *ipp; 3430 ipsec_stack_t *ipss = ns->netstack_ipsec; 3431 3432 if (index_ptr == NULL) 3433 index_ptr = &ipss->ipsec_next_policy_index; 3434 3435 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3436 ap = ipsec_act_find(a, nacts, ns); 3437 sp = ipsec_find_sel(keys, ns); 3438 3439 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3440 if (ap != NULL) { 3441 IPACT_REFRELE(ap); 3442 } 3443 if (sp != NULL) 3444 ipsec_sel_rel(&sp, ns); 3445 if (ipp != NULL) 3446 kmem_cache_free(ipsec_pol_cache, ipp); 3447 return (NULL); 3448 } 3449 3450 HASH_NULL(ipp, ipsp_hash); 3451 3452 ipp->ipsp_refs = 1; /* caller's reference */ 3453 ipp->ipsp_sel = sp; 3454 ipp->ipsp_act = ap; 3455 ipp->ipsp_prio = prio; /* rule priority */ 3456 ipp->ipsp_index = *index_ptr; 3457 (*index_ptr)++; 3458 3459 return (ipp); 3460 } 3461 3462 static void 3463 ipsec_update_present_flags(ipsec_stack_t *ipss) 3464 { 3465 boolean_t hashpol; 3466 3467 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3468 3469 if (hashpol) { 3470 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3471 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3472 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3473 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3474 return; 3475 } 3476 3477 ipss->ipsec_outbound_v4_policy_present = (NULL != 3478 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3479 ipr_nonhash[IPSEC_AF_V4]); 3480 ipss->ipsec_outbound_v6_policy_present = (NULL != 3481 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3482 ipr_nonhash[IPSEC_AF_V6]); 3483 ipss->ipsec_inbound_v4_policy_present = (NULL != 3484 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3485 ipr_nonhash[IPSEC_AF_V4]); 3486 ipss->ipsec_inbound_v6_policy_present = (NULL != 3487 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3488 ipr_nonhash[IPSEC_AF_V6]); 3489 } 3490 3491 boolean_t 3492 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3493 netstack_t *ns) 3494 { 3495 ipsec_sel_t *sp; 3496 ipsec_policy_t *ip, *nip, *head; 3497 int af; 3498 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3499 3500 sp = ipsec_find_sel(keys, ns); 3501 3502 if (sp == NULL) 3503 return (B_FALSE); 3504 3505 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6; 3506 3507 rw_enter(&php->iph_lock, RW_WRITER); 3508 3509 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) { 3510 head = pr->ipr_nonhash[af]; 3511 } else { 3512 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3513 } 3514 3515 for (ip = head; ip != NULL; ip = nip) { 3516 nip = ip->ipsp_hash.hash_next; 3517 if (ip->ipsp_sel != sp) { 3518 continue; 3519 } 3520 3521 IPPOL_UNCHAIN(php, ip, ns); 3522 3523 php->iph_gen++; 3524 ipsec_update_present_flags(ns->netstack_ipsec); 3525 3526 rw_exit(&php->iph_lock); 3527 3528 ipsec_sel_rel(&sp, ns); 3529 3530 return (B_TRUE); 3531 } 3532 3533 rw_exit(&php->iph_lock); 3534 ipsec_sel_rel(&sp, ns); 3535 return (B_FALSE); 3536 } 3537 3538 int 3539 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index, 3540 netstack_t *ns) 3541 { 3542 boolean_t found = B_FALSE; 3543 ipsec_policy_t ipkey; 3544 ipsec_policy_t *ip; 3545 avl_index_t where; 3546 3547 (void) memset(&ipkey, 0, sizeof (ipkey)); 3548 ipkey.ipsp_index = policy_index; 3549 3550 rw_enter(&php->iph_lock, RW_WRITER); 3551 3552 /* 3553 * We could be cleverer here about the walk, but 3554 * (k+1)*log(N) will do for now (k == number of matches, 3555 * N == number of table entries). 3556 */ 3557 for (;;) { 3558 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid, 3559 (void *)&ipkey, &where); 3560 ASSERT(ip == NULL); 3561 3562 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER); 3563 3564 if (ip == NULL) 3565 break; 3566 3567 if (ip->ipsp_index != policy_index) { 3568 ASSERT(ip->ipsp_index > policy_index); 3569 break; 3570 } 3571 3572 IPPOL_UNCHAIN(php, ip, ns); 3573 found = B_TRUE; 3574 } 3575 3576 if (found) { 3577 php->iph_gen++; 3578 ipsec_update_present_flags(ns->netstack_ipsec); 3579 } 3580 3581 rw_exit(&php->iph_lock); 3582 3583 return (found ? 0 : ENOENT); 3584 } 3585 3586 /* 3587 * Given a constructed ipsec_policy_t policy rule, see if it can be entered 3588 * into the correct policy ruleset. As a side-effect, it sets the hash 3589 * entries on "ipp"'s ipsp_pol_hval. 3590 * 3591 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a 3592 * duplicate policy exists with exactly the same selectors, or an ICMP 3593 * rule exists with a different encryption/authentication action). 3594 */ 3595 boolean_t 3596 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3597 { 3598 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3599 int af = -1; 3600 ipsec_policy_t *p2, *head; 3601 uint8_t check_proto; 3602 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3603 uint32_t valid = selkey->ipsl_valid; 3604 3605 if (valid & IPSL_IPV6) { 3606 ASSERT(!(valid & IPSL_IPV4)); 3607 af = IPSEC_AF_V6; 3608 check_proto = IPPROTO_ICMPV6; 3609 } else { 3610 ASSERT(valid & IPSL_IPV4); 3611 af = IPSEC_AF_V4; 3612 check_proto = IPPROTO_ICMP; 3613 } 3614 3615 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3616 3617 /* 3618 * Double-check that we don't have any duplicate selectors here. 3619 * Because selectors are interned below, we need only compare pointers 3620 * for equality. 3621 */ 3622 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) { 3623 head = pr->ipr_nonhash[af]; 3624 } else { 3625 selkey->ipsl_pol_hval = 3626 (selkey->ipsl_valid & IPSL_IPV4) ? 
3627 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3628 pr->ipr_nchains) : 3629 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3630 pr->ipr_nchains); 3631 3632 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3633 } 3634 3635 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3636 if (p2->ipsp_sel == ipp->ipsp_sel) 3637 return (B_FALSE); 3638 } 3639 3640 /* 3641 * If it's ICMP and not a drop or pass rule, run through the ICMP 3642 * rules and make sure the action is either new or the same as any 3643 * other actions. We don't have to check the full chain because 3644 * discard and bypass will override all other actions 3645 */ 3646 3647 if (valid & IPSL_PROTOCOL && 3648 selkey->ipsl_proto == check_proto && 3649 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3650 3651 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3652 3653 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3654 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3655 (p2->ipsp_act->ipa_act.ipa_type == 3656 IPSEC_ACT_APPLY)) { 3657 return (ipsec_compare_action(p2, ipp)); 3658 } 3659 } 3660 } 3661 3662 return (B_TRUE); 3663 } 3664 3665 /* 3666 * compare the action chains of two policies for equality 3667 * B_TRUE -> effective equality 3668 */ 3669 3670 static boolean_t 3671 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3672 { 3673 3674 ipsec_action_t *act1, *act2; 3675 3676 /* We have a valid rule. Let's compare the actions */ 3677 if (p1->ipsp_act == p2->ipsp_act) { 3678 /* same action. We are good */ 3679 return (B_TRUE); 3680 } 3681 3682 /* we have to walk the chain */ 3683 3684 act1 = p1->ipsp_act; 3685 act2 = p2->ipsp_act; 3686 3687 while (act1 != NULL && act2 != NULL) { 3688 3689 /* otherwise, Are we close enough? */ 3690 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3691 act1->ipa_want_ah != act2->ipa_want_ah || 3692 act1->ipa_want_esp != act2->ipa_want_esp || 3693 act1->ipa_want_se != act2->ipa_want_se) { 3694 /* Nope, we aren't */ 3695 return (B_FALSE); 3696 } 3697 3698 if (act1->ipa_want_ah) { 3699 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3700 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3701 return (B_FALSE); 3702 } 3703 3704 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3705 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3706 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3707 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3708 return (B_FALSE); 3709 } 3710 } 3711 3712 if (act1->ipa_want_esp) { 3713 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3714 act2->ipa_act.ipa_apply.ipp_use_esp || 3715 act1->ipa_act.ipa_apply.ipp_use_espa != 3716 act2->ipa_act.ipa_apply.ipp_use_espa) { 3717 return (B_FALSE); 3718 } 3719 3720 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3721 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3722 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3723 return (B_FALSE); 3724 } 3725 3726 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3727 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3728 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3729 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3730 return (B_FALSE); 3731 } 3732 } 3733 3734 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3735 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3736 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3737 return (B_FALSE); 3738 } 3739 3740 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3741 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3742 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3743 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3744 return (B_FALSE); 3745 } 3746 } 3747 3748 } 3749 3750 act1 = 
act1->ipa_next; 3751 act2 = act2->ipa_next; 3752 } 3753 3754 if (act1 != NULL || act2 != NULL) { 3755 return (B_FALSE); 3756 } 3757 3758 return (B_TRUE); 3759 } 3760 3761 3762 /* 3763 * Given a constructed ipsec_policy_t policy rule, enter it into 3764 * the correct policy ruleset. 3765 * 3766 * ipsec_check_policy() is assumed to have succeeded first (to check for 3767 * duplicates). 3768 */ 3769 void 3770 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3771 netstack_t *ns) 3772 { 3773 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3774 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3775 uint32_t valid = selkey->ipsl_valid; 3776 uint32_t hval = selkey->ipsl_pol_hval; 3777 int af = -1; 3778 3779 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3780 3781 if (valid & IPSL_IPV6) { 3782 ASSERT(!(valid & IPSL_IPV4)); 3783 af = IPSEC_AF_V6; 3784 } else { 3785 ASSERT(valid & IPSL_IPV4); 3786 af = IPSEC_AF_V4; 3787 } 3788 3789 php->iph_gen++; 3790 3791 if (hval == IPSEC_SEL_NOHASH) { 3792 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3793 } else { 3794 HASH_LOCK(pr->ipr_hash, hval); 3795 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3796 HASH_UNLOCK(pr->ipr_hash, hval); 3797 } 3798 3799 ipsec_insert_always(&php->iph_rulebyid, ipp); 3800 3801 ipsec_update_present_flags(ns->netstack_ipsec); 3802 } 3803 3804 static void 3805 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, 3806 netstack_t *ns) 3807 { 3808 ipsec_policy_t *ip, *nip; 3809 int af, chain, nchain; 3810 3811 for (af = 0; af < IPSEC_NAF; af++) { 3812 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3813 nip = ip->ipsp_hash.hash_next; 3814 IPPOL_UNCHAIN(php, ip, ns); 3815 } 3816 ipr->ipr_nonhash[af] = NULL; 3817 } 3818 nchain = ipr->ipr_nchains; 3819 3820 for (chain = 0; chain < nchain; chain++) { 3821 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3822 ip = nip) { 3823 nip = ip->ipsp_hash.hash_next; 3824 IPPOL_UNCHAIN(php, ip, ns); 3825 } 3826 ipr->ipr_hash[chain].hash_head = NULL; 3827 } 3828 } 3829 3830 void 3831 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3832 { 3833 int dir; 3834 3835 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3836 3837 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3838 ipsec_ipr_flush(php, &php->iph_root[dir], ns); 3839 3840 ipsec_update_present_flags(ns->netstack_ipsec); 3841 } 3842 3843 void 3844 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3845 { 3846 int dir; 3847 3848 ASSERT(php->iph_refs == 0); 3849 3850 rw_enter(&php->iph_lock, RW_WRITER); 3851 ipsec_polhead_flush(php, ns); 3852 rw_exit(&php->iph_lock); 3853 rw_destroy(&php->iph_lock); 3854 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3855 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3856 int chain; 3857 3858 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3859 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3860 3861 } 3862 ipsec_polhead_free_table(php); 3863 kmem_free(php, sizeof (*php)); 3864 } 3865 3866 static void 3867 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3868 { 3869 int af; 3870 3871 ipr->ipr_nchains = 0; 3872 ipr->ipr_hash = NULL; 3873 3874 for (af = 0; af < IPSEC_NAF; af++) { 3875 ipr->ipr_nonhash[af] = NULL; 3876 } 3877 } 3878 3879 ipsec_policy_head_t * 3880 ipsec_polhead_create(void) 3881 { 3882 ipsec_policy_head_t *php; 3883 3884 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3885 if (php == NULL) 3886 return (php); 3887 3888 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3889 php->iph_refs = 1; 3890 php->iph_gen = 0; 3891 3892 
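/*
 * Each rule ends up filed twice: in one of the per-direction roots
 * (initialized below) for selector-based lookup, and in the
 * iph_rulebyid AVL tree so that ipsec_policy_delete_index() can find
 * rules by index in O(log n).
 */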
ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3893 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3894 3895 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3896 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3897 3898 return (php); 3899 } 3900 3901 /* 3902 * Clone the policy head into a new polhead; release one reference to the 3903 * old one and return the only reference to the new one. 3904 * If the old one had a refcount of 1, just return it. 3905 */ 3906 ipsec_policy_head_t * 3907 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns) 3908 { 3909 ipsec_policy_head_t *nphp; 3910 3911 if (php == NULL) 3912 return (ipsec_polhead_create()); 3913 else if (php->iph_refs == 1) 3914 return (php); 3915 3916 nphp = ipsec_polhead_create(); 3917 if (nphp == NULL) 3918 return (NULL); 3919 3920 if (ipsec_copy_polhead(php, nphp, ns) != 0) { 3921 ipsec_polhead_free(nphp, ns); 3922 return (NULL); 3923 } 3924 IPPH_REFRELE(php, ns); 3925 return (nphp); 3926 } 3927 3928 /* 3929 * When sending a response to an ICMP request or generating a RST 3930 * in the TCP case, the outbound packets need to go at the same level 3931 * of protection as the incoming ones, i.e., we associate our outbound 3932 * policy with how the packet came in. We call this after we have 3933 * accepted the incoming packet, which may or may not have been in 3934 * the clear, and hence we send the reply back with the policy 3935 * matching the incoming datagram's policy. 3936 * 3937 * NOTE : This technique serves two purposes : 3938 * 3939 * 1) If we have multiple outbound policies, we send out a reply 3940 * matching how it came in rather than matching the outbound 3941 * policy. 3942 * 3943 * 2) For asymmetric policies, we want to make sure that incoming 3944 * and outgoing have the same level of protection. Asymmetric 3945 * policies exist only with global policy, where we may not have 3946 * both outbound and inbound at the same time. 3947 * 3948 * NOTE2: This function is called by cleartext cases, so it needs to be 3949 * in IP proper. 3950 */ 3951 boolean_t 3952 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) 3953 { 3954 ipsec_in_t *ii; 3955 ipsec_out_t *io; 3956 boolean_t v4; 3957 mblk_t *mp; 3958 boolean_t secure, attach_if; 3959 uint_t ifindex; 3960 ipsec_selector_t sel; 3961 ipsec_action_t *reflect_action = NULL; 3962 zoneid_t zoneid; 3963 netstack_t *ns; 3964 3965 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 3966 3967 bzero((void*)&sel, sizeof (sel)); 3968 3969 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 3970 3971 mp = ipsec_mp->b_cont; 3972 ASSERT(mp != NULL); 3973 3974 if (ii->ipsec_in_action != NULL) { 3975 /* transfer reference.. */ 3976 reflect_action = ii->ipsec_in_action; 3977 ii->ipsec_in_action = NULL; 3978 } else if (!ii->ipsec_in_loopback) 3979 reflect_action = ipsec_in_to_out_action(ii); 3980 secure = ii->ipsec_in_secure; 3981 attach_if = ii->ipsec_in_attach_if; 3982 ifindex = ii->ipsec_in_ill_index; 3983 zoneid = ii->ipsec_in_zoneid; 3984 ASSERT(zoneid != ALL_ZONES); 3985 ns = ii->ipsec_in_ns; 3986 v4 = ii->ipsec_in_v4; 3987 3988 ipsec_in_release_refs(ii); /* No netstack_rele/hold needed */ 3989 3990 /* 3991 * The caller is going to send the datagram out, which might 3992 * go on the wire or be delivered locally through ip_wput_local. 3993 * 3994 * 1) If it goes out on the wire, new associations will be 3995 * obtained. 3996 * 2) If it is delivered locally, ip_wput_local will convert 3997 * this IPSEC_OUT to an IPSEC_IN, looking at the requests.
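*
* (Illustrative note, not in the original source: the conversion
* below re-stamps the existing M_CTL dblk in place, so it cannot
* fail for lack of memory; the only failure path in this function
* is the ipsec_init_outbound_ports() call that extracts the port
* selectors from the packet.)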
3998 */ 3999 4000 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4001 bzero(io, sizeof (ipsec_out_t)); 4002 io->ipsec_out_type = IPSEC_OUT; 4003 io->ipsec_out_len = sizeof (ipsec_out_t); 4004 io->ipsec_out_frtn.free_func = ipsec_out_free; 4005 io->ipsec_out_frtn.free_arg = (char *)io; 4006 io->ipsec_out_act = reflect_action; 4007 4008 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, 4009 ns->netstack_ipsec)) 4010 return (B_FALSE); 4011 4012 io->ipsec_out_src_port = sel.ips_local_port; 4013 io->ipsec_out_dst_port = sel.ips_remote_port; 4014 io->ipsec_out_proto = sel.ips_protocol; 4015 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4016 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4017 4018 /* 4019 * Don't use global policy for this, as we want 4020 * to use the same protection that was applied to the inbound packet. 4021 */ 4022 io->ipsec_out_use_global_policy = B_FALSE; 4023 io->ipsec_out_proc_begin = B_FALSE; 4024 io->ipsec_out_secure = secure; 4025 io->ipsec_out_v4 = v4; 4026 io->ipsec_out_attach_if = attach_if; 4027 io->ipsec_out_ill_index = ifindex; 4028 io->ipsec_out_zoneid = zoneid; 4029 io->ipsec_out_ns = ns; /* No netstack_hold */ 4030 4031 return (B_TRUE); 4032 } 4033 4034 mblk_t * 4035 ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4036 { 4037 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 4038 ipsec_in_t *nii; 4039 mblk_t *nmp; 4040 frtn_t nfrtn; 4041 ipsec_stack_t *ipss = ns->netstack_ipsec; 4042 4043 ASSERT(ii->ipsec_in_type == IPSEC_IN); 4044 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 4045 4046 nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); 4047 if (nmp == NULL) { 4048 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4049 DROPPER(ipss, ipds_spd_nomem), 4050 &ipss->ipsec_spd_dropper); 4051 return (NULL); 4052 } 4053 4054 ASSERT(nmp->b_datap->db_type == M_CTL); 4055 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4056 4057 /* 4058 * Bump refcounts. 4059 */ 4060 if (ii->ipsec_in_ah_sa != NULL) 4061 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 4062 if (ii->ipsec_in_esp_sa != NULL) 4063 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 4064 if (ii->ipsec_in_policy != NULL) 4065 IPPH_REFHOLD(ii->ipsec_in_policy); 4066 4067 /* 4068 * Copy everything, but preserve the free routine provided by 4069 * ipsec_in_alloc(). 4070 */ 4071 nii = (ipsec_in_t *)nmp->b_rptr; 4072 nfrtn = nii->ipsec_in_frtn; 4073 bcopy(ii, nii, sizeof (*ii)); 4074 nii->ipsec_in_frtn = nfrtn; 4075 4076 nmp->b_cont = cont; 4077 4078 return (nmp); 4079 } 4080 4081 mblk_t * 4082 ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4083 { 4084 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 4085 ipsec_out_t *nio; 4086 mblk_t *nmp; 4087 frtn_t nfrtn; 4088 ipsec_stack_t *ipss = ns->netstack_ipsec; 4089 4090 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4091 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4092 4093 nmp = ipsec_alloc_ipsec_out(ns); 4094 if (nmp == NULL) { 4095 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4096 DROPPER(ipss, ipds_spd_nomem), 4097 &ipss->ipsec_spd_dropper); 4098 return (NULL); 4099 } 4100 ASSERT(nmp->b_datap->db_type == M_CTL); 4101 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4102 4103 /* 4104 * Bump refcounts. 
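*
* (Descriptive note, not in the original source: every pointer that
* is about to be bcopy()'d into the clone gets its own hold here, so
* the clone and the original IPSEC_OUT can be freed in either order;
* ipsec_out_release_refs() drops these holds when the clone's free
* routine runs.)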
4105 */ 4106 if (io->ipsec_out_ah_sa != NULL) 4107 IPSA_REFHOLD(io->ipsec_out_ah_sa); 4108 if (io->ipsec_out_esp_sa != NULL) 4109 IPSA_REFHOLD(io->ipsec_out_esp_sa); 4110 if (io->ipsec_out_polhead != NULL) 4111 IPPH_REFHOLD(io->ipsec_out_polhead); 4112 if (io->ipsec_out_policy != NULL) 4113 IPPOL_REFHOLD(io->ipsec_out_policy); 4114 if (io->ipsec_out_act != NULL) 4115 IPACT_REFHOLD(io->ipsec_out_act); 4116 if (io->ipsec_out_latch != NULL) 4117 IPLATCH_REFHOLD(io->ipsec_out_latch); 4118 if (io->ipsec_out_cred != NULL) 4119 crhold(io->ipsec_out_cred); 4120 4121 /* 4122 * Copy everything, but preserve the free routine provided by 4123 * ipsec_alloc_ipsec_out(). 4124 */ 4125 nio = (ipsec_out_t *)nmp->b_rptr; 4126 nfrtn = nio->ipsec_out_frtn; 4127 bcopy(io, nio, sizeof (*io)); 4128 nio->ipsec_out_frtn = nfrtn; 4129 4130 nmp->b_cont = cont; 4131 4132 return (nmp); 4133 } 4134 4135 static void 4136 ipsec_out_release_refs(ipsec_out_t *io) 4137 { 4138 netstack_t *ns = io->ipsec_out_ns; 4139 4140 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4141 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4142 ASSERT(io->ipsec_out_ns != NULL); 4143 4144 /* Note: IPSA_REFRELE is multi-line macro */ 4145 if (io->ipsec_out_ah_sa != NULL) 4146 IPSA_REFRELE(io->ipsec_out_ah_sa); 4147 if (io->ipsec_out_esp_sa != NULL) 4148 IPSA_REFRELE(io->ipsec_out_esp_sa); 4149 if (io->ipsec_out_polhead != NULL) 4150 IPPH_REFRELE(io->ipsec_out_polhead, ns); 4151 if (io->ipsec_out_policy != NULL) 4152 IPPOL_REFRELE(io->ipsec_out_policy, ns); 4153 if (io->ipsec_out_act != NULL) 4154 IPACT_REFRELE(io->ipsec_out_act); 4155 if (io->ipsec_out_cred != NULL) { 4156 crfree(io->ipsec_out_cred); 4157 io->ipsec_out_cred = NULL; 4158 } 4159 if (io->ipsec_out_latch) { 4160 IPLATCH_REFRELE(io->ipsec_out_latch, ns); 4161 io->ipsec_out_latch = NULL; 4162 } 4163 } 4164 4165 static void 4166 ipsec_out_free(void *arg) 4167 { 4168 ipsec_out_t *io = (ipsec_out_t *)arg; 4169 ipsec_out_release_refs(io); 4170 kmem_cache_free(ipsec_info_cache, arg); 4171 } 4172 4173 static void 4174 ipsec_in_release_refs(ipsec_in_t *ii) 4175 { 4176 netstack_t *ns = ii->ipsec_in_ns; 4177 4178 ASSERT(ii->ipsec_in_ns != NULL); 4179 4180 /* Note: IPSA_REFRELE is multi-line macro */ 4181 if (ii->ipsec_in_ah_sa != NULL) 4182 IPSA_REFRELE(ii->ipsec_in_ah_sa); 4183 if (ii->ipsec_in_esp_sa != NULL) 4184 IPSA_REFRELE(ii->ipsec_in_esp_sa); 4185 if (ii->ipsec_in_policy != NULL) 4186 IPPH_REFRELE(ii->ipsec_in_policy, ns); 4187 if (ii->ipsec_in_da != NULL) { 4188 freeb(ii->ipsec_in_da); 4189 ii->ipsec_in_da = NULL; 4190 } 4191 } 4192 4193 static void 4194 ipsec_in_free(void *arg) 4195 { 4196 ipsec_in_t *ii = (ipsec_in_t *)arg; 4197 ipsec_in_release_refs(ii); 4198 kmem_cache_free(ipsec_info_cache, arg); 4199 } 4200 4201 /* 4202 * This is called only for outbound datagrams if the datagram needs to 4203 * go out secure. A NULL mp can be passed to get an ipsec_out. This 4204 * facility is used by ip_unbind. 4205 * 4206 * NOTE : o As the data part could be modified by ipsec_out_process etc. 4207 * we can't make it fast by calling a dup. 
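*
* A hedged usage sketch (not in the original source; the error
* handling shown is caller-specific):
*
*	mblk_t *ipsec_mp = ipsec_alloc_ipsec_out(ns);
*	if (ipsec_mp == NULL)
*		return (ENOMEM);
*	ipsec_mp->b_cont = data_mp;	(or left NULL, as in ip_unbind)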
4208 */ 4209 mblk_t * 4210 ipsec_alloc_ipsec_out(netstack_t *ns) 4211 { 4212 mblk_t *ipsec_mp; 4213 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4214 4215 if (io == NULL) 4216 return (NULL); 4217 4218 bzero(io, sizeof (ipsec_out_t)); 4219 4220 io->ipsec_out_type = IPSEC_OUT; 4221 io->ipsec_out_len = sizeof (ipsec_out_t); 4222 io->ipsec_out_frtn.free_func = ipsec_out_free; 4223 io->ipsec_out_frtn.free_arg = (char *)io; 4224 4225 /* 4226 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 4227 * using ipsec_out_zoneid should assert that the zoneid has been set to 4228 * a sane value. 4229 */ 4230 io->ipsec_out_zoneid = ALL_ZONES; 4231 io->ipsec_out_ns = ns; /* No netstack_hold */ 4232 4233 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 4234 &io->ipsec_out_frtn); 4235 if (ipsec_mp == NULL) { 4236 ipsec_out_free(io); 4237 4238 return (NULL); 4239 } 4240 ipsec_mp->b_datap->db_type = M_CTL; 4241 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 4242 4243 return (ipsec_mp); 4244 } 4245 4246 /* 4247 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4248 * Otherwise initialize using conn. 4249 * 4250 * If pol is non-null, we consume a reference to it. 4251 */ 4252 mblk_t * 4253 ipsec_attach_ipsec_out(mblk_t **mp, conn_t *connp, ipsec_policy_t *pol, 4254 uint8_t proto, netstack_t *ns) 4255 { 4256 mblk_t *ipsec_mp; 4257 ipsec_stack_t *ipss = ns->netstack_ipsec; 4258 4259 ASSERT((pol != NULL) || (connp != NULL)); 4260 4261 ipsec_mp = ipsec_alloc_ipsec_out(ns); 4262 if (ipsec_mp == NULL) { 4263 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4264 "ipsec_attach_ipsec_out: Allocation failure\n"); 4265 ip_drop_packet(*mp, B_FALSE, NULL, NULL, 4266 DROPPER(ipss, ipds_spd_nomem), 4267 &ipss->ipsec_spd_dropper); 4268 *mp = NULL; 4269 return (NULL); 4270 } 4271 ipsec_mp->b_cont = *mp; 4272 /* 4273 * If *mp is NULL, ipsec_init_ipsec_out() won't/should not be using it. 4274 */ 4275 return (ipsec_init_ipsec_out(ipsec_mp, mp, connp, pol, proto, ns)); 4276 } 4277 4278 /* 4279 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4280 * Otherwise initialize using conn. 4281 * 4282 * If pol is non-null, we consume a reference to it. 4283 */ 4284 mblk_t * 4285 ipsec_init_ipsec_out(mblk_t *ipsec_mp, mblk_t **mp, conn_t *connp, 4286 ipsec_policy_t *pol, uint8_t proto, netstack_t *ns) 4287 { 4288 ipsec_out_t *io; 4289 ipsec_policy_t *p; 4290 ipha_t *ipha; 4291 ip6_t *ip6h; 4292 ipsec_stack_t *ipss = ns->netstack_ipsec; 4293 4294 ASSERT(ipsec_mp->b_cont == *mp); 4295 4296 ASSERT((pol != NULL) || (connp != NULL)); 4297 4298 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4299 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4300 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4301 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4302 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4303 io->ipsec_out_latch = NULL; 4304 /* 4305 * Set the zoneid when we have the connp. 4306 * Otherwise, we're called from ip_wput_attach_policy() who will take 4307 * care of setting the zoneid. 
4308 */ 4309 if (connp != NULL) 4310 io->ipsec_out_zoneid = connp->conn_zoneid; 4311 4312 io->ipsec_out_ns = ns; /* No netstack_hold */ 4313 4314 if (*mp != NULL) { 4315 ipha = (ipha_t *)(*mp)->b_rptr; 4316 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4317 io->ipsec_out_v4 = B_TRUE; 4318 ip6h = NULL; 4319 } else { 4320 io->ipsec_out_v4 = B_FALSE; 4321 ip6h = (ip6_t *)ipha; 4322 ipha = NULL; 4323 } 4324 } else { 4325 ASSERT(connp != NULL && connp->conn_policy_cached); 4326 ip6h = NULL; 4327 ipha = NULL; 4328 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4329 } 4330 4331 p = NULL; 4332 4333 /* 4334 * Take latched policies over global policy. Check here again for 4335 * this, in case we had conn_latch set while the packet was flying 4336 * around in IP. 4337 */ 4338 if (connp != NULL && connp->conn_latch != NULL) { 4339 ASSERT(ns == connp->conn_netstack); 4340 p = connp->conn_latch->ipl_out_policy; 4341 io->ipsec_out_latch = connp->conn_latch; 4342 IPLATCH_REFHOLD(connp->conn_latch); 4343 if (p != NULL) { 4344 IPPOL_REFHOLD(p); 4345 } 4346 io->ipsec_out_src_port = connp->conn_lport; 4347 io->ipsec_out_dst_port = connp->conn_fport; 4348 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4349 if (pol != NULL) 4350 IPPOL_REFRELE(pol, ns); 4351 } else if (pol != NULL) { 4352 ipsec_selector_t sel; 4353 4354 bzero((void*)&sel, sizeof (sel)); 4355 4356 p = pol; 4357 /* 4358 * conn does not have the port information. Get 4359 * it from the packet. 4360 */ 4361 4362 if (!ipsec_init_outbound_ports(&sel, *mp, ipha, ip6h, 0, 4363 ns->netstack_ipsec)) { 4364 /* Callee did ip_drop_packet() on *mp. */ 4365 *mp = NULL; 4366 freeb(ipsec_mp); 4367 return (NULL); 4368 } 4369 io->ipsec_out_src_port = sel.ips_local_port; 4370 io->ipsec_out_dst_port = sel.ips_remote_port; 4371 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4372 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4373 } 4374 4375 io->ipsec_out_proto = proto; 4376 io->ipsec_out_use_global_policy = B_TRUE; 4377 io->ipsec_out_secure = (p != NULL); 4378 io->ipsec_out_policy = p; 4379 4380 if (p == NULL) { 4381 if (connp->conn_policy != NULL) { 4382 io->ipsec_out_secure = B_TRUE; 4383 ASSERT(io->ipsec_out_latch == NULL); 4384 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4385 io->ipsec_out_need_policy = B_TRUE; 4386 ASSERT(io->ipsec_out_polhead == NULL); 4387 IPPH_REFHOLD(connp->conn_policy); 4388 io->ipsec_out_polhead = connp->conn_policy; 4389 } 4390 } else { 4391 /* Handle explicit drop action. */ 4392 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4393 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4394 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4395 DROPPER(ipss, ipds_spd_explicit), 4396 &ipss->ipsec_spd_dropper); 4397 *mp = NULL; 4398 ipsec_mp = NULL; 4399 } 4400 } 4401 4402 return (ipsec_mp); 4403 } 4404 4405 /* 4406 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4407 * and keep track of what-if-any IPsec processing will be applied to the 4408 * datagram. 
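*
* A minimal usage sketch (not in the original source; assumes the
* caller owns data_mp and handles the failure however it sees fit):
*
*	mblk_t *ctl = ipsec_in_alloc(B_TRUE, ns);	(B_TRUE: IPv4)
*	if (ctl == NULL)
*		return;
*	ctl->b_cont = data_mp;	(the M_CTL now fronts the datagram)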
4409 */ 4410 mblk_t * 4411 ipsec_in_alloc(boolean_t isv4, netstack_t *ns) 4412 { 4413 mblk_t *ipsec_in; 4414 ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4415 4416 if (ii == NULL) 4417 return (NULL); 4418 4419 bzero(ii, sizeof (ipsec_info_t)); 4420 ii->ipsec_in_type = IPSEC_IN; 4421 ii->ipsec_in_len = sizeof (ipsec_in_t); 4422 4423 ii->ipsec_in_v4 = isv4; 4424 ii->ipsec_in_secure = B_TRUE; 4425 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4426 4427 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4428 ii->ipsec_in_frtn.free_arg = (char *)ii; 4429 4430 ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI, 4431 &ii->ipsec_in_frtn); 4432 if (ipsec_in == NULL) { 4433 ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n")); 4434 ipsec_in_free(ii); 4435 return (NULL); 4436 } 4437 4438 ipsec_in->b_datap->db_type = M_CTL; 4439 ipsec_in->b_wptr += sizeof (ipsec_info_t); 4440 4441 return (ipsec_in); 4442 } 4443 4444 /* 4445 * This is called from ip_wput_local when a packet which needs 4446 * security is looped back, to convert the IPSEC_OUT to an IPSEC_IN 4447 * before fanout, where the policy check happens. In most of the 4448 * cases, IPSEC processing has *never* been done. There is one case 4449 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where 4450 * the packet is destined for localhost and IPSEC processing has already 4451 * been done. 4452 * 4453 * Future: This could happen after SA selection has occurred for 4454 * outbound.. which will tell us who the src and dst identities are.. 4455 * Then it's just a matter of splicing the ah/esp SA pointers from the 4456 * ipsec_out_t to the ipsec_in_t. 4457 */ 4458 void 4459 ipsec_out_to_in(mblk_t *ipsec_mp) 4460 { 4461 ipsec_in_t *ii; 4462 ipsec_out_t *io; 4463 ipsec_policy_t *pol; 4464 ipsec_action_t *act; 4465 boolean_t v4, icmp_loopback; 4466 zoneid_t zoneid; 4467 netstack_t *ns; 4468 4469 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4470 4471 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4472 4473 v4 = io->ipsec_out_v4; 4474 zoneid = io->ipsec_out_zoneid; 4475 icmp_loopback = io->ipsec_out_icmp_loopback; 4476 ns = io->ipsec_out_ns; 4477 4478 act = io->ipsec_out_act; 4479 if (act == NULL) { 4480 pol = io->ipsec_out_policy; 4481 if (pol != NULL) { 4482 act = pol->ipsp_act; 4483 IPACT_REFHOLD(act); 4484 } 4485 } 4486 io->ipsec_out_act = NULL; 4487 4488 ipsec_out_release_refs(io); /* No netstack_rele/hold needed */ 4489 4490 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4491 bzero(ii, sizeof (ipsec_in_t)); 4492 ii->ipsec_in_type = IPSEC_IN; 4493 ii->ipsec_in_len = sizeof (ipsec_in_t); 4494 ii->ipsec_in_loopback = B_TRUE; 4495 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4496 4497 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4498 ii->ipsec_in_frtn.free_arg = (char *)ii; 4499 ii->ipsec_in_action = act; 4500 ii->ipsec_in_zoneid = zoneid; 4501 4502 /* 4503 * In most of the cases, we can't look at the ipsec_out_XXX_sa 4504 * because this never went through IPSEC processing. So, look at 4505 * the requests and infer whether it would have gone through 4506 * IPSEC processing or not. Initialize the "done" fields with 4507 * the requests. The possible values for "done" fields are : 4508 * 4509 * 1) zero, indicates that a particular preference was never 4510 * requested. 4511 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/ 4512 * IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that 4513 * IPSEC processing has been completed.
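*
* (Worked example, not in the original source: a datagram sent with
* an AH preference of IPSEC_PREF_REQUIRED would, once processed,
* carry a non-zero AH "done" field with IPSEC_REQ_DONE or'd in,
* while a preference that was never expressed stays zero.)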
4514 */ 4515 ii->ipsec_in_secure = B_TRUE; 4516 ii->ipsec_in_v4 = v4; 4517 ii->ipsec_in_icmp_loopback = icmp_loopback; 4518 ii->ipsec_in_attach_if = B_FALSE; 4519 } 4520 4521 /* 4522 * Consults global policy to see whether this datagram should 4523 * go out secure. If so, it attaches an ipsec_mp in front and 4524 * returns. 4525 */ 4526 mblk_t * 4527 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, 4528 conn_t *connp, boolean_t unspec_src, zoneid_t zoneid) 4529 { 4530 mblk_t *mp; 4531 ipsec_out_t *io = NULL; 4532 ipsec_selector_t sel; 4533 uint_t ill_index; 4534 boolean_t conn_dontroutex; 4535 boolean_t conn_multicast_loopx; 4536 boolean_t policy_present; 4537 ip_stack_t *ipst = ire->ire_ipst; 4538 netstack_t *ns = ipst->ips_netstack; 4539 ipsec_stack_t *ipss = ns->netstack_ipsec; 4540 4541 ASSERT((ipha != NULL && ip6h == NULL) || 4542 (ip6h != NULL && ipha == NULL)); 4543 4544 bzero((void*)&sel, sizeof (sel)); 4545 4546 if (ipha != NULL) 4547 policy_present = ipss->ipsec_outbound_v4_policy_present; 4548 else 4549 policy_present = ipss->ipsec_outbound_v6_policy_present; 4550 /* 4551 * Fast Path to see if there is any policy. 4552 */ 4553 if (!policy_present) { 4554 if (ipsec_mp->b_datap->db_type == M_CTL) { 4555 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4556 if (!io->ipsec_out_secure) { 4557 /* 4558 * If there is no global policy and ip_wput 4559 * or ip_wput_multicast has attached this mp 4560 * for the multicast case, free the ipsec_mp and 4561 * return the original mp. 4562 */ 4563 mp = ipsec_mp->b_cont; 4564 freeb(ipsec_mp); 4565 ipsec_mp = mp; 4566 io = NULL; 4567 } 4568 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4569 } 4570 if (((io == NULL) || (io->ipsec_out_polhead == NULL)) && 4571 ((connp == NULL) || (connp->conn_policy == NULL))) 4572 return (ipsec_mp); 4573 } 4574 4575 ill_index = 0; 4576 conn_multicast_loopx = conn_dontroutex = B_FALSE; 4577 mp = ipsec_mp; 4578 if (ipsec_mp->b_datap->db_type == M_CTL) { 4579 mp = ipsec_mp->b_cont; 4580 /* 4581 * This is a connection where we have some per-socket 4582 * policy or ip_wput has attached an ipsec_mp for 4583 * the multicast datagram. 4584 */ 4585 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4586 if (!io->ipsec_out_secure) { 4587 /* 4588 * This ipsec_mp was allocated in ip_wput or 4589 * ip_wput_multicast so that we will know the 4590 * value of ill_index, conn_dontroute, 4591 * conn_multicast_loop in the multicast case if 4592 * we inherit global policy here. 4593 */ 4594 ill_index = io->ipsec_out_ill_index; 4595 conn_dontroutex = io->ipsec_out_dontroute; 4596 conn_multicast_loopx = io->ipsec_out_multicast_loop; 4597 freeb(ipsec_mp); 4598 ipsec_mp = mp; 4599 io = NULL; 4600 } 4601 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4602 } 4603 4604 if (ipha != NULL) { 4605 sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ?
4606 ipha->ipha_src : ire->ire_src_addr); 4607 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4608 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4609 sel.ips_isv4 = B_TRUE; 4610 } else { 4611 ushort_t hdr_len; 4612 uint8_t *nexthdrp; 4613 boolean_t is_fragment; 4614 4615 sel.ips_isv4 = B_FALSE; 4616 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4617 if (!unspec_src) 4618 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4619 } else { 4620 sel.ips_local_addr_v6 = ip6h->ip6_src; 4621 } 4622 4623 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment); 4624 if (is_fragment) { 4625 /* 4626 * It's a packet fragment for a packet that 4627 * we have already processed (since IPsec processing 4628 * is done before fragmentation), so we don't 4629 * have to do policy checks again. Fragments can 4630 * come back to us for processing if they have 4631 * been queued up due to flow control. 4632 */ 4633 if (ipsec_mp->b_datap->db_type == M_CTL) { 4634 mp = ipsec_mp->b_cont; 4635 freeb(ipsec_mp); 4636 ipsec_mp = mp; 4637 } 4638 return (ipsec_mp); 4639 } 4640 4641 /* IPv6 common-case. */ 4642 sel.ips_protocol = ip6h->ip6_nxt; 4643 switch (ip6h->ip6_nxt) { 4644 case IPPROTO_TCP: 4645 case IPPROTO_UDP: 4646 case IPPROTO_SCTP: 4647 case IPPROTO_ICMPV6: 4648 break; 4649 default: 4650 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4651 &hdr_len, &nexthdrp)) { 4652 BUMP_MIB(&ipst->ips_ip6_mib, 4653 ipIfStatsOutDiscards); 4654 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4655 return (NULL); 4656 } 4657 sel.ips_protocol = *nexthdrp; 4658 break; 4659 } 4660 } 4661 4662 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4663 if (ipha != NULL) { 4664 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4665 } else { 4666 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4667 } 4668 4669 /* Callee dropped the packet. */ 4670 return (NULL); 4671 } 4672 4673 if (io != NULL) { 4674 /* 4675 * We seem to have some local policy (we already have 4676 * an ipsec_out). Look at global policy and see 4677 * whether we have to inherit or not. 4678 */ 4679 io->ipsec_out_need_policy = B_FALSE; 4680 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, 4681 &sel, ns); 4682 ASSERT((io->ipsec_out_policy != NULL) || 4683 (io->ipsec_out_act != NULL)); 4684 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4685 return (ipsec_mp); 4686 } 4687 /* 4688 * We pass in a pointer to a pointer because mp can become 4689 * NULL due to allocation failures or explicit drops. Callers 4690 * of this function should assume a NULL mp means the packet 4691 * was dropped. 4692 */ 4693 ipsec_mp = ipsec_attach_global_policy(&mp, connp, &sel, ns); 4694 if (ipsec_mp == NULL) 4695 return (mp); 4696 4697 /* 4698 * Copy the right port information. 4699 */ 4700 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4701 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4702 4703 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4704 ASSERT((io->ipsec_out_policy != NULL) || 4705 (io->ipsec_out_act != NULL)); 4706 io->ipsec_out_src_port = sel.ips_local_port; 4707 io->ipsec_out_dst_port = sel.ips_remote_port; 4708 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4709 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4710 /* 4711 * Set ill_index, conn_dontroute and conn_multicast_loop 4712 * for multicast datagrams. 
4713 */ 4714 io->ipsec_out_ill_index = ill_index; 4715 io->ipsec_out_dontroute = conn_dontroutex; 4716 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4717 4718 if (zoneid == ALL_ZONES) 4719 zoneid = GLOBAL_ZONEID; 4720 io->ipsec_out_zoneid = zoneid; 4721 return (ipsec_mp); 4722 } 4723 4724 /* 4725 * When appropriate, this function caches inbound and outbound policy 4726 * for this connection. 4727 * 4728 * XXX need to work out more details about per-interface policy and 4729 * caching here! 4730 * 4731 * XXX may want to split inbound and outbound caching for ill.. 4732 */ 4733 int 4734 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4735 { 4736 boolean_t global_policy_present; 4737 netstack_t *ns = connp->conn_netstack; 4738 ipsec_stack_t *ipss = ns->netstack_ipsec; 4739 4740 /* 4741 * There is no policy latching for ICMP sockets because we can't 4742 * decide on which policy to use until we see the packet and get 4743 * type/code selectors. 4744 */ 4745 if (connp->conn_ulp == IPPROTO_ICMP || 4746 connp->conn_ulp == IPPROTO_ICMPV6) { 4747 connp->conn_in_enforce_policy = 4748 connp->conn_out_enforce_policy = B_TRUE; 4749 if (connp->conn_latch != NULL) { 4750 IPLATCH_REFRELE(connp->conn_latch, ns); 4751 connp->conn_latch = NULL; 4752 } 4753 connp->conn_flags |= IPCL_CHECK_POLICY; 4754 return (0); 4755 } 4756 4757 global_policy_present = isv4 ? 4758 (ipss->ipsec_outbound_v4_policy_present || 4759 ipss->ipsec_inbound_v4_policy_present) : 4760 (ipss->ipsec_outbound_v6_policy_present || 4761 ipss->ipsec_inbound_v6_policy_present); 4762 4763 if ((connp->conn_policy != NULL) || global_policy_present) { 4764 ipsec_selector_t sel; 4765 ipsec_policy_t *p; 4766 4767 if (connp->conn_latch == NULL && 4768 (connp->conn_latch = iplatch_create()) == NULL) { 4769 return (ENOMEM); 4770 } 4771 4772 sel.ips_protocol = connp->conn_ulp; 4773 sel.ips_local_port = connp->conn_lport; 4774 sel.ips_remote_port = connp->conn_fport; 4775 sel.ips_is_icmp_inv_acq = 0; 4776 sel.ips_isv4 = isv4; 4777 if (isv4) { 4778 sel.ips_local_addr_v4 = connp->conn_src; 4779 sel.ips_remote_addr_v4 = connp->conn_rem; 4780 } else { 4781 sel.ips_local_addr_v6 = connp->conn_srcv6; 4782 sel.ips_remote_addr_v6 = connp->conn_remv6; 4783 } 4784 4785 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 4786 ns); 4787 if (connp->conn_latch->ipl_in_policy != NULL) 4788 IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns); 4789 connp->conn_latch->ipl_in_policy = p; 4790 connp->conn_in_enforce_policy = (p != NULL); 4791 4792 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel, 4793 ns); 4794 if (connp->conn_latch->ipl_out_policy != NULL) 4795 IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns); 4796 connp->conn_latch->ipl_out_policy = p; 4797 connp->conn_out_enforce_policy = (p != NULL); 4798 4799 /* Clear the latched actions too, in case we're recaching. */ 4800 if (connp->conn_latch->ipl_out_action != NULL) 4801 IPACT_REFRELE(connp->conn_latch->ipl_out_action); 4802 if (connp->conn_latch->ipl_in_action != NULL) 4803 IPACT_REFRELE(connp->conn_latch->ipl_in_action); 4804 } 4805 4806 /* 4807 * We may or may not have policy for this endpoint. We still set 4808 * conn_policy_cached so that inbound datagrams don't have to look 4809 * at global policy as policy is considered latched for these 4810 * endpoints. We should not set conn_policy_cached until the conn 4811 * reflects the actual policy. 
If we *set* this before inheriting 4812 * the policy there is a window where the check 4813 * CONN_INBOUND_POLICY_PRESENT, will neither check with the policy 4814 * on the conn (because we have not yet copied the policy on to 4815 * conn and hence not set conn_in_enforce_policy) nor with the 4816 * global policy (because conn_policy_cached is already set). 4817 */ 4818 connp->conn_policy_cached = B_TRUE; 4819 if (connp->conn_in_enforce_policy) 4820 connp->conn_flags |= IPCL_CHECK_POLICY; 4821 return (0); 4822 } 4823 4824 void 4825 iplatch_free(ipsec_latch_t *ipl, netstack_t *ns) 4826 { 4827 if (ipl->ipl_out_policy != NULL) 4828 IPPOL_REFRELE(ipl->ipl_out_policy, ns); 4829 if (ipl->ipl_in_policy != NULL) 4830 IPPOL_REFRELE(ipl->ipl_in_policy, ns); 4831 if (ipl->ipl_in_action != NULL) 4832 IPACT_REFRELE(ipl->ipl_in_action); 4833 if (ipl->ipl_out_action != NULL) 4834 IPACT_REFRELE(ipl->ipl_out_action); 4835 if (ipl->ipl_local_cid != NULL) 4836 IPSID_REFRELE(ipl->ipl_local_cid); 4837 if (ipl->ipl_remote_cid != NULL) 4838 IPSID_REFRELE(ipl->ipl_remote_cid); 4839 if (ipl->ipl_local_id != NULL) 4840 crfree(ipl->ipl_local_id); 4841 mutex_destroy(&ipl->ipl_lock); 4842 kmem_free(ipl, sizeof (*ipl)); 4843 } 4844 4845 ipsec_latch_t * 4846 iplatch_create() 4847 { 4848 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4849 if (ipl == NULL) 4850 return (ipl); 4851 bzero(ipl, sizeof (*ipl)); 4852 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4853 ipl->ipl_refcnt = 1; 4854 return (ipl); 4855 } 4856 4857 /* 4858 * Hash function for ID hash table. 4859 */ 4860 static uint32_t 4861 ipsid_hash(int idtype, char *idstring) 4862 { 4863 uint32_t hval = idtype; 4864 unsigned char c; 4865 4866 while ((c = *idstring++) != 0) { 4867 hval = (hval << 4) | (hval >> 28); 4868 hval ^= c; 4869 } 4870 hval = hval ^ (hval >> 16); 4871 return (hval & (IPSID_HASHSIZE-1)); 4872 } 4873 4874 /* 4875 * Look up identity string in hash table. Return identity object 4876 * corresponding to the name -- either preexisting, or newly allocated. 4877 * 4878 * Return NULL if we need to allocate a new one and can't get memory. 
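*
* A hedged usage sketch (not in the original source; the identity
* type and string are made up for illustration):
*
*	ipsid_t *id = ipsid_lookup(SADB_IDENTTYPE_FQDN,
*	    "gw.example.com", ns);
*	if (id == NULL)
*		return (ENOMEM);
*	...use id in a latch or SA...
*	IPSID_REFRELE(id);	(a zero-refcnt entry lingers for ipsid_gc())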
4879 */ 4880 ipsid_t * 4881 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4882 { 4883 ipsid_t *retval; 4884 char *nstr; 4885 int idlen = strlen(idstring) + 1; 4886 ipsec_stack_t *ipss = ns->netstack_ipsec; 4887 ipsif_t *bucket; 4888 4889 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4890 4891 mutex_enter(&bucket->ipsif_lock); 4892 4893 for (retval = bucket->ipsif_head; retval != NULL; 4894 retval = retval->ipsid_next) { 4895 if (idtype != retval->ipsid_type) 4896 continue; 4897 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4898 continue; 4899 4900 IPSID_REFHOLD(retval); 4901 mutex_exit(&bucket->ipsif_lock); 4902 return (retval); 4903 } 4904 4905 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4906 if (!retval) { 4907 mutex_exit(&bucket->ipsif_lock); 4908 return (NULL); 4909 } 4910 4911 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4912 if (!nstr) { 4913 mutex_exit(&bucket->ipsif_lock); 4914 kmem_free(retval, sizeof (*retval)); 4915 return (NULL); 4916 } 4917 4918 retval->ipsid_refcnt = 1; 4919 retval->ipsid_next = bucket->ipsif_head; 4920 if (retval->ipsid_next != NULL) 4921 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4922 retval->ipsid_ptpn = &bucket->ipsif_head; 4923 retval->ipsid_type = idtype; 4924 retval->ipsid_cid = nstr; 4925 bucket->ipsif_head = retval; 4926 bcopy(idstring, nstr, idlen); 4927 mutex_exit(&bucket->ipsif_lock); 4928 4929 return (retval); 4930 } 4931 4932 /* 4933 * Garbage collect the identity hash table. 4934 */ 4935 void 4936 ipsid_gc(netstack_t *ns) 4937 { 4938 int i, len; 4939 ipsid_t *id, *nid; 4940 ipsif_t *bucket; 4941 ipsec_stack_t *ipss = ns->netstack_ipsec; 4942 4943 for (i = 0; i < IPSID_HASHSIZE; i++) { 4944 bucket = &ipss->ipsec_ipsid_buckets[i]; 4945 mutex_enter(&bucket->ipsif_lock); 4946 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4947 nid = id->ipsid_next; 4948 if (id->ipsid_refcnt == 0) { 4949 *id->ipsid_ptpn = nid; 4950 if (nid != NULL) 4951 nid->ipsid_ptpn = id->ipsid_ptpn; 4952 len = strlen(id->ipsid_cid) + 1; 4953 kmem_free(id->ipsid_cid, len); 4954 kmem_free(id, sizeof (*id)); 4955 } 4956 } 4957 mutex_exit(&bucket->ipsif_lock); 4958 } 4959 } 4960 4961 /* 4962 * Return true if two identities are the same. 4963 */ 4964 boolean_t 4965 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4966 { 4967 if (id1 == id2) 4968 return (B_TRUE); 4969 #ifdef DEBUG 4970 if ((id1 == NULL) || (id2 == NULL)) 4971 return (B_FALSE); 4972 /* 4973 * test that we're interning id's correctly.. 4974 */ 4975 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4976 (id1->ipsid_type != id2->ipsid_type)); 4977 #endif 4978 return (B_FALSE); 4979 } 4980 4981 /* 4982 * Initialize identity table; called during module initialization. 
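*
* (Descriptive note, not in the original source: only the per-bucket
* ipsif_lock mutexes are initialized here; the bucket chains start
* out empty, so lookups in different buckets never contend on a
* common lock.)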
4983 */ 4984 static void 4985 ipsid_init(netstack_t *ns) 4986 { 4987 ipsif_t *bucket; 4988 int i; 4989 ipsec_stack_t *ipss = ns->netstack_ipsec; 4990 4991 for (i = 0; i < IPSID_HASHSIZE; i++) { 4992 bucket = &ipss->ipsec_ipsid_buckets[i]; 4993 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4994 } 4995 } 4996 4997 /* 4998 * Free identity table (preparatory to module unload) 4999 */ 5000 static void 5001 ipsid_fini(netstack_t *ns) 5002 { 5003 ipsif_t *bucket; 5004 int i; 5005 ipsec_stack_t *ipss = ns->netstack_ipsec; 5006 5007 for (i = 0; i < IPSID_HASHSIZE; i++) { 5008 bucket = &ipss->ipsec_ipsid_buckets[i]; 5009 ASSERT(bucket->ipsif_head == NULL); 5010 mutex_destroy(&bucket->ipsif_lock); 5011 } 5012 } 5013 5014 /* 5015 * Update the minimum and maximum supported key sizes for the 5016 * specified algorithm. Must be called while holding the algorithms lock. 5017 */ 5018 void 5019 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 5020 netstack_t *ns) 5021 { 5022 size_t crypto_min = (size_t)-1, crypto_max = 0; 5023 size_t cur_crypto_min, cur_crypto_max; 5024 boolean_t is_valid; 5025 crypto_mechanism_info_t *mech_infos; 5026 uint_t nmech_infos; 5027 int crypto_rc, i; 5028 crypto_mech_usage_t mask; 5029 ipsec_stack_t *ipss = ns->netstack_ipsec; 5030 5031 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 5032 5033 /* 5034 * Compute the min, max, and default key sizes (in number of 5035 * increments to the default key size in bits) as defined 5036 * by the algorithm mappings. This range of key sizes is used 5037 * for policy related operations. The effective key sizes 5038 * supported by the framework could be more limited than 5039 * those defined for an algorithm. 5040 */ 5041 alg->alg_default_bits = alg->alg_key_sizes[0]; 5042 if (alg->alg_increment != 0) { 5043 /* key sizes are defined by range & increment */ 5044 alg->alg_minbits = alg->alg_key_sizes[1]; 5045 alg->alg_maxbits = alg->alg_key_sizes[2]; 5046 5047 alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits, 5048 alg->alg_increment, alg->alg_default_bits); 5049 } else if (alg->alg_nkey_sizes == 0) { 5050 /* no specified key size for algorithm */ 5051 alg->alg_minbits = alg->alg_maxbits = 0; 5052 } else { 5053 /* key sizes are defined by enumeration */ 5054 alg->alg_minbits = (uint16_t)-1; 5055 alg->alg_maxbits = 0; 5056 5057 for (i = 0; i < alg->alg_nkey_sizes; i++) { 5058 if (alg->alg_key_sizes[i] < alg->alg_minbits) 5059 alg->alg_minbits = alg->alg_key_sizes[i]; 5060 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 5061 alg->alg_maxbits = alg->alg_key_sizes[i]; 5062 } 5063 alg->alg_default = 0; 5064 } 5065 5066 if (!(alg->alg_flags & ALG_FLAG_VALID)) 5067 return; 5068 5069 /* 5070 * Mechanisms do not apply to the NULL encryption 5071 * algorithm, so simply return for this case. 5072 */ 5073 if (alg->alg_id == SADB_EALG_NULL) 5074 return; 5075 5076 /* 5077 * Find the min and max key sizes supported by the cryptographic 5078 * framework providers. 5079 */ 5080 5081 /* get the key sizes supported by the framework */ 5082 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 5083 &mech_infos, &nmech_infos, KM_SLEEP); 5084 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 5085 alg->alg_flags &= ~ALG_FLAG_VALID; 5086 return; 5087 } 5088 5089 /* min and max key sizes supported by framework */ 5090 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 5091 int unit_bits; 5092 5093 /* 5094 * Ignore entries that do not support the operations 5095 * needed for the algorithm type. 
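*
* (Worked example, not in the original source: a provider that
* reports its key sizes in bytes with min 16 and max 32 contributes
* 128..256 bits once scaled by unit_bits below.)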
5096 */ 5097 if (alg_type == IPSEC_ALG_AUTH) { 5098 mask = CRYPTO_MECH_USAGE_MAC; 5099 } else { 5100 mask = CRYPTO_MECH_USAGE_ENCRYPT | 5101 CRYPTO_MECH_USAGE_DECRYPT; 5102 } 5103 if ((mech_infos[i].mi_usage & mask) != mask) 5104 continue; 5105 5106 unit_bits = (mech_infos[i].mi_keysize_unit == 5107 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 5108 /* adjust min/max supported by framework */ 5109 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 5110 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 5111 5112 if (cur_crypto_min < crypto_min) 5113 crypto_min = cur_crypto_min; 5114 5115 /* 5116 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 5117 * the crypto framework which means "no upper limit". 5118 */ 5119 if (mech_infos[i].mi_max_key_size == 5120 CRYPTO_EFFECTIVELY_INFINITE) { 5121 crypto_max = (size_t)-1; 5122 } else if (cur_crypto_max > crypto_max) { 5123 crypto_max = cur_crypto_max; 5124 } 5125 5126 is_valid = B_TRUE; 5127 } 5128 5129 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 5130 nmech_infos); 5131 5132 if (!is_valid) { 5133 /* no key sizes supported by framework */ 5134 alg->alg_flags &= ~ALG_FLAG_VALID; 5135 return; 5136 } 5137 5138 /* 5139 * Determine min and max key sizes from the alg_key_sizes[] 5140 * defined for the algorithm entry. Adjust key sizes based on 5141 * those supported by the framework. 5142 */ 5143 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 5144 if (alg->alg_increment != 0) { 5145 /* supported key sizes are defined by range & increment */ 5146 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 5147 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 5148 5149 alg->alg_ef_minbits = MAX(alg->alg_minbits, 5150 (uint16_t)crypto_min); 5151 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 5152 (uint16_t)crypto_max); 5153 5154 /* 5155 * If the sizes supported by the framework are outside 5156 * the range of sizes defined by the algorithm mappings, 5157 * the algorithm cannot be used. Check for this 5158 * condition here. 5159 */ 5160 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 5161 alg->alg_flags &= ~ALG_FLAG_VALID; 5162 return; 5163 } 5164 5165 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 5166 alg->alg_ef_default_bits = alg->alg_ef_minbits; 5167 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 5168 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 5169 5170 alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits, 5171 alg->alg_increment, alg->alg_ef_default_bits); 5172 } else if (alg->alg_nkey_sizes == 0) { 5173 /* no specified key size for algorithm */ 5174 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 5175 } else { 5176 /* supported key sizes are defined by enumeration */ 5177 alg->alg_ef_minbits = (uint16_t)-1; 5178 alg->alg_ef_maxbits = 0; 5179 5180 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 5181 /* 5182 * Ignore the current key size if it is not in the 5183 * range of sizes supported by the framework. 5184 */ 5185 if (alg->alg_key_sizes[i] < crypto_min || 5186 alg->alg_key_sizes[i] > crypto_max) 5187 continue; 5188 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 5189 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 5190 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 5191 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 5192 is_valid = B_TRUE; 5193 } 5194 5195 if (!is_valid) { 5196 alg->alg_flags &= ~ALG_FLAG_VALID; 5197 return; 5198 } 5199 alg->alg_ef_default = 0; 5200 } 5201 } 5202 5203 /* 5204 * Free the memory used by the specified algorithm.
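*
* (Descriptive note, not in the original source: the key- and
* block-size arrays are allocated with one extra trailing slot,
* which is why the kmem_free() calls below size them as
* alg_nkey_sizes + 1 and alg_nblock_sizes + 1.)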
5205 */ 5206 void 5207 ipsec_alg_free(ipsec_alginfo_t *alg) 5208 { 5209 if (alg == NULL) 5210 return; 5211 5212 if (alg->alg_key_sizes != NULL) { 5213 kmem_free(alg->alg_key_sizes, 5214 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 5215 alg->alg_key_sizes = NULL; 5216 } 5217 if (alg->alg_block_sizes != NULL) { 5218 kmem_free(alg->alg_block_sizes, 5219 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 5220 alg->alg_block_sizes = NULL; 5221 } 5222 kmem_free(alg, sizeof (*alg)); 5223 } 5224 5225 /* 5226 * Check the validity of the specified key size for an algorithm. 5227 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 5228 */ 5229 boolean_t 5230 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 5231 { 5232 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 5233 return (B_FALSE); 5234 5235 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 5236 /* 5237 * If the key sizes are defined by enumeration, the new 5238 * key size must be equal to one of the supported values. 5239 */ 5240 int i; 5241 5242 for (i = 0; i < alg->alg_nkey_sizes; i++) 5243 if (key_size == alg->alg_key_sizes[i]) 5244 break; 5245 if (i == alg->alg_nkey_sizes) 5246 return (B_FALSE); 5247 } 5248 5249 return (B_TRUE); 5250 } 5251 5252 /* 5253 * Callback function invoked by the crypto framework when a provider 5254 * registers or unregisters. This callback updates the algorithm 5255 * tables when a crypto algorithm is no longer available or becomes 5256 * available, and triggers the freeing/creation of context templates 5257 * associated with existing SAs, if needed. 5258 * 5259 * Need to walk all stack instances since the callback is global 5260 * for all instances. 5261 */ 5262 void 5263 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5264 { 5265 netstack_handle_t nh; 5266 netstack_t *ns; 5267 5268 netstack_next_init(&nh); 5269 while ((ns = netstack_next(&nh)) != NULL) { 5270 ipsec_prov_update_callback_stack(event, event_arg, ns); 5271 netstack_rele(ns); 5272 } 5273 netstack_next_fini(&nh); 5274 } 5275 5276 static void 5277 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5278 netstack_t *ns) 5279 { 5280 crypto_notify_event_change_t *prov_change = 5281 (crypto_notify_event_change_t *)event_arg; 5282 uint_t algidx, algid, algtype, mech_count, mech_idx; 5283 ipsec_alginfo_t *alg; 5284 ipsec_alginfo_t oalg; 5285 crypto_mech_name_t *mechs; 5286 boolean_t alg_changed = B_FALSE; 5287 ipsec_stack_t *ipss = ns->netstack_ipsec; 5288 5289 /* ignore events for which we didn't register */ 5290 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5291 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5292 "received from crypto framework\n", event)); 5293 return; 5294 } 5295 5296 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5297 if (mechs == NULL) 5298 return; 5299 5300 /* 5301 * Walk the list of currently defined IPsec algorithms. Update 5302 * the algorithm valid flag and trigger an update of the 5303 * SAs that depend on that algorithm. 5304 */ 5305 mutex_enter(&ipss->ipsec_alg_lock); 5306 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5307 for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; 5308 algidx++) { 5309 5310 algid = ipss->ipsec_sortlist[algtype][algidx]; 5311 alg = ipss->ipsec_alglists[algtype][algid]; 5312 ASSERT(alg != NULL); 5313 5314 /* 5315 * Skip the algorithms which do not map to the 5316 * crypto framework provider being added or removed.
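*
* (Illustrative example, not in the original source: if the
* provider for, e.g., the CKM_AES_CBC mechanism unregisters, only
* algorithm entries whose alg_mech_name matches that string are
* re-examined here.)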
5317 */ 5318 if (strncmp(alg->alg_mech_name, 5319 prov_change->ec_mech_name, 5320 CRYPTO_MAX_MECH_NAME) != 0) 5321 continue; 5322 5323 /* 5324 * Determine if the mechanism is valid. If it 5325 * is not, mark the algorithm as being invalid. If 5326 * it is, mark the algorithm as being valid. 5327 */ 5328 for (mech_idx = 0; mech_idx < mech_count; mech_idx++) 5329 if (strncmp(alg->alg_mech_name, 5330 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0) 5331 break; 5332 if (mech_idx == mech_count && 5333 alg->alg_flags & ALG_FLAG_VALID) { 5334 alg->alg_flags &= ~ALG_FLAG_VALID; 5335 alg_changed = B_TRUE; 5336 } else if (mech_idx < mech_count && 5337 !(alg->alg_flags & ALG_FLAG_VALID)) { 5338 alg->alg_flags |= ALG_FLAG_VALID; 5339 alg_changed = B_TRUE; 5340 } 5341 5342 /* 5343 * Update the supported key sizes, regardless 5344 * of whether a crypto provider was added or 5345 * removed. 5346 */ 5347 oalg = *alg; 5348 ipsec_alg_fix_min_max(alg, algtype, ns); 5349 if (!alg_changed && 5350 (alg->alg_ef_minbits != oalg.alg_ef_minbits || 5351 alg->alg_ef_maxbits != oalg.alg_ef_maxbits || 5352 alg->alg_ef_default != oalg.alg_ef_default || 5353 alg->alg_ef_default_bits != 5354 oalg.alg_ef_default_bits)) 5355 alg_changed = B_TRUE; 5356 5357 /* 5358 * Update the affected SAs if a software provider is 5359 * being added or removed. 5360 */ 5361 if (prov_change->ec_provider_type == 5362 CRYPTO_SW_PROVIDER) 5363 sadb_alg_update(algtype, alg->alg_id, 5364 prov_change->ec_change == 5365 CRYPTO_MECH_ADDED, ns); 5366 } 5367 } 5368 mutex_exit(&ipss->ipsec_alg_lock); 5369 crypto_free_mech_list(mechs, mech_count); 5370 5371 if (alg_changed) { 5372 /* 5373 * An algorithm has changed, i.e. it became valid or 5374 * invalid, or its supported key sizes have changed. 5375 * Notify ipsecah and ipsecesp of this change so 5376 * that they can send a SADB_REGISTER to their consumers. 5377 */ 5378 ipsecah_algs_changed(ns); 5379 ipsecesp_algs_changed(ns); 5380 } 5381 } 5382 5383 /* 5384 * Registers with the crypto framework to be notified of crypto 5385 * provider changes. Used to update the algorithm tables and 5386 * to free or create context templates if needed. Invoked after IPsec 5387 * is loaded successfully. 5388 * 5389 * This is called separately for each IP instance, so we ensure we only 5390 * register once. 5391 */ 5392 void 5393 ipsec_register_prov_update(void) 5394 { 5395 if (prov_update_handle != NULL) 5396 return; 5397 5398 prov_update_handle = crypto_notify_events( 5399 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); 5400 } 5401 5402 /* 5403 * Unregisters from the framework to be notified of crypto provider 5404 * changes. Called from ipsec_policy_g_destroy(). 5405 */ 5406 static void 5407 ipsec_unregister_prov_update(void) 5408 { 5409 if (prov_update_handle != NULL) 5410 crypto_unnotify_events(prov_update_handle); 5411 } 5412 5413 /* 5414 * Tunnel-mode support routines. 5415 */ 5416 5417 /* 5418 * Returns an mblk chain suitable for putnext() if policies match and IPsec 5419 * SAs are available. If there's no per-tunnel policy, or a lookup comes back 5420 * with no match, then still return the packet and have global policy take 5421 * a crack at it in IP. 5422 * 5423 * Remember -> we can be forwarding packets. Keep that in mind w.r.t. 5424 * inner-packet contents.
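*
* A hedged caller sketch (not in the original source; the tun
* module's actual transmit path differs in detail):
*
*	mp = ipsec_tun_outbound(mp, atp, inner4, inner6, outer4,
*	    outer6, outer_hdr_len, ns);
*	if (mp == NULL)
*		return;		(dropped, or cached as a fragment)
*	putnext(q, mp);		(chain may now carry IPSEC_OUT M_CTLs)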
5425 */ 5426 mblk_t * 5427 ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, 5428 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, 5429 netstack_t *ns) 5430 { 5431 ipsec_tun_pol_t *itp = atp->tun_itp; 5432 ipsec_policy_head_t *polhead; 5433 ipsec_selector_t sel; 5434 mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; 5435 mblk_t *spare_mp = NULL; 5436 ipsec_out_t *io; 5437 boolean_t is_fragment; 5438 ipsec_policy_t *pol; 5439 ipsec_stack_t *ipss = ns->netstack_ipsec; 5440 5441 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5442 outer_ipv4 != NULL && outer_ipv6 == NULL); 5443 /* We take care of inners in a bit. */ 5444 5445 /* No policy on this tunnel - let global policy have at it. */ 5446 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) 5447 return (mp); 5448 polhead = itp->itp_policy; 5449 5450 bzero(&sel, sizeof (sel)); 5451 if (inner_ipv4 != NULL) { 5452 ASSERT(inner_ipv6 == NULL); 5453 sel.ips_isv4 = B_TRUE; 5454 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5455 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5456 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5457 is_fragment = 5458 IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags); 5459 } else { 5460 ASSERT(inner_ipv6 != NULL); 5461 sel.ips_isv4 = B_FALSE; 5462 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5463 /* Use ip_get_dst_v6() just for the fragment bit. */ 5464 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, 5465 &is_fragment); 5466 /* 5467 * Reset, because we don't care about routing-header dests 5468 * in the forwarding/tunnel path. 5469 */ 5470 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5471 } 5472 5473 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5474 if (is_fragment) { 5475 ipha_t *oiph; 5476 ipha_t *iph = NULL; 5477 ip6_t *ip6h = NULL; 5478 int hdr_len; 5479 uint16_t ip6_hdr_length; 5480 uint8_t v6_proto; 5481 uint8_t *v6_proto_p; 5482 5483 /* 5484 * We have a fragment we need to track! 5485 */ 5486 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5487 outer_hdr_len, ipss); 5488 if (mp == NULL) 5489 return (NULL); 5490 5491 /* 5492 * If we get here, we have a full 5493 * fragment chain 5494 */ 5495 5496 oiph = (ipha_t *)mp->b_rptr; 5497 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5498 hdr_len = ((outer_hdr_len != 0) ? 
5499 IPH_HDR_LENGTH(oiph) : 0); 5500 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5501 } else { 5502 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5503 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 5504 ip_drop_packet_chain(mp, B_FALSE, 5505 NULL, NULL, 5506 DROPPER(ipss, ipds_spd_nomem), 5507 &ipss->ipsec_spd_dropper); return (NULL); 5508 } 5509 ip6h = (ip6_t *)spare_mp->b_rptr; 5510 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 5511 &ip6_hdr_length, &v6_proto_p); 5512 hdr_len = ip6_hdr_length; 5513 } 5514 outer_hdr_len = hdr_len; 5515 5516 if (sel.ips_isv4) { 5517 if (iph == NULL) { 5518 /* Was v6 outer */ 5519 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5520 } 5521 inner_ipv4 = iph; 5522 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5523 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5524 sel.ips_protocol = 5525 (uint8_t)inner_ipv4->ipha_protocol; 5526 } else { 5527 if ((spare_mp == NULL) && 5528 ((spare_mp = msgpullup(mp, -1)) == NULL)) { 5529 ip_drop_packet_chain(mp, B_FALSE, 5530 NULL, NULL, 5531 DROPPER(ipss, ipds_spd_nomem), 5532 &ipss->ipsec_spd_dropper); return (NULL); 5533 } 5534 inner_ipv6 = (ip6_t *)(spare_mp->b_rptr + 5535 hdr_len); 5536 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5537 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5538 (void) ip_hdr_length_nexthdr_v6(spare_mp, 5539 inner_ipv6, &ip6_hdr_length, 5540 &v6_proto_p); 5541 v6_proto = *v6_proto_p; 5542 sel.ips_protocol = v6_proto; 5543 #ifdef FRAGCACHE_DEBUG 5544 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5545 sel.ips_protocol); 5546 #endif 5547 } 5548 /* Ports are extracted below */ 5549 } 5550 5551 /* Get ports... */ 5552 if (spare_mp != NULL) { 5553 if (!ipsec_init_outbound_ports(&sel, spare_mp, 5554 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5555 /* 5556 * callee did ip_drop_packet_chain() on 5557 * spare_mp 5558 */ 5559 ipsec_freemsg_chain(mp); 5560 return (NULL); 5561 } 5562 } else { 5563 if (!ipsec_init_outbound_ports(&sel, mp, 5564 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5565 /* callee did ip_drop_packet_chain() on mp. */ 5566 return (NULL); 5567 } 5568 } 5569 #ifdef FRAGCACHE_DEBUG 5570 if (inner_ipv4 != NULL) 5571 cmn_err(CE_WARN, 5572 "(v4) sel.ips_protocol = %d, " 5573 "sel.ips_local_port = %d, " 5574 "sel.ips_remote_port = %d\n", 5575 sel.ips_protocol, ntohs(sel.ips_local_port), 5576 ntohs(sel.ips_remote_port)); 5577 if (inner_ipv6 != NULL) 5578 cmn_err(CE_WARN, 5579 "(v6) sel.ips_protocol = %d, " 5580 "sel.ips_local_port = %d, " 5581 "sel.ips_remote_port = %d\n", 5582 sel.ips_protocol, ntohs(sel.ips_local_port), 5583 ntohs(sel.ips_remote_port)); 5584 #endif 5585 /* Success so far - done with spare_mp */ 5586 ipsec_freemsg_chain(spare_mp); 5587 } 5588 rw_enter(&polhead->iph_lock, RW_READER); 5589 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, 5590 &sel, ns); 5591 rw_exit(&polhead->iph_lock); 5592 if (pol == NULL) { 5593 /* 5594 * No matching policy on this tunnel, drop the packet. 5595 * 5596 * NOTE: Tunnel-mode tunnels are different from the 5597 * IP global transport mode policy head. For a tunnel-mode 5598 * tunnel, we drop the packet instead of passing it 5599 * along as accepted, the way a global-policy miss would. 5600 * 5601 * NOTE2: "negotiate transport" tunnels should match ALL 5602 * inbound packets, but we do not uncomment the ASSERT() 5603 * below because if/when we open PF_POLICY, a user can 5604 * shoot him/her-self in the foot with a 0 priority.
5605 */ 5606 5607 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5608 #ifdef FRAGCACHE_DEBUG 5609 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5610 "per-port policy\n"); 5611 #endif 5612 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5613 DROPPER(ipss, ipds_spd_explicit), 5614 &ipss->ipsec_spd_dropper); 5615 return (NULL); 5616 } 5617 5618 #ifdef FRAGCACHE_DEBUG 5619 cmn_err(CE_WARN, "Found matching tunnel per-port policy\n"); 5620 #endif 5621 5622 /* Construct an IPSEC_OUT message. */ 5623 ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns); 5624 if (ipsec_mp == NULL) { 5625 IPPOL_REFRELE(pol, ns); 5626 ip_drop_packet(mp, B_FALSE, NULL, NULL, 5627 DROPPER(ipss, ipds_spd_nomem), 5628 &ipss->ipsec_spd_dropper); 5629 return (NULL); 5630 } 5631 ipsec_mp->b_cont = mp; 5632 io = (ipsec_out_t *)ipsec_mp->b_rptr; 5633 IPPH_REFHOLD(polhead); 5634 /* 5635 * NOTE: free() function of ipsec_out mblk will release polhead and 5636 * pol references. 5637 */ 5638 io->ipsec_out_polhead = polhead; 5639 io->ipsec_out_policy = pol; 5640 io->ipsec_out_zoneid = atp->tun_zoneid; 5641 io->ipsec_out_v4 = (outer_ipv4 != NULL); 5642 io->ipsec_out_secure = B_TRUE; 5643 5644 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5645 /* Set up transport mode for tunnelled packets. */ 5646 io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5647 IPPROTO_IPV6; 5648 return (ipsec_mp); 5649 } 5650 5651 /* Fill in tunnel-mode goodies here. */ 5652 io->ipsec_out_tunnel = B_TRUE; 5653 /* XXX Do I need to fill in all of the goodies here? */ 5654 if (inner_ipv4) { 5655 io->ipsec_out_inaf = AF_INET; 5656 io->ipsec_out_insrc[0] = 5657 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5658 io->ipsec_out_indst[0] = 5659 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5660 } else { 5661 io->ipsec_out_inaf = AF_INET6; 5662 io->ipsec_out_insrc[0] = 5663 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5664 io->ipsec_out_insrc[1] = 5665 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5666 io->ipsec_out_insrc[2] = 5667 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5668 io->ipsec_out_insrc[3] = 5669 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5670 io->ipsec_out_indst[0] = 5671 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5672 io->ipsec_out_indst[1] = 5673 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5674 io->ipsec_out_indst[2] = 5675 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5676 io->ipsec_out_indst[3] = 5677 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5678 } 5679 io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5680 io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5681 /* NOTE: These are used for transport mode too. */ 5682 io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5683 io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5684 io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5685 5686 /* 5687 * The mp pointer is still valid. 5688 * Add ipsec_out to each fragment.
* The fragment head already has one 5690 */ 5691 nmp = mp->b_next; 5692 mp->b_next = NULL; 5693 mp = nmp; 5694 ASSERT(ipsec_mp != NULL); 5695 while (mp != NULL) { 5696 nmp = mp->b_next; 5697 ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns); 5698 if (ipsec_mp->b_next == NULL) { 5699 ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL, 5700 DROPPER(ipss, ipds_spd_nomem), 5701 &ipss->ipsec_spd_dropper); 5702 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5703 DROPPER(ipss, ipds_spd_nomem), 5704 &ipss->ipsec_spd_dropper); 5705 return (NULL); 5706 } 5707 ipsec_mp = ipsec_mp->b_next; 5708 mp->b_next = NULL; 5709 mp = nmp; 5710 } 5711 return (ipsec_mp_head); 5712 } 5713 5714 /* 5715 * NOTE: The following releases pol's reference and 5716 * calls ip_drop_packet() for me on NULL returns. 5717 */ 5718 mblk_t * 5719 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, 5720 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns) 5721 { 5722 /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */ 5723 mblk_t *data_chain = NULL, *data_tail = NULL; 5724 mblk_t *ii_next; 5725 5726 while (ipsec_mp != NULL) { 5727 ii_next = ipsec_mp->b_next; 5728 ipsec_mp->b_next = NULL; /* No tripping asserts. */ 5729 5730 /* 5731 * Need IPPOL_REFHOLD(pol) for extras because 5732 * ipsecin_policy does the refrele. 5733 */ 5734 IPPOL_REFHOLD(pol); 5735 5736 if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4, 5737 inner_ipv6, pkt_unique, ns) != NULL) { 5738 if (data_tail == NULL) { 5739 /* First one */ 5740 data_chain = data_tail = ipsec_mp->b_cont; 5741 } else { 5742 data_tail->b_next = ipsec_mp->b_cont; 5743 data_tail = data_tail->b_next; 5744 } 5745 freeb(ipsec_mp); 5746 } else { 5747 /* 5748 * ipsec_check_ipsecin_policy() freed ipsec_mp 5749 * already. Need to get rid of any extra pol 5750 * references, and any remaining bits as well. 5751 */ 5752 IPPOL_REFRELE(pol, ns); 5753 ipsec_freemsg_chain(data_chain); 5754 ipsec_freemsg_chain(ii_next); /* ipdrop stats? */ 5755 return (NULL); 5756 } 5757 ipsec_mp = ii_next; 5758 } 5759 /* 5760 * One last release because either the loop bumped it up, or we never 5761 * called ipsec_check_ipsecin_policy(). 5762 */ 5763 IPPOL_REFRELE(pol, ns); 5764 5765 /* data_chain is ready for return to tun module. */ 5766 return (data_chain); 5767 } 5768 5769 5770 /* 5771 * Returns B_TRUE if the inbound packet passed an IPsec policy check. Returns 5772 * B_FALSE if it failed or if it is a fragment needing its friends before a 5773 * policy check can be performed. 5774 * 5775 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead. 5776 * data_mp may be reassigned with a b_next chain of packets if fragments 5777 * needed to be collected for a proper policy check. 5778 * 5779 * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE. This 5780 * function calls ip_drop_packet() on data_mp if need be. 5781 * 5782 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller 5783 * is inspecting an ICMP packet. 5784 */ 5785 boolean_t 5786 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, 5787 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, 5788 ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns) 5789 { 5790 ipsec_policy_head_t *polhead; 5791 ipsec_selector_t sel; 5792 mblk_t *message = (ipsec_mp == NULL) ?
*data_mp : ipsec_mp; 5793 ipsec_policy_t *pol; 5794 uint16_t tmpport; 5795 selret_t rc; 5796 boolean_t retval, port_policy_present, is_icmp, global_present; 5797 in6_addr_t tmpaddr; 5798 ipaddr_t tmp4; 5799 ipsec_stack_t *ipss = ns->netstack_ipsec; 5800 uint8_t flags, *holder, *outer_hdr; 5801 5802 sel.ips_is_icmp_inv_acq = 0; 5803 5804 if (outer_ipv4 != NULL) { 5805 ASSERT(outer_ipv6 == NULL); 5806 outer_hdr = (uint8_t *)outer_ipv4; 5807 global_present = ipss->ipsec_inbound_v4_policy_present; 5808 } else { 5809 outer_hdr = (uint8_t *)outer_ipv6; 5810 global_present = ipss->ipsec_inbound_v6_policy_present; 5811 } 5812 ASSERT(outer_hdr != NULL); 5813 5814 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || 5815 inner_ipv4 == NULL && inner_ipv6 != NULL); 5816 ASSERT(message == *data_mp || message->b_cont == *data_mp); 5817 5818 if (outer_hdr_len < 0) { 5819 outer_hdr_len = (-outer_hdr_len); 5820 is_icmp = B_TRUE; 5821 } else { 5822 is_icmp = B_FALSE; 5823 } 5824 5825 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5826 polhead = itp->itp_policy; 5827 /* 5828 * We need to perform full Tunnel-Mode enforcement, 5829 * and we need to have inner-header data for such enforcement. 5830 * 5831 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5832 * and on return. 5833 */ 5834 5835 port_policy_present = ((itp->itp_flags & 5836 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5837 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5838 (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5839 5840 rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4, 5841 inner_ipv6, flags); 5842 5843 switch (rc) { 5844 case SELRET_NOMEM: 5845 ip_drop_packet(message, B_TRUE, NULL, NULL, 5846 DROPPER(ipss, ipds_spd_nomem), 5847 &ipss->ipsec_spd_dropper); 5848 return (B_FALSE); 5849 case SELRET_TUNFRAG: 5850 /* 5851 * At this point, if we're cleartext, we don't want 5852 * to go there. 5853 */ 5854 if (ipsec_mp == NULL) { 5855 ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, 5856 DROPPER(ipss, ipds_spd_got_clear), 5857 &ipss->ipsec_spd_dropper); 5858 *data_mp = NULL; 5859 return (B_FALSE); 5860 } 5861 ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> 5862 ipsec_in_secure); 5863 message = ipsec_fragcache_add(&itp->itp_fragcache, 5864 ipsec_mp, *data_mp, outer_hdr_len, ipss); 5865 5866 if (message == NULL) { 5867 /* 5868 * Data is cached, fragment chain is not 5869 * complete. I consume ipsec_mp and data_mp 5870 */ 5871 return (B_FALSE); 5872 } 5873 5874 /* 5875 * If we get here, we have a full fragment chain. 5876 * Reacquire headers and selectors from first fragment. 5877 */ 5878 if (inner_ipv4 != NULL) { 5879 inner_ipv4 = (ipha_t *)message->b_cont->b_rptr; 5880 ASSERT(message->b_cont->b_wptr - 5881 message->b_cont->b_rptr > sizeof (ipha_t)); 5882 } else { 5883 inner_ipv6 = (ip6_t *)message->b_cont->b_rptr; 5884 ASSERT(message->b_cont->b_wptr - 5885 message->b_cont->b_rptr > sizeof (ip6_t)); 5886 } 5887 /* Use SEL_NONE so we always get ports! */ 5888 rc = ipsec_init_inbound_sel(&sel, message->b_cont, 5889 inner_ipv4, inner_ipv6, SEL_NONE); 5890 switch (rc) { 5891 case SELRET_SUCCESS: 5892 /* 5893 * Get to same place as first caller's 5894 * SELRET_SUCCESS case. 
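 *
 * On that path the re-derived ports and protocol feed the policy
 * lookup and the SA unique-ID comparator. In sketch form (this merely
 * restates the SA_UNIQUE_ID() call made later in this function;
 * "pkt_unique" is the name the reassembly routine gives this value,
 * and the macro's exact bit layout lives in the SADB headers):
 *
 *	pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port,
 *	    sel.ips_local_port,
 *	    (inner_ipv4 == NULL) ? IPPROTO_IPV6 : IPPROTO_ENCAP,
 *	    sel.ips_protocol);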
5895 */ 5896 break; 5897 case SELRET_NOMEM: 5898 ip_drop_packet_chain(message, B_TRUE, 5899 NULL, NULL, 5900 DROPPER(ipss, ipds_spd_nomem), 5901 &ipss->ipsec_spd_dropper); 5902 return (B_FALSE); 5903 case SELRET_BADPKT: 5904 ip_drop_packet_chain(message, B_TRUE, 5905 NULL, NULL, 5906 DROPPER(ipss, ipds_spd_malformed_frag), 5907 &ipss->ipsec_spd_dropper); 5908 return (B_FALSE); 5909 case SELRET_TUNFRAG: 5910 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5911 /* FALLTHRU */ 5912 default: 5913 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5914 " returns bizarro 0x%x", rc); 5915 /* Guaranteed panic! */ 5916 ASSERT(rc == SELRET_NOMEM); 5917 return (B_FALSE); 5918 } 5919 /* FALLTHRU */ 5920 case SELRET_SUCCESS: 5921 /* 5922 * Common case: 5923 * No per-port policy or a non-fragment. Keep going. 5924 */ 5925 break; 5926 case SELRET_BADPKT: 5927 /* 5928 * We may receive ICMP (with IPv6 inner) packets that 5929 * trigger this return value. Send 'em in for 5930 * enforcement checking. 5931 */ 5932 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5933 "sending 'bad packet' in for enforcement"); 5934 break; 5935 default: 5936 cmn_err(CE_WARN, 5937 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5938 rc); 5939 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5940 return (B_FALSE); 5941 } 5942 5943 if (is_icmp) { 5944 /* 5945 * Swap local/remote because this is an ICMP packet. 5946 */ 5947 tmpaddr = sel.ips_local_addr_v6; 5948 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5949 sel.ips_remote_addr_v6 = tmpaddr; 5950 tmpport = sel.ips_local_port; 5951 sel.ips_local_port = sel.ips_remote_port; 5952 sel.ips_remote_port = tmpport; 5953 } 5954 5955 /* find_policy_head() */ 5956 rw_enter(&polhead->iph_lock, RW_READER); 5957 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5958 &sel, ns); 5959 rw_exit(&polhead->iph_lock); 5960 if (pol != NULL) { 5961 if (ipsec_mp == NULL || 5962 !((ipsec_in_t *)ipsec_mp->b_rptr)-> 5963 ipsec_in_secure) { 5964 retval = pol->ipsp_act->ipa_allow_clear; 5965 if (!retval) { 5966 /* 5967 * XXX should never get here with 5968 * tunnel reassembled fragments? 5969 */ 5970 ASSERT(message->b_next == NULL); 5971 ip_drop_packet(message, B_TRUE, NULL, 5972 NULL, 5973 DROPPER(ipss, ipds_spd_got_clear), 5974 &ipss->ipsec_spd_dropper); 5975 } else if (ipsec_mp != NULL) { 5976 freeb(ipsec_mp); 5977 } 5978 5979 IPPOL_REFRELE(pol, ns); 5980 return (retval); 5981 } 5982 /* 5983 * NOTE: The following releases pol's reference and 5984 * calls ip_drop_packet() for me on NULL returns. 5985 * 5986 * "sel" is still good here, so let's use it! 5987 */ 5988 *data_mp = ipsec_check_ipsecin_policy_reasm(message, 5989 pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( 5990 sel.ips_remote_port, sel.ips_local_port, 5991 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 5992 IPPROTO_ENCAP, sel.ips_protocol), ns); 5993 return (*data_mp != NULL); 5994 } 5995 5996 /* 5997 * Else fallthru and check the global policy on the outer 5998 * header(s) if this tunnel is an old-style transport-mode 5999 * one. Drop the packet explicitly (no policy entry) for 6000 * a new-style tunnel-mode tunnel. 
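 *
 * In tabular form (a restatement of the surrounding code, not new
 * policy):
 *
 *	polhead match?	ITPF_P_TUNNEL?	result
 *	--------------	--------------	------
 *	yes		either		enforce the matched entry above
 *	no		yes, !is_icmp	drop chain (ipds_spd_explicit)
 *	no		no (or ICMP)	fall through to global policy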
6001 */ 6002 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 6003 ip_drop_packet_chain(message, B_TRUE, NULL, 6004 NULL, 6005 DROPPER(ipss, ipds_spd_explicit), 6006 &ipss->ipsec_spd_dropper); 6007 return (B_FALSE); 6008 } 6009 } 6010 6011 /* 6012 * NOTE: If we reach here, we will not have packet chains from 6013 * fragcache_add(), because the only way I get chains is on a 6014 * tunnel-mode tunnel, which either returns with a pass, or gets 6015 * hit by the ip_drop_packet_chain() call right above here. 6016 */ 6017 6018 /* If no per-tunnel security, check global policy now. */ 6019 if (ipsec_mp != NULL && !global_present) { 6020 if (((ipsec_in_t *)(ipsec_mp->b_rptr))-> 6021 ipsec_in_icmp_loopback) { 6022 /* 6023 * This is an ICMP message with an ipsec_mp 6024 * attached. We should accept it. 6025 */ 6026 freeb(ipsec_mp); 6027 6028 return (B_TRUE); 6029 } 6030 6031 ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, 6032 DROPPER(ipss, ipds_spd_got_secure), 6033 &ipss->ipsec_spd_dropper); 6034 return (B_FALSE); 6035 } 6036 6037 /* 6038 * The following assertion is valid because only the tun module alters 6039 * the mblk chain - stripping the outer header by advancing mp->b_rptr. 6040 */ 6041 ASSERT(is_icmp || ((*data_mp)->b_datap->db_base <= outer_hdr && 6042 outer_hdr < (*data_mp)->b_rptr)); 6043 holder = (*data_mp)->b_rptr; 6044 (*data_mp)->b_rptr = outer_hdr; 6045 6046 if (is_icmp) { 6047 /* 6048 * For ICMP packets, "outer_ipvN" is set to the outer header 6049 * that is *INSIDE* the ICMP payload. For global policy 6050 * checking, we need to reverse src/dst on the payload in 6051 * order to construct selectors appropriately. See "ripha" 6052 * constructions in ip.c. To avoid a bug like 6478464 (see 6053 * earlier in this file), we will actually exchange src/dst 6054 * in the packet, and reverse it after the call to 6055 * ipsec_check_global_policy(). 6056 */ 6057 if (outer_ipv4 != NULL) { 6058 tmp4 = outer_ipv4->ipha_src; 6059 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6060 outer_ipv4->ipha_dst = tmp4; 6061 } else { 6062 ASSERT(outer_ipv6 != NULL); 6063 tmpaddr = outer_ipv6->ip6_src; 6064 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6065 outer_ipv6->ip6_dst = tmpaddr; 6066 } 6067 } 6068 6069 /* NOTE: Frees message if it returns NULL. */ 6070 if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6, 6071 (ipsec_mp != NULL), ns) == NULL) { 6072 return (B_FALSE); 6073 } 6074 6075 if (is_icmp) { 6076 /* Set things back to normal. */ 6077 if (outer_ipv4 != NULL) { 6078 tmp4 = outer_ipv4->ipha_src; 6079 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6080 outer_ipv4->ipha_dst = tmp4; 6081 } else { 6082 /* No need for ASSERT()s now. */ 6083 tmpaddr = outer_ipv6->ip6_src; 6084 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6085 outer_ipv6->ip6_dst = tmpaddr; 6086 } 6087 } 6088 6089 (*data_mp)->b_rptr = holder; 6090 6091 if (ipsec_mp != NULL) 6092 freeb(ipsec_mp); 6093 6094 /* 6095 * At this point, we pretend it's a cleartext accepted 6096 * packet. 6097 */ 6098 return (B_TRUE); 6099 } 6100 6101 /* 6102 * AVL comparison routine for our list of tunnel polheads. 6103 */ 6104 static int 6105 tunnel_compare(const void *arg1, const void *arg2) 6106 { 6107 ipsec_tun_pol_t *left, *right; 6108 int rc; 6109 6110 left = (ipsec_tun_pol_t *)arg1; 6111 right = (ipsec_tun_pol_t *)arg2; 6112 6113 rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ); 6114 return (rc == 0 ? rc : (rc > 0 ? 1 : -1)); 6115 } 6116 6117 /* 6118 * Free a tunnel policy node.
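 * Callers normally do not invoke this directly; they drop their hold
 * and let the final release free the node. A minimal sketch, assuming
 * ITP_REFRELE() is the usual decrement-and-free-on-zero macro from the
 * IPsec headers:
 *
 *	ipsec_tun_pol_t *itp = get_tunnel_policy(name, ns);
 *	if (itp != NULL) {
 *		... use itp->itp_policy ...
 *		ITP_REFRELE(itp, ns);	// may end up in itp_free()
 *	}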
6119 */ 6120 void 6121 itp_free(ipsec_tun_pol_t *node, netstack_t *ns) 6122 { 6123 /* Guard: create_tunnel_policy() can hand us a partially-built node. */ if (node->itp_policy != NULL) IPPH_REFRELE(node->itp_policy, ns); 6124 if (node->itp_inactive != NULL) IPPH_REFRELE(node->itp_inactive, ns); 6125 mutex_destroy(&node->itp_lock); 6126 kmem_free(node, sizeof (*node)); 6127 } 6128 6129 void 6130 itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns) 6131 { 6132 ipsec_stack_t *ipss = ns->netstack_ipsec; 6133 6134 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6135 ipss->ipsec_tunnel_policy_gen++; 6136 ipsec_fragcache_uninit(&node->itp_fragcache); 6137 avl_remove(&ipss->ipsec_tunnel_policies, node); 6138 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6139 ITP_REFRELE(node, ns); 6140 } 6141 6142 /* 6143 * Public interface to look up a tunnel security policy by name. Used by 6144 * spdsock mostly. Returns "node" with a bumped refcnt. 6145 */ 6146 ipsec_tun_pol_t * 6147 get_tunnel_policy(char *name, netstack_t *ns) 6148 { 6149 ipsec_tun_pol_t *node, lookup; 6150 ipsec_stack_t *ipss = ns->netstack_ipsec; 6151 6152 (void) strncpy(lookup.itp_name, name, LIFNAMSIZ); 6153 6154 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6155 node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6156 &lookup, NULL); 6157 if (node != NULL) { 6158 ITP_REFHOLD(node); 6159 } 6160 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6161 6162 return (node); 6163 } 6164 6165 /* 6166 * Public interface to walk all tunnel security policies. Useful for spdsock 6167 * DUMP operations. iterator() will not consume a reference. 6168 */ 6169 void 6170 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *), 6171 void *arg, netstack_t *ns) 6172 { 6173 ipsec_tun_pol_t *node; 6174 ipsec_stack_t *ipss = ns->netstack_ipsec; 6175 6176 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6177 for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL; 6178 node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) { 6179 iterator(node, arg, ns); 6180 } 6181 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6182 } 6183 6184 /* 6185 * Initialize policy head. This can only fail if there's a memory problem. 6186 */ 6187 static boolean_t 6188 tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns) 6189 { 6190 ipsec_stack_t *ipss = ns->netstack_ipsec; 6191 6192 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 6193 iph->iph_refs = 1; 6194 iph->iph_gen = 0; 6195 if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize, 6196 KM_SLEEP, B_FALSE, ns) != 0) { 6197 ipsec_polhead_free_table(iph); 6198 return (B_FALSE); 6199 } 6200 ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize); 6201 return (B_TRUE); 6202 } 6203 6204 /* 6205 * Create a tunnel policy node with "name". Sets *errno to ENOMEM if 6206 * there's a memory problem, and to EEXIST if a node with that name 6207 * already exists.
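 *
 * A hypothetical spdsock-style caller (sketch only; "lifname" and the
 * error handling are illustrative, not lifted from spdsock.c):
 *
 *	int err;
 *	uint64_t gen;
 *	ipsec_tun_pol_t *itp;
 *
 *	itp = create_tunnel_policy(lifname, &err, &gen, ns);
 *	if (itp == NULL)
 *		return (err);		// ENOMEM or EEXIST
 *	// Caller owns one of the two initial references; the AVL tree
 *	// holds the other. Drop ours when done.
 *	ITP_REFRELE(itp, ns);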
6208 */ 6209 ipsec_tun_pol_t * 6210 create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns) 6211 { 6212 ipsec_tun_pol_t *newbie, *existing; 6213 avl_index_t where; 6214 ipsec_stack_t *ipss = ns->netstack_ipsec; 6215 6216 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP); 6217 if (newbie == NULL) { 6218 *errno = ENOMEM; 6219 return (NULL); 6220 } 6221 if (!ipsec_fragcache_init(&newbie->itp_fragcache)) { 6222 kmem_free(newbie, sizeof (*newbie)); 6223 *errno = ENOMEM; 6224 return (NULL); 6225 } 6226 6227 (void) strncpy(newbie->itp_name, name, LIFNAMSIZ); 6228 6229 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6230 existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6231 newbie, &where); 6232 if (existing != NULL) { 6233 itp_free(newbie, ns); 6234 *errno = EEXIST; 6235 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6236 return (NULL); 6237 } 6238 ipss->ipsec_tunnel_policy_gen++; 6239 *gen = ipss->ipsec_tunnel_policy_gen; 6240 newbie->itp_refcnt = 2; /* One for the caller, one for the tree. */ 6241 newbie->itp_next_policy_index = 1; 6242 avl_insert(&ipss->ipsec_tunnel_policies, newbie, where); 6243 mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL); 6244 newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t), 6245 KM_NOSLEEP); 6246 if (newbie->itp_policy == NULL) 6247 goto nomem; 6248 newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t), 6249 KM_NOSLEEP); 6250 if (newbie->itp_inactive == NULL) { 6251 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6252 goto nomem; 6253 } 6254 6255 if (!tunnel_polhead_init(newbie->itp_policy, ns)) { 6256 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6257 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6258 goto nomem; 6259 } else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) { 6260 IPPH_REFRELE(newbie->itp_policy, ns); 6261 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6262 goto nomem; 6263 } 6264 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6265 6266 return (newbie); 6267 nomem: 6268 *errno = ENOMEM; /* Back out the AVL insertion and release the lock before freeing. */ avl_remove(&ipss->ipsec_tunnel_policies, newbie); mutex_destroy(&newbie->itp_lock); rw_exit(&ipss->ipsec_tunnel_policy_lock); ipsec_fragcache_uninit(&newbie->itp_fragcache); 6269 kmem_free(newbie, sizeof (*newbie)); 6270 return (NULL); 6271 } 6272 6273 /* 6274 * We can't call the tun_t lookup function until tun is 6275 * loaded, so create a dummy function to avoid symbol 6276 * lookup errors on boot. 6277 */ 6278 /* ARGSUSED */ 6279 ipsec_tun_pol_t * 6280 itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af, netstack_t *ns) 6281 { 6282 return (NULL); /* Always return NULL. */ 6283 } 6284 6285 /* 6286 * Frag cache code, based on SunScreen 3.2 source 6287 * screen/kernel/common/screen_fragcache.c 6288 */ 6289 6290 #define IPSEC_FRAG_TTL_MAX 5 6291 /* 6292 * Note that the following parameters create 256 hash buckets 6293 * with 1024 free entries to be distributed. Entries are cleaned 6294 * periodically, and cleaning is also attempted when no free entries 6295 * remain, but this system errs on the side of dropping packets 6296 * over risking memory exhaustion. We may decide to make the hash 6297 * factor a tunable if this proves to be a bad decision. 6298 */ 6299 #define IPSEC_FRAG_HASH_SLOTS (1<<8) 6300 #define IPSEC_FRAG_HASH_FACTOR 4 6301 #define IPSEC_FRAG_HASH_SIZE (IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR) 6302 6303 #define IPSEC_FRAG_HASH_MASK (IPSEC_FRAG_HASH_SLOTS - 1) 6304 #define IPSEC_FRAG_HASH_FUNC(id) (((id) & IPSEC_FRAG_HASH_MASK) ^ \ 6305 (((id) / \ 6306 (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \ 6307 IPSEC_FRAG_HASH_MASK)) 6308 6309 /* Maximum fragments per packet.
48 bytes payload x 1366 fragments > 64KB */ 6310 #define IPSEC_MAX_FRAGS 1366 6311 6312 #define V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6313 IPH_OFFSET) << 3) 6314 #define V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6315 IPH_MF) 6316 6317 /* 6318 * Initialize an ipsec fragcache instance. 6319 * Returns B_FALSE if memory allocation fails. 6320 */ 6321 boolean_t 6322 ipsec_fragcache_init(ipsec_fragcache_t *frag) 6323 { 6324 ipsec_fragcache_entry_t *ftemp; 6325 int i; 6326 6327 mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL); 6328 frag->itpf_ptr = (ipsec_fragcache_entry_t **) 6329 kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) * 6330 IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP); 6331 if (frag->itpf_ptr == NULL) 6332 return (B_FALSE); 6333 6334 ftemp = (ipsec_fragcache_entry_t *) 6335 kmem_zalloc(sizeof (ipsec_fragcache_entry_t) * 6336 IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP); 6337 if (ftemp == NULL) { 6338 kmem_free(frag->itpf_ptr, sizeof (ipsec_fragcache_entry_t *) * 6339 IPSEC_FRAG_HASH_SLOTS); 6340 return (B_FALSE); 6341 } 6342 6343 frag->itpf_freelist = NULL; 6344 6345 for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) { 6346 ftemp->itpfe_next = frag->itpf_freelist; 6347 frag->itpf_freelist = ftemp; 6348 ftemp++; 6349 } 6350 6351 frag->itpf_expire_hint = 0; 6352 6353 return (B_TRUE); 6354 } 6355 6356 void 6357 ipsec_fragcache_uninit(ipsec_fragcache_t *frag) 6358 { 6359 ipsec_fragcache_entry_t *fep; 6360 int i; 6361 6362 mutex_enter(&frag->itpf_lock); 6363 if (frag->itpf_ptr) { 6364 /* Delete any existing fragcache entry chains */ 6365 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6366 fep = (frag->itpf_ptr)[i]; 6367 while (fep != NULL) { 6368 /* Returned fep is next in chain or NULL */ 6369 fep = fragcache_delentry(i, fep, frag); 6370 } 6371 } 6372 /* 6373 * Chase the pointers back to the beginning 6374 * of the memory allocation and then 6375 * get rid of the allocated freelist 6376 */ 6377 while (frag->itpf_freelist->itpfe_next != NULL) 6378 frag->itpf_freelist = frag->itpf_freelist->itpfe_next; 6379 /* 6380 * XXX - If we ever dynamically grow the freelist 6381 * then we'll have to free entries individually 6382 * or determine how many entries or chunks we have 6383 * grown since the initial allocation. 6384 */ 6385 kmem_free(frag->itpf_freelist, 6386 sizeof (ipsec_fragcache_entry_t) * 6387 IPSEC_FRAG_HASH_SIZE); 6388 /* Free the fragcache structure */ 6389 kmem_free(frag->itpf_ptr, 6390 sizeof (ipsec_fragcache_entry_t *) * 6391 IPSEC_FRAG_HASH_SLOTS); 6392 } 6393 mutex_exit(&frag->itpf_lock); 6394 mutex_destroy(&frag->itpf_lock); 6395 } 6396 6397 /* 6398 * Add a fragment to the fragment cache. Consumes the message (ipsec_mp, 6399 * if any, and mp) if NULL is returned. Returns the reassembled packet, 6400 * as a b_next chain of fragments, once it is complete; NULL otherwise. 6401 */ 6402 mblk_t * 6403 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, 6404 int outer_hdr_len, ipsec_stack_t *ipss) 6405 { 6406 boolean_t is_v4; 6407 time_t itpf_time; 6408 ipha_t *iph; 6409 ipha_t *oiph; 6410 ip6_t *ip6h = NULL; 6411 uint8_t v6_proto; 6412 uint8_t *v6_proto_p; 6413 uint16_t ip6_hdr_length; 6414 ip6_pkt_t ipp; 6415 ip6_frag_t *fraghdr; 6416 ipsec_fragcache_entry_t *fep; 6417 int i; 6418 mblk_t *nmp, *prevmp, *spare_mp = NULL; 6419 int firstbyte, lastbyte; 6420 int offset; 6421 int last; 6422 boolean_t inbound = (ipsec_mp != NULL); 6423 mblk_t *first_mp = inbound ?
ipsec_mp : mp; 6424 6425 mutex_enter(&frag->itpf_lock); 6426 6427 oiph = (ipha_t *)mp->b_rptr; 6428 iph = (ipha_t *)(mp->b_rptr + outer_hdr_len); 6429 if (IPH_HDR_VERSION(iph) == IPV4_VERSION) { 6430 is_v4 = B_TRUE; 6431 } else { 6432 ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION); 6433 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 6434 mutex_exit(&frag->itpf_lock); 6435 ip_drop_packet(first_mp, inbound, NULL, NULL, 6436 DROPPER(ipss, ipds_spd_nomem), 6437 &ipss->ipsec_spd_dropper); 6438 return (NULL); 6439 } 6440 ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len); 6441 6442 if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length, 6443 &v6_proto_p)) { 6444 /* 6445 * Find upper layer protocol. 6446 * If it fails we have a malformed packet 6447 */ 6448 mutex_exit(&frag->itpf_lock); 6449 ip_drop_packet(first_mp, inbound, NULL, NULL, 6450 DROPPER(ipss, ipds_spd_malformed_packet), 6451 &ipss->ipsec_spd_dropper); 6452 freemsg(spare_mp); 6453 return (NULL); 6454 } else { 6455 v6_proto = *v6_proto_p; 6456 } 6457 6458 6459 bzero(&ipp, sizeof (ipp)); 6460 (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL); 6461 if (!(ipp.ipp_fields & IPPF_FRAGHDR)) { 6462 /* 6463 * We think this is a fragment, but didn't find 6464 * a fragment header. Something is wrong. 6465 */ 6466 mutex_exit(&frag->itpf_lock); 6467 ip_drop_packet(first_mp, inbound, NULL, NULL, 6468 DROPPER(ipss, ipds_spd_malformed_frag), 6469 &ipss->ipsec_spd_dropper); 6470 freemsg(spare_mp); 6471 return (NULL); 6472 } 6473 fraghdr = ipp.ipp_fraghdr; 6474 is_v4 = B_FALSE; 6475 } 6476 6477 /* Anything to cleanup? */ 6478 6479 /* 6480 * This cleanup call could be put in a timer loop 6481 * but it may actually be just as reasonable a decision to 6482 * leave it here. The disadvantage is this only gets called when 6483 * frags are added. The advantage is that it is not 6484 * susceptible to race conditions like a time-based cleanup 6485 * may be. 
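 *
 * The sweep is amortized by itpf_expire_hint, which records the
 * earliest pending expiry (maintained by ipsec_fragcache_clean() and
 * the insertion path below); until that time arrives, the gate costs
 * a single comparison:
 *
 *	if (gethrestime_sec() >= frag->itpf_expire_hint)
 *		ipsec_fragcache_clean(frag);
 *
 * (A restatement of the two lines that follow, shown for emphasis.)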
6486 */ 6487 itpf_time = gethrestime_sec(); 6488 if (itpf_time >= frag->itpf_expire_hint) 6489 ipsec_fragcache_clean(frag); 6490 6491 /* Lookup to see if there is an existing entry */ 6492 6493 if (is_v4) 6494 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6495 else 6496 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6497 6498 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6499 if (is_v4) { 6500 ASSERT(iph != NULL); 6501 if ((fep->itpfe_id == iph->ipha_ident) && 6502 (fep->itpfe_src == iph->ipha_src) && 6503 (fep->itpfe_dst == iph->ipha_dst) && 6504 (fep->itpfe_proto == iph->ipha_protocol)) 6505 break; 6506 } else { 6507 ASSERT(fraghdr != NULL); 6508 ASSERT(fep != NULL); 6509 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6510 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6511 &ip6h->ip6_src) && 6512 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6513 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6514 break; 6515 } 6516 } 6517 6518 if (is_v4) { 6519 firstbyte = V4_FRAG_OFFSET(iph); 6520 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6521 IPH_HDR_LENGTH(iph); 6522 last = (V4_MORE_FRAGS(iph) == 0); 6523 #ifdef FRAGCACHE_DEBUG 6524 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6525 "last = %d, id = %d\n", firstbyte, lastbyte, last, 6526 iph->ipha_ident); 6527 #endif 6528 } else { 6529 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6530 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6531 sizeof (ip6_t) - ip6_hdr_length; 6532 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6533 #ifdef FRAGCACHE_DEBUG 6534 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6535 "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n", 6536 firstbyte, lastbyte, last, fraghdr->ip6f_ident, 6537 fraghdr, spare_mp); 6538 #endif 6539 } 6540 6541 /* check for bogus fragments and delete the entry */ 6542 if (firstbyte > 0 && firstbyte <= 8) { 6543 if (fep != NULL) 6544 (void) fragcache_delentry(i, fep, frag); 6545 mutex_exit(&frag->itpf_lock); 6546 ip_drop_packet(first_mp, inbound, NULL, NULL, 6547 DROPPER(ipss, ipds_spd_malformed_frag), 6548 &ipss->ipsec_spd_dropper); 6549 freemsg(spare_mp); 6550 return (NULL); 6551 } 6552 6553 /* Not found, allocate a new entry */ 6554 if (fep == NULL) { 6555 if (frag->itpf_freelist == NULL) { 6556 /* see if there is some space */ 6557 ipsec_fragcache_clean(frag); 6558 if (frag->itpf_freelist == NULL) { 6559 mutex_exit(&frag->itpf_lock); 6560 ip_drop_packet(first_mp, inbound, NULL, NULL, 6561 DROPPER(ipss, ipds_spd_nomem), 6562 &ipss->ipsec_spd_dropper); 6563 freemsg(spare_mp); 6564 return (NULL); 6565 } 6566 } 6567 6568 fep = frag->itpf_freelist; 6569 frag->itpf_freelist = fep->itpfe_next; 6570 6571 if (is_v4) { 6572 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6573 sizeof (struct in_addr)); 6574 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6575 sizeof (struct in_addr)); 6576 fep->itpfe_id = iph->ipha_ident; 6577 fep->itpfe_proto = iph->ipha_protocol; 6578 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6579 } else { 6580 bcopy((in6_addr_t *)&ip6h->ip6_src, 6581 (in6_addr_t *)&fep->itpfe_src6, 6582 sizeof (struct in6_addr)); 6583 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6584 (in6_addr_t *)&fep->itpfe_dst6, 6585 sizeof (struct in6_addr)); 6586 fep->itpfe_id = fraghdr->ip6f_ident; 6587 fep->itpfe_proto = v6_proto; 6588 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6589 } 6590 itpf_time = gethrestime_sec(); 6591 fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1; 6592 fep->itpfe_last = 0; 6593 fep->itpfe_fraglist = NULL; 6594 
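/*
 * The new entry is about to be linked at the head of hash bucket i,
 * where i was computed above by IPSEC_FRAG_HASH_FUNC(): the fragment
 * ident folded into 256 slots by XOR-ing its low two bytes. Worked
 * example (illustrative only): ident 0x1234 -> 0x34 ^ 0x12 = 0x26,
 * i.e. bucket 38.
 */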
fep->itpfe_depth = 0; 6595 fep->itpfe_next = (frag->itpf_ptr)[i]; 6596 (frag->itpf_ptr)[i] = fep; 6597 6598 if (frag->itpf_expire_hint > fep->itpfe_exp) 6599 frag->itpf_expire_hint = fep->itpfe_exp; 6600 6601 } 6602 freemsg(spare_mp); 6603 6604 /* Insert it in the frag list */ 6605 /* List is in order by starting offset of fragments */ 6606 6607 prevmp = NULL; 6608 for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) { 6609 ipha_t *niph; 6610 ipha_t *oniph; 6611 ip6_t *nip6h; 6612 ip6_pkt_t nipp; 6613 ip6_frag_t *nfraghdr; 6614 uint16_t nip6_hdr_length; 6615 uint8_t *nv6_proto_p; 6616 int nfirstbyte, nlastbyte; 6617 char *data, *ndata; 6618 mblk_t *nspare_mp = NULL; 6619 mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp); 6620 int hdr_len; 6621 6622 oniph = (ipha_t *)mp->b_rptr; 6623 nip6h = NULL; 6624 niph = NULL; 6625 6626 /* 6627 * Determine outer header type and length and set 6628 * pointers appropriately 6629 */ 6630 6631 if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) { 6632 hdr_len = ((outer_hdr_len != 0) ? 6633 IPH_HDR_LENGTH(oiph) : 0); 6634 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6635 } else { 6636 ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION); 6637 if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) { 6638 mutex_exit(&frag->itpf_lock); 6639 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6640 DROPPER(ipss, ipds_spd_nomem), 6641 &ipss->ipsec_spd_dropper); 6642 return (NULL); 6643 } 6644 nip6h = (ip6_t *)nspare_mp->b_rptr; 6645 (void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6646 &nip6_hdr_length, &v6_proto_p); 6647 hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0); 6648 } 6649 6650 /* 6651 * Determine inner header type and length and set 6652 * pointers appropriately 6653 */ 6654 6655 if (is_v4) { 6656 if (niph == NULL) { 6657 /* Was v6 outer */ 6658 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6659 } 6660 nfirstbyte = V4_FRAG_OFFSET(niph); 6661 nlastbyte = nfirstbyte + ntohs(niph->ipha_length) - 6662 IPH_HDR_LENGTH(niph); 6663 } else { 6664 if ((nspare_mp == NULL) && 6665 ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) { 6666 mutex_exit(&frag->itpf_lock); 6667 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6668 DROPPER(ipss, ipds_spd_nomem), 6669 &ipss->ipsec_spd_dropper); 6670 return (NULL); 6671 } 6672 nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len); 6673 if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6674 &nip6_hdr_length, &nv6_proto_p)) { 6675 mutex_exit(&frag->itpf_lock); 6676 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6677 DROPPER(ipss, ipds_spd_malformed_frag), 6678 &ipss->ipsec_spd_dropper); 6679 ipsec_freemsg_chain(nspare_mp); 6680 return (NULL); 6681 } 6682 bzero(&nipp, sizeof (nipp)); 6683 (void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL); 6684 nfraghdr = nipp.ipp_fraghdr; 6685 nfirstbyte = ntohs(nfraghdr->ip6f_offlg & 6686 IP6F_OFF_MASK); 6687 nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) + 6688 sizeof (ip6_t) - nip6_hdr_length; 6689 } 6690 ipsec_freemsg_chain(nspare_mp); 6691 6692 /* Check for overlapping fragments */ 6693 if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) { 6694 /* 6695 * Overlap Check: 6696 * ~~~~--------- # Check if the newly 6697 * ~ ndata_mp| # received fragment 6698 * ~~~~--------- # overlaps with the 6699 * ---------~~~~~~ # current fragment. 
6700 * | mp ~ 6701 * ---------~~~~~~ 6702 */ 6703 if (is_v4) { 6704 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6705 firstbyte - nfirstbyte; 6706 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6707 } else { 6708 data = (char *)ip6h + 6709 nip6_hdr_length + firstbyte - 6710 nfirstbyte; 6711 ndata = (char *)nip6h + nip6_hdr_length; 6712 } 6713 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) - 6714 firstbyte)) { 6715 /* Overlapping data does not match */ 6716 (void) fragcache_delentry(i, fep, frag); 6717 mutex_exit(&frag->itpf_lock); 6718 ip_drop_packet(first_mp, inbound, NULL, NULL, 6719 DROPPER(ipss, ipds_spd_overlap_frag), 6720 &ipss->ipsec_spd_dropper); 6721 return (NULL); 6722 } 6723 /* Part of defense for jolt2.c fragmentation attack */ 6724 if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) { 6725 /* 6726 * Check for identical or subset fragments: 6727 * ---------- ~~~~--------~~~~~ 6728 * | nmp | or ~ nmp ~ 6729 * ---------- ~~~~--------~~~~~ 6730 * ---------- ------ 6731 * | mp | | mp | 6732 * ---------- ------ 6733 */ 6734 mutex_exit(&frag->itpf_lock); 6735 ip_drop_packet(first_mp, inbound, NULL, NULL, 6736 DROPPER(ipss, ipds_spd_evil_frag), 6737 &ipss->ipsec_spd_dropper); 6738 return (NULL); 6739 } 6740 6741 } 6742 6743 /* Correct location for this fragment? */ 6744 if (firstbyte <= nfirstbyte) { 6745 /* 6746 * Check if the tail end of the new fragment overlaps 6747 * with the head of the current fragment. 6748 * --------~~~~~~~ 6749 * | nmp ~ 6750 * --------~~~~~~~ 6751 * ~~~~~-------- 6752 * ~ mp | 6753 * ~~~~~-------- 6754 */ 6755 if (lastbyte > nfirstbyte) { 6756 /* Fragments overlap */ 6757 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6758 firstbyte - nfirstbyte; 6759 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6760 if (is_v4) { 6761 data = (char *)iph + 6762 IPH_HDR_LENGTH(iph) + firstbyte - 6763 nfirstbyte; 6764 ndata = (char *)niph + 6765 IPH_HDR_LENGTH(niph); 6766 } else { 6767 data = (char *)ip6h + 6768 nip6_hdr_length + firstbyte - 6769 nfirstbyte; 6770 ndata = (char *)nip6h + nip6_hdr_length; 6771 } 6772 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) 6773 - nfirstbyte)) { 6774 /* Overlap mismatch */ 6775 (void) fragcache_delentry(i, fep, frag); 6776 mutex_exit(&frag->itpf_lock); 6777 ip_drop_packet(first_mp, inbound, NULL, 6778 NULL, DROPPER(ipss, 6779 ipds_spd_overlap_frag), 6780 &ipss->ipsec_spd_dropper); 6781 return (NULL); 6782 } 6783 } 6784 6785 /* 6786 * Fragment does not illegally overlap and can now 6787 * be inserted into the chain 6788 */ 6789 break; 6790 } 6791 6792 prevmp = nmp; 6793 } 6794 first_mp->b_next = nmp; 6795 6796 if (prevmp == NULL) { 6797 fep->itpfe_fraglist = first_mp; 6798 } else { 6799 prevmp->b_next = first_mp; 6800 } 6801 if (last) 6802 fep->itpfe_last = 1; 6803 6804 /* Part of defense for jolt2.c fragmentation attack */ 6805 if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) { 6806 (void) fragcache_delentry(i, fep, frag); 6807 mutex_exit(&frag->itpf_lock); 6808 ip_drop_packet(first_mp, inbound, NULL, NULL, 6809 DROPPER(ipss, ipds_spd_max_frags), 6810 &ipss->ipsec_spd_dropper); 6811 return (NULL); 6812 } 6813 6814 /* Check for complete packet */ 6815 6816 if (!fep->itpfe_last) { 6817 mutex_exit(&frag->itpf_lock); 6818 #ifdef FRAGCACHE_DEBUG 6819 cmn_err(CE_WARN, "Fragment cached, not last.\n"); 6820 #endif 6821 return (NULL); 6822 } 6823 6824 #ifdef FRAGCACHE_DEBUG 6825 cmn_err(CE_WARN, "Last fragment cached.\n"); 6826 cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp); 6827 #endif 6828 6829 offset = 0; 6830 for (mp = fep->itpfe_fraglist; 
mp; mp = mp->b_next) { 6831 mblk_t *data_mp = (inbound ? mp->b_cont : mp); 6832 int hdr_len; 6833 6834 oiph = (ipha_t *)data_mp->b_rptr; 6835 ip6h = NULL; 6836 iph = NULL; 6837 6838 spare_mp = NULL; 6839 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 6840 hdr_len = ((outer_hdr_len != 0) ? 6841 IPH_HDR_LENGTH(oiph) : 0); 6842 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6843 } else { 6844 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 6845 if ((spare_mp = msgpullup(data_mp, -1)) == NULL) { 6846 mutex_exit(&frag->itpf_lock); 6847 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6848 DROPPER(ipss, ipds_spd_nomem), 6849 &ipss->ipsec_spd_dropper); 6850 return (NULL); 6851 } 6852 ip6h = (ip6_t *)spare_mp->b_rptr; 6853 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 6854 &ip6_hdr_length, &v6_proto_p); 6855 hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0); 6856 } 6857 6858 /* Calculate current fragment start/end */ 6859 if (is_v4) { 6860 if (iph == NULL) { 6861 /* Was v6 outer */ 6862 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6863 } 6864 firstbyte = V4_FRAG_OFFSET(iph); 6865 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6866 IPH_HDR_LENGTH(iph); 6867 } else { 6868 if ((spare_mp == NULL) && 6869 ((spare_mp = msgpullup(data_mp, -1)) == NULL)) { 6870 mutex_exit(&frag->itpf_lock); 6871 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6872 DROPPER(ipss, ipds_spd_nomem), 6873 &ipss->ipsec_spd_dropper); 6874 return (NULL); 6875 } 6876 ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len); 6877 if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 6878 &ip6_hdr_length, &v6_proto_p)) { 6879 mutex_exit(&frag->itpf_lock); 6880 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6881 DROPPER(ipss, ipds_spd_malformed_frag), 6882 &ipss->ipsec_spd_dropper); 6883 ipsec_freemsg_chain(spare_mp); 6884 return (NULL); 6885 } 6886 v6_proto = *v6_proto_p; 6887 bzero(&ipp, sizeof (ipp)); 6888 (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL); 6889 fraghdr = ipp.ipp_fraghdr; 6890 firstbyte = ntohs(fraghdr->ip6f_offlg & 6891 IP6F_OFF_MASK); 6892 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6893 sizeof (ip6_t) - ip6_hdr_length; 6894 } 6895 6896 /* 6897 * If this fragment is greater than current offset, 6898 * we have a missing fragment so return NULL 6899 */ 6900 if (firstbyte > offset) { 6901 mutex_exit(&frag->itpf_lock); 6902 #ifdef FRAGCACHE_DEBUG 6903 /* 6904 * Note, this can happen when the last frag 6905 * gets sent through because it is smaller 6906 * than the MTU. It is not necessarily an 6907 * error condition. 6908 */ 6909 cmn_err(CE_WARN, "Frag greater than offset! 
: " 6910 "missing fragment: firstbyte = %d, offset = %d, " 6911 "mp = %p\n", firstbyte, offset, mp); 6912 #endif 6913 ipsec_freemsg_chain(spare_mp); 6914 return (NULL); 6915 } 6916 6917 /* 6918 * If we are at the last fragment, we have the complete 6919 * packet, so rechain things and return it to caller 6920 * for processing 6921 */ 6922 6923 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6924 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6925 mp = fep->itpfe_fraglist; 6926 fep->itpfe_fraglist = NULL; 6927 (void) fragcache_delentry(i, fep, frag); 6928 mutex_exit(&frag->itpf_lock); 6929 6930 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6931 65535)) || (!is_v4 && (firstbyte + 6932 ntohs(ip6h->ip6_plen) > 65535))) { 6933 /* It is an invalid "ping-o-death" packet */ 6934 /* Discard it */ 6935 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6936 DROPPER(ipss, ipds_spd_evil_frag), 6937 &ipss->ipsec_spd_dropper); 6938 ipsec_freemsg_chain(spare_mp); 6939 return (NULL); 6940 } 6941 #ifdef FRAGCACHE_DEBUG 6942 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6943 "mp->b_next = %p", mp, mp->b_next); 6944 #endif 6945 ipsec_freemsg_chain(spare_mp); 6946 /* 6947 * For inbound case, mp has ipsec_in b_next'd chain 6948 * For outbound case, it is just data mp chain 6949 */ 6950 return (mp); 6951 } 6952 ipsec_freemsg_chain(spare_mp); 6953 6954 /* 6955 * Update new ending offset if this 6956 * fragment extends the packet 6957 */ 6958 if (offset < lastbyte) 6959 offset = lastbyte; 6960 } 6961 6962 mutex_exit(&frag->itpf_lock); 6963 6964 /* Didn't find last fragment, so return NULL */ 6965 return (NULL); 6966 } 6967 6968 static void 6969 ipsec_fragcache_clean(ipsec_fragcache_t *frag) 6970 { 6971 ipsec_fragcache_entry_t *fep; 6972 int i; 6973 ipsec_fragcache_entry_t *earlyfep = NULL; 6974 time_t itpf_time; 6975 int earlyexp; 6976 int earlyi = 0; 6977 6978 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6979 6980 itpf_time = gethrestime_sec(); 6981 earlyexp = itpf_time + 10000; 6982 6983 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6984 fep = (frag->itpf_ptr)[i]; 6985 while (fep) { 6986 if (fep->itpfe_exp < itpf_time) { 6987 /* found */ 6988 fep = fragcache_delentry(i, fep, frag); 6989 } else { 6990 if (fep->itpfe_exp < earlyexp) { 6991 earlyfep = fep; 6992 earlyexp = fep->itpfe_exp; 6993 earlyi = i; 6994 } 6995 fep = fep->itpfe_next; 6996 } 6997 } 6998 } 6999 7000 frag->itpf_expire_hint = earlyexp; 7001 7002 /* if (!found) */ 7003 if (frag->itpf_freelist == NULL) 7004 (void) fragcache_delentry(earlyi, earlyfep, frag); 7005 } 7006 7007 static ipsec_fragcache_entry_t * 7008 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 7009 ipsec_fragcache_t *frag) 7010 { 7011 ipsec_fragcache_entry_t *targp; 7012 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 7013 7014 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7015 7016 /* Free up any fragment list still in cache entry */ 7017 ipsec_freemsg_chain(fep->itpfe_fraglist); 7018 7019 targp = (frag->itpf_ptr)[slot]; 7020 ASSERT(targp != 0); 7021 7022 if (targp == fep) { 7023 /* unlink from head of hash chain */ 7024 (frag->itpf_ptr)[slot] = nextp; 7025 /* link into free list */ 7026 fep->itpfe_next = frag->itpf_freelist; 7027 frag->itpf_freelist = fep; 7028 return (nextp); 7029 } 7030 7031 /* maybe should use double linked list to make update faster */ 7032 /* must be past front of chain */ 7033 while (targp) { 7034 if (targp->itpfe_next == fep) { 7035 /* unlink from hash chain */ 7036 targp->itpfe_next = nextp; 7037 /* link into free list */ 7038 fep->itpfe_next = 
frag->itpf_freelist; 7039 frag->itpf_freelist = fep; 7040 return (nextp); 7041 } 7042 targp = targp->itpfe_next; 7043 ASSERT(targp != 0); 7044 } 7045 /* NOTREACHED */ 7046 return (NULL); 7047 } 7048
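/*
 * For illustration only (not compiled into ip): a minimal userland
 * sketch of the overlap-acceptance rule that ipsec_fragcache_add()
 * applies, with [first, last) byte ranges as computed above. Names
 * here are hypothetical and this is "in spirit" only, since the
 * kernel code above works on mblk chains rather than flat buffers.
 *
 *	#include <string.h>
 *
 *	#define	XMIN(a, b)	((a) < (b) ? (a) : (b))
 *	#define	XMAX(a, b)	((a) > (b) ? (a) : (b))
 *
 *	// Returns 1 if the new fragment [nf, nl) is acceptable against
 *	// an already-cached fragment [of, ol); 0 if it must be dropped.
 *	static int
 *	frag_acceptable(const unsigned char *newp, int nf, int nl,
 *	    const unsigned char *oldp, int of, int ol)
 *	{
 *		int lo = XMAX(nf, of), hi = XMIN(nl, ol);
 *
 *		if (hi <= lo)
 *			return (1);	// disjoint: accept
 *		if (nf >= of && nl <= ol)
 *			return (0);	// duplicate/subset: drop (jolt2)
 *		return (memcmp(newp + (lo - nf), oldp + (lo - of),
 *		    (size_t)(hi - lo)) == 0);	// overlap must match
 *	}
 */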