/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, netstack_t *);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(netstack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
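/*
 * (Editorial note, not in the original file) These flags combine with
 * bitwise OR; e.g., a hypothetical caller extracting selectors from a
 * tunnel-mode ICMP packet would pass (SEL_TUNNEL_MODE | SEL_IS_ICMP),
 * while SEL_NONE requests plain address/port extraction.
 */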
/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG }
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

#define	IPSEC_SEL_NOHASH	((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int,
    ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2) 						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv4 Fragments
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)
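/*
 * (Editorial illustration, not in the original file) How the
 * ALGBITS_ROUND_* macros above behave for a hypothetical 129-bit key
 * length with an 8-bit granularity:
 */
#if 0
	ASSERT(ALGBITS_ROUND_DOWN(129, 8) == 128);
	ASSERT(ALGBITS_ROUND_UP(129, 8) == 136);
	ASSERT(ALGBITS_ROUND_UP(128, 8) == 128);  /* exact multiple unchanged */
#endif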
/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries..
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;

	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;

	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * The in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsp_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
297 */ 298 if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL)) 299 return (-1); 300 if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL)) 301 return (1); 302 /* 303 * At most one of the arguments to the comparison should have a 304 * NULL selector pointer; if not, the tree is broken. 305 */ 306 ASSERT(ipa->ipsp_sel != NULL); 307 ASSERT(ipb->ipsp_sel != NULL); 308 /* 309 * Tie-breaker #2: use the virtual address of the policy node 310 * to arbitrarily break ties. Since we use the new tree node in 311 * the avl_find() in ipsec_insert_always, the new node will be 312 * inserted into the tree in the right place in the sequence. 313 */ 314 if (ipa < ipb) 315 return (-1); 316 if (ipa > ipb) 317 return (1); 318 return (0); 319 } 320 321 /* 322 * Free what ipsec_alloc_table allocated. 323 */ 324 void 325 ipsec_polhead_free_table(ipsec_policy_head_t *iph) 326 { 327 int dir; 328 int i; 329 330 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 331 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 332 333 if (ipr->ipr_hash == NULL) 334 continue; 335 336 for (i = 0; i < ipr->ipr_nchains; i++) { 337 ASSERT(ipr->ipr_hash[i].hash_head == NULL); 338 } 339 kmem_free(ipr->ipr_hash, ipr->ipr_nchains * 340 sizeof (ipsec_policy_hash_t)); 341 ipr->ipr_hash = NULL; 342 } 343 } 344 345 void 346 ipsec_polhead_destroy(ipsec_policy_head_t *iph) 347 { 348 int dir; 349 350 avl_destroy(&iph->iph_rulebyid); 351 rw_destroy(&iph->iph_lock); 352 353 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 354 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 355 int chain; 356 357 for (chain = 0; chain < ipr->ipr_nchains; chain++) 358 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 359 360 } 361 ipsec_polhead_free_table(iph); 362 } 363 364 /* 365 * Free the IPsec stack instance. 366 */ 367 /* ARGSUSED */ 368 static void 369 ipsec_stack_fini(netstackid_t stackid, void *arg) 370 { 371 ipsec_stack_t *ipss = (ipsec_stack_t *)arg; 372 void *cookie; 373 ipsec_tun_pol_t *node; 374 netstack_t *ns = ipss->ipsec_netstack; 375 int i; 376 ipsec_algtype_t algtype; 377 378 ipsec_loader_destroy(ipss); 379 380 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 381 /* 382 * It's possible we can just ASSERT() the tree is empty. After all, 383 * we aren't called until IP is ready to unload (and presumably all 384 * tunnels have been unplumbed). But we'll play it safe for now, the 385 * loop will just exit immediately if it's empty. 386 */ 387 cookie = NULL; 388 while ((node = (ipsec_tun_pol_t *) 389 avl_destroy_nodes(&ipss->ipsec_tunnel_policies, 390 &cookie)) != NULL) { 391 ITP_REFRELE(node, ns); 392 } 393 avl_destroy(&ipss->ipsec_tunnel_policies); 394 rw_exit(&ipss->ipsec_tunnel_policy_lock); 395 rw_destroy(&ipss->ipsec_tunnel_policy_lock); 396 397 ipsec_config_flush(ns); 398 399 ipsec_kstat_destroy(ipss); 400 401 ip_drop_unregister(&ipss->ipsec_dropper); 402 403 ip_drop_unregister(&ipss->ipsec_spd_dropper); 404 ip_drop_destroy(ipss); 405 /* 406 * Globals start with ref == 1 to prevent IPPH_REFRELE() from 407 * attempting to free them, hence they should have 1 now. 
408 */ 409 ipsec_polhead_destroy(&ipss->ipsec_system_policy); 410 ASSERT(ipss->ipsec_system_policy.iph_refs == 1); 411 ipsec_polhead_destroy(&ipss->ipsec_inactive_policy); 412 ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1); 413 414 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 415 ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head); 416 ipss->ipsec_action_hash[i].hash_head = NULL; 417 mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock)); 418 } 419 420 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 421 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 422 mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock)); 423 } 424 425 mutex_enter(&ipss->ipsec_alg_lock); 426 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype ++) { 427 int nalgs = ipss->ipsec_nalgs[algtype]; 428 429 for (i = 0; i < nalgs; i++) { 430 if (ipss->ipsec_alglists[algtype][i] != NULL) 431 ipsec_alg_unreg(algtype, i, ns); 432 } 433 } 434 mutex_exit(&ipss->ipsec_alg_lock); 435 mutex_destroy(&ipss->ipsec_alg_lock); 436 437 ipsid_gc(ns); 438 ipsid_fini(ns); 439 440 (void) ipsec_free_tables(ipss); 441 kmem_free(ipss, sizeof (*ipss)); 442 } 443 444 void 445 ipsec_policy_g_destroy(void) 446 { 447 kmem_cache_destroy(ipsec_action_cache); 448 kmem_cache_destroy(ipsec_sel_cache); 449 kmem_cache_destroy(ipsec_pol_cache); 450 kmem_cache_destroy(ipsec_info_cache); 451 452 ipsec_unregister_prov_update(); 453 454 netstack_unregister(NS_IPSEC); 455 } 456 457 458 /* 459 * Free what ipsec_alloc_tables allocated. 460 * Called when table allocation fails to free the table. 461 */ 462 static int 463 ipsec_free_tables(ipsec_stack_t *ipss) 464 { 465 int i; 466 467 if (ipss->ipsec_sel_hash != NULL) { 468 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 469 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 470 } 471 kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize * 472 sizeof (*ipss->ipsec_sel_hash)); 473 ipss->ipsec_sel_hash = NULL; 474 ipss->ipsec_spd_hashsize = 0; 475 } 476 ipsec_polhead_free_table(&ipss->ipsec_system_policy); 477 ipsec_polhead_free_table(&ipss->ipsec_inactive_policy); 478 479 return (ENOMEM); 480 } 481 482 /* 483 * Attempt to allocate the tables in a single policy head. 484 * Return nonzero on failure after cleaning up any work in progress. 485 */ 486 int 487 ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, 488 boolean_t global_cleanup, netstack_t *ns) 489 { 490 int dir; 491 492 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 493 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 494 495 ipr->ipr_nchains = nchains; 496 ipr->ipr_hash = kmem_zalloc(nchains * 497 sizeof (ipsec_policy_hash_t), kmflag); 498 if (ipr->ipr_hash == NULL) 499 return (global_cleanup ? 500 ipsec_free_tables(ns->netstack_ipsec) : 501 ENOMEM); 502 } 503 return (0); 504 } 505 506 /* 507 * Attempt to allocate the various tables. Return nonzero on failure 508 * after cleaning up any work in progress. 
509 */ 510 static int 511 ipsec_alloc_tables(int kmflag, netstack_t *ns) 512 { 513 int error; 514 ipsec_stack_t *ipss = ns->netstack_ipsec; 515 516 error = ipsec_alloc_table(&ipss->ipsec_system_policy, 517 ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); 518 if (error != 0) 519 return (error); 520 521 error = ipsec_alloc_table(&ipss->ipsec_inactive_policy, 522 ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); 523 if (error != 0) 524 return (error); 525 526 ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize * 527 sizeof (*ipss->ipsec_sel_hash), kmflag); 528 529 if (ipss->ipsec_sel_hash == NULL) 530 return (ipsec_free_tables(ipss)); 531 532 return (0); 533 } 534 535 /* 536 * After table allocation, initialize a policy head. 537 */ 538 void 539 ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains) 540 { 541 int dir, chain; 542 543 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 544 avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid, 545 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 546 547 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 548 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 549 ipr->ipr_nchains = nchains; 550 551 for (chain = 0; chain < nchains; chain++) { 552 mutex_init(&(ipr->ipr_hash[chain].hash_lock), 553 NULL, MUTEX_DEFAULT, NULL); 554 } 555 } 556 } 557 558 static boolean_t 559 ipsec_kstat_init(ipsec_stack_t *ipss) 560 { 561 ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net", 562 KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t), 563 KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid); 564 565 if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL) 566 return (B_FALSE); 567 568 ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data; 569 570 #define KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64) 571 KI(esp_stat_in_requests); 572 KI(esp_stat_in_discards); 573 KI(esp_stat_lookup_failure); 574 KI(ah_stat_in_requests); 575 KI(ah_stat_in_discards); 576 KI(ah_stat_lookup_failure); 577 KI(sadb_acquire_maxpackets); 578 KI(sadb_acquire_qhiwater); 579 #undef KI 580 581 kstat_install(ipss->ipsec_ksp); 582 return (B_TRUE); 583 } 584 585 static void 586 ipsec_kstat_destroy(ipsec_stack_t *ipss) 587 { 588 kstat_delete_netstack(ipss->ipsec_ksp, 589 ipss->ipsec_netstack->netstack_stackid); 590 ipss->ipsec_kstats = NULL; 591 592 } 593 594 /* 595 * Initialize the IPsec stack instance. 596 */ 597 /* ARGSUSED */ 598 static void * 599 ipsec_stack_init(netstackid_t stackid, netstack_t *ns) 600 { 601 ipsec_stack_t *ipss; 602 int i; 603 604 ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP); 605 ipss->ipsec_netstack = ns; 606 607 /* 608 * FIXME: netstack_ipsec is used by some of the routines we call 609 * below, but it isn't set until this routine returns. 610 * Either we introduce optional xxx_stack_alloc() functions 611 * that will be called by the netstack framework before xxx_stack_init, 612 * or we switch spd.c and sadb.c to operate on ipsec_stack_t 613 * (latter has some include file order issues for sadb.h, but makes 614 * sense if we merge some of the ipsec related stack_t's together. 615 */ 616 ns->netstack_ipsec = ipss; 617 618 /* 619 * Make two attempts to allocate policy hash tables; try it at 620 * the "preferred" size (may be set in /etc/system) first, 621 * then fall back to the default size. 
622 */ 623 if (ipss->ipsec_spd_hashsize == 0) 624 ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; 625 626 if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) { 627 cmn_err(CE_WARN, 628 "Unable to allocate %d entry IPsec policy hash table", 629 ipss->ipsec_spd_hashsize); 630 ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; 631 cmn_err(CE_WARN, "Falling back to %d entries", 632 ipss->ipsec_spd_hashsize); 633 (void) ipsec_alloc_tables(KM_SLEEP, ns); 634 } 635 636 /* Just set a default for tunnels. */ 637 if (ipss->ipsec_tun_spd_hashsize == 0) 638 ipss->ipsec_tun_spd_hashsize = TUN_SPDHASH_DEFAULT; 639 640 ipsid_init(ns); 641 /* 642 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting 643 * to free them. 644 */ 645 ipss->ipsec_system_policy.iph_refs = 1; 646 ipss->ipsec_inactive_policy.iph_refs = 1; 647 ipsec_polhead_init(&ipss->ipsec_system_policy, 648 ipss->ipsec_spd_hashsize); 649 ipsec_polhead_init(&ipss->ipsec_inactive_policy, 650 ipss->ipsec_spd_hashsize); 651 rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL); 652 avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare, 653 sizeof (ipsec_tun_pol_t), 0); 654 655 ipss->ipsec_next_policy_index = 1; 656 657 rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL); 658 rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL); 659 660 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) 661 mutex_init(&(ipss->ipsec_action_hash[i].hash_lock), 662 NULL, MUTEX_DEFAULT, NULL); 663 664 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) 665 mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock), 666 NULL, MUTEX_DEFAULT, NULL); 667 668 mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL); 669 for (i = 0; i < IPSEC_NALGTYPES; i++) { 670 ipss->ipsec_nalgs[i] = 0; 671 } 672 673 ip_drop_init(ipss); 674 ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD"); 675 676 /* Set function to dummy until tun is loaded */ 677 rw_init(&ipss->ipsec_itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL); 678 rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER); 679 ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy; 680 rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); 681 682 /* IP's IPsec code calls the packet dropper */ 683 ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing"); 684 685 (void) ipsec_kstat_init(ipss); 686 687 ipsec_loader_init(ipss); 688 ipsec_loader_start(ipss); 689 690 return (ipss); 691 } 692 693 /* Global across all stack instances */ 694 void 695 ipsec_policy_g_init(void) 696 { 697 ipsec_action_cache = kmem_cache_create("ipsec_actions", 698 sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL, 699 ipsec_action_reclaim, NULL, NULL, 0); 700 ipsec_sel_cache = kmem_cache_create("ipsec_selectors", 701 sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL, 702 NULL, NULL, NULL, 0); 703 ipsec_pol_cache = kmem_cache_create("ipsec_policy", 704 sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL, 705 NULL, NULL, NULL, 0); 706 ipsec_info_cache = kmem_cache_create("ipsec_info", 707 sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL, 708 NULL, NULL, NULL, 0); 709 710 /* 711 * We want to be informed each time a stack is created or 712 * destroyed in the kernel, so we can maintain the 713 * set of ipsec_stack_t's. 714 */ 715 netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini); 716 } 717 718 /* 719 * Sort algorithm lists. 720 * 721 * I may need to split this based on 722 * authentication/encryption, and I may wish to have an administrator 723 * configure this list. 
/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipss->ipsec_nalgs[at];

	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipss->ipsec_sortlist[at][i];
			ipss->ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		}	/* Else just continue. */
	}

	/* Store holder in last slot. */
	ipss->ipsec_sortlist[at][i] = holder;
}

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	boolean_t copyback = B_FALSE;
	int i;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	int newcount = ipss->ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback) {
			ipss->ipsec_sortlist[at][i-1] =
			    ipss->ipsec_sortlist[at][i];
		} else if (ipss->ipsec_sortlist[at][i] == algid) {
			copyback = B_TRUE;
		}
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype, ns);
	ipss->ipsec_alglists[algtype][alg->alg_id] = alg;

	ipss->ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id, ns);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]);
	ipss->ipsec_alglists[algtype][algid] = NULL;

	ipss->ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid, ns);
}
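/*
 * Worked example (editorial, hypothetical key sizes): registering
 * algorithms whose alg_minbits are 128, 40, and 168, in that order,
 * leaves the sortlist ordered { 168, 128, 40 }: the algorithm with the
 * largest minimum key size sorts first, and earlier entries are treated
 * as more preferred during wildcard expansion below.
 */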
829 */ 830 831 ipsec_policy_head_t * 832 ipsec_system_policy(netstack_t *ns) 833 { 834 ipsec_stack_t *ipss = ns->netstack_ipsec; 835 ipsec_policy_head_t *h = &ipss->ipsec_system_policy; 836 837 IPPH_REFHOLD(h); 838 return (h); 839 } 840 841 ipsec_policy_head_t * 842 ipsec_inactive_policy(netstack_t *ns) 843 { 844 ipsec_stack_t *ipss = ns->netstack_ipsec; 845 ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy; 846 847 IPPH_REFHOLD(h); 848 return (h); 849 } 850 851 /* 852 * Lock inactive policy, then active policy, then exchange policy root 853 * pointers. 854 */ 855 void 856 ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive, 857 netstack_t *ns) 858 { 859 int af, dir; 860 avl_tree_t r1, r2; 861 862 rw_enter(&inactive->iph_lock, RW_WRITER); 863 rw_enter(&active->iph_lock, RW_WRITER); 864 865 r1 = active->iph_rulebyid; 866 r2 = inactive->iph_rulebyid; 867 active->iph_rulebyid = r2; 868 inactive->iph_rulebyid = r1; 869 870 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 871 ipsec_policy_hash_t *h1, *h2; 872 873 h1 = active->iph_root[dir].ipr_hash; 874 h2 = inactive->iph_root[dir].ipr_hash; 875 active->iph_root[dir].ipr_hash = h2; 876 inactive->iph_root[dir].ipr_hash = h1; 877 878 for (af = 0; af < IPSEC_NAF; af++) { 879 ipsec_policy_t *t1, *t2; 880 881 t1 = active->iph_root[dir].ipr_nonhash[af]; 882 t2 = inactive->iph_root[dir].ipr_nonhash[af]; 883 active->iph_root[dir].ipr_nonhash[af] = t2; 884 inactive->iph_root[dir].ipr_nonhash[af] = t1; 885 if (t1 != NULL) { 886 t1->ipsp_hash.hash_pp = 887 &(inactive->iph_root[dir].ipr_nonhash[af]); 888 } 889 if (t2 != NULL) { 890 t2->ipsp_hash.hash_pp = 891 &(active->iph_root[dir].ipr_nonhash[af]); 892 } 893 894 } 895 } 896 active->iph_gen++; 897 inactive->iph_gen++; 898 ipsec_update_present_flags(ns->netstack_ipsec); 899 rw_exit(&active->iph_lock); 900 rw_exit(&inactive->iph_lock); 901 } 902 903 /* 904 * Swap global policy primary/secondary. 905 */ 906 void 907 ipsec_swap_global_policy(netstack_t *ns) 908 { 909 ipsec_stack_t *ipss = ns->netstack_ipsec; 910 911 ipsec_swap_policy(&ipss->ipsec_system_policy, 912 &ipss->ipsec_inactive_policy, ns); 913 } 914 915 /* 916 * Clone one policy rule.. 917 */ 918 static ipsec_policy_t * 919 ipsec_copy_policy(const ipsec_policy_t *src) 920 { 921 ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 922 923 if (dst == NULL) 924 return (NULL); 925 926 /* 927 * Adjust refcounts of cloned state. 928 */ 929 IPACT_REFHOLD(src->ipsp_act); 930 src->ipsp_sel->ipsl_refs++; 931 932 HASH_NULL(dst, ipsp_hash); 933 dst->ipsp_refs = 1; 934 dst->ipsp_sel = src->ipsp_sel; 935 dst->ipsp_act = src->ipsp_act; 936 dst->ipsp_prio = src->ipsp_prio; 937 dst->ipsp_index = src->ipsp_index; 938 939 return (dst); 940 } 941 942 void 943 ipsec_insert_always(avl_tree_t *tree, void *new_node) 944 { 945 void *node; 946 avl_index_t where; 947 948 node = avl_find(tree, new_node, &where); 949 ASSERT(node == NULL); 950 avl_insert(tree, new_node, where); 951 } 952 953 954 static int 955 ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src, 956 ipsec_policy_t **dstp) 957 { 958 for (; src != NULL; src = src->ipsp_hash.hash_next) { 959 ipsec_policy_t *dst = ipsec_copy_policy(src); 960 if (dst == NULL) 961 return (ENOMEM); 962 963 HASHLIST_INSERT(dst, ipsp_hash, *dstp); 964 ipsec_insert_always(&dph->iph_rulebyid, dst); 965 } 966 return (0); 967 } 968 969 970 971 /* 972 * Make one policy head look exactly like another. 
/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph,
    netstack_t *ns)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph, ns);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];

		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph, ns);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	return (ipsec_copy_polhead(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns));
}

/*
 * Generic "do we have IPvN policy" answer.
 */
boolean_t
iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
{
	int i, hval;
	uint32_t valbit;
	ipsec_policy_root_t *ipr;
	ipsec_policy_t *ipp;

	if (v6) {
		valbit = IPSL_IPV6;
		hval = IPSEC_AF_V6;
	} else {
		valbit = IPSL_IPV4;
		hval = IPSEC_AF_V4;
	}

	ASSERT(RW_LOCK_HELD(&iph->iph_lock));
	for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
		if (ipr->ipr_nonhash[hval] != NULL)
			return (B_TRUE);
		for (i = 0; i < ipr->ipr_nchains; i++) {
			for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
			    ipp = ipp->ipsp_hash.hash_next) {
				if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
					return (B_TRUE);
			}
		}
	}

	return (B_FALSE);
}

/*
 * Extract the string from ipsec_policy_failure_msgs[type] and
 * log it.
 */
void
ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h,
    boolean_t secure, netstack_t *ns)
{
	char sbuf[INET6_ADDRSTRLEN];
	char dbuf[INET6_ADDRSTRLEN];
	char *s;
	char *d;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL) {
		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
	} else {
		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));
	}

	/* Always bump the policy failure counter. */
	ipss->ipsec_policy_failure_count[type]++;

	ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    ipsec_policy_failure_msgs[type], func_name,
"secure" : "not secure"), s, d); 1104 } 1105 1106 /* 1107 * Rate-limiting front-end to strlog() for AH and ESP. Uses the ndd variables 1108 * in /dev/ip and the same rate-limiting clock so that there's a single 1109 * knob to turn to throttle the rate of messages. 1110 */ 1111 void 1112 ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl, 1113 char *fmt, ...) 1114 { 1115 va_list adx; 1116 hrtime_t current = gethrtime(); 1117 ip_stack_t *ipst = ns->netstack_ip; 1118 ipsec_stack_t *ipss = ns->netstack_ipsec; 1119 1120 sl |= SL_CONSOLE; 1121 /* 1122 * Throttle logging to stop syslog from being swamped. If variable 1123 * 'ipsec_policy_log_interval' is zero, don't log any messages at 1124 * all, otherwise log only one message every 'ipsec_policy_log_interval' 1125 * msec. Convert interval (in msec) to hrtime (in nsec). 1126 */ 1127 1128 if (ipst->ips_ipsec_policy_log_interval) { 1129 if (ipss->ipsec_policy_failure_last + 1130 ((hrtime_t)ipst->ips_ipsec_policy_log_interval * 1131 (hrtime_t)1000000) <= current) { 1132 va_start(adx, fmt); 1133 (void) vstrlog(mid, sid, level, sl, fmt, adx); 1134 va_end(adx); 1135 ipss->ipsec_policy_failure_last = current; 1136 } 1137 } 1138 } 1139 1140 void 1141 ipsec_config_flush(netstack_t *ns) 1142 { 1143 ipsec_stack_t *ipss = ns->netstack_ipsec; 1144 1145 rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER); 1146 ipsec_polhead_flush(&ipss->ipsec_system_policy, ns); 1147 ipss->ipsec_next_policy_index = 1; 1148 rw_exit(&ipss->ipsec_system_policy.iph_lock); 1149 ipsec_action_reclaim_stack(ns); 1150 } 1151 1152 /* 1153 * Clip a policy's min/max keybits vs. the capabilities of the 1154 * algorithm. 1155 */ 1156 static void 1157 act_alg_adjust(uint_t algtype, uint_t algid, 1158 uint16_t *minbits, uint16_t *maxbits, netstack_t *ns) 1159 { 1160 ipsec_stack_t *ipss = ns->netstack_ipsec; 1161 ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid]; 1162 1163 if (algp != NULL) { 1164 /* 1165 * If passed-in minbits is zero, we assume the caller trusts 1166 * us with setting the minimum key size. We pick the 1167 * algorithms DEFAULT key size for the minimum in this case. 1168 */ 1169 if (*minbits == 0) { 1170 *minbits = algp->alg_default_bits; 1171 ASSERT(*minbits >= algp->alg_minbits); 1172 } else { 1173 *minbits = MAX(MIN(*minbits, algp->alg_maxbits), 1174 algp->alg_minbits); 1175 } 1176 if (*maxbits == 0) 1177 *maxbits = algp->alg_maxbits; 1178 else 1179 *maxbits = MIN(MAX(*maxbits, algp->alg_minbits), 1180 algp->alg_maxbits); 1181 ASSERT(*minbits <= *maxbits); 1182 } else { 1183 *minbits = 0; 1184 *maxbits = 0; 1185 } 1186 } 1187 1188 /* 1189 * Check an action's requested algorithms against the algorithms currently 1190 * loaded in the system. 
/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}

/*
 * Combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss)			\
	((wild) ? (ipss->ipsec_sortlist[type][idx]) : (idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}
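/*
 * Sizing example (editorial, hypothetical algorithms): with ESP
 * encryption wildcarded over three loaded algorithms and ESP
 * authentication wildcarded over two (AH not in use), action_count is
 * 3 * 2 = 6, and the nested loops above emit one fully-specified action
 * per (encr, eauth) pair, most-preferred combination first.
 */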
/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg) || !ipp->ipp_use_ah;
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	ipp->ipp_auth_alg = req->ipsr_auth_alg;
	ipp->ipp_esp_auth_alg = req->ipsr_esp_auth_alg;
}

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp,
    netstack_t *ns)
{
	struct ipsec_act act;

	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp, ns);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume the caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume the caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a policy head's first relevant inbound rule back to an
 * ipsec_req_t (more backwards compat).  We assume the caller has already
 * zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}
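/*
 * (Editorial sketch, not in the original file) A round-trip through the
 * compat conversions above.  Assumes a netstack_t *ns is in scope and
 * that the named PF_KEY algorithms are loaded; with every algorithm
 * dimension pinned, no wildcard expansion happens:
 */
#if 0
	ipsec_req_t req;
	ipsec_act_t *actv;
	uint_t nact;

	bzero(&req, sizeof (req));
	req.ipsr_esp_req = IPSEC_PREF_REQUIRED;
	req.ipsr_esp_alg = SADB_EALG_3DESCBC;		/* fixed, not wild */
	req.ipsr_esp_auth_alg = SADB_AALG_SHA1HMAC;	/* fixed, not wild */

	ipsec_actvec_from_req(&req, &actv, &nact, ns);
	if (actv != NULL) {
		ASSERT(nact == 1);	/* nothing was wildcarded */
		ipsec_actvec_free(actv, nact);
	}
#endif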
/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	mutex_enter(&connp->conn_lock);
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (ipl->ipl_in_action != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
			goto done;
		}
		if (ipl->ipl_in_policy != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
			    req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	mutex_exit(&connp->conn_lock);
	return (rv);
}

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * When outbound policy is not cached, look it up the hard way and attach
 * an ipsec_out_t to the packet..
 */
static mblk_t *
ipsec_attach_global_policy(mblk_t *mp, conn_t *connp, ipsec_selector_t *sel,
    netstack_t *ns)
{
	ipsec_policy_t *p;

	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns);

	if (p == NULL)
		return (NULL);
	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns));
}

/*
 * We have an ipsec_out already, but don't have cached policy; fill it in
 * with the right actions.
 */
static mblk_t *
ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
    ipsec_selector_t *sel, netstack_t *ns)
{
	ipsec_out_t *io;
	ipsec_policy_t *p;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	if (io->ipsec_out_policy == NULL) {
		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns);
		io->ipsec_out_policy = p;
	}
	return (ipsec_mp);
}

/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present,
    ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;
	netstack_t *ns;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ns = ii->ipsec_in_ns;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp, ns);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (first_mp);
}

/*
 * Check that the packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ii->ipsec_in_secure);
	ASSERT(!ii->ipsec_in_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
"Be liberal in what you receive" 1642 * should not apply in this case. ;) 1643 */ 1644 1645 if (ah_mask != 0 && 1646 ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) { 1647 *reason = "AH inner header mismatch"; 1648 *counter = DROPPER(ipss, ipds_spd_ah_innermismatch); 1649 return (B_FALSE); 1650 } 1651 if (esp_mask != 0 && 1652 esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) { 1653 *reason = "ESP inner header mismatch"; 1654 *counter = DROPPER(ipss, ipds_spd_esp_innermismatch); 1655 return (B_FALSE); 1656 } 1657 return (B_TRUE); 1658 } 1659 1660 static boolean_t 1661 ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, 1662 ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter) 1663 { 1664 boolean_t ret = B_TRUE; 1665 ipsec_prot_t *ipp; 1666 ipsa_t *ah_assoc; 1667 ipsa_t *esp_assoc; 1668 boolean_t decaps; 1669 netstack_t *ns = ii->ipsec_in_ns; 1670 ipsec_stack_t *ipss = ns->netstack_ipsec; 1671 1672 ASSERT((ipha == NULL && ip6h != NULL) || 1673 (ip6h == NULL && ipha != NULL)); 1674 1675 if (ii->ipsec_in_loopback) { 1676 /* 1677 * Besides accepting pointer-equivalent actions, we also 1678 * accept any ICMP errors we generated for ourselves, 1679 * regardless of policy. If we do not wish to make this 1680 * assumption in the future, check here, and where 1681 * icmp_loopback is initialized in ip.c and ip6.c. (Look for 1682 * ipsec_out_icmp_loopback.) 1683 */ 1684 if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback) 1685 return (B_TRUE); 1686 1687 /* Deep compare necessary here?? */ 1688 *counter = DROPPER(ipss, ipds_spd_loopback_mismatch); 1689 *reason = "loopback policy mismatch"; 1690 return (B_FALSE); 1691 } 1692 ASSERT(!ii->ipsec_in_icmp_loopback); 1693 1694 ah_assoc = ii->ipsec_in_ah_sa; 1695 esp_assoc = ii->ipsec_in_esp_sa; 1696 1697 decaps = ii->ipsec_in_decaps; 1698 1699 switch (ap->ipa_act.ipa_type) { 1700 case IPSEC_ACT_DISCARD: 1701 case IPSEC_ACT_REJECT: 1702 /* Should "fail hard" */ 1703 *counter = DROPPER(ipss, ipds_spd_explicit); 1704 *reason = "blocked by policy"; 1705 return (B_FALSE); 1706 1707 case IPSEC_ACT_BYPASS: 1708 case IPSEC_ACT_CLEAR: 1709 *counter = DROPPER(ipss, ipds_spd_got_secure); 1710 *reason = "expected clear, got protected"; 1711 return (B_FALSE); 1712 1713 case IPSEC_ACT_APPLY: 1714 ipp = &ap->ipa_act.ipa_apply; 1715 /* 1716 * As of now we do the simple checks of whether 1717 * the datagram has gone through the required IPSEC 1718 * protocol constraints or not. We might have more 1719 * in the future like sensitive levels, key bits, etc. 1720 * If it fails the constraints, check whether we would 1721 * have accepted this if it had come in clear. 1722 */ 1723 if (ipp->ipp_use_ah) { 1724 if (ah_assoc == NULL) { 1725 ret = ipsec_inbound_accept_clear(mp, ipha, 1726 ip6h); 1727 *counter = DROPPER(ipss, ipds_spd_got_clear); 1728 *reason = "unprotected not accepted"; 1729 break; 1730 } 1731 ASSERT(ah_assoc != NULL); 1732 ASSERT(ipp->ipp_auth_alg != 0); 1733 1734 if (ah_assoc->ipsa_auth_alg != 1735 ipp->ipp_auth_alg) { 1736 *counter = DROPPER(ipss, ipds_spd_bad_ahalg); 1737 *reason = "unacceptable ah alg"; 1738 ret = B_FALSE; 1739 break; 1740 } 1741 } else if (ah_assoc != NULL) { 1742 /* 1743 * Don't allow this. Check IPSEC NOTE above 1744 * ip_fanout_proto(). 
			*counter = DROPPER(ipss, ipds_spd_got_ah);
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay. But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet.
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return (ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid));
}
/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fall back to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}

/*
 * Called to check policy on a latched connection, both from this file
 * and from tcp.c.
 */
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    conn_t *connp)
{
	netstack_t *ns = ii->ipsec_in_ns;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ipl->ipl_ids_latched == B_TRUE);

	if (!ii->ipsec_in_loopback) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if ((ii->ipsec_in_ah_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
			*counter = DROPPER(ipss, ipds_spd_ah_badid);
			*reason = "AH identity mismatch";
			return (B_FALSE);
		}

		if ((ii->ipsec_in_esp_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
			*counter = DROPPER(ipss, ipds_spd_esp_badid);
			*reason = "ESP identity mismatch";
			return (B_FALSE);
		}

		/*
		 * We can fudge pkt_unique from the conn_t because we're
		 * latched.  In DEBUG kernels (see conn_to_unique()'s
		 * implementation), verify this even if it REALLY slows
		 * things down.
		 */
		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h))) {
			return (B_FALSE);
		}
	}

	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
	    ipha, ip6h, reason, counter));
}
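/*
 * (Editorial note) SA_UNIQUE_ID() packs the remote port, local port,
 * protocol, and a tunnel indicator into a single uint64_t (see
 * ipsec_impl.h for the exact layout); ipsec_check_ipsecin_unique() then
 * compares it, under the SA's ipsa_unique_mask, against the SA's latched
 * ipsa_unique_id.  A minimal sketch of that comparison, with hypothetical
 * connp/sa locals:
 */
#if 0
	uint64_t pkt_unique = SA_UNIQUE_ID(connp->conn_fport,
	    connp->conn_lport, connp->conn_ulp, 0);
	boolean_t ok = (sa->ipsa_unique_id ==
	    (pkt_unique & sa->ipsa_unique_mask));
#endif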
1943 */ 1944 static mblk_t * 1945 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1946 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) 1947 { 1948 ipsec_in_t *ii; 1949 ipsec_action_t *ap; 1950 const char *reason = "no policy actions found"; 1951 mblk_t *data_mp, *ipsec_mp; 1952 ipsec_stack_t *ipss = ns->netstack_ipsec; 1953 ip_stack_t *ipst = ns->netstack_ip; 1954 kstat_named_t *counter; 1955 1956 counter = DROPPER(ipss, ipds_spd_got_secure); 1957 1958 data_mp = first_mp->b_cont; 1959 ipsec_mp = first_mp; 1960 1961 ASSERT(ipsp != NULL); 1962 1963 ASSERT((ipha == NULL && ip6h != NULL) || 1964 (ip6h == NULL && ipha != NULL)); 1965 1966 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1967 1968 if (ii->ipsec_in_loopback) 1969 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1970 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1971 ASSERT(ii->ipsec_in_secure); 1972 1973 if (ii->ipsec_in_action != NULL) { 1974 /* 1975 * this can happen if we do a double policy-check on a packet 1976 * Would be nice to be able to delete this test.. 1977 */ 1978 IPACT_REFRELE(ii->ipsec_in_action); 1979 } 1980 ASSERT(ii->ipsec_in_action == NULL); 1981 1982 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1983 reason = "inbound AH and ESP identities differ"; 1984 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1985 goto drop; 1986 } 1987 1988 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1989 goto drop; 1990 1991 /* 1992 * Ok, now loop through the possible actions and see if any 1993 * of them work for us. 1994 */ 1995 1996 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 1997 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 1998 ipha, ip6h, &reason, &counter)) { 1999 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2000 IPPOL_REFRELE(ipsp, ns); 2001 return (first_mp); 2002 } 2003 } 2004 drop: 2005 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 2006 "ipsec inbound policy mismatch: %s, packet dropped\n", 2007 reason); 2008 IPPOL_REFRELE(ipsp, ns); 2009 ASSERT(ii->ipsec_in_action == NULL); 2010 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2011 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2012 &ipss->ipsec_spd_dropper); 2013 return (NULL); 2014 } 2015 2016 /* 2017 * sleazy prefix-length-based compare. 2018 * another inlining candidate.. 2019 */ 2020 boolean_t 2021 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 2022 { 2023 int offset = pfxlen>>3; 2024 int bitsleft = pfxlen & 7; 2025 uint8_t *addr2 = (uint8_t *)addr2p; 2026 2027 /* 2028 * and there was much evil.. 2029 * XXX should inline-expand the bcmp here and do this 32 bits 2030 * or 64 bits at a time.. 2031 */ 2032 return ((bcmp(addr1, addr2, offset) == 0) && 2033 ((bitsleft == 0) || 2034 (((addr1[offset] ^ addr2[offset]) & 2035 (0xff<<(8-bitsleft))) == 0))); 2036 } 2037 2038 static ipsec_policy_t * 2039 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 2040 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 2041 { 2042 ipsec_selkey_t *isel; 2043 ipsec_policy_t *p; 2044 int bpri = best ? 
best->ipsp_prio : 0; 2045 2046 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 2047 uint32_t valid; 2048 2049 if (p->ipsp_prio <= bpri) 2050 continue; 2051 isel = &p->ipsp_sel->ipsl_key; 2052 valid = isel->ipsl_valid; 2053 2054 if ((valid & IPSL_PROTOCOL) && 2055 (isel->ipsl_proto != sel->ips_protocol)) 2056 continue; 2057 2058 if ((valid & IPSL_REMOTE_ADDR) && 2059 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 2060 isel->ipsl_remote_pfxlen, 2061 &sel->ips_remote_addr_v6)) 2062 continue; 2063 2064 if ((valid & IPSL_LOCAL_ADDR) && 2065 !ip_addr_match((uint8_t *)&isel->ipsl_local, 2066 isel->ipsl_local_pfxlen, 2067 &sel->ips_local_addr_v6)) 2068 continue; 2069 2070 if ((valid & IPSL_REMOTE_PORT) && 2071 isel->ipsl_rport != sel->ips_remote_port) 2072 continue; 2073 2074 if ((valid & IPSL_LOCAL_PORT) && 2075 isel->ipsl_lport != sel->ips_local_port) 2076 continue; 2077 2078 if (!is_icmp_inv_acq) { 2079 if ((valid & IPSL_ICMP_TYPE) && 2080 (isel->ipsl_icmp_type > sel->ips_icmp_type || 2081 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 2082 continue; 2083 } 2084 2085 if ((valid & IPSL_ICMP_CODE) && 2086 (isel->ipsl_icmp_code > sel->ips_icmp_code || 2087 isel->ipsl_icmp_code_end < 2088 sel->ips_icmp_code)) { 2089 continue; 2090 } 2091 } else { 2092 /* 2093 * special case for icmp inverse acquire 2094 * we only want policies that aren't drop/pass 2095 */ 2096 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2097 continue; 2098 } 2099 2100 /* we matched all the packet-port-field selectors! */ 2101 best = p; 2102 bpri = p->ipsp_prio; 2103 } 2104 2105 return (best); 2106 } 2107 2108 /* 2109 * Try to find and return the best policy entry under a given policy 2110 * root for a given set of selectors; the first parameter "best" is 2111 * the current best policy so far. If "best" is non-null, we have a 2112 * reference to it. We return a reference to a policy; if that policy 2113 * is not the original "best", we need to release that reference 2114 * before returning. 2115 */ 2116 ipsec_policy_t * 2117 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2118 int direction, ipsec_selector_t *sel, netstack_t *ns) 2119 { 2120 ipsec_policy_t *curbest; 2121 ipsec_policy_root_t *root; 2122 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2123 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2124 2125 curbest = best; 2126 root = &head->iph_root[direction]; 2127 2128 #ifdef DEBUG 2129 if (is_icmp_inv_acq) { 2130 if (sel->ips_isv4) { 2131 if (sel->ips_protocol != IPPROTO_ICMP) { 2132 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2133 " expecting icmp, got %d", sel->ips_protocol); 2134 } 2135 } else { 2136 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2137 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2138 " expecting icmpv6, got %d", sel->ips_protocol); 2139 } 2140 } 2141 } 2142 #endif 2143 2144 rw_enter(&head->iph_lock, RW_READER); 2145 2146 if (root->ipr_nchains > 0) { 2147 curbest = ipsec_find_policy_chain(curbest, 2148 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2149 is_icmp_inv_acq); 2150 } 2151 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2152 is_icmp_inv_acq); 2153 2154 /* 2155 * Adjust reference counts if we found anything new. 
2156 */ 2157 if (curbest != best) { 2158 ASSERT(curbest != NULL); 2159 IPPOL_REFHOLD(curbest); 2160 2161 if (best != NULL) { 2162 IPPOL_REFRELE(best, ns); 2163 } 2164 } 2165 2166 rw_exit(&head->iph_lock); 2167 2168 return (curbest); 2169 } 2170 2171 /* 2172 * Find the best system policy (either global or per-interface) which 2173 * applies to the given selector; look in all the relevant policy roots 2174 * to figure out which policy wins. 2175 * 2176 * Returns a reference to a policy; caller must release this 2177 * reference when done. 2178 */ 2179 ipsec_policy_t * 2180 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2181 ipsec_selector_t *sel, netstack_t *ns) 2182 { 2183 ipsec_policy_t *p; 2184 ipsec_stack_t *ipss = ns->netstack_ipsec; 2185 2186 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2187 direction, sel, ns); 2188 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2189 p = ipsec_find_policy_head(p, connp->conn_policy, 2190 direction, sel, ns); 2191 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2192 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2193 direction, sel, ns); 2194 } 2195 2196 return (p); 2197 } 2198 2199 /* 2200 * Check with global policy and see whether this inbound 2201 * packet meets the policy constraints. 2202 * 2203 * Locate appropriate policy from global policy, supplemented by the 2204 * conn's configured and/or cached policy if the conn is supplied. 2205 * 2206 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2207 * encrypted packet to see if they match. 2208 * 2209 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2210 * floor. 2211 */ 2212 mblk_t * 2213 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2214 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) 2215 { 2216 ipsec_policy_t *p; 2217 ipsec_selector_t sel; 2218 mblk_t *data_mp, *ipsec_mp; 2219 boolean_t policy_present; 2220 kstat_named_t *counter; 2221 ipsec_in_t *ii = NULL; 2222 uint64_t pkt_unique; 2223 ipsec_stack_t *ipss = ns->netstack_ipsec; 2224 ip_stack_t *ipst = ns->netstack_ip; 2225 2226 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2227 ipsec_mp = mctl_present ? first_mp : NULL; 2228 2229 sel.ips_is_icmp_inv_acq = 0; 2230 2231 ASSERT((ipha == NULL && ip6h != NULL) || 2232 (ip6h == NULL && ipha != NULL)); 2233 2234 if (ipha != NULL) 2235 policy_present = ipss->ipsec_inbound_v4_policy_present; 2236 else 2237 policy_present = ipss->ipsec_inbound_v6_policy_present; 2238 2239 if (!policy_present && connp == NULL) { 2240 /* 2241 * No global policy and no per-socket policy; 2242 * just pass it back (but we shouldn't get here in that case) 2243 */ 2244 return (first_mp); 2245 } 2246 2247 if (ipsec_mp != NULL) { 2248 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2249 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2250 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2251 } 2252 2253 /* 2254 * If we have cached policy, use it. 2255 * Otherwise consult system policy. 2256 */ 2257 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2258 p = connp->conn_latch->ipl_in_policy; 2259 if (p != NULL) { 2260 IPPOL_REFHOLD(p); 2261 } 2262 /* 2263 * Fudge sel for UNIQUE_ID setting below. 2264 */ 2265 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2266 } else { 2267 /* Initialize the ports in the selector */ 2268 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2269 SEL_NONE) == SELRET_NOMEM) { 2270 /* 2271 * Technically not a policy mismatch, but it is 2272 * an internal failure. 
2273 */ 2274 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2275 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE, ns); 2276 counter = DROPPER(ipss, ipds_spd_nomem); 2277 goto fail; 2278 } 2279 2280 /* 2281 * Find the policy which best applies. 2282 * 2283 * If we find global policy, we should look at both 2284 * local policy and global policy and see which is 2285 * stronger and match accordingly. 2286 * 2287 * If we don't find a global policy, check with 2288 * local policy alone. 2289 */ 2290 2291 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 2292 ns); 2293 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2294 sel.ips_local_port, sel.ips_protocol, 0); 2295 } 2296 2297 if (p == NULL) { 2298 if (ipsec_mp == NULL) { 2299 /* 2300 * We have no policy; default to succeeding. 2301 * XXX paranoid system design doesn't do this. 2302 */ 2303 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2304 return (first_mp); 2305 } else { 2306 counter = DROPPER(ipss, ipds_spd_got_secure); 2307 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2308 "ipsec_check_global_policy", ipha, ip6h, B_TRUE, 2309 ns); 2310 goto fail; 2311 } 2312 } 2313 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2314 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2315 pkt_unique, ns)); 2316 } 2317 if (p->ipsp_act->ipa_allow_clear) { 2318 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2319 IPPOL_REFRELE(p, ns); 2320 return (first_mp); 2321 } 2322 IPPOL_REFRELE(p, ns); 2323 /* 2324 * If we reach here, we will drop the packet because it failed the 2325 * global policy check because the packet was cleartext, and it 2326 * should not have been. 2327 */ 2328 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2329 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns); 2330 counter = DROPPER(ipss, ipds_spd_got_clear); 2331 2332 fail: 2333 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2334 &ipss->ipsec_spd_dropper); 2335 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2336 return (NULL); 2337 } 2338 2339 /* 2340 * We check whether an inbound datagram is a valid one 2341 * to accept in clear. If it is secure, it is the job 2342 * of IPSEC to log information appropriately if it 2343 * suspects that it may not be the real one. 2344 * 2345 * It is called only while fanning out to the ULP 2346 * where ULP accepts only secure data and the incoming 2347 * is clear. Usually we never accept clear datagrams in 2348 * such cases. ICMP is the only exception. 2349 * 2350 * NOTE : We don't call this function if the client (ULP) 2351 * is willing to accept things in clear. 2352 */ 2353 boolean_t 2354 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2355 { 2356 ushort_t iph_hdr_length; 2357 icmph_t *icmph; 2358 icmp6_t *icmp6; 2359 uint8_t *nexthdrp; 2360 2361 ASSERT((ipha != NULL && ip6h == NULL) || 2362 (ipha == NULL && ip6h != NULL)); 2363 2364 if (ip6h != NULL) { 2365 iph_hdr_length = ip_hdr_length_v6(mp, ip6h); 2366 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, 2367 &nexthdrp)) { 2368 return (B_FALSE); 2369 } 2370 if (*nexthdrp != IPPROTO_ICMPV6) 2371 return (B_FALSE); 2372 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]); 2373 /* Match IPv6 ICMP policy as closely as IPv4 as possible. */ 2374 switch (icmp6->icmp6_type) { 2375 case ICMP6_PARAM_PROB: 2376 /* Corresponds to port/proto unreach in IPv4. */ 2377 case ICMP6_ECHO_REQUEST: 2378 /* Just like IPv4. 
*/ 2379 return (B_FALSE); 2380 2381 case MLD_LISTENER_QUERY: 2382 case MLD_LISTENER_REPORT: 2383 case MLD_LISTENER_REDUCTION: 2384 /* 2385 * XXX Separate NDD in IPv4 -- what about here? 2386 * Plus, mcast is important to ND. 2387 */ 2388 case ICMP6_DST_UNREACH: 2389 /* Corresponds to HOST/NET unreachable in IPv4. */ 2390 case ICMP6_PACKET_TOO_BIG: 2391 case ICMP6_ECHO_REPLY: 2392 /* These are trusted in IPv4. */ 2393 case ND_ROUTER_SOLICIT: 2394 case ND_ROUTER_ADVERT: 2395 case ND_NEIGHBOR_SOLICIT: 2396 case ND_NEIGHBOR_ADVERT: 2397 case ND_REDIRECT: 2398 /* Trust ND messages for now. */ 2399 case ICMP6_TIME_EXCEEDED: 2400 default: 2401 return (B_TRUE); 2402 } 2403 } else { 2404 /* 2405 * If it is not ICMP, fail this request. 2406 */ 2407 if (ipha->ipha_protocol != IPPROTO_ICMP) { 2408 #ifdef FRAGCACHE_DEBUG 2409 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n", 2410 ipha->ipha_protocol); 2411 #endif 2412 return (B_FALSE); 2413 } 2414 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2415 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2416 /* 2417 * It is an insecure ICMP message. Check to see whether we are 2418 * willing to accept this one. 2419 */ 2420 2421 switch (icmph->icmph_type) { 2422 case ICMP_ECHO_REPLY: 2423 case ICMP_TIME_STAMP_REPLY: 2424 case ICMP_INFO_REPLY: 2425 case ICMP_ROUTER_ADVERTISEMENT: 2426 /* 2427 * We should not encourage clear replies if this 2428 * client expects secure. If somebody is replying 2429 * in clear, some malicious user watching both the 2430 * request and reply can do chosen-plaintext attacks. 2431 * With global policy we might be just expecting secure 2432 * but sending out clear. We don't know what the right 2433 * thing is. We can't do much here as we can't control 2434 * the sender. Till we are sure of what to do, 2435 * accept them. 2436 */ 2437 return (B_TRUE); 2438 case ICMP_ECHO_REQUEST: 2439 case ICMP_TIME_STAMP_REQUEST: 2440 case ICMP_INFO_REQUEST: 2441 case ICMP_ADDRESS_MASK_REQUEST: 2442 case ICMP_ROUTER_SOLICITATION: 2443 case ICMP_ADDRESS_MASK_REPLY: 2444 /* 2445 * Don't accept this as somebody could be sending 2446 * us plain text to get encrypted data. If we reply, 2447 * it will lead to a chosen-plaintext attack. 2448 */ 2449 return (B_FALSE); 2450 case ICMP_DEST_UNREACHABLE: 2451 switch (icmph->icmph_code) { 2452 case ICMP_FRAGMENTATION_NEEDED: 2453 /* 2454 * Be in sync with icmp_inbound, where we have 2455 * already set ire_max_frag. 2456 */ 2457 #ifdef FRAGCACHE_DEBUG 2458 cmn_err(CE_WARN, "ICMP frag needed\n"); 2459 #endif 2460 return (B_TRUE); 2461 case ICMP_HOST_UNREACHABLE: 2462 case ICMP_NET_UNREACHABLE: 2463 /* 2464 * By accepting, we could reset a connection. 2465 * How do we solve the problem of some 2466 * intermediate router sending insecure ICMP 2467 * messages? 2468 */ 2469 return (B_TRUE); 2470 case ICMP_PORT_UNREACHABLE: 2471 case ICMP_PROTOCOL_UNREACHABLE: 2472 default: 2473 return (B_FALSE); 2474 } 2475 case ICMP_SOURCE_QUENCH: 2476 /* 2477 * If this is an attack, TCP will slow start 2478 * because of this. Is it very harmful?
2479 */ 2480 return (B_TRUE); 2481 case ICMP_PARAM_PROBLEM: 2482 return (B_FALSE); 2483 case ICMP_TIME_EXCEEDED: 2484 return (B_TRUE); 2485 case ICMP_REDIRECT: 2486 return (B_FALSE); 2487 default : 2488 return (B_FALSE); 2489 } 2490 } 2491 } 2492 2493 void 2494 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2495 { 2496 mutex_enter(&ipl->ipl_lock); 2497 2498 if (ipl->ipl_ids_latched) { 2499 /* I lost, someone else got here before me */ 2500 mutex_exit(&ipl->ipl_lock); 2501 return; 2502 } 2503 2504 if (local != NULL) 2505 IPSID_REFHOLD(local); 2506 if (remote != NULL) 2507 IPSID_REFHOLD(remote); 2508 2509 ipl->ipl_local_cid = local; 2510 ipl->ipl_remote_cid = remote; 2511 ipl->ipl_ids_latched = B_TRUE; 2512 mutex_exit(&ipl->ipl_lock); 2513 } 2514 2515 void 2516 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii) 2517 { 2518 ipsa_t *sa; 2519 2520 if (!ipl->ipl_ids_latched) { 2521 ipsid_t *local = NULL; 2522 ipsid_t *remote = NULL; 2523 2524 if (!ii->ipsec_in_loopback) { 2525 if (ii->ipsec_in_esp_sa != NULL) 2526 sa = ii->ipsec_in_esp_sa; 2527 else 2528 sa = ii->ipsec_in_ah_sa; 2529 ASSERT(sa != NULL); 2530 local = sa->ipsa_dst_cid; 2531 remote = sa->ipsa_src_cid; 2532 } 2533 ipsec_latch_ids(ipl, local, remote); 2534 } 2535 ipl->ipl_in_action = ii->ipsec_in_action; 2536 IPACT_REFHOLD(ipl->ipl_in_action); 2537 } 2538 2539 /* 2540 * Check whether the policy constraints are met either for an 2541 * inbound datagram; called from IP in numerous places. 2542 * 2543 * Note that this is not a chokepoint for inbound policy checks; 2544 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy() 2545 */ 2546 mblk_t * 2547 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, 2548 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2549 { 2550 ipsec_in_t *ii; 2551 boolean_t ret; 2552 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp; 2553 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL; 2554 ipsec_latch_t *ipl; 2555 uint64_t unique_id; 2556 ipsec_stack_t *ipss; 2557 ip_stack_t *ipst; 2558 netstack_t *ns; 2559 2560 ASSERT(connp != NULL); 2561 ipl = connp->conn_latch; 2562 ns = connp->conn_netstack; 2563 ipss = ns->netstack_ipsec; 2564 ipst = ns->netstack_ip; 2565 2566 if (ipsec_mp == NULL) { 2567 clear: 2568 /* 2569 * This is the case where the incoming datagram is 2570 * cleartext and we need to see whether this client 2571 * would like to receive such untrustworthy things from 2572 * the wire. 2573 */ 2574 ASSERT(mp != NULL); 2575 2576 if (ipl != NULL) { 2577 /* 2578 * Policy is cached in the conn. 2579 */ 2580 if ((ipl->ipl_in_policy != NULL) && 2581 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) { 2582 ret = ipsec_inbound_accept_clear(mp, 2583 ipha, ip6h); 2584 if (ret) { 2585 BUMP_MIB(&ipst->ips_ip_mib, 2586 ipsecInSucceeded); 2587 return (first_mp); 2588 } else { 2589 ipsec_log_policy_failure( 2590 IPSEC_POLICY_MISMATCH, 2591 "ipsec_check_inbound_policy", ipha, 2592 ip6h, B_FALSE, ns); 2593 ip_drop_packet(first_mp, B_TRUE, NULL, 2594 NULL, 2595 DROPPER(ipss, ipds_spd_got_clear), 2596 &ipss->ipsec_spd_dropper); 2597 BUMP_MIB(&ipst->ips_ip_mib, 2598 ipsecInFailed); 2599 return (NULL); 2600 } 2601 } else { 2602 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2603 return (first_mp); 2604 } 2605 } else { 2606 /* 2607 * As this is a non-hardbound connection we need 2608 * to look at both per-socket policy and global 2609 * policy. 
As this is cleartext, mark the mp as 2610 * M_DATA in case it is an ICMP error being 2611 * reported before calling ipsec_check_global_policy 2612 * so that it does not mistake it for IPSEC_IN. 2613 */ 2614 uchar_t db_type = mp->b_datap->db_type; 2615 mp->b_datap->db_type = M_DATA; 2616 first_mp = ipsec_check_global_policy(first_mp, connp, 2617 ipha, ip6h, mctl_present, ns); 2618 if (first_mp != NULL) 2619 mp->b_datap->db_type = db_type; 2620 return (first_mp); 2621 } 2622 } 2623 /* 2624 * If it is inbound, check whether the attached message 2625 * is secure or not. We have a special case for ICMP, 2626 * where we have an IPSEC_IN message and the attached 2627 * message is not secure. See icmp_inbound_error_fanout 2628 * for details. 2629 */ 2630 ASSERT(ipsec_mp != NULL); 2631 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2632 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2633 2634 if (!ii->ipsec_in_secure) 2635 goto clear; 2636 2637 /* 2638 * mp->b_cont could be either an M_CTL message 2639 * for icmp errors being sent up or an M_DATA message. 2640 */ 2641 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA); 2642 2643 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2644 2645 if (ipl == NULL) { 2646 /* 2647 * We don't have policies cached in the conn 2648 * for this stream. So, look at the global 2649 * policy. It will check against conn or global 2650 * depending on whichever is stronger. 2651 */ 2652 return (ipsec_check_global_policy(first_mp, connp, 2653 ipha, ip6h, mctl_present, ns)); 2654 } 2655 2656 if (ipl->ipl_in_action != NULL) { 2657 /* Policy is cached & latched; fast(er) path */ 2658 const char *reason; 2659 kstat_named_t *counter; 2660 2661 if (ipsec_check_ipsecin_latch(ii, mp, ipl, 2662 ipha, ip6h, &reason, &counter, connp)) { 2663 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2664 return (first_mp); 2665 } 2666 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, 2667 SL_ERROR|SL_WARN|SL_CONSOLE, 2668 "ipsec inbound policy mismatch: %s, packet dropped\n", 2669 reason); 2670 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2671 &ipss->ipsec_spd_dropper); 2672 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2673 return (NULL); 2674 } else if (ipl->ipl_in_policy == NULL) { 2675 ipsec_weird_null_inbound_policy++; 2676 return (first_mp); 2677 } 2678 2679 unique_id = conn_to_unique(connp, mp, ipha, ip6h); 2680 IPPOL_REFHOLD(ipl->ipl_in_policy); 2681 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy, 2682 ipha, ip6h, unique_id, ns); 2683 /* 2684 * NOTE: ipsecIn{Failed,Succeeded} bumped by 2685 * ipsec_check_ipsecin_policy(). 2686 */ 2687 if (first_mp != NULL) 2688 ipsec_latch_inbound(ipl, ii); 2689 return (first_mp); 2690 } 2691 2692 /* 2693 * Returns: 2694 * 2695 * SELRET_NOMEM --> msgpullup() needed to gather things failed. 2696 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2697 * gathering, the initial fragment is too short for 2698 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2699 * set. 2700 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2701 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2702 * should put this packet in a fragment-gathering queue. 2703 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2704 * are set. 2705 */ 2706 static selret_t 2707 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2708 ip6_t *ip6h, uint8_t sel_flags) 2709 { 2710 uint16_t *ports; 2711 ushort_t hdr_len; 2712 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases...
*/ 2713 mblk_t *spare_mp = NULL; 2714 uint8_t *nexthdrp; 2715 uint8_t nexthdr; 2716 uint8_t *typecode; 2717 uint8_t check_proto; 2718 ip6_pkt_t ipp; 2719 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2720 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2721 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2722 2723 ASSERT((ipha == NULL && ip6h != NULL) || 2724 (ipha != NULL && ip6h == NULL)); 2725 2726 if (ip6h != NULL) { 2727 if (is_icmp) 2728 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2729 2730 check_proto = IPPROTO_ICMPV6; 2731 sel->ips_isv4 = B_FALSE; 2732 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2733 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2734 2735 bzero(&ipp, sizeof (ipp)); 2736 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2737 2738 nexthdr = ip6h->ip6_nxt; 2739 switch (nexthdr) { 2740 case IPPROTO_HOPOPTS: 2741 case IPPROTO_ROUTING: 2742 case IPPROTO_DSTOPTS: 2743 case IPPROTO_FRAGMENT: 2744 /* 2745 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2746 * mblk that's contiguous to feed it 2747 */ 2748 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2749 return (SELRET_NOMEM); 2750 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2751 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2752 &hdr_len, &nexthdrp)) { 2753 /* Malformed packet - caller frees. */ 2754 ipsec_freemsg_chain(spare_mp); 2755 return (SELRET_BADPKT); 2756 } 2757 nexthdr = *nexthdrp; 2758 /* We can just extract based on hdr_len now. */ 2759 break; 2760 default: 2761 hdr_len = IPV6_HDR_LEN; 2762 break; 2763 } 2764 2765 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2766 /* IPv6 Fragment */ 2767 ipsec_freemsg_chain(spare_mp); 2768 return (SELRET_TUNFRAG); 2769 } 2770 } else { 2771 if (is_icmp) 2772 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2773 check_proto = IPPROTO_ICMP; 2774 sel->ips_isv4 = B_TRUE; 2775 sel->ips_local_addr_v4 = ipha->ipha_dst; 2776 sel->ips_remote_addr_v4 = ipha->ipha_src; 2777 nexthdr = ipha->ipha_protocol; 2778 hdr_len = IPH_HDR_LENGTH(ipha); 2779 2780 if (port_policy_present && 2781 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2782 !is_icmp) { 2783 /* IPv4 Fragment */ 2784 ipsec_freemsg_chain(spare_mp); 2785 return (SELRET_TUNFRAG); 2786 } 2787 2788 } 2789 sel->ips_protocol = nexthdr; 2790 2791 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2792 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2793 (!port_policy_present && tunnel_mode)) { 2794 sel->ips_remote_port = sel->ips_local_port = 0; 2795 ipsec_freemsg_chain(spare_mp); 2796 return (SELRET_SUCCESS); 2797 } 2798 2799 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2800 /* If we didn't pullup a copy already, do so now. */ 2801 /* 2802 * XXX performance, will upper-layers frequently split TCP/UDP 2803 * apart from IP or options? If so, perhaps we should revisit 2804 * the spare_mp strategy. 
2805 */ 2806 ipsec_hdr_pullup_needed++; 2807 if (spare_mp == NULL && 2808 (spare_mp = msgpullup(mp, -1)) == NULL) { 2809 return (SELRET_NOMEM); 2810 } 2811 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2812 } else { 2813 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2814 } 2815 2816 if (nexthdr == check_proto) { 2817 typecode = (uint8_t *)ports; 2818 sel->ips_icmp_type = *typecode++; 2819 sel->ips_icmp_code = *typecode; 2820 sel->ips_remote_port = sel->ips_local_port = 0; 2821 } else { 2822 sel->ips_remote_port = *ports++; 2823 sel->ips_local_port = *ports; 2824 } 2825 ipsec_freemsg_chain(spare_mp); 2826 return (SELRET_SUCCESS); 2827 } 2828 2829 static boolean_t 2830 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2831 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2832 { 2833 /* 2834 * XXX cut&paste shared with ipsec_init_inbound_sel 2835 */ 2836 uint16_t *ports; 2837 ushort_t hdr_len; 2838 mblk_t *spare_mp = NULL; 2839 uint8_t *nexthdrp; 2840 uint8_t nexthdr; 2841 uint8_t *typecode; 2842 uint8_t check_proto; 2843 2844 ASSERT((ipha == NULL && ip6h != NULL) || 2845 (ipha != NULL && ip6h == NULL)); 2846 2847 if (ip6h != NULL) { 2848 check_proto = IPPROTO_ICMPV6; 2849 nexthdr = ip6h->ip6_nxt; 2850 switch (nexthdr) { 2851 case IPPROTO_HOPOPTS: 2852 case IPPROTO_ROUTING: 2853 case IPPROTO_DSTOPTS: 2854 case IPPROTO_FRAGMENT: 2855 /* 2856 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2857 * mblk that's contiguous to feed it 2858 */ 2859 spare_mp = msgpullup(mp, -1); 2860 if (spare_mp == NULL || 2861 !ip_hdr_length_nexthdr_v6(spare_mp, 2862 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2863 &hdr_len, &nexthdrp)) { 2864 /* Always works, even if NULL. */ 2865 ipsec_freemsg_chain(spare_mp); 2866 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2867 DROPPER(ipss, ipds_spd_nomem), 2868 &ipss->ipsec_spd_dropper); 2869 return (B_FALSE); 2870 } else { 2871 nexthdr = *nexthdrp; 2872 /* We can just extract based on hdr_len now. */ 2873 } 2874 break; 2875 default: 2876 hdr_len = IPV6_HDR_LEN; 2877 break; 2878 } 2879 } else { 2880 check_proto = IPPROTO_ICMP; 2881 hdr_len = IPH_HDR_LENGTH(ipha); 2882 nexthdr = ipha->ipha_protocol; 2883 } 2884 2885 sel->ips_protocol = nexthdr; 2886 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2887 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2888 sel->ips_local_port = sel->ips_remote_port = 0; 2889 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2890 return (B_TRUE); 2891 } 2892 2893 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2894 /* If we didn't pullup a copy already, do so now. */ 2895 /* 2896 * XXX performance, will upper-layers frequently split TCP/UDP 2897 * apart from IP or options? If so, perhaps we should revisit 2898 * the spare_mp strategy. 2899 * 2900 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2901 */ 2902 if (spare_mp == NULL && 2903 (spare_mp = msgpullup(mp, -1)) == NULL) { 2904 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2905 DROPPER(ipss, ipds_spd_nomem), 2906 &ipss->ipsec_spd_dropper); 2907 return (B_FALSE); 2908 } 2909 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2910 } else { 2911 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2912 } 2913 2914 if (nexthdr == check_proto) { 2915 typecode = (uint8_t *)ports; 2916 sel->ips_icmp_type = *typecode++; 2917 sel->ips_icmp_code = *typecode; 2918 sel->ips_remote_port = sel->ips_local_port = 0; 2919 } else { 2920 sel->ips_local_port = *ports++; 2921 sel->ips_remote_port = *ports; 2922 } 2923 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2924 return (B_TRUE); 2925 } 2926 2927 /* 2928 * Create an ipsec_action_t based on the way an inbound packet was protected. 2929 * Used to reflect traffic back to a sender. 2930 * 2931 * We don't bother interning the action into the hash table. 2932 */ 2933 ipsec_action_t * 2934 ipsec_in_to_out_action(ipsec_in_t *ii) 2935 { 2936 ipsa_t *ah_assoc, *esp_assoc; 2937 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2938 ipsec_action_t *ap; 2939 boolean_t unique; 2940 2941 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2942 2943 if (ap == NULL) 2944 return (NULL); 2945 2946 bzero(ap, sizeof (*ap)); 2947 HASH_NULL(ap, ipa_hash); 2948 ap->ipa_next = NULL; 2949 ap->ipa_refs = 1; 2950 2951 /* 2952 * Get the algorithms that were used for this packet. 2953 */ 2954 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2955 ap->ipa_act.ipa_log = 0; 2956 ah_assoc = ii->ipsec_in_ah_sa; 2957 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 2958 2959 esp_assoc = ii->ipsec_in_esp_sa; 2960 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 2961 2962 if (esp_assoc != NULL) { 2963 encr_alg = esp_assoc->ipsa_encr_alg; 2964 espa_alg = esp_assoc->ipsa_auth_alg; 2965 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 2966 } 2967 if (ah_assoc != NULL) 2968 auth_alg = ah_assoc->ipsa_auth_alg; 2969 2970 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 2971 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 2972 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 2973 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 2974 unique = B_FALSE; 2975 2976 if (esp_assoc != NULL) { 2977 ap->ipa_act.ipa_apply.ipp_espa_minbits = 2978 esp_assoc->ipsa_authkeybits; 2979 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 2980 esp_assoc->ipsa_authkeybits; 2981 ap->ipa_act.ipa_apply.ipp_espe_minbits = 2982 esp_assoc->ipsa_encrkeybits; 2983 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 2984 esp_assoc->ipsa_encrkeybits; 2985 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 2986 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 2987 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 2988 unique = B_TRUE; 2989 } 2990 if (ah_assoc != NULL) { 2991 ap->ipa_act.ipa_apply.ipp_ah_minbits = 2992 ah_assoc->ipsa_authkeybits; 2993 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 2994 ah_assoc->ipsa_authkeybits; 2995 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 2996 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 2997 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 2998 unique = B_TRUE; 2999 } 3000 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3001 ap->ipa_want_unique = unique; 3002 ap->ipa_allow_clear = B_FALSE; 3003 ap->ipa_want_se = ii->ipsec_in_decaps; 3004 ap->ipa_want_ah = (ah_assoc != NULL); 3005 ap->ipa_want_esp = (esp_assoc != NULL); 3006 3007 ap->ipa_ovhd = 
ipsec_act_ovhd(&ap->ipa_act); 3008 3009 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3010 3011 return (ap); 3012 } 3013 3014 3015 /* 3016 * Compute the worst-case amount of extra space required by an action. 3017 * Note that, because of the ESP considerations listed below, this is 3018 * actually not the same as the best-case reduction in the MTU; in the 3019 * future, we should pass additional information to this function to 3020 * allow the actual MTU impact to be computed. 3021 * 3022 * AH: Revisit this if we implement algorithms with 3023 * a verifier size of more than 12 bytes. 3024 * 3025 * ESP: A more exact but more messy computation would take into 3026 * account the interaction between the cipher block size and the 3027 * effective MTU, yielding the inner payload size which reflects a 3028 * packet with *minimum* ESP padding.. 3029 */ 3030 int32_t 3031 ipsec_act_ovhd(const ipsec_act_t *act) 3032 { 3033 int32_t overhead = 0; 3034 3035 if (act->ipa_type == IPSEC_ACT_APPLY) { 3036 const ipsec_prot_t *ipp = &act->ipa_apply; 3037 3038 if (ipp->ipp_use_ah) 3039 overhead += IPSEC_MAX_AH_HDR_SIZE; 3040 if (ipp->ipp_use_esp) { 3041 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3042 overhead += sizeof (struct udphdr); 3043 } 3044 if (ipp->ipp_use_se) 3045 overhead += IP_SIMPLE_HDR_LENGTH; 3046 } 3047 return (overhead); 3048 } 3049 3050 /* 3051 * This hash function is used only when creating policies and thus is not 3052 * performance-critical for packet flows. 3053 * 3054 * Future work: canonicalize the structures hashed with this (i.e., 3055 * zeroize padding) so the hash works correctly. 3056 */ 3057 /* ARGSUSED */ 3058 static uint32_t 3059 policy_hash(int size, const void *start, const void *end) 3060 { 3061 return (0); 3062 } 3063 3064 3065 /* 3066 * Hash function macros for each address type. 3067 * 3068 * The IPV6 hash function assumes that the low order 32-bits of the 3069 * address (typically containing the low order 24 bits of the mac 3070 * address) are reasonably well-distributed. Revisit this if we run 3071 * into trouble from lots of collisions on ::1 addresses and the like 3072 * (seems unlikely). 3073 */ 3074 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3075 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3076 3077 /* 3078 * These two hash functions should produce coordinated values 3079 * but have slightly different roles. 3080 */ 3081 static uint32_t 3082 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3083 { 3084 uint32_t valid = selkey->ipsl_valid; 3085 ipsec_stack_t *ipss = ns->netstack_ipsec; 3086 3087 if (!(valid & IPSL_REMOTE_ADDR)) 3088 return (IPSEC_SEL_NOHASH); 3089 3090 if (valid & IPSL_IPV4) { 3091 if (selkey->ipsl_remote_pfxlen == 32) { 3092 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3093 ipss->ipsec_spd_hashsize)); 3094 } 3095 } 3096 if (valid & IPSL_IPV6) { 3097 if (selkey->ipsl_remote_pfxlen == 128) { 3098 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3099 ipss->ipsec_spd_hashsize)); 3100 } 3101 } 3102 return (IPSEC_SEL_NOHASH); 3103 } 3104 3105 static uint32_t 3106 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3107 { 3108 if (sel->ips_isv4) { 3109 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3110 root->ipr_nchains)); 3111 } 3112 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3113 } 3114 3115 /* 3116 * Intern actions into the action hash table. 
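 *
 * A minimal refcounting sketch (hypothetical caller; "acts"/"nacts" are
 * whatever action array the caller built):
 *
 *	ipsec_action_t *ap = ipsec_act_find(acts, nacts, ns);
 *	if (ap == NULL)
 *		... KM_NOSLEEP allocation failed; nothing was interned ...
 *	... use ap; the hash table keeps its own reference ...
 *	IPACT_REFRELE(ap);	releases only the caller's reference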
3117 */ 3118 ipsec_action_t * 3119 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3120 { 3121 int i; 3122 uint32_t hval; 3123 ipsec_action_t *ap; 3124 ipsec_action_t *prev = NULL; 3125 int32_t overhead, maxovhd = 0; 3126 boolean_t allow_clear = B_FALSE; 3127 boolean_t want_ah = B_FALSE; 3128 boolean_t want_esp = B_FALSE; 3129 boolean_t want_se = B_FALSE; 3130 boolean_t want_unique = B_FALSE; 3131 ipsec_stack_t *ipss = ns->netstack_ipsec; 3132 3133 /* 3134 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3135 * so we can use a non-trivial policy_hash function. 3136 */ 3137 for (i = n-1; i >= 0; i--) { 3138 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3139 3140 HASH_LOCK(ipss->ipsec_action_hash, hval); 3141 3142 for (HASH_ITERATE(ap, ipa_hash, 3143 ipss->ipsec_action_hash, hval)) { 3144 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3145 continue; 3146 if (ap->ipa_next != prev) 3147 continue; 3148 break; 3149 } 3150 if (ap != NULL) { 3151 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3152 prev = ap; 3153 continue; 3154 } 3155 /* 3156 * need to allocate a new one.. 3157 */ 3158 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3159 if (ap == NULL) { 3160 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3161 if (prev != NULL) 3162 ipsec_action_free(prev); 3163 return (NULL); 3164 } 3165 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3166 3167 ap->ipa_next = prev; 3168 ap->ipa_act = a[i]; 3169 3170 overhead = ipsec_act_ovhd(&a[i]); 3171 if (maxovhd < overhead) 3172 maxovhd = overhead; 3173 3174 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3175 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3176 allow_clear = B_TRUE; 3177 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3178 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3179 3180 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3181 want_ah |= ipp->ipp_use_ah; 3182 want_esp |= ipp->ipp_use_esp; 3183 want_se |= ipp->ipp_use_se; 3184 want_unique |= ipp->ipp_use_unique; 3185 } 3186 ap->ipa_allow_clear = allow_clear; 3187 ap->ipa_want_ah = want_ah; 3188 ap->ipa_want_esp = want_esp; 3189 ap->ipa_want_se = want_se; 3190 ap->ipa_want_unique = want_unique; 3191 ap->ipa_refs = 1; /* from the hash table */ 3192 ap->ipa_ovhd = maxovhd; 3193 if (prev) 3194 prev->ipa_refs++; 3195 prev = ap; 3196 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3197 } 3198 3199 ap->ipa_refs++; /* caller's reference */ 3200 3201 return (ap); 3202 } 3203 3204 /* 3205 * Called when refcount goes to 0, indicating that all references to this 3206 * node are gone. 3207 * 3208 * This does not unchain the action from the hash table. 3209 */ 3210 void 3211 ipsec_action_free(ipsec_action_t *ap) 3212 { 3213 for (;;) { 3214 ipsec_action_t *np = ap->ipa_next; 3215 ASSERT(ap->ipa_refs == 0); 3216 ASSERT(ap->ipa_hash.hash_pp == NULL); 3217 kmem_cache_free(ipsec_action_cache, ap); 3218 ap = np; 3219 /* Inlined IPACT_REFRELE -- avoid recursion */ 3220 if (ap == NULL) 3221 break; 3222 membar_exit(); 3223 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3224 break; 3225 /* End inlined IPACT_REFRELE */ 3226 } 3227 } 3228 3229 /* 3230 * Called when the action hash table goes away. 3231 * 3232 * The actions can be queued on an mblk with ipsec_in or 3233 * ipsec_out, hence the actions might still be around. 3234 * But we decrement ipa_refs here since we no longer have 3235 * a reference to the action from the hash table. 
3236 */ 3237 static void 3238 ipsec_action_free_table(ipsec_action_t *ap) 3239 { 3240 while (ap != NULL) { 3241 ipsec_action_t *np = ap->ipa_next; 3242 3243 /* FIXME: remove? */ 3244 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3245 (void *)ap, ap->ipa_refs); 3246 ASSERT(ap->ipa_refs > 0); 3247 IPACT_REFRELE(ap); 3248 ap = np; 3249 } 3250 } 3251 3252 /* 3253 * Need to walk all stack instances since the reclaim function 3254 * is global for all instances 3255 */ 3256 /* ARGSUSED */ 3257 static void 3258 ipsec_action_reclaim(void *arg) 3259 { 3260 netstack_handle_t nh; 3261 netstack_t *ns; 3262 3263 netstack_next_init(&nh); 3264 while ((ns = netstack_next(&nh)) != NULL) { 3265 ipsec_action_reclaim_stack(ns); 3266 netstack_rele(ns); 3267 } 3268 netstack_next_fini(&nh); 3269 } 3270 3271 /* 3272 * Periodically sweep action hash table for actions with refcount==1, and 3273 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3274 * because we can't close the race between another thread finding the action 3275 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3276 * Instead, we run this function sporadically to clean up after ourselves; 3277 * we also set it as the "reclaim" function for the action kmem_cache. 3278 * 3279 * Note that it may take several passes of ipsec_action_gc() to free all 3280 * "stale" actions. 3281 */ 3282 static void 3283 ipsec_action_reclaim_stack(netstack_t *ns) 3284 { 3285 int i; 3286 ipsec_stack_t *ipss = ns->netstack_ipsec; 3287 3288 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3289 ipsec_action_t *ap, *np; 3290 3291 /* skip the lock if nobody home */ 3292 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3293 continue; 3294 3295 HASH_LOCK(ipss->ipsec_action_hash, i); 3296 for (ap = ipss->ipsec_action_hash[i].hash_head; 3297 ap != NULL; ap = np) { 3298 ASSERT(ap->ipa_refs > 0); 3299 np = ap->ipa_hash.hash_next; 3300 if (ap->ipa_refs > 1) 3301 continue; 3302 HASH_UNCHAIN(ap, ipa_hash, 3303 ipss->ipsec_action_hash, i); 3304 IPACT_REFRELE(ap); 3305 } 3306 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3307 } 3308 } 3309 3310 /* 3311 * Intern a selector set into the selector set hash table. 3312 * This is simpler than the actions case.. 3313 */ 3314 static ipsec_sel_t * 3315 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3316 { 3317 ipsec_sel_t *sp; 3318 uint32_t hval, bucket; 3319 ipsec_stack_t *ipss = ns->netstack_ipsec; 3320 3321 /* 3322 * Exactly one AF bit should be set in selkey. 3323 */ 3324 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3325 !(selkey->ipsl_valid & IPSL_IPV6)); 3326 3327 hval = selkey_hash(selkey, ns); 3328 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3329 selkey->ipsl_sel_hval = hval; 3330 3331 bucket = (hval == IPSEC_SEL_NOHASH) ? 
0 : hval; 3332 3333 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); 3334 HASH_LOCK(ipss->ipsec_sel_hash, bucket); 3335 3336 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { 3337 if (bcmp(&sp->ipsl_key, selkey, 3338 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3339 break; 3340 } 3341 if (sp != NULL) { 3342 sp->ipsl_refs++; 3343 3344 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3345 return (sp); 3346 } 3347 3348 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3349 if (sp == NULL) { 3350 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3351 return (NULL); 3352 } 3353 3354 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); 3355 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3356 sp->ipsl_key = *selkey; 3357 /* Set to uninitialized and have insertion into polhead fix things. */ 3358 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3359 sp->ipsl_key.ipsl_pol_hval = 0; 3360 else 3361 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3362 3363 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3364 3365 return (sp); 3366 } 3367 3368 static void 3369 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) 3370 { 3371 ipsec_sel_t *sp = *spp; 3372 int hval = sp->ipsl_key.ipsl_sel_hval; 3373 ipsec_stack_t *ipss = ns->netstack_ipsec; 3374 3375 *spp = NULL; 3376 3377 if (hval == IPSEC_SEL_NOHASH) 3378 hval = 0; 3379 3380 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); 3381 HASH_LOCK(ipss->ipsec_sel_hash, hval); 3382 if (--sp->ipsl_refs == 1) { 3383 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); 3384 sp->ipsl_refs--; 3385 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3386 ASSERT(sp->ipsl_refs == 0); 3387 kmem_cache_free(ipsec_sel_cache, sp); 3388 3389 return; 3390 } 3391 3392 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3393 } 3394 3395 /* 3396 * Free a policy rule which we know is no longer being referenced. 3397 */ 3398 void 3399 ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns) 3400 { 3401 ASSERT(ipp->ipsp_refs == 0); 3402 ASSERT(ipp->ipsp_sel != NULL); 3403 ASSERT(ipp->ipsp_act != NULL); 3404 3405 ipsec_sel_rel(&ipp->ipsp_sel, ns); 3406 IPACT_REFRELE(ipp->ipsp_act); 3407 kmem_cache_free(ipsec_pol_cache, ipp); 3408 } 3409 3410 /* 3411 * Construction of new policy rules; construct a policy, and add it to 3412 * the appropriate tables.
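 *
 * A typical construction sequence, sketched from the routines in this file
 * (error handling elided; "keys", "acts", "nacts", "prio" and "direction"
 * are the caller's):
 *
 *	ipp = ipsec_policy_create(keys, acts, nacts, prio, NULL, ns);
 *	rw_enter(&php->iph_lock, RW_WRITER);
 *	if (ipsec_check_policy(php, ipp, direction))
 *		ipsec_enter_policy(php, ipp, direction, ns);
 *	rw_exit(&php->iph_lock);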
3413 */ 3414 ipsec_policy_t * 3415 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3416 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3417 { 3418 ipsec_action_t *ap; 3419 ipsec_sel_t *sp; 3420 ipsec_policy_t *ipp; 3421 ipsec_stack_t *ipss = ns->netstack_ipsec; 3422 3423 if (index_ptr == NULL) 3424 index_ptr = &ipss->ipsec_next_policy_index; 3425 3426 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3427 ap = ipsec_act_find(a, nacts, ns); 3428 sp = ipsec_find_sel(keys, ns); 3429 3430 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3431 if (ap != NULL) { 3432 IPACT_REFRELE(ap); 3433 } 3434 if (sp != NULL) 3435 ipsec_sel_rel(&sp, ns); 3436 if (ipp != NULL) 3437 kmem_cache_free(ipsec_pol_cache, ipp); 3438 return (NULL); 3439 } 3440 3441 HASH_NULL(ipp, ipsp_hash); 3442 3443 ipp->ipsp_refs = 1; /* caller's reference */ 3444 ipp->ipsp_sel = sp; 3445 ipp->ipsp_act = ap; 3446 ipp->ipsp_prio = prio; /* rule priority */ 3447 ipp->ipsp_index = *index_ptr; 3448 (*index_ptr)++; 3449 3450 return (ipp); 3451 } 3452 3453 static void 3454 ipsec_update_present_flags(ipsec_stack_t *ipss) 3455 { 3456 boolean_t hashpol; 3457 3458 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3459 3460 if (hashpol) { 3461 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3462 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3463 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3464 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3465 return; 3466 } 3467 3468 ipss->ipsec_outbound_v4_policy_present = (NULL != 3469 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3470 ipr_nonhash[IPSEC_AF_V4]); 3471 ipss->ipsec_outbound_v6_policy_present = (NULL != 3472 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3473 ipr_nonhash[IPSEC_AF_V6]); 3474 ipss->ipsec_inbound_v4_policy_present = (NULL != 3475 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3476 ipr_nonhash[IPSEC_AF_V4]); 3477 ipss->ipsec_inbound_v6_policy_present = (NULL != 3478 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3479 ipr_nonhash[IPSEC_AF_V6]); 3480 } 3481 3482 boolean_t 3483 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3484 netstack_t *ns) 3485 { 3486 ipsec_sel_t *sp; 3487 ipsec_policy_t *ip, *nip, *head; 3488 int af; 3489 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3490 3491 sp = ipsec_find_sel(keys, ns); 3492 3493 if (sp == NULL) 3494 return (B_FALSE); 3495 3496 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6; 3497 3498 rw_enter(&php->iph_lock, RW_WRITER); 3499 3500 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) { 3501 head = pr->ipr_nonhash[af]; 3502 } else { 3503 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3504 } 3505 3506 for (ip = head; ip != NULL; ip = nip) { 3507 nip = ip->ipsp_hash.hash_next; 3508 if (ip->ipsp_sel != sp) { 3509 continue; 3510 } 3511 3512 IPPOL_UNCHAIN(php, ip, ns); 3513 3514 php->iph_gen++; 3515 ipsec_update_present_flags(ns->netstack_ipsec); 3516 3517 rw_exit(&php->iph_lock); 3518 3519 ipsec_sel_rel(&sp, ns); 3520 3521 return (B_TRUE); 3522 } 3523 3524 rw_exit(&php->iph_lock); 3525 ipsec_sel_rel(&sp, ns); 3526 return (B_FALSE); 3527 } 3528 3529 int 3530 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index, 3531 netstack_t *ns) 3532 { 3533 boolean_t found = B_FALSE; 3534 ipsec_policy_t ipkey; 3535 ipsec_policy_t *ip; 3536 avl_index_t where; 3537 3538 (void) memset(&ipkey, 0, sizeof (ipkey)); 3539 ipkey.ipsp_index = policy_index; 3540 3541 rw_enter(&php->iph_lock, RW_WRITER); 3542 3543 /* 3544 * We could be cleverer here about the walk, but (k+1)*log(N) 3545 * will do for now (k == number of matches, N == number of 3546 * table entries). 3547 */ 3548 for (;;) { 3549 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid, 3550 (void *)&ipkey, &where); 3551 ASSERT(ip == NULL); 3552 3553 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER); 3554 3555 if (ip == NULL) 3556 break; 3557 3558 if (ip->ipsp_index != policy_index) { 3559 ASSERT(ip->ipsp_index > policy_index); 3560 break; 3561 } 3562 3563 IPPOL_UNCHAIN(php, ip, ns); 3564 found = B_TRUE; 3565 } 3566 3567 if (found) { 3568 php->iph_gen++; 3569 ipsec_update_present_flags(ns->netstack_ipsec); 3570 } 3571 3572 rw_exit(&php->iph_lock); 3573 3574 return (found ? 0 : ENOENT); 3575 } 3576 3577 /* 3578 * Given a constructed ipsec_policy_t policy rule, see if it can be entered 3579 * into the correct policy ruleset. As a side-effect, it sets the hash 3580 * entries on "ipp"'s ipsp_pol_hval. 3581 * 3582 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a 3583 * duplicate policy exists with exactly the same selectors, or an ICMP 3584 * rule exists with a different encryption/authentication action). 3585 */ 3586 boolean_t 3587 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3588 { 3589 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3590 int af = -1; 3591 ipsec_policy_t *p2, *head; 3592 uint8_t check_proto; 3593 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3594 uint32_t valid = selkey->ipsl_valid; 3595 3596 if (valid & IPSL_IPV6) { 3597 ASSERT(!(valid & IPSL_IPV4)); 3598 af = IPSEC_AF_V6; 3599 check_proto = IPPROTO_ICMPV6; 3600 } else { 3601 ASSERT(valid & IPSL_IPV4); 3602 af = IPSEC_AF_V4; 3603 check_proto = IPPROTO_ICMP; 3604 } 3605 3606 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3607 3608 /* 3609 * Double-check that we don't have any duplicate selectors here. 3610 * Because selectors are interned below, we need only compare pointers 3611 * for equality. 3612 */ 3613 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) { 3614 head = pr->ipr_nonhash[af]; 3615 } else { 3616 selkey->ipsl_pol_hval = 3617 (selkey->ipsl_valid & IPSL_IPV4) ?
3618 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3619 pr->ipr_nchains) : 3620 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3621 pr->ipr_nchains); 3622 3623 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3624 } 3625 3626 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3627 if (p2->ipsp_sel == ipp->ipsp_sel) 3628 return (B_FALSE); 3629 } 3630 3631 /* 3632 * If it's ICMP and not a drop or pass rule, run through the ICMP 3633 * rules and make sure the action is either new or the same as any 3634 * other actions. We don't have to check the full chain because 3635 * discard and bypass will override all other actions 3636 */ 3637 3638 if (valid & IPSL_PROTOCOL && 3639 selkey->ipsl_proto == check_proto && 3640 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3641 3642 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3643 3644 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3645 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3646 (p2->ipsp_act->ipa_act.ipa_type == 3647 IPSEC_ACT_APPLY)) { 3648 return (ipsec_compare_action(p2, ipp)); 3649 } 3650 } 3651 } 3652 3653 return (B_TRUE); 3654 } 3655 3656 /* 3657 * compare the action chains of two policies for equality 3658 * B_TRUE -> effective equality 3659 */ 3660 3661 static boolean_t 3662 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3663 { 3664 3665 ipsec_action_t *act1, *act2; 3666 3667 /* We have a valid rule. Let's compare the actions */ 3668 if (p1->ipsp_act == p2->ipsp_act) { 3669 /* same action. We are good */ 3670 return (B_TRUE); 3671 } 3672 3673 /* we have to walk the chain */ 3674 3675 act1 = p1->ipsp_act; 3676 act2 = p2->ipsp_act; 3677 3678 while (act1 != NULL && act2 != NULL) { 3679 3680 /* otherwise, Are we close enough? */ 3681 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3682 act1->ipa_want_ah != act2->ipa_want_ah || 3683 act1->ipa_want_esp != act2->ipa_want_esp || 3684 act1->ipa_want_se != act2->ipa_want_se) { 3685 /* Nope, we aren't */ 3686 return (B_FALSE); 3687 } 3688 3689 if (act1->ipa_want_ah) { 3690 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3691 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3692 return (B_FALSE); 3693 } 3694 3695 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3696 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3697 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3698 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3699 return (B_FALSE); 3700 } 3701 } 3702 3703 if (act1->ipa_want_esp) { 3704 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3705 act2->ipa_act.ipa_apply.ipp_use_esp || 3706 act1->ipa_act.ipa_apply.ipp_use_espa != 3707 act2->ipa_act.ipa_apply.ipp_use_espa) { 3708 return (B_FALSE); 3709 } 3710 3711 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3712 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3713 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3714 return (B_FALSE); 3715 } 3716 3717 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3718 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3719 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3720 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3721 return (B_FALSE); 3722 } 3723 } 3724 3725 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3726 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3727 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3728 return (B_FALSE); 3729 } 3730 3731 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3732 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3733 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3734 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3735 return (B_FALSE); 3736 } 3737 } 3738 3739 } 3740 3741 act1 = 
act1->ipa_next; 3742 act2 = act2->ipa_next; 3743 } 3744 3745 if (act1 != NULL || act2 != NULL) { 3746 return (B_FALSE); 3747 } 3748 3749 return (B_TRUE); 3750 } 3751 3752 3753 /* 3754 * Given a constructed ipsec_policy_t policy rule, enter it into 3755 * the correct policy ruleset. 3756 * 3757 * ipsec_check_policy() is assumed to have succeeded first (to check for 3758 * duplicates). 3759 */ 3760 void 3761 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3762 netstack_t *ns) 3763 { 3764 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3765 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3766 uint32_t valid = selkey->ipsl_valid; 3767 uint32_t hval = selkey->ipsl_pol_hval; 3768 int af = -1; 3769 3770 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3771 3772 if (valid & IPSL_IPV6) { 3773 ASSERT(!(valid & IPSL_IPV4)); 3774 af = IPSEC_AF_V6; 3775 } else { 3776 ASSERT(valid & IPSL_IPV4); 3777 af = IPSEC_AF_V4; 3778 } 3779 3780 php->iph_gen++; 3781 3782 if (hval == IPSEC_SEL_NOHASH) { 3783 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3784 } else { 3785 HASH_LOCK(pr->ipr_hash, hval); 3786 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3787 HASH_UNLOCK(pr->ipr_hash, hval); 3788 } 3789 3790 ipsec_insert_always(&php->iph_rulebyid, ipp); 3791 3792 ipsec_update_present_flags(ns->netstack_ipsec); 3793 } 3794 3795 static void 3796 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, 3797 netstack_t *ns) 3798 { 3799 ipsec_policy_t *ip, *nip; 3800 int af, chain, nchain; 3801 3802 for (af = 0; af < IPSEC_NAF; af++) { 3803 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3804 nip = ip->ipsp_hash.hash_next; 3805 IPPOL_UNCHAIN(php, ip, ns); 3806 } 3807 ipr->ipr_nonhash[af] = NULL; 3808 } 3809 nchain = ipr->ipr_nchains; 3810 3811 for (chain = 0; chain < nchain; chain++) { 3812 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3813 ip = nip) { 3814 nip = ip->ipsp_hash.hash_next; 3815 IPPOL_UNCHAIN(php, ip, ns); 3816 } 3817 ipr->ipr_hash[chain].hash_head = NULL; 3818 } 3819 } 3820 3821 void 3822 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3823 { 3824 int dir; 3825 3826 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3827 3828 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3829 ipsec_ipr_flush(php, &php->iph_root[dir], ns); 3830 3831 ipsec_update_present_flags(ns->netstack_ipsec); 3832 } 3833 3834 void 3835 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3836 { 3837 int dir; 3838 3839 ASSERT(php->iph_refs == 0); 3840 3841 rw_enter(&php->iph_lock, RW_WRITER); 3842 ipsec_polhead_flush(php, ns); 3843 rw_exit(&php->iph_lock); 3844 rw_destroy(&php->iph_lock); 3845 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3846 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3847 int chain; 3848 3849 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3850 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3851 3852 } 3853 ipsec_polhead_free_table(php); 3854 kmem_free(php, sizeof (*php)); 3855 } 3856 3857 static void 3858 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3859 { 3860 int af; 3861 3862 ipr->ipr_nchains = 0; 3863 ipr->ipr_hash = NULL; 3864 3865 for (af = 0; af < IPSEC_NAF; af++) { 3866 ipr->ipr_nonhash[af] = NULL; 3867 } 3868 } 3869 3870 ipsec_policy_head_t * 3871 ipsec_polhead_create(void) 3872 { 3873 ipsec_policy_head_t *php; 3874 3875 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3876 if (php == NULL) 3877 return (php); 3878 3879 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3880 php->iph_refs = 1; 3881 php->iph_gen = 0; 3882 3883 
ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]);
3884 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]);
3885
3886 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid,
3887     sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));
3888
3889 return (php);
3890 }
3891
3892 /*
3893 * Clone the policy head into a new polhead; release one reference to the
3894 * old one and return the only reference to the new one.
3895 * If the old one had a refcount of 1, just return it.
3896 */
3897 ipsec_policy_head_t *
3898 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns)
3899 {
3900     ipsec_policy_head_t *nphp;
3901
3902     if (php == NULL)
3903         return (ipsec_polhead_create());
3904     else if (php->iph_refs == 1)
3905         return (php);
3906
3907     nphp = ipsec_polhead_create();
3908     if (nphp == NULL)
3909         return (NULL);
3910
3911     if (ipsec_copy_polhead(php, nphp, ns) != 0) {
3912         ipsec_polhead_free(nphp, ns);
3913         return (NULL);
3914     }
3915     IPPH_REFRELE(php, ns);
3916     return (nphp);
3917 }
3918
3919 /*
3920 * When sending a response to an ICMP request or generating an RST
3921 * in the TCP case, the outbound packets need to go out at the same level
3922 * of protection as the incoming ones, i.e., we associate our outbound
3923 * policy with how the packet came in. We call this after we have
3924 * accepted the incoming packet, which may or may not have been in
3925 * the clear; hence we send the reply back with a policy
3926 * matching that of the incoming datagram.
3927 *
3928 * NOTE: This mechanism serves two purposes:
3929 *
3930 * 1) If we have multiple outbound policies, we send out a reply
3931 * matching how it came in rather than matching the outbound
3932 * policy.
3933 *
3934 * 2) For asymmetric policies, we want to make sure that incoming
3935 * and outgoing have the same level of protection. Asymmetric
3936 * policies exist only with global policy where we may not have
3937 * both outbound and inbound at the same time.
3938 *
3939 * NOTE2: This function is called by cleartext cases, so it needs to be
3940 * in IP proper.
3941 */
3942 boolean_t
3943 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h)
3944 {
3945     ipsec_in_t *ii;
3946     ipsec_out_t *io;
3947     boolean_t v4;
3948     mblk_t *mp;
3949     boolean_t secure, attach_if;
3950     uint_t ifindex;
3951     ipsec_selector_t sel;
3952     ipsec_action_t *reflect_action = NULL;
3953     zoneid_t zoneid;
3954     netstack_t *ns;
3955
3956     ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
3957
3958     bzero((void*)&sel, sizeof (sel));
3959
3960     ii = (ipsec_in_t *)ipsec_mp->b_rptr;
3961
3962     mp = ipsec_mp->b_cont;
3963     ASSERT(mp != NULL);
3964
3965     if (ii->ipsec_in_action != NULL) {
3966         /* Transfer the reference. */
3967         reflect_action = ii->ipsec_in_action;
3968         ii->ipsec_in_action = NULL;
3969     } else if (!ii->ipsec_in_loopback)
3970         reflect_action = ipsec_in_to_out_action(ii);
3971     secure = ii->ipsec_in_secure;
3972     attach_if = ii->ipsec_in_attach_if;
3973     ifindex = ii->ipsec_in_ill_index;
3974     zoneid = ii->ipsec_in_zoneid;
3975     ASSERT(zoneid != ALL_ZONES);
3976     ns = ii->ipsec_in_ns;
3977     v4 = ii->ipsec_in_v4;
3978
3979     ipsec_in_release_refs(ii);  /* No netstack_rele/hold needed */
3980
3981     /*
3982      * The caller is going to send the datagram out, which might
3983      * go on the wire or be delivered locally through ip_wput_local.
3984      *
3985      * 1) If it goes out on the wire, new associations will be
3986      * obtained.
3987      * 2) If it is delivered locally, ip_wput_local will convert
3988      * this IPSEC_OUT to an IPSEC_IN, looking at the requests.
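 *
 * A reflecting caller would then do something like (sketch only;
 * the real call sites live in the TCP RST and ICMP reply paths):
 *
 *	if (ipsec_in_to_out(ipsec_mp, ipha, ip6h)) {
 *		(ipsec_mp is now an IPSEC_OUT; hand it to the
 *		normal output path)
 *	} else {
 *		(selector extraction failed; abandon the reply)
 *	}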
3989 */ 3990 3991 io = (ipsec_out_t *)ipsec_mp->b_rptr; 3992 bzero(io, sizeof (ipsec_out_t)); 3993 io->ipsec_out_type = IPSEC_OUT; 3994 io->ipsec_out_len = sizeof (ipsec_out_t); 3995 io->ipsec_out_frtn.free_func = ipsec_out_free; 3996 io->ipsec_out_frtn.free_arg = (char *)io; 3997 io->ipsec_out_act = reflect_action; 3998 3999 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, 4000 ns->netstack_ipsec)) 4001 return (B_FALSE); 4002 4003 io->ipsec_out_src_port = sel.ips_local_port; 4004 io->ipsec_out_dst_port = sel.ips_remote_port; 4005 io->ipsec_out_proto = sel.ips_protocol; 4006 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4007 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4008 4009 /* 4010 * Don't use global policy for this, as we want 4011 * to use the same protection that was applied to the inbound packet. 4012 */ 4013 io->ipsec_out_use_global_policy = B_FALSE; 4014 io->ipsec_out_proc_begin = B_FALSE; 4015 io->ipsec_out_secure = secure; 4016 io->ipsec_out_v4 = v4; 4017 io->ipsec_out_attach_if = attach_if; 4018 io->ipsec_out_ill_index = ifindex; 4019 io->ipsec_out_zoneid = zoneid; 4020 io->ipsec_out_ns = ns; /* No netstack_hold */ 4021 4022 return (B_TRUE); 4023 } 4024 4025 mblk_t * 4026 ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4027 { 4028 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 4029 ipsec_in_t *nii; 4030 mblk_t *nmp; 4031 frtn_t nfrtn; 4032 ipsec_stack_t *ipss = ns->netstack_ipsec; 4033 4034 ASSERT(ii->ipsec_in_type == IPSEC_IN); 4035 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 4036 4037 nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); 4038 if (nmp == NULL) { 4039 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4040 DROPPER(ipss, ipds_spd_nomem), 4041 &ipss->ipsec_spd_dropper); 4042 return (NULL); 4043 } 4044 4045 ASSERT(nmp->b_datap->db_type == M_CTL); 4046 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4047 4048 /* 4049 * Bump refcounts. 4050 */ 4051 if (ii->ipsec_in_ah_sa != NULL) 4052 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 4053 if (ii->ipsec_in_esp_sa != NULL) 4054 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 4055 if (ii->ipsec_in_policy != NULL) 4056 IPPH_REFHOLD(ii->ipsec_in_policy); 4057 4058 /* 4059 * Copy everything, but preserve the free routine provided by 4060 * ipsec_in_alloc(). 4061 */ 4062 nii = (ipsec_in_t *)nmp->b_rptr; 4063 nfrtn = nii->ipsec_in_frtn; 4064 bcopy(ii, nii, sizeof (*ii)); 4065 nii->ipsec_in_frtn = nfrtn; 4066 4067 nmp->b_cont = cont; 4068 4069 return (nmp); 4070 } 4071 4072 mblk_t * 4073 ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4074 { 4075 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 4076 ipsec_out_t *nio; 4077 mblk_t *nmp; 4078 frtn_t nfrtn; 4079 ipsec_stack_t *ipss = ns->netstack_ipsec; 4080 4081 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4082 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4083 4084 nmp = ipsec_alloc_ipsec_out(ns); 4085 if (nmp == NULL) { 4086 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4087 DROPPER(ipss, ipds_spd_nomem), 4088 &ipss->ipsec_spd_dropper); 4089 return (NULL); 4090 } 4091 ASSERT(nmp->b_datap->db_type == M_CTL); 4092 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4093 4094 /* 4095 * Bump refcounts. 
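 * (The bcopy() below copies every pointer field of the ipsec_out_t
 * verbatim, so the new copy must hold its own reference on each
 * object those fields point at; freeing both mblks would otherwise
 * over-release them.)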
4096 */
4097     if (io->ipsec_out_ah_sa != NULL)
4098         IPSA_REFHOLD(io->ipsec_out_ah_sa);
4099     if (io->ipsec_out_esp_sa != NULL)
4100         IPSA_REFHOLD(io->ipsec_out_esp_sa);
4101     if (io->ipsec_out_polhead != NULL)
4102         IPPH_REFHOLD(io->ipsec_out_polhead);
4103     if (io->ipsec_out_policy != NULL)
4104         IPPOL_REFHOLD(io->ipsec_out_policy);
4105     if (io->ipsec_out_act != NULL)
4106         IPACT_REFHOLD(io->ipsec_out_act);
4107     if (io->ipsec_out_latch != NULL)
4108         IPLATCH_REFHOLD(io->ipsec_out_latch);
4109     if (io->ipsec_out_cred != NULL)
4110         crhold(io->ipsec_out_cred);
4111
4112     /*
4113      * Copy everything, but preserve the free routine provided by
4114      * ipsec_alloc_ipsec_out().
4115      */
4116     nio = (ipsec_out_t *)nmp->b_rptr;
4117     nfrtn = nio->ipsec_out_frtn;
4118     bcopy(io, nio, sizeof (*io));
4119     nio->ipsec_out_frtn = nfrtn;
4120
4121     nmp->b_cont = cont;
4122
4123     return (nmp);
4124 }
4125
4126 static void
4127 ipsec_out_release_refs(ipsec_out_t *io)
4128 {
4129     netstack_t *ns = io->ipsec_out_ns;
4130
4131     ASSERT(io->ipsec_out_type == IPSEC_OUT);
4132     ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t));
4133     ASSERT(io->ipsec_out_ns != NULL);
4134
4135     /* Note: IPSA_REFRELE is a multi-line macro */
4136     if (io->ipsec_out_ah_sa != NULL)
4137         IPSA_REFRELE(io->ipsec_out_ah_sa);
4138     if (io->ipsec_out_esp_sa != NULL)
4139         IPSA_REFRELE(io->ipsec_out_esp_sa);
4140     if (io->ipsec_out_polhead != NULL)
4141         IPPH_REFRELE(io->ipsec_out_polhead, ns);
4142     if (io->ipsec_out_policy != NULL)
4143         IPPOL_REFRELE(io->ipsec_out_policy, ns);
4144     if (io->ipsec_out_act != NULL)
4145         IPACT_REFRELE(io->ipsec_out_act);
4146     if (io->ipsec_out_cred != NULL) {
4147         crfree(io->ipsec_out_cred);
4148         io->ipsec_out_cred = NULL;
4149     }
4150     if (io->ipsec_out_latch) {
4151         IPLATCH_REFRELE(io->ipsec_out_latch, ns);
4152         io->ipsec_out_latch = NULL;
4153     }
4154 }
4155
4156 static void
4157 ipsec_out_free(void *arg)
4158 {
4159     ipsec_out_t *io = (ipsec_out_t *)arg;
4160     ipsec_out_release_refs(io);
4161     kmem_cache_free(ipsec_info_cache, arg);
4162 }
4163
4164 static void
4165 ipsec_in_release_refs(ipsec_in_t *ii)
4166 {
4167     netstack_t *ns = ii->ipsec_in_ns;
4168
4169     ASSERT(ii->ipsec_in_ns != NULL);
4170
4171     /* Note: IPSA_REFRELE is a multi-line macro */
4172     if (ii->ipsec_in_ah_sa != NULL)
4173         IPSA_REFRELE(ii->ipsec_in_ah_sa);
4174     if (ii->ipsec_in_esp_sa != NULL)
4175         IPSA_REFRELE(ii->ipsec_in_esp_sa);
4176     if (ii->ipsec_in_policy != NULL)
4177         IPPH_REFRELE(ii->ipsec_in_policy, ns);
4178     if (ii->ipsec_in_da != NULL) {
4179         freeb(ii->ipsec_in_da);
4180         ii->ipsec_in_da = NULL;
4181     }
4182 }
4183
4184 static void
4185 ipsec_in_free(void *arg)
4186 {
4187     ipsec_in_t *ii = (ipsec_in_t *)arg;
4188     ipsec_in_release_refs(ii);
4189     kmem_cache_free(ipsec_info_cache, arg);
4190 }
4191
4192 /*
4193 * This is called only for outbound datagrams if the datagram needs to
4194 * go out secure. A NULL mp can be passed to get an ipsec_out. This
4195 * facility is used by ip_unbind.
4196 *
4197 * NOTE: As the data part could be modified by ipsec_out_process etc.,
4198 * we can't make it fast by calling a dup.
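 *
 * The usual pattern (as in ipsec_attach_ipsec_out() below) is:
 *
 *	ipsec_mp = ipsec_alloc_ipsec_out(ns);
 *	if (ipsec_mp == NULL)
 *		(count and drop the datagram)
 *	ipsec_mp->b_cont = mp;	(prepend the M_CTL to the data)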
4199 */ 4200 mblk_t * 4201 ipsec_alloc_ipsec_out(netstack_t *ns) 4202 { 4203 mblk_t *ipsec_mp; 4204 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4205 4206 if (io == NULL) 4207 return (NULL); 4208 4209 bzero(io, sizeof (ipsec_out_t)); 4210 4211 io->ipsec_out_type = IPSEC_OUT; 4212 io->ipsec_out_len = sizeof (ipsec_out_t); 4213 io->ipsec_out_frtn.free_func = ipsec_out_free; 4214 io->ipsec_out_frtn.free_arg = (char *)io; 4215 4216 /* 4217 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 4218 * using ipsec_out_zoneid should assert that the zoneid has been set to 4219 * a sane value. 4220 */ 4221 io->ipsec_out_zoneid = ALL_ZONES; 4222 io->ipsec_out_ns = ns; /* No netstack_hold */ 4223 4224 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 4225 &io->ipsec_out_frtn); 4226 if (ipsec_mp == NULL) { 4227 ipsec_out_free(io); 4228 4229 return (NULL); 4230 } 4231 ipsec_mp->b_datap->db_type = M_CTL; 4232 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 4233 4234 return (ipsec_mp); 4235 } 4236 4237 /* 4238 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4239 * Otherwise initialize using conn. 4240 * 4241 * If pol is non-null, we consume a reference to it. 4242 */ 4243 mblk_t * 4244 ipsec_attach_ipsec_out(mblk_t *mp, conn_t *connp, ipsec_policy_t *pol, 4245 uint8_t proto, netstack_t *ns) 4246 { 4247 mblk_t *ipsec_mp; 4248 ipsec_stack_t *ipss = ns->netstack_ipsec; 4249 4250 ASSERT((pol != NULL) || (connp != NULL)); 4251 4252 ipsec_mp = ipsec_alloc_ipsec_out(ns); 4253 if (ipsec_mp == NULL) { 4254 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4255 "ipsec_attach_ipsec_out: Allocation failure\n"); 4256 ip_drop_packet(mp, B_FALSE, NULL, NULL, 4257 DROPPER(ipss, ipds_spd_nomem), 4258 &ipss->ipsec_spd_dropper); 4259 return (NULL); 4260 } 4261 ipsec_mp->b_cont = mp; 4262 return (ipsec_init_ipsec_out(ipsec_mp, connp, pol, proto, ns)); 4263 } 4264 4265 /* 4266 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4267 * Otherwise initialize using conn. 4268 * 4269 * If pol is non-null, we consume a reference to it. 4270 */ 4271 mblk_t * 4272 ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, 4273 uint8_t proto, netstack_t *ns) 4274 { 4275 mblk_t *mp; 4276 ipsec_out_t *io; 4277 ipsec_policy_t *p; 4278 ipha_t *ipha; 4279 ip6_t *ip6h; 4280 ipsec_stack_t *ipss = ns->netstack_ipsec; 4281 4282 ASSERT((pol != NULL) || (connp != NULL)); 4283 4284 /* 4285 * If mp is NULL, we won't/should not be using it. 4286 */ 4287 mp = ipsec_mp->b_cont; 4288 4289 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4290 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4291 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4292 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4293 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4294 io->ipsec_out_latch = NULL; 4295 /* 4296 * Set the zoneid when we have the connp. 4297 * Otherwise, we're called from ip_wput_attach_policy() who will take 4298 * care of setting the zoneid. 
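 * Until one of those happens, ipsec_out_zoneid holds the ALL_ZONES
 * sentinel stored by ipsec_alloc_ipsec_out(), which consumers must
 * treat as "not yet set".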
4299 */ 4300 if (connp != NULL) 4301 io->ipsec_out_zoneid = connp->conn_zoneid; 4302 4303 io->ipsec_out_ns = ns; /* No netstack_hold */ 4304 4305 if (mp != NULL) { 4306 ipha = (ipha_t *)mp->b_rptr; 4307 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4308 io->ipsec_out_v4 = B_TRUE; 4309 ip6h = NULL; 4310 } else { 4311 io->ipsec_out_v4 = B_FALSE; 4312 ip6h = (ip6_t *)ipha; 4313 ipha = NULL; 4314 } 4315 } else { 4316 ASSERT(connp != NULL && connp->conn_policy_cached); 4317 ip6h = NULL; 4318 ipha = NULL; 4319 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4320 } 4321 4322 p = NULL; 4323 4324 /* 4325 * Take latched policies over global policy. Check here again for 4326 * this, in case we had conn_latch set while the packet was flying 4327 * around in IP. 4328 */ 4329 if (connp != NULL && connp->conn_latch != NULL) { 4330 ASSERT(ns == connp->conn_netstack); 4331 p = connp->conn_latch->ipl_out_policy; 4332 io->ipsec_out_latch = connp->conn_latch; 4333 IPLATCH_REFHOLD(connp->conn_latch); 4334 if (p != NULL) { 4335 IPPOL_REFHOLD(p); 4336 } 4337 io->ipsec_out_src_port = connp->conn_lport; 4338 io->ipsec_out_dst_port = connp->conn_fport; 4339 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4340 if (pol != NULL) 4341 IPPOL_REFRELE(pol, ns); 4342 } else if (pol != NULL) { 4343 ipsec_selector_t sel; 4344 4345 bzero((void*)&sel, sizeof (sel)); 4346 4347 p = pol; 4348 /* 4349 * conn does not have the port information. Get 4350 * it from the packet. 4351 */ 4352 4353 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, 4354 ns->netstack_ipsec)) { 4355 /* Callee did ip_drop_packet(). */ 4356 return (NULL); 4357 } 4358 io->ipsec_out_src_port = sel.ips_local_port; 4359 io->ipsec_out_dst_port = sel.ips_remote_port; 4360 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4361 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4362 } 4363 4364 io->ipsec_out_proto = proto; 4365 io->ipsec_out_use_global_policy = B_TRUE; 4366 io->ipsec_out_secure = (p != NULL); 4367 io->ipsec_out_policy = p; 4368 4369 if (p == NULL) { 4370 if (connp->conn_policy != NULL) { 4371 io->ipsec_out_secure = B_TRUE; 4372 ASSERT(io->ipsec_out_latch == NULL); 4373 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4374 io->ipsec_out_need_policy = B_TRUE; 4375 ASSERT(io->ipsec_out_polhead == NULL); 4376 IPPH_REFHOLD(connp->conn_policy); 4377 io->ipsec_out_polhead = connp->conn_policy; 4378 } 4379 } else { 4380 /* Handle explicit drop action. */ 4381 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4382 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4383 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4384 DROPPER(ipss, ipds_spd_explicit), 4385 &ipss->ipsec_spd_dropper); 4386 ipsec_mp = NULL; 4387 } 4388 } 4389 4390 return (ipsec_mp); 4391 } 4392 4393 /* 4394 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4395 * and keep track of what-if-any IPsec processing will be applied to the 4396 * datagram. 
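 *
 * The resulting message, schematically (assuming the allocations
 * succeed and the caller chains the datagram on), is:
 *
 *	+-------------------+  b_cont   +------------------+
 *	| M_CTL: ipsec_in_t |---------->| inbound datagram |
 *	+-------------------+           +------------------+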
4397 */
4398 mblk_t *
4399 ipsec_in_alloc(boolean_t isv4, netstack_t *ns)
4400 {
4401     mblk_t *ipsec_in;
4402     ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP);
4403
4404     if (ii == NULL)
4405         return (NULL);
4406
4407     bzero(ii, sizeof (ipsec_info_t));
4408     ii->ipsec_in_type = IPSEC_IN;
4409     ii->ipsec_in_len = sizeof (ipsec_in_t);
4410
4411     ii->ipsec_in_v4 = isv4;
4412     ii->ipsec_in_secure = B_TRUE;
4413     ii->ipsec_in_ns = ns;  /* No netstack_hold */
4414
4415     ii->ipsec_in_frtn.free_func = ipsec_in_free;
4416     ii->ipsec_in_frtn.free_arg = (char *)ii;
4417
4418     ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI,
4419         &ii->ipsec_in_frtn);
4420     if (ipsec_in == NULL) {
4421         ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n"));
4422         ipsec_in_free(ii);
4423         return (NULL);
4424     }
4425
4426     ipsec_in->b_datap->db_type = M_CTL;
4427     ipsec_in->b_wptr += sizeof (ipsec_info_t);
4428
4429     return (ipsec_in);
4430 }
4431
4432 /*
4433 * This is called from ip_wput_local when a packet which needs
4434 * security is looped back, to convert the IPSEC_OUT to an IPSEC_IN
4435 * before fanout, where the policy check happens. In most
4436 * cases, IPSEC processing has *never* been done. There is one case
4437 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where
4438 * the packet is destined for localhost and IPSEC processing has already
4439 * been done.
4440 *
4441 * Future: This could happen after SA selection has occurred for
4442 * outbound, which will tell us who the src and dst identities are.
4443 * Then it's just a matter of splicing the ah/esp SA pointers from the
4444 * ipsec_out_t to the ipsec_in_t.
4445 */
4446 void
4447 ipsec_out_to_in(mblk_t *ipsec_mp)
4448 {
4449     ipsec_in_t *ii;
4450     ipsec_out_t *io;
4451     ipsec_policy_t *pol;
4452     ipsec_action_t *act;
4453     boolean_t v4, icmp_loopback;
4454     zoneid_t zoneid;
4455     netstack_t *ns;
4456
4457     ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
4458
4459     io = (ipsec_out_t *)ipsec_mp->b_rptr;
4460
4461     v4 = io->ipsec_out_v4;
4462     zoneid = io->ipsec_out_zoneid;
4463     icmp_loopback = io->ipsec_out_icmp_loopback;
4464     ns = io->ipsec_out_ns;
4465
4466     act = io->ipsec_out_act;
4467     if (act == NULL) {
4468         pol = io->ipsec_out_policy;
4469         if (pol != NULL) {
4470             act = pol->ipsp_act;
4471             IPACT_REFHOLD(act);
4472         }
4473     }
4474     io->ipsec_out_act = NULL;
4475
4476     ipsec_out_release_refs(io);  /* No netstack_rele/hold needed */
4477
4478     ii = (ipsec_in_t *)ipsec_mp->b_rptr;
4479     bzero(ii, sizeof (ipsec_in_t));
4480     ii->ipsec_in_type = IPSEC_IN;
4481     ii->ipsec_in_len = sizeof (ipsec_in_t);
4482     ii->ipsec_in_loopback = B_TRUE;
4483     ii->ipsec_in_ns = ns;  /* No netstack_hold */
4484
4485     ii->ipsec_in_frtn.free_func = ipsec_in_free;
4486     ii->ipsec_in_frtn.free_arg = (char *)ii;
4487     ii->ipsec_in_action = act;
4488     ii->ipsec_in_zoneid = zoneid;
4489
4490     /*
4491      * In most cases, we can't look at the ipsec_out_XXX_sa
4492      * because this never went through IPSEC processing. So look at
4493      * the requests and infer whether it would have gone through
4494      * IPSEC processing or not. Initialize the "done" fields with
4495      * the requests. The possible values for "done" fields are:
4496      *
4497      * 1) zero indicates that a particular preference was never
4498      * requested.
4499      * 2) non-zero indicates that it could be IPSEC_PREF_REQUIRED/
4500      * IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that
4501      * IPSEC processing has been completed.
4502      */
4503     ii->ipsec_in_secure = B_TRUE;
4504     ii->ipsec_in_v4 = v4;
4505     ii->ipsec_in_icmp_loopback = icmp_loopback;
4506     ii->ipsec_in_attach_if = B_FALSE;
4507 }
4508
4509 /*
4510 * Consults global policy to see whether this datagram should
4511 * go out secure. If so, it attaches an ipsec_mp in front and
4512 * returns.
4513 */
4514 mblk_t *
4515 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire,
4516     conn_t *connp, boolean_t unspec_src, zoneid_t zoneid)
4517 {
4518     mblk_t *mp;
4519     ipsec_out_t *io = NULL;
4520     ipsec_selector_t sel;
4521     uint_t ill_index;
4522     boolean_t conn_dontroutex;
4523     boolean_t conn_multicast_loopx;
4524     boolean_t policy_present;
4525     ip_stack_t *ipst = ire->ire_ipst;
4526     netstack_t *ns = ipst->ips_netstack;
4527     ipsec_stack_t *ipss = ns->netstack_ipsec;
4528
4529     ASSERT((ipha != NULL && ip6h == NULL) ||
4530         (ip6h != NULL && ipha == NULL));
4531
4532     bzero((void*)&sel, sizeof (sel));
4533
4534     if (ipha != NULL)
4535         policy_present = ipss->ipsec_outbound_v4_policy_present;
4536     else
4537         policy_present = ipss->ipsec_outbound_v6_policy_present;
4538     /*
4539      * Fast Path to see if there is any policy.
4540      */
4541     if (!policy_present) {
4542         if (ipsec_mp->b_datap->db_type == M_CTL) {
4543             io = (ipsec_out_t *)ipsec_mp->b_rptr;
4544             if (!io->ipsec_out_secure) {
4545                 /*
4546                  * If there is no global policy and ip_wput
4547                  * or ip_wput_multicast has attached this mp
4548                  * for the multicast case, free the ipsec_mp and
4549                  * return the original mp.
4550                  */
4551                 mp = ipsec_mp->b_cont;
4552                 freeb(ipsec_mp);
4553                 ipsec_mp = mp;
4554                 io = NULL;
4555             }
4556             ASSERT(io == NULL || !io->ipsec_out_tunnel);
4557         }
4558         if (((io == NULL) || (io->ipsec_out_polhead == NULL)) &&
4559             ((connp == NULL) || (connp->conn_policy == NULL)))
4560             return (ipsec_mp);
4561     }
4562
4563     ill_index = 0;
4564     conn_multicast_loopx = conn_dontroutex = B_FALSE;
4565     mp = ipsec_mp;
4566     if (ipsec_mp->b_datap->db_type == M_CTL) {
4567         mp = ipsec_mp->b_cont;
4568         /*
4569          * This is a connection where we have some per-socket
4570          * policy or ip_wput has attached an ipsec_mp for
4571          * the multicast datagram.
4572          */
4573         io = (ipsec_out_t *)ipsec_mp->b_rptr;
4574         if (!io->ipsec_out_secure) {
4575             /*
4576              * This ipsec_mp was allocated in ip_wput or
4577              * ip_wput_multicast so that we will know the
4578              * value of ill_index, conn_dontroute,
4579              * conn_multicast_loop in the multicast case if
4580              * we inherit global policy here.
4581              */
4582             ill_index = io->ipsec_out_ill_index;
4583             conn_dontroutex = io->ipsec_out_dontroute;
4584             conn_multicast_loopx = io->ipsec_out_multicast_loop;
4585             freeb(ipsec_mp);
4586             ipsec_mp = mp;
4587             io = NULL;
4588         }
4589         ASSERT(io == NULL || !io->ipsec_out_tunnel);
4590     }
4591
4592     if (ipha != NULL) {
4593         sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ?
4594 ipha->ipha_src : ire->ire_src_addr); 4595 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4596 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4597 sel.ips_isv4 = B_TRUE; 4598 } else { 4599 ushort_t hdr_len; 4600 uint8_t *nexthdrp; 4601 boolean_t is_fragment; 4602 4603 sel.ips_isv4 = B_FALSE; 4604 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4605 if (!unspec_src) 4606 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4607 } else { 4608 sel.ips_local_addr_v6 = ip6h->ip6_src; 4609 } 4610 4611 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment); 4612 if (is_fragment) { 4613 /* 4614 * It's a packet fragment for a packet that 4615 * we have already processed (since IPsec processing 4616 * is done before fragmentation), so we don't 4617 * have to do policy checks again. Fragments can 4618 * come back to us for processing if they have 4619 * been queued up due to flow control. 4620 */ 4621 if (ipsec_mp->b_datap->db_type == M_CTL) { 4622 mp = ipsec_mp->b_cont; 4623 freeb(ipsec_mp); 4624 ipsec_mp = mp; 4625 } 4626 return (ipsec_mp); 4627 } 4628 4629 /* IPv6 common-case. */ 4630 sel.ips_protocol = ip6h->ip6_nxt; 4631 switch (ip6h->ip6_nxt) { 4632 case IPPROTO_TCP: 4633 case IPPROTO_UDP: 4634 case IPPROTO_SCTP: 4635 case IPPROTO_ICMPV6: 4636 break; 4637 default: 4638 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4639 &hdr_len, &nexthdrp)) { 4640 BUMP_MIB(&ipst->ips_ip6_mib, 4641 ipIfStatsOutDiscards); 4642 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4643 return (NULL); 4644 } 4645 sel.ips_protocol = *nexthdrp; 4646 break; 4647 } 4648 } 4649 4650 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4651 if (ipha != NULL) { 4652 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4653 } else { 4654 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4655 } 4656 4657 /* Callee dropped the packet. */ 4658 return (NULL); 4659 } 4660 4661 if (io != NULL) { 4662 /* 4663 * We seem to have some local policy (we already have 4664 * an ipsec_out). Look at global policy and see 4665 * whether we have to inherit or not. 4666 */ 4667 io->ipsec_out_need_policy = B_FALSE; 4668 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, 4669 &sel, ns); 4670 ASSERT((io->ipsec_out_policy != NULL) || 4671 (io->ipsec_out_act != NULL)); 4672 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4673 return (ipsec_mp); 4674 } 4675 ipsec_mp = ipsec_attach_global_policy(mp, connp, &sel, ns); 4676 if (ipsec_mp == NULL) 4677 return (mp); 4678 4679 /* 4680 * Copy the right port information. 4681 */ 4682 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4683 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4684 4685 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4686 ASSERT((io->ipsec_out_policy != NULL) || 4687 (io->ipsec_out_act != NULL)); 4688 io->ipsec_out_src_port = sel.ips_local_port; 4689 io->ipsec_out_dst_port = sel.ips_remote_port; 4690 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4691 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4692 /* 4693 * Set ill_index, conn_dontroute and conn_multicast_loop 4694 * for multicast datagrams. 4695 */ 4696 io->ipsec_out_ill_index = ill_index; 4697 io->ipsec_out_dontroute = conn_dontroutex; 4698 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4699 4700 if (zoneid == ALL_ZONES) 4701 zoneid = GLOBAL_ZONEID; 4702 io->ipsec_out_zoneid = zoneid; 4703 return (ipsec_mp); 4704 } 4705 4706 /* 4707 * When appropriate, this function caches inbound and outbound policy 4708 * for this connection. 4709 * 4710 * XXX need to work out more details about per-interface policy and 4711 * caching here! 
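 *
 * The net effect on the outbound path is a three-level precedence
 * (cf. ipsec_init_ipsec_out() above):
 *
 *	1) latched per-connection policy (conn_latch), then
 *	2) the per-socket policy head (conn_policy), then
 *	3) global policy, consulted only when nothing is cached.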
4712 *
4713 * XXX may want to split inbound and outbound caching for ill..
4714 */
4715 int
4716 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4)
4717 {
4718     boolean_t global_policy_present;
4719     netstack_t *ns = connp->conn_netstack;
4720     ipsec_stack_t *ipss = ns->netstack_ipsec;
4721
4722     /*
4723      * There is no policy latching for ICMP sockets because we can't
4724      * decide which policy to use until we see the packet and get
4725      * type/code selectors.
4726      */
4727     if (connp->conn_ulp == IPPROTO_ICMP ||
4728         connp->conn_ulp == IPPROTO_ICMPV6) {
4729         connp->conn_in_enforce_policy =
4730             connp->conn_out_enforce_policy = B_TRUE;
4731         if (connp->conn_latch != NULL) {
4732             IPLATCH_REFRELE(connp->conn_latch, ns);
4733             connp->conn_latch = NULL;
4734         }
4735         connp->conn_flags |= IPCL_CHECK_POLICY;
4736         return (0);
4737     }
4738
4739     global_policy_present = isv4 ?
4740         (ipss->ipsec_outbound_v4_policy_present ||
4741         ipss->ipsec_inbound_v4_policy_present) :
4742         (ipss->ipsec_outbound_v6_policy_present ||
4743         ipss->ipsec_inbound_v6_policy_present);
4744
4745     if ((connp->conn_policy != NULL) || global_policy_present) {
4746         ipsec_selector_t sel;
4747         ipsec_policy_t *p;
4748
4749         if (connp->conn_latch == NULL &&
4750             (connp->conn_latch = iplatch_create()) == NULL) {
4751             return (ENOMEM);
4752         }
4753
4754         sel.ips_protocol = connp->conn_ulp;
4755         sel.ips_local_port = connp->conn_lport;
4756         sel.ips_remote_port = connp->conn_fport;
4757         sel.ips_is_icmp_inv_acq = 0;
4758         sel.ips_isv4 = isv4;
4759         if (isv4) {
4760             sel.ips_local_addr_v4 = connp->conn_src;
4761             sel.ips_remote_addr_v4 = connp->conn_rem;
4762         } else {
4763             sel.ips_local_addr_v6 = connp->conn_srcv6;
4764             sel.ips_remote_addr_v6 = connp->conn_remv6;
4765         }
4766
4767         p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel,
4768             ns);
4769         if (connp->conn_latch->ipl_in_policy != NULL)
4770             IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns);
4771         connp->conn_latch->ipl_in_policy = p;
4772         connp->conn_in_enforce_policy = (p != NULL);
4773
4774         p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel,
4775             ns);
4776         if (connp->conn_latch->ipl_out_policy != NULL)
4777             IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns);
4778         connp->conn_latch->ipl_out_policy = p;
4779         connp->conn_out_enforce_policy = (p != NULL);
4780
4781         /* Clear the latched actions too, in case we're recaching. */
4782         if (connp->conn_latch->ipl_out_action != NULL)
4783             IPACT_REFRELE(connp->conn_latch->ipl_out_action);
4784         if (connp->conn_latch->ipl_in_action != NULL)
4785             IPACT_REFRELE(connp->conn_latch->ipl_in_action);
4786     }
4787
4788     /*
4789      * We may or may not have policy for this endpoint. We still set
4790      * conn_policy_cached so that inbound datagrams don't have to look
4791      * at global policy as policy is considered latched for these
4792      * endpoints. We should not set conn_policy_cached until the conn
4793      * reflects the actual policy. If we *set* this before inheriting
4794      * the policy, there is a window where the check
4795      * CONN_INBOUND_POLICY_PRESENT will neither check with the policy
4796      * on the conn (because we have not yet copied the policy onto the
4797      * conn and hence not set conn_in_enforce_policy) nor with the
4798      * global policy (because conn_policy_cached is already set).
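 *
 * In short, the safe publication order is: latch the policies, set
 * the conn_*_enforce_policy flags, and only then set
 * conn_policy_cached, which is the order the code below follows.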
4799 */ 4800 connp->conn_policy_cached = B_TRUE; 4801 if (connp->conn_in_enforce_policy) 4802 connp->conn_flags |= IPCL_CHECK_POLICY; 4803 return (0); 4804 } 4805 4806 void 4807 iplatch_free(ipsec_latch_t *ipl, netstack_t *ns) 4808 { 4809 if (ipl->ipl_out_policy != NULL) 4810 IPPOL_REFRELE(ipl->ipl_out_policy, ns); 4811 if (ipl->ipl_in_policy != NULL) 4812 IPPOL_REFRELE(ipl->ipl_in_policy, ns); 4813 if (ipl->ipl_in_action != NULL) 4814 IPACT_REFRELE(ipl->ipl_in_action); 4815 if (ipl->ipl_out_action != NULL) 4816 IPACT_REFRELE(ipl->ipl_out_action); 4817 if (ipl->ipl_local_cid != NULL) 4818 IPSID_REFRELE(ipl->ipl_local_cid); 4819 if (ipl->ipl_remote_cid != NULL) 4820 IPSID_REFRELE(ipl->ipl_remote_cid); 4821 if (ipl->ipl_local_id != NULL) 4822 crfree(ipl->ipl_local_id); 4823 mutex_destroy(&ipl->ipl_lock); 4824 kmem_free(ipl, sizeof (*ipl)); 4825 } 4826 4827 ipsec_latch_t * 4828 iplatch_create() 4829 { 4830 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4831 if (ipl == NULL) 4832 return (ipl); 4833 bzero(ipl, sizeof (*ipl)); 4834 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4835 ipl->ipl_refcnt = 1; 4836 return (ipl); 4837 } 4838 4839 /* 4840 * Hash function for ID hash table. 4841 */ 4842 static uint32_t 4843 ipsid_hash(int idtype, char *idstring) 4844 { 4845 uint32_t hval = idtype; 4846 unsigned char c; 4847 4848 while ((c = *idstring++) != 0) { 4849 hval = (hval << 4) | (hval >> 28); 4850 hval ^= c; 4851 } 4852 hval = hval ^ (hval >> 16); 4853 return (hval & (IPSID_HASHSIZE-1)); 4854 } 4855 4856 /* 4857 * Look up identity string in hash table. Return identity object 4858 * corresponding to the name -- either preexisting, or newly allocated. 4859 * 4860 * Return NULL if we need to allocate a new one and can't get memory. 4861 */ 4862 ipsid_t * 4863 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4864 { 4865 ipsid_t *retval; 4866 char *nstr; 4867 int idlen = strlen(idstring) + 1; 4868 ipsec_stack_t *ipss = ns->netstack_ipsec; 4869 ipsif_t *bucket; 4870 4871 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4872 4873 mutex_enter(&bucket->ipsif_lock); 4874 4875 for (retval = bucket->ipsif_head; retval != NULL; 4876 retval = retval->ipsid_next) { 4877 if (idtype != retval->ipsid_type) 4878 continue; 4879 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4880 continue; 4881 4882 IPSID_REFHOLD(retval); 4883 mutex_exit(&bucket->ipsif_lock); 4884 return (retval); 4885 } 4886 4887 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4888 if (!retval) { 4889 mutex_exit(&bucket->ipsif_lock); 4890 return (NULL); 4891 } 4892 4893 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4894 if (!nstr) { 4895 mutex_exit(&bucket->ipsif_lock); 4896 kmem_free(retval, sizeof (*retval)); 4897 return (NULL); 4898 } 4899 4900 retval->ipsid_refcnt = 1; 4901 retval->ipsid_next = bucket->ipsif_head; 4902 if (retval->ipsid_next != NULL) 4903 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4904 retval->ipsid_ptpn = &bucket->ipsif_head; 4905 retval->ipsid_type = idtype; 4906 retval->ipsid_cid = nstr; 4907 bucket->ipsif_head = retval; 4908 bcopy(idstring, nstr, idlen); 4909 mutex_exit(&bucket->ipsif_lock); 4910 4911 return (retval); 4912 } 4913 4914 /* 4915 * Garbage collect the identity hash table. 
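 *
 * Entries whose ipsid_refcnt has dropped to zero are unlinked with
 * the back-pointer idiom used below:
 *
 *	*id->ipsid_ptpn = nid;		(predecessor now skips id)
 *	if (nid != NULL)
 *		nid->ipsid_ptpn = id->ipsid_ptpn;
 *
 * after which the entry and its interned string are freed.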
4916 */ 4917 void 4918 ipsid_gc(netstack_t *ns) 4919 { 4920 int i, len; 4921 ipsid_t *id, *nid; 4922 ipsif_t *bucket; 4923 ipsec_stack_t *ipss = ns->netstack_ipsec; 4924 4925 for (i = 0; i < IPSID_HASHSIZE; i++) { 4926 bucket = &ipss->ipsec_ipsid_buckets[i]; 4927 mutex_enter(&bucket->ipsif_lock); 4928 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4929 nid = id->ipsid_next; 4930 if (id->ipsid_refcnt == 0) { 4931 *id->ipsid_ptpn = nid; 4932 if (nid != NULL) 4933 nid->ipsid_ptpn = id->ipsid_ptpn; 4934 len = strlen(id->ipsid_cid) + 1; 4935 kmem_free(id->ipsid_cid, len); 4936 kmem_free(id, sizeof (*id)); 4937 } 4938 } 4939 mutex_exit(&bucket->ipsif_lock); 4940 } 4941 } 4942 4943 /* 4944 * Return true if two identities are the same. 4945 */ 4946 boolean_t 4947 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4948 { 4949 if (id1 == id2) 4950 return (B_TRUE); 4951 #ifdef DEBUG 4952 if ((id1 == NULL) || (id2 == NULL)) 4953 return (B_FALSE); 4954 /* 4955 * test that we're interning id's correctly.. 4956 */ 4957 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4958 (id1->ipsid_type != id2->ipsid_type)); 4959 #endif 4960 return (B_FALSE); 4961 } 4962 4963 /* 4964 * Initialize identity table; called during module initialization. 4965 */ 4966 static void 4967 ipsid_init(netstack_t *ns) 4968 { 4969 ipsif_t *bucket; 4970 int i; 4971 ipsec_stack_t *ipss = ns->netstack_ipsec; 4972 4973 for (i = 0; i < IPSID_HASHSIZE; i++) { 4974 bucket = &ipss->ipsec_ipsid_buckets[i]; 4975 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4976 } 4977 } 4978 4979 /* 4980 * Free identity table (preparatory to module unload) 4981 */ 4982 static void 4983 ipsid_fini(netstack_t *ns) 4984 { 4985 ipsif_t *bucket; 4986 int i; 4987 ipsec_stack_t *ipss = ns->netstack_ipsec; 4988 4989 for (i = 0; i < IPSID_HASHSIZE; i++) { 4990 bucket = &ipss->ipsec_ipsid_buckets[i]; 4991 ASSERT(bucket->ipsif_head == NULL); 4992 mutex_destroy(&bucket->ipsif_lock); 4993 } 4994 } 4995 4996 /* 4997 * Update the minimum and maximum supported key sizes for the 4998 * specified algorithm. Must be called while holding the algorithms lock. 4999 */ 5000 void 5001 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 5002 netstack_t *ns) 5003 { 5004 size_t crypto_min = (size_t)-1, crypto_max = 0; 5005 size_t cur_crypto_min, cur_crypto_max; 5006 boolean_t is_valid; 5007 crypto_mechanism_info_t *mech_infos; 5008 uint_t nmech_infos; 5009 int crypto_rc, i; 5010 crypto_mech_usage_t mask; 5011 ipsec_stack_t *ipss = ns->netstack_ipsec; 5012 5013 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 5014 5015 /* 5016 * Compute the min, max, and default key sizes (in number of 5017 * increments to the default key size in bits) as defined 5018 * by the algorithm mappings. This range of key sizes is used 5019 * for policy related operations. The effective key sizes 5020 * supported by the framework could be more limited than 5021 * those defined for an algorithm. 
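 *
 * For example (illustrative values only): with alg_increment == 64
 * and alg_key_sizes[] == { 128, 128, 256 }, the mapping-defined
 * range is 128..256 bits with a default of 128 bits.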
5022 */ 5023 alg->alg_default_bits = alg->alg_key_sizes[0]; 5024 if (alg->alg_increment != 0) { 5025 /* key sizes are defined by range & increment */ 5026 alg->alg_minbits = alg->alg_key_sizes[1]; 5027 alg->alg_maxbits = alg->alg_key_sizes[2]; 5028 5029 alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits, 5030 alg->alg_increment, alg->alg_default_bits); 5031 } else if (alg->alg_nkey_sizes == 0) { 5032 /* no specified key size for algorithm */ 5033 alg->alg_minbits = alg->alg_maxbits = 0; 5034 } else { 5035 /* key sizes are defined by enumeration */ 5036 alg->alg_minbits = (uint16_t)-1; 5037 alg->alg_maxbits = 0; 5038 5039 for (i = 0; i < alg->alg_nkey_sizes; i++) { 5040 if (alg->alg_key_sizes[i] < alg->alg_minbits) 5041 alg->alg_minbits = alg->alg_key_sizes[i]; 5042 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 5043 alg->alg_maxbits = alg->alg_key_sizes[i]; 5044 } 5045 alg->alg_default = 0; 5046 } 5047 5048 if (!(alg->alg_flags & ALG_FLAG_VALID)) 5049 return; 5050 5051 /* 5052 * Mechanisms do not apply to the NULL encryption 5053 * algorithm, so simply return for this case. 5054 */ 5055 if (alg->alg_id == SADB_EALG_NULL) 5056 return; 5057 5058 /* 5059 * Find the min and max key sizes supported by the cryptographic 5060 * framework providers. 5061 */ 5062 5063 /* get the key sizes supported by the framework */ 5064 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 5065 &mech_infos, &nmech_infos, KM_SLEEP); 5066 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 5067 alg->alg_flags &= ~ALG_FLAG_VALID; 5068 return; 5069 } 5070 5071 /* min and max key sizes supported by framework */ 5072 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 5073 int unit_bits; 5074 5075 /* 5076 * Ignore entries that do not support the operations 5077 * needed for the algorithm type. 5078 */ 5079 if (alg_type == IPSEC_ALG_AUTH) 5080 mask = CRYPTO_MECH_USAGE_MAC; 5081 else 5082 mask = CRYPTO_MECH_USAGE_ENCRYPT | 5083 CRYPTO_MECH_USAGE_DECRYPT; 5084 if ((mech_infos[i].mi_usage & mask) != mask) 5085 continue; 5086 5087 unit_bits = (mech_infos[i].mi_keysize_unit == 5088 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 5089 /* adjust min/max supported by framework */ 5090 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 5091 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 5092 5093 if (cur_crypto_min < crypto_min) 5094 crypto_min = cur_crypto_min; 5095 5096 /* 5097 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 5098 * the crypto framework which means "no upper limit". 5099 */ 5100 if (mech_infos[i].mi_max_key_size == 5101 CRYPTO_EFFECTIVELY_INFINITE) 5102 crypto_max = (size_t)-1; 5103 else if (cur_crypto_max > crypto_max) 5104 crypto_max = cur_crypto_max; 5105 5106 is_valid = B_TRUE; 5107 } 5108 5109 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 5110 nmech_infos); 5111 5112 if (!is_valid) { 5113 /* no key sizes supported by framework */ 5114 alg->alg_flags &= ~ALG_FLAG_VALID; 5115 return; 5116 } 5117 5118 /* 5119 * Determine min and max key sizes from alg_key_sizes[]. 5120 * defined for the algorithm entry. Adjust key sizes based on 5121 * those supported by the framework. 
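 *
 * Continuing the illustrative example above: if the providers
 * report support for 192..512 bits, the effective range computed
 * below is MAX(128, 192) = 192 through MIN(256, 512) = 256, and the
 * 128-bit default is pulled up to the new 192-bit minimum.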
5122 */ 5123 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 5124 if (alg->alg_increment != 0) { 5125 /* supported key sizes are defined by range & increment */ 5126 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 5127 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 5128 5129 alg->alg_ef_minbits = MAX(alg->alg_minbits, 5130 (uint16_t)crypto_min); 5131 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 5132 (uint16_t)crypto_max); 5133 5134 /* 5135 * If the sizes supported by the framework are outside 5136 * the range of sizes defined by the algorithm mappings, 5137 * the algorithm cannot be used. Check for this 5138 * condition here. 5139 */ 5140 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 5141 alg->alg_flags &= ~ALG_FLAG_VALID; 5142 return; 5143 } 5144 5145 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 5146 alg->alg_ef_default_bits = alg->alg_ef_minbits; 5147 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 5148 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 5149 5150 alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits, 5151 alg->alg_increment, alg->alg_ef_default_bits); 5152 } else if (alg->alg_nkey_sizes == 0) { 5153 /* no specified key size for algorithm */ 5154 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 5155 } else { 5156 /* supported key sizes are defined by enumeration */ 5157 alg->alg_ef_minbits = (uint16_t)-1; 5158 alg->alg_ef_maxbits = 0; 5159 5160 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 5161 /* 5162 * Ignore the current key size if it is not in the 5163 * range of sizes supported by the framework. 5164 */ 5165 if (alg->alg_key_sizes[i] < crypto_min || 5166 alg->alg_key_sizes[i] > crypto_max) 5167 continue; 5168 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 5169 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 5170 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 5171 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 5172 is_valid = B_TRUE; 5173 } 5174 5175 if (!is_valid) { 5176 alg->alg_flags &= ~ALG_FLAG_VALID; 5177 return; 5178 } 5179 alg->alg_ef_default = 0; 5180 } 5181 } 5182 5183 /* 5184 * Free the memory used by the specified algorithm. 5185 */ 5186 void 5187 ipsec_alg_free(ipsec_alginfo_t *alg) 5188 { 5189 if (alg == NULL) 5190 return; 5191 5192 if (alg->alg_key_sizes != NULL) { 5193 kmem_free(alg->alg_key_sizes, 5194 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 5195 alg->alg_key_sizes = NULL; 5196 } 5197 if (alg->alg_block_sizes != NULL) { 5198 kmem_free(alg->alg_block_sizes, 5199 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 5200 alg->alg_block_sizes = NULL; 5201 } 5202 kmem_free(alg, sizeof (*alg)); 5203 } 5204 5205 /* 5206 * Check the validity of the specified key size for an algorithm. 5207 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 5208 */ 5209 boolean_t 5210 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 5211 { 5212 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 5213 return (B_FALSE); 5214 5215 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 5216 /* 5217 * If the key sizes are defined by enumeration, the new 5218 * key size must be equal to one of the supported values. 5219 */ 5220 int i; 5221 5222 for (i = 0; i < alg->alg_nkey_sizes; i++) 5223 if (key_size == alg->alg_key_sizes[i]) 5224 break; 5225 if (i == alg->alg_nkey_sizes) 5226 return (B_FALSE); 5227 } 5228 5229 return (B_TRUE); 5230 } 5231 5232 /* 5233 * Callback function invoked by the crypto framework when a provider 5234 * registers or unregisters. 
This callback updates the algorithm
5235 * tables when a crypto algorithm is no longer available or becomes
5236 * available, and triggers the freeing/creation of context templates
5237 * associated with existing SAs, if needed.
5238 *
5239 * Need to walk all stack instances since the callback is global
5240 * for all instances.
5241 */
5242 void
5243 ipsec_prov_update_callback(uint32_t event, void *event_arg)
5244 {
5245     netstack_handle_t nh;
5246     netstack_t *ns;
5247
5248     netstack_next_init(&nh);
5249     while ((ns = netstack_next(&nh)) != NULL) {
5250         ipsec_prov_update_callback_stack(event, event_arg, ns);
5251         netstack_rele(ns);
5252     }
5253     netstack_next_fini(&nh);
5254 }
5255
5256 static void
5257 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg,
5258     netstack_t *ns)
5259 {
5260     crypto_notify_event_change_t *prov_change =
5261         (crypto_notify_event_change_t *)event_arg;
5262     uint_t algidx, algid, algtype, mech_count, mech_idx;
5263     ipsec_alginfo_t *alg;
5264     ipsec_alginfo_t oalg;
5265     crypto_mech_name_t *mechs;
5266     boolean_t alg_changed = B_FALSE;
5267     ipsec_stack_t *ipss = ns->netstack_ipsec;
5268
5269     /* ignore events for which we didn't register */
5270     if (event != CRYPTO_EVENT_MECHS_CHANGED) {
5271         ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x "
5272             "received from crypto framework\n", event));
5273         return;
5274     }
5275
5276     mechs = crypto_get_mech_list(&mech_count, KM_SLEEP);
5277     if (mechs == NULL)
5278         return;
5279
5280     /*
5281      * Walk the list of currently defined IPsec algorithms. Update
5282      * the algorithm valid flag and trigger an update of the
5283      * SAs that depend on that algorithm.
5284      */
5285     mutex_enter(&ipss->ipsec_alg_lock);
5286     for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
5287         for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype];
5288             algidx++) {
5289
5290             algid = ipss->ipsec_sortlist[algtype][algidx];
5291             alg = ipss->ipsec_alglists[algtype][algid];
5292             ASSERT(alg != NULL);
5293
5294             /*
5295              * Skip the algorithms which do not map to the
5296              * crypto framework provider being added or removed.
5297              */
5298             if (strncmp(alg->alg_mech_name,
5299                 prov_change->ec_mech_name,
5300                 CRYPTO_MAX_MECH_NAME) != 0)
5301                 continue;
5302
5303             /*
5304              * Determine if the mechanism is valid. If it
5305              * is not, mark the algorithm as being invalid. If
5306              * it is, mark the algorithm as being valid.
5307              */
5308             for (mech_idx = 0; mech_idx < mech_count; mech_idx++)
5309                 if (strncmp(alg->alg_mech_name,
5310                     mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0)
5311                     break;
5312             if (mech_idx == mech_count &&
5313                 alg->alg_flags & ALG_FLAG_VALID) {
5314                 alg->alg_flags &= ~ALG_FLAG_VALID;
5315                 alg_changed = B_TRUE;
5316             } else if (mech_idx < mech_count &&
5317                 !(alg->alg_flags & ALG_FLAG_VALID)) {
5318                 alg->alg_flags |= ALG_FLAG_VALID;
5319                 alg_changed = B_TRUE;
5320             }
5321
5322             /*
5323              * Update the supported key sizes, regardless
5324              * of whether a crypto provider was added or
5325              * removed.
5326              */
5327             oalg = *alg;
5328             ipsec_alg_fix_min_max(alg, algtype, ns);
5329             if (!alg_changed &&
5330                 (alg->alg_ef_minbits != oalg.alg_ef_minbits ||
5331                 alg->alg_ef_maxbits != oalg.alg_ef_maxbits ||
5332                 alg->alg_ef_default != oalg.alg_ef_default ||
5333                 alg->alg_ef_default_bits !=
5334                 oalg.alg_ef_default_bits))
5335                 alg_changed = B_TRUE;
5336
5337             /*
5338              * Update the affected SAs if a software provider is
5339              * being added or removed.
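 * E.g. when the last provider for a mechanism (say "CKM_AES_CBC")
 * unregisters, the matching algorithm loses ALG_FLAG_VALID above,
 * alg_changed is set, and sadb_alg_update() triggers the freeing or
 * creation of context templates for the SAs that use the algorithm.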
5340              */
5341             if (prov_change->ec_provider_type ==
5342                 CRYPTO_SW_PROVIDER)
5343                 sadb_alg_update(algtype, alg->alg_id,
5344                     prov_change->ec_change ==
5345                     CRYPTO_MECH_ADDED, ns);
5346         }
5347     }
5348     mutex_exit(&ipss->ipsec_alg_lock);
5349     crypto_free_mech_list(mechs, mech_count);
5350
5351     if (alg_changed) {
5352         /*
5353          * An algorithm has changed, i.e., it became valid or
5354          * invalid, or its supported key sizes have changed.
5355          * Notify ipsecah and ipsecesp of this change so
5356          * that they can send a SADB_REGISTER to their consumers.
5357          */
5358         ipsecah_algs_changed(ns);
5359         ipsecesp_algs_changed(ns);
5360     }
5361 }
5362
5363 /*
5364 * Registers with the crypto framework to be notified of crypto
5365 * provider changes. Used to update the algorithm tables and
5366 * to free or create context templates if needed. Invoked after IPsec
5367 * is loaded successfully.
5368 *
5369 * This is called separately for each IP instance, so we ensure we only
5370 * register once.
5371 */
5372 void
5373 ipsec_register_prov_update(void)
5374 {
5375     if (prov_update_handle != NULL)
5376         return;
5377
5378     prov_update_handle = crypto_notify_events(
5379         ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED);
5380 }
5381
5382 /*
5383 * Unregisters from the framework's notifications of crypto provider
5384 * changes. Called from ipsec_policy_g_destroy().
5385 */
5386 static void
5387 ipsec_unregister_prov_update(void)
5388 {
5389     if (prov_update_handle != NULL)
5390         crypto_unnotify_events(prov_update_handle);
5391 }
5392
5393 /*
5394 * Tunnel-mode support routines.
5395 */
5396
5397 /*
5398 * Returns an mblk chain suitable for putnext() if policies match and IPsec
5399 * SAs are available. If there's no per-tunnel policy, or the lookup comes back
5400 * with no match, then still return the packet and have global policy take
5401 * a crack at it in IP.
5402 *
5403 * Remember -> we can be forwarding packets. Keep that in mind w.r.t.
5404 * inner-packet contents.
5405 */
5406 mblk_t *
5407 ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4,
5408     ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len,
5409     netstack_t *ns)
5410 {
5411     ipsec_tun_pol_t *itp = atp->tun_itp;
5412     ipsec_policy_head_t *polhead;
5413     ipsec_selector_t sel;
5414     mblk_t *ipsec_mp, *ipsec_mp_head, *nmp;
5415     mblk_t *spare_mp = NULL;
5416     ipsec_out_t *io;
5417     boolean_t is_fragment;
5418     ipsec_policy_t *pol;
5419     ipsec_stack_t *ipss = ns->netstack_ipsec;
5420
5421     ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL ||
5422         outer_ipv4 != NULL && outer_ipv6 == NULL);
5423     /* We take care of inners in a bit. */
5424
5425     /* No policy on this tunnel - let global policy have at it. */
5426     if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE))
5427         return (mp);
5428     polhead = itp->itp_policy;
5429
5430     bzero(&sel, sizeof (sel));
5431     if (inner_ipv4 != NULL) {
5432         ASSERT(inner_ipv6 == NULL);
5433         sel.ips_isv4 = B_TRUE;
5434         sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
5435         sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
5436         sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol;
5437         is_fragment =
5438             IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags);
5439     } else {
5440         ASSERT(inner_ipv6 != NULL);
5441         sel.ips_isv4 = B_FALSE;
5442         sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
5443         /* Use ip_get_dst_v6() just for the fragment bit.
*/
5444         sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6,
5445             &is_fragment);
5446         /*
5447          * Reset, because we don't care about routing-header dests
5448          * in the forwarding/tunnel path.
5449          */
5450         sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
5451     }
5452
5453     if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) {
5454         if (is_fragment) {
5455             ipha_t *oiph;
5456             ipha_t *iph = NULL;
5457             ip6_t *ip6h = NULL;
5458             int hdr_len;
5459             uint16_t ip6_hdr_length;
5460             uint8_t v6_proto;
5461             uint8_t *v6_proto_p;
5462
5463             /*
5464              * We have a fragment we need to track!
5465              */
5466             mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp,
5467                 outer_hdr_len, ipss);
5468             if (mp == NULL)
5469                 return (NULL);
5470
5471             /*
5472              * If we get here, we have a full
5473              * fragment chain.
5474              */
5475
5476             oiph = (ipha_t *)mp->b_rptr;
5477             if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
5478                 hdr_len = ((outer_hdr_len != 0) ?
5479                     IPH_HDR_LENGTH(oiph) : 0);
5480                 iph = (ipha_t *)(mp->b_rptr + hdr_len);
5481             } else {
5482                 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
5483                 if ((spare_mp = msgpullup(mp, -1)) == NULL) {
5484                     ip_drop_packet_chain(mp, B_FALSE,
5485                         NULL, NULL,
5486                         DROPPER(ipss, ipds_spd_nomem),
5487                         &ipss->ipsec_spd_dropper);
                        return (NULL);
5488                 }
5489                 ip6h = (ip6_t *)spare_mp->b_rptr;
5490                 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
5491                     &ip6_hdr_length, &v6_proto_p);
5492                 hdr_len = ip6_hdr_length;
5493             }
5494             outer_hdr_len = hdr_len;
5495
5496             if (sel.ips_isv4) {
5497                 if (iph == NULL) {
5498                     /* Was v6 outer */
5499                     iph = (ipha_t *)(mp->b_rptr + hdr_len);
5500                 }
5501                 inner_ipv4 = iph;
5502                 sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
5503                 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
5504                 sel.ips_protocol =
5505                     (uint8_t)inner_ipv4->ipha_protocol;
5506             } else {
5507                 if ((spare_mp == NULL) &&
5508                     ((spare_mp = msgpullup(mp, -1)) == NULL)) {
5509                     ip_drop_packet_chain(mp, B_FALSE,
5510                         NULL, NULL,
5511                         DROPPER(ipss, ipds_spd_nomem),
5512                         &ipss->ipsec_spd_dropper);
                        return (NULL);
5513                 }
5514                 inner_ipv6 = (ip6_t *)(spare_mp->b_rptr +
5515                     hdr_len);
5516                 sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
5517                 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
5518                 (void) ip_hdr_length_nexthdr_v6(spare_mp,
5519                     inner_ipv6, &ip6_hdr_length,
5520                     &v6_proto_p);
5521                 v6_proto = *v6_proto_p;
5522                 sel.ips_protocol = v6_proto;
5523 #ifdef FRAGCACHE_DEBUG
5524                 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n",
5525                     sel.ips_protocol);
5526 #endif
5527             }
5528             /* Ports are extracted below */
5529         }
5530
5531         /* Get ports... */
5532         if (spare_mp != NULL) {
5533             if (!ipsec_init_outbound_ports(&sel, spare_mp,
5534                 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) {
5535                 /*
5536                  * callee did ip_drop_packet_chain() on
5537                  * spare_mp
5538                  */
5539                 ipsec_freemsg_chain(mp);
5540                 return (NULL);
5541             }
5542         } else {
5543             if (!ipsec_init_outbound_ports(&sel, mp,
5544                 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) {
5545                 /* callee did ip_drop_packet_chain() on mp.
*/ 5546 return (NULL); 5547 } 5548 } 5549 #ifdef FRAGCACHE_DEBUG 5550 if (inner_ipv4 != NULL) 5551 cmn_err(CE_WARN, 5552 "(v4) sel.ips_protocol = %d, " 5553 "sel.ips_local_port = %d, " 5554 "sel.ips_remote_port = %d\n", 5555 sel.ips_protocol, ntohs(sel.ips_local_port), 5556 ntohs(sel.ips_remote_port)); 5557 if (inner_ipv6 != NULL) 5558 cmn_err(CE_WARN, 5559 "(v6) sel.ips_protocol = %d, " 5560 "sel.ips_local_port = %d, " 5561 "sel.ips_remote_port = %d\n", 5562 sel.ips_protocol, ntohs(sel.ips_local_port), 5563 ntohs(sel.ips_remote_port)); 5564 #endif 5565 /* Success so far - done with spare_mp */ 5566 ipsec_freemsg_chain(spare_mp); 5567 } 5568 rw_enter(&polhead->iph_lock, RW_READER); 5569 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, 5570 &sel, ns); 5571 rw_exit(&polhead->iph_lock); 5572 if (pol == NULL) { 5573 /* 5574 * No matching policy on this tunnel, drop the packet. 5575 * 5576 * NOTE: Tunnel-mode tunnels are different from the 5577 * IP global transport mode policy head. For a tunnel-mode 5578 * tunnel, we drop the packet in lieu of passing it 5579 * along accepted the way a global-policy miss would. 5580 * 5581 * NOTE2: "negotiate transport" tunnels should match ALL 5582 * inbound packets, but we do not uncomment the ASSERT() 5583 * below because if/when we open PF_POLICY, a user can 5584 * shoot him/her-self in the foot with a 0 priority. 5585 */ 5586 5587 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5588 #ifdef FRAGCACHE_DEBUG 5589 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5590 "per-port policy\n"); 5591 #endif 5592 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5593 DROPPER(ipss, ipds_spd_explicit), 5594 &ipss->ipsec_spd_dropper); 5595 return (NULL); 5596 } 5597 5598 #ifdef FRAGCACHE_DEBUG 5599 cmn_err(CE_WARN, "Having matching tunnel per-port policy\n"); 5600 #endif 5601 5602 /* Construct an IPSEC_OUT message. */ 5603 ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns); 5604 if (ipsec_mp == NULL) { 5605 IPPOL_REFRELE(pol, ns); 5606 ip_drop_packet(mp, B_FALSE, NULL, NULL, 5607 DROPPER(ipss, ipds_spd_nomem), 5608 &ipss->ipsec_spd_dropper); 5609 return (NULL); 5610 } 5611 ipsec_mp->b_cont = mp; 5612 io = (ipsec_out_t *)ipsec_mp->b_rptr; 5613 IPPH_REFHOLD(polhead); 5614 /* 5615 * NOTE: free() function of ipsec_out mblk will release polhead and 5616 * pol references. 5617 */ 5618 io->ipsec_out_polhead = polhead; 5619 io->ipsec_out_policy = pol; 5620 io->ipsec_out_zoneid = atp->tun_zoneid; 5621 io->ipsec_out_v4 = (outer_ipv4 != NULL); 5622 io->ipsec_out_secure = B_TRUE; 5623 5624 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5625 /* Set up transport mode for tunnelled packets. */ 5626 io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5627 IPPROTO_IPV6; 5628 return (ipsec_mp); 5629 } 5630 5631 /* Fill in tunnel-mode goodies here. */ 5632 io->ipsec_out_tunnel = B_TRUE; 5633 /* XXX Do I need to fill in all of the goodies here? 
*/
5634     if (inner_ipv4) {
5635         io->ipsec_out_inaf = AF_INET;
5636         io->ipsec_out_insrc[0] =
5637             pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4;
5638         io->ipsec_out_indst[0] =
5639             pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4;
5640     } else {
5641         io->ipsec_out_inaf = AF_INET6;
5642         io->ipsec_out_insrc[0] =
5643             pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0];
5644         io->ipsec_out_insrc[1] =
5645             pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1];
5646         io->ipsec_out_insrc[2] =
5647             pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2];
5648         io->ipsec_out_insrc[3] =
5649             pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3];
5650         io->ipsec_out_indst[0] =
5651             pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0];
5652         io->ipsec_out_indst[1] =
5653             pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1];
5654         io->ipsec_out_indst[2] =
5655             pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2];
5656         io->ipsec_out_indst[3] =
5657             pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3];
5658     }
5659     io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen;
5660     io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen;
5661     /* NOTE: These are used for transport mode too. */
5662     io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport;
5663     io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport;
5664     io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto;
5665
5666     /*
5667      * The mp pointer is still valid.
5668      * Add an ipsec_out to each fragment;
5669      * the fragment head already has one.
5670      */
5671     nmp = mp->b_next;
5672     mp->b_next = NULL;
5673     mp = nmp;
5674     ASSERT(ipsec_mp != NULL);
5675     while (mp != NULL) {
5676         nmp = mp->b_next;
5677         ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns);
5678         if (ipsec_mp->b_next == NULL) {
5679             ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL,
5680                 DROPPER(ipss, ipds_spd_nomem),
5681                 &ipss->ipsec_spd_dropper);
5682             ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
5683                 DROPPER(ipss, ipds_spd_nomem),
5684                 &ipss->ipsec_spd_dropper);
5685             return (NULL);
5686         }
5687         ipsec_mp = ipsec_mp->b_next;
5688         mp->b_next = NULL;
5689         mp = nmp;
5690     }
5691     return (ipsec_mp_head);
5692 }
5693
5694 /*
5695 * NOTE: The following releases pol's reference and
5696 * calls ip_drop_packet() for me on NULL returns.
5697 */
5698 mblk_t *
5699 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol,
5700     ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns)
5701 {
5702     /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */
5703     mblk_t *data_chain = NULL, *data_tail = NULL;
5704     mblk_t *ii_next;
5705
5706     while (ipsec_mp != NULL) {
5707         ii_next = ipsec_mp->b_next;
5708         ipsec_mp->b_next = NULL;  /* No tripping asserts. */
5709
5710         /*
5711          * Need IPPOL_REFHOLD(pol) for extras because
5712          * ipsec_check_ipsecin_policy() does the refrele.
5713          */
5714         IPPOL_REFHOLD(pol);
5715
5716         if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4,
5717             inner_ipv6, pkt_unique, ns) != NULL) {
5718             if (data_tail == NULL) {
5719                 /* First one */
5720                 data_chain = data_tail = ipsec_mp->b_cont;
5721             } else {
5722                 data_tail->b_next = ipsec_mp->b_cont;
5723                 data_tail = data_tail->b_next;
5724             }
5725             freeb(ipsec_mp);
5726         } else {
5727             /*
5728              * ipsec_check_ipsecin_policy() freed ipsec_mp
5729              * already. Need to get rid of any extra pol
5730              * references, and any remaining bits as well.
5731              */
5732             IPPOL_REFRELE(pol, ns);
5733             ipsec_freemsg_chain(data_chain);
5734             ipsec_freemsg_chain(ii_next);  /* ipdrop stats?
 */
5735 			return (NULL);
5736 		}
5737 		ipsec_mp = ii_next;
5738 	}
5739 	/*
5740 	 * One last release because either the loop bumped it up, or we never
5741 	 * called ipsec_check_ipsecin_policy().
5742 	 */
5743 	IPPOL_REFRELE(pol, ns);
5744 
5745 	/* data_chain is ready for return to tun module. */
5746 	return (data_chain);
5747 }
5748 
5749 
5750 /*
5751  * Returns B_TRUE if the inbound packet passed an IPsec policy check.  Returns
5752  * B_FALSE if it failed or if it is a fragment needing its friends before a
5753  * policy check can be performed.
5754  *
5755  * Expects a non-NULL *data_mp, an optional ipsec_mp, and an optional itp.
5756  * data_mp may be reassigned with a b_next chain of packets if fragments
5757  * needed to be collected for a proper policy check.
5758  *
5759  * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE.  This
5760  * function calls ip_drop_packet() on data_mp if need be.
5761  *
5762  * NOTE: outer_hdr_len is signed.  If it's a negative value, the caller
5763  * is inspecting an ICMP packet.
5764  */
5765 boolean_t
5766 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp,
5767     ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
5768     ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns)
5769 {
5770 	ipsec_policy_head_t *polhead;
5771 	ipsec_selector_t sel;
5772 	mblk_t *message = (ipsec_mp == NULL) ? *data_mp : ipsec_mp;
5773 	ipsec_policy_t *pol;
5774 	uint16_t tmpport;
5775 	selret_t rc;
5776 	boolean_t retval, port_policy_present, is_icmp, global_present;
5777 	in6_addr_t tmpaddr;
5778 	ipaddr_t tmp4;
5779 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5780 	uint8_t flags, *holder, *outer_hdr;
5781 
5782 	sel.ips_is_icmp_inv_acq = 0;
5783 
5784 	if (outer_ipv4 != NULL) {
5785 		ASSERT(outer_ipv6 == NULL);
5786 		outer_hdr = (uint8_t *)outer_ipv4;
5787 		global_present = ipss->ipsec_inbound_v4_policy_present;
5788 	} else {
5789 		outer_hdr = (uint8_t *)outer_ipv6;
5790 		global_present = ipss->ipsec_inbound_v6_policy_present;
5791 	}
5792 	ASSERT(outer_hdr != NULL);
5793 
5794 	ASSERT((inner_ipv4 != NULL && inner_ipv6 == NULL) ||
5795 	    (inner_ipv4 == NULL && inner_ipv6 != NULL));
5796 	ASSERT(message == *data_mp || message->b_cont == *data_mp);
5797 
5798 	if (outer_hdr_len < 0) {
5799 		outer_hdr_len = (-outer_hdr_len);
5800 		is_icmp = B_TRUE;
5801 	} else {
5802 		is_icmp = B_FALSE;
5803 	}
5804 
5805 	if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
5806 		polhead = itp->itp_policy;
5807 		/*
5808 		 * We need to perform full Tunnel-Mode enforcement,
5809 		 * and we need to have inner-header data for such enforcement.
5810 		 *
5811 		 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound
5812 		 * and on return.
5813 		 */
5814 
5815 		port_policy_present = ((itp->itp_flags &
5816 		    ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE);
5817 		flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) |
5818 		    (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE);
5819 
5820 		rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4,
5821 		    inner_ipv6, flags);
5822 
5823 		switch (rc) {
5824 		case SELRET_NOMEM:
5825 			ip_drop_packet(message, B_TRUE, NULL, NULL,
5826 			    DROPPER(ipss, ipds_spd_nomem),
5827 			    &ipss->ipsec_spd_dropper);
5828 			return (B_FALSE);
5829 		case SELRET_TUNFRAG:
5830 			/*
5831 			 * At this point, if we're cleartext, we don't want
5832 			 * to go near the fragment cache.
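			 * The cache exists to reassemble protected
			 * tunnel-mode traffic for a post-reassembly policy
			 * check; letting clear fragments mingle with
			 * protected ones could splice unprotected data
			 * into a protected packet, so drop them instead.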
5833 */ 5834 if (ipsec_mp == NULL) { 5835 ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, 5836 DROPPER(ipss, ipds_spd_got_clear), 5837 &ipss->ipsec_spd_dropper); 5838 *data_mp = NULL; 5839 return (B_FALSE); 5840 } 5841 ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> 5842 ipsec_in_secure); 5843 message = ipsec_fragcache_add(&itp->itp_fragcache, 5844 ipsec_mp, *data_mp, outer_hdr_len, ipss); 5845 5846 if (message == NULL) { 5847 /* 5848 * Data is cached, fragment chain is not 5849 * complete. I consume ipsec_mp and data_mp 5850 */ 5851 return (B_FALSE); 5852 } 5853 5854 /* 5855 * If we get here, we have a full fragment chain. 5856 * Reacquire headers and selectors from first fragment. 5857 */ 5858 if (inner_ipv4 != NULL) { 5859 inner_ipv4 = (ipha_t *)message->b_cont->b_rptr; 5860 ASSERT(message->b_cont->b_wptr - 5861 message->b_cont->b_rptr > sizeof (ipha_t)); 5862 } else { 5863 inner_ipv6 = (ip6_t *)message->b_cont->b_rptr; 5864 ASSERT(message->b_cont->b_wptr - 5865 message->b_cont->b_rptr > sizeof (ip6_t)); 5866 } 5867 /* Use SEL_NONE so we always get ports! */ 5868 rc = ipsec_init_inbound_sel(&sel, message->b_cont, 5869 inner_ipv4, inner_ipv6, SEL_NONE); 5870 switch (rc) { 5871 case SELRET_SUCCESS: 5872 /* 5873 * Get to same place as first caller's 5874 * SELRET_SUCCESS case. 5875 */ 5876 break; 5877 case SELRET_NOMEM: 5878 ip_drop_packet_chain(message, B_TRUE, 5879 NULL, NULL, 5880 DROPPER(ipss, ipds_spd_nomem), 5881 &ipss->ipsec_spd_dropper); 5882 return (B_FALSE); 5883 case SELRET_BADPKT: 5884 ip_drop_packet_chain(message, B_TRUE, 5885 NULL, NULL, 5886 DROPPER(ipss, ipds_spd_malformed_frag), 5887 &ipss->ipsec_spd_dropper); 5888 return (B_FALSE); 5889 case SELRET_TUNFRAG: 5890 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5891 /* FALLTHRU */ 5892 default: 5893 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5894 " returns bizarro 0x%x", rc); 5895 /* Guaranteed panic! */ 5896 ASSERT(rc == SELRET_NOMEM); 5897 return (B_FALSE); 5898 } 5899 /* FALLTHRU */ 5900 case SELRET_SUCCESS: 5901 /* 5902 * Common case: 5903 * No per-port policy or a non-fragment. Keep going. 5904 */ 5905 break; 5906 case SELRET_BADPKT: 5907 /* 5908 * We may receive ICMP (with IPv6 inner) packets that 5909 * trigger this return value. Send 'em in for 5910 * enforcement checking. 5911 */ 5912 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5913 "sending 'bad packet' in for enforcement"); 5914 break; 5915 default: 5916 cmn_err(CE_WARN, 5917 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5918 rc); 5919 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5920 return (B_FALSE); 5921 } 5922 5923 if (is_icmp) { 5924 /* 5925 * Swap local/remote because this is an ICMP packet. 5926 */ 5927 tmpaddr = sel.ips_local_addr_v6; 5928 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5929 sel.ips_remote_addr_v6 = tmpaddr; 5930 tmpport = sel.ips_local_port; 5931 sel.ips_local_port = sel.ips_remote_port; 5932 sel.ips_remote_port = tmpport; 5933 } 5934 5935 /* find_policy_head() */ 5936 rw_enter(&polhead->iph_lock, RW_READER); 5937 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5938 &sel, ns); 5939 rw_exit(&polhead->iph_lock); 5940 if (pol != NULL) { 5941 if (ipsec_mp == NULL || 5942 !((ipsec_in_t *)ipsec_mp->b_rptr)-> 5943 ipsec_in_secure) { 5944 retval = pol->ipsp_act->ipa_allow_clear; 5945 if (!retval) { 5946 /* 5947 * XXX should never get here with 5948 * tunnel reassembled fragments? 
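					 * (Reassembled chains come only
					 * from the fragcache, which only
					 * admits secure packets, so this
					 * cleartext path should see a
					 * single mblk; the ASSERT below
					 * checks exactly that.)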
5949 */ 5950 ASSERT(message->b_next == NULL); 5951 ip_drop_packet(message, B_TRUE, NULL, 5952 NULL, 5953 DROPPER(ipss, ipds_spd_got_clear), 5954 &ipss->ipsec_spd_dropper); 5955 } else if (ipsec_mp != NULL) { 5956 freeb(ipsec_mp); 5957 } 5958 5959 IPPOL_REFRELE(pol, ns); 5960 return (retval); 5961 } 5962 /* 5963 * NOTE: The following releases pol's reference and 5964 * calls ip_drop_packet() for me on NULL returns. 5965 * 5966 * "sel" is still good here, so let's use it! 5967 */ 5968 *data_mp = ipsec_check_ipsecin_policy_reasm(message, 5969 pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( 5970 sel.ips_remote_port, sel.ips_local_port, 5971 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 5972 IPPROTO_ENCAP, sel.ips_protocol), ns); 5973 return (*data_mp != NULL); 5974 } 5975 5976 /* 5977 * Else fallthru and check the global policy on the outer 5978 * header(s) if this tunnel is an old-style transport-mode 5979 * one. Drop the packet explicitly (no policy entry) for 5980 * a new-style tunnel-mode tunnel. 5981 */ 5982 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 5983 ip_drop_packet_chain(message, B_TRUE, NULL, 5984 NULL, 5985 DROPPER(ipss, ipds_spd_explicit), 5986 &ipss->ipsec_spd_dropper); 5987 return (B_FALSE); 5988 } 5989 } 5990 5991 /* 5992 * NOTE: If we reach here, we will not have packet chains from 5993 * fragcache_add(), because the only way I get chains is on a 5994 * tunnel-mode tunnel, which either returns with a pass, or gets 5995 * hit by the ip_drop_packet_chain() call right above here. 5996 */ 5997 5998 /* If no per-tunnel security, check global policy now. */ 5999 if (ipsec_mp != NULL && !global_present) { 6000 if (((ipsec_in_t *)(ipsec_mp->b_rptr))-> 6001 ipsec_in_icmp_loopback) { 6002 /* 6003 * This is an ICMP message with an ipsec_mp 6004 * attached. We should accept it. 6005 */ 6006 if (ipsec_mp != NULL) 6007 freeb(ipsec_mp); 6008 return (B_TRUE); 6009 } 6010 6011 ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, 6012 DROPPER(ipss, ipds_spd_got_secure), 6013 &ipss->ipsec_spd_dropper); 6014 return (B_FALSE); 6015 } 6016 6017 /* 6018 * The following assertion is valid because only the tun module alters 6019 * the mblk chain - stripping the outer header by advancing mp->b_rptr. 6020 */ 6021 ASSERT(is_icmp || 6022 ((*data_mp)->b_datap->db_base <= outer_hdr && 6023 outer_hdr < (*data_mp)->b_rptr)); 6024 holder = (*data_mp)->b_rptr; 6025 (*data_mp)->b_rptr = outer_hdr; 6026 6027 if (is_icmp) { 6028 /* 6029 * For ICMP packets, "outer_ipvN" is set to the outer header 6030 * that is *INSIDE* the ICMP payload. For global policy 6031 * checking, we need to reverse src/dst on the payload in 6032 * order to construct selectors appropriately. See "ripha" 6033 * constructions in ip.c. To avoid a bug like 6478464 (see 6034 * earlier in this file), we will actually exchange src/dst 6035 * in the packet, and reverse if after the call to 6036 * ipsec_check_global_policy(). 6037 */ 6038 if (outer_ipv4 != NULL) { 6039 tmp4 = outer_ipv4->ipha_src; 6040 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6041 outer_ipv4->ipha_dst = tmp4; 6042 } else { 6043 ASSERT(outer_ipv6 != NULL); 6044 tmpaddr = outer_ipv6->ip6_src; 6045 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6046 outer_ipv6->ip6_dst = tmpaddr; 6047 } 6048 } 6049 6050 /* NOTE: Frees message if it returns NULL. */ 6051 if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6, 6052 (ipsec_mp != NULL), ns) == NULL) { 6053 return (B_FALSE); 6054 } 6055 6056 if (is_icmp) { 6057 /* Set things back to normal. 
 */
6058 		if (outer_ipv4 != NULL) {
6059 			tmp4 = outer_ipv4->ipha_src;
6060 			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
6061 			outer_ipv4->ipha_dst = tmp4;
6062 		} else {
6063 			/* No need for ASSERT()s now. */
6064 			tmpaddr = outer_ipv6->ip6_src;
6065 			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
6066 			outer_ipv6->ip6_dst = tmpaddr;
6067 		}
6068 	}
6069 
6070 	(*data_mp)->b_rptr = holder;
6071 
6072 	if (ipsec_mp != NULL)
6073 		freeb(ipsec_mp);
6074 
6075 	/*
6076 	 * At this point, we pretend it's a cleartext accepted
6077 	 * packet.
6078 	 */
6079 	return (B_TRUE);
6080 }
6081 
6082 /*
6083  * AVL comparison routine for our list of tunnel polheads.
6084  */
6085 static int
6086 tunnel_compare(const void *arg1, const void *arg2)
6087 {
6088 	ipsec_tun_pol_t *left, *right;
6089 	int rc;
6090 
6091 	left = (ipsec_tun_pol_t *)arg1;
6092 	right = (ipsec_tun_pol_t *)arg2;
6093 
6094 	rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ);
6095 	return (rc == 0 ? rc : (rc > 0 ? 1 : -1));
6096 }
6097 
6098 /*
6099  * Free a tunnel policy node.
6100  */
6101 void
6102 itp_free(ipsec_tun_pol_t *node, netstack_t *ns)
6103 {
6104 	IPPH_REFRELE(node->itp_policy, ns);
6105 	IPPH_REFRELE(node->itp_inactive, ns);
6106 	mutex_destroy(&node->itp_lock);
6107 	kmem_free(node, sizeof (*node));
6108 }
6109 
6110 void
6111 itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns)
6112 {
6113 	ipsec_stack_t *ipss = ns->netstack_ipsec;
6114 
6115 	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
6116 	ipss->ipsec_tunnel_policy_gen++;
6117 	ipsec_fragcache_uninit(&node->itp_fragcache);
6118 	avl_remove(&ipss->ipsec_tunnel_policies, node);
6119 	rw_exit(&ipss->ipsec_tunnel_policy_lock);
6120 	ITP_REFRELE(node, ns);
6121 }
6122 
6123 /*
6124  * Public interface to look up a tunnel security policy by name.  Used by
6125  * spdsock mostly.  Returns "node" with a bumped refcnt.
6126  */
6127 ipsec_tun_pol_t *
6128 get_tunnel_policy(char *name, netstack_t *ns)
6129 {
6130 	ipsec_tun_pol_t *node, lookup;
6131 	ipsec_stack_t *ipss = ns->netstack_ipsec;
6132 
6133 	(void) strncpy(lookup.itp_name, name, LIFNAMSIZ);
6134 
6135 	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
6136 	node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
6137 	    &lookup, NULL);
6138 	if (node != NULL) {
6139 		ITP_REFHOLD(node);
6140 	}
6141 	rw_exit(&ipss->ipsec_tunnel_policy_lock);
6142 
6143 	return (node);
6144 }
6145 
6146 /*
6147  * Public interface to walk all tunnel security policies.  Useful for spdsock
6148  * DUMP operations.  iterator() will not consume a reference.
6149  */
6150 void
6151 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *),
6152     void *arg, netstack_t *ns)
6153 {
6154 	ipsec_tun_pol_t *node;
6155 	ipsec_stack_t *ipss = ns->netstack_ipsec;
6156 
6157 	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
6158 	for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL;
6159 	    node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) {
6160 		iterator(node, arg, ns);
6161 	}
6162 	rw_exit(&ipss->ipsec_tunnel_policy_lock);
6163 }
6164 
6165 /*
6166  * Initialize policy head.  This can only fail if there's a memory problem.
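 * (The head is created holding one reference -- iph_refs is set to 1
 * below -- which its owner later drops with IPPH_REFRELE(), as itp_free()
 * does for both heads of a tunnel policy node.)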
6167  */
6168 static boolean_t
6169 tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns)
6170 {
6171 	ipsec_stack_t *ipss = ns->netstack_ipsec;
6172 
6173 	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
6174 	iph->iph_refs = 1;
6175 	iph->iph_gen = 0;
6176 	if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize,
6177 	    KM_SLEEP, B_FALSE, ns) != 0) {
6178 		ipsec_polhead_free_table(iph);
6179 		return (B_FALSE);
6180 	}
6181 	ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize);
6182 	return (B_TRUE);
6183 }
6184 
6185 /*
6186  * Create a tunnel policy node with "name".  Sets *errno to ENOMEM if
6187  * there's a memory problem, and to EEXIST if there's an existing node
6188  * with the same name.
6189  */
6190 ipsec_tun_pol_t *
6191 create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns)
6192 {
6193 	ipsec_tun_pol_t *newbie, *existing;
6194 	avl_index_t where;
6195 	ipsec_stack_t *ipss = ns->netstack_ipsec;
6196 
6197 	newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
6198 	if (newbie == NULL) {
6199 		*errno = ENOMEM;
6200 		return (NULL);
6201 	}
6202 	if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
6203 		kmem_free(newbie, sizeof (*newbie));
6204 		*errno = ENOMEM;
6205 		return (NULL);
6206 	}
6207 
6208 	(void) strncpy(newbie->itp_name, name, LIFNAMSIZ);
6209 
6210 	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
6211 	existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
6212 	    newbie, &where);
6213 	if (existing != NULL) {
6214 		/*
		 * Can't use itp_free() yet; the policy heads haven't been
		 * allocated and itp_lock hasn't been initialized, so tear
		 * down only what exists at this point.
		 */
		ipsec_fragcache_uninit(&newbie->itp_fragcache);
		kmem_free(newbie, sizeof (*newbie));
6215 		*errno = EEXIST;
6216 		rw_exit(&ipss->ipsec_tunnel_policy_lock);
6217 		return (NULL);
6218 	}
6219 	ipss->ipsec_tunnel_policy_gen++;
6220 	*gen = ipss->ipsec_tunnel_policy_gen;
6221 	newbie->itp_refcnt = 2;	/* One for the caller, one for the tree. */
6222 	newbie->itp_next_policy_index = 1;
6223 	avl_insert(&ipss->ipsec_tunnel_policies, newbie, where);
6224 	mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
6225 	newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
6226 	    KM_NOSLEEP);
6227 	if (newbie->itp_policy == NULL)
6228 		goto nomem;
6229 	newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
6230 	    KM_NOSLEEP);
6231 	if (newbie->itp_inactive == NULL) {
6232 		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
6233 		goto nomem;
6234 	}
6235 
6236 	if (!tunnel_polhead_init(newbie->itp_policy, ns)) {
6237 		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
6238 		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
6239 		goto nomem;
6240 	} else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) {
6241 		IPPH_REFRELE(newbie->itp_policy, ns);
6242 		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
6243 		goto nomem;
6244 	}
6245 	rw_exit(&ipss->ipsec_tunnel_policy_lock);
6246 
6247 	return (newbie);
6248 nomem:
6249 	*errno = ENOMEM;
	/*
	 * Back out the partial construction: the node is already in the
	 * AVL tree and we still hold the write lock, so unlink it and
	 * drop the lock before freeing; otherwise we would free a node
	 * still visible in the tree and leave the lock held forever.
	 */
	avl_remove(&ipss->ipsec_tunnel_policies, newbie);
	mutex_destroy(&newbie->itp_lock);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	ipsec_fragcache_uninit(&newbie->itp_fragcache);
6250 	kmem_free(newbie, sizeof (*newbie));
6251 	return (NULL);
6252 }
6253 
6254 /*
6255  * We can't call the tun_t lookup function until tun is
6256  * loaded, so create a dummy function to avoid symbol
6257  * lookup errors on boot.
6258  */
6259 /* ARGSUSED */
6260 ipsec_tun_pol_t *
6261 itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af, netstack_t *ns)
6262 {
6263 	return (NULL);	/* Always return NULL. */
6264 }
6265 
6266 /*
6267  * Frag cache code, based on SunScreen 3.2 source
6268  * screen/kernel/common/screen_fragcache.c
6269  */
6270 
6271 #define	IPSEC_FRAG_TTL_MAX	5
6272 /*
6273  * Note that the following parameters create 256 hash buckets
6274  * with 1024 free entries to be distributed.
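 * (That is, IPSEC_FRAG_HASH_SLOTS (1<<8) buckets times
 * IPSEC_FRAG_HASH_FACTOR (4) gives the IPSEC_FRAG_HASH_SIZE (1024)
 * preallocated entries below.  IPSEC_FRAG_HASH_FUNC() folds an IP ident
 * into a bucket by XORing its low byte with its high byte; for example,
 * ident 0x1234 hashes to (0x34 ^ 0x12) = bucket 0x26.)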
Entries are cleaned
6275  * periodically, and a cleanup is also attempted whenever there is no
6276  * free space; the system errs on the side of dropping packets rather
6277  * than exhausting memory.  We may decide to make the hash
6278  * factor a tunable if this proves to be a bad decision.
6279  */
6280 #define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
6281 #define	IPSEC_FRAG_HASH_FACTOR	4
6282 #define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)
6283 
6284 #define	IPSEC_FRAG_HASH_MASK		(IPSEC_FRAG_HASH_SLOTS - 1)
6285 #define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
6286 					    (((id) / \
6287 					    (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
6288 					    IPSEC_FRAG_HASH_MASK))
6289 
6290 /* Maximum fragments per packet.  48 bytes payload x 1366 fragments > 64KB */
6291 #define	IPSEC_MAX_FRAGS		1366
6292 
6293 #define	V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
6294 	IPH_OFFSET) << 3)
6295 #define	V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
6296 	IPH_MF)
6297 
6298 /*
6299  * Initialize an ipsec fragcache instance.
6300  * Returns B_FALSE if memory allocation fails.
6301  */
6302 boolean_t
6303 ipsec_fragcache_init(ipsec_fragcache_t *frag)
6304 {
6305 	ipsec_fragcache_entry_t *ftemp;
6306 	int i;
6307 
6308 	mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL);
6309 	frag->itpf_ptr = (ipsec_fragcache_entry_t **)
6310 	    kmem_zalloc(
6311 	    sizeof (ipsec_fragcache_entry_t *) *
6312 	    IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP);
6313 	if (frag->itpf_ptr == NULL)
6314 		return (B_FALSE);
6315 
6316 	ftemp = (ipsec_fragcache_entry_t *)
6317 	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t) *
6318 	    IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP);
6319 	if (ftemp == NULL) {
6320 		kmem_free(frag->itpf_ptr,
6321 		    sizeof (ipsec_fragcache_entry_t *) *
6322 		    IPSEC_FRAG_HASH_SLOTS);
6323 		return (B_FALSE);
6324 	}
6325 
6326 	frag->itpf_freelist = NULL;
6327 
6328 	for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) {
6329 		ftemp->itpfe_next = frag->itpf_freelist;
6330 		frag->itpf_freelist = ftemp;
6331 		ftemp++;
6332 	}
6333 
6334 	frag->itpf_expire_hint = 0;
6335 
6336 	return (B_TRUE);
6337 }
6338 
6339 void
6340 ipsec_fragcache_uninit(ipsec_fragcache_t *frag)
6341 {
6342 	ipsec_fragcache_entry_t *fep;
6343 	int i;
6344 
6345 	mutex_enter(&frag->itpf_lock);
6346 	if (frag->itpf_ptr) {
6347 		/* Delete any existing fragcache entry chains */
6348 		for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
6349 			fep = (frag->itpf_ptr)[i];
6350 			while (fep != NULL) {
6351 				/* Returned fep is next in chain or NULL */
6352 				fep = fragcache_delentry(i, fep, frag);
6353 			}
6354 		}
6355 		/*
6356 		 * Chase the pointers back to the beginning
6357 		 * of the memory allocation and then
6358 		 * get rid of the allocated freelist
6359 		 */
6360 		while (frag->itpf_freelist->itpfe_next != NULL)
6361 			frag->itpf_freelist = frag->itpf_freelist->itpfe_next;
6362 		/*
6363 		 * XXX - If we ever dynamically grow the freelist
6364 		 * then we'll have to free entries individually
6365 		 * or determine how many entries or chunks we have
6366 		 * grown since the initial allocation.
6367 		 */
6368 		kmem_free(frag->itpf_freelist,
6369 		    sizeof (ipsec_fragcache_entry_t) *
6370 		    IPSEC_FRAG_HASH_SIZE);
6371 		/* Free the hash bucket array */
6372 		kmem_free(frag->itpf_ptr,
6373 		    sizeof (ipsec_fragcache_entry_t *) *
6374 		    IPSEC_FRAG_HASH_SLOTS);
6375 	}
6376 	mutex_exit(&frag->itpf_lock);
6377 	mutex_destroy(&frag->itpf_lock);
6378 }
6379 
6380 /*
6381  * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
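 * For inbound packets, each element of the cached chain is an IPSEC_IN
 * M_CTL whose b_cont carries the data; for outbound packets the chain
 * holds bare data mblks (inbound is inferred from ipsec_mp != NULL).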
6382  * Returns mp once a whole packet has been assembled, NULL otherwise.
6383  */
6384 
6385 mblk_t *
6386 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
6387     int outer_hdr_len, ipsec_stack_t *ipss)
6388 {
6389 	boolean_t is_v4;
6390 	time_t itpf_time;
6391 	ipha_t *iph;
6392 	ipha_t *oiph;
6393 	ip6_t *ip6h = NULL;
6394 	uint8_t v6_proto;
6395 	uint8_t *v6_proto_p;
6396 	uint16_t ip6_hdr_length;
6397 	ip6_pkt_t ipp;
6398 	ip6_frag_t *fraghdr;
6399 	ipsec_fragcache_entry_t *fep;
6400 	int i;
6401 	mblk_t *nmp, *prevmp, *spare_mp = NULL;
6402 	int firstbyte, lastbyte;
6403 	int offset;
6404 	int last;
6405 	boolean_t inbound = (ipsec_mp != NULL);
6406 	mblk_t *first_mp = inbound ? ipsec_mp : mp;
6407 
6408 	mutex_enter(&frag->itpf_lock);
6409 
6410 	oiph = (ipha_t *)mp->b_rptr;
6411 	iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);
6412 	if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
6413 		is_v4 = B_TRUE;
6414 	} else {
6415 		ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
6416 		if ((spare_mp = msgpullup(mp, -1)) == NULL) {
6417 			mutex_exit(&frag->itpf_lock);
6418 			ip_drop_packet(first_mp, inbound, NULL, NULL,
6419 			    DROPPER(ipss, ipds_spd_nomem),
6420 			    &ipss->ipsec_spd_dropper);
6421 			return (NULL);
6422 		}
6423 		ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len);
6424 
6425 		if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length,
6426 		    &v6_proto_p)) {
6427 			/*
6428 			 * Couldn't find the upper-layer protocol;
6429 			 * the packet must be malformed.
6430 			 */
6431 			mutex_exit(&frag->itpf_lock);
6432 			ip_drop_packet(first_mp, inbound, NULL, NULL,
6433 			    DROPPER(ipss, ipds_spd_malformed_packet),
6434 			    &ipss->ipsec_spd_dropper);
6435 			freemsg(spare_mp);
6436 			return (NULL);
6437 		} else {
6438 			v6_proto = *v6_proto_p;
6439 		}
6440 
6441 
6442 		bzero(&ipp, sizeof (ipp));
6443 		(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
6444 		if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
6445 			/*
6446 			 * We think this is a fragment, but didn't find
6447 			 * a fragment header.  Something is wrong.
6448 			 */
6449 			mutex_exit(&frag->itpf_lock);
6450 			ip_drop_packet(first_mp, inbound, NULL, NULL,
6451 			    DROPPER(ipss, ipds_spd_malformed_frag),
6452 			    &ipss->ipsec_spd_dropper);
6453 			freemsg(spare_mp);
6454 			return (NULL);
6455 		}
6456 		fraghdr = ipp.ipp_fraghdr;
6457 		is_v4 = B_FALSE;
6458 	}
6459 
6460 	/* Anything to clean up? */
6461 
6462 	/*
6463 	 * This cleanup call could be put in a timer loop,
6464 	 * but it may actually be just as reasonable a decision to
6465 	 * leave it here.  The disadvantage is this only gets called when
6466 	 * frags are added.  The advantage is that it is not
6467 	 * susceptible to race conditions like a time-based cleanup
6468 	 * may be.
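	 * (The itpf_expire_hint set in ipsec_fragcache_clean() lets most
	 * calls skip the sweep entirely: we only rescan once the clock
	 * reaches the earliest expiry recorded on the previous pass.)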
6469 */ 6470 itpf_time = gethrestime_sec(); 6471 if (itpf_time >= frag->itpf_expire_hint) 6472 ipsec_fragcache_clean(frag); 6473 6474 /* Lookup to see if there is an existing entry */ 6475 6476 if (is_v4) 6477 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6478 else 6479 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6480 6481 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6482 if (is_v4) { 6483 ASSERT(iph != NULL); 6484 if ((fep->itpfe_id == iph->ipha_ident) && 6485 (fep->itpfe_src == iph->ipha_src) && 6486 (fep->itpfe_dst == iph->ipha_dst) && 6487 (fep->itpfe_proto == iph->ipha_protocol)) 6488 break; 6489 } else { 6490 ASSERT(fraghdr != NULL); 6491 ASSERT(fep != NULL); 6492 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6493 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6494 &ip6h->ip6_src) && 6495 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6496 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6497 break; 6498 } 6499 } 6500 6501 if (is_v4) { 6502 firstbyte = V4_FRAG_OFFSET(iph); 6503 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6504 IPH_HDR_LENGTH(iph); 6505 last = (V4_MORE_FRAGS(iph) == 0); 6506 #ifdef FRAGCACHE_DEBUG 6507 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6508 "last = %d, id = %d\n", firstbyte, lastbyte, last, 6509 iph->ipha_ident); 6510 #endif 6511 } else { 6512 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6513 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6514 sizeof (ip6_t) - ip6_hdr_length; 6515 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6516 #ifdef FRAGCACHE_DEBUG 6517 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6518 "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n", 6519 firstbyte, lastbyte, last, fraghdr->ip6f_ident, 6520 fraghdr, spare_mp); 6521 #endif 6522 } 6523 6524 /* check for bogus fragments and delete the entry */ 6525 if (firstbyte > 0 && firstbyte <= 8) { 6526 if (fep != NULL) 6527 (void) fragcache_delentry(i, fep, frag); 6528 mutex_exit(&frag->itpf_lock); 6529 ip_drop_packet(first_mp, inbound, NULL, NULL, 6530 DROPPER(ipss, ipds_spd_malformed_frag), 6531 &ipss->ipsec_spd_dropper); 6532 freemsg(spare_mp); 6533 return (NULL); 6534 } 6535 6536 /* Not found, allocate a new entry */ 6537 if (fep == NULL) { 6538 if (frag->itpf_freelist == NULL) { 6539 /* see if there is some space */ 6540 ipsec_fragcache_clean(frag); 6541 if (frag->itpf_freelist == NULL) { 6542 mutex_exit(&frag->itpf_lock); 6543 ip_drop_packet(first_mp, inbound, NULL, NULL, 6544 DROPPER(ipss, ipds_spd_nomem), 6545 &ipss->ipsec_spd_dropper); 6546 freemsg(spare_mp); 6547 return (NULL); 6548 } 6549 } 6550 6551 fep = frag->itpf_freelist; 6552 frag->itpf_freelist = fep->itpfe_next; 6553 6554 if (is_v4) { 6555 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6556 sizeof (struct in_addr)); 6557 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6558 sizeof (struct in_addr)); 6559 fep->itpfe_id = iph->ipha_ident; 6560 fep->itpfe_proto = iph->ipha_protocol; 6561 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6562 } else { 6563 bcopy((in6_addr_t *)&ip6h->ip6_src, 6564 (in6_addr_t *)&fep->itpfe_src6, 6565 sizeof (struct in6_addr)); 6566 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6567 (in6_addr_t *)&fep->itpfe_dst6, 6568 sizeof (struct in6_addr)); 6569 fep->itpfe_id = fraghdr->ip6f_ident; 6570 fep->itpfe_proto = v6_proto; 6571 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6572 } 6573 itpf_time = gethrestime_sec(); 6574 fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1; 6575 fep->itpfe_last = 0; 6576 fep->itpfe_fraglist = NULL; 6577 
fep->itpfe_depth = 0; 6578 fep->itpfe_next = (frag->itpf_ptr)[i]; 6579 (frag->itpf_ptr)[i] = fep; 6580 6581 if (frag->itpf_expire_hint > fep->itpfe_exp) 6582 frag->itpf_expire_hint = fep->itpfe_exp; 6583 6584 } 6585 freemsg(spare_mp); 6586 6587 /* Insert it in the frag list */ 6588 /* List is in order by starting offset of fragments */ 6589 6590 prevmp = NULL; 6591 for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) { 6592 ipha_t *niph; 6593 ipha_t *oniph; 6594 ip6_t *nip6h; 6595 ip6_pkt_t nipp; 6596 ip6_frag_t *nfraghdr; 6597 uint16_t nip6_hdr_length; 6598 uint8_t *nv6_proto_p; 6599 int nfirstbyte, nlastbyte; 6600 char *data, *ndata; 6601 mblk_t *nspare_mp = NULL; 6602 mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp); 6603 int hdr_len; 6604 6605 oniph = (ipha_t *)mp->b_rptr; 6606 nip6h = NULL; 6607 niph = NULL; 6608 6609 /* 6610 * Determine outer header type and length and set 6611 * pointers appropriately 6612 */ 6613 6614 if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) { 6615 hdr_len = ((outer_hdr_len != 0) ? 6616 IPH_HDR_LENGTH(oiph) : 0); 6617 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6618 } else { 6619 ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION); 6620 if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) { 6621 mutex_exit(&frag->itpf_lock); 6622 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6623 DROPPER(ipss, ipds_spd_nomem), 6624 &ipss->ipsec_spd_dropper); 6625 return (NULL); 6626 } 6627 nip6h = (ip6_t *)nspare_mp->b_rptr; 6628 (void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6629 &nip6_hdr_length, &v6_proto_p); 6630 hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0); 6631 } 6632 6633 /* 6634 * Determine inner header type and length and set 6635 * pointers appropriately 6636 */ 6637 6638 if (is_v4) { 6639 if (niph == NULL) { 6640 /* Was v6 outer */ 6641 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6642 } 6643 nfirstbyte = V4_FRAG_OFFSET(niph); 6644 nlastbyte = nfirstbyte + ntohs(niph->ipha_length) - 6645 IPH_HDR_LENGTH(niph); 6646 } else { 6647 if ((nspare_mp == NULL) && 6648 ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) { 6649 mutex_exit(&frag->itpf_lock); 6650 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6651 DROPPER(ipss, ipds_spd_nomem), 6652 &ipss->ipsec_spd_dropper); 6653 return (NULL); 6654 } 6655 nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len); 6656 if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6657 &nip6_hdr_length, &nv6_proto_p)) { 6658 mutex_exit(&frag->itpf_lock); 6659 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6660 DROPPER(ipss, ipds_spd_malformed_frag), 6661 &ipss->ipsec_spd_dropper); 6662 ipsec_freemsg_chain(nspare_mp); 6663 return (NULL); 6664 } 6665 bzero(&nipp, sizeof (nipp)); 6666 (void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL); 6667 nfraghdr = nipp.ipp_fraghdr; 6668 nfirstbyte = ntohs(nfraghdr->ip6f_offlg & 6669 IP6F_OFF_MASK); 6670 nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) + 6671 sizeof (ip6_t) - nip6_hdr_length; 6672 } 6673 ipsec_freemsg_chain(nspare_mp); 6674 6675 /* Check for overlapping fragments */ 6676 if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) { 6677 /* 6678 * Overlap Check: 6679 * ~~~~--------- # Check if the newly 6680 * ~ ndata_mp| # received fragment 6681 * ~~~~--------- # overlaps with the 6682 * ---------~~~~~~ # current fragment. 
6683 * | mp ~ 6684 * ---------~~~~~~ 6685 */ 6686 if (is_v4) { 6687 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6688 firstbyte - nfirstbyte; 6689 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6690 } else { 6691 data = (char *)ip6h + 6692 nip6_hdr_length + firstbyte - 6693 nfirstbyte; 6694 ndata = (char *)nip6h + nip6_hdr_length; 6695 } 6696 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) 6697 - firstbyte)) { 6698 /* Overlapping data does not match */ 6699 (void) fragcache_delentry(i, fep, frag); 6700 mutex_exit(&frag->itpf_lock); 6701 ip_drop_packet(first_mp, inbound, NULL, NULL, 6702 DROPPER(ipss, ipds_spd_overlap_frag), 6703 &ipss->ipsec_spd_dropper); 6704 return (NULL); 6705 } 6706 /* Part of defense for jolt2.c fragmentation attack */ 6707 if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) { 6708 /* 6709 * Check for identical or subset fragments: 6710 * ---------- ~~~~--------~~~~~ 6711 * | nmp | or ~ nmp ~ 6712 * ---------- ~~~~--------~~~~~ 6713 * ---------- ------ 6714 * | mp | | mp | 6715 * ---------- ------ 6716 */ 6717 mutex_exit(&frag->itpf_lock); 6718 ip_drop_packet(first_mp, inbound, NULL, NULL, 6719 DROPPER(ipss, ipds_spd_evil_frag), 6720 &ipss->ipsec_spd_dropper); 6721 return (NULL); 6722 } 6723 6724 } 6725 6726 /* Correct location for this fragment? */ 6727 if (firstbyte <= nfirstbyte) { 6728 /* 6729 * Check if the tail end of the new fragment overlaps 6730 * with the head of the current fragment. 6731 * --------~~~~~~~ 6732 * | nmp ~ 6733 * --------~~~~~~~ 6734 * ~~~~~-------- 6735 * ~ mp | 6736 * ~~~~~-------- 6737 */ 6738 if (lastbyte > nfirstbyte) { 6739 /* Fragments overlap */ 6740 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6741 firstbyte - nfirstbyte; 6742 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6743 if (is_v4) { 6744 data = (char *)iph + 6745 IPH_HDR_LENGTH(iph) + firstbyte - 6746 nfirstbyte; 6747 ndata = (char *)niph + 6748 IPH_HDR_LENGTH(niph); 6749 } else { 6750 data = (char *)ip6h + 6751 nip6_hdr_length + firstbyte - 6752 nfirstbyte; 6753 ndata = (char *)nip6h + nip6_hdr_length; 6754 } 6755 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) 6756 - nfirstbyte)) { 6757 /* Overlap mismatch */ 6758 (void) fragcache_delentry(i, fep, frag); 6759 mutex_exit(&frag->itpf_lock); 6760 ip_drop_packet(first_mp, inbound, NULL, 6761 NULL, 6762 DROPPER(ipss, ipds_spd_overlap_frag), 6763 &ipss->ipsec_spd_dropper); 6764 return (NULL); 6765 } 6766 } 6767 6768 /* 6769 * Fragment does not illegally overlap and can now 6770 * be inserted into the chain 6771 */ 6772 break; 6773 } 6774 6775 prevmp = nmp; 6776 } 6777 first_mp->b_next = nmp; 6778 6779 if (prevmp == NULL) { 6780 fep->itpfe_fraglist = first_mp; 6781 } else { 6782 prevmp->b_next = first_mp; 6783 } 6784 if (last) 6785 fep->itpfe_last = 1; 6786 6787 /* Part of defense for jolt2.c fragmentation attack */ 6788 if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) { 6789 (void) fragcache_delentry(i, fep, frag); 6790 mutex_exit(&frag->itpf_lock); 6791 ip_drop_packet(first_mp, inbound, NULL, NULL, 6792 DROPPER(ipss, ipds_spd_max_frags), 6793 &ipss->ipsec_spd_dropper); 6794 return (NULL); 6795 } 6796 6797 /* Check for complete packet */ 6798 6799 if (!fep->itpfe_last) { 6800 mutex_exit(&frag->itpf_lock); 6801 #ifdef FRAGCACHE_DEBUG 6802 cmn_err(CE_WARN, "Fragment cached, not last.\n"); 6803 #endif 6804 return (NULL); 6805 } 6806 6807 #ifdef FRAGCACHE_DEBUG 6808 cmn_err(CE_WARN, "Last fragment cached.\n"); 6809 cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp); 6810 #endif 6811 6812 offset = 0; 6813 for (mp = fep->itpfe_fraglist; 
mp; mp = mp->b_next) { 6814 mblk_t *data_mp = (inbound ? mp->b_cont : mp); 6815 int hdr_len; 6816 6817 oiph = (ipha_t *)data_mp->b_rptr; 6818 ip6h = NULL; 6819 iph = NULL; 6820 6821 spare_mp = NULL; 6822 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 6823 hdr_len = ((outer_hdr_len != 0) ? 6824 IPH_HDR_LENGTH(oiph) : 0); 6825 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6826 } else { 6827 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 6828 if ((spare_mp = msgpullup(data_mp, -1)) == NULL) { 6829 mutex_exit(&frag->itpf_lock); 6830 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6831 DROPPER(ipss, ipds_spd_nomem), 6832 &ipss->ipsec_spd_dropper); 6833 return (NULL); 6834 } 6835 ip6h = (ip6_t *)spare_mp->b_rptr; 6836 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 6837 &ip6_hdr_length, &v6_proto_p); 6838 hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0); 6839 } 6840 6841 /* Calculate current fragment start/end */ 6842 if (is_v4) { 6843 if (iph == NULL) { 6844 /* Was v6 outer */ 6845 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6846 } 6847 firstbyte = V4_FRAG_OFFSET(iph); 6848 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6849 IPH_HDR_LENGTH(iph); 6850 } else { 6851 if ((spare_mp == NULL) && 6852 ((spare_mp = msgpullup(data_mp, -1)) == NULL)) { 6853 mutex_exit(&frag->itpf_lock); 6854 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6855 DROPPER(ipss, ipds_spd_nomem), 6856 &ipss->ipsec_spd_dropper); 6857 return (NULL); 6858 } 6859 ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len); 6860 if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 6861 &ip6_hdr_length, &v6_proto_p)) { 6862 mutex_exit(&frag->itpf_lock); 6863 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6864 DROPPER(ipss, ipds_spd_malformed_frag), 6865 &ipss->ipsec_spd_dropper); 6866 ipsec_freemsg_chain(spare_mp); 6867 return (NULL); 6868 } 6869 v6_proto = *v6_proto_p; 6870 bzero(&ipp, sizeof (ipp)); 6871 (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL); 6872 fraghdr = ipp.ipp_fraghdr; 6873 firstbyte = ntohs(fraghdr->ip6f_offlg & 6874 IP6F_OFF_MASK); 6875 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6876 sizeof (ip6_t) - ip6_hdr_length; 6877 } 6878 6879 /* 6880 * If this fragment is greater than current offset, 6881 * we have a missing fragment so return NULL 6882 */ 6883 if (firstbyte > offset) { 6884 mutex_exit(&frag->itpf_lock); 6885 #ifdef FRAGCACHE_DEBUG 6886 /* 6887 * Note, this can happen when the last frag 6888 * gets sent through because it is smaller 6889 * than the MTU. It is not necessarily an 6890 * error condition. 6891 */ 6892 cmn_err(CE_WARN, "Frag greater than offset! 
: " 6893 "missing fragment: firstbyte = %d, offset = %d, " 6894 "mp = %p\n", firstbyte, offset, mp); 6895 #endif 6896 ipsec_freemsg_chain(spare_mp); 6897 return (NULL); 6898 } 6899 6900 /* 6901 * If we are at the last fragment, we have the complete 6902 * packet, so rechain things and return it to caller 6903 * for processing 6904 */ 6905 6906 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6907 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6908 mp = fep->itpfe_fraglist; 6909 fep->itpfe_fraglist = NULL; 6910 (void) fragcache_delentry(i, fep, frag); 6911 mutex_exit(&frag->itpf_lock); 6912 6913 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6914 65535)) || (!is_v4 && (firstbyte + 6915 ntohs(ip6h->ip6_plen) > 65535))) { 6916 /* It is an invalid "ping-o-death" packet */ 6917 /* Discard it */ 6918 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6919 DROPPER(ipss, ipds_spd_evil_frag), 6920 &ipss->ipsec_spd_dropper); 6921 ipsec_freemsg_chain(spare_mp); 6922 return (NULL); 6923 } 6924 #ifdef FRAGCACHE_DEBUG 6925 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6926 "mp->b_next = %p", mp, mp->b_next); 6927 #endif 6928 ipsec_freemsg_chain(spare_mp); 6929 /* 6930 * For inbound case, mp has ipsec_in b_next'd chain 6931 * For outbound case, it is just data mp chain 6932 */ 6933 return (mp); 6934 } 6935 ipsec_freemsg_chain(spare_mp); 6936 6937 /* 6938 * Update new ending offset if this 6939 * fragment extends the packet 6940 */ 6941 if (offset < lastbyte) 6942 offset = lastbyte; 6943 } 6944 6945 mutex_exit(&frag->itpf_lock); 6946 6947 /* Didn't find last fragment, so return NULL */ 6948 return (NULL); 6949 } 6950 6951 static void 6952 ipsec_fragcache_clean(ipsec_fragcache_t *frag) 6953 { 6954 ipsec_fragcache_entry_t *fep; 6955 int i; 6956 ipsec_fragcache_entry_t *earlyfep = NULL; 6957 time_t itpf_time; 6958 int earlyexp; 6959 int earlyi = 0; 6960 6961 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6962 6963 itpf_time = gethrestime_sec(); 6964 earlyexp = itpf_time + 10000; 6965 6966 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6967 fep = (frag->itpf_ptr)[i]; 6968 while (fep) { 6969 if (fep->itpfe_exp < itpf_time) { 6970 /* found */ 6971 fep = fragcache_delentry(i, fep, frag); 6972 } else { 6973 if (fep->itpfe_exp < earlyexp) { 6974 earlyfep = fep; 6975 earlyexp = fep->itpfe_exp; 6976 earlyi = i; 6977 } 6978 fep = fep->itpfe_next; 6979 } 6980 } 6981 } 6982 6983 frag->itpf_expire_hint = earlyexp; 6984 6985 /* if (!found) */ 6986 if (frag->itpf_freelist == NULL) 6987 (void) fragcache_delentry(earlyi, earlyfep, frag); 6988 } 6989 6990 static ipsec_fragcache_entry_t * 6991 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 6992 ipsec_fragcache_t *frag) 6993 { 6994 ipsec_fragcache_entry_t *targp; 6995 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 6996 6997 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6998 6999 /* Free up any fragment list still in cache entry */ 7000 ipsec_freemsg_chain(fep->itpfe_fraglist); 7001 7002 targp = (frag->itpf_ptr)[slot]; 7003 ASSERT(targp != 0); 7004 7005 if (targp == fep) { 7006 /* unlink from head of hash chain */ 7007 (frag->itpf_ptr)[slot] = nextp; 7008 /* link into free list */ 7009 fep->itpfe_next = frag->itpf_freelist; 7010 frag->itpf_freelist = fep; 7011 return (nextp); 7012 } 7013 7014 /* maybe should use double linked list to make update faster */ 7015 /* must be past front of chain */ 7016 while (targp) { 7017 if (targp->itpfe_next == fep) { 7018 /* unlink from hash chain */ 7019 targp->itpfe_next = nextp; 7020 /* link into free list */ 7021 fep->itpfe_next = 
frag->itpf_freelist; 7022 frag->itpf_freelist = fep; 7023 return (nextp); 7024 } 7025 targp = targp->itpfe_next; 7026 ASSERT(targp != 0); 7027 } 7028 /* NOTREACHED */ 7029 return (NULL); 7030 } 7031
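
/*
 * Illustrative sketch (not part of the original module): the reference
 * discipline for the tunnel-policy AVL tree above.  get_tunnel_policy()
 * returns a node held via ITP_REFHOLD(), so every successful lookup must
 * be paired with an ITP_REFRELE().  The tunnel name used here is
 * hypothetical.  Compiled out via "notdef" so it serves as documentation
 * only.
 */
#ifdef notdef
static boolean_t
example_tunnel_policy_lookup(netstack_t *ns)
{
	ipsec_tun_pol_t *itp;

	/* Bumps itp_refcnt on success; returns NULL if no such tunnel. */
	itp = get_tunnel_policy("ip.tun0", ns);
	if (itp == NULL)
		return (B_FALSE);

	/* ... examine itp->itp_flags or its policy heads here ... */

	ITP_REFRELE(itp, ns);	/* Drop the hold taken by the lookup. */
	return (B_TRUE);
}
#endif /* notdef */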