/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"@(#)spd.c	1.61	08/07/15 SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t **, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, netstack_t *);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(netstack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
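/*
 * These bits may be combined; for example, a tunnel-mode lookup that also
 * wants per-port information in the selector would pass
 * (SEL_TUNNEL_MODE | SEL_PORT_POLICY), i.e. 0x0005.  (Illustrative
 * combination only, derived from the bit definitions above.)
 */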
/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG}
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

uint32_t ipsec_spd_hashsize;
uint32_t tun_spd_hashsize;

#define	IPSEC_SEL_NOHASH	((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int,
    ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)
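/*
 * For example, with align == 8, ALGBITS_ROUND_DOWN(13, 8) == 8 and
 * ALGBITS_ROUND_UP(13, 8) == 16: a 13-bit quantity is clipped down, or
 * padded up, to the nearest multiple of the alignment.
 */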
/*
 * Inbound traffic should have matching identities for both SAs.
 */

#define	SA_IDS_MATCH(sa1, sa2) 					\
	(((sa1) == NULL) || ((sa2) == NULL) ||			\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&	\
	(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)

/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries..
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}
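/*
 * Note that these only walk the b_next (packet list) linkage; freemsg()
 * and ip_drop_packet() each consume the b_cont (data block) chain of the
 * individual message they are handed.
 */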
292 */ 293 if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL)) 294 return (-1); 295 if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL)) 296 return (1); 297 /* 298 * At most one of the arguments to the comparison should have a 299 * NULL selector pointer; if not, the tree is broken. 300 */ 301 ASSERT(ipa->ipsp_sel != NULL); 302 ASSERT(ipb->ipsp_sel != NULL); 303 /* 304 * Tie-breaker #2: use the virtual address of the policy node 305 * to arbitrarily break ties. Since we use the new tree node in 306 * the avl_find() in ipsec_insert_always, the new node will be 307 * inserted into the tree in the right place in the sequence. 308 */ 309 if (ipa < ipb) 310 return (-1); 311 if (ipa > ipb) 312 return (1); 313 return (0); 314 } 315 316 /* 317 * Free what ipsec_alloc_table allocated. 318 */ 319 void 320 ipsec_polhead_free_table(ipsec_policy_head_t *iph) 321 { 322 int dir; 323 int i; 324 325 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 326 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 327 328 if (ipr->ipr_hash == NULL) 329 continue; 330 331 for (i = 0; i < ipr->ipr_nchains; i++) { 332 ASSERT(ipr->ipr_hash[i].hash_head == NULL); 333 } 334 kmem_free(ipr->ipr_hash, ipr->ipr_nchains * 335 sizeof (ipsec_policy_hash_t)); 336 ipr->ipr_hash = NULL; 337 } 338 } 339 340 void 341 ipsec_polhead_destroy(ipsec_policy_head_t *iph) 342 { 343 int dir; 344 345 avl_destroy(&iph->iph_rulebyid); 346 rw_destroy(&iph->iph_lock); 347 348 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 349 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 350 int chain; 351 352 for (chain = 0; chain < ipr->ipr_nchains; chain++) 353 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 354 355 } 356 ipsec_polhead_free_table(iph); 357 } 358 359 /* 360 * Free the IPsec stack instance. 361 */ 362 /* ARGSUSED */ 363 static void 364 ipsec_stack_fini(netstackid_t stackid, void *arg) 365 { 366 ipsec_stack_t *ipss = (ipsec_stack_t *)arg; 367 void *cookie; 368 ipsec_tun_pol_t *node; 369 netstack_t *ns = ipss->ipsec_netstack; 370 int i; 371 ipsec_algtype_t algtype; 372 373 ipsec_loader_destroy(ipss); 374 375 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 376 /* 377 * It's possible we can just ASSERT() the tree is empty. After all, 378 * we aren't called until IP is ready to unload (and presumably all 379 * tunnels have been unplumbed). But we'll play it safe for now, the 380 * loop will just exit immediately if it's empty. 381 */ 382 cookie = NULL; 383 while ((node = (ipsec_tun_pol_t *) 384 avl_destroy_nodes(&ipss->ipsec_tunnel_policies, 385 &cookie)) != NULL) { 386 ITP_REFRELE(node, ns); 387 } 388 avl_destroy(&ipss->ipsec_tunnel_policies); 389 rw_exit(&ipss->ipsec_tunnel_policy_lock); 390 rw_destroy(&ipss->ipsec_tunnel_policy_lock); 391 392 ipsec_config_flush(ns); 393 394 ipsec_kstat_destroy(ipss); 395 396 ip_drop_unregister(&ipss->ipsec_dropper); 397 398 ip_drop_unregister(&ipss->ipsec_spd_dropper); 399 ip_drop_destroy(ipss); 400 /* 401 * Globals start with ref == 1 to prevent IPPH_REFRELE() from 402 * attempting to free them, hence they should have 1 now. 
403 */ 404 ipsec_polhead_destroy(&ipss->ipsec_system_policy); 405 ASSERT(ipss->ipsec_system_policy.iph_refs == 1); 406 ipsec_polhead_destroy(&ipss->ipsec_inactive_policy); 407 ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1); 408 409 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 410 ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head); 411 ipss->ipsec_action_hash[i].hash_head = NULL; 412 mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock)); 413 } 414 415 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 416 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 417 mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock)); 418 } 419 420 mutex_enter(&ipss->ipsec_alg_lock); 421 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype ++) { 422 int nalgs = ipss->ipsec_nalgs[algtype]; 423 424 for (i = 0; i < nalgs; i++) { 425 if (ipss->ipsec_alglists[algtype][i] != NULL) 426 ipsec_alg_unreg(algtype, i, ns); 427 } 428 } 429 mutex_exit(&ipss->ipsec_alg_lock); 430 mutex_destroy(&ipss->ipsec_alg_lock); 431 432 ipsid_gc(ns); 433 ipsid_fini(ns); 434 435 (void) ipsec_free_tables(ipss); 436 kmem_free(ipss, sizeof (*ipss)); 437 } 438 439 void 440 ipsec_policy_g_destroy(void) 441 { 442 kmem_cache_destroy(ipsec_action_cache); 443 kmem_cache_destroy(ipsec_sel_cache); 444 kmem_cache_destroy(ipsec_pol_cache); 445 kmem_cache_destroy(ipsec_info_cache); 446 447 ipsec_unregister_prov_update(); 448 449 netstack_unregister(NS_IPSEC); 450 } 451 452 453 /* 454 * Free what ipsec_alloc_tables allocated. 455 * Called when table allocation fails to free the table. 456 */ 457 static int 458 ipsec_free_tables(ipsec_stack_t *ipss) 459 { 460 int i; 461 462 if (ipss->ipsec_sel_hash != NULL) { 463 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 464 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 465 } 466 kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize * 467 sizeof (*ipss->ipsec_sel_hash)); 468 ipss->ipsec_sel_hash = NULL; 469 ipss->ipsec_spd_hashsize = 0; 470 } 471 ipsec_polhead_free_table(&ipss->ipsec_system_policy); 472 ipsec_polhead_free_table(&ipss->ipsec_inactive_policy); 473 474 return (ENOMEM); 475 } 476 477 /* 478 * Attempt to allocate the tables in a single policy head. 479 * Return nonzero on failure after cleaning up any work in progress. 480 */ 481 int 482 ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, 483 boolean_t global_cleanup, netstack_t *ns) 484 { 485 int dir; 486 487 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 488 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 489 490 ipr->ipr_nchains = nchains; 491 ipr->ipr_hash = kmem_zalloc(nchains * 492 sizeof (ipsec_policy_hash_t), kmflag); 493 if (ipr->ipr_hash == NULL) 494 return (global_cleanup ? 495 ipsec_free_tables(ns->netstack_ipsec) : 496 ENOMEM); 497 } 498 return (0); 499 } 500 501 /* 502 * Attempt to allocate the various tables. Return nonzero on failure 503 * after cleaning up any work in progress. 
504 */ 505 static int 506 ipsec_alloc_tables(int kmflag, netstack_t *ns) 507 { 508 int error; 509 ipsec_stack_t *ipss = ns->netstack_ipsec; 510 511 error = ipsec_alloc_table(&ipss->ipsec_system_policy, 512 ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); 513 if (error != 0) 514 return (error); 515 516 error = ipsec_alloc_table(&ipss->ipsec_inactive_policy, 517 ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); 518 if (error != 0) 519 return (error); 520 521 ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize * 522 sizeof (*ipss->ipsec_sel_hash), kmflag); 523 524 if (ipss->ipsec_sel_hash == NULL) 525 return (ipsec_free_tables(ipss)); 526 527 return (0); 528 } 529 530 /* 531 * After table allocation, initialize a policy head. 532 */ 533 void 534 ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains) 535 { 536 int dir, chain; 537 538 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 539 avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid, 540 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 541 542 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 543 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 544 ipr->ipr_nchains = nchains; 545 546 for (chain = 0; chain < nchains; chain++) { 547 mutex_init(&(ipr->ipr_hash[chain].hash_lock), 548 NULL, MUTEX_DEFAULT, NULL); 549 } 550 } 551 } 552 553 static boolean_t 554 ipsec_kstat_init(ipsec_stack_t *ipss) 555 { 556 ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net", 557 KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t), 558 KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid); 559 560 if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL) 561 return (B_FALSE); 562 563 ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data; 564 565 #define KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64) 566 KI(esp_stat_in_requests); 567 KI(esp_stat_in_discards); 568 KI(esp_stat_lookup_failure); 569 KI(ah_stat_in_requests); 570 KI(ah_stat_in_discards); 571 KI(ah_stat_lookup_failure); 572 KI(sadb_acquire_maxpackets); 573 KI(sadb_acquire_qhiwater); 574 #undef KI 575 576 kstat_install(ipss->ipsec_ksp); 577 return (B_TRUE); 578 } 579 580 static void 581 ipsec_kstat_destroy(ipsec_stack_t *ipss) 582 { 583 kstat_delete_netstack(ipss->ipsec_ksp, 584 ipss->ipsec_netstack->netstack_stackid); 585 ipss->ipsec_kstats = NULL; 586 587 } 588 589 /* 590 * Initialize the IPsec stack instance. 591 */ 592 /* ARGSUSED */ 593 static void * 594 ipsec_stack_init(netstackid_t stackid, netstack_t *ns) 595 { 596 ipsec_stack_t *ipss; 597 int i; 598 599 ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP); 600 ipss->ipsec_netstack = ns; 601 602 /* 603 * FIXME: netstack_ipsec is used by some of the routines we call 604 * below, but it isn't set until this routine returns. 605 * Either we introduce optional xxx_stack_alloc() functions 606 * that will be called by the netstack framework before xxx_stack_init, 607 * or we switch spd.c and sadb.c to operate on ipsec_stack_t 608 * (latter has some include file order issues for sadb.h, but makes 609 * sense if we merge some of the ipsec related stack_t's together. 610 */ 611 ns->netstack_ipsec = ipss; 612 613 /* 614 * Make two attempts to allocate policy hash tables; try it at 615 * the "preferred" size (may be set in /etc/system) first, 616 * then fall back to the default size. 617 */ 618 ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ? 
/*
 * Initialize the IPsec stack instance.
 */
/* ARGSUSED */
static void *
ipsec_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsec_stack_t	*ipss;
	int		i;

	ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP);
	ipss->ipsec_netstack = ns;

	/*
	 * FIXME: netstack_ipsec is used by some of the routines we call
	 * below, but it isn't set until this routine returns.
	 * Either we introduce optional xxx_stack_alloc() functions
	 * that will be called by the netstack framework before xxx_stack_init,
	 * or we switch spd.c and sadb.c to operate on ipsec_stack_t
	 * (the latter has some include file order issues for sadb.h, but makes
	 * sense if we merge some of the ipsec related stack_t's together).
	 */
	ns->netstack_ipsec = ipss;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ?
	    IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize;

	if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipss->ipsec_spd_hashsize);
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipss->ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP, ns);
	}

	/* Just set a default for tunnels. */
	ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ?
	    TUN_SPDHASH_DEFAULT : tun_spd_hashsize;

	ipsid_init(ns);
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	ipss->ipsec_system_policy.iph_refs = 1;
	ipss->ipsec_inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize);
	ipsec_polhead_init(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize);
	rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), 0);

	ipss->ipsec_next_policy_index = 1;

	rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipss->ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++)
		mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL);
	for (i = 0; i < IPSEC_NALGTYPES; i++) {
		ipss->ipsec_nalgs[i] = 0;
	}

	ip_drop_init(ipss);
	ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD");

	/* Set function to dummy until tun is loaded */
	rw_init(&ipss->ipsec_itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
	rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER);
	ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy;
	rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);

	/* IP's IPsec code calls the packet dropper */
	ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing");

	(void) ipsec_kstat_init(ipss);

	ipsec_loader_init(ipss);
	ipsec_loader_start(ipss);

	return (ipss);
}
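/*
 * The "preferred" sizes consulted above come from the ipsec_spd_hashsize
 * and tun_spd_hashsize globals; an administrator could presumably set
 * them from /etc/system with something like
 * "set ip:ipsec_spd_hashsize = 1021" (assuming these symbols live in the
 * ip module; a prime bucket count keeps the hash well spread).
 */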
719 * 720 * XXX For now, sort on minimum key size (GAG!). While minimum key size is 721 * not the ideal metric, it's the only quantifiable measure available. 722 * We need a better metric for sorting algorithms by preference. 723 */ 724 static void 725 alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns) 726 { 727 ipsec_stack_t *ipss = ns->netstack_ipsec; 728 ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid]; 729 uint8_t holder, swap; 730 uint_t i; 731 uint_t count = ipss->ipsec_nalgs[at]; 732 ASSERT(ai != NULL); 733 ASSERT(algid == ai->alg_id); 734 735 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 736 737 holder = algid; 738 739 for (i = 0; i < count - 1; i++) { 740 ipsec_alginfo_t *alt; 741 742 alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]]; 743 /* 744 * If you want to give precedence to newly added algs, 745 * add the = in the > comparison. 746 */ 747 if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) { 748 /* Swap sortlist[i] and holder. */ 749 swap = ipss->ipsec_sortlist[at][i]; 750 ipss->ipsec_sortlist[at][i] = holder; 751 holder = swap; 752 ai = alt; 753 } /* Else just continue. */ 754 } 755 756 /* Store holder in last slot. */ 757 ipss->ipsec_sortlist[at][i] = holder; 758 } 759 760 /* 761 * Remove an algorithm from a sorted algorithm list. 762 * This should be considerably easier, even with complex sorting. 763 */ 764 static void 765 alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns) 766 { 767 boolean_t copyback = B_FALSE; 768 int i; 769 ipsec_stack_t *ipss = ns->netstack_ipsec; 770 int newcount = ipss->ipsec_nalgs[at]; 771 772 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 773 774 for (i = 0; i <= newcount; i++) { 775 if (copyback) { 776 ipss->ipsec_sortlist[at][i-1] = 777 ipss->ipsec_sortlist[at][i]; 778 } else if (ipss->ipsec_sortlist[at][i] == algid) { 779 copyback = B_TRUE; 780 } 781 } 782 } 783 784 /* 785 * Add the specified algorithm to the algorithm tables. 786 * Must be called while holding the algorithm table writer lock. 787 */ 788 void 789 ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns) 790 { 791 ipsec_stack_t *ipss = ns->netstack_ipsec; 792 793 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 794 795 ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL); 796 ipsec_alg_fix_min_max(alg, algtype, ns); 797 ipss->ipsec_alglists[algtype][alg->alg_id] = alg; 798 799 ipss->ipsec_nalgs[algtype]++; 800 alg_insert_sortlist(algtype, alg->alg_id, ns); 801 } 802 803 /* 804 * Remove the specified algorithm from the algorithm tables. 805 * Must be called while holding the algorithm table writer lock. 806 */ 807 void 808 ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns) 809 { 810 ipsec_stack_t *ipss = ns->netstack_ipsec; 811 812 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 813 814 ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL); 815 ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]); 816 ipss->ipsec_alglists[algtype][algid] = NULL; 817 818 ipss->ipsec_nalgs[algtype]--; 819 alg_remove_sortlist(algtype, algid, ns); 820 } 821 822 /* 823 * Hooks for spdsock to get a grip on system policy. 
824 */ 825 826 ipsec_policy_head_t * 827 ipsec_system_policy(netstack_t *ns) 828 { 829 ipsec_stack_t *ipss = ns->netstack_ipsec; 830 ipsec_policy_head_t *h = &ipss->ipsec_system_policy; 831 832 IPPH_REFHOLD(h); 833 return (h); 834 } 835 836 ipsec_policy_head_t * 837 ipsec_inactive_policy(netstack_t *ns) 838 { 839 ipsec_stack_t *ipss = ns->netstack_ipsec; 840 ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy; 841 842 IPPH_REFHOLD(h); 843 return (h); 844 } 845 846 /* 847 * Lock inactive policy, then active policy, then exchange policy root 848 * pointers. 849 */ 850 void 851 ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive, 852 netstack_t *ns) 853 { 854 int af, dir; 855 avl_tree_t r1, r2; 856 857 rw_enter(&inactive->iph_lock, RW_WRITER); 858 rw_enter(&active->iph_lock, RW_WRITER); 859 860 r1 = active->iph_rulebyid; 861 r2 = inactive->iph_rulebyid; 862 active->iph_rulebyid = r2; 863 inactive->iph_rulebyid = r1; 864 865 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 866 ipsec_policy_hash_t *h1, *h2; 867 868 h1 = active->iph_root[dir].ipr_hash; 869 h2 = inactive->iph_root[dir].ipr_hash; 870 active->iph_root[dir].ipr_hash = h2; 871 inactive->iph_root[dir].ipr_hash = h1; 872 873 for (af = 0; af < IPSEC_NAF; af++) { 874 ipsec_policy_t *t1, *t2; 875 876 t1 = active->iph_root[dir].ipr_nonhash[af]; 877 t2 = inactive->iph_root[dir].ipr_nonhash[af]; 878 active->iph_root[dir].ipr_nonhash[af] = t2; 879 inactive->iph_root[dir].ipr_nonhash[af] = t1; 880 if (t1 != NULL) { 881 t1->ipsp_hash.hash_pp = 882 &(inactive->iph_root[dir].ipr_nonhash[af]); 883 } 884 if (t2 != NULL) { 885 t2->ipsp_hash.hash_pp = 886 &(active->iph_root[dir].ipr_nonhash[af]); 887 } 888 889 } 890 } 891 active->iph_gen++; 892 inactive->iph_gen++; 893 ipsec_update_present_flags(ns->netstack_ipsec); 894 rw_exit(&active->iph_lock); 895 rw_exit(&inactive->iph_lock); 896 } 897 898 /* 899 * Swap global policy primary/secondary. 900 */ 901 void 902 ipsec_swap_global_policy(netstack_t *ns) 903 { 904 ipsec_stack_t *ipss = ns->netstack_ipsec; 905 906 ipsec_swap_policy(&ipss->ipsec_system_policy, 907 &ipss->ipsec_inactive_policy, ns); 908 } 909 910 /* 911 * Clone one policy rule.. 912 */ 913 static ipsec_policy_t * 914 ipsec_copy_policy(const ipsec_policy_t *src) 915 { 916 ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 917 918 if (dst == NULL) 919 return (NULL); 920 921 /* 922 * Adjust refcounts of cloned state. 923 */ 924 IPACT_REFHOLD(src->ipsp_act); 925 src->ipsp_sel->ipsl_refs++; 926 927 HASH_NULL(dst, ipsp_hash); 928 dst->ipsp_refs = 1; 929 dst->ipsp_sel = src->ipsp_sel; 930 dst->ipsp_act = src->ipsp_act; 931 dst->ipsp_prio = src->ipsp_prio; 932 dst->ipsp_index = src->ipsp_index; 933 934 return (dst); 935 } 936 937 void 938 ipsec_insert_always(avl_tree_t *tree, void *new_node) 939 { 940 void *node; 941 avl_index_t where; 942 943 node = avl_find(tree, new_node, &where); 944 ASSERT(node == NULL); 945 avl_insert(tree, new_node, where); 946 } 947 948 949 static int 950 ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src, 951 ipsec_policy_t **dstp) 952 { 953 for (; src != NULL; src = src->ipsp_hash.hash_next) { 954 ipsec_policy_t *dst = ipsec_copy_policy(src); 955 if (dst == NULL) 956 return (ENOMEM); 957 958 HASHLIST_INSERT(dst, ipsp_hash, *dstp); 959 ipsec_insert_always(&dph->iph_rulebyid, dst); 960 } 961 return (0); 962 } 963 964 965 966 /* 967 * Make one policy head look exactly like another. 
968 * 969 * As with ipsec_swap_policy, we lock the destination policy head first, then 970 * the source policy head. Note that we only need to read-lock the source 971 * policy head as we are not changing it. 972 */ 973 int 974 ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph, 975 netstack_t *ns) 976 { 977 int af, dir, chain, nchains; 978 979 rw_enter(&dph->iph_lock, RW_WRITER); 980 981 ipsec_polhead_flush(dph, ns); 982 983 rw_enter(&sph->iph_lock, RW_READER); 984 985 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 986 ipsec_policy_root_t *dpr = &dph->iph_root[dir]; 987 ipsec_policy_root_t *spr = &sph->iph_root[dir]; 988 nchains = dpr->ipr_nchains; 989 990 ASSERT(dpr->ipr_nchains == spr->ipr_nchains); 991 992 for (af = 0; af < IPSEC_NAF; af++) { 993 if (ipsec_copy_chain(dph, spr->ipr_nonhash[af], 994 &dpr->ipr_nonhash[af])) 995 goto abort_copy; 996 } 997 998 for (chain = 0; chain < nchains; chain++) { 999 if (ipsec_copy_chain(dph, 1000 spr->ipr_hash[chain].hash_head, 1001 &dpr->ipr_hash[chain].hash_head)) 1002 goto abort_copy; 1003 } 1004 } 1005 1006 dph->iph_gen++; 1007 1008 rw_exit(&sph->iph_lock); 1009 rw_exit(&dph->iph_lock); 1010 return (0); 1011 1012 abort_copy: 1013 ipsec_polhead_flush(dph, ns); 1014 rw_exit(&sph->iph_lock); 1015 rw_exit(&dph->iph_lock); 1016 return (ENOMEM); 1017 } 1018 1019 /* 1020 * Clone currently active policy to the inactive policy list. 1021 */ 1022 int 1023 ipsec_clone_system_policy(netstack_t *ns) 1024 { 1025 ipsec_stack_t *ipss = ns->netstack_ipsec; 1026 1027 return (ipsec_copy_polhead(&ipss->ipsec_system_policy, 1028 &ipss->ipsec_inactive_policy, ns)); 1029 } 1030 1031 /* 1032 * Generic "do we have IPvN policy" answer. 1033 */ 1034 boolean_t 1035 iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6) 1036 { 1037 int i, hval; 1038 uint32_t valbit; 1039 ipsec_policy_root_t *ipr; 1040 ipsec_policy_t *ipp; 1041 1042 if (v6) { 1043 valbit = IPSL_IPV6; 1044 hval = IPSEC_AF_V6; 1045 } else { 1046 valbit = IPSL_IPV4; 1047 hval = IPSEC_AF_V4; 1048 } 1049 1050 ASSERT(RW_LOCK_HELD(&iph->iph_lock)); 1051 for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) { 1052 if (ipr->ipr_nonhash[hval] != NULL) 1053 return (B_TRUE); 1054 for (i = 0; i < ipr->ipr_nchains; i++) { 1055 for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL; 1056 ipp = ipp->ipsp_hash.hash_next) { 1057 if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit) 1058 return (B_TRUE); 1059 } 1060 } 1061 } 1062 1063 return (B_FALSE); 1064 } 1065 1066 /* 1067 * Extract the string from ipsec_policy_failure_msgs[type] and 1068 * log it. 1069 * 1070 */ 1071 void 1072 ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h, 1073 boolean_t secure, netstack_t *ns) 1074 { 1075 char sbuf[INET6_ADDRSTRLEN]; 1076 char dbuf[INET6_ADDRSTRLEN]; 1077 char *s; 1078 char *d; 1079 ipsec_stack_t *ipss = ns->netstack_ipsec; 1080 1081 ASSERT((ipha == NULL && ip6h != NULL) || 1082 (ip6h == NULL && ipha != NULL)); 1083 1084 if (ipha != NULL) { 1085 s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf)); 1086 d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf)); 1087 } else { 1088 s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf)); 1089 d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf)); 1090 1091 } 1092 1093 /* Always bump the policy failure counter. */ 1094 ipss->ipsec_policy_failure_count[type]++; 1095 1096 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 1097 ipsec_policy_failure_msgs[type], func_name, 1098 (secure ? 
"secure" : "not secure"), s, d); 1099 } 1100 1101 /* 1102 * Rate-limiting front-end to strlog() for AH and ESP. Uses the ndd variables 1103 * in /dev/ip and the same rate-limiting clock so that there's a single 1104 * knob to turn to throttle the rate of messages. 1105 */ 1106 void 1107 ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl, 1108 char *fmt, ...) 1109 { 1110 va_list adx; 1111 hrtime_t current = gethrtime(); 1112 ip_stack_t *ipst = ns->netstack_ip; 1113 ipsec_stack_t *ipss = ns->netstack_ipsec; 1114 1115 sl |= SL_CONSOLE; 1116 /* 1117 * Throttle logging to stop syslog from being swamped. If variable 1118 * 'ipsec_policy_log_interval' is zero, don't log any messages at 1119 * all, otherwise log only one message every 'ipsec_policy_log_interval' 1120 * msec. Convert interval (in msec) to hrtime (in nsec). 1121 */ 1122 1123 if (ipst->ips_ipsec_policy_log_interval) { 1124 if (ipss->ipsec_policy_failure_last + 1125 ((hrtime_t)ipst->ips_ipsec_policy_log_interval * 1126 (hrtime_t)1000000) <= current) { 1127 va_start(adx, fmt); 1128 (void) vstrlog(mid, sid, level, sl, fmt, adx); 1129 va_end(adx); 1130 ipss->ipsec_policy_failure_last = current; 1131 } 1132 } 1133 } 1134 1135 void 1136 ipsec_config_flush(netstack_t *ns) 1137 { 1138 ipsec_stack_t *ipss = ns->netstack_ipsec; 1139 1140 rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER); 1141 ipsec_polhead_flush(&ipss->ipsec_system_policy, ns); 1142 ipss->ipsec_next_policy_index = 1; 1143 rw_exit(&ipss->ipsec_system_policy.iph_lock); 1144 ipsec_action_reclaim_stack(ns); 1145 } 1146 1147 /* 1148 * Clip a policy's min/max keybits vs. the capabilities of the 1149 * algorithm. 1150 */ 1151 static void 1152 act_alg_adjust(uint_t algtype, uint_t algid, 1153 uint16_t *minbits, uint16_t *maxbits, netstack_t *ns) 1154 { 1155 ipsec_stack_t *ipss = ns->netstack_ipsec; 1156 ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid]; 1157 1158 if (algp != NULL) { 1159 /* 1160 * If passed-in minbits is zero, we assume the caller trusts 1161 * us with setting the minimum key size. We pick the 1162 * algorithms DEFAULT key size for the minimum in this case. 1163 */ 1164 if (*minbits == 0) { 1165 *minbits = algp->alg_default_bits; 1166 ASSERT(*minbits >= algp->alg_minbits); 1167 } else { 1168 *minbits = MAX(MIN(*minbits, algp->alg_maxbits), 1169 algp->alg_minbits); 1170 } 1171 if (*maxbits == 0) 1172 *maxbits = algp->alg_maxbits; 1173 else 1174 *maxbits = MIN(MAX(*maxbits, algp->alg_minbits), 1175 algp->alg_maxbits); 1176 ASSERT(*minbits <= *maxbits); 1177 } else { 1178 *minbits = 0; 1179 *maxbits = 0; 1180 } 1181 } 1182 1183 /* 1184 * Check an action's requested algorithms against the algorithms currently 1185 * loaded in the system. 
/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}
/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss) \
	((wild) ? (ipss->ipsec_sortlist[type][idx]) : (idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}
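/*
 * For example, with ESP encryption and ESP authentication both wildcarded,
 * three loaded encryption algorithms, and two loaded authentication
 * algorithms (none of them the "NONE" placeholders), an APPLY action
 * expands into 3 * 2 == 6 concrete actions.
 */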
1396 */ 1397 ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED); 1398 ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED); 1399 ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg); 1400 ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED); 1401 ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) & 1402 IPSEC_PREF_UNIQUE); 1403 ipp->ipp_encr_alg = req->ipsr_esp_alg; 1404 /* 1405 * SADB_AALG_ANY is a placeholder to distinguish "any" from 1406 * "none" above. If auth is required, as determined above, 1407 * SADB_AALG_ANY becomes 0, which is the representation 1408 * of "any" and "none" in PF_KEY v2. 1409 */ 1410 ipp->ipp_auth_alg = (req->ipsr_auth_alg != SADB_AALG_ANY) ? 1411 req->ipsr_auth_alg : 0; 1412 ipp->ipp_esp_auth_alg = (req->ipsr_esp_auth_alg != SADB_AALG_ANY) ? 1413 req->ipsr_esp_auth_alg : 0; 1414 } 1415 1416 /* 1417 * Extract a new-style action from a request. 1418 */ 1419 void 1420 ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp, 1421 netstack_t *ns) 1422 { 1423 struct ipsec_act act; 1424 1425 bzero(&act, sizeof (act)); 1426 if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) && 1427 (req->ipsr_esp_req & IPSEC_PREF_NEVER)) { 1428 act.ipa_type = IPSEC_ACT_BYPASS; 1429 } else { 1430 act.ipa_type = IPSEC_ACT_APPLY; 1431 ipsec_prot_from_req(req, &act.ipa_apply); 1432 } 1433 *actp = ipsec_act_wildcard_expand(&act, nactp, ns); 1434 } 1435 1436 /* 1437 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat). 1438 * We assume caller has already zero'ed *req for us. 1439 */ 1440 static int 1441 ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req) 1442 { 1443 req->ipsr_esp_alg = ipp->ipp_encr_alg; 1444 req->ipsr_auth_alg = ipp->ipp_auth_alg; 1445 req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg; 1446 1447 if (ipp->ipp_use_unique) { 1448 req->ipsr_ah_req |= IPSEC_PREF_UNIQUE; 1449 req->ipsr_esp_req |= IPSEC_PREF_UNIQUE; 1450 } 1451 if (ipp->ipp_use_se) 1452 req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED; 1453 if (ipp->ipp_use_ah) 1454 req->ipsr_ah_req |= IPSEC_PREF_REQUIRED; 1455 if (ipp->ipp_use_esp) 1456 req->ipsr_esp_req |= IPSEC_PREF_REQUIRED; 1457 return (sizeof (*req)); 1458 } 1459 1460 /* 1461 * Convert a new-style action back to an ipsec_req_t (more backwards compat). 1462 * We assume caller has already zero'ed *req for us. 1463 */ 1464 static int 1465 ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req) 1466 { 1467 switch (ap->ipa_act.ipa_type) { 1468 case IPSEC_ACT_BYPASS: 1469 req->ipsr_ah_req = IPSEC_PREF_NEVER; 1470 req->ipsr_esp_req = IPSEC_PREF_NEVER; 1471 return (sizeof (*req)); 1472 case IPSEC_ACT_APPLY: 1473 return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req)); 1474 } 1475 return (sizeof (*req)); 1476 } 1477 1478 /* 1479 * Convert a new-style action back to an ipsec_req_t (more backwards compat). 1480 * We assume caller has already zero'ed *req for us. 1481 */ 1482 int 1483 ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af) 1484 { 1485 ipsec_policy_t *p; 1486 1487 /* 1488 * FULL-PERSOCK: consult hash table, too? 
1489 */ 1490 for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af]; 1491 p != NULL; 1492 p = p->ipsp_hash.hash_next) { 1493 if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0) 1494 return (ipsec_req_from_act(p->ipsp_act, req)); 1495 } 1496 return (sizeof (*req)); 1497 } 1498 1499 /* 1500 * Based on per-socket or latched policy, convert to an appropriate 1501 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can 1502 * be tail-called from ip. 1503 */ 1504 int 1505 ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af) 1506 { 1507 ipsec_latch_t *ipl; 1508 int rv = sizeof (ipsec_req_t); 1509 1510 bzero(req, sizeof (*req)); 1511 1512 mutex_enter(&connp->conn_lock); 1513 ipl = connp->conn_latch; 1514 1515 /* 1516 * Find appropriate policy. First choice is latched action; 1517 * failing that, see latched policy; failing that, 1518 * look at configured policy. 1519 */ 1520 if (ipl != NULL) { 1521 if (ipl->ipl_in_action != NULL) { 1522 rv = ipsec_req_from_act(ipl->ipl_in_action, req); 1523 goto done; 1524 } 1525 if (ipl->ipl_in_policy != NULL) { 1526 rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act, 1527 req); 1528 goto done; 1529 } 1530 } 1531 if (connp->conn_policy != NULL) 1532 rv = ipsec_req_from_head(connp->conn_policy, req, af); 1533 done: 1534 mutex_exit(&connp->conn_lock); 1535 return (rv); 1536 } 1537 1538 void 1539 ipsec_actvec_free(ipsec_act_t *act, uint_t nact) 1540 { 1541 kmem_free(act, nact * sizeof (*act)); 1542 } 1543 1544 /* 1545 * When outbound policy is not cached, look it up the hard way and attach 1546 * an ipsec_out_t to the packet.. 1547 */ 1548 static mblk_t * 1549 ipsec_attach_global_policy(mblk_t **mp, conn_t *connp, ipsec_selector_t *sel, 1550 netstack_t *ns) 1551 { 1552 ipsec_policy_t *p; 1553 1554 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns); 1555 1556 if (p == NULL) 1557 return (NULL); 1558 return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns)); 1559 } 1560 1561 /* 1562 * We have an ipsec_out already, but don't have cached policy; fill it in 1563 * with the right actions. 1564 */ 1565 static mblk_t * 1566 ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp, 1567 ipsec_selector_t *sel, netstack_t *ns) 1568 { 1569 ipsec_out_t *io; 1570 ipsec_policy_t *p; 1571 1572 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 1573 ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA); 1574 1575 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1576 1577 if (io->ipsec_out_policy == NULL) { 1578 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns); 1579 io->ipsec_out_policy = p; 1580 } 1581 return (ipsec_mp); 1582 } 1583 1584 1585 /* 1586 * Consumes a reference to ipsp. 1587 */ 1588 static mblk_t * 1589 ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present, 1590 ipsec_policy_t *ipsp) 1591 { 1592 mblk_t *ipsec_mp; 1593 ipsec_in_t *ii; 1594 netstack_t *ns; 1595 1596 if (!mctl_present) 1597 return (first_mp); 1598 1599 ipsec_mp = first_mp; 1600 1601 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1602 ns = ii->ipsec_in_ns; 1603 ASSERT(ii->ipsec_in_loopback); 1604 IPPOL_REFRELE(ipsp, ns); 1605 1606 /* 1607 * We should do an actual policy check here. Revisit this 1608 * when we revisit the IPsec API. (And pass a conn_t in when we 1609 * get there.) 1610 */ 1611 1612 return (first_mp); 1613 } 1614 1615 /* 1616 * Check that packet's inbound ports & proto match the selectors 1617 * expected by the SAs it traversed on the way in. 
1618 */ 1619 static boolean_t 1620 ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason, 1621 kstat_named_t **counter, uint64_t pkt_unique) 1622 { 1623 uint64_t ah_mask, esp_mask; 1624 ipsa_t *ah_assoc; 1625 ipsa_t *esp_assoc; 1626 netstack_t *ns = ii->ipsec_in_ns; 1627 ipsec_stack_t *ipss = ns->netstack_ipsec; 1628 1629 ASSERT(ii->ipsec_in_secure); 1630 ASSERT(!ii->ipsec_in_loopback); 1631 1632 ah_assoc = ii->ipsec_in_ah_sa; 1633 esp_assoc = ii->ipsec_in_esp_sa; 1634 ASSERT((ah_assoc != NULL) || (esp_assoc != NULL)); 1635 1636 ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0; 1637 esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0; 1638 1639 if ((ah_mask == 0) && (esp_mask == 0)) 1640 return (B_TRUE); 1641 1642 /* 1643 * The pkt_unique check will also check for tunnel mode on the SA 1644 * vs. the tunneled_packet boolean. "Be liberal in what you receive" 1645 * should not apply in this case. ;) 1646 */ 1647 1648 if (ah_mask != 0 && 1649 ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) { 1650 *reason = "AH inner header mismatch"; 1651 *counter = DROPPER(ipss, ipds_spd_ah_innermismatch); 1652 return (B_FALSE); 1653 } 1654 if (esp_mask != 0 && 1655 esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) { 1656 *reason = "ESP inner header mismatch"; 1657 *counter = DROPPER(ipss, ipds_spd_esp_innermismatch); 1658 return (B_FALSE); 1659 } 1660 return (B_TRUE); 1661 } 1662 1663 static boolean_t 1664 ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, 1665 ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter) 1666 { 1667 boolean_t ret = B_TRUE; 1668 ipsec_prot_t *ipp; 1669 ipsa_t *ah_assoc; 1670 ipsa_t *esp_assoc; 1671 boolean_t decaps; 1672 netstack_t *ns = ii->ipsec_in_ns; 1673 ipsec_stack_t *ipss = ns->netstack_ipsec; 1674 1675 ASSERT((ipha == NULL && ip6h != NULL) || 1676 (ip6h == NULL && ipha != NULL)); 1677 1678 if (ii->ipsec_in_loopback) { 1679 /* 1680 * Besides accepting pointer-equivalent actions, we also 1681 * accept any ICMP errors we generated for ourselves, 1682 * regardless of policy. If we do not wish to make this 1683 * assumption in the future, check here, and where 1684 * icmp_loopback is initialized in ip.c and ip6.c. (Look for 1685 * ipsec_out_icmp_loopback.) 1686 */ 1687 if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback) 1688 return (B_TRUE); 1689 1690 /* Deep compare necessary here?? */ 1691 *counter = DROPPER(ipss, ipds_spd_loopback_mismatch); 1692 *reason = "loopback policy mismatch"; 1693 return (B_FALSE); 1694 } 1695 ASSERT(!ii->ipsec_in_icmp_loopback); 1696 1697 ah_assoc = ii->ipsec_in_ah_sa; 1698 esp_assoc = ii->ipsec_in_esp_sa; 1699 1700 decaps = ii->ipsec_in_decaps; 1701 1702 switch (ap->ipa_act.ipa_type) { 1703 case IPSEC_ACT_DISCARD: 1704 case IPSEC_ACT_REJECT: 1705 /* Should "fail hard" */ 1706 *counter = DROPPER(ipss, ipds_spd_explicit); 1707 *reason = "blocked by policy"; 1708 return (B_FALSE); 1709 1710 case IPSEC_ACT_BYPASS: 1711 case IPSEC_ACT_CLEAR: 1712 *counter = DROPPER(ipss, ipds_spd_got_secure); 1713 *reason = "expected clear, got protected"; 1714 return (B_FALSE); 1715 1716 case IPSEC_ACT_APPLY: 1717 ipp = &ap->ipa_act.ipa_apply; 1718 /* 1719 * As of now we do the simple checks of whether 1720 * the datagram has gone through the required IPSEC 1721 * protocol constraints or not. We might have more 1722 * in the future like sensitive levels, key bits, etc. 1723 * If it fails the constraints, check whether we would 1724 * have accepted this if it had come in clear. 
1725 */ 1726 if (ipp->ipp_use_ah) { 1727 if (ah_assoc == NULL) { 1728 ret = ipsec_inbound_accept_clear(mp, ipha, 1729 ip6h); 1730 *counter = DROPPER(ipss, ipds_spd_got_clear); 1731 *reason = "unprotected not accepted"; 1732 break; 1733 } 1734 ASSERT(ah_assoc != NULL); 1735 ASSERT(ipp->ipp_auth_alg != 0); 1736 1737 if (ah_assoc->ipsa_auth_alg != 1738 ipp->ipp_auth_alg) { 1739 *counter = DROPPER(ipss, ipds_spd_bad_ahalg); 1740 *reason = "unacceptable ah alg"; 1741 ret = B_FALSE; 1742 break; 1743 } 1744 } else if (ah_assoc != NULL) { 1745 /* 1746 * Don't allow this. Check IPSEC NOTE above 1747 * ip_fanout_proto(). 1748 */ 1749 *counter = DROPPER(ipss, ipds_spd_got_ah); 1750 *reason = "unexpected AH"; 1751 ret = B_FALSE; 1752 break; 1753 } 1754 if (ipp->ipp_use_esp) { 1755 if (esp_assoc == NULL) { 1756 ret = ipsec_inbound_accept_clear(mp, ipha, 1757 ip6h); 1758 *counter = DROPPER(ipss, ipds_spd_got_clear); 1759 *reason = "unprotected not accepted"; 1760 break; 1761 } 1762 ASSERT(esp_assoc != NULL); 1763 ASSERT(ipp->ipp_encr_alg != 0); 1764 1765 if (esp_assoc->ipsa_encr_alg != 1766 ipp->ipp_encr_alg) { 1767 *counter = DROPPER(ipss, ipds_spd_bad_espealg); 1768 *reason = "unacceptable esp alg"; 1769 ret = B_FALSE; 1770 break; 1771 } 1772 /* 1773 * If the client does not need authentication, 1774 * we don't verify the alogrithm. 1775 */ 1776 if (ipp->ipp_use_espa) { 1777 if (esp_assoc->ipsa_auth_alg != 1778 ipp->ipp_esp_auth_alg) { 1779 *counter = DROPPER(ipss, 1780 ipds_spd_bad_espaalg); 1781 *reason = "unacceptable esp auth alg"; 1782 ret = B_FALSE; 1783 break; 1784 } 1785 } 1786 } else if (esp_assoc != NULL) { 1787 /* 1788 * Don't allow this. Check IPSEC NOTE above 1789 * ip_fanout_proto(). 1790 */ 1791 *counter = DROPPER(ipss, ipds_spd_got_esp); 1792 *reason = "unexpected ESP"; 1793 ret = B_FALSE; 1794 break; 1795 } 1796 if (ipp->ipp_use_se) { 1797 if (!decaps) { 1798 ret = ipsec_inbound_accept_clear(mp, ipha, 1799 ip6h); 1800 if (!ret) { 1801 /* XXX mutant? */ 1802 *counter = DROPPER(ipss, 1803 ipds_spd_bad_selfencap); 1804 *reason = "self encap not found"; 1805 break; 1806 } 1807 } 1808 } else if (decaps) { 1809 /* 1810 * XXX If the packet comes in tunneled and the 1811 * recipient does not expect it to be tunneled, it 1812 * is okay. But we drop to be consistent with the 1813 * other cases. 1814 */ 1815 *counter = DROPPER(ipss, ipds_spd_got_selfencap); 1816 *reason = "unexpected self encap"; 1817 ret = B_FALSE; 1818 break; 1819 } 1820 if (ii->ipsec_in_action != NULL) { 1821 /* 1822 * This can happen if we do a double policy-check on 1823 * a packet 1824 * XXX XXX should fix this case! 1825 */ 1826 IPACT_REFRELE(ii->ipsec_in_action); 1827 } 1828 ASSERT(ii->ipsec_in_action == NULL); 1829 IPACT_REFHOLD(ap); 1830 ii->ipsec_in_action = ap; 1831 break; /* from switch */ 1832 } 1833 return (ret); 1834 } 1835 1836 static boolean_t 1837 spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa) 1838 { 1839 ASSERT(ipl->ipl_ids_latched == B_TRUE); 1840 return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) && 1841 ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid); 1842 } 1843 1844 /* 1845 * Takes a latched conn and an inbound packet and returns a unique_id suitable 1846 * for SA comparisons. Most of the time we will copy from the conn_t, but 1847 * there are cases when the conn_t is latched but it has wildcard selectors, 1848 * and then we need to fallback to scooping them out of the packet. 1849 * 1850 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure. 
/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fall back to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}

/*
 * Called to check policy on a latched connection, both from this file
 * and from tcp.c
 */
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    conn_t *connp)
{
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(ipl->ipl_ids_latched == B_TRUE);

	if (!ii->ipsec_in_loopback) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if ((ii->ipsec_in_ah_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
			*counter = DROPPER(ipss, ipds_spd_ah_badid);
			*reason = "AH identity mismatch";
			return (B_FALSE);
		}

		if ((ii->ipsec_in_esp_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
			*counter = DROPPER(ipss, ipds_spd_esp_badid);
			*reason = "ESP identity mismatch";
			return (B_FALSE);
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h))) {
			return (B_FALSE);
		}
	}

	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
	    ipha, ip6h, reason, counter));
}
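/*
 * For a fully-latched TCP connection, conn_to_unique() above produces the
 * same encoding as SA_UNIQUE_ID(conn_fport, conn_lport, IPPROTO_TCP, 0),
 * which is what the masked comparison in ipsec_check_ipsecin_unique()
 * expects on the packet side.
 */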
1946 */ 1947 static mblk_t * 1948 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1949 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) 1950 { 1951 ipsec_in_t *ii; 1952 ipsec_action_t *ap; 1953 const char *reason = "no policy actions found"; 1954 mblk_t *data_mp, *ipsec_mp; 1955 ipsec_stack_t *ipss = ns->netstack_ipsec; 1956 ip_stack_t *ipst = ns->netstack_ip; 1957 kstat_named_t *counter; 1958 1959 counter = DROPPER(ipss, ipds_spd_got_secure); 1960 1961 data_mp = first_mp->b_cont; 1962 ipsec_mp = first_mp; 1963 1964 ASSERT(ipsp != NULL); 1965 1966 ASSERT((ipha == NULL && ip6h != NULL) || 1967 (ip6h == NULL && ipha != NULL)); 1968 1969 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1970 1971 if (ii->ipsec_in_loopback) 1972 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1973 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1974 ASSERT(ii->ipsec_in_secure); 1975 1976 if (ii->ipsec_in_action != NULL) { 1977 /* 1978 * this can happen if we do a double policy-check on a packet 1979 * Would be nice to be able to delete this test.. 1980 */ 1981 IPACT_REFRELE(ii->ipsec_in_action); 1982 } 1983 ASSERT(ii->ipsec_in_action == NULL); 1984 1985 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1986 reason = "inbound AH and ESP identities differ"; 1987 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1988 goto drop; 1989 } 1990 1991 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1992 goto drop; 1993 1994 /* 1995 * Ok, now loop through the possible actions and see if any 1996 * of them work for us. 1997 */ 1998 1999 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 2000 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 2001 ipha, ip6h, &reason, &counter)) { 2002 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2003 IPPOL_REFRELE(ipsp, ns); 2004 return (first_mp); 2005 } 2006 } 2007 drop: 2008 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 2009 "ipsec inbound policy mismatch: %s, packet dropped\n", 2010 reason); 2011 IPPOL_REFRELE(ipsp, ns); 2012 ASSERT(ii->ipsec_in_action == NULL); 2013 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2014 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2015 &ipss->ipsec_spd_dropper); 2016 return (NULL); 2017 } 2018 2019 /* 2020 * sleazy prefix-length-based compare. 2021 * another inlining candidate.. 2022 */ 2023 boolean_t 2024 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 2025 { 2026 int offset = pfxlen>>3; 2027 int bitsleft = pfxlen & 7; 2028 uint8_t *addr2 = (uint8_t *)addr2p; 2029 2030 /* 2031 * and there was much evil.. 2032 * XXX should inline-expand the bcmp here and do this 32 bits 2033 * or 64 bits at a time.. 2034 */ 2035 return ((bcmp(addr1, addr2, offset) == 0) && 2036 ((bitsleft == 0) || 2037 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0))); 2038 } 2039 2040 static ipsec_policy_t * 2041 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 2042 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 2043 { 2044 ipsec_selkey_t *isel; 2045 ipsec_policy_t *p; 2046 int bpri = best ? 
best->ipsp_prio : 0; 2047 2048 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 2049 uint32_t valid; 2050 2051 if (p->ipsp_prio <= bpri) 2052 continue; 2053 isel = &p->ipsp_sel->ipsl_key; 2054 valid = isel->ipsl_valid; 2055 2056 if ((valid & IPSL_PROTOCOL) && 2057 (isel->ipsl_proto != sel->ips_protocol)) 2058 continue; 2059 2060 if ((valid & IPSL_REMOTE_ADDR) && 2061 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 2062 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6)) 2063 continue; 2064 2065 if ((valid & IPSL_LOCAL_ADDR) && 2066 !ip_addr_match((uint8_t *)&isel->ipsl_local, 2067 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6)) 2068 continue; 2069 2070 if ((valid & IPSL_REMOTE_PORT) && 2071 isel->ipsl_rport != sel->ips_remote_port) 2072 continue; 2073 2074 if ((valid & IPSL_LOCAL_PORT) && 2075 isel->ipsl_lport != sel->ips_local_port) 2076 continue; 2077 2078 if (!is_icmp_inv_acq) { 2079 if ((valid & IPSL_ICMP_TYPE) && 2080 (isel->ipsl_icmp_type > sel->ips_icmp_type || 2081 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 2082 continue; 2083 } 2084 2085 if ((valid & IPSL_ICMP_CODE) && 2086 (isel->ipsl_icmp_code > sel->ips_icmp_code || 2087 isel->ipsl_icmp_code_end < 2088 sel->ips_icmp_code)) { 2089 continue; 2090 } 2091 } else { 2092 /* 2093 * special case for icmp inverse acquire 2094 * we only want policies that aren't drop/pass 2095 */ 2096 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2097 continue; 2098 } 2099 2100 /* we matched all the packet-port-field selectors! */ 2101 best = p; 2102 bpri = p->ipsp_prio; 2103 } 2104 2105 return (best); 2106 } 2107 2108 /* 2109 * Try to find and return the best policy entry under a given policy 2110 * root for a given set of selectors; the first parameter "best" is 2111 * the current best policy so far. If "best" is non-null, we have a 2112 * reference to it. We return a reference to a policy; if that policy 2113 * is not the original "best", we need to release that reference 2114 * before returning. 2115 */ 2116 ipsec_policy_t * 2117 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2118 int direction, ipsec_selector_t *sel, netstack_t *ns) 2119 { 2120 ipsec_policy_t *curbest; 2121 ipsec_policy_root_t *root; 2122 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2123 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2124 2125 curbest = best; 2126 root = &head->iph_root[direction]; 2127 2128 #ifdef DEBUG 2129 if (is_icmp_inv_acq) { 2130 if (sel->ips_isv4) { 2131 if (sel->ips_protocol != IPPROTO_ICMP) { 2132 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2133 " expecting icmp, got %d", 2134 sel->ips_protocol); 2135 } 2136 } else { 2137 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2138 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2139 " expecting icmpv6, got %d", 2140 sel->ips_protocol); 2141 } 2142 } 2143 } 2144 #endif 2145 2146 rw_enter(&head->iph_lock, RW_READER); 2147 2148 if (root->ipr_nchains > 0) { 2149 curbest = ipsec_find_policy_chain(curbest, 2150 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2151 is_icmp_inv_acq); 2152 } 2153 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2154 is_icmp_inv_acq); 2155 2156 /* 2157 * Adjust reference counts if we found anything new. 
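 * The caller always ends up holding exactly one reference on the
 * returned policy; the reference passed in on a superseded "best" is
 * released here on its behalf.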
2158 */ 2159 if (curbest != best) { 2160 ASSERT(curbest != NULL); 2161 IPPOL_REFHOLD(curbest); 2162 2163 if (best != NULL) { 2164 IPPOL_REFRELE(best, ns); 2165 } 2166 } 2167 2168 rw_exit(&head->iph_lock); 2169 2170 return (curbest); 2171 } 2172 2173 /* 2174 * Find the best system policy (either global or per-interface) which 2175 * applies to the given selector; look in all the relevant policy roots 2176 * to figure out which policy wins. 2177 * 2178 * Returns a reference to a policy; caller must release this 2179 * reference when done. 2180 */ 2181 ipsec_policy_t * 2182 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2183 ipsec_selector_t *sel, netstack_t *ns) 2184 { 2185 ipsec_policy_t *p; 2186 ipsec_stack_t *ipss = ns->netstack_ipsec; 2187 2188 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2189 direction, sel, ns); 2190 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2191 p = ipsec_find_policy_head(p, connp->conn_policy, 2192 direction, sel, ns); 2193 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2194 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2195 direction, sel, ns); 2196 } 2197 2198 return (p); 2199 } 2200 2201 /* 2202 * Check with global policy and see whether this inbound 2203 * packet meets the policy constraints. 2204 * 2205 * Locate appropriate policy from global policy, supplemented by the 2206 * conn's configured and/or cached policy if the conn is supplied. 2207 * 2208 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2209 * encrypted packet to see if they match. 2210 * 2211 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2212 * floor. 2213 */ 2214 mblk_t * 2215 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2216 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) 2217 { 2218 ipsec_policy_t *p; 2219 ipsec_selector_t sel; 2220 mblk_t *data_mp, *ipsec_mp; 2221 boolean_t policy_present; 2222 kstat_named_t *counter; 2223 ipsec_in_t *ii = NULL; 2224 uint64_t pkt_unique; 2225 ipsec_stack_t *ipss = ns->netstack_ipsec; 2226 ip_stack_t *ipst = ns->netstack_ip; 2227 2228 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2229 ipsec_mp = mctl_present ? first_mp : NULL; 2230 2231 sel.ips_is_icmp_inv_acq = 0; 2232 2233 ASSERT((ipha == NULL && ip6h != NULL) || 2234 (ip6h == NULL && ipha != NULL)); 2235 2236 if (ipha != NULL) 2237 policy_present = ipss->ipsec_inbound_v4_policy_present; 2238 else 2239 policy_present = ipss->ipsec_inbound_v6_policy_present; 2240 2241 if (!policy_present && connp == NULL) { 2242 /* 2243 * No global policy and no per-socket policy; 2244 * just pass it back (but we shouldn't get here in that case) 2245 */ 2246 return (first_mp); 2247 } 2248 2249 if (ipsec_mp != NULL) { 2250 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2251 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2252 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2253 } 2254 2255 /* 2256 * If we have cached policy, use it. 2257 * Otherwise consult system policy. 2258 */ 2259 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2260 p = connp->conn_latch->ipl_in_policy; 2261 if (p != NULL) { 2262 IPPOL_REFHOLD(p); 2263 } 2264 /* 2265 * Fudge sel for UNIQUE_ID setting below. 2266 */ 2267 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2268 } else { 2269 /* Initialize the ports in the selector */ 2270 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2271 SEL_NONE) == SELRET_NOMEM) { 2272 /* 2273 * Technically not a policy mismatch, but it is 2274 * an internal failure. 
2275 */ 2276 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2277 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE, ns); 2278 counter = DROPPER(ipss, ipds_spd_nomem); 2279 goto fail; 2280 } 2281 2282 /* 2283 * Find the policy which best applies. 2284 * 2285 * If we find global policy, we should look at both 2286 * local policy and global policy and see which is 2287 * stronger and match accordingly. 2288 * 2289 * If we don't find a global policy, check with 2290 * local policy alone. 2291 */ 2292 2293 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 2294 ns); 2295 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2296 sel.ips_local_port, sel.ips_protocol, 0); 2297 } 2298 2299 if (p == NULL) { 2300 if (ipsec_mp == NULL) { 2301 /* 2302 * We have no policy; default to succeeding. 2303 * XXX paranoid system design doesn't do this. 2304 */ 2305 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2306 return (first_mp); 2307 } else { 2308 counter = DROPPER(ipss, ipds_spd_got_secure); 2309 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2310 "ipsec_check_global_policy", ipha, ip6h, B_TRUE, 2311 ns); 2312 goto fail; 2313 } 2314 } 2315 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2316 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2317 pkt_unique, ns)); 2318 } 2319 if (p->ipsp_act->ipa_allow_clear) { 2320 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2321 IPPOL_REFRELE(p, ns); 2322 return (first_mp); 2323 } 2324 IPPOL_REFRELE(p, ns); 2325 /* 2326 * If we reach here, we will drop the packet because it failed the 2327 * global policy check because the packet was cleartext, and it 2328 * should not have been. 2329 */ 2330 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2331 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns); 2332 counter = DROPPER(ipss, ipds_spd_got_clear); 2333 2334 fail: 2335 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2336 &ipss->ipsec_spd_dropper); 2337 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2338 return (NULL); 2339 } 2340 2341 /* 2342 * We check whether an inbound datagram is a valid one 2343 * to accept in clear. If it is secure, it is the job 2344 * of IPSEC to log information appropriately if it 2345 * suspects that it may not be the real one. 2346 * 2347 * It is called only while fanning out to the ULP 2348 * where ULP accepts only secure data and the incoming 2349 * is clear. Usually we never accept clear datagrams in 2350 * such cases. ICMP is the only exception. 2351 * 2352 * NOTE : We don't call this function if the client (ULP) 2353 * is willing to accept things in clear. 2354 */ 2355 boolean_t 2356 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2357 { 2358 ushort_t iph_hdr_length; 2359 icmph_t *icmph; 2360 icmp6_t *icmp6; 2361 uint8_t *nexthdrp; 2362 2363 ASSERT((ipha != NULL && ip6h == NULL) || 2364 (ipha == NULL && ip6h != NULL)); 2365 2366 if (ip6h != NULL) { 2367 iph_hdr_length = ip_hdr_length_v6(mp, ip6h); 2368 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, 2369 &nexthdrp)) { 2370 return (B_FALSE); 2371 } 2372 if (*nexthdrp != IPPROTO_ICMPV6) 2373 return (B_FALSE); 2374 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]); 2375 /* Match IPv6 ICMP policy as closely as IPv4 as possible. */ 2376 switch (icmp6->icmp6_type) { 2377 case ICMP6_PARAM_PROB: 2378 /* Corresponds to port/proto unreach in IPv4. */ 2379 case ICMP6_ECHO_REQUEST: 2380 /* Just like IPv4. 
			 */
2381 			return (B_FALSE);
2382 
2383 		case MLD_LISTENER_QUERY:
2384 		case MLD_LISTENER_REPORT:
2385 		case MLD_LISTENER_REDUCTION:
2386 			/*
2387 			 * XXX IPv4 has a separate NDD tunable for these; what
2388 			 * about here? Plus, mcast is important to ND.
2389 			 */
2390 		case ICMP6_DST_UNREACH:
2391 			/* Corresponds to HOST/NET unreachable in IPv4. */
2392 		case ICMP6_PACKET_TOO_BIG:
2393 		case ICMP6_ECHO_REPLY:
2394 			/* These are trusted in IPv4. */
2395 		case ND_ROUTER_SOLICIT:
2396 		case ND_ROUTER_ADVERT:
2397 		case ND_NEIGHBOR_SOLICIT:
2398 		case ND_NEIGHBOR_ADVERT:
2399 		case ND_REDIRECT:
2400 			/* Trust ND messages for now. */
2401 		case ICMP6_TIME_EXCEEDED:
2402 		default:
2403 			return (B_TRUE);
2404 		}
2405 	} else {
2406 		/*
2407 		 * If it is not ICMP, fail this request.
2408 		 */
2409 		if (ipha->ipha_protocol != IPPROTO_ICMP) {
2410 #ifdef FRAGCACHE_DEBUG
2411 			cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
2412 			    ipha->ipha_protocol);
2413 #endif
2414 			return (B_FALSE);
2415 		}
2416 		iph_hdr_length = IPH_HDR_LENGTH(ipha);
2417 		icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2418 		/*
2419 		 * It is an insecure icmp message. Check to see whether we are
2420 		 * willing to accept this one.
2421 		 */
2422 
2423 		switch (icmph->icmph_type) {
2424 		case ICMP_ECHO_REPLY:
2425 		case ICMP_TIME_STAMP_REPLY:
2426 		case ICMP_INFO_REPLY:
2427 		case ICMP_ROUTER_ADVERTISEMENT:
2428 			/*
2429 			 * We should not encourage clear replies if this
2430 			 * client expects secure. If somebody is replying
2431 			 * in clear, some malicious user watching both the
2432 			 * request and reply can do chosen-plaintext attacks.
2433 			 * With global policy we might be just expecting secure
2434 			 * but sending out clear. We don't know what the right
2435 			 * thing is. We can't do much here as we can't control
2436 			 * the sender. Till we are sure of what to do,
2437 			 * accept them.
2438 			 */
2439 			return (B_TRUE);
2440 		case ICMP_ECHO_REQUEST:
2441 		case ICMP_TIME_STAMP_REQUEST:
2442 		case ICMP_INFO_REQUEST:
2443 		case ICMP_ADDRESS_MASK_REQUEST:
2444 		case ICMP_ROUTER_SOLICITATION:
2445 		case ICMP_ADDRESS_MASK_REPLY:
2446 			/*
2447 			 * Don't accept this as somebody could be sending
2448 			 * us plain text to get encrypted data. If we reply,
2449 			 * it will lead to a chosen-plaintext attack.
2450 			 */
2451 			return (B_FALSE);
2452 		case ICMP_DEST_UNREACHABLE:
2453 			switch (icmph->icmph_code) {
2454 			case ICMP_FRAGMENTATION_NEEDED:
2455 				/*
2456 				 * Be in sync with icmp_inbound, where we have
2457 				 * already set ire_max_frag.
2458 				 */
2459 #ifdef FRAGCACHE_DEBUG
2460 				cmn_err(CE_WARN, "ICMP frag needed\n");
2461 #endif
2462 				return (B_TRUE);
2463 			case ICMP_HOST_UNREACHABLE:
2464 			case ICMP_NET_UNREACHABLE:
2465 				/*
2466 				 * By accepting, we could reset a connection.
2467 				 * How do we solve the problem of some
2468 				 * intermediate router sending insecure ICMP
2469 				 * messages?
2470 				 */
2471 				return (B_TRUE);
2472 			case ICMP_PORT_UNREACHABLE:
2473 			case ICMP_PROTOCOL_UNREACHABLE:
2474 			default:
2475 				return (B_FALSE);
2476 			}
2477 		case ICMP_SOURCE_QUENCH:
2478 			/*
2479 			 * If this is an attack, TCP will slow start
2480 			 * because of this. Is it very harmful?
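			 * (Worst case, a forged source quench merely shrinks
			 * TCP's congestion window; it discloses nothing, so
			 * accepting it in clear is considered low-risk.)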
2481 */ 2482 return (B_TRUE); 2483 case ICMP_PARAM_PROBLEM: 2484 return (B_FALSE); 2485 case ICMP_TIME_EXCEEDED: 2486 return (B_TRUE); 2487 case ICMP_REDIRECT: 2488 return (B_FALSE); 2489 default : 2490 return (B_FALSE); 2491 } 2492 } 2493 } 2494 2495 void 2496 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2497 { 2498 mutex_enter(&ipl->ipl_lock); 2499 2500 if (ipl->ipl_ids_latched) { 2501 /* I lost, someone else got here before me */ 2502 mutex_exit(&ipl->ipl_lock); 2503 return; 2504 } 2505 2506 if (local != NULL) 2507 IPSID_REFHOLD(local); 2508 if (remote != NULL) 2509 IPSID_REFHOLD(remote); 2510 2511 ipl->ipl_local_cid = local; 2512 ipl->ipl_remote_cid = remote; 2513 ipl->ipl_ids_latched = B_TRUE; 2514 mutex_exit(&ipl->ipl_lock); 2515 } 2516 2517 void 2518 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii) 2519 { 2520 ipsa_t *sa; 2521 2522 if (!ipl->ipl_ids_latched) { 2523 ipsid_t *local = NULL; 2524 ipsid_t *remote = NULL; 2525 2526 if (!ii->ipsec_in_loopback) { 2527 if (ii->ipsec_in_esp_sa != NULL) 2528 sa = ii->ipsec_in_esp_sa; 2529 else 2530 sa = ii->ipsec_in_ah_sa; 2531 ASSERT(sa != NULL); 2532 local = sa->ipsa_dst_cid; 2533 remote = sa->ipsa_src_cid; 2534 } 2535 ipsec_latch_ids(ipl, local, remote); 2536 } 2537 ipl->ipl_in_action = ii->ipsec_in_action; 2538 IPACT_REFHOLD(ipl->ipl_in_action); 2539 } 2540 2541 /* 2542 * Check whether the policy constraints are met either for an 2543 * inbound datagram; called from IP in numerous places. 2544 * 2545 * Note that this is not a chokepoint for inbound policy checks; 2546 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy() 2547 */ 2548 mblk_t * 2549 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, 2550 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2551 { 2552 ipsec_in_t *ii; 2553 boolean_t ret; 2554 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp; 2555 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL; 2556 ipsec_latch_t *ipl; 2557 uint64_t unique_id; 2558 ipsec_stack_t *ipss; 2559 ip_stack_t *ipst; 2560 netstack_t *ns; 2561 2562 ASSERT(connp != NULL); 2563 ns = connp->conn_netstack; 2564 ipss = ns->netstack_ipsec; 2565 ipst = ns->netstack_ip; 2566 2567 if (ipsec_mp == NULL) { 2568 clear: 2569 /* 2570 * This is the case where the incoming datagram is 2571 * cleartext and we need to see whether this client 2572 * would like to receive such untrustworthy things from 2573 * the wire. 2574 */ 2575 ASSERT(mp != NULL); 2576 2577 mutex_enter(&connp->conn_lock); 2578 if (connp->conn_state_flags & CONN_CONDEMNED) { 2579 mutex_exit(&connp->conn_lock); 2580 ip_drop_packet(first_mp, B_TRUE, NULL, 2581 NULL, DROPPER(ipss, ipds_spd_got_clear), 2582 &ipss->ipsec_spd_dropper); 2583 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2584 return (NULL); 2585 } 2586 if ((ipl = connp->conn_latch) != NULL) { 2587 /* Hold a reference in case the conn is closing */ 2588 IPLATCH_REFHOLD(ipl); 2589 mutex_exit(&connp->conn_lock); 2590 /* 2591 * Policy is cached in the conn. 
2592 */ 2593 if ((ipl->ipl_in_policy != NULL) && 2594 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) { 2595 ret = ipsec_inbound_accept_clear(mp, 2596 ipha, ip6h); 2597 if (ret) { 2598 BUMP_MIB(&ipst->ips_ip_mib, 2599 ipsecInSucceeded); 2600 IPLATCH_REFRELE(ipl, ns); 2601 return (first_mp); 2602 } else { 2603 ipsec_log_policy_failure( 2604 IPSEC_POLICY_MISMATCH, 2605 "ipsec_check_inbound_policy", ipha, 2606 ip6h, B_FALSE, ns); 2607 ip_drop_packet(first_mp, B_TRUE, NULL, 2608 NULL, 2609 DROPPER(ipss, ipds_spd_got_clear), 2610 &ipss->ipsec_spd_dropper); 2611 BUMP_MIB(&ipst->ips_ip_mib, 2612 ipsecInFailed); 2613 IPLATCH_REFRELE(ipl, ns); 2614 return (NULL); 2615 } 2616 } else { 2617 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2618 IPLATCH_REFRELE(ipl, ns); 2619 return (first_mp); 2620 } 2621 } else { 2622 uchar_t db_type; 2623 2624 mutex_exit(&connp->conn_lock); 2625 /* 2626 * As this is a non-hardbound connection we need 2627 * to look at both per-socket policy and global 2628 * policy. As this is cleartext, mark the mp as 2629 * M_DATA in case if it is an ICMP error being 2630 * reported before calling ipsec_check_global_policy 2631 * so that it does not mistake it for IPSEC_IN. 2632 */ 2633 db_type = mp->b_datap->db_type; 2634 mp->b_datap->db_type = M_DATA; 2635 first_mp = ipsec_check_global_policy(first_mp, connp, 2636 ipha, ip6h, mctl_present, ns); 2637 if (first_mp != NULL) 2638 mp->b_datap->db_type = db_type; 2639 return (first_mp); 2640 } 2641 } 2642 /* 2643 * If it is inbound check whether the attached message 2644 * is secure or not. We have a special case for ICMP, 2645 * where we have a IPSEC_IN message and the attached 2646 * message is not secure. See icmp_inbound_error_fanout 2647 * for details. 2648 */ 2649 ASSERT(ipsec_mp != NULL); 2650 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2651 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2652 2653 if (!ii->ipsec_in_secure) 2654 goto clear; 2655 2656 /* 2657 * mp->b_cont could be either a M_CTL message 2658 * for icmp errors being sent up or a M_DATA message. 2659 */ 2660 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA); 2661 2662 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2663 2664 mutex_enter(&connp->conn_lock); 2665 /* Connection is closing */ 2666 if (connp->conn_state_flags & CONN_CONDEMNED) { 2667 mutex_exit(&connp->conn_lock); 2668 ip_drop_packet(first_mp, B_TRUE, NULL, 2669 NULL, DROPPER(ipss, ipds_spd_got_clear), 2670 &ipss->ipsec_spd_dropper); 2671 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2672 return (NULL); 2673 } 2674 2675 /* 2676 * Once a connection is latched it remains so for life, the conn_latch 2677 * pointer on the conn has not changed, simply initializing ipl here 2678 * as the earlier initialization was done only in the cleartext case. 2679 */ 2680 if ((ipl = connp->conn_latch) == NULL) { 2681 mutex_exit(&connp->conn_lock); 2682 /* 2683 * We don't have policies cached in the conn 2684 * for this stream. So, look at the global 2685 * policy. It will check against conn or global 2686 * depending on whichever is stronger. 
2687 */ 2688 return (ipsec_check_global_policy(first_mp, connp, 2689 ipha, ip6h, mctl_present, ns)); 2690 } 2691 2692 IPLATCH_REFHOLD(ipl); 2693 mutex_exit(&connp->conn_lock); 2694 2695 if (ipl->ipl_in_action != NULL) { 2696 /* Policy is cached & latched; fast(er) path */ 2697 const char *reason; 2698 kstat_named_t *counter; 2699 2700 if (ipsec_check_ipsecin_latch(ii, mp, ipl, 2701 ipha, ip6h, &reason, &counter, connp)) { 2702 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2703 IPLATCH_REFRELE(ipl, ns); 2704 return (first_mp); 2705 } 2706 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, 2707 SL_ERROR|SL_WARN|SL_CONSOLE, 2708 "ipsec inbound policy mismatch: %s, packet dropped\n", 2709 reason); 2710 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2711 &ipss->ipsec_spd_dropper); 2712 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2713 IPLATCH_REFRELE(ipl, ns); 2714 return (NULL); 2715 } else if (ipl->ipl_in_policy == NULL) { 2716 ipsec_weird_null_inbound_policy++; 2717 IPLATCH_REFRELE(ipl, ns); 2718 return (first_mp); 2719 } 2720 2721 unique_id = conn_to_unique(connp, mp, ipha, ip6h); 2722 IPPOL_REFHOLD(ipl->ipl_in_policy); 2723 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy, 2724 ipha, ip6h, unique_id, ns); 2725 /* 2726 * NOTE: ipsecIn{Failed,Succeeeded} bumped by 2727 * ipsec_check_ipsecin_policy(). 2728 */ 2729 if (first_mp != NULL) 2730 ipsec_latch_inbound(ipl, ii); 2731 IPLATCH_REFRELE(ipl, ns); 2732 return (first_mp); 2733 } 2734 2735 /* 2736 * Returns: 2737 * 2738 * SELRET_NOMEM --> msgpullup() needed to gather things failed. 2739 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2740 * gathering, the initial fragment is too short for 2741 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2742 * set. 2743 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2744 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2745 * should put this packet in a fragment-gathering queue. 2746 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2747 * is set. 2748 */ 2749 static selret_t 2750 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2751 ip6_t *ip6h, uint8_t sel_flags) 2752 { 2753 uint16_t *ports; 2754 ushort_t hdr_len; 2755 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases... */ 2756 mblk_t *spare_mp = NULL; 2757 uint8_t *nexthdrp; 2758 uint8_t nexthdr; 2759 uint8_t *typecode; 2760 uint8_t check_proto; 2761 ip6_pkt_t ipp; 2762 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2763 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2764 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2765 2766 ASSERT((ipha == NULL && ip6h != NULL) || 2767 (ipha != NULL && ip6h == NULL)); 2768 2769 if (ip6h != NULL) { 2770 if (is_icmp) 2771 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2772 2773 check_proto = IPPROTO_ICMPV6; 2774 sel->ips_isv4 = B_FALSE; 2775 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2776 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2777 2778 bzero(&ipp, sizeof (ipp)); 2779 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2780 2781 nexthdr = ip6h->ip6_nxt; 2782 switch (nexthdr) { 2783 case IPPROTO_HOPOPTS: 2784 case IPPROTO_ROUTING: 2785 case IPPROTO_DSTOPTS: 2786 case IPPROTO_FRAGMENT: 2787 /* 2788 * Use ip_hdr_length_nexthdr_v6(). 
And have a spare 2789 * mblk that's contiguous to feed it 2790 */ 2791 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2792 return (SELRET_NOMEM); 2793 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2794 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2795 &hdr_len, &nexthdrp)) { 2796 /* Malformed packet - caller frees. */ 2797 ipsec_freemsg_chain(spare_mp); 2798 return (SELRET_BADPKT); 2799 } 2800 nexthdr = *nexthdrp; 2801 /* We can just extract based on hdr_len now. */ 2802 break; 2803 default: 2804 hdr_len = IPV6_HDR_LEN; 2805 break; 2806 } 2807 2808 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2809 /* IPv6 Fragment */ 2810 ipsec_freemsg_chain(spare_mp); 2811 return (SELRET_TUNFRAG); 2812 } 2813 } else { 2814 if (is_icmp) 2815 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2816 check_proto = IPPROTO_ICMP; 2817 sel->ips_isv4 = B_TRUE; 2818 sel->ips_local_addr_v4 = ipha->ipha_dst; 2819 sel->ips_remote_addr_v4 = ipha->ipha_src; 2820 nexthdr = ipha->ipha_protocol; 2821 hdr_len = IPH_HDR_LENGTH(ipha); 2822 2823 if (port_policy_present && 2824 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2825 !is_icmp) { 2826 /* IPv4 Fragment */ 2827 ipsec_freemsg_chain(spare_mp); 2828 return (SELRET_TUNFRAG); 2829 } 2830 2831 } 2832 sel->ips_protocol = nexthdr; 2833 2834 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2835 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2836 (!port_policy_present && tunnel_mode)) { 2837 sel->ips_remote_port = sel->ips_local_port = 0; 2838 ipsec_freemsg_chain(spare_mp); 2839 return (SELRET_SUCCESS); 2840 } 2841 2842 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2843 /* If we didn't pullup a copy already, do so now. */ 2844 /* 2845 * XXX performance, will upper-layers frequently split TCP/UDP 2846 * apart from IP or options? If so, perhaps we should revisit 2847 * the spare_mp strategy. 2848 */ 2849 ipsec_hdr_pullup_needed++; 2850 if (spare_mp == NULL && 2851 (spare_mp = msgpullup(mp, -1)) == NULL) { 2852 return (SELRET_NOMEM); 2853 } 2854 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2855 } else { 2856 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2857 } 2858 2859 if (nexthdr == check_proto) { 2860 typecode = (uint8_t *)ports; 2861 sel->ips_icmp_type = *typecode++; 2862 sel->ips_icmp_code = *typecode; 2863 sel->ips_remote_port = sel->ips_local_port = 0; 2864 } else { 2865 sel->ips_remote_port = *ports++; 2866 sel->ips_local_port = *ports; 2867 } 2868 ipsec_freemsg_chain(spare_mp); 2869 return (SELRET_SUCCESS); 2870 } 2871 2872 static boolean_t 2873 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2874 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2875 { 2876 /* 2877 * XXX cut&paste shared with ipsec_init_inbound_sel 2878 */ 2879 uint16_t *ports; 2880 ushort_t hdr_len; 2881 mblk_t *spare_mp = NULL; 2882 uint8_t *nexthdrp; 2883 uint8_t nexthdr; 2884 uint8_t *typecode; 2885 uint8_t check_proto; 2886 2887 ASSERT((ipha == NULL && ip6h != NULL) || 2888 (ipha != NULL && ip6h == NULL)); 2889 2890 if (ip6h != NULL) { 2891 check_proto = IPPROTO_ICMPV6; 2892 nexthdr = ip6h->ip6_nxt; 2893 switch (nexthdr) { 2894 case IPPROTO_HOPOPTS: 2895 case IPPROTO_ROUTING: 2896 case IPPROTO_DSTOPTS: 2897 case IPPROTO_FRAGMENT: 2898 /* 2899 * Use ip_hdr_length_nexthdr_v6(). 
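 * (this is the same extension-header walk that
 * ipsec_init_inbound_sel() does above)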
And have a spare 2900 * mblk that's contiguous to feed it 2901 */ 2902 spare_mp = msgpullup(mp, -1); 2903 if (spare_mp == NULL || 2904 !ip_hdr_length_nexthdr_v6(spare_mp, 2905 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2906 &hdr_len, &nexthdrp)) { 2907 /* Always works, even if NULL. */ 2908 ipsec_freemsg_chain(spare_mp); 2909 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2910 DROPPER(ipss, ipds_spd_nomem), 2911 &ipss->ipsec_spd_dropper); 2912 return (B_FALSE); 2913 } else { 2914 nexthdr = *nexthdrp; 2915 /* We can just extract based on hdr_len now. */ 2916 } 2917 break; 2918 default: 2919 hdr_len = IPV6_HDR_LEN; 2920 break; 2921 } 2922 } else { 2923 check_proto = IPPROTO_ICMP; 2924 hdr_len = IPH_HDR_LENGTH(ipha); 2925 nexthdr = ipha->ipha_protocol; 2926 } 2927 2928 sel->ips_protocol = nexthdr; 2929 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2930 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2931 sel->ips_local_port = sel->ips_remote_port = 0; 2932 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2933 return (B_TRUE); 2934 } 2935 2936 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2937 /* If we didn't pullup a copy already, do so now. */ 2938 /* 2939 * XXX performance, will upper-layers frequently split TCP/UDP 2940 * apart from IP or options? If so, perhaps we should revisit 2941 * the spare_mp strategy. 2942 * 2943 * XXX should this be msgpullup(mp, hdr_len+4) ??? 2944 */ 2945 if (spare_mp == NULL && 2946 (spare_mp = msgpullup(mp, -1)) == NULL) { 2947 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2948 DROPPER(ipss, ipds_spd_nomem), 2949 &ipss->ipsec_spd_dropper); 2950 return (B_FALSE); 2951 } 2952 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2953 } else { 2954 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2955 } 2956 2957 if (nexthdr == check_proto) { 2958 typecode = (uint8_t *)ports; 2959 sel->ips_icmp_type = *typecode++; 2960 sel->ips_icmp_code = *typecode; 2961 sel->ips_remote_port = sel->ips_local_port = 0; 2962 } else { 2963 sel->ips_local_port = *ports++; 2964 sel->ips_remote_port = *ports; 2965 } 2966 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2967 return (B_TRUE); 2968 } 2969 2970 /* 2971 * Create an ipsec_action_t based on the way an inbound packet was protected. 2972 * Used to reflect traffic back to a sender. 2973 * 2974 * We don't bother interning the action into the hash table. 2975 */ 2976 ipsec_action_t * 2977 ipsec_in_to_out_action(ipsec_in_t *ii) 2978 { 2979 ipsa_t *ah_assoc, *esp_assoc; 2980 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2981 ipsec_action_t *ap; 2982 boolean_t unique; 2983 2984 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2985 2986 if (ap == NULL) 2987 return (NULL); 2988 2989 bzero(ap, sizeof (*ap)); 2990 HASH_NULL(ap, ipa_hash); 2991 ap->ipa_next = NULL; 2992 ap->ipa_refs = 1; 2993 2994 /* 2995 * Get the algorithms that were used for this packet. 
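 * For example (illustrative values only): a packet that arrived under
 * an ESP SA using AES encryption plus HMAC-SHA1 authentication, with
 * no AH, yields an IPSEC_ACT_APPLY action with ipp_use_esp and
 * ipp_use_espa set, and the key-size bounds below pinned to the SA's
 * actual ipsa_encrkeybits/ipsa_authkeybits.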
2996 */ 2997 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2998 ap->ipa_act.ipa_log = 0; 2999 ah_assoc = ii->ipsec_in_ah_sa; 3000 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 3001 3002 esp_assoc = ii->ipsec_in_esp_sa; 3003 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 3004 3005 if (esp_assoc != NULL) { 3006 encr_alg = esp_assoc->ipsa_encr_alg; 3007 espa_alg = esp_assoc->ipsa_auth_alg; 3008 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 3009 } 3010 if (ah_assoc != NULL) 3011 auth_alg = ah_assoc->ipsa_auth_alg; 3012 3013 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 3014 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 3015 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 3016 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 3017 unique = B_FALSE; 3018 3019 if (esp_assoc != NULL) { 3020 ap->ipa_act.ipa_apply.ipp_espa_minbits = 3021 esp_assoc->ipsa_authkeybits; 3022 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 3023 esp_assoc->ipsa_authkeybits; 3024 ap->ipa_act.ipa_apply.ipp_espe_minbits = 3025 esp_assoc->ipsa_encrkeybits; 3026 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 3027 esp_assoc->ipsa_encrkeybits; 3028 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 3029 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 3030 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 3031 unique = B_TRUE; 3032 } 3033 if (ah_assoc != NULL) { 3034 ap->ipa_act.ipa_apply.ipp_ah_minbits = 3035 ah_assoc->ipsa_authkeybits; 3036 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 3037 ah_assoc->ipsa_authkeybits; 3038 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 3039 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 3040 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 3041 unique = B_TRUE; 3042 } 3043 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3044 ap->ipa_want_unique = unique; 3045 ap->ipa_allow_clear = B_FALSE; 3046 ap->ipa_want_se = ii->ipsec_in_decaps; 3047 ap->ipa_want_ah = (ah_assoc != NULL); 3048 ap->ipa_want_esp = (esp_assoc != NULL); 3049 3050 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 3051 3052 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3053 3054 return (ap); 3055 } 3056 3057 3058 /* 3059 * Compute the worst-case amount of extra space required by an action. 3060 * Note that, because of the ESP considerations listed below, this is 3061 * actually not the same as the best-case reduction in the MTU; in the 3062 * future, we should pass additional information to this function to 3063 * allow the actual MTU impact to be computed. 3064 * 3065 * AH: Revisit this if we implement algorithms with 3066 * a verifier size of more than 12 bytes. 3067 * 3068 * ESP: A more exact but more messy computation would take into 3069 * account the interaction between the cipher block size and the 3070 * effective MTU, yielding the inner payload size which reflects a 3071 * packet with *minimum* ESP padding.. 3072 */ 3073 int32_t 3074 ipsec_act_ovhd(const ipsec_act_t *act) 3075 { 3076 int32_t overhead = 0; 3077 3078 if (act->ipa_type == IPSEC_ACT_APPLY) { 3079 const ipsec_prot_t *ipp = &act->ipa_apply; 3080 3081 if (ipp->ipp_use_ah) 3082 overhead += IPSEC_MAX_AH_HDR_SIZE; 3083 if (ipp->ipp_use_esp) { 3084 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3085 overhead += sizeof (struct udphdr); 3086 } 3087 if (ipp->ipp_use_se) 3088 overhead += IP_SIMPLE_HDR_LENGTH; 3089 } 3090 return (overhead); 3091 } 3092 3093 /* 3094 * This hash function is used only when creating policies and thus is not 3095 * performance-critical for packet flows. 
3096 * 3097 * Future work: canonicalize the structures hashed with this (i.e., 3098 * zeroize padding) so the hash works correctly. 3099 */ 3100 /* ARGSUSED */ 3101 static uint32_t 3102 policy_hash(int size, const void *start, const void *end) 3103 { 3104 return (0); 3105 } 3106 3107 3108 /* 3109 * Hash function macros for each address type. 3110 * 3111 * The IPV6 hash function assumes that the low order 32-bits of the 3112 * address (typically containing the low order 24 bits of the mac 3113 * address) are reasonably well-distributed. Revisit this if we run 3114 * into trouble from lots of collisions on ::1 addresses and the like 3115 * (seems unlikely). 3116 */ 3117 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3118 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3119 3120 /* 3121 * These two hash functions should produce coordinated values 3122 * but have slightly different roles. 3123 */ 3124 static uint32_t 3125 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3126 { 3127 uint32_t valid = selkey->ipsl_valid; 3128 ipsec_stack_t *ipss = ns->netstack_ipsec; 3129 3130 if (!(valid & IPSL_REMOTE_ADDR)) 3131 return (IPSEC_SEL_NOHASH); 3132 3133 if (valid & IPSL_IPV4) { 3134 if (selkey->ipsl_remote_pfxlen == 32) { 3135 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3136 ipss->ipsec_spd_hashsize)); 3137 } 3138 } 3139 if (valid & IPSL_IPV6) { 3140 if (selkey->ipsl_remote_pfxlen == 128) { 3141 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3142 ipss->ipsec_spd_hashsize)); 3143 } 3144 } 3145 return (IPSEC_SEL_NOHASH); 3146 } 3147 3148 static uint32_t 3149 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3150 { 3151 if (sel->ips_isv4) { 3152 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3153 root->ipr_nchains)); 3154 } 3155 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3156 } 3157 3158 /* 3159 * Intern actions into the action hash table. 3160 */ 3161 ipsec_action_t * 3162 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3163 { 3164 int i; 3165 uint32_t hval; 3166 ipsec_action_t *ap; 3167 ipsec_action_t *prev = NULL; 3168 int32_t overhead, maxovhd = 0; 3169 boolean_t allow_clear = B_FALSE; 3170 boolean_t want_ah = B_FALSE; 3171 boolean_t want_esp = B_FALSE; 3172 boolean_t want_se = B_FALSE; 3173 boolean_t want_unique = B_FALSE; 3174 ipsec_stack_t *ipss = ns->netstack_ipsec; 3175 3176 /* 3177 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3178 * so we can use a non-trivial policy_hash function. 3179 */ 3180 for (i = n-1; i >= 0; i--) { 3181 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3182 3183 HASH_LOCK(ipss->ipsec_action_hash, hval); 3184 3185 for (HASH_ITERATE(ap, ipa_hash, 3186 ipss->ipsec_action_hash, hval)) { 3187 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3188 continue; 3189 if (ap->ipa_next != prev) 3190 continue; 3191 break; 3192 } 3193 if (ap != NULL) { 3194 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3195 prev = ap; 3196 continue; 3197 } 3198 /* 3199 * need to allocate a new one.. 
3200 */ 3201 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3202 if (ap == NULL) { 3203 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3204 if (prev != NULL) 3205 ipsec_action_free(prev); 3206 return (NULL); 3207 } 3208 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3209 3210 ap->ipa_next = prev; 3211 ap->ipa_act = a[i]; 3212 3213 overhead = ipsec_act_ovhd(&a[i]); 3214 if (maxovhd < overhead) 3215 maxovhd = overhead; 3216 3217 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3218 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3219 allow_clear = B_TRUE; 3220 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3221 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3222 3223 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3224 want_ah |= ipp->ipp_use_ah; 3225 want_esp |= ipp->ipp_use_esp; 3226 want_se |= ipp->ipp_use_se; 3227 want_unique |= ipp->ipp_use_unique; 3228 } 3229 ap->ipa_allow_clear = allow_clear; 3230 ap->ipa_want_ah = want_ah; 3231 ap->ipa_want_esp = want_esp; 3232 ap->ipa_want_se = want_se; 3233 ap->ipa_want_unique = want_unique; 3234 ap->ipa_refs = 1; /* from the hash table */ 3235 ap->ipa_ovhd = maxovhd; 3236 if (prev) 3237 prev->ipa_refs++; 3238 prev = ap; 3239 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3240 } 3241 3242 ap->ipa_refs++; /* caller's reference */ 3243 3244 return (ap); 3245 } 3246 3247 /* 3248 * Called when refcount goes to 0, indicating that all references to this 3249 * node are gone. 3250 * 3251 * This does not unchain the action from the hash table. 3252 */ 3253 void 3254 ipsec_action_free(ipsec_action_t *ap) 3255 { 3256 for (;;) { 3257 ipsec_action_t *np = ap->ipa_next; 3258 ASSERT(ap->ipa_refs == 0); 3259 ASSERT(ap->ipa_hash.hash_pp == NULL); 3260 kmem_cache_free(ipsec_action_cache, ap); 3261 ap = np; 3262 /* Inlined IPACT_REFRELE -- avoid recursion */ 3263 if (ap == NULL) 3264 break; 3265 membar_exit(); 3266 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3267 break; 3268 /* End inlined IPACT_REFRELE */ 3269 } 3270 } 3271 3272 /* 3273 * Called when the action hash table goes away. 3274 * 3275 * The actions can be queued on an mblk with ipsec_in or 3276 * ipsec_out, hence the actions might still be around. 3277 * But we decrement ipa_refs here since we no longer have 3278 * a reference to the action from the hash table. 3279 */ 3280 static void 3281 ipsec_action_free_table(ipsec_action_t *ap) 3282 { 3283 while (ap != NULL) { 3284 ipsec_action_t *np = ap->ipa_next; 3285 3286 /* FIXME: remove? */ 3287 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3288 (void *)ap, ap->ipa_refs); 3289 ASSERT(ap->ipa_refs > 0); 3290 IPACT_REFRELE(ap); 3291 ap = np; 3292 } 3293 } 3294 3295 /* 3296 * Need to walk all stack instances since the reclaim function 3297 * is global for all instances 3298 */ 3299 /* ARGSUSED */ 3300 static void 3301 ipsec_action_reclaim(void *arg) 3302 { 3303 netstack_handle_t nh; 3304 netstack_t *ns; 3305 3306 netstack_next_init(&nh); 3307 while ((ns = netstack_next(&nh)) != NULL) { 3308 ipsec_action_reclaim_stack(ns); 3309 netstack_rele(ns); 3310 } 3311 netstack_next_fini(&nh); 3312 } 3313 3314 /* 3315 * Periodically sweep action hash table for actions with refcount==1, and 3316 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3317 * because we can't close the race between another thread finding the action 3318 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3319 * Instead, we run this function sporadically to clean up after ourselves; 3320 * we also set it as the "reclaim" function for the action kmem_cache. 
3321 * 3322 * Note that it may take several passes of ipsec_action_gc() to free all 3323 * "stale" actions. 3324 */ 3325 static void 3326 ipsec_action_reclaim_stack(netstack_t *ns) 3327 { 3328 int i; 3329 ipsec_stack_t *ipss = ns->netstack_ipsec; 3330 3331 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3332 ipsec_action_t *ap, *np; 3333 3334 /* skip the lock if nobody home */ 3335 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3336 continue; 3337 3338 HASH_LOCK(ipss->ipsec_action_hash, i); 3339 for (ap = ipss->ipsec_action_hash[i].hash_head; 3340 ap != NULL; ap = np) { 3341 ASSERT(ap->ipa_refs > 0); 3342 np = ap->ipa_hash.hash_next; 3343 if (ap->ipa_refs > 1) 3344 continue; 3345 HASH_UNCHAIN(ap, ipa_hash, 3346 ipss->ipsec_action_hash, i); 3347 IPACT_REFRELE(ap); 3348 } 3349 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3350 } 3351 } 3352 3353 /* 3354 * Intern a selector set into the selector set hash table. 3355 * This is simpler than the actions case.. 3356 */ 3357 static ipsec_sel_t * 3358 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3359 { 3360 ipsec_sel_t *sp; 3361 uint32_t hval, bucket; 3362 ipsec_stack_t *ipss = ns->netstack_ipsec; 3363 3364 /* 3365 * Exactly one AF bit should be set in selkey. 3366 */ 3367 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3368 !(selkey->ipsl_valid & IPSL_IPV6)); 3369 3370 hval = selkey_hash(selkey, ns); 3371 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3372 selkey->ipsl_sel_hval = hval; 3373 3374 bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval; 3375 3376 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); 3377 HASH_LOCK(ipss->ipsec_sel_hash, bucket); 3378 3379 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { 3380 if (bcmp(&sp->ipsl_key, selkey, 3381 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3382 break; 3383 } 3384 if (sp != NULL) { 3385 sp->ipsl_refs++; 3386 3387 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3388 return (sp); 3389 } 3390 3391 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3392 if (sp == NULL) { 3393 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3394 return (NULL); 3395 } 3396 3397 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); 3398 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3399 sp->ipsl_key = *selkey; 3400 /* Set to uninitalized and have insertion into polhead fix things. */ 3401 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3402 sp->ipsl_key.ipsl_pol_hval = 0; 3403 else 3404 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3405 3406 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3407 3408 return (sp); 3409 } 3410 3411 static void 3412 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) 3413 { 3414 ipsec_sel_t *sp = *spp; 3415 int hval = sp->ipsl_key.ipsl_sel_hval; 3416 ipsec_stack_t *ipss = ns->netstack_ipsec; 3417 3418 *spp = NULL; 3419 3420 if (hval == IPSEC_SEL_NOHASH) 3421 hval = 0; 3422 3423 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); 3424 HASH_LOCK(ipss->ipsec_sel_hash, hval); 3425 if (--sp->ipsl_refs == 1) { 3426 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); 3427 sp->ipsl_refs--; 3428 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3429 ASSERT(sp->ipsl_refs == 0); 3430 kmem_cache_free(ipsec_sel_cache, sp); 3431 /* Caller unlocks */ 3432 return; 3433 } 3434 3435 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3436 } 3437 3438 /* 3439 * Free a policy rule which we know is no longer being referenced. 
3440 */ 3441 void 3442 ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns) 3443 { 3444 ASSERT(ipp->ipsp_refs == 0); 3445 ASSERT(ipp->ipsp_sel != NULL); 3446 ASSERT(ipp->ipsp_act != NULL); 3447 3448 ipsec_sel_rel(&ipp->ipsp_sel, ns); 3449 IPACT_REFRELE(ipp->ipsp_act); 3450 kmem_cache_free(ipsec_pol_cache, ipp); 3451 } 3452 3453 /* 3454 * Construction of new policy rules; construct a policy, and add it to 3455 * the appropriate tables. 3456 */ 3457 ipsec_policy_t * 3458 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3459 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3460 { 3461 ipsec_action_t *ap; 3462 ipsec_sel_t *sp; 3463 ipsec_policy_t *ipp; 3464 ipsec_stack_t *ipss = ns->netstack_ipsec; 3465 3466 if (index_ptr == NULL) 3467 index_ptr = &ipss->ipsec_next_policy_index; 3468 3469 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3470 ap = ipsec_act_find(a, nacts, ns); 3471 sp = ipsec_find_sel(keys, ns); 3472 3473 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3474 if (ap != NULL) { 3475 IPACT_REFRELE(ap); 3476 } 3477 if (sp != NULL) 3478 ipsec_sel_rel(&sp, ns); 3479 if (ipp != NULL) 3480 kmem_cache_free(ipsec_pol_cache, ipp); 3481 return (NULL); 3482 } 3483 3484 HASH_NULL(ipp, ipsp_hash); 3485 3486 ipp->ipsp_refs = 1; /* caller's reference */ 3487 ipp->ipsp_sel = sp; 3488 ipp->ipsp_act = ap; 3489 ipp->ipsp_prio = prio; /* rule priority */ 3490 ipp->ipsp_index = *index_ptr; 3491 (*index_ptr)++; 3492 3493 return (ipp); 3494 } 3495 3496 static void 3497 ipsec_update_present_flags(ipsec_stack_t *ipss) 3498 { 3499 boolean_t hashpol; 3500 3501 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3502 3503 if (hashpol) { 3504 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3505 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3506 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3507 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3508 return; 3509 } 3510 3511 ipss->ipsec_outbound_v4_policy_present = (NULL != 3512 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3513 ipr_nonhash[IPSEC_AF_V4]); 3514 ipss->ipsec_outbound_v6_policy_present = (NULL != 3515 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3516 ipr_nonhash[IPSEC_AF_V6]); 3517 ipss->ipsec_inbound_v4_policy_present = (NULL != 3518 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3519 ipr_nonhash[IPSEC_AF_V4]); 3520 ipss->ipsec_inbound_v6_policy_present = (NULL != 3521 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3522 ipr_nonhash[IPSEC_AF_V6]); 3523 } 3524 3525 boolean_t 3526 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3527 netstack_t *ns) 3528 { 3529 ipsec_sel_t *sp; 3530 ipsec_policy_t *ip, *nip, *head; 3531 int af; 3532 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3533 3534 sp = ipsec_find_sel(keys, ns); 3535 3536 if (sp == NULL) 3537 return (B_FALSE); 3538 3539 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6;
3540 
3541 	rw_enter(&php->iph_lock, RW_WRITER);
3542 
3543 	if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) {
3544 		head = pr->ipr_nonhash[af];
3545 	} else {
3546 		head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head;
3547 	}
3548 
3549 	for (ip = head; ip != NULL; ip = nip) {
3550 		nip = ip->ipsp_hash.hash_next;
3551 		if (ip->ipsp_sel != sp) {
3552 			continue;
3553 		}
3554 
3555 		IPPOL_UNCHAIN(php, ip, ns);
3556 
3557 		php->iph_gen++;
3558 		ipsec_update_present_flags(ns->netstack_ipsec);
3559 
3560 		rw_exit(&php->iph_lock);
3561 
3562 		ipsec_sel_rel(&sp, ns);
3563 
3564 		return (B_TRUE);
3565 	}
3566 
3567 	rw_exit(&php->iph_lock);
3568 	ipsec_sel_rel(&sp, ns);
3569 	return (B_FALSE);
3570 }
3571 
3572 int
3573 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index,
3574     netstack_t *ns)
3575 {
3576 	boolean_t found = B_FALSE;
3577 	ipsec_policy_t ipkey;
3578 	ipsec_policy_t *ip;
3579 	avl_index_t where;
3580 
3581 	(void) memset(&ipkey, 0, sizeof (ipkey));
3582 	ipkey.ipsp_index = policy_index;
3583 
3584 	rw_enter(&php->iph_lock, RW_WRITER);
3585 
3586 	/*
3587 	 * We could be cleverer here about the walk.
3588 	 * But well, (k+1)*log(N) will do for now (k == number of matches,
3589 	 * N == number of table entries).
3590 	 */
3591 	for (;;) {
3592 		ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid,
3593 		    (void *)&ipkey, &where);
3594 		ASSERT(ip == NULL);
3595 
3596 		ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER);
3597 
3598 		if (ip == NULL)
3599 			break;
3600 
3601 		if (ip->ipsp_index != policy_index) {
3602 			ASSERT(ip->ipsp_index > policy_index);
3603 			break;
3604 		}
3605 
3606 		IPPOL_UNCHAIN(php, ip, ns);
3607 		found = B_TRUE;
3608 	}
3609 
3610 	if (found) {
3611 		php->iph_gen++;
3612 		ipsec_update_present_flags(ns->netstack_ipsec);
3613 	}
3614 
3615 	rw_exit(&php->iph_lock);
3616 
3617 	return (found ? 0 : ENOENT);
3618 }
3619 
3620 /*
3621  * Given a constructed ipsec_policy_t policy rule, see if it can be entered
3622  * into the correct policy ruleset. As a side-effect, it sets the hash
3623  * entries on "ipp"'s ipsp_pol_hval.
3624  *
3625  * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
3626  * duplicate policy exists with exactly the same selectors, or an icmp
3627  * rule exists with a different encryption/authentication action).
3628  */
3629 boolean_t
3630 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
3631 {
3632 	ipsec_policy_root_t *pr = &php->iph_root[direction];
3633 	int af = -1;
3634 	ipsec_policy_t *p2, *head;
3635 	uint8_t check_proto;
3636 	ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3637 	uint32_t valid = selkey->ipsl_valid;
3638 
3639 	if (valid & IPSL_IPV6) {
3640 		ASSERT(!(valid & IPSL_IPV4));
3641 		af = IPSEC_AF_V6;
3642 		check_proto = IPPROTO_ICMPV6;
3643 	} else {
3644 		ASSERT(valid & IPSL_IPV4);
3645 		af = IPSEC_AF_V4;
3646 		check_proto = IPPROTO_ICMP;
3647 	}
3648 
3649 	ASSERT(RW_WRITE_HELD(&php->iph_lock));
3650 
3651 	/*
3652 	 * Double-check that we don't have any duplicate selectors here.
3653 	 * Because selectors are interned below, we need only compare pointers
3654 	 * for equality.
3655 	 */
3656 	if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
3657 		head = pr->ipr_nonhash[af];
3658 	} else {
3659 		selkey->ipsl_pol_hval =
3660 		    (selkey->ipsl_valid & IPSL_IPV4) ?
3661 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3662 pr->ipr_nchains) : 3663 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3664 pr->ipr_nchains); 3665 3666 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3667 } 3668 3669 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3670 if (p2->ipsp_sel == ipp->ipsp_sel) 3671 return (B_FALSE); 3672 } 3673 3674 /* 3675 * If it's ICMP and not a drop or pass rule, run through the ICMP 3676 * rules and make sure the action is either new or the same as any 3677 * other actions. We don't have to check the full chain because 3678 * discard and bypass will override all other actions 3679 */ 3680 3681 if (valid & IPSL_PROTOCOL && 3682 selkey->ipsl_proto == check_proto && 3683 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3684 3685 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3686 3687 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3688 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3689 (p2->ipsp_act->ipa_act.ipa_type == 3690 IPSEC_ACT_APPLY)) { 3691 return (ipsec_compare_action(p2, ipp)); 3692 } 3693 } 3694 } 3695 3696 return (B_TRUE); 3697 } 3698 3699 /* 3700 * compare the action chains of two policies for equality 3701 * B_TRUE -> effective equality 3702 */ 3703 3704 static boolean_t 3705 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3706 { 3707 3708 ipsec_action_t *act1, *act2; 3709 3710 /* We have a valid rule. Let's compare the actions */ 3711 if (p1->ipsp_act == p2->ipsp_act) { 3712 /* same action. We are good */ 3713 return (B_TRUE); 3714 } 3715 3716 /* we have to walk the chain */ 3717 3718 act1 = p1->ipsp_act; 3719 act2 = p2->ipsp_act; 3720 3721 while (act1 != NULL && act2 != NULL) { 3722 3723 /* otherwise, Are we close enough? */ 3724 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3725 act1->ipa_want_ah != act2->ipa_want_ah || 3726 act1->ipa_want_esp != act2->ipa_want_esp || 3727 act1->ipa_want_se != act2->ipa_want_se) { 3728 /* Nope, we aren't */ 3729 return (B_FALSE); 3730 } 3731 3732 if (act1->ipa_want_ah) { 3733 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3734 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3735 return (B_FALSE); 3736 } 3737 3738 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3739 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3740 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3741 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3742 return (B_FALSE); 3743 } 3744 } 3745 3746 if (act1->ipa_want_esp) { 3747 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3748 act2->ipa_act.ipa_apply.ipp_use_esp || 3749 act1->ipa_act.ipa_apply.ipp_use_espa != 3750 act2->ipa_act.ipa_apply.ipp_use_espa) { 3751 return (B_FALSE); 3752 } 3753 3754 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3755 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3756 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3757 return (B_FALSE); 3758 } 3759 3760 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3761 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3762 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3763 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3764 return (B_FALSE); 3765 } 3766 } 3767 3768 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3769 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3770 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3771 return (B_FALSE); 3772 } 3773 3774 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3775 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3776 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3777 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3778 return (B_FALSE); 3779 } 3780 } 3781 3782 } 3783 3784 act1 = 
act1->ipa_next; 3785 act2 = act2->ipa_next; 3786 } 3787 3788 if (act1 != NULL || act2 != NULL) { 3789 return (B_FALSE); 3790 } 3791 3792 return (B_TRUE); 3793 } 3794 3795 3796 /* 3797 * Given a constructed ipsec_policy_t policy rule, enter it into 3798 * the correct policy ruleset. 3799 * 3800 * ipsec_check_policy() is assumed to have succeeded first (to check for 3801 * duplicates). 3802 */ 3803 void 3804 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3805 netstack_t *ns) 3806 { 3807 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3808 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3809 uint32_t valid = selkey->ipsl_valid; 3810 uint32_t hval = selkey->ipsl_pol_hval; 3811 int af = -1; 3812 3813 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3814 3815 if (valid & IPSL_IPV6) { 3816 ASSERT(!(valid & IPSL_IPV4)); 3817 af = IPSEC_AF_V6; 3818 } else { 3819 ASSERT(valid & IPSL_IPV4); 3820 af = IPSEC_AF_V4; 3821 } 3822 3823 php->iph_gen++; 3824 3825 if (hval == IPSEC_SEL_NOHASH) { 3826 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3827 } else { 3828 HASH_LOCK(pr->ipr_hash, hval); 3829 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3830 HASH_UNLOCK(pr->ipr_hash, hval); 3831 } 3832 3833 ipsec_insert_always(&php->iph_rulebyid, ipp); 3834 3835 ipsec_update_present_flags(ns->netstack_ipsec); 3836 } 3837 3838 static void 3839 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, 3840 netstack_t *ns) 3841 { 3842 ipsec_policy_t *ip, *nip; 3843 int af, chain, nchain; 3844 3845 for (af = 0; af < IPSEC_NAF; af++) { 3846 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3847 nip = ip->ipsp_hash.hash_next; 3848 IPPOL_UNCHAIN(php, ip, ns); 3849 } 3850 ipr->ipr_nonhash[af] = NULL; 3851 } 3852 nchain = ipr->ipr_nchains; 3853 3854 for (chain = 0; chain < nchain; chain++) { 3855 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3856 ip = nip) { 3857 nip = ip->ipsp_hash.hash_next; 3858 IPPOL_UNCHAIN(php, ip, ns); 3859 } 3860 ipr->ipr_hash[chain].hash_head = NULL; 3861 } 3862 } 3863 3864 void 3865 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3866 { 3867 int dir; 3868 3869 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3870 3871 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3872 ipsec_ipr_flush(php, &php->iph_root[dir], ns); 3873 3874 ipsec_update_present_flags(ns->netstack_ipsec); 3875 } 3876 3877 void 3878 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3879 { 3880 int dir; 3881 3882 ASSERT(php->iph_refs == 0); 3883 3884 rw_enter(&php->iph_lock, RW_WRITER); 3885 ipsec_polhead_flush(php, ns); 3886 rw_exit(&php->iph_lock); 3887 rw_destroy(&php->iph_lock); 3888 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3889 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3890 int chain; 3891 3892 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3893 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3894 3895 } 3896 ipsec_polhead_free_table(php); 3897 kmem_free(php, sizeof (*php)); 3898 } 3899 3900 static void 3901 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3902 { 3903 int af; 3904 3905 ipr->ipr_nchains = 0; 3906 ipr->ipr_hash = NULL; 3907 3908 for (af = 0; af < IPSEC_NAF; af++) { 3909 ipr->ipr_nonhash[af] = NULL; 3910 } 3911 } 3912 3913 ipsec_policy_head_t * 3914 ipsec_polhead_create(void) 3915 { 3916 ipsec_policy_head_t *php; 3917 3918 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3919 if (php == NULL) 3920 return (php); 3921 3922 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3923 php->iph_refs = 1; 3924 php->iph_gen = 0; 3925 3926 
	ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]);
3927	ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]);
3928
3929	avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid,
3930	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));
3931
3932	return (php);
3933 }
3934
3935 /*
3936  * Clone the policy head into a new polhead; release one reference to the
3937  * old one and return the only reference to the new one.
3938  * If the old one had a refcount of 1, just return it.
3939  */
3940 ipsec_policy_head_t *
3941 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns)
3942 {
3943	ipsec_policy_head_t *nphp;
3944
3945	if (php == NULL)
3946		return (ipsec_polhead_create());
3947	else if (php->iph_refs == 1)
3948		return (php);
3949
3950	nphp = ipsec_polhead_create();
3951	if (nphp == NULL)
3952		return (NULL);
3953
3954	if (ipsec_copy_polhead(php, nphp, ns) != 0) {
3955		ipsec_polhead_free(nphp, ns);
3956		return (NULL);
3957	}
3958	IPPH_REFRELE(php, ns);
3959	return (nphp);
3960 }
3961
3962 /*
3963  * When sending a response to an ICMP request or generating a RST
3964  * in the TCP case, the outbound packets need to go out at the same level
3965  * of protection as the incoming ones, i.e., we associate our outbound
3966  * policy with how the packet came in. We call this after we have
3967  * accepted the incoming packet, which may or may not have been in
3968  * the clear, and hence we send the reply back with the policy
3969  * matching the incoming datagram's policy.
3970  *
3971  * NOTE: This technique serves two purposes:
3972  *
3973  * 1) If we have multiple outbound policies, we send out a reply
3974  *    matching how it came in rather than matching the outbound
3975  *    policy.
3976  *
3977  * 2) For asymmetric policies, we want to make sure that incoming
3978  *    and outgoing traffic have the same level of protection. Asymmetric
3979  *    policies exist only with global policy, where we may not have
3980  *    both outbound and inbound policy at the same time.
3981  *
3982  * NOTE2: This function is called in cleartext cases as well, so it needs
3983  * to be in IP proper.
3984  */
3985 boolean_t
3986 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h)
3987 {
3988	ipsec_in_t *ii;
3989	ipsec_out_t *io;
3990	boolean_t v4;
3991	mblk_t *mp;
3992	boolean_t secure, attach_if;
3993	uint_t ifindex;
3994	ipsec_selector_t sel;
3995	ipsec_action_t *reflect_action = NULL;
3996	zoneid_t zoneid;
3997	netstack_t *ns;
3998
3999	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
4000
4001	bzero((void*)&sel, sizeof (sel));
4002
4003	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
4004
4005	mp = ipsec_mp->b_cont;
4006	ASSERT(mp != NULL);
4007
4008	if (ii->ipsec_in_action != NULL) {
4009		/* Transfer the reference... */
4010		reflect_action = ii->ipsec_in_action;
4011		ii->ipsec_in_action = NULL;
4012	} else if (!ii->ipsec_in_loopback)
4013		reflect_action = ipsec_in_to_out_action(ii);
4014	secure = ii->ipsec_in_secure;
4015	attach_if = ii->ipsec_in_attach_if;
4016	ifindex = ii->ipsec_in_ill_index;
4017	zoneid = ii->ipsec_in_zoneid;
4018	ASSERT(zoneid != ALL_ZONES);
4019	ns = ii->ipsec_in_ns;
4020	v4 = ii->ipsec_in_v4;
4021
4022	ipsec_in_release_refs(ii);	/* No netstack_rele/hold needed */
4023
4024	/*
4025	 * The caller is going to send the datagram out, and it might
4026	 * go on the wire or be delivered locally through ip_wput_local.
4027	 *
4028	 * 1) If it goes out on the wire, new associations will be
4029	 *    obtained.
4030	 * 2) If it is delivered locally, ip_wput_local will convert
4031	 *    this IPSEC_OUT to an IPSEC_IN, looking at the requests.
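	 *
	 * For example (illustrative, added note): a TCP RST generated for
	 * a segment that arrived under ESP leaves through this path, so
	 * the RST is sent with the same ESP protection rather than
	 * whatever the outbound policy alone would have chosen.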
4032 */ 4033 4034 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4035 bzero(io, sizeof (ipsec_out_t)); 4036 io->ipsec_out_type = IPSEC_OUT; 4037 io->ipsec_out_len = sizeof (ipsec_out_t); 4038 io->ipsec_out_frtn.free_func = ipsec_out_free; 4039 io->ipsec_out_frtn.free_arg = (char *)io; 4040 io->ipsec_out_act = reflect_action; 4041 4042 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, 4043 ns->netstack_ipsec)) 4044 return (B_FALSE); 4045 4046 io->ipsec_out_src_port = sel.ips_local_port; 4047 io->ipsec_out_dst_port = sel.ips_remote_port; 4048 io->ipsec_out_proto = sel.ips_protocol; 4049 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4050 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4051 4052 /* 4053 * Don't use global policy for this, as we want 4054 * to use the same protection that was applied to the inbound packet. 4055 */ 4056 io->ipsec_out_use_global_policy = B_FALSE; 4057 io->ipsec_out_proc_begin = B_FALSE; 4058 io->ipsec_out_secure = secure; 4059 io->ipsec_out_v4 = v4; 4060 io->ipsec_out_attach_if = attach_if; 4061 io->ipsec_out_ill_index = ifindex; 4062 io->ipsec_out_zoneid = zoneid; 4063 io->ipsec_out_ns = ns; /* No netstack_hold */ 4064 4065 return (B_TRUE); 4066 } 4067 4068 mblk_t * 4069 ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4070 { 4071 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 4072 ipsec_in_t *nii; 4073 mblk_t *nmp; 4074 frtn_t nfrtn; 4075 ipsec_stack_t *ipss = ns->netstack_ipsec; 4076 4077 ASSERT(ii->ipsec_in_type == IPSEC_IN); 4078 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 4079 4080 nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); 4081 if (nmp == NULL) { 4082 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4083 DROPPER(ipss, ipds_spd_nomem), 4084 &ipss->ipsec_spd_dropper); 4085 return (NULL); 4086 } 4087 4088 ASSERT(nmp->b_datap->db_type == M_CTL); 4089 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4090 4091 /* 4092 * Bump refcounts. 4093 */ 4094 if (ii->ipsec_in_ah_sa != NULL) 4095 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 4096 if (ii->ipsec_in_esp_sa != NULL) 4097 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 4098 if (ii->ipsec_in_policy != NULL) 4099 IPPH_REFHOLD(ii->ipsec_in_policy); 4100 4101 /* 4102 * Copy everything, but preserve the free routine provided by 4103 * ipsec_in_alloc(). 4104 */ 4105 nii = (ipsec_in_t *)nmp->b_rptr; 4106 nfrtn = nii->ipsec_in_frtn; 4107 bcopy(ii, nii, sizeof (*ii)); 4108 nii->ipsec_in_frtn = nfrtn; 4109 4110 nmp->b_cont = cont; 4111 4112 return (nmp); 4113 } 4114 4115 mblk_t * 4116 ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4117 { 4118 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 4119 ipsec_out_t *nio; 4120 mblk_t *nmp; 4121 frtn_t nfrtn; 4122 ipsec_stack_t *ipss = ns->netstack_ipsec; 4123 4124 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4125 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4126 4127 nmp = ipsec_alloc_ipsec_out(ns); 4128 if (nmp == NULL) { 4129 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4130 DROPPER(ipss, ipds_spd_nomem), 4131 &ipss->ipsec_spd_dropper); 4132 return (NULL); 4133 } 4134 ASSERT(nmp->b_datap->db_type == M_CTL); 4135 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4136 4137 /* 4138 * Bump refcounts. 
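	 * (Clarifying note, added: the bcopy() below duplicates every
	 * pointer field of the old ipsec_out_t, so each referenced object
	 * -- the SAs, polhead, policy, action, latch, and cred -- must
	 * gain a hold here first.  Otherwise the copy's free routine
	 * would release references that the original still owns.)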
4139 */ 4140 if (io->ipsec_out_ah_sa != NULL) 4141 IPSA_REFHOLD(io->ipsec_out_ah_sa); 4142 if (io->ipsec_out_esp_sa != NULL) 4143 IPSA_REFHOLD(io->ipsec_out_esp_sa); 4144 if (io->ipsec_out_polhead != NULL) 4145 IPPH_REFHOLD(io->ipsec_out_polhead); 4146 if (io->ipsec_out_policy != NULL) 4147 IPPOL_REFHOLD(io->ipsec_out_policy); 4148 if (io->ipsec_out_act != NULL) 4149 IPACT_REFHOLD(io->ipsec_out_act); 4150 if (io->ipsec_out_latch != NULL) 4151 IPLATCH_REFHOLD(io->ipsec_out_latch); 4152 if (io->ipsec_out_cred != NULL) 4153 crhold(io->ipsec_out_cred); 4154 4155 /* 4156 * Copy everything, but preserve the free routine provided by 4157 * ipsec_alloc_ipsec_out(). 4158 */ 4159 nio = (ipsec_out_t *)nmp->b_rptr; 4160 nfrtn = nio->ipsec_out_frtn; 4161 bcopy(io, nio, sizeof (*io)); 4162 nio->ipsec_out_frtn = nfrtn; 4163 4164 nmp->b_cont = cont; 4165 4166 return (nmp); 4167 } 4168 4169 static void 4170 ipsec_out_release_refs(ipsec_out_t *io) 4171 { 4172 netstack_t *ns = io->ipsec_out_ns; 4173 4174 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4175 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4176 ASSERT(io->ipsec_out_ns != NULL); 4177 4178 /* Note: IPSA_REFRELE is multi-line macro */ 4179 if (io->ipsec_out_ah_sa != NULL) 4180 IPSA_REFRELE(io->ipsec_out_ah_sa); 4181 if (io->ipsec_out_esp_sa != NULL) 4182 IPSA_REFRELE(io->ipsec_out_esp_sa); 4183 if (io->ipsec_out_polhead != NULL) 4184 IPPH_REFRELE(io->ipsec_out_polhead, ns); 4185 if (io->ipsec_out_policy != NULL) 4186 IPPOL_REFRELE(io->ipsec_out_policy, ns); 4187 if (io->ipsec_out_act != NULL) 4188 IPACT_REFRELE(io->ipsec_out_act); 4189 if (io->ipsec_out_cred != NULL) { 4190 crfree(io->ipsec_out_cred); 4191 io->ipsec_out_cred = NULL; 4192 } 4193 if (io->ipsec_out_latch) { 4194 IPLATCH_REFRELE(io->ipsec_out_latch, ns); 4195 io->ipsec_out_latch = NULL; 4196 } 4197 } 4198 4199 static void 4200 ipsec_out_free(void *arg) 4201 { 4202 ipsec_out_t *io = (ipsec_out_t *)arg; 4203 ipsec_out_release_refs(io); 4204 kmem_cache_free(ipsec_info_cache, arg); 4205 } 4206 4207 static void 4208 ipsec_in_release_refs(ipsec_in_t *ii) 4209 { 4210 netstack_t *ns = ii->ipsec_in_ns; 4211 4212 ASSERT(ii->ipsec_in_ns != NULL); 4213 4214 /* Note: IPSA_REFRELE is multi-line macro */ 4215 if (ii->ipsec_in_ah_sa != NULL) 4216 IPSA_REFRELE(ii->ipsec_in_ah_sa); 4217 if (ii->ipsec_in_esp_sa != NULL) 4218 IPSA_REFRELE(ii->ipsec_in_esp_sa); 4219 if (ii->ipsec_in_policy != NULL) 4220 IPPH_REFRELE(ii->ipsec_in_policy, ns); 4221 if (ii->ipsec_in_da != NULL) { 4222 freeb(ii->ipsec_in_da); 4223 ii->ipsec_in_da = NULL; 4224 } 4225 } 4226 4227 static void 4228 ipsec_in_free(void *arg) 4229 { 4230 ipsec_in_t *ii = (ipsec_in_t *)arg; 4231 ipsec_in_release_refs(ii); 4232 kmem_cache_free(ipsec_info_cache, arg); 4233 } 4234 4235 /* 4236 * This is called only for outbound datagrams if the datagram needs to 4237 * go out secure. A NULL mp can be passed to get an ipsec_out. This 4238 * facility is used by ip_unbind. 4239 * 4240 * NOTE : o As the data part could be modified by ipsec_out_process etc. 4241 * we can't make it fast by calling a dup. 
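 *
 * Illustrative usage sketch (added; data_mp and the recovery step are
 * hypothetical and supplied by the caller):
 *
 *	mblk_t *ctl = ipsec_alloc_ipsec_out(ns);
 *	if (ctl == NULL)
 *		... handle allocation failure (caller-specific) ...
 *	ctl->b_cont = data_mp;	(the M_CTL header now precedes the data)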
4242 */ 4243 mblk_t * 4244 ipsec_alloc_ipsec_out(netstack_t *ns) 4245 { 4246 mblk_t *ipsec_mp; 4247 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4248 4249 if (io == NULL) 4250 return (NULL); 4251 4252 bzero(io, sizeof (ipsec_out_t)); 4253 4254 io->ipsec_out_type = IPSEC_OUT; 4255 io->ipsec_out_len = sizeof (ipsec_out_t); 4256 io->ipsec_out_frtn.free_func = ipsec_out_free; 4257 io->ipsec_out_frtn.free_arg = (char *)io; 4258 4259 /* 4260 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 4261 * using ipsec_out_zoneid should assert that the zoneid has been set to 4262 * a sane value. 4263 */ 4264 io->ipsec_out_zoneid = ALL_ZONES; 4265 io->ipsec_out_ns = ns; /* No netstack_hold */ 4266 4267 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 4268 &io->ipsec_out_frtn); 4269 if (ipsec_mp == NULL) { 4270 ipsec_out_free(io); 4271 4272 return (NULL); 4273 } 4274 ipsec_mp->b_datap->db_type = M_CTL; 4275 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 4276 4277 return (ipsec_mp); 4278 } 4279 4280 /* 4281 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4282 * Otherwise initialize using conn. 4283 * 4284 * If pol is non-null, we consume a reference to it. 4285 */ 4286 mblk_t * 4287 ipsec_attach_ipsec_out(mblk_t **mp, conn_t *connp, ipsec_policy_t *pol, 4288 uint8_t proto, netstack_t *ns) 4289 { 4290 mblk_t *ipsec_mp; 4291 ipsec_stack_t *ipss = ns->netstack_ipsec; 4292 4293 ASSERT((pol != NULL) || (connp != NULL)); 4294 4295 ipsec_mp = ipsec_alloc_ipsec_out(ns); 4296 if (ipsec_mp == NULL) { 4297 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4298 "ipsec_attach_ipsec_out: Allocation failure\n"); 4299 ip_drop_packet(*mp, B_FALSE, NULL, NULL, 4300 DROPPER(ipss, ipds_spd_nomem), 4301 &ipss->ipsec_spd_dropper); 4302 *mp = NULL; 4303 return (NULL); 4304 } 4305 ipsec_mp->b_cont = *mp; 4306 /* 4307 * If *mp is NULL, ipsec_init_ipsec_out() won't/should not be using it. 4308 */ 4309 return (ipsec_init_ipsec_out(ipsec_mp, mp, connp, pol, proto, ns)); 4310 } 4311 4312 /* 4313 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4314 * Otherwise initialize using conn. 4315 * 4316 * If pol is non-null, we consume a reference to it. 4317 */ 4318 mblk_t * 4319 ipsec_init_ipsec_out(mblk_t *ipsec_mp, mblk_t **mp, conn_t *connp, 4320 ipsec_policy_t *pol, uint8_t proto, netstack_t *ns) 4321 { 4322 ipsec_out_t *io; 4323 ipsec_policy_t *p; 4324 ipha_t *ipha; 4325 ip6_t *ip6h; 4326 ipsec_stack_t *ipss = ns->netstack_ipsec; 4327 4328 ASSERT(ipsec_mp->b_cont == *mp); 4329 4330 ASSERT((pol != NULL) || (connp != NULL)); 4331 4332 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4333 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4334 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4335 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4336 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4337 io->ipsec_out_latch = NULL; 4338 /* 4339 * Set the zoneid when we have the connp. 4340 * Otherwise, we're called from ip_wput_attach_policy() who will take 4341 * care of setting the zoneid. 
4342 */ 4343 if (connp != NULL) 4344 io->ipsec_out_zoneid = connp->conn_zoneid; 4345 4346 io->ipsec_out_ns = ns; /* No netstack_hold */ 4347 4348 if (*mp != NULL) { 4349 ipha = (ipha_t *)(*mp)->b_rptr; 4350 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4351 io->ipsec_out_v4 = B_TRUE; 4352 ip6h = NULL; 4353 } else { 4354 io->ipsec_out_v4 = B_FALSE; 4355 ip6h = (ip6_t *)ipha; 4356 ipha = NULL; 4357 } 4358 } else { 4359 ASSERT(connp != NULL && connp->conn_policy_cached); 4360 ip6h = NULL; 4361 ipha = NULL; 4362 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4363 } 4364 4365 p = NULL; 4366 4367 /* 4368 * Take latched policies over global policy. Check here again for 4369 * this, in case we had conn_latch set while the packet was flying 4370 * around in IP. 4371 */ 4372 if (connp != NULL && connp->conn_latch != NULL) { 4373 ASSERT(ns == connp->conn_netstack); 4374 p = connp->conn_latch->ipl_out_policy; 4375 io->ipsec_out_latch = connp->conn_latch; 4376 IPLATCH_REFHOLD(connp->conn_latch); 4377 if (p != NULL) { 4378 IPPOL_REFHOLD(p); 4379 } 4380 io->ipsec_out_src_port = connp->conn_lport; 4381 io->ipsec_out_dst_port = connp->conn_fport; 4382 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4383 if (pol != NULL) 4384 IPPOL_REFRELE(pol, ns); 4385 } else if (pol != NULL) { 4386 ipsec_selector_t sel; 4387 4388 bzero((void*)&sel, sizeof (sel)); 4389 4390 p = pol; 4391 /* 4392 * conn does not have the port information. Get 4393 * it from the packet. 4394 */ 4395 4396 if (!ipsec_init_outbound_ports(&sel, *mp, ipha, ip6h, 0, 4397 ns->netstack_ipsec)) { 4398 /* Callee did ip_drop_packet() on *mp. */ 4399 *mp = NULL; 4400 freeb(ipsec_mp); 4401 return (NULL); 4402 } 4403 io->ipsec_out_src_port = sel.ips_local_port; 4404 io->ipsec_out_dst_port = sel.ips_remote_port; 4405 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4406 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4407 } 4408 4409 io->ipsec_out_proto = proto; 4410 io->ipsec_out_use_global_policy = B_TRUE; 4411 io->ipsec_out_secure = (p != NULL); 4412 io->ipsec_out_policy = p; 4413 4414 if (p == NULL) { 4415 if (connp->conn_policy != NULL) { 4416 io->ipsec_out_secure = B_TRUE; 4417 ASSERT(io->ipsec_out_latch == NULL); 4418 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4419 io->ipsec_out_need_policy = B_TRUE; 4420 ASSERT(io->ipsec_out_polhead == NULL); 4421 IPPH_REFHOLD(connp->conn_policy); 4422 io->ipsec_out_polhead = connp->conn_policy; 4423 } 4424 } else { 4425 /* Handle explicit drop action. */ 4426 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4427 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4428 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4429 DROPPER(ipss, ipds_spd_explicit), 4430 &ipss->ipsec_spd_dropper); 4431 *mp = NULL; 4432 ipsec_mp = NULL; 4433 } 4434 } 4435 4436 return (ipsec_mp); 4437 } 4438 4439 /* 4440 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4441 * and keep track of what-if-any IPsec processing will be applied to the 4442 * datagram. 
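 * (Clarifying note, added: the mblk is obtained with desballoc(), so
 * freeing its last reference runs ipsec_in_free(), which in turn drops
 * any SA and policy holds recorded in the ipsec_in_t.)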
4443 */
4444 mblk_t *
4445 ipsec_in_alloc(boolean_t isv4, netstack_t *ns)
4446 {
4447	mblk_t *ipsec_in;
4448	ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP);
4449
4450	if (ii == NULL)
4451		return (NULL);
4452
4453	bzero(ii, sizeof (ipsec_info_t));
4454	ii->ipsec_in_type = IPSEC_IN;
4455	ii->ipsec_in_len = sizeof (ipsec_in_t);
4456
4457	ii->ipsec_in_v4 = isv4;
4458	ii->ipsec_in_secure = B_TRUE;
4459	ii->ipsec_in_ns = ns;		/* No netstack_hold */
4460
4461	ii->ipsec_in_frtn.free_func = ipsec_in_free;
4462	ii->ipsec_in_frtn.free_arg = (char *)ii;
4463
4464	ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI,
4465	    &ii->ipsec_in_frtn);
4466	if (ipsec_in == NULL) {
4467		ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n"));
4468		ipsec_in_free(ii);
4469		return (NULL);
4470	}
4471
4472	ipsec_in->b_datap->db_type = M_CTL;
4473	ipsec_in->b_wptr += sizeof (ipsec_info_t);
4474
4475	return (ipsec_in);
4476 }
4477
4478 /*
4479  * This is called from ip_wput_local when a packet which needs
4480  * security is looped back, to convert the IPSEC_OUT to an IPSEC_IN
4481  * before fanout, where the policy check happens. In most of the
4482  * cases, IPSEC processing has *never* been done. There is one case
4483  * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where
4484  * the packet is destined for localhost and IPSEC processing has
4485  * already been done.
4486  *
4487  * Future: This could happen after SA selection has occurred for
4488  * outbound, which would tell us who the src and dst identities are.
4489  * Then it's just a matter of splicing the ah/esp SA pointers from the
4490  * ipsec_out_t to the ipsec_in_t.
4491  */
4492 void
4493 ipsec_out_to_in(mblk_t *ipsec_mp)
4494 {
4495	ipsec_in_t *ii;
4496	ipsec_out_t *io;
4497	ipsec_policy_t *pol;
4498	ipsec_action_t *act;
4499	boolean_t v4, icmp_loopback;
4500	zoneid_t zoneid;
4501	netstack_t *ns;
4502
4503	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
4504
4505	io = (ipsec_out_t *)ipsec_mp->b_rptr;
4506
4507	v4 = io->ipsec_out_v4;
4508	zoneid = io->ipsec_out_zoneid;
4509	icmp_loopback = io->ipsec_out_icmp_loopback;
4510	ns = io->ipsec_out_ns;
4511
4512	act = io->ipsec_out_act;
4513	if (act == NULL) {
4514		pol = io->ipsec_out_policy;
4515		if (pol != NULL) {
4516			act = pol->ipsp_act;
4517			IPACT_REFHOLD(act);
4518		}
4519	}
4520	io->ipsec_out_act = NULL;
4521
4522	ipsec_out_release_refs(io);	/* No netstack_rele/hold needed */
4523
4524	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
4525	bzero(ii, sizeof (ipsec_in_t));
4526	ii->ipsec_in_type = IPSEC_IN;
4527	ii->ipsec_in_len = sizeof (ipsec_in_t);
4528	ii->ipsec_in_loopback = B_TRUE;
4529	ii->ipsec_in_ns = ns;		/* No netstack_hold */
4530
4531	ii->ipsec_in_frtn.free_func = ipsec_in_free;
4532	ii->ipsec_in_frtn.free_arg = (char *)ii;
4533	ii->ipsec_in_action = act;
4534	ii->ipsec_in_zoneid = zoneid;
4535
4536	/*
4537	 * In most cases, we can't look at the ipsec_out_XXX_sa
4538	 * because this never went through IPSEC processing. So, look at
4539	 * the requests and infer whether it would have gone through
4540	 * IPSEC processing or not. Initialize the "done" fields with
4541	 * the requests. The possible values for the "done" fields are:
4542	 *
4543	 * 1) zero, indicates that a particular preference was never
4544	 *    requested.
4545	 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/
4546	 *    IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that
4547	 *    IPSEC processing has been completed.
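	 *
	 * For example (illustrative, added; the flag combination is an
	 * inference from the description above): a "done" field holding
	 * (IPSEC_PREF_REQUIRED | IPSEC_REQ_DONE) would record both that
	 * the preference was required and that the corresponding
	 * processing has already been applied.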
4548 */
4549	ii->ipsec_in_secure = B_TRUE;
4550	ii->ipsec_in_v4 = v4;
4551	ii->ipsec_in_icmp_loopback = icmp_loopback;
4552	ii->ipsec_in_attach_if = B_FALSE;
4553 }
4554
4555 /*
4556  * Consults global policy to see whether this datagram should
4557  * go out secure. If so, it attaches an ipsec_mp in front and
4558  * returns.
4559  */
4560 mblk_t *
4561 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire,
4562     conn_t *connp, boolean_t unspec_src, zoneid_t zoneid)
4563 {
4564	mblk_t *mp;
4565	ipsec_out_t *io = NULL;
4566	ipsec_selector_t sel;
4567	uint_t ill_index;
4568	boolean_t conn_dontroutex;
4569	boolean_t conn_multicast_loopx;
4570	boolean_t policy_present;
4571	ip_stack_t *ipst = ire->ire_ipst;
4572	netstack_t *ns = ipst->ips_netstack;
4573	ipsec_stack_t *ipss = ns->netstack_ipsec;
4574
4575	ASSERT((ipha != NULL && ip6h == NULL) ||
4576	    (ip6h != NULL && ipha == NULL));
4577
4578	bzero((void*)&sel, sizeof (sel));
4579
4580	if (ipha != NULL)
4581		policy_present = ipss->ipsec_outbound_v4_policy_present;
4582	else
4583		policy_present = ipss->ipsec_outbound_v6_policy_present;
4584	/*
4585	 * Fast path to see if there is any policy.
4586	 */
4587	if (!policy_present) {
4588		if (ipsec_mp->b_datap->db_type == M_CTL) {
4589			io = (ipsec_out_t *)ipsec_mp->b_rptr;
4590			if (!io->ipsec_out_secure) {
4591				/*
4592				 * If there is no global policy and ip_wput
4593				 * or ip_wput_multicast has attached this mp
4594				 * for the multicast case, free the ipsec_mp
4595				 * and return the original mp.
4596				 */
4597				mp = ipsec_mp->b_cont;
4598				freeb(ipsec_mp);
4599				ipsec_mp = mp;
4600				io = NULL;
4601			}
4602			ASSERT(io == NULL || !io->ipsec_out_tunnel);
4603		}
4604		if (((io == NULL) || (io->ipsec_out_polhead == NULL)) &&
4605		    ((connp == NULL) || (connp->conn_policy == NULL)))
4606			return (ipsec_mp);
4607	}
4608
4609	ill_index = 0;
4610	conn_multicast_loopx = conn_dontroutex = B_FALSE;
4611	mp = ipsec_mp;
4612	if (ipsec_mp->b_datap->db_type == M_CTL) {
4613		mp = ipsec_mp->b_cont;
4614		/*
4615		 * This is a connection where we have some per-socket
4616		 * policy or ip_wput has attached an ipsec_mp for
4617		 * the multicast datagram.
4618		 */
4619		io = (ipsec_out_t *)ipsec_mp->b_rptr;
4620		if (!io->ipsec_out_secure) {
4621			/*
4622			 * This ipsec_mp was allocated in ip_wput or
4623			 * ip_wput_multicast so that we will know the
4624			 * value of ill_index, conn_dontroute,
4625			 * conn_multicast_loop in the multicast case if
4626			 * we inherit global policy here.
4627			 */
4628			ill_index = io->ipsec_out_ill_index;
4629			conn_dontroutex = io->ipsec_out_dontroute;
4630			conn_multicast_loopx = io->ipsec_out_multicast_loop;
4631			freeb(ipsec_mp);
4632			ipsec_mp = mp;
4633			io = NULL;
4634		}
4635		ASSERT(io == NULL || !io->ipsec_out_tunnel);
4636	}
4637
4638	if (ipha != NULL) {
4639		sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ?
4640 ipha->ipha_src : ire->ire_src_addr); 4641 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4642 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4643 sel.ips_isv4 = B_TRUE; 4644 } else { 4645 ushort_t hdr_len; 4646 uint8_t *nexthdrp; 4647 boolean_t is_fragment; 4648 4649 sel.ips_isv4 = B_FALSE; 4650 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4651 if (!unspec_src) 4652 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4653 } else { 4654 sel.ips_local_addr_v6 = ip6h->ip6_src; 4655 } 4656 4657 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment); 4658 if (is_fragment) { 4659 /* 4660 * It's a packet fragment for a packet that 4661 * we have already processed (since IPsec processing 4662 * is done before fragmentation), so we don't 4663 * have to do policy checks again. Fragments can 4664 * come back to us for processing if they have 4665 * been queued up due to flow control. 4666 */ 4667 if (ipsec_mp->b_datap->db_type == M_CTL) { 4668 mp = ipsec_mp->b_cont; 4669 freeb(ipsec_mp); 4670 ipsec_mp = mp; 4671 } 4672 return (ipsec_mp); 4673 } 4674 4675 /* IPv6 common-case. */ 4676 sel.ips_protocol = ip6h->ip6_nxt; 4677 switch (ip6h->ip6_nxt) { 4678 case IPPROTO_TCP: 4679 case IPPROTO_UDP: 4680 case IPPROTO_SCTP: 4681 case IPPROTO_ICMPV6: 4682 break; 4683 default: 4684 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4685 &hdr_len, &nexthdrp)) { 4686 BUMP_MIB(&ipst->ips_ip6_mib, 4687 ipIfStatsOutDiscards); 4688 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4689 return (NULL); 4690 } 4691 sel.ips_protocol = *nexthdrp; 4692 break; 4693 } 4694 } 4695 4696 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4697 if (ipha != NULL) { 4698 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4699 } else { 4700 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4701 } 4702 4703 /* Callee dropped the packet. */ 4704 return (NULL); 4705 } 4706 4707 if (io != NULL) { 4708 /* 4709 * We seem to have some local policy (we already have 4710 * an ipsec_out). Look at global policy and see 4711 * whether we have to inherit or not. 4712 */ 4713 io->ipsec_out_need_policy = B_FALSE; 4714 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, 4715 &sel, ns); 4716 ASSERT((io->ipsec_out_policy != NULL) || 4717 (io->ipsec_out_act != NULL)); 4718 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4719 return (ipsec_mp); 4720 } 4721 /* 4722 * We pass in a pointer to a pointer because mp can become 4723 * NULL due to allocation failures or explicit drops. Callers 4724 * of this function should assume a NULL mp means the packet 4725 * was dropped. 4726 */ 4727 ipsec_mp = ipsec_attach_global_policy(&mp, connp, &sel, ns); 4728 if (ipsec_mp == NULL) 4729 return (mp); 4730 4731 /* 4732 * Copy the right port information. 4733 */ 4734 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4735 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4736 4737 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4738 ASSERT((io->ipsec_out_policy != NULL) || 4739 (io->ipsec_out_act != NULL)); 4740 io->ipsec_out_src_port = sel.ips_local_port; 4741 io->ipsec_out_dst_port = sel.ips_remote_port; 4742 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4743 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4744 /* 4745 * Set ill_index, conn_dontroute and conn_multicast_loop 4746 * for multicast datagrams. 
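	 * (Clarifying note, added: these values were captured near the
	 * top of this function from the ipsec_out that ip_wput or
	 * ip_wput_multicast attached; see the !ipsec_out_secure case
	 * above.)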
4747 */ 4748 io->ipsec_out_ill_index = ill_index; 4749 io->ipsec_out_dontroute = conn_dontroutex; 4750 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4751 4752 if (zoneid == ALL_ZONES) 4753 zoneid = GLOBAL_ZONEID; 4754 io->ipsec_out_zoneid = zoneid; 4755 return (ipsec_mp); 4756 } 4757 4758 /* 4759 * When appropriate, this function caches inbound and outbound policy 4760 * for this connection. 4761 * 4762 * XXX need to work out more details about per-interface policy and 4763 * caching here! 4764 * 4765 * XXX may want to split inbound and outbound caching for ill.. 4766 */ 4767 int 4768 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4769 { 4770 boolean_t global_policy_present; 4771 netstack_t *ns = connp->conn_netstack; 4772 ipsec_stack_t *ipss = ns->netstack_ipsec; 4773 4774 /* 4775 * There is no policy latching for ICMP sockets because we can't 4776 * decide on which policy to use until we see the packet and get 4777 * type/code selectors. 4778 */ 4779 if (connp->conn_ulp == IPPROTO_ICMP || 4780 connp->conn_ulp == IPPROTO_ICMPV6) { 4781 connp->conn_in_enforce_policy = 4782 connp->conn_out_enforce_policy = B_TRUE; 4783 if (connp->conn_latch != NULL) { 4784 IPLATCH_REFRELE(connp->conn_latch, ns); 4785 connp->conn_latch = NULL; 4786 } 4787 connp->conn_flags |= IPCL_CHECK_POLICY; 4788 return (0); 4789 } 4790 4791 global_policy_present = isv4 ? 4792 (ipss->ipsec_outbound_v4_policy_present || 4793 ipss->ipsec_inbound_v4_policy_present) : 4794 (ipss->ipsec_outbound_v6_policy_present || 4795 ipss->ipsec_inbound_v6_policy_present); 4796 4797 if ((connp->conn_policy != NULL) || global_policy_present) { 4798 ipsec_selector_t sel; 4799 ipsec_policy_t *p; 4800 4801 if (connp->conn_latch == NULL && 4802 (connp->conn_latch = iplatch_create()) == NULL) { 4803 return (ENOMEM); 4804 } 4805 4806 sel.ips_protocol = connp->conn_ulp; 4807 sel.ips_local_port = connp->conn_lport; 4808 sel.ips_remote_port = connp->conn_fport; 4809 sel.ips_is_icmp_inv_acq = 0; 4810 sel.ips_isv4 = isv4; 4811 if (isv4) { 4812 sel.ips_local_addr_v4 = connp->conn_src; 4813 sel.ips_remote_addr_v4 = connp->conn_rem; 4814 } else { 4815 sel.ips_local_addr_v6 = connp->conn_srcv6; 4816 sel.ips_remote_addr_v6 = connp->conn_remv6; 4817 } 4818 4819 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 4820 ns); 4821 if (connp->conn_latch->ipl_in_policy != NULL) 4822 IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns); 4823 connp->conn_latch->ipl_in_policy = p; 4824 connp->conn_in_enforce_policy = (p != NULL); 4825 4826 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel, 4827 ns); 4828 if (connp->conn_latch->ipl_out_policy != NULL) 4829 IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns); 4830 connp->conn_latch->ipl_out_policy = p; 4831 connp->conn_out_enforce_policy = (p != NULL); 4832 4833 /* Clear the latched actions too, in case we're recaching. */ 4834 if (connp->conn_latch->ipl_out_action != NULL) 4835 IPACT_REFRELE(connp->conn_latch->ipl_out_action); 4836 if (connp->conn_latch->ipl_in_action != NULL) 4837 IPACT_REFRELE(connp->conn_latch->ipl_in_action); 4838 } 4839 4840 /* 4841 * We may or may not have policy for this endpoint. We still set 4842 * conn_policy_cached so that inbound datagrams don't have to look 4843 * at global policy as policy is considered latched for these 4844 * endpoints. We should not set conn_policy_cached until the conn 4845 * reflects the actual policy. 
If we *set* this before inheriting 4846 * the policy there is a window where the check 4847 * CONN_INBOUND_POLICY_PRESENT, will neither check with the policy 4848 * on the conn (because we have not yet copied the policy on to 4849 * conn and hence not set conn_in_enforce_policy) nor with the 4850 * global policy (because conn_policy_cached is already set). 4851 */ 4852 connp->conn_policy_cached = B_TRUE; 4853 if (connp->conn_in_enforce_policy) 4854 connp->conn_flags |= IPCL_CHECK_POLICY; 4855 return (0); 4856 } 4857 4858 void 4859 iplatch_free(ipsec_latch_t *ipl, netstack_t *ns) 4860 { 4861 if (ipl->ipl_out_policy != NULL) 4862 IPPOL_REFRELE(ipl->ipl_out_policy, ns); 4863 if (ipl->ipl_in_policy != NULL) 4864 IPPOL_REFRELE(ipl->ipl_in_policy, ns); 4865 if (ipl->ipl_in_action != NULL) 4866 IPACT_REFRELE(ipl->ipl_in_action); 4867 if (ipl->ipl_out_action != NULL) 4868 IPACT_REFRELE(ipl->ipl_out_action); 4869 if (ipl->ipl_local_cid != NULL) 4870 IPSID_REFRELE(ipl->ipl_local_cid); 4871 if (ipl->ipl_remote_cid != NULL) 4872 IPSID_REFRELE(ipl->ipl_remote_cid); 4873 if (ipl->ipl_local_id != NULL) 4874 crfree(ipl->ipl_local_id); 4875 mutex_destroy(&ipl->ipl_lock); 4876 kmem_free(ipl, sizeof (*ipl)); 4877 } 4878 4879 ipsec_latch_t * 4880 iplatch_create() 4881 { 4882 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4883 if (ipl == NULL) 4884 return (ipl); 4885 bzero(ipl, sizeof (*ipl)); 4886 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4887 ipl->ipl_refcnt = 1; 4888 return (ipl); 4889 } 4890 4891 /* 4892 * Hash function for ID hash table. 4893 */ 4894 static uint32_t 4895 ipsid_hash(int idtype, char *idstring) 4896 { 4897 uint32_t hval = idtype; 4898 unsigned char c; 4899 4900 while ((c = *idstring++) != 0) { 4901 hval = (hval << 4) | (hval >> 28); 4902 hval ^= c; 4903 } 4904 hval = hval ^ (hval >> 16); 4905 return (hval & (IPSID_HASHSIZE-1)); 4906 } 4907 4908 /* 4909 * Look up identity string in hash table. Return identity object 4910 * corresponding to the name -- either preexisting, or newly allocated. 4911 * 4912 * Return NULL if we need to allocate a new one and can't get memory. 
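 *
 * Illustrative usage sketch (added; hypothetical caller and name --
 * identities are interned, so equal type/string pairs yield the same
 * object and pointer comparison suffices, see ipsid_equal() below):
 *
 *	ipsid_t *a = ipsid_lookup(SADB_IDENTTYPE_FQDN, "gw.example.com", ns);
 *	ipsid_t *b = ipsid_lookup(SADB_IDENTTYPE_FQDN, "gw.example.com", ns);
 *	ASSERT(a == b);
 *	IPSID_REFRELE(a);
 *	IPSID_REFRELE(b);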
4913 */ 4914 ipsid_t * 4915 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4916 { 4917 ipsid_t *retval; 4918 char *nstr; 4919 int idlen = strlen(idstring) + 1; 4920 ipsec_stack_t *ipss = ns->netstack_ipsec; 4921 ipsif_t *bucket; 4922 4923 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4924 4925 mutex_enter(&bucket->ipsif_lock); 4926 4927 for (retval = bucket->ipsif_head; retval != NULL; 4928 retval = retval->ipsid_next) { 4929 if (idtype != retval->ipsid_type) 4930 continue; 4931 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4932 continue; 4933 4934 IPSID_REFHOLD(retval); 4935 mutex_exit(&bucket->ipsif_lock); 4936 return (retval); 4937 } 4938 4939 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4940 if (!retval) { 4941 mutex_exit(&bucket->ipsif_lock); 4942 return (NULL); 4943 } 4944 4945 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4946 if (!nstr) { 4947 mutex_exit(&bucket->ipsif_lock); 4948 kmem_free(retval, sizeof (*retval)); 4949 return (NULL); 4950 } 4951 4952 retval->ipsid_refcnt = 1; 4953 retval->ipsid_next = bucket->ipsif_head; 4954 if (retval->ipsid_next != NULL) 4955 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4956 retval->ipsid_ptpn = &bucket->ipsif_head; 4957 retval->ipsid_type = idtype; 4958 retval->ipsid_cid = nstr; 4959 bucket->ipsif_head = retval; 4960 bcopy(idstring, nstr, idlen); 4961 mutex_exit(&bucket->ipsif_lock); 4962 4963 return (retval); 4964 } 4965 4966 /* 4967 * Garbage collect the identity hash table. 4968 */ 4969 void 4970 ipsid_gc(netstack_t *ns) 4971 { 4972 int i, len; 4973 ipsid_t *id, *nid; 4974 ipsif_t *bucket; 4975 ipsec_stack_t *ipss = ns->netstack_ipsec; 4976 4977 for (i = 0; i < IPSID_HASHSIZE; i++) { 4978 bucket = &ipss->ipsec_ipsid_buckets[i]; 4979 mutex_enter(&bucket->ipsif_lock); 4980 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4981 nid = id->ipsid_next; 4982 if (id->ipsid_refcnt == 0) { 4983 *id->ipsid_ptpn = nid; 4984 if (nid != NULL) 4985 nid->ipsid_ptpn = id->ipsid_ptpn; 4986 len = strlen(id->ipsid_cid) + 1; 4987 kmem_free(id->ipsid_cid, len); 4988 kmem_free(id, sizeof (*id)); 4989 } 4990 } 4991 mutex_exit(&bucket->ipsif_lock); 4992 } 4993 } 4994 4995 /* 4996 * Return true if two identities are the same. 4997 */ 4998 boolean_t 4999 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 5000 { 5001 if (id1 == id2) 5002 return (B_TRUE); 5003 #ifdef DEBUG 5004 if ((id1 == NULL) || (id2 == NULL)) 5005 return (B_FALSE); 5006 /* 5007 * test that we're interning id's correctly.. 5008 */ 5009 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 5010 (id1->ipsid_type != id2->ipsid_type)); 5011 #endif 5012 return (B_FALSE); 5013 } 5014 5015 /* 5016 * Initialize identity table; called during module initialization. 
5017 */ 5018 static void 5019 ipsid_init(netstack_t *ns) 5020 { 5021 ipsif_t *bucket; 5022 int i; 5023 ipsec_stack_t *ipss = ns->netstack_ipsec; 5024 5025 for (i = 0; i < IPSID_HASHSIZE; i++) { 5026 bucket = &ipss->ipsec_ipsid_buckets[i]; 5027 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 5028 } 5029 } 5030 5031 /* 5032 * Free identity table (preparatory to module unload) 5033 */ 5034 static void 5035 ipsid_fini(netstack_t *ns) 5036 { 5037 ipsif_t *bucket; 5038 int i; 5039 ipsec_stack_t *ipss = ns->netstack_ipsec; 5040 5041 for (i = 0; i < IPSID_HASHSIZE; i++) { 5042 bucket = &ipss->ipsec_ipsid_buckets[i]; 5043 ASSERT(bucket->ipsif_head == NULL); 5044 mutex_destroy(&bucket->ipsif_lock); 5045 } 5046 } 5047 5048 /* 5049 * Update the minimum and maximum supported key sizes for the 5050 * specified algorithm. Must be called while holding the algorithms lock. 5051 */ 5052 void 5053 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 5054 netstack_t *ns) 5055 { 5056 size_t crypto_min = (size_t)-1, crypto_max = 0; 5057 size_t cur_crypto_min, cur_crypto_max; 5058 boolean_t is_valid; 5059 crypto_mechanism_info_t *mech_infos; 5060 uint_t nmech_infos; 5061 int crypto_rc, i; 5062 crypto_mech_usage_t mask; 5063 ipsec_stack_t *ipss = ns->netstack_ipsec; 5064 5065 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 5066 5067 /* 5068 * Compute the min, max, and default key sizes (in number of 5069 * increments to the default key size in bits) as defined 5070 * by the algorithm mappings. This range of key sizes is used 5071 * for policy related operations. The effective key sizes 5072 * supported by the framework could be more limited than 5073 * those defined for an algorithm. 5074 */ 5075 alg->alg_default_bits = alg->alg_key_sizes[0]; 5076 if (alg->alg_increment != 0) { 5077 /* key sizes are defined by range & increment */ 5078 alg->alg_minbits = alg->alg_key_sizes[1]; 5079 alg->alg_maxbits = alg->alg_key_sizes[2]; 5080 5081 alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits, 5082 alg->alg_increment, alg->alg_default_bits); 5083 } else if (alg->alg_nkey_sizes == 0) { 5084 /* no specified key size for algorithm */ 5085 alg->alg_minbits = alg->alg_maxbits = 0; 5086 } else { 5087 /* key sizes are defined by enumeration */ 5088 alg->alg_minbits = (uint16_t)-1; 5089 alg->alg_maxbits = 0; 5090 5091 for (i = 0; i < alg->alg_nkey_sizes; i++) { 5092 if (alg->alg_key_sizes[i] < alg->alg_minbits) 5093 alg->alg_minbits = alg->alg_key_sizes[i]; 5094 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 5095 alg->alg_maxbits = alg->alg_key_sizes[i]; 5096 } 5097 alg->alg_default = 0; 5098 } 5099 5100 if (!(alg->alg_flags & ALG_FLAG_VALID)) 5101 return; 5102 5103 /* 5104 * Mechanisms do not apply to the NULL encryption 5105 * algorithm, so simply return for this case. 5106 */ 5107 if (alg->alg_id == SADB_EALG_NULL) 5108 return; 5109 5110 /* 5111 * Find the min and max key sizes supported by the cryptographic 5112 * framework providers. 5113 */ 5114 5115 /* get the key sizes supported by the framework */ 5116 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 5117 &mech_infos, &nmech_infos, KM_SLEEP); 5118 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 5119 alg->alg_flags &= ~ALG_FLAG_VALID; 5120 return; 5121 } 5122 5123 /* min and max key sizes supported by framework */ 5124 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 5125 int unit_bits; 5126 5127 /* 5128 * Ignore entries that do not support the operations 5129 * needed for the algorithm type. 
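	 * (Illustrative, added: a provider advertising only
	 * CRYPTO_MECH_USAGE_ENCRYPT would be skipped for an encryption
	 * algorithm, since the mask built below requires both ENCRYPT
	 * and DECRYPT usage.)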
5130 */ 5131 if (alg_type == IPSEC_ALG_AUTH) { 5132 mask = CRYPTO_MECH_USAGE_MAC; 5133 } else { 5134 mask = CRYPTO_MECH_USAGE_ENCRYPT | 5135 CRYPTO_MECH_USAGE_DECRYPT; 5136 } 5137 if ((mech_infos[i].mi_usage & mask) != mask) 5138 continue; 5139 5140 unit_bits = (mech_infos[i].mi_keysize_unit == 5141 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 5142 /* adjust min/max supported by framework */ 5143 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 5144 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 5145 5146 if (cur_crypto_min < crypto_min) 5147 crypto_min = cur_crypto_min; 5148 5149 /* 5150 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 5151 * the crypto framework which means "no upper limit". 5152 */ 5153 if (mech_infos[i].mi_max_key_size == 5154 CRYPTO_EFFECTIVELY_INFINITE) { 5155 crypto_max = (size_t)-1; 5156 } else if (cur_crypto_max > crypto_max) { 5157 crypto_max = cur_crypto_max; 5158 } 5159 5160 is_valid = B_TRUE; 5161 } 5162 5163 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 5164 nmech_infos); 5165 5166 if (!is_valid) { 5167 /* no key sizes supported by framework */ 5168 alg->alg_flags &= ~ALG_FLAG_VALID; 5169 return; 5170 } 5171 5172 /* 5173 * Determine min and max key sizes from alg_key_sizes[]. 5174 * defined for the algorithm entry. Adjust key sizes based on 5175 * those supported by the framework. 5176 */ 5177 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 5178 if (alg->alg_increment != 0) { 5179 /* supported key sizes are defined by range & increment */ 5180 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 5181 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 5182 5183 alg->alg_ef_minbits = MAX(alg->alg_minbits, 5184 (uint16_t)crypto_min); 5185 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 5186 (uint16_t)crypto_max); 5187 5188 /* 5189 * If the sizes supported by the framework are outside 5190 * the range of sizes defined by the algorithm mappings, 5191 * the algorithm cannot be used. Check for this 5192 * condition here. 5193 */ 5194 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 5195 alg->alg_flags &= ~ALG_FLAG_VALID; 5196 return; 5197 } 5198 5199 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 5200 alg->alg_ef_default_bits = alg->alg_ef_minbits; 5201 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 5202 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 5203 5204 alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits, 5205 alg->alg_increment, alg->alg_ef_default_bits); 5206 } else if (alg->alg_nkey_sizes == 0) { 5207 /* no specified key size for algorithm */ 5208 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 5209 } else { 5210 /* supported key sizes are defined by enumeration */ 5211 alg->alg_ef_minbits = (uint16_t)-1; 5212 alg->alg_ef_maxbits = 0; 5213 5214 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 5215 /* 5216 * Ignore the current key size if it is not in the 5217 * range of sizes supported by the framework. 5218 */ 5219 if (alg->alg_key_sizes[i] < crypto_min || 5220 alg->alg_key_sizes[i] > crypto_max) 5221 continue; 5222 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 5223 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 5224 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 5225 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 5226 is_valid = B_TRUE; 5227 } 5228 5229 if (!is_valid) { 5230 alg->alg_flags &= ~ALG_FLAG_VALID; 5231 return; 5232 } 5233 alg->alg_ef_default = 0; 5234 } 5235 } 5236 5237 /* 5238 * Free the memory used by the specified algorithm. 
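 * (Clarifying note, added: the "+ 1" in the kmem_free() sizes below
 * accounts for the extra terminating slot these arrays are allocated
 * with; that allocation detail lives in the algorithm-registration
 * code and is assumed here.)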
5239 */
5240 void
5241 ipsec_alg_free(ipsec_alginfo_t *alg)
5242 {
5243	if (alg == NULL)
5244		return;
5245
5246	if (alg->alg_key_sizes != NULL) {
5247		kmem_free(alg->alg_key_sizes,
5248		    (alg->alg_nkey_sizes + 1) * sizeof (uint16_t));
5249		alg->alg_key_sizes = NULL;
5250	}
5251	if (alg->alg_block_sizes != NULL) {
5252		kmem_free(alg->alg_block_sizes,
5253		    (alg->alg_nblock_sizes + 1) * sizeof (uint16_t));
5254		alg->alg_block_sizes = NULL;
5255	}
5256	kmem_free(alg, sizeof (*alg));
5257 }
5258
5259 /*
5260  * Check the validity of the specified key size for an algorithm.
5261  * Returns B_TRUE if the key size is valid, B_FALSE otherwise.
5262  */
5263 boolean_t
5264 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg)
5265 {
5266	if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits)
5267		return (B_FALSE);
5268
5269	if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) {
5270		/*
5271		 * If the key sizes are defined by enumeration, the new
5272		 * key size must be equal to one of the supported values.
5273		 */
5274		int i;
5275
5276		for (i = 0; i < alg->alg_nkey_sizes; i++)
5277			if (key_size == alg->alg_key_sizes[i])
5278				break;
5279		if (i == alg->alg_nkey_sizes)
5280			return (B_FALSE);
5281	}
5282
5283	return (B_TRUE);
5284 }
5285
5286 /*
5287  * Callback function invoked by the crypto framework when a provider
5288  * registers or unregisters. This callback updates the algorithm
5289  * tables when a crypto algorithm is no longer available or becomes
5290  * available, and triggers the freeing/creation of context templates
5291  * associated with existing SAs, if needed.
5292  *
5293  * Need to walk all stack instances since the callback is global
5294  * for all instances.
5295  */
5296 void
5297 ipsec_prov_update_callback(uint32_t event, void *event_arg)
5298 {
5299	netstack_handle_t nh;
5300	netstack_t *ns;
5301
5302	netstack_next_init(&nh);
5303	while ((ns = netstack_next(&nh)) != NULL) {
5304		ipsec_prov_update_callback_stack(event, event_arg, ns);
5305		netstack_rele(ns);
5306	}
5307	netstack_next_fini(&nh);
5308 }
5309
5310 static void
5311 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg,
5312     netstack_t *ns)
5313 {
5314	crypto_notify_event_change_t *prov_change =
5315	    (crypto_notify_event_change_t *)event_arg;
5316	uint_t algidx, algid, algtype, mech_count, mech_idx;
5317	ipsec_alginfo_t *alg;
5318	ipsec_alginfo_t oalg;
5319	crypto_mech_name_t *mechs;
5320	boolean_t alg_changed = B_FALSE;
5321	ipsec_stack_t *ipss = ns->netstack_ipsec;
5322
5323	/* Ignore events for which we didn't register. */
5324	if (event != CRYPTO_EVENT_MECHS_CHANGED) {
5325		ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x "
5326		    "received from crypto framework\n", event));
5327		return;
5328	}
5329
5330	mechs = crypto_get_mech_list(&mech_count, KM_SLEEP);
5331	if (mechs == NULL)
5332		return;
5333
5334	/*
5335	 * Walk the list of currently defined IPsec algorithms. Update
5336	 * the algorithm valid flag and trigger an update of the
5337	 * SAs that depend on that algorithm.
5338	 */
5339	mutex_enter(&ipss->ipsec_alg_lock);
5340	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
5341		for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype];
5342		    algidx++) {
5343
5344			algid = ipss->ipsec_sortlist[algtype][algidx];
5345			alg = ipss->ipsec_alglists[algtype][algid];
5346			ASSERT(alg != NULL);
5347
5348			/*
5349			 * Skip the algorithms which do not map to the
5350			 * crypto framework provider being added or removed.
5351 */
5352			if (strncmp(alg->alg_mech_name,
5353			    prov_change->ec_mech_name,
5354			    CRYPTO_MAX_MECH_NAME) != 0)
5355				continue;
5356
5357			/*
5358			 * Determine if the mechanism is valid. If it
5359			 * is not, mark the algorithm as being invalid. If
5360			 * it is, mark the algorithm as being valid.
5361			 */
5362			for (mech_idx = 0; mech_idx < mech_count; mech_idx++)
5363				if (strncmp(alg->alg_mech_name,
5364				    mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0)
5365					break;
5366			if (mech_idx == mech_count &&
5367			    alg->alg_flags & ALG_FLAG_VALID) {
5368				alg->alg_flags &= ~ALG_FLAG_VALID;
5369				alg_changed = B_TRUE;
5370			} else if (mech_idx < mech_count &&
5371			    !(alg->alg_flags & ALG_FLAG_VALID)) {
5372				alg->alg_flags |= ALG_FLAG_VALID;
5373				alg_changed = B_TRUE;
5374			}
5375
5376			/*
5377			 * Update the supported key sizes, regardless
5378			 * of whether a crypto provider was added or
5379			 * removed.
5380			 */
5381			oalg = *alg;
5382			ipsec_alg_fix_min_max(alg, algtype, ns);
5383			if (!alg_changed &&
5384			    (alg->alg_ef_minbits != oalg.alg_ef_minbits ||
5385			    alg->alg_ef_maxbits != oalg.alg_ef_maxbits ||
5386			    alg->alg_ef_default != oalg.alg_ef_default ||
5387			    alg->alg_ef_default_bits !=
5388			    oalg.alg_ef_default_bits))
5389				alg_changed = B_TRUE;
5390
5391			/*
5392			 * Update the affected SAs if a software provider is
5393			 * being added or removed.
5394			 */
5395			if (prov_change->ec_provider_type ==
5396			    CRYPTO_SW_PROVIDER)
5397				sadb_alg_update(algtype, alg->alg_id,
5398				    prov_change->ec_change ==
5399				    CRYPTO_MECH_ADDED, ns);
5400		}
5401	}
5402	mutex_exit(&ipss->ipsec_alg_lock);
5403	crypto_free_mech_list(mechs, mech_count);
5404
5405	if (alg_changed) {
5406		/*
5407		 * An algorithm has changed, i.e. it became valid or
5408		 * invalid, or its supported key sizes have changed.
5409		 * Notify ipsecah and ipsecesp of this change so
5410		 * that they can send a SADB_REGISTER to their consumers.
5411		 */
5412		ipsecah_algs_changed(ns);
5413		ipsecesp_algs_changed(ns);
5414	}
5415 }
5416
5417 /*
5418  * Registers with the crypto framework to be notified of crypto
5419  * provider changes. Used to update the algorithm tables and
5420  * to free or create context templates if needed. Invoked after IPsec
5421  * is loaded successfully.
5422  *
5423  * This is called separately for each IP instance, so we ensure we only
5424  * register once.
5425  */
5426 void
5427 ipsec_register_prov_update(void)
5428 {
5429	if (prov_update_handle != NULL)
5430		return;
5431
5432	prov_update_handle = crypto_notify_events(
5433	    ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED);
5434 }
5435
5436 /*
5437  * Unregisters from the framework, so we are no longer notified of
5438  * crypto provider changes. Called from ipsec_policy_g_destroy().
5439  */
5440 static void
5441 ipsec_unregister_prov_update(void)
5442 {
5443	if (prov_update_handle != NULL)
5444		crypto_unnotify_events(prov_update_handle);
5445 }
5446
5447 /*
5448  * Tunnel-mode support routines.
5449  */
5450
5451 /*
5452  * Returns an mblk chain suitable for putnext() if policies match and IPsec
5453  * SAs are available. If there's no per-tunnel policy, or the policy lookup
5454  * comes back with no match, still return the packet and have global policy
5455  * take a crack at it in IP.
5456  *
5457  * Remember -> we can be forwarding packets. Keep that in mind w.r.t.
5458  * inner-packet contents.
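 * (For example, added note: when forwarding, the inner source address
 * need not be one of our own, so the selectors below are taken from
 * the inner headers exactly as received.)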
5459 */ 5460 mblk_t * 5461 ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, 5462 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, 5463 netstack_t *ns) 5464 { 5465 ipsec_tun_pol_t *itp = atp->tun_itp; 5466 ipsec_policy_head_t *polhead; 5467 ipsec_selector_t sel; 5468 mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; 5469 mblk_t *spare_mp = NULL; 5470 ipsec_out_t *io; 5471 boolean_t is_fragment; 5472 ipsec_policy_t *pol; 5473 ipsec_stack_t *ipss = ns->netstack_ipsec; 5474 5475 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5476 outer_ipv4 != NULL && outer_ipv6 == NULL); 5477 /* We take care of inners in a bit. */ 5478 5479 /* No policy on this tunnel - let global policy have at it. */ 5480 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) 5481 return (mp); 5482 polhead = itp->itp_policy; 5483 5484 bzero(&sel, sizeof (sel)); 5485 if (inner_ipv4 != NULL) { 5486 ASSERT(inner_ipv6 == NULL); 5487 sel.ips_isv4 = B_TRUE; 5488 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5489 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5490 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5491 is_fragment = 5492 IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags); 5493 } else { 5494 ASSERT(inner_ipv6 != NULL); 5495 sel.ips_isv4 = B_FALSE; 5496 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5497 /* Use ip_get_dst_v6() just for the fragment bit. */ 5498 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, 5499 &is_fragment); 5500 /* 5501 * Reset, because we don't care about routing-header dests 5502 * in the forwarding/tunnel path. 5503 */ 5504 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5505 } 5506 5507 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5508 if (is_fragment) { 5509 ipha_t *oiph; 5510 ipha_t *iph = NULL; 5511 ip6_t *ip6h = NULL; 5512 int hdr_len; 5513 uint16_t ip6_hdr_length; 5514 uint8_t v6_proto; 5515 uint8_t *v6_proto_p; 5516 5517 /* 5518 * We have a fragment we need to track! 5519 */ 5520 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5521 outer_hdr_len, ipss); 5522 if (mp == NULL) 5523 return (NULL); 5524 5525 /* 5526 * If we get here, we have a full 5527 * fragment chain 5528 */ 5529 5530 oiph = (ipha_t *)mp->b_rptr; 5531 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5532 hdr_len = ((outer_hdr_len != 0) ? 
5533			    IPH_HDR_LENGTH(oiph) : 0);
5534			iph = (ipha_t *)(mp->b_rptr + hdr_len);
5535		} else {
5536			ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
5537			if ((spare_mp = msgpullup(mp, -1)) == NULL) {
5538				ip_drop_packet_chain(mp, B_FALSE,
5539				    NULL, NULL,
5540				    DROPPER(ipss, ipds_spd_nomem),
5541				    &ipss->ipsec_spd_dropper);
				return (NULL);
5542			}
5543			ip6h = (ip6_t *)spare_mp->b_rptr;
5544			(void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
5545			    &ip6_hdr_length, &v6_proto_p);
5546			hdr_len = ip6_hdr_length;
5547		}
5548		outer_hdr_len = hdr_len;
5549
5550		if (sel.ips_isv4) {
5551			if (iph == NULL) {
5552				/* Was v6 outer */
5553				iph = (ipha_t *)(mp->b_rptr + hdr_len);
5554			}
5555			inner_ipv4 = iph;
5556			sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
5557			sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
5558			sel.ips_protocol =
5559			    (uint8_t)inner_ipv4->ipha_protocol;
5560		} else {
5561			if ((spare_mp == NULL) &&
5562			    ((spare_mp = msgpullup(mp, -1)) == NULL)) {
5563				ip_drop_packet_chain(mp, B_FALSE,
5564				    NULL, NULL,
5565				    DROPPER(ipss, ipds_spd_nomem),
5566				    &ipss->ipsec_spd_dropper);
				return (NULL);
5567			}
5568			inner_ipv6 = (ip6_t *)(spare_mp->b_rptr +
5569			    hdr_len);
5570			sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
5571			sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
5572			(void) ip_hdr_length_nexthdr_v6(spare_mp,
5573			    inner_ipv6, &ip6_hdr_length,
5574			    &v6_proto_p);
5575			v6_proto = *v6_proto_p;
5576			sel.ips_protocol = v6_proto;
5577 #ifdef FRAGCACHE_DEBUG
5578			cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n",
5579			    sel.ips_protocol);
5580 #endif
5581		}
5582		/* Ports are extracted below */
5583	}
5584
5585	/* Get ports... */
5586	if (spare_mp != NULL) {
5587		if (!ipsec_init_outbound_ports(&sel, spare_mp,
5588		    inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) {
5589			/*
5590			 * Callee did ip_drop_packet_chain() on
5591			 * spare_mp.
5592			 */
5593			ipsec_freemsg_chain(mp);
5594			return (NULL);
5595		}
5596	} else {
5597		if (!ipsec_init_outbound_ports(&sel, mp,
5598		    inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) {
5599			/* Callee did ip_drop_packet_chain() on mp. */
5600			return (NULL);
5601		}
5602	}
5603 #ifdef FRAGCACHE_DEBUG
5604	if (inner_ipv4 != NULL)
5605		cmn_err(CE_WARN,
5606		    "(v4) sel.ips_protocol = %d, "
5607		    "sel.ips_local_port = %d, "
5608		    "sel.ips_remote_port = %d\n",
5609		    sel.ips_protocol, ntohs(sel.ips_local_port),
5610		    ntohs(sel.ips_remote_port));
5611	if (inner_ipv6 != NULL)
5612		cmn_err(CE_WARN,
5613		    "(v6) sel.ips_protocol = %d, "
5614		    "sel.ips_local_port = %d, "
5615		    "sel.ips_remote_port = %d\n",
5616		    sel.ips_protocol, ntohs(sel.ips_local_port),
5617		    ntohs(sel.ips_remote_port));
5618 #endif
5619	/* Success so far - done with spare_mp */
5620	ipsec_freemsg_chain(spare_mp);
5621	}
5622	rw_enter(&polhead->iph_lock, RW_READER);
5623	pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND,
5624	    &sel, ns);
5625	rw_exit(&polhead->iph_lock);
5626	if (pol == NULL) {
5627		/*
5628		 * No matching policy on this tunnel, drop the packet.
5629		 *
5630		 * NOTE: Tunnel-mode tunnels are different from the
5631		 * IP global transport mode policy head. For a tunnel-mode
5632		 * tunnel, we drop the packet instead of passing it
5633		 * along as accepted, the way a global-policy miss would.
5634		 *
5635		 * NOTE2: "negotiate transport" tunnels should match ALL
5636		 * inbound packets, but we do not uncomment the ASSERT()
5637		 * below because if/when we open PF_POLICY, a user can
5638		 * shoot him/her-self in the foot with a 0 priority.
5639 */
5640
5641		/* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */
5642 #ifdef FRAGCACHE_DEBUG
5643		cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel "
5644		    "per-port policy\n");
5645 #endif
5646		ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
5647		    DROPPER(ipss, ipds_spd_explicit),
5648		    &ipss->ipsec_spd_dropper);
5649		return (NULL);
5650	}
5651
5652 #ifdef FRAGCACHE_DEBUG
5653	cmn_err(CE_WARN, "Having matching tunnel per-port policy\n");
5654 #endif
5655
5656	/* Construct an IPSEC_OUT message. */
5657	ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns);
5658	if (ipsec_mp == NULL) {
5659		IPPOL_REFRELE(pol, ns);
5660		ip_drop_packet(mp, B_FALSE, NULL, NULL,
5661		    DROPPER(ipss, ipds_spd_nomem),
5662		    &ipss->ipsec_spd_dropper);
5663		return (NULL);
5664	}
5665	ipsec_mp->b_cont = mp;
5666	io = (ipsec_out_t *)ipsec_mp->b_rptr;
5667	IPPH_REFHOLD(polhead);
5668	/*
5669	 * NOTE: The free function of the ipsec_out mblk will release the
5670	 * polhead and pol references.
5671	 */
5672	io->ipsec_out_polhead = polhead;
5673	io->ipsec_out_policy = pol;
5674	io->ipsec_out_zoneid = atp->tun_zoneid;
5675	io->ipsec_out_v4 = (outer_ipv4 != NULL);
5676	io->ipsec_out_secure = B_TRUE;
5677
5678	if (!(itp->itp_flags & ITPF_P_TUNNEL)) {
5679		/* Set up transport mode for tunnelled packets. */
5680		io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP :
5681		    IPPROTO_IPV6;
5682		return (ipsec_mp);
5683	}
5684
5685	/* Fill in tunnel-mode goodies here. */
5686	io->ipsec_out_tunnel = B_TRUE;
5687	/* XXX Do I need to fill in all of the goodies here? */
5688	if (inner_ipv4) {
5689		io->ipsec_out_inaf = AF_INET;
5690		io->ipsec_out_insrc[0] =
5691		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4;
5692		io->ipsec_out_indst[0] =
5693		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4;
5694	} else {
5695		io->ipsec_out_inaf = AF_INET6;
5696		io->ipsec_out_insrc[0] =
5697		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0];
5698		io->ipsec_out_insrc[1] =
5699		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1];
5700		io->ipsec_out_insrc[2] =
5701		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2];
5702		io->ipsec_out_insrc[3] =
5703		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3];
5704		io->ipsec_out_indst[0] =
5705		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0];
5706		io->ipsec_out_indst[1] =
5707		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1];
5708		io->ipsec_out_indst[2] =
5709		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2];
5710		io->ipsec_out_indst[3] =
5711		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3];
5712	}
5713	io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen;
5714	io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen;
5715	/* NOTE: These are used for transport mode too. */
5716	io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport;
5717	io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport;
5718	io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto;
5719
5720	/*
5721	 * The mp pointer is still valid.
5722	 * Add an ipsec_out to each fragment;
5723	 * the fragment head already has one.
5724	 */
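	/*
	 * Illustrative sketch (added) of the shape built by the loop
	 * below: each b_next link carries its own M_CTL IPSEC_OUT whose
	 * b_cont is one fragment:
	 *
	 *	[IPSEC_OUT]--b_next-->[IPSEC_OUT]--b_next--> ...
	 *	     |                     |
	 *	  b_cont                b_cont
	 *	     |                     |
	 *	 [frag 0]              [frag 1]
	 */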
5725	nmp = mp->b_next;
5726	mp->b_next = NULL;
5727	mp = nmp;
5728	ASSERT(ipsec_mp != NULL);
5729	while (mp != NULL) {
5730		nmp = mp->b_next;
5731		ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns);
5732		if (ipsec_mp->b_next == NULL) {
5733			ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL,
5734			    DROPPER(ipss, ipds_spd_nomem),
5735			    &ipss->ipsec_spd_dropper);
5736			ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
5737			    DROPPER(ipss, ipds_spd_nomem),
5738			    &ipss->ipsec_spd_dropper);
5739			return (NULL);
5740		}
5741		ipsec_mp = ipsec_mp->b_next;
5742		mp->b_next = NULL;
5743		mp = nmp;
5744	}
5745	return (ipsec_mp_head);
5746 }
5747
5748 /*
5749  * NOTE: The following releases pol's reference and
5750  * calls ip_drop_packet() for me on NULL returns.
5751  */
5752 mblk_t *
5753 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol,
5754     ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns)
5755 {
5756	/* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */
5757	mblk_t *data_chain = NULL, *data_tail = NULL;
5758	mblk_t *ii_next;
5759
5760	while (ipsec_mp != NULL) {
5761		ii_next = ipsec_mp->b_next;
5762		ipsec_mp->b_next = NULL;	/* No tripping asserts. */
5763
5764		/*
5765		 * Need IPPOL_REFHOLD(pol) for extras because
5766		 * ipsecin_policy does the refrele.
5767		 */
5768		IPPOL_REFHOLD(pol);
5769
5770		if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4,
5771		    inner_ipv6, pkt_unique, ns) != NULL) {
5772			if (data_tail == NULL) {
5773				/* First one */
5774				data_chain = data_tail = ipsec_mp->b_cont;
5775			} else {
5776				data_tail->b_next = ipsec_mp->b_cont;
5777				data_tail = data_tail->b_next;
5778			}
5779			freeb(ipsec_mp);
5780		} else {
5781			/*
5782			 * ipsec_check_ipsecin_policy() freed ipsec_mp
5783			 * already. Need to get rid of any extra pol
5784			 * references, and any remaining bits as well.
5785			 */
5786			IPPOL_REFRELE(pol, ns);
5787			ipsec_freemsg_chain(data_chain);
5788			ipsec_freemsg_chain(ii_next);	/* ipdrop stats? */
5789			return (NULL);
5790		}
5791		ipsec_mp = ii_next;
5792	}
5793	/*
5794	 * One last release because either the loop bumped it up, or we never
5795	 * called ipsec_check_ipsecin_policy().
5796	 */
5797	IPPOL_REFRELE(pol, ns);
5798
5799	/* data_chain is ready for return to tun module. */
5800	return (data_chain);
5801 }
5802
5803
5804 /*
5805  * Returns B_TRUE if the inbound packet passed an IPsec policy check. Returns
5806  * B_FALSE if it failed or if it is a fragment needing its friends before a
5807  * policy check can be performed.
5808  *
5809  * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead.
5810  * data_mp may be reassigned with a b_next chain of packets if fragments
5811  * needed to be collected for a proper policy check.
5812  *
5813  * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE. This
5814  * function calls ip_drop_packet() on data_mp if need be.
5815  *
5816  * NOTE: outer_hdr_len is signed. If it's a negative value, the caller
5817  * is inspecting an ICMP packet.
5818  */
5819 boolean_t
5820 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp,
5821     ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
5822     ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns)
5823 {
5824	ipsec_policy_head_t *polhead;
5825	ipsec_selector_t sel;
5826	mblk_t *message = (ipsec_mp == NULL) ?
*data_mp : ipsec_mp; 5827 ipsec_policy_t *pol; 5828 uint16_t tmpport; 5829 selret_t rc; 5830 boolean_t retval, port_policy_present, is_icmp, global_present; 5831 in6_addr_t tmpaddr; 5832 ipaddr_t tmp4; 5833 ipsec_stack_t *ipss = ns->netstack_ipsec; 5834 uint8_t flags, *holder, *outer_hdr; 5835 5836 sel.ips_is_icmp_inv_acq = 0; 5837 5838 if (outer_ipv4 != NULL) { 5839 ASSERT(outer_ipv6 == NULL); 5840 outer_hdr = (uint8_t *)outer_ipv4; 5841 global_present = ipss->ipsec_inbound_v4_policy_present; 5842 } else { 5843 outer_hdr = (uint8_t *)outer_ipv6; 5844 global_present = ipss->ipsec_inbound_v6_policy_present; 5845 } 5846 ASSERT(outer_hdr != NULL); 5847 5848 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || 5849 inner_ipv4 == NULL && inner_ipv6 != NULL); 5850 ASSERT(message == *data_mp || message->b_cont == *data_mp); 5851 5852 if (outer_hdr_len < 0) { 5853 outer_hdr_len = (-outer_hdr_len); 5854 is_icmp = B_TRUE; 5855 } else { 5856 is_icmp = B_FALSE; 5857 } 5858 5859 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5860 polhead = itp->itp_policy; 5861 /* 5862 * We need to perform full Tunnel-Mode enforcement, 5863 * and we need to have inner-header data for such enforcement. 5864 * 5865 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5866 * and on return. 5867 */ 5868 5869 port_policy_present = ((itp->itp_flags & 5870 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5871 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5872 (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5873 5874 rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4, 5875 inner_ipv6, flags); 5876 5877 switch (rc) { 5878 case SELRET_NOMEM: 5879 ip_drop_packet(message, B_TRUE, NULL, NULL, 5880 DROPPER(ipss, ipds_spd_nomem), 5881 &ipss->ipsec_spd_dropper); 5882 return (B_FALSE); 5883 case SELRET_TUNFRAG: 5884 /* 5885 * At this point, if we're cleartext, we don't want 5886 * to go there. 5887 */ 5888 if (ipsec_mp == NULL) { 5889 ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, 5890 DROPPER(ipss, ipds_spd_got_clear), 5891 &ipss->ipsec_spd_dropper); 5892 *data_mp = NULL; 5893 return (B_FALSE); 5894 } 5895 ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> 5896 ipsec_in_secure); 5897 message = ipsec_fragcache_add(&itp->itp_fragcache, 5898 ipsec_mp, *data_mp, outer_hdr_len, ipss); 5899 5900 if (message == NULL) { 5901 /* 5902 * Data is cached, fragment chain is not 5903 * complete. I consume ipsec_mp and data_mp 5904 */ 5905 return (B_FALSE); 5906 } 5907 5908 /* 5909 * If we get here, we have a full fragment chain. 5910 * Reacquire headers and selectors from first fragment. 5911 */ 5912 if (inner_ipv4 != NULL) { 5913 inner_ipv4 = (ipha_t *)message->b_cont->b_rptr; 5914 ASSERT(message->b_cont->b_wptr - 5915 message->b_cont->b_rptr > sizeof (ipha_t)); 5916 } else { 5917 inner_ipv6 = (ip6_t *)message->b_cont->b_rptr; 5918 ASSERT(message->b_cont->b_wptr - 5919 message->b_cont->b_rptr > sizeof (ip6_t)); 5920 } 5921 /* Use SEL_NONE so we always get ports! */ 5922 rc = ipsec_init_inbound_sel(&sel, message->b_cont, 5923 inner_ipv4, inner_ipv6, SEL_NONE); 5924 switch (rc) { 5925 case SELRET_SUCCESS: 5926 /* 5927 * Get to same place as first caller's 5928 * SELRET_SUCCESS case. 
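                 * (That is, fall through to the outer
                 * SELRET_SUCCESS case and proceed to the ICMP
                 * selector swap and the policy-head lookup below,
                 * now using selectors taken from the reassembled
                 * chain's first fragment.)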
5929 */ 5930 break; 5931 case SELRET_NOMEM: 5932 ip_drop_packet_chain(message, B_TRUE, 5933 NULL, NULL, 5934 DROPPER(ipss, ipds_spd_nomem), 5935 &ipss->ipsec_spd_dropper); 5936 return (B_FALSE); 5937 case SELRET_BADPKT: 5938 ip_drop_packet_chain(message, B_TRUE, 5939 NULL, NULL, 5940 DROPPER(ipss, ipds_spd_malformed_frag), 5941 &ipss->ipsec_spd_dropper); 5942 return (B_FALSE); 5943 case SELRET_TUNFRAG: 5944 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5945 /* FALLTHRU */ 5946 default: 5947 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5948 " returns bizarro 0x%x", rc); 5949 /* Guaranteed panic! */ 5950 ASSERT(rc == SELRET_NOMEM); 5951 return (B_FALSE); 5952 } 5953 /* FALLTHRU */ 5954 case SELRET_SUCCESS: 5955 /* 5956 * Common case: 5957 * No per-port policy or a non-fragment. Keep going. 5958 */ 5959 break; 5960 case SELRET_BADPKT: 5961 /* 5962 * We may receive ICMP (with IPv6 inner) packets that 5963 * trigger this return value. Send 'em in for 5964 * enforcement checking. 5965 */ 5966 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5967 "sending 'bad packet' in for enforcement"); 5968 break; 5969 default: 5970 cmn_err(CE_WARN, 5971 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5972 rc); 5973 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5974 return (B_FALSE); 5975 } 5976 5977 if (is_icmp) { 5978 /* 5979 * Swap local/remote because this is an ICMP packet. 5980 */ 5981 tmpaddr = sel.ips_local_addr_v6; 5982 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5983 sel.ips_remote_addr_v6 = tmpaddr; 5984 tmpport = sel.ips_local_port; 5985 sel.ips_local_port = sel.ips_remote_port; 5986 sel.ips_remote_port = tmpport; 5987 } 5988 5989 /* find_policy_head() */ 5990 rw_enter(&polhead->iph_lock, RW_READER); 5991 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5992 &sel, ns); 5993 rw_exit(&polhead->iph_lock); 5994 if (pol != NULL) { 5995 if (ipsec_mp == NULL || 5996 !((ipsec_in_t *)ipsec_mp->b_rptr)-> 5997 ipsec_in_secure) { 5998 retval = pol->ipsp_act->ipa_allow_clear; 5999 if (!retval) { 6000 /* 6001 * XXX should never get here with 6002 * tunnel reassembled fragments? 6003 */ 6004 ASSERT(message->b_next == NULL); 6005 ip_drop_packet(message, B_TRUE, NULL, 6006 NULL, 6007 DROPPER(ipss, ipds_spd_got_clear), 6008 &ipss->ipsec_spd_dropper); 6009 } else if (ipsec_mp != NULL) { 6010 freeb(ipsec_mp); 6011 } 6012 6013 IPPOL_REFRELE(pol, ns); 6014 return (retval); 6015 } 6016 /* 6017 * NOTE: The following releases pol's reference and 6018 * calls ip_drop_packet() for me on NULL returns. 6019 * 6020 * "sel" is still good here, so let's use it! 6021 */ 6022 *data_mp = ipsec_check_ipsecin_policy_reasm(message, 6023 pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( 6024 sel.ips_remote_port, sel.ips_local_port, 6025 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 6026 IPPROTO_ENCAP, sel.ips_protocol), ns); 6027 return (*data_mp != NULL); 6028 } 6029 6030 /* 6031 * Else fallthru and check the global policy on the outer 6032 * header(s) if this tunnel is an old-style transport-mode 6033 * one. Drop the packet explicitly (no policy entry) for 6034 * a new-style tunnel-mode tunnel. 
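         * (ITPF_P_TUNNEL below distinguishes the two styles; ICMP
         * errors are exempt from the explicit drop so the outer-header
         * check further down can still examine them.)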
6035          */
6036         if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) {
6037             ip_drop_packet_chain(message, B_TRUE, NULL,
6038                 NULL,
6039                 DROPPER(ipss, ipds_spd_explicit),
6040                 &ipss->ipsec_spd_dropper);
6041             return (B_FALSE);
6042         }
6043     }
6044 
6045     /*
6046      * NOTE: If we reach here, we will not have packet chains from
6047      * fragcache_add(), because the only way I get chains is on a
6048      * tunnel-mode tunnel, which either returns with a pass, or gets
6049      * hit by the ip_drop_packet_chain() call right above here.
6050      */
6051 
6052     /* If no per-tunnel security, check global policy now. */
6053     if (ipsec_mp != NULL && !global_present) {
6054         if (((ipsec_in_t *)(ipsec_mp->b_rptr))->
6055             ipsec_in_icmp_loopback) {
6056             /*
6057              * This is an ICMP message with an ipsec_mp
6058              * attached.  We should accept it.
6059              */
6060             /* (ipsec_mp is known non-NULL here.) */
6061             freeb(ipsec_mp);
6062             return (B_TRUE);
6063         }
6064 
6065         ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
6066             DROPPER(ipss, ipds_spd_got_secure),
6067             &ipss->ipsec_spd_dropper);
6068         return (B_FALSE);
6069     }
6070 
6071     /*
6072      * The following assertion is valid because only the tun module alters
6073      * the mblk chain - stripping the outer header by advancing mp->b_rptr.
6074      */
6075     ASSERT(is_icmp || ((*data_mp)->b_datap->db_base <= outer_hdr &&
6076         outer_hdr < (*data_mp)->b_rptr));
6077     holder = (*data_mp)->b_rptr;
6078     (*data_mp)->b_rptr = outer_hdr;
6079 
6080     if (is_icmp) {
6081         /*
6082          * For ICMP packets, "outer_ipvN" is set to the outer header
6083          * that is *INSIDE* the ICMP payload.  For global policy
6084          * checking, we need to reverse src/dst on the payload in
6085          * order to construct selectors appropriately.  See "ripha"
6086          * constructions in ip.c.  To avoid a bug like 6478464 (see
6087          * earlier in this file), we will actually exchange src/dst
6088          * in the packet, and reverse it after the call to
6089          * ipsec_check_global_policy().
6090          */
6091         if (outer_ipv4 != NULL) {
6092             tmp4 = outer_ipv4->ipha_src;
6093             outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
6094             outer_ipv4->ipha_dst = tmp4;
6095         } else {
6096             ASSERT(outer_ipv6 != NULL);
6097             tmpaddr = outer_ipv6->ip6_src;
6098             outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
6099             outer_ipv6->ip6_dst = tmpaddr;
6100         }
6101     }
6102 
6103     /* NOTE: Frees message if it returns NULL. */
6104     if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6,
6105         (ipsec_mp != NULL), ns) == NULL) {
6106         return (B_FALSE);
6107     }
6108 
6109     if (is_icmp) {
6110         /* Set things back to normal. */
6111         if (outer_ipv4 != NULL) {
6112             tmp4 = outer_ipv4->ipha_src;
6113             outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
6114             outer_ipv4->ipha_dst = tmp4;
6115         } else {
6116             /* No need for ASSERT()s now. */
6117             tmpaddr = outer_ipv6->ip6_src;
6118             outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
6119             outer_ipv6->ip6_dst = tmpaddr;
6120         }
6121     }
6122 
6123     (*data_mp)->b_rptr = holder;
6124 
6125     if (ipsec_mp != NULL)
6126         freeb(ipsec_mp);
6127 
6128     /*
6129      * At this point, we pretend it's a cleartext accepted
6130      * packet.
6131      */
6132     return (B_TRUE);
6133 }
6134 
6135 /*
6136  * AVL comparison routine for our list of tunnel polheads.
6137  */
6138 static int
6139 tunnel_compare(const void *arg1, const void *arg2)
6140 {
6141     ipsec_tun_pol_t *left, *right;
6142     int rc;
6143 
6144     left = (ipsec_tun_pol_t *)arg1;
6145     right = (ipsec_tun_pol_t *)arg2;
6146 
6147     rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ);
6148     return (rc == 0 ? rc : (rc > 0 ? 1 : -1));
6149 }
6150 
6151 /*
6152  * Free a tunnel policy node.
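 * Releases both the active and inactive policy heads.  Normally this
 * runs via ITP_REFRELE() once the node's last reference is dropped.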
6153  */
6154 void
6155 itp_free(ipsec_tun_pol_t *node, netstack_t *ns)
6156 {
6157     /* The polheads may be NULL if construction failed early. */
     if (node->itp_policy != NULL)
         IPPH_REFRELE(node->itp_policy, ns);
6158     if (node->itp_inactive != NULL)
         IPPH_REFRELE(node->itp_inactive, ns);
6159     mutex_destroy(&node->itp_lock);
6160     kmem_free(node, sizeof (*node));
6161 }
6162 
6163 void
6164 itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns)
6165 {
6166     ipsec_stack_t *ipss = ns->netstack_ipsec;
6167 
6168     rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
6169     ipss->ipsec_tunnel_policy_gen++;
6170     ipsec_fragcache_uninit(&node->itp_fragcache);
6171     avl_remove(&ipss->ipsec_tunnel_policies, node);
6172     rw_exit(&ipss->ipsec_tunnel_policy_lock);
6173     ITP_REFRELE(node, ns);
6174 }
6175 
6176 /*
6177  * Public interface to look up a tunnel security policy by name.  Used
6178  * mostly by spdsock.  Returns "node" with a bumped refcnt.
6179  */
6180 ipsec_tun_pol_t *
6181 get_tunnel_policy(char *name, netstack_t *ns)
6182 {
6183     ipsec_tun_pol_t *node, lookup;
6184     ipsec_stack_t *ipss = ns->netstack_ipsec;
6185 
6186     (void) strncpy(lookup.itp_name, name, LIFNAMSIZ);
6187 
6188     rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
6189     node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
6190         &lookup, NULL);
6191     if (node != NULL) {
6192         ITP_REFHOLD(node);
6193     }
6194     rw_exit(&ipss->ipsec_tunnel_policy_lock);
6195 
6196     return (node);
6197 }
6198 
6199 /*
6200  * Public interface to walk all tunnel security policies.  Useful for spdsock
6201  * DUMP operations.  iterator() will not consume a reference.
6202  */
6203 void
6204 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *),
6205     void *arg, netstack_t *ns)
6206 {
6207     ipsec_tun_pol_t *node;
6208     ipsec_stack_t *ipss = ns->netstack_ipsec;
6209 
6210     rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
6211     for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL;
6212         node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) {
6213         iterator(node, arg, ns);
6214     }
6215     rw_exit(&ipss->ipsec_tunnel_policy_lock);
6216 }
6217 
6218 /*
6219  * Initialize a policy head.  This can only fail if there's a memory problem.
6220  */
6221 static boolean_t
6222 tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns)
6223 {
6224     ipsec_stack_t *ipss = ns->netstack_ipsec;
6225 
6226     rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
6227     iph->iph_refs = 1;
6228     iph->iph_gen = 0;
6229     if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize,
6230         KM_SLEEP, B_FALSE, ns) != 0) {
6231         ipsec_polhead_free_table(iph);
6232         return (B_FALSE);
6233     }
6234     ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize);
6235     return (B_TRUE);
6236 }
6237 
6238 /*
6239  * Create a tunnel policy node with "name".  Sets *errno to ENOMEM if
6240  * there's a memory problem, or to EEXIST if a node with that name
6241  * already exists.
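 * On success, returns the new node holding two references: one for the
 * caller and one for the tree.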
6242  */
6243 ipsec_tun_pol_t *
6244 create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns)
6245 {
6246     ipsec_tun_pol_t *newbie, *existing;
6247     avl_index_t where;
6248     ipsec_stack_t *ipss = ns->netstack_ipsec;
6249 
6250     newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
6251     if (newbie == NULL) {
6252         *errno = ENOMEM;
6253         return (NULL);
6254     }
6255     if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
6256         kmem_free(newbie, sizeof (*newbie));
6257         *errno = ENOMEM;
6258         return (NULL);
6259     }
6260 
6261     (void) strncpy(newbie->itp_name, name, LIFNAMSIZ);
6262 
6263     rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
6264     existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
6265         newbie, &where);
6266     if (existing != NULL) {
6267         itp_free(newbie, ns);
6268         *errno = EEXIST;
6269         rw_exit(&ipss->ipsec_tunnel_policy_lock);
6270         return (NULL);
6271     }
6272     ipss->ipsec_tunnel_policy_gen++;
6273     *gen = ipss->ipsec_tunnel_policy_gen;
6274     newbie->itp_refcnt = 2; /* One for the caller, one for the tree. */
6275     newbie->itp_next_policy_index = 1;
6276     avl_insert(&ipss->ipsec_tunnel_policies, newbie, where);
6277     mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
6278     newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
6279         KM_NOSLEEP);
6280     if (newbie->itp_policy == NULL)
6281         goto nomem;
6282     newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
6283         KM_NOSLEEP);
6284     if (newbie->itp_inactive == NULL) {
6285         kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
6286         goto nomem;
6287     }
6288 
6289     if (!tunnel_polhead_init(newbie->itp_policy, ns)) {
6290         kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
6291         kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
6292         goto nomem;
6293     } else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) {
6294         IPPH_REFRELE(newbie->itp_policy, ns);
6295         kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
6296         goto nomem;
6297     }
6298     rw_exit(&ipss->ipsec_tunnel_policy_lock);
6299 
6300     return (newbie);
6301 nomem:
6302     *errno = ENOMEM;
6303     /*
      * Undo the AVL insertion above and drop the policy lock before
      * bailing out.
      */
     avl_remove(&ipss->ipsec_tunnel_policies, newbie);
     mutex_destroy(&newbie->itp_lock);
     ipsec_fragcache_uninit(&newbie->itp_fragcache);
     rw_exit(&ipss->ipsec_tunnel_policy_lock);
6304     kmem_free(newbie, sizeof (*newbie));
6305     return (NULL);
6306 }
6307 
6308 /*
6309  * We can't call the tun_t lookup function until tun is
6310  * loaded, so create a dummy function to avoid symbol
6311  * lookup errors on boot.
6312  */
6313 /* ARGSUSED */
6314 ipsec_tun_pol_t *
6315 itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af, netstack_t *ns)
6316 {
6317     return (NULL);  /* Always return NULL. */
6318 }
6319 
6320 /*
6321  * Frag cache code, based on SunScreen 3.2 source
6322  * screen/kernel/common/screen_fragcache.c
6323  */
6324 
6325 #define IPSEC_FRAG_TTL_MAX  5
6326 /*
6327  * Note that the following parameters create 256 hash buckets
6328  * with 1024 free entries to be distributed.  Things are cleaned
6329  * periodically, and cleaning is also attempted when there is no
6330  * free space, but this system errs on the side of dropping packets
6331  * over creating memory exhaustion.  We may decide to make the hash
6332  * factor a tunable if this proves to be a bad decision.
6333  */
6334 #define IPSEC_FRAG_HASH_SLOTS   (1<<8)
6335 #define IPSEC_FRAG_HASH_FACTOR  4
6336 #define IPSEC_FRAG_HASH_SIZE    (IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)
6337 
6338 #define IPSEC_FRAG_HASH_MASK    (IPSEC_FRAG_HASH_SLOTS - 1)
6339 #define IPSEC_FRAG_HASH_FUNC(id)    (((id) & IPSEC_FRAG_HASH_MASK) ^ \
6340                     (((id) / \
6341                     (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
6342                     IPSEC_FRAG_HASH_MASK))
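/*
 * A worked example of the hash fold above, for a hypothetical 16-bit
 * IP ident of 0x1234:
 *
 *	(0x1234 & 0xff) ^ ((0x1234 / 256) & 0xff) = 0x34 ^ 0x12 = 0x26
 *
 * so both halves of the ident influence the bucket choice.
 */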
6343 /* Maximum fragments per packet: 48 bytes payload x 1366 packets > 64KB */
6344 #define IPSEC_MAX_FRAGS     1366
6345 
6346 #define V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
6347     IPH_OFFSET) << 3)
6348 #define V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
6349     IPH_MF)
6350 
6351 /*
6352  * Initialize an ipsec fragcache instance.
6353  * Returns B_FALSE if memory allocation fails.
6354  */
6355 boolean_t
6356 ipsec_fragcache_init(ipsec_fragcache_t *frag)
6357 {
6358     ipsec_fragcache_entry_t *ftemp;
6359     int i;
6360 
6361     mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL);
6362     frag->itpf_ptr = (ipsec_fragcache_entry_t **)
6363         kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) *
6364         IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP);
6365     if (frag->itpf_ptr == NULL)
6366         return (B_FALSE);
6367 
6368     ftemp = (ipsec_fragcache_entry_t *)
6369         kmem_zalloc(sizeof (ipsec_fragcache_entry_t) *
6370         IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP);
6371     if (ftemp == NULL) {
6372         kmem_free(frag->itpf_ptr, sizeof (ipsec_fragcache_entry_t *) *
6373             IPSEC_FRAG_HASH_SLOTS);
6374         return (B_FALSE);
6375     }
6376 
6377     frag->itpf_freelist = NULL;
6378 
6379     for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) {
6380         ftemp->itpfe_next = frag->itpf_freelist;
6381         frag->itpf_freelist = ftemp;
6382         ftemp++;
6383     }
6384 
6385     frag->itpf_expire_hint = 0;
6386 
6387     return (B_TRUE);
6388 }
6389 
6390 void
6391 ipsec_fragcache_uninit(ipsec_fragcache_t *frag)
6392 {
6393     ipsec_fragcache_entry_t *fep;
6394     int i;
6395 
6396     mutex_enter(&frag->itpf_lock);
6397     if (frag->itpf_ptr) {
6398         /* Delete any existing fragcache entry chains */
6399         for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
6400             fep = (frag->itpf_ptr)[i];
6401             while (fep != NULL) {
6402                 /* Returned fep is next in chain or NULL */
6403                 fep = fragcache_delentry(i, fep, frag);
6404             }
6405         }
6406         /*
6407          * Chase the pointers back to the beginning
6408          * of the memory allocation and then
6409          * get rid of the allocated freelist
6410          */
6411         while (frag->itpf_freelist->itpfe_next != NULL)
6412             frag->itpf_freelist = frag->itpf_freelist->itpfe_next;
6413         /*
6414          * XXX - If we ever dynamically grow the freelist
6415          * then we'll have to free entries individually
6416          * or determine how many entries or chunks we have
6417          * grown since the initial allocation.
6418          */
6419         kmem_free(frag->itpf_freelist,
6420             sizeof (ipsec_fragcache_entry_t) *
6421             IPSEC_FRAG_HASH_SIZE);
6422         /* Free the hash bucket array */
6423         kmem_free(frag->itpf_ptr,
6424             sizeof (ipsec_fragcache_entry_t *) *
6425             IPSEC_FRAG_HASH_SLOTS);
6426     }
6427     mutex_exit(&frag->itpf_lock);
6428     mutex_destroy(&frag->itpf_lock);
6429 }
6430 
6431 /*
6432  * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
6433  * Returns mp if a complete packet has been assembled, NULL otherwise.
6434  */
6435 
6436 mblk_t *
6437 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
6438     int outer_hdr_len, ipsec_stack_t *ipss)
6439 {
6440     boolean_t is_v4;
6441     time_t itpf_time;
6442     ipha_t *iph;
6443     ipha_t *oiph;
6444     ip6_t *ip6h = NULL;
6445     uint8_t v6_proto;
6446     uint8_t *v6_proto_p;
6447     uint16_t ip6_hdr_length;
6448     ip6_pkt_t ipp;
6449     ip6_frag_t *fraghdr;
6450     ipsec_fragcache_entry_t *fep;
6451     int i;
6452     mblk_t *nmp, *prevmp, *spare_mp = NULL;
6453     int firstbyte, lastbyte;
6454     int offset;
6455     int last;
6456     boolean_t inbound = (ipsec_mp != NULL);
6457     mblk_t *first_mp = inbound ? ipsec_mp : mp;
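    /*
     * For inbound calls, first_mp is the IPSEC_IN M_CTL with the actual
     * datagram on b_cont; for outbound calls it is the bare datagram.
     * The fraglist below always links first_mp's, so both shapes
     * survive reassembly intact.
     */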
6458 
6459     mutex_enter(&frag->itpf_lock);
6460 
6461     oiph = (ipha_t *)mp->b_rptr;
6462     iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);
6463     if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
6464         is_v4 = B_TRUE;
6465     } else {
6466         ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
6467         if ((spare_mp = msgpullup(mp, -1)) == NULL) {
6468             mutex_exit(&frag->itpf_lock);
6469             ip_drop_packet(first_mp, inbound, NULL, NULL,
6470                 DROPPER(ipss, ipds_spd_nomem),
6471                 &ipss->ipsec_spd_dropper);
6472             return (NULL);
6473         }
6474         ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len);
6475 
6476         if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length,
6477             &v6_proto_p)) {
6478             /*
6479              * Couldn't find the upper-layer protocol;
6480              * this is a malformed packet.
6481              */
6482             mutex_exit(&frag->itpf_lock);
6483             ip_drop_packet(first_mp, inbound, NULL, NULL,
6484                 DROPPER(ipss, ipds_spd_malformed_packet),
6485                 &ipss->ipsec_spd_dropper);
6486             freemsg(spare_mp);
6487             return (NULL);
6488         } else {
6489             v6_proto = *v6_proto_p;
6490         }
6491 
6492 
6493         bzero(&ipp, sizeof (ipp));
6494         (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
6495         if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
6496             /*
6497              * We think this is a fragment, but didn't find
6498              * a fragment header.  Something is wrong.
6499              */
6500             mutex_exit(&frag->itpf_lock);
6501             ip_drop_packet(first_mp, inbound, NULL, NULL,
6502                 DROPPER(ipss, ipds_spd_malformed_frag),
6503                 &ipss->ipsec_spd_dropper);
6504             freemsg(spare_mp);
6505             return (NULL);
6506         }
6507         fraghdr = ipp.ipp_fraghdr;
6508         is_v4 = B_FALSE;
6509     }
6510 
6511     /* Anything to clean up? */
6512 
6513     /*
6514      * This cleanup call could be put in a timer loop, but it may
6515      * actually be just as reasonable a decision to leave it here.
6516      * The disadvantage is that it only gets called when frags are
6517      * added.  The advantage is that it is not susceptible to race
6518      * conditions the way a time-based cleanup may be.
6519      *
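     * (The itpf_expire_hint check below keeps the common case cheap:
     * a full scan of the cache only happens once something in it may
     * actually have expired.)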
6520 */ 6521 itpf_time = gethrestime_sec(); 6522 if (itpf_time >= frag->itpf_expire_hint) 6523 ipsec_fragcache_clean(frag); 6524 6525 /* Lookup to see if there is an existing entry */ 6526 6527 if (is_v4) 6528 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6529 else 6530 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6531 6532 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6533 if (is_v4) { 6534 ASSERT(iph != NULL); 6535 if ((fep->itpfe_id == iph->ipha_ident) && 6536 (fep->itpfe_src == iph->ipha_src) && 6537 (fep->itpfe_dst == iph->ipha_dst) && 6538 (fep->itpfe_proto == iph->ipha_protocol)) 6539 break; 6540 } else { 6541 ASSERT(fraghdr != NULL); 6542 ASSERT(fep != NULL); 6543 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6544 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6545 &ip6h->ip6_src) && 6546 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6547 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6548 break; 6549 } 6550 } 6551 6552 if (is_v4) { 6553 firstbyte = V4_FRAG_OFFSET(iph); 6554 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6555 IPH_HDR_LENGTH(iph); 6556 last = (V4_MORE_FRAGS(iph) == 0); 6557 #ifdef FRAGCACHE_DEBUG 6558 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6559 "last = %d, id = %d\n", firstbyte, lastbyte, last, 6560 iph->ipha_ident); 6561 #endif 6562 } else { 6563 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6564 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6565 sizeof (ip6_t) - ip6_hdr_length; 6566 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6567 #ifdef FRAGCACHE_DEBUG 6568 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6569 "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n", 6570 firstbyte, lastbyte, last, fraghdr->ip6f_ident, 6571 fraghdr, spare_mp); 6572 #endif 6573 } 6574 6575 /* check for bogus fragments and delete the entry */ 6576 if (firstbyte > 0 && firstbyte <= 8) { 6577 if (fep != NULL) 6578 (void) fragcache_delentry(i, fep, frag); 6579 mutex_exit(&frag->itpf_lock); 6580 ip_drop_packet(first_mp, inbound, NULL, NULL, 6581 DROPPER(ipss, ipds_spd_malformed_frag), 6582 &ipss->ipsec_spd_dropper); 6583 freemsg(spare_mp); 6584 return (NULL); 6585 } 6586 6587 /* Not found, allocate a new entry */ 6588 if (fep == NULL) { 6589 if (frag->itpf_freelist == NULL) { 6590 /* see if there is some space */ 6591 ipsec_fragcache_clean(frag); 6592 if (frag->itpf_freelist == NULL) { 6593 mutex_exit(&frag->itpf_lock); 6594 ip_drop_packet(first_mp, inbound, NULL, NULL, 6595 DROPPER(ipss, ipds_spd_nomem), 6596 &ipss->ipsec_spd_dropper); 6597 freemsg(spare_mp); 6598 return (NULL); 6599 } 6600 } 6601 6602 fep = frag->itpf_freelist; 6603 frag->itpf_freelist = fep->itpfe_next; 6604 6605 if (is_v4) { 6606 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6607 sizeof (struct in_addr)); 6608 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6609 sizeof (struct in_addr)); 6610 fep->itpfe_id = iph->ipha_ident; 6611 fep->itpfe_proto = iph->ipha_protocol; 6612 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6613 } else { 6614 bcopy((in6_addr_t *)&ip6h->ip6_src, 6615 (in6_addr_t *)&fep->itpfe_src6, 6616 sizeof (struct in6_addr)); 6617 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6618 (in6_addr_t *)&fep->itpfe_dst6, 6619 sizeof (struct in6_addr)); 6620 fep->itpfe_id = fraghdr->ip6f_ident; 6621 fep->itpfe_proto = v6_proto; 6622 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6623 } 6624 itpf_time = gethrestime_sec(); 6625 fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1; 6626 fep->itpfe_last = 0; 6627 fep->itpfe_fraglist = NULL; 6628 
fep->itpfe_depth = 0; 6629 fep->itpfe_next = (frag->itpf_ptr)[i]; 6630 (frag->itpf_ptr)[i] = fep; 6631 6632 if (frag->itpf_expire_hint > fep->itpfe_exp) 6633 frag->itpf_expire_hint = fep->itpfe_exp; 6634 6635 } 6636 freemsg(spare_mp); 6637 6638 /* Insert it in the frag list */ 6639 /* List is in order by starting offset of fragments */ 6640 6641 prevmp = NULL; 6642 for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) { 6643 ipha_t *niph; 6644 ipha_t *oniph; 6645 ip6_t *nip6h; 6646 ip6_pkt_t nipp; 6647 ip6_frag_t *nfraghdr; 6648 uint16_t nip6_hdr_length; 6649 uint8_t *nv6_proto_p; 6650 int nfirstbyte, nlastbyte; 6651 char *data, *ndata; 6652 mblk_t *nspare_mp = NULL; 6653 mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp); 6654 int hdr_len; 6655 6656 oniph = (ipha_t *)mp->b_rptr; 6657 nip6h = NULL; 6658 niph = NULL; 6659 6660 /* 6661 * Determine outer header type and length and set 6662 * pointers appropriately 6663 */ 6664 6665 if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) { 6666 hdr_len = ((outer_hdr_len != 0) ? 6667 IPH_HDR_LENGTH(oiph) : 0); 6668 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6669 } else { 6670 ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION); 6671 if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) { 6672 mutex_exit(&frag->itpf_lock); 6673 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6674 DROPPER(ipss, ipds_spd_nomem), 6675 &ipss->ipsec_spd_dropper); 6676 return (NULL); 6677 } 6678 nip6h = (ip6_t *)nspare_mp->b_rptr; 6679 (void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6680 &nip6_hdr_length, &v6_proto_p); 6681 hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0); 6682 } 6683 6684 /* 6685 * Determine inner header type and length and set 6686 * pointers appropriately 6687 */ 6688 6689 if (is_v4) { 6690 if (niph == NULL) { 6691 /* Was v6 outer */ 6692 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6693 } 6694 nfirstbyte = V4_FRAG_OFFSET(niph); 6695 nlastbyte = nfirstbyte + ntohs(niph->ipha_length) - 6696 IPH_HDR_LENGTH(niph); 6697 } else { 6698 if ((nspare_mp == NULL) && 6699 ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) { 6700 mutex_exit(&frag->itpf_lock); 6701 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6702 DROPPER(ipss, ipds_spd_nomem), 6703 &ipss->ipsec_spd_dropper); 6704 return (NULL); 6705 } 6706 nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len); 6707 if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h, 6708 &nip6_hdr_length, &nv6_proto_p)) { 6709 mutex_exit(&frag->itpf_lock); 6710 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6711 DROPPER(ipss, ipds_spd_malformed_frag), 6712 &ipss->ipsec_spd_dropper); 6713 ipsec_freemsg_chain(nspare_mp); 6714 return (NULL); 6715 } 6716 bzero(&nipp, sizeof (nipp)); 6717 (void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL); 6718 nfraghdr = nipp.ipp_fraghdr; 6719 nfirstbyte = ntohs(nfraghdr->ip6f_offlg & 6720 IP6F_OFF_MASK); 6721 nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) + 6722 sizeof (ip6_t) - nip6_hdr_length; 6723 } 6724 ipsec_freemsg_chain(nspare_mp); 6725 6726 /* Check for overlapping fragments */ 6727 if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) { 6728 /* 6729 * Overlap Check: 6730 * ~~~~--------- # Check if the newly 6731 * ~ ndata_mp| # received fragment 6732 * ~~~~--------- # overlaps with the 6733 * ---------~~~~~~ # current fragment. 
6734          *  |    mp       ~
6735          *  ---------~~~~~~
6736          */
6737             if (is_v4) {
6738                 data = (char *)iph + IPH_HDR_LENGTH(iph) +
6739                     firstbyte - nfirstbyte;
6740                 ndata = (char *)niph + IPH_HDR_LENGTH(niph);
6741             } else {
6742                 data = (char *)ip6h +
6743                     nip6_hdr_length + firstbyte -
6744                     nfirstbyte;
6745                 ndata = (char *)nip6h + nip6_hdr_length;
6746             }
6747             if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) -
6748                 firstbyte)) {
6749                 /* Overlapping data does not match */
6750                 (void) fragcache_delentry(i, fep, frag);
6751                 mutex_exit(&frag->itpf_lock);
6752                 ip_drop_packet(first_mp, inbound, NULL, NULL,
6753                     DROPPER(ipss, ipds_spd_overlap_frag),
6754                     &ipss->ipsec_spd_dropper);
6755                 return (NULL);
6756             }
6757             /* Part of defense for jolt2.c fragmentation attack */
6758             if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
6759                 /*
6760                  * Check for identical or subset fragments:
6761                  *  ----------      ~~~~--------~~~~~
6762                  *  |  nmp   |  or  ~    nmp        ~
6763                  *  ----------      ~~~~--------~~~~~
6764                  *  ----------            ------
6765                  *  |  mp    |            | mp |
6766                  *  ----------            ------
6767                  */
6768                 mutex_exit(&frag->itpf_lock);
6769                 ip_drop_packet(first_mp, inbound, NULL, NULL,
6770                     DROPPER(ipss, ipds_spd_evil_frag),
6771                     &ipss->ipsec_spd_dropper);
6772                 return (NULL);
6773             }
6774 
6775         }
6776 
6777         /* Correct location for this fragment? */
6778         if (firstbyte <= nfirstbyte) {
6779             /*
6780              * Check if the tail end of the new fragment overlaps
6781              * with the head of the current fragment.
6782              *  --------~~~~~~~
6783              *  |  nmp        ~
6784              *  --------~~~~~~~
6785              *  ~~~~~--------
6786              *  ~    mp     |
6787              *  ~~~~~--------
6788              */
6789             if (lastbyte > nfirstbyte) {
6790                 /* Fragments overlap */
6794                 if (is_v4) {
6795                     data = (char *)iph +
6796                         IPH_HDR_LENGTH(iph) + firstbyte -
6797                         nfirstbyte;
6798                     ndata = (char *)niph +
6799                         IPH_HDR_LENGTH(niph);
6800                 } else {
6801                     data = (char *)ip6h +
6802                         nip6_hdr_length + firstbyte -
6803                         nfirstbyte;
6804                     ndata = (char *)nip6h + nip6_hdr_length;
6805                 }
6806                 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
6807                     - nfirstbyte)) {
6808                     /* Overlap mismatch */
6809                     (void) fragcache_delentry(i, fep, frag);
6810                     mutex_exit(&frag->itpf_lock);
6811                     ip_drop_packet(first_mp, inbound, NULL,
6812                         NULL, DROPPER(ipss,
6813                         ipds_spd_overlap_frag),
6814                         &ipss->ipsec_spd_dropper);
6815                     return (NULL);
6816                 }
6817             }
6818 
6819             /*
6820              * Fragment does not illegally overlap and can now
6821              * be inserted into the chain
6822              */
6823             break;
6824         }
6825 
6826         prevmp = nmp;
6827     }
6828     first_mp->b_next = nmp;
6829 
6830     if (prevmp == NULL) {
6831         fep->itpfe_fraglist = first_mp;
6832     } else {
6833         prevmp->b_next = first_mp;
6834     }
6835     if (last)
6836         fep->itpfe_last = 1;
6837 
6838     /* Part of defense for jolt2.c fragmentation attack */
6839     if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
6840         (void) fragcache_delentry(i, fep, frag);
6841         mutex_exit(&frag->itpf_lock);
6842         ip_drop_packet(first_mp, inbound, NULL, NULL,
6843             DROPPER(ipss, ipds_spd_max_frags),
6844             &ipss->ipsec_spd_dropper);
6845         return (NULL);
6846     }
6847 
6848     /* Check for a complete packet */
6849 
6850     if (!fep->itpfe_last) {
6851         mutex_exit(&frag->itpf_lock);
6852 #ifdef FRAGCACHE_DEBUG
6853         cmn_err(CE_WARN, "Fragment cached, not last.\n");
6854 #endif
6855         return (NULL);
6856     }
6857 
6858 #ifdef FRAGCACHE_DEBUG
6859     cmn_err(CE_WARN, "Last fragment cached.\n");
6860     cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp);
6861 #endif
6862 
6863     offset = 0;
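    /*
     * The scan below walks the offset-sorted fraglist, tracking the
     * next expected byte in "offset".  Any gap means a fragment is
     * still missing, so the chain stays cached and NULL is returned.
     */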
6864     for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
6865         mblk_t *data_mp = (inbound ? mp->b_cont : mp);
6866         int hdr_len;
6867 
6868         oiph = (ipha_t *)data_mp->b_rptr;
6869         ip6h = NULL;
6870         iph = NULL;
6871 
6872         spare_mp = NULL;
6873         if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
6874             hdr_len = ((outer_hdr_len != 0) ?
6875                 IPH_HDR_LENGTH(oiph) : 0);
6876             iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
6877         } else {
6878             ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
6879             if ((spare_mp = msgpullup(data_mp, -1)) == NULL) {
6880                 mutex_exit(&frag->itpf_lock);
6881                 ip_drop_packet_chain(mp, inbound, NULL, NULL,
6882                     DROPPER(ipss, ipds_spd_nomem),
6883                     &ipss->ipsec_spd_dropper);
6884                 return (NULL);
6885             }
6886             ip6h = (ip6_t *)spare_mp->b_rptr;
6887             (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
6888                 &ip6_hdr_length, &v6_proto_p);
6889             hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
6890         }
6891 
6892         /* Calculate current fragment start/end */
6893         if (is_v4) {
6894             if (iph == NULL) {
6895                 /* Was v6 outer */
6896                 iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
6897             }
6898             firstbyte = V4_FRAG_OFFSET(iph);
6899             lastbyte = firstbyte + ntohs(iph->ipha_length) -
6900                 IPH_HDR_LENGTH(iph);
6901         } else {
6902             if ((spare_mp == NULL) &&
6903                 ((spare_mp = msgpullup(data_mp, -1)) == NULL)) {
6904                 mutex_exit(&frag->itpf_lock);
6905                 ip_drop_packet_chain(mp, inbound, NULL, NULL,
6906                     DROPPER(ipss, ipds_spd_nomem),
6907                     &ipss->ipsec_spd_dropper);
6908                 return (NULL);
6909             }
6910             ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len);
6911             if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
6912                 &ip6_hdr_length, &v6_proto_p)) {
6913                 mutex_exit(&frag->itpf_lock);
6914                 ip_drop_packet_chain(mp, inbound, NULL, NULL,
6915                     DROPPER(ipss, ipds_spd_malformed_frag),
6916                     &ipss->ipsec_spd_dropper);
6917                 ipsec_freemsg_chain(spare_mp);
6918                 return (NULL);
6919             }
6920             v6_proto = *v6_proto_p;
6921             bzero(&ipp, sizeof (ipp));
6922             (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
6923             fraghdr = ipp.ipp_fraghdr;
6924             firstbyte = ntohs(fraghdr->ip6f_offlg &
6925                 IP6F_OFF_MASK);
6926             lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
6927                 sizeof (ip6_t) - ip6_hdr_length;
6928         }
6929 
6930         /*
6931          * If this fragment starts beyond the current offset,
6932          * a fragment is missing, so return NULL
6933          */
6934         if (firstbyte > offset) {
6935             mutex_exit(&frag->itpf_lock);
6936 #ifdef FRAGCACHE_DEBUG
6937             /*
6938              * Note, this can happen when the last frag
6939              * gets sent through because it is smaller
6940              * than the MTU.  It is not necessarily an
6941              * error condition.
6942              */
6943             cmn_err(CE_WARN, "Frag greater than offset! "
6944                 "Missing fragment: firstbyte = %d, offset = %d, "
6945                 "mp = %p\n", firstbyte, offset, mp);
: " 6944 "missing fragment: firstbyte = %d, offset = %d, " 6945 "mp = %p\n", firstbyte, offset, mp); 6946 #endif 6947 ipsec_freemsg_chain(spare_mp); 6948 return (NULL); 6949 } 6950 6951 /* 6952 * If we are at the last fragment, we have the complete 6953 * packet, so rechain things and return it to caller 6954 * for processing 6955 */ 6956 6957 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6958 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6959 mp = fep->itpfe_fraglist; 6960 fep->itpfe_fraglist = NULL; 6961 (void) fragcache_delentry(i, fep, frag); 6962 mutex_exit(&frag->itpf_lock); 6963 6964 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6965 65535)) || (!is_v4 && (firstbyte + 6966 ntohs(ip6h->ip6_plen) > 65535))) { 6967 /* It is an invalid "ping-o-death" packet */ 6968 /* Discard it */ 6969 ip_drop_packet_chain(mp, inbound, NULL, NULL, 6970 DROPPER(ipss, ipds_spd_evil_frag), 6971 &ipss->ipsec_spd_dropper); 6972 ipsec_freemsg_chain(spare_mp); 6973 return (NULL); 6974 } 6975 #ifdef FRAGCACHE_DEBUG 6976 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6977 "mp->b_next = %p", mp, mp->b_next); 6978 #endif 6979 ipsec_freemsg_chain(spare_mp); 6980 /* 6981 * For inbound case, mp has ipsec_in b_next'd chain 6982 * For outbound case, it is just data mp chain 6983 */ 6984 return (mp); 6985 } 6986 ipsec_freemsg_chain(spare_mp); 6987 6988 /* 6989 * Update new ending offset if this 6990 * fragment extends the packet 6991 */ 6992 if (offset < lastbyte) 6993 offset = lastbyte; 6994 } 6995 6996 mutex_exit(&frag->itpf_lock); 6997 6998 /* Didn't find last fragment, so return NULL */ 6999 return (NULL); 7000 } 7001 7002 static void 7003 ipsec_fragcache_clean(ipsec_fragcache_t *frag) 7004 { 7005 ipsec_fragcache_entry_t *fep; 7006 int i; 7007 ipsec_fragcache_entry_t *earlyfep = NULL; 7008 time_t itpf_time; 7009 int earlyexp; 7010 int earlyi = 0; 7011 7012 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7013 7014 itpf_time = gethrestime_sec(); 7015 earlyexp = itpf_time + 10000; 7016 7017 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 7018 fep = (frag->itpf_ptr)[i]; 7019 while (fep) { 7020 if (fep->itpfe_exp < itpf_time) { 7021 /* found */ 7022 fep = fragcache_delentry(i, fep, frag); 7023 } else { 7024 if (fep->itpfe_exp < earlyexp) { 7025 earlyfep = fep; 7026 earlyexp = fep->itpfe_exp; 7027 earlyi = i; 7028 } 7029 fep = fep->itpfe_next; 7030 } 7031 } 7032 } 7033 7034 frag->itpf_expire_hint = earlyexp; 7035 7036 /* if (!found) */ 7037 if (frag->itpf_freelist == NULL) 7038 (void) fragcache_delentry(earlyi, earlyfep, frag); 7039 } 7040 7041 static ipsec_fragcache_entry_t * 7042 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 7043 ipsec_fragcache_t *frag) 7044 { 7045 ipsec_fragcache_entry_t *targp; 7046 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 7047 7048 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7049 7050 /* Free up any fragment list still in cache entry */ 7051 ipsec_freemsg_chain(fep->itpfe_fraglist); 7052 7053 targp = (frag->itpf_ptr)[slot]; 7054 ASSERT(targp != 0); 7055 7056 if (targp == fep) { 7057 /* unlink from head of hash chain */ 7058 (frag->itpf_ptr)[slot] = nextp; 7059 /* link into free list */ 7060 fep->itpfe_next = frag->itpf_freelist; 7061 frag->itpf_freelist = fep; 7062 return (nextp); 7063 } 7064 7065 /* maybe should use double linked list to make update faster */ 7066 /* must be past front of chain */ 7067 while (targp) { 7068 if (targp->itpfe_next == fep) { 7069 /* unlink from hash chain */ 7070 targp->itpfe_next = nextp; 7071 /* link into free list */ 7072 fep->itpfe_next = 
frag->itpf_freelist; 7073 frag->itpf_freelist = fep; 7074 return (nextp); 7075 } 7076 targp = targp->itpfe_next; 7077 ASSERT(targp != 0); 7078 } 7079 /* NOTREACHED */ 7080 return (NULL); 7081 } 7082
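/*
 * A minimal usage sketch of the fragcache interfaces above, for a
 * hypothetical caller feeding in one outbound fragment at a time.
 * (process_chain() is illustrative only; error handling is elided,
 * and an outer_hdr_len of 0 assumes no outer IP header has been
 * prepended yet.  A NULL ipsec_mp means outbound, and mp is consumed
 * whenever NULL comes back.)
 *
 *	ipsec_fragcache_t fc;
 *	mblk_t *whole;
 *
 *	if (!ipsec_fragcache_init(&fc))
 *		return (B_FALSE);
 *	whole = ipsec_fragcache_add(&fc, NULL, mp, 0, ipss);
 *	if (whole != NULL)
 *		process_chain(whole);	(a complete b_next chain, in order)
 *	...
 *	ipsec_fragcache_uninit(&fc);
 */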