/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags(void);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_reclaim(void *);
static void ipsid_init(void);
static void ipsid_fini(void);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
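
/*
 * These are independent bits and may be OR'ed together; for example, an
 * illustrative caller classifying an ICMP packet under per-port policy
 * would pass (SEL_PORT_POLICY | SEL_IS_ICMP).
 */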

/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG}
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Policy rule index generator.  We assume this won't wrap in the
 * lifetime of a system.  If we make 2^20 policy changes per second,
 * this will last 2^44 seconds, or roughly 500,000 years, so we don't
 * have to worry about reusing policy index values.
 *
 * Protected by ipsec_conf_lock.
 */
uint64_t ipsec_next_policy_index = 1;

/*
 * Active & Inactive system policy roots
 */
static ipsec_policy_head_t system_policy;
static ipsec_policy_head_t inactive_policy;

/*
 * Tunnel policies - AVL tree indexed by tunnel name.
 */
krwlock_t tunnel_policy_lock;
uint64_t tunnel_policy_gen;	/* To keep track of updates w/o searches. */
avl_tree_t tunnel_policies;

/* Packet dropper for generic SPD drops. */
ipdropper_t spd_dropper;

/*
 * For now, use a trivially sized hash table for actions.
 * In the future we can add the structure canonicalization necessary
 * to get the hash function to behave correctly..
 */
#define	IPSEC_ACTION_HASH_SIZE 1

/*
 * Selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT 251
uint32_t ipsec_spd_hashsize = 0;

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT 5
uint32_t tun_spd_hashsize;
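
/*
 * Illustrative only: on Solaris-derived systems, tunables like these are
 * conventionally overridden from /etc/system, e.g. (assuming this code
 * lives in the "ip" module):
 *
 *	set ip:ipsec_spd_hashsize = 499
 *	set ip:tun_spd_hashsize = 11
 *
 * Prime bucket counts help keep the selector hash evenly distributed.
 */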

#define	IPSEC_SEL_NOHASH ((uint32_t)(~0))

static HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE];
static HASH_HEAD(ipsec_sel) *ipsec_sel_hash;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

boolean_t ipsec_inbound_v4_policy_present = B_FALSE;
boolean_t ipsec_outbound_v4_policy_present = B_FALSE;
boolean_t ipsec_inbound_v6_policy_present = B_FALSE;
boolean_t ipsec_outbound_v6_policy_present = B_FALSE;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int);

/*
 * Because policy needs to know what algorithms are supported, keep the
 * lists of algorithms here.
 */

kmutex_t alg_lock;
krwlock_t itp_get_byaddr_rw_lock;
ipsec_tun_pol_t *(*itp_get_byaddr)(uint32_t *, uint32_t *, int);
uint8_t ipsec_nalgs[IPSEC_NALGTYPES];
ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
ipsec_algs_exec_mode_t ipsec_algs_exec_mode[IPSEC_NALGTYPES];
static crypto_notify_handle_t prov_update_handle = NULL;

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)
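
/*
 * For example, with align == 8:
 *
 *	ALGBITS_ROUND_DOWN(13, 8) == 8
 *	ALGBITS_ROUND_UP(13, 8) == ALGBITS_ROUND_DOWN(20, 8) == 16
 *
 * Both macros rely on truncating integer division and assume align != 0.
 */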

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2) 						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv4 Fragments
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)

/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};
/*
 * Have a counter for every possible policy message in the previous array.
 */
static uint32_t ipsec_policy_failure_count[IPSEC_POLICY_MAX];
/* Time since last ipsec policy failure that printed a message. */
hrtime_t ipsec_policy_failure_last = 0;

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock, ipsec_conf_lock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries..
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * The in-kernel AVL assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsp_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}
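
/*
 * A minimal sketch (not a new interface) of how tie-breaker #1 can be
 * exploited to enumerate every rule sharing an index: seed avl_find()
 * with a template whose ipsp_sel is NULL, then walk forward.  The
 * variable names here are hypothetical.
 *
 *	ipsec_policy_t tmpl, *p;
 *	avl_index_t where;
 *
 *	tmpl.ipsp_index = ix;
 *	tmpl.ipsp_sel = NULL;		(orders before all real entries)
 *	(void) avl_find(tree, &tmpl, &where);
 *	for (p = avl_nearest(tree, where, AVL_AFTER);
 *	    p != NULL && p->ipsp_index == ix;
 *	    p = AVL_NEXT(tree, p)) {
 *		... each installed rule with index ix ...
 *	}
 */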

void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));

	}
	ipsec_polhead_free_table(iph);
}

/*
 * Module unload hook.
 */
void
ipsec_policy_destroy(void)
{
	int i;
	void *cookie;
	ipsec_tun_pol_t *node;

	ip_drop_unregister(&spd_dropper);
	ip_drop_destroy();

	rw_enter(&tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&tunnel_policies, &cookie)) != NULL) {
		ITP_REFRELE(node);
	}
	avl_destroy(&tunnel_policies);
	rw_exit(&tunnel_policy_lock);
	rw_destroy(&tunnel_policy_lock);
	ipsec_polhead_destroy(&system_policy);
	ipsec_polhead_destroy(&inactive_policy);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_destroy(&(ipsec_action_hash[i].hash_lock));

	for (i = 0; i < ipsec_spd_hashsize; i++)
		mutex_destroy(&(ipsec_sel_hash[i].hash_lock));

	ipsec_unregister_prov_update();

	mutex_destroy(&alg_lock);

	kmem_cache_destroy(ipsec_action_cache);
	kmem_cache_destroy(ipsec_sel_cache);
	kmem_cache_destroy(ipsec_pol_cache);
	kmem_cache_destroy(ipsec_info_cache);
	ipsid_gc();
	ipsid_fini();
}


/*
 * Called when table allocation fails, to free any tables already allocated.
 */
static int
ipsec_alloc_tables_failed()
{
	if (ipsec_sel_hash != NULL) {
		kmem_free(ipsec_sel_hash, ipsec_spd_hashsize *
		    sizeof (*ipsec_sel_hash));
		ipsec_sel_hash = NULL;
	}
	ipsec_polhead_free_table(&system_policy);
	ipsec_polhead_free_table(&inactive_policy);

	return (ENOMEM);
}

/*
 * Attempt to allocate the tables in a single policy head.
 * Return nonzero on failure after cleaning up any work in progress.
 */
int
ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
    boolean_t global_cleanup)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;
		ipr->ipr_hash = kmem_zalloc(nchains *
		    sizeof (ipsec_policy_hash_t), kmflag);
		if (ipr->ipr_hash == NULL)
			return (global_cleanup ? ipsec_alloc_tables_failed() :
			    ENOMEM);
	}
	return (0);
}

/*
 * Attempt to allocate the various tables.  Return nonzero on failure
 * after cleaning up any work in progress.
 */
static int
ipsec_alloc_tables(int kmflag)
{
	int error;

	error = ipsec_alloc_table(&system_policy, ipsec_spd_hashsize, kmflag,
	    B_TRUE);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&inactive_policy, ipsec_spd_hashsize, kmflag,
	    B_TRUE);
	if (error != 0)
		return (error);

	ipsec_sel_hash = kmem_zalloc(ipsec_spd_hashsize *
	    sizeof (*ipsec_sel_hash), kmflag);

	if (ipsec_sel_hash == NULL)
		return (ipsec_alloc_tables_failed());

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}

/*
 * Module load hook.
 */
void
ipsec_policy_init()
{
	int i;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	if (ipsec_spd_hashsize == 0)
		ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;

	if (ipsec_alloc_tables(KM_NOSLEEP) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipsec_spd_hashsize);
		ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP);
	}

	/* Just set a default for tunnels. */
	if (tun_spd_hashsize == 0)
		tun_spd_hashsize = TUN_SPDHASH_DEFAULT;

	ipsid_init();
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	system_policy.iph_refs = 1;
	inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&system_policy, ipsec_spd_hashsize);
	ipsec_polhead_init(&inactive_policy, ipsec_spd_hashsize);
	rw_init(&tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&tunnel_policies, tunnel_compare, sizeof (ipsec_tun_pol_t),
	    0);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipsec_spd_hashsize; i++)
		mutex_init(&(ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&alg_lock, NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < IPSEC_NALGTYPES; i++)
		ipsec_nalgs[i] = 0;

	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_info_cache = kmem_cache_create("ipsec_info",
	    sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	ip_drop_init();
	ip_drop_register(&spd_dropper, "IPsec SPD");

	/* Set function to dummy until tun is loaded */
	rw_init(&itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
	rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER);
	itp_get_byaddr = itp_get_byaddr_dummy;
	rw_exit(&itp_get_byaddr_rw_lock);
}

/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid)
{
	ipsec_alginfo_t *ai = ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipsec_nalgs[at];
	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipsec_alglists[at][ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipsec_sortlist[at][i];
			ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		} /* Else just continue. */
	}

	/* Store holder in last slot. */
	ipsec_sortlist[at][i] = holder;
}

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid)
{
	boolean_t copyback = B_FALSE;
	int i;
	int newcount = ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback)
			ipsec_sortlist[at][i-1] = ipsec_sortlist[at][i];
		else if (ipsec_sortlist[at][i] == algid)
			copyback = B_TRUE;
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg)
{
	ASSERT(MUTEX_HELD(&alg_lock));

	ASSERT(ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype);
	ipsec_alglists[algtype][alg->alg_id] = alg;

	ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid)
{
	ASSERT(MUTEX_HELD(&alg_lock));

	ASSERT(ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipsec_alglists[algtype][algid]);
	ipsec_alglists[algtype][algid] = NULL;

	ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid);
}

/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(void)
{
	ipsec_policy_head_t *h = &system_policy;
	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(void)
{
	ipsec_policy_head_t *h = &inactive_policy;
	IPPH_REFHOLD(h);
	return (h);
}

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}

		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags();
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(void)
{
	ipsec_swap_policy(&system_policy, &inactive_policy);
}

/*
 * Clone one policy rule..
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}


static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);
		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}



/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];
		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(void)
{
	return (ipsec_copy_polhead(&system_policy, &inactive_policy));
}

/*
 * Generic "do we have IPvN policy" answer.
 */
boolean_t
iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
{
	int i, hval;
	uint32_t valbit;
	ipsec_policy_root_t *ipr;
	ipsec_policy_t *ipp;

	if (v6) {
		valbit = IPSL_IPV6;
		hval = IPSEC_AF_V6;
	} else {
		valbit = IPSL_IPV4;
		hval = IPSEC_AF_V4;
	}

	ASSERT(RW_LOCK_HELD(&iph->iph_lock));
	for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
		if (ipr->ipr_nonhash[hval] != NULL)
			return (B_TRUE);
		for (i = 0; i < ipr->ipr_nchains; i++) {
			for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
			    ipp = ipp->ipsp_hash.hash_next) {
				if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
					return (B_TRUE);
			}
		}
	}

	return (B_FALSE);
}

/*
 * Extract the string from ipsec_policy_failure_msgs[type] and
 * log it.
 */
void
ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h,
    boolean_t secure)
{
	char sbuf[INET6_ADDRSTRLEN];
	char dbuf[INET6_ADDRSTRLEN];
	char *s;
	char *d;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL) {
		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
	} else {
		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));

	}

	/* Always bump the policy failure counter. */
	ipsec_policy_failure_count[type]++;

	ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    ipsec_policy_failure_msgs[type], func_name,
	    (secure ? "secure" : "not secure"), s, d);
}

/*
 * Rate-limiting front-end to strlog() for AH and ESP.  Uses the ndd variables
 * in /dev/ip and the same rate-limiting clock so that there's a single
 * knob to turn to throttle the rate of messages.
 */
void
ipsec_rl_strlog(short mid, short sid, char level, ushort_t sl, char *fmt, ...)
{
	va_list adx;
	hrtime_t current = gethrtime();

	sl |= SL_CONSOLE;
	/*
	 * Throttle logging to stop syslog from being swamped.  If variable
	 * 'ipsec_policy_log_interval' is zero, don't log any messages at
	 * all, otherwise log only one message every 'ipsec_policy_log_interval'
	 * msec.  Convert interval (in msec) to hrtime (in nsec).
	 */

	if (ipsec_policy_log_interval) {
		if (ipsec_policy_failure_last +
		    ((hrtime_t)ipsec_policy_log_interval * (hrtime_t)1000000) <=
		    current) {
			va_start(adx, fmt);
			(void) vstrlog(mid, sid, level, sl, fmt, adx);
			va_end(adx);
			ipsec_policy_failure_last = current;
		}
	}
}
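
/*
 * For example, with ipsec_policy_log_interval set to 1000 (msec), the
 * guard above works out to 1000 * 1000000 = 10^9 nsec of gethrtime(),
 * i.e. at most one logged message per second; setting the interval to
 * zero suppresses logging entirely while the failure counters keep
 * counting.
 */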

void
ipsec_config_flush()
{
	rw_enter(&system_policy.iph_lock, RW_WRITER);
	ipsec_polhead_flush(&system_policy);
	ipsec_next_policy_index = 1;
	rw_exit(&system_policy.iph_lock);
	ipsec_action_reclaim(0);
}

/*
 * Clip a policy's min/max keybits vs. the capabilities of the
 * algorithm.
 */
static void
act_alg_adjust(uint_t algtype, uint_t algid,
    uint16_t *minbits, uint16_t *maxbits)
{
	ipsec_alginfo_t *algp = ipsec_alglists[algtype][algid];
	if (algp != NULL) {
		/*
		 * If passed-in minbits is zero, we assume the caller trusts
		 * us with setting the minimum key size.  We pick the
		 * algorithm's DEFAULT key size for the minimum in this case.
		 */
		if (*minbits == 0) {
			*minbits = algp->alg_default_bits;
			ASSERT(*minbits >= algp->alg_minbits);
		} else {
			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
			    algp->alg_minbits);
		}
		if (*maxbits == 0)
			*maxbits = algp->alg_maxbits;
		else
			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
			    algp->alg_maxbits);
		ASSERT(*minbits <= *maxbits);
	} else {
		*minbits = 0;
		*maxbits = 0;
	}
}
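
/*
 * Worked example (hypothetical algorithm): for an alg with
 * alg_minbits == 128, alg_maxbits == 256, alg_default_bits == 128,
 * a request of (*minbits, *maxbits) == (0, 0) becomes (128, 256),
 * while (64, 512) is clamped to (128, 256):
 *
 *	minbits = MAX(MIN(64, 256), 128)  == 128
 *	maxbits = MIN(MAX(512, 128), 256) == 256
 *
 * If the algorithm isn't loaded at all, both bounds collapse to zero.
 */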

/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag)
{
	ipsec_prot_t *ipp;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of non-wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max) if (wild) {	\
		int nalgs = ipsec_nalgs[type];			\
		if (ipsec_alglists[type][alg] != NULL)		\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipsec_nalgs[type] - 1;			\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max);

#undef SET_EXP_MINMAX
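
	/*
	 * Sizing example: if AH auth is wildcarded with 4 auth algs
	 * loaded (one of which is SADB_AALG_NONE) and ESP encr is
	 * wildcarded with 3 encr algs loaded (one being SADB_EALG_NONE),
	 * then action_count = 1 * (4 - 1) * (3 - 1) = 6 concrete actions.
	 * The "none" entries widen the index range scanned below but are
	 * skipped by the continue tests in the loops.
	 */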

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx) ((wild)?(ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}

/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg) || !ipp->ipp_use_ah;
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	ipp->ipp_auth_alg = req->ipsr_auth_alg;
	ipp->ipp_esp_auth_alg = req->ipsr_esp_auth_alg;
}
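
/*
 * "!!" folds any nonzero value to exactly 1, which matters because the
 * destinations are one-bit bitfields.  For example, a request with
 * ipsr_ah_req == (IPSEC_PREF_REQUIRED | IPSEC_PREF_UNIQUE) yields
 * ipp_use_ah == 1 and ipp_use_unique == 1; plain assignment of the
 * masked value could silently truncate to the low bit instead.
 */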

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp)
{
	struct ipsec_act act;
	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a policy head's first non-wildcard inbound rule back to an
 * ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}

/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	mutex_enter(&connp->conn_lock);
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is the latched action;
	 * failing that, the latched policy; failing that, the configured
	 * policy.
	 */
	if (ipl != NULL) {
		if (ipl->ipl_in_action != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
			goto done;
		}
		if (ipl->ipl_in_policy != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
			    req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	mutex_exit(&connp->conn_lock);
	return (rv);
}
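
/*
 * Precedence example: a latched TCP conn with ipl_in_action set reports
 * that action even if conn_policy also exists; only when neither latched
 * field is set does the (zeroed) req fall through to the configured
 * head, and an all-wildcard head leaves the req reading as "no IPsec
 * requested".
 */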

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * When outbound policy is not cached, look it up the hard way and attach
 * an ipsec_out_t to the packet..
 */
static mblk_t *
ipsec_attach_global_policy(mblk_t *mp, conn_t *connp, ipsec_selector_t *sel)
{
	ipsec_policy_t *p;

	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel);

	if (p == NULL)
		return (NULL);
	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol));
}

/*
 * We have an ipsec_out already, but don't have cached policy; fill it in
 * with the right actions.
 */
static mblk_t *
ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
    ipsec_selector_t *sel)
{
	ipsec_out_t *io;
	ipsec_policy_t *p;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	if (io->ipsec_out_policy == NULL) {
		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel);
		io->ipsec_out_policy = p;
	}
	return (ipsec_mp);
}


/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present,
    ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (first_mp);
}

/*
 * Check that packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;

	ASSERT(ii->ipsec_in_secure);
	ASSERT(!ii->ipsec_in_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */

	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = &ipdrops_spd_ah_innermismatch;
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = &ipdrops_spd_esp_innermismatch;
		return (B_FALSE);
	}
	return (B_TRUE);
}
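
/*
 * Semantics sketch: an SA bound to particular traffic carries a nonzero
 * ipsa_unique_mask covering the port/protocol (and tunnel-mode) bits it
 * cares about, with the expected pre-masked values in ipsa_unique_id.
 * An SA with a zero mask matches any traffic, so a packet passes only
 * if, for each SA with a nonzero mask,
 *
 *	(pkt_unique & ipsa_unique_mask) == ipsa_unique_id
 */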

static boolean_t
ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ii->ipsec_in_loopback) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * icmp_loopback is initialized in ip.c and ip6.c.  (Look for
		 * ipsec_out_icmp_loopback.)
		 */
		if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback)
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = &ipdrops_spd_loopback_mismatch;
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!ii->ipsec_in_icmp_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;

	decaps = ii->ipsec_in_decaps;

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = &ipdrops_spd_explicit;
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = &ipdrops_spd_got_secure;
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitivity levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = &ipdrops_spd_got_clear;
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = &ipdrops_spd_bad_ahalg;
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = &ipdrops_spd_got_ah;
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = &ipdrops_spd_got_clear;
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = &ipdrops_spd_bad_espealg;
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = &ipdrops_spd_bad_espaalg;
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = &ipdrops_spd_got_esp;
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = &ipdrops_spd_bad_selfencap;
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay.  But we drop to be consistent with the
			 * other cases.
			 */
			*counter = &ipdrops_spd_got_selfencap;
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}
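
/*
 * Summary of the IPSEC_ACT_APPLY arm above, ignoring the algorithm
 * comparisons and the ipsec_inbound_accept_clear() escape hatch:
 *
 *	policy wants	packet has	outcome
 *	AH		no AH SA	accept only if clear is acceptable
 *	no AH		AH SA		drop (spd_got_ah)
 *	ESP		no ESP SA	accept only if clear is acceptable
 *	no ESP		ESP SA		drop (spd_got_esp)
 *	self-encap	not decaps'd	accept only if clear is acceptable
 *	no self-encap	decaps'd	drop (spd_got_selfencap)
 *
 * A fully matching action is latched into ii->ipsec_in_action.
 */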

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
}

/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fall back to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}
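
/*
 * Fast-path example: a fully specified latched TCP conn with
 * conn_lport 80 and conn_fport 33000 yields
 * SA_UNIQUE_ID(33000, 80, IPPROTO_TCP, 0) without touching the packet;
 * only a wildcard port (e.g. an unconnected UDP socket) forces the
 * ipsec_init_inbound_sel() slow path.
 */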

/*
 * Called to check policy on a latched connection, both from this file
 * and from tcp.c
 */
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    conn_t *connp)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);

	if (!ii->ipsec_in_loopback) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if ((ii->ipsec_in_ah_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
			*counter = &ipdrops_spd_ah_badid;
			*reason = "AH identity mismatch";
			return (B_FALSE);
		}

		if ((ii->ipsec_in_esp_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
			*counter = &ipdrops_spd_esp_badid;
			*reason = "ESP identity mismatch";
			return (B_FALSE);
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h))) {
			return (B_FALSE);
		}
	}

	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
	    ipha, ip6h, reason, counter));
}

/*
 * Check to see whether this secured datagram meets the policy
 * constraints specified in ipsp.
 *
 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy.
 *
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp,
    ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique)
{
	ipsec_in_t *ii;
	ipsec_action_t *ap;
	const char *reason = "no policy actions found";
	mblk_t *data_mp, *ipsec_mp;
	kstat_named_t *counter = &ipdrops_spd_got_secure;

	data_mp = first_mp->b_cont;
	ipsec_mp = first_mp;

	ASSERT(ipsp != NULL);

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;

	if (ii->ipsec_in_loopback)
		return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp));
	ASSERT(ii->ipsec_in_type == IPSEC_IN);
	ASSERT(ii->ipsec_in_secure);

	if (ii->ipsec_in_action != NULL) {
		/*
		 * this can happen if we do a double policy-check on a packet
		 * Would be nice to be able to delete this test..
		 */
		IPACT_REFRELE(ii->ipsec_in_action);
	}
	ASSERT(ii->ipsec_in_action == NULL);

	if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) {
		reason = "inbound AH and ESP identities differ";
		counter = &ipdrops_spd_ahesp_diffid;
		goto drop;
	}

	if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique))
		goto drop;

	/*
	 * Ok, now loop through the possible actions and see if any
	 * of them work for us.
	 */

	for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) {
		if (ipsec_check_ipsecin_action(ii, data_mp, ap,
		    ipha, ip6h, &reason, &counter)) {
			BUMP_MIB(&ip_mib, ipsecInSucceeded);
			IPPOL_REFRELE(ipsp);
			return (first_mp);
		}
	}
drop:
	ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    "ipsec inbound policy mismatch: %s, packet dropped\n",
	    reason);
	IPPOL_REFRELE(ipsp);
	ASSERT(ii->ipsec_in_action == NULL);
	BUMP_MIB(&ip_mib, ipsecInFailed);
	ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper);
	return (NULL);
}

/*
 * sleazy prefix-length-based compare.
 * another inlining candidate..
 */
boolean_t
ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p)
{
	int offset = pfxlen>>3;
	int bitsleft = pfxlen & 7;
	uint8_t *addr2 = (uint8_t *)addr2p;

	/*
	 * and there was much evil..
	 * XXX should inline-expand the bcmp here and do this 32 bits
	 * or 64 bits at a time..
	 */
	return ((bcmp(addr1, addr2, offset) == 0) &&
	    ((bitsleft == 0) ||
	    (((addr1[offset] ^ addr2[offset]) &
	    (0xff<<(8-bitsleft))) == 0)));
}
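
/*
 * Worked example: pfxlen == 25 gives offset == 3 and bitsleft == 1, so
 * the first three bytes are bcmp'ed whole and the mask for the fourth
 * byte is 0xff << 7, i.e. only the top bit of that byte must match.
 * pfxlen == 0 matches everything (offset 0, bitsleft 0).
 */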

static ipsec_policy_t *
ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain,
    ipsec_selector_t *sel, boolean_t is_icmp_inv_acq)
{
	ipsec_selkey_t *isel;
	ipsec_policy_t *p;
	int bpri = best ? best->ipsp_prio : 0;

	for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) {
		uint32_t valid;

		if (p->ipsp_prio <= bpri)
			continue;
		isel = &p->ipsp_sel->ipsl_key;
		valid = isel->ipsl_valid;

		if ((valid & IPSL_PROTOCOL) &&
		    (isel->ipsl_proto != sel->ips_protocol))
			continue;

		if ((valid & IPSL_REMOTE_ADDR) &&
		    !ip_addr_match((uint8_t *)&isel->ipsl_remote,
		    isel->ipsl_remote_pfxlen,
		    &sel->ips_remote_addr_v6))
			continue;

		if ((valid & IPSL_LOCAL_ADDR) &&
		    !ip_addr_match((uint8_t *)&isel->ipsl_local,
		    isel->ipsl_local_pfxlen,
		    &sel->ips_local_addr_v6))
			continue;

		if ((valid & IPSL_REMOTE_PORT) &&
		    isel->ipsl_rport != sel->ips_remote_port)
			continue;

		if ((valid & IPSL_LOCAL_PORT) &&
		    isel->ipsl_lport != sel->ips_local_port)
			continue;

		if (!is_icmp_inv_acq) {
			if ((valid & IPSL_ICMP_TYPE) &&
			    (isel->ipsl_icmp_type > sel->ips_icmp_type ||
			    isel->ipsl_icmp_type_end < sel->ips_icmp_type)) {
				continue;
			}

			if ((valid & IPSL_ICMP_CODE) &&
			    (isel->ipsl_icmp_code > sel->ips_icmp_code ||
			    isel->ipsl_icmp_code_end <
			    sel->ips_icmp_code)) {
				continue;
			}
		} else {
			/*
			 * special case for icmp inverse acquire:
			 * we only want policies that aren't drop/pass
			 */
			if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY)
				continue;
		}

		/* we matched all the packet-port-field selectors! */
		best = p;
		bpri = p->ipsp_prio;
	}

	return (best);
}
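
/*
 * Example of the priority scan: if a chain holds matching entries with
 * ipsp_prio 10, 30 and 20 (in that order), the walk latches 10, then
 * 30, then skips 20 at the "p->ipsp_prio <= bpri" test before any
 * selector is examined, returning the prio-30 entry.
 */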
IPSEC_AF_V4 : IPSEC_AF_V6; 1973 1974 curbest = best; 1975 root = &head->iph_root[direction]; 1976 1977 #ifdef DEBUG 1978 if (is_icmp_inv_acq) { 1979 if (sel->ips_isv4) { 1980 if (sel->ips_protocol != IPPROTO_ICMP) { 1981 cmn_err(CE_WARN, "ipsec_find_policy_head:" 1982 " expecting icmp, got %d", sel->ips_protocol); 1983 } 1984 } else { 1985 if (sel->ips_protocol != IPPROTO_ICMPV6) { 1986 cmn_err(CE_WARN, "ipsec_find_policy_head:" 1987 " expecting icmpv6, got %d", sel->ips_protocol); 1988 } 1989 } 1990 } 1991 #endif 1992 1993 rw_enter(&head->iph_lock, RW_READER); 1994 1995 if (root->ipr_nchains > 0) { 1996 curbest = ipsec_find_policy_chain(curbest, 1997 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 1998 is_icmp_inv_acq); 1999 } 2000 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2001 is_icmp_inv_acq); 2002 2003 /* 2004 * Adjust reference counts if we found anything new. 2005 */ 2006 if (curbest != best) { 2007 ASSERT(curbest != NULL); 2008 IPPOL_REFHOLD(curbest); 2009 2010 if (best != NULL) { 2011 IPPOL_REFRELE(best); 2012 } 2013 } 2014 2015 rw_exit(&head->iph_lock); 2016 2017 return (curbest); 2018 } 2019 2020 /* 2021 * Find the best system policy (either global or per-interface) which 2022 * applies to the given selector; look in all the relevant policy roots 2023 * to figure out which policy wins. 2024 * 2025 * Returns a reference to a policy; caller must release this 2026 * reference when done. 2027 */ 2028 ipsec_policy_t * 2029 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2030 ipsec_selector_t *sel) 2031 { 2032 ipsec_policy_t *p; 2033 2034 p = ipsec_find_policy_head(NULL, &system_policy, direction, sel); 2035 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2036 p = ipsec_find_policy_head(p, connp->conn_policy, 2037 direction, sel); 2038 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2039 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2040 direction, sel); 2041 } 2042 2043 return (p); 2044 } 2045 2046 /* 2047 * Check with global policy and see whether this inbound 2048 * packet meets the policy constraints. 2049 * 2050 * Locate appropriate policy from global policy, supplemented by the 2051 * conn's configured and/or cached policy if the conn is supplied. 2052 * 2053 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2054 * encrypted packet to see if they match. 2055 * 2056 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2057 * floor. 2058 */ 2059 mblk_t * 2060 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2061 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2062 { 2063 ipsec_policy_t *p; 2064 ipsec_selector_t sel; 2065 mblk_t *data_mp, *ipsec_mp; 2066 boolean_t policy_present; 2067 kstat_named_t *counter; 2068 ipsec_in_t *ii = NULL; 2069 uint64_t pkt_unique; 2070 2071 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2072 ipsec_mp = mctl_present ? 
first_mp : NULL; 2073 2074 sel.ips_is_icmp_inv_acq = 0; 2075 2076 ASSERT((ipha == NULL && ip6h != NULL) || 2077 (ip6h == NULL && ipha != NULL)); 2078 2079 if (ipha != NULL) 2080 policy_present = ipsec_inbound_v4_policy_present; 2081 else 2082 policy_present = ipsec_inbound_v6_policy_present; 2083 2084 if (!policy_present && connp == NULL) { 2085 /* 2086 * No global policy and no per-socket policy; 2087 * just pass it back (but we shouldn't get here in that case) 2088 */ 2089 return (first_mp); 2090 } 2091 2092 if (ipsec_mp != NULL) { 2093 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2094 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2095 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2096 } 2097 2098 /* 2099 * If we have cached policy, use it. 2100 * Otherwise consult system policy. 2101 */ 2102 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2103 p = connp->conn_latch->ipl_in_policy; 2104 if (p != NULL) { 2105 IPPOL_REFHOLD(p); 2106 } 2107 /* 2108 * Fudge sel for UNIQUE_ID setting below. 2109 */ 2110 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2111 } else { 2112 /* Initialize the ports in the selector */ 2113 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2114 SEL_NONE) == SELRET_NOMEM) { 2115 /* 2116 * Technically not a policy mismatch, but it is 2117 * an internal failure. 2118 */ 2119 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2120 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE); 2121 counter = &ipdrops_spd_nomem; 2122 goto fail; 2123 } 2124 2125 /* 2126 * Find the policy which best applies. 2127 * 2128 * If we find global policy, we should look at both 2129 * local policy and global policy and see which is 2130 * stronger and match accordingly. 2131 * 2132 * If we don't find a global policy, check with 2133 * local policy alone. 2134 */ 2135 2136 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel); 2137 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2138 sel.ips_local_port, sel.ips_protocol, 0); 2139 } 2140 2141 if (p == NULL) { 2142 if (ipsec_mp == NULL) { 2143 /* 2144 * We have no policy; default to succeeding. 2145 * XXX paranoid system design doesn't do this. 2146 */ 2147 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2148 return (first_mp); 2149 } else { 2150 counter = &ipdrops_spd_got_secure; 2151 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2152 "ipsec_check_global_policy", ipha, ip6h, B_TRUE); 2153 goto fail; 2154 } 2155 } 2156 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2157 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2158 pkt_unique)); 2159 } 2160 if (p->ipsp_act->ipa_allow_clear) { 2161 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2162 IPPOL_REFRELE(p); 2163 return (first_mp); 2164 } 2165 IPPOL_REFRELE(p); 2166 /* 2167 * If we reach here, we will drop the packet because it failed the 2168 * global policy check because the packet was cleartext, and it 2169 * should not have been. 2170 */ 2171 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2172 "ipsec_check_global_policy", ipha, ip6h, B_FALSE); 2173 counter = &ipdrops_spd_got_clear; 2174 2175 fail: 2176 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper); 2177 BUMP_MIB(&ip_mib, ipsecInFailed); 2178 return (NULL); 2179 } 2180 2181 /* 2182 * We check whether an inbound datagram is a valid one 2183 * to accept in clear. If it is secure, it is the job 2184 * of IPSEC to log information appropriately if it 2185 * suspects that it may not be the real one. 
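*
* For example, a client whose policy demands ESP still needs to see a
* cleartext ICMP "fragmentation needed" error, or path-MTU discovery
* stops working; the switch statements below encode such case-by-case
* judgment calls.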
2186 *
2187 * It is called only while fanning out to the ULP
2188 * where the ULP accepts only secure data and the incoming
2189 * datagram is clear. Usually we never accept clear datagrams in
2190 * such cases. ICMP is the only exception.
2191 *
2192 * NOTE : We don't call this function if the client (ULP)
2193 * is willing to accept things in clear.
2194 */
2195 boolean_t
2196 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
2197 {
2198 ushort_t iph_hdr_length;
2199 icmph_t *icmph;
2200 icmp6_t *icmp6;
2201 uint8_t *nexthdrp;
2202
2203 ASSERT((ipha != NULL && ip6h == NULL) ||
2204 (ipha == NULL && ip6h != NULL));
2205
2206 if (ip6h != NULL) {
2207 iph_hdr_length = ip_hdr_length_v6(mp, ip6h);
2208 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length,
2209 &nexthdrp)) {
2210 return (B_FALSE);
2211 }
2212 if (*nexthdrp != IPPROTO_ICMPV6)
2213 return (B_FALSE);
2214 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]);
2215 /* Match IPv6 ICMP policy as closely to IPv4 as possible. */
2216 switch (icmp6->icmp6_type) {
2217 case ICMP6_PARAM_PROB:
2218 /* Corresponds to port/proto unreach in IPv4. */
2219 case ICMP6_ECHO_REQUEST:
2220 /* Just like IPv4. */
2221 return (B_FALSE);
2222
2223 case MLD_LISTENER_QUERY:
2224 case MLD_LISTENER_REPORT:
2225 case MLD_LISTENER_REDUCTION:
2226 /*
2227 * XXX Separate NDD tunable in IPv4; what about here?
2228 * Plus, mcast is important to ND.
2229 */
2230 case ICMP6_DST_UNREACH:
2231 /* Corresponds to HOST/NET unreachable in IPv4. */
2232 case ICMP6_PACKET_TOO_BIG:
2233 case ICMP6_ECHO_REPLY:
2234 /* These are trusted in IPv4. */
2235 case ND_ROUTER_SOLICIT:
2236 case ND_ROUTER_ADVERT:
2237 case ND_NEIGHBOR_SOLICIT:
2238 case ND_NEIGHBOR_ADVERT:
2239 case ND_REDIRECT:
2240 /* Trust ND messages for now. */
2241 case ICMP6_TIME_EXCEEDED:
2242 default:
2243 return (B_TRUE);
2244 }
2245 } else {
2246 /*
2247 * If it is not ICMP, fail this request.
2248 */
2249 if (ipha->ipha_protocol != IPPROTO_ICMP) {
2250 #ifdef FRAGCACHE_DEBUG
2251 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
2252 ipha->ipha_protocol);
2253 #endif
2254 return (B_FALSE);
2255 }
2256 iph_hdr_length = IPH_HDR_LENGTH(ipha);
2257 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2258 /*
2259 * It is an insecure icmp message. Check to see whether we are
2260 * willing to accept this one.
2261 */
2262
2263 switch (icmph->icmph_type) {
2264 case ICMP_ECHO_REPLY:
2265 case ICMP_TIME_STAMP_REPLY:
2266 case ICMP_INFO_REPLY:
2267 case ICMP_ROUTER_ADVERTISEMENT:
2268 /*
2269 * We should not encourage clear replies if this
2270 * client expects secure. If somebody is replying
2271 * in clear, some malicious user watching both the
2272 * request and the reply can mount chosen-plaintext
2273 * attacks. With global policy we might just be
2274 * expecting secure but sending out clear. We don't
2275 * know what the right thing is; we can't do much
2276 * here as we can't control the sender. Till we are
2277 * sure of what to do, accept them.
2278 */
2279 return (B_TRUE);
2280 case ICMP_ECHO_REQUEST:
2281 case ICMP_TIME_STAMP_REQUEST:
2282 case ICMP_INFO_REQUEST:
2283 case ICMP_ADDRESS_MASK_REQUEST:
2284 case ICMP_ROUTER_SOLICITATION:
2285 case ICMP_ADDRESS_MASK_REPLY:
2286 /*
2287 * Don't accept these, as somebody could be sending
2288 * us plaintext to get encrypted data. If we reply,
2289 * it will lead to a chosen-plaintext attack.
2290 */
2291 return (B_FALSE);
2292 case ICMP_DEST_UNREACHABLE:
2293 switch (icmph->icmph_code) {
2294 case ICMP_FRAGMENTATION_NEEDED:
2295 /*
2296 * Be in sync with icmp_inbound, where we have
2297 * already set ire_max_frag.
2298 */
2299 #ifdef FRAGCACHE_DEBUG
2300 cmn_err(CE_WARN, "ICMP frag needed\n");
2301 #endif
2302 return (B_TRUE);
2303 case ICMP_HOST_UNREACHABLE:
2304 case ICMP_NET_UNREACHABLE:
2305 /*
2306 * By accepting, we could reset a connection.
2307 * How do we solve the problem of some
2308 * intermediate router sending insecure ICMP
2309 * messages?
2310 */
2311 return (B_TRUE);
2312 case ICMP_PORT_UNREACHABLE:
2313 case ICMP_PROTOCOL_UNREACHABLE:
2314 default:
2315 return (B_FALSE);
2316 }
2317 case ICMP_SOURCE_QUENCH:
2318 /*
2319 * If this is an attack, TCP will slow start
2320 * because of this. Is it very harmful?
2321 */
2322 return (B_TRUE);
2323 case ICMP_PARAM_PROBLEM:
2324 return (B_FALSE);
2325 case ICMP_TIME_EXCEEDED:
2326 return (B_TRUE);
2327 case ICMP_REDIRECT:
2328 return (B_FALSE);
2329 default:
2330 return (B_FALSE);
2331 }
2332 }
2333 }
2334
2335 void
2336 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote)
2337 {
2338 mutex_enter(&ipl->ipl_lock);
2339
2340 if (ipl->ipl_ids_latched) {
2341 /* I lost; someone else got here before me */
2342 mutex_exit(&ipl->ipl_lock);
2343 return;
2344 }
2345
2346 if (local != NULL)
2347 IPSID_REFHOLD(local);
2348 if (remote != NULL)
2349 IPSID_REFHOLD(remote);
2350
2351 ipl->ipl_local_cid = local;
2352 ipl->ipl_remote_cid = remote;
2353 ipl->ipl_ids_latched = B_TRUE;
2354 mutex_exit(&ipl->ipl_lock);
2355 }
2356
2357 void
2358 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii)
2359 {
2360 ipsa_t *sa;
2361
2362 if (!ipl->ipl_ids_latched) {
2363 ipsid_t *local = NULL;
2364 ipsid_t *remote = NULL;
2365
2366 if (!ii->ipsec_in_loopback) {
2367 if (ii->ipsec_in_esp_sa != NULL)
2368 sa = ii->ipsec_in_esp_sa;
2369 else
2370 sa = ii->ipsec_in_ah_sa;
2371 ASSERT(sa != NULL);
2372 local = sa->ipsa_dst_cid;
2373 remote = sa->ipsa_src_cid;
2374 }
2375 ipsec_latch_ids(ipl, local, remote);
2376 }
2377 ipl->ipl_in_action = ii->ipsec_in_action;
2378 IPACT_REFHOLD(ipl->ipl_in_action);
2379 }
2380
2381 /*
2382 * Check whether the policy constraints are met for an
2383 * inbound datagram; called from IP in numerous places.
2384 *
2385 * Note that this is not a chokepoint for inbound policy checks;
2386 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy().
2387 */
2388 mblk_t *
2389 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp,
2390 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present)
2391 {
2392 ipsec_in_t *ii;
2393 boolean_t ret;
2394 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp;
2395 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL;
2396 ipsec_latch_t *ipl;
2397 uint64_t unique_id;
2398
2399 ASSERT(connp != NULL);
2400 ipl = connp->conn_latch;
2401
2402 if (ipsec_mp == NULL) {
2403 clear:
2404 /*
2405 * This is the case where the incoming datagram is
2406 * cleartext and we need to see whether this client
2407 * would like to receive such untrustworthy things from
2408 * the wire.
2409 */
2410 ASSERT(mp != NULL);
2411
2412 if (ipl != NULL) {
2413 /*
2414 * Policy is cached in the conn.
2415 */
2416 if ((ipl->ipl_in_policy != NULL) &&
2417 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) {
2418 ret = ipsec_inbound_accept_clear(mp,
2419 ipha, ip6h);
2420 if (ret) {
2421 BUMP_MIB(&ip_mib, ipsecInSucceeded);
2422 return (first_mp);
2423 } else {
2424 ipsec_log_policy_failure(
2425 IPSEC_POLICY_MISMATCH,
2426 "ipsec_check_inbound_policy", ipha,
2427 ip6h, B_FALSE);
2428 ip_drop_packet(first_mp, B_TRUE, NULL,
2429 NULL, &ipdrops_spd_got_clear,
2430 &spd_dropper);
2431 BUMP_MIB(&ip_mib, ipsecInFailed);
2432 return (NULL);
2433 }
2434 } else {
2435 BUMP_MIB(&ip_mib, ipsecInSucceeded);
2436 return (first_mp);
2437 }
2438 } else {
2439 /*
2440 * As this is a non-hardbound connection we need
2441 * to look at both per-socket policy and global
2442 * policy. As this is cleartext, mark the mp as
2443 * M_DATA in case it is an ICMP error being
2444 * reported, before calling ipsec_check_global_policy,
2445 * so that it does not mistake it for an IPSEC_IN.
2446 */
2447 uchar_t db_type = mp->b_datap->db_type;
2448 mp->b_datap->db_type = M_DATA;
2449 first_mp = ipsec_check_global_policy(first_mp, connp,
2450 ipha, ip6h, mctl_present);
2451 if (first_mp != NULL)
2452 mp->b_datap->db_type = db_type;
2453 return (first_mp);
2454 }
2455 }
2456 /*
2457 * If it is inbound, check whether the attached message
2458 * is secure or not. We have a special case for ICMP,
2459 * where we have an IPSEC_IN message and the attached
2460 * message is not secure. See icmp_inbound_error_fanout
2461 * for details.
2462 */
2463 ASSERT(ipsec_mp != NULL);
2464 ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
2465 ii = (ipsec_in_t *)ipsec_mp->b_rptr;
2466
2467 if (!ii->ipsec_in_secure)
2468 goto clear;
2469
2470 /*
2471 * mp (first_mp->b_cont here) could be either an M_CTL message
2472 * for icmp errors being sent up or an M_DATA message.
2473 */
2474 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA);
2475
2476 ASSERT(ii->ipsec_in_type == IPSEC_IN);
2477
2478 if (ipl == NULL) {
2479 /*
2480 * We don't have policies cached in the conn
2481 * for this stream. So, look at the global
2482 * policy. It will check against conn or global
2483 * depending on whichever is stronger.
2484 */
2485 return (ipsec_check_global_policy(first_mp, connp,
2486 ipha, ip6h, mctl_present));
2487 }
2488
2489 if (ipl->ipl_in_action != NULL) {
2490 /* Policy is cached & latched; fast(er) path */
2491 const char *reason;
2492 kstat_named_t *counter;
2493 if (ipsec_check_ipsecin_latch(ii, mp, ipl,
2494 ipha, ip6h, &reason, &counter, connp)) {
2495 BUMP_MIB(&ip_mib, ipsecInSucceeded);
2496 return (first_mp);
2497 }
2498 ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
2499 "ipsec inbound policy mismatch: %s, packet dropped\n",
2500 reason);
2501 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter,
2502 &spd_dropper);
2503 BUMP_MIB(&ip_mib, ipsecInFailed);
2504 return (NULL);
2505 } else if (ipl->ipl_in_policy == NULL) {
2506 ipsec_weird_null_inbound_policy++;
2507 return (first_mp);
2508 }
2509
2510 unique_id = conn_to_unique(connp, mp, ipha, ip6h);
2511 IPPOL_REFHOLD(ipl->ipl_in_policy);
2512 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy,
2513 ipha, ip6h, unique_id);
2514 /*
2515 * NOTE: ipsecIn{Failed,Succeeded} bumped by
2516 * ipsec_check_ipsecin_policy().
2517 */
2518 if (first_mp != NULL)
2519 ipsec_latch_inbound(ipl, ii);
2520 return (first_mp);
2521 }
2522
2523 /*
2524 * Returns:
2525 *
2526 * SELRET_NOMEM --> msgpullup() needed to gather things failed.
2527 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2528 * gathering, the initial fragment is too short for 2529 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2530 * set. 2531 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2532 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2533 * should put this packet in a fragment-gathering queue. 2534 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2535 * is set. 2536 */ 2537 static selret_t 2538 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2539 ip6_t *ip6h, uint8_t sel_flags) 2540 { 2541 uint16_t *ports; 2542 ushort_t hdr_len; 2543 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases... */ 2544 mblk_t *spare_mp = NULL; 2545 uint8_t *nexthdrp; 2546 uint8_t nexthdr; 2547 uint8_t *typecode; 2548 uint8_t check_proto; 2549 ip6_pkt_t ipp; 2550 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2551 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2552 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2553 2554 ASSERT((ipha == NULL && ip6h != NULL) || 2555 (ipha != NULL && ip6h == NULL)); 2556 2557 if (ip6h != NULL) { 2558 if (is_icmp) 2559 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2560 2561 check_proto = IPPROTO_ICMPV6; 2562 sel->ips_isv4 = B_FALSE; 2563 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2564 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2565 2566 bzero(&ipp, sizeof (ipp)); 2567 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2568 2569 nexthdr = ip6h->ip6_nxt; 2570 switch (nexthdr) { 2571 case IPPROTO_HOPOPTS: 2572 case IPPROTO_ROUTING: 2573 case IPPROTO_DSTOPTS: 2574 case IPPROTO_FRAGMENT: 2575 /* 2576 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2577 * mblk that's contiguous to feed it 2578 */ 2579 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2580 return (SELRET_NOMEM); 2581 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2582 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2583 &hdr_len, &nexthdrp)) { 2584 /* Malformed packet - caller frees. */ 2585 ipsec_freemsg_chain(spare_mp); 2586 return (SELRET_BADPKT); 2587 } 2588 nexthdr = *nexthdrp; 2589 /* We can just extract based on hdr_len now. */ 2590 break; 2591 default: 2592 hdr_len = IPV6_HDR_LEN; 2593 break; 2594 } 2595 2596 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2597 /* IPv6 Fragment */ 2598 ipsec_freemsg_chain(spare_mp); 2599 return (SELRET_TUNFRAG); 2600 } 2601 } else { 2602 if (is_icmp) 2603 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2604 check_proto = IPPROTO_ICMP; 2605 sel->ips_isv4 = B_TRUE; 2606 sel->ips_local_addr_v4 = ipha->ipha_dst; 2607 sel->ips_remote_addr_v4 = ipha->ipha_src; 2608 nexthdr = ipha->ipha_protocol; 2609 hdr_len = IPH_HDR_LENGTH(ipha); 2610 2611 if (port_policy_present && 2612 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2613 !is_icmp) { 2614 /* IPv4 Fragment */ 2615 ipsec_freemsg_chain(spare_mp); 2616 return (SELRET_TUNFRAG); 2617 } 2618 2619 } 2620 sel->ips_protocol = nexthdr; 2621 2622 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2623 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2624 (!port_policy_present && tunnel_mode)) { 2625 sel->ips_remote_port = sel->ips_local_port = 0; 2626 ipsec_freemsg_chain(spare_mp); 2627 return (SELRET_SUCCESS); 2628 } 2629 2630 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2631 /* If we didn't pullup a copy already, do so now. */ 2632 /* 2633 * XXX performance, will upper-layers frequently split TCP/UDP 2634 * apart from IP or options? 
If so, perhaps we should revisit 2635 * the spare_mp strategy. 2636 */ 2637 ipsec_hdr_pullup_needed++; 2638 if (spare_mp == NULL && 2639 (spare_mp = msgpullup(mp, -1)) == NULL) { 2640 return (SELRET_NOMEM); 2641 } 2642 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2643 } else { 2644 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2645 } 2646 2647 if (nexthdr == check_proto) { 2648 typecode = (uint8_t *)ports; 2649 sel->ips_icmp_type = *typecode++; 2650 sel->ips_icmp_code = *typecode; 2651 sel->ips_remote_port = sel->ips_local_port = 0; 2652 } else { 2653 sel->ips_remote_port = *ports++; 2654 sel->ips_local_port = *ports; 2655 } 2656 ipsec_freemsg_chain(spare_mp); 2657 return (SELRET_SUCCESS); 2658 } 2659 2660 static boolean_t 2661 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2662 ip6_t *ip6h, int outer_hdr_len) 2663 { 2664 /* 2665 * XXX cut&paste shared with ipsec_init_inbound_sel 2666 */ 2667 uint16_t *ports; 2668 ushort_t hdr_len; 2669 mblk_t *spare_mp = NULL; 2670 uint8_t *nexthdrp; 2671 uint8_t nexthdr; 2672 uint8_t *typecode; 2673 uint8_t check_proto; 2674 2675 ASSERT((ipha == NULL && ip6h != NULL) || 2676 (ipha != NULL && ip6h == NULL)); 2677 2678 if (ip6h != NULL) { 2679 check_proto = IPPROTO_ICMPV6; 2680 nexthdr = ip6h->ip6_nxt; 2681 switch (nexthdr) { 2682 case IPPROTO_HOPOPTS: 2683 case IPPROTO_ROUTING: 2684 case IPPROTO_DSTOPTS: 2685 case IPPROTO_FRAGMENT: 2686 /* 2687 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2688 * mblk that's contiguous to feed it 2689 */ 2690 spare_mp = msgpullup(mp, -1); 2691 if (spare_mp == NULL || 2692 !ip_hdr_length_nexthdr_v6(spare_mp, 2693 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2694 &hdr_len, &nexthdrp)) { 2695 /* Always works, even if NULL. */ 2696 ipsec_freemsg_chain(spare_mp); 2697 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2698 &ipdrops_spd_nomem, &spd_dropper); 2699 return (B_FALSE); 2700 } else { 2701 nexthdr = *nexthdrp; 2702 /* We can just extract based on hdr_len now. */ 2703 } 2704 break; 2705 default: 2706 hdr_len = IPV6_HDR_LEN; 2707 break; 2708 } 2709 } else { 2710 check_proto = IPPROTO_ICMP; 2711 hdr_len = IPH_HDR_LENGTH(ipha); 2712 nexthdr = ipha->ipha_protocol; 2713 } 2714 2715 sel->ips_protocol = nexthdr; 2716 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2717 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2718 sel->ips_local_port = sel->ips_remote_port = 0; 2719 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2720 return (B_TRUE); 2721 } 2722 2723 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2724 /* If we didn't pullup a copy already, do so now. */ 2725 /* 2726 * XXX performance, will upper-layers frequently split TCP/UDP 2727 * apart from IP or options? If so, perhaps we should revisit 2728 * the spare_mp strategy. 2729 * 2730 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2731 */ 2732 if (spare_mp == NULL && 2733 (spare_mp = msgpullup(mp, -1)) == NULL) { 2734 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2735 &ipdrops_spd_nomem, &spd_dropper); 2736 return (B_FALSE); 2737 } 2738 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2739 } else { 2740 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2741 } 2742 2743 if (nexthdr == check_proto) { 2744 typecode = (uint8_t *)ports; 2745 sel->ips_icmp_type = *typecode++; 2746 sel->ips_icmp_code = *typecode; 2747 sel->ips_remote_port = sel->ips_local_port = 0; 2748 } else { 2749 sel->ips_local_port = *ports++; 2750 sel->ips_remote_port = *ports; 2751 } 2752 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2753 return (B_TRUE); 2754 } 2755 2756 /* 2757 * Create an ipsec_action_t based on the way an inbound packet was protected. 2758 * Used to reflect traffic back to a sender. 2759 * 2760 * We don't bother interning the action into the hash table. 2761 */ 2762 ipsec_action_t * 2763 ipsec_in_to_out_action(ipsec_in_t *ii) 2764 { 2765 ipsa_t *ah_assoc, *esp_assoc; 2766 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2767 ipsec_action_t *ap; 2768 boolean_t unique; 2769 2770 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2771 2772 if (ap == NULL) 2773 return (NULL); 2774 2775 bzero(ap, sizeof (*ap)); 2776 HASH_NULL(ap, ipa_hash); 2777 ap->ipa_next = NULL; 2778 ap->ipa_refs = 1; 2779 2780 /* 2781 * Get the algorithms that were used for this packet. 2782 */ 2783 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2784 ap->ipa_act.ipa_log = 0; 2785 ah_assoc = ii->ipsec_in_ah_sa; 2786 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 2787 2788 esp_assoc = ii->ipsec_in_esp_sa; 2789 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 2790 2791 if (esp_assoc != NULL) { 2792 encr_alg = esp_assoc->ipsa_encr_alg; 2793 espa_alg = esp_assoc->ipsa_auth_alg; 2794 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 2795 } 2796 if (ah_assoc != NULL) 2797 auth_alg = ah_assoc->ipsa_auth_alg; 2798 2799 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 2800 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 2801 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 2802 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 2803 unique = B_FALSE; 2804 2805 if (esp_assoc != NULL) { 2806 ap->ipa_act.ipa_apply.ipp_espa_minbits = 2807 esp_assoc->ipsa_authkeybits; 2808 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 2809 esp_assoc->ipsa_authkeybits; 2810 ap->ipa_act.ipa_apply.ipp_espe_minbits = 2811 esp_assoc->ipsa_encrkeybits; 2812 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 2813 esp_assoc->ipsa_encrkeybits; 2814 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 2815 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 2816 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 2817 unique = B_TRUE; 2818 } 2819 if (ah_assoc != NULL) { 2820 ap->ipa_act.ipa_apply.ipp_ah_minbits = 2821 ah_assoc->ipsa_authkeybits; 2822 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 2823 ah_assoc->ipsa_authkeybits; 2824 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 2825 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 2826 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 2827 unique = B_TRUE; 2828 } 2829 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 2830 ap->ipa_want_unique = unique; 2831 ap->ipa_allow_clear = B_FALSE; 2832 ap->ipa_want_se = ii->ipsec_in_decaps; 2833 ap->ipa_want_ah = (ah_assoc != NULL); 2834 ap->ipa_want_esp = (esp_assoc != NULL); 2835 2836 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 2837 
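/*
* Note that each min/max keybits pair above is set from a single
* inbound SA's key size, so minbits == maxbits; the reflected action
* is pinned to exactly the key strength that was used on the inbound
* packet (e.g. an inbound ESP SA with a 128-bit encryption key yields
* ipp_espe_minbits == ipp_espe_maxbits == 128).
*/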
2838 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 2839 2840 return (ap); 2841 } 2842 2843 2844 /* 2845 * Compute the worst-case amount of extra space required by an action. 2846 * Note that, because of the ESP considerations listed below, this is 2847 * actually not the same as the best-case reduction in the MTU; in the 2848 * future, we should pass additional information to this function to 2849 * allow the actual MTU impact to be computed. 2850 * 2851 * AH: Revisit this if we implement algorithms with 2852 * a verifier size of more than 12 bytes. 2853 * 2854 * ESP: A more exact but more messy computation would take into 2855 * account the interaction between the cipher block size and the 2856 * effective MTU, yielding the inner payload size which reflects a 2857 * packet with *minimum* ESP padding.. 2858 */ 2859 int32_t 2860 ipsec_act_ovhd(const ipsec_act_t *act) 2861 { 2862 int32_t overhead = 0; 2863 2864 if (act->ipa_type == IPSEC_ACT_APPLY) { 2865 const ipsec_prot_t *ipp = &act->ipa_apply; 2866 2867 if (ipp->ipp_use_ah) 2868 overhead += IPSEC_MAX_AH_HDR_SIZE; 2869 if (ipp->ipp_use_esp) { 2870 overhead += IPSEC_MAX_ESP_HDR_SIZE; 2871 overhead += sizeof (struct udphdr); 2872 } 2873 if (ipp->ipp_use_se) 2874 overhead += IP_SIMPLE_HDR_LENGTH; 2875 } 2876 return (overhead); 2877 } 2878 2879 /* 2880 * This hash function is used only when creating policies and thus is not 2881 * performance-critical for packet flows. 2882 * 2883 * Future work: canonicalize the structures hashed with this (i.e., 2884 * zeroize padding) so the hash works correctly. 2885 */ 2886 /* ARGSUSED */ 2887 static uint32_t 2888 policy_hash(int size, const void *start, const void *end) 2889 { 2890 return (0); 2891 } 2892 2893 2894 /* 2895 * Hash function macros for each address type. 2896 * 2897 * The IPV6 hash function assumes that the low order 32-bits of the 2898 * address (typically containing the low order 24 bits of the mac 2899 * address) are reasonably well-distributed. Revisit this if we run 2900 * into trouble from lots of collisions on ::1 addresses and the like 2901 * (seems unlikely). 2902 */ 2903 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 2904 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 2905 2906 /* 2907 * These two hash functions should produce coordinated values 2908 * but have slightly different roles. 2909 */ 2910 static uint32_t 2911 selkey_hash(const ipsec_selkey_t *selkey) 2912 { 2913 uint32_t valid = selkey->ipsl_valid; 2914 2915 if (!(valid & IPSL_REMOTE_ADDR)) 2916 return (IPSEC_SEL_NOHASH); 2917 2918 if (valid & IPSL_IPV4) { 2919 if (selkey->ipsl_remote_pfxlen == 32) 2920 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 2921 ipsec_spd_hashsize)); 2922 } 2923 if (valid & IPSL_IPV6) { 2924 if (selkey->ipsl_remote_pfxlen == 128) 2925 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 2926 ipsec_spd_hashsize)); 2927 } 2928 return (IPSEC_SEL_NOHASH); 2929 } 2930 2931 static uint32_t 2932 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 2933 { 2934 if (sel->ips_isv4) { 2935 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 2936 root->ipr_nchains)); 2937 } 2938 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 2939 } 2940 2941 /* 2942 * Intern actions into the action hash table. 
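* ipsec_act_find() walks the caller's action array from last to first,
* interning each suffix of the resulting chain; two action arrays that
* share a common tail (say {A, B} and {C, B}, where the letters are
* just stand-ins for action values) will therefore share the interned
* node for B through ipa_next.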
2943 */ 2944 ipsec_action_t * 2945 ipsec_act_find(const ipsec_act_t *a, int n) 2946 { 2947 int i; 2948 uint32_t hval; 2949 ipsec_action_t *ap; 2950 ipsec_action_t *prev = NULL; 2951 int32_t overhead, maxovhd = 0; 2952 boolean_t allow_clear = B_FALSE; 2953 boolean_t want_ah = B_FALSE; 2954 boolean_t want_esp = B_FALSE; 2955 boolean_t want_se = B_FALSE; 2956 boolean_t want_unique = B_FALSE; 2957 2958 /* 2959 * TODO: should canonicalize a[] (i.e., zeroize any padding) 2960 * so we can use a non-trivial policy_hash function. 2961 */ 2962 for (i = n-1; i >= 0; i--) { 2963 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 2964 2965 HASH_LOCK(ipsec_action_hash, hval); 2966 2967 for (HASH_ITERATE(ap, ipa_hash, ipsec_action_hash, hval)) { 2968 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 2969 continue; 2970 if (ap->ipa_next != prev) 2971 continue; 2972 break; 2973 } 2974 if (ap != NULL) { 2975 HASH_UNLOCK(ipsec_action_hash, hval); 2976 prev = ap; 2977 continue; 2978 } 2979 /* 2980 * need to allocate a new one.. 2981 */ 2982 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2983 if (ap == NULL) { 2984 HASH_UNLOCK(ipsec_action_hash, hval); 2985 if (prev != NULL) 2986 ipsec_action_free(prev); 2987 return (NULL); 2988 } 2989 HASH_INSERT(ap, ipa_hash, ipsec_action_hash, hval); 2990 2991 ap->ipa_next = prev; 2992 ap->ipa_act = a[i]; 2993 2994 overhead = ipsec_act_ovhd(&a[i]); 2995 if (maxovhd < overhead) 2996 maxovhd = overhead; 2997 2998 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 2999 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3000 allow_clear = B_TRUE; 3001 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3002 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3003 3004 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3005 want_ah |= ipp->ipp_use_ah; 3006 want_esp |= ipp->ipp_use_esp; 3007 want_se |= ipp->ipp_use_se; 3008 want_unique |= ipp->ipp_use_unique; 3009 } 3010 ap->ipa_allow_clear = allow_clear; 3011 ap->ipa_want_ah = want_ah; 3012 ap->ipa_want_esp = want_esp; 3013 ap->ipa_want_se = want_se; 3014 ap->ipa_want_unique = want_unique; 3015 ap->ipa_refs = 1; /* from the hash table */ 3016 ap->ipa_ovhd = maxovhd; 3017 if (prev) 3018 prev->ipa_refs++; 3019 prev = ap; 3020 HASH_UNLOCK(ipsec_action_hash, hval); 3021 } 3022 3023 ap->ipa_refs++; /* caller's reference */ 3024 3025 return (ap); 3026 } 3027 3028 /* 3029 * Called when refcount goes to 0, indicating that all references to this 3030 * node are gone. 3031 * 3032 * This does not unchain the action from the hash table. 3033 */ 3034 void 3035 ipsec_action_free(ipsec_action_t *ap) 3036 { 3037 for (;;) { 3038 ipsec_action_t *np = ap->ipa_next; 3039 ASSERT(ap->ipa_refs == 0); 3040 ASSERT(ap->ipa_hash.hash_pp == NULL); 3041 kmem_cache_free(ipsec_action_cache, ap); 3042 ap = np; 3043 /* Inlined IPACT_REFRELE -- avoid recursion */ 3044 if (ap == NULL) 3045 break; 3046 membar_exit(); 3047 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3048 break; 3049 /* End inlined IPACT_REFRELE */ 3050 } 3051 } 3052 3053 /* 3054 * Periodically sweep action hash table for actions with refcount==1, and 3055 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3056 * because we can't close the race between another thread finding the action 3057 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3058 * Instead, we run this function sporadically to clean up after ourselves; 3059 * we also set it as the "reclaim" function for the action kmem_cache. 
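* Freeing the head of a chain drops the reference it holds on its
* ipa_next successor, so a successor that was pinned at two references
* becomes reclaimable only on a later sweep.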
3060 *
3061 * Note that it may take several passes of ipsec_action_gc() to free all
3062 * "stale" actions.
3063 */
3064 /* ARGSUSED */
3065 static void
3066 ipsec_action_reclaim(void *dummy)
3067 {
3068 int i;
3069
3070 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
3071 ipsec_action_t *ap, *np;
3072
3073 /* skip the lock if nobody home */
3074 if (ipsec_action_hash[i].hash_head == NULL)
3075 continue;
3076
3077 HASH_LOCK(ipsec_action_hash, i);
3078 for (ap = ipsec_action_hash[i].hash_head;
3079 ap != NULL; ap = np) {
3080 ASSERT(ap->ipa_refs > 0);
3081 np = ap->ipa_hash.hash_next;
3082 if (ap->ipa_refs > 1)
3083 continue;
3084 HASH_UNCHAIN(ap, ipa_hash, ipsec_action_hash, i);
3085 IPACT_REFRELE(ap);
3086 }
3087 HASH_UNLOCK(ipsec_action_hash, i);
3088 }
3089 }
3090
3091 /*
3092 * Intern a selector set into the selector set hash table.
3093 * This is simpler than the actions case..
3094 */
3095 static ipsec_sel_t *
3096 ipsec_find_sel(ipsec_selkey_t *selkey)
3097 {
3098 ipsec_sel_t *sp;
3099 uint32_t hval, bucket;
3100
3101 /*
3102 * Exactly one AF bit should be set in selkey.
3103 */
3104 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^
3105 !(selkey->ipsl_valid & IPSL_IPV6));
3106
3107 hval = selkey_hash(selkey);
3108 /* Set pol_hval to uninitialized until we put it in a polhead. */
3109 selkey->ipsl_sel_hval = hval;
3110
3111 bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval;
3112
3113 ASSERT(!HASH_LOCKED(ipsec_sel_hash, bucket));
3114 HASH_LOCK(ipsec_sel_hash, bucket);
3115
3116 for (HASH_ITERATE(sp, ipsl_hash, ipsec_sel_hash, bucket)) {
3117 if (bcmp(&sp->ipsl_key, selkey,
3118 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0)
3119 break;
3120 }
3121 if (sp != NULL) {
3122 sp->ipsl_refs++;
3123
3124 HASH_UNLOCK(ipsec_sel_hash, bucket);
3125 return (sp);
3126 }
3127
3128 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP);
3129 if (sp == NULL) {
3130 HASH_UNLOCK(ipsec_sel_hash, bucket);
3131 return (NULL);
3132 }
3133
3134 HASH_INSERT(sp, ipsl_hash, ipsec_sel_hash, bucket);
3135 sp->ipsl_refs = 2; /* one for hash table, one for caller */
3136 sp->ipsl_key = *selkey;
3137 /* Set to uninitialized and have insertion into polhead fix things. */
3138 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH)
3139 sp->ipsl_key.ipsl_pol_hval = 0;
3140 else
3141 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH;
3142
3143 HASH_UNLOCK(ipsec_sel_hash, bucket);
3144
3145 return (sp);
3146 }
3147
3148 static void
3149 ipsec_sel_rel(ipsec_sel_t **spp)
3150 {
3151 ipsec_sel_t *sp = *spp;
3152 int hval = sp->ipsl_key.ipsl_sel_hval;
3153 *spp = NULL;
3154
3155 if (hval == IPSEC_SEL_NOHASH)
3156 hval = 0;
3157
3158 ASSERT(!HASH_LOCKED(ipsec_sel_hash, hval));
3159 HASH_LOCK(ipsec_sel_hash, hval);
3160 if (--sp->ipsl_refs == 1) {
3161 HASH_UNCHAIN(sp, ipsl_hash, ipsec_sel_hash, hval);
3162 sp->ipsl_refs--;
3163 HASH_UNLOCK(ipsec_sel_hash, hval);
3164 ASSERT(sp->ipsl_refs == 0);
3165 kmem_cache_free(ipsec_sel_cache, sp);
3166 /* Caller unlocks */
3167 return;
3168 }
3169
3170 HASH_UNLOCK(ipsec_sel_hash, hval);
3171 }
3172
3173 /*
3174 * Free a policy rule which we know is no longer being referenced.
3175 */
3176 void
3177 ipsec_policy_free(ipsec_policy_t *ipp)
3178 {
3179 ASSERT(ipp->ipsp_refs == 0);
3180 ASSERT(ipp->ipsp_sel != NULL);
3181 ASSERT(ipp->ipsp_act != NULL);
3182 ipsec_sel_rel(&ipp->ipsp_sel);
3183 IPACT_REFRELE(ipp->ipsp_act);
3184 kmem_cache_free(ipsec_pol_cache, ipp);
3185 }
3186
3187 /*
3188 * Construction of new policy rules; construct a policy, and add it to
3189 * the appropriate tables.
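* (ipsec_policy_create() itself only builds the rule; it is entered
* into a ruleset separately, via ipsec_check_policy() and
* ipsec_enter_policy() below.)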
3190 */ 3191 ipsec_policy_t * 3192 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3193 int nacts, int prio, uint64_t *index_ptr) 3194 { 3195 ipsec_action_t *ap; 3196 ipsec_sel_t *sp; 3197 ipsec_policy_t *ipp; 3198 3199 if (index_ptr == NULL) 3200 index_ptr = &ipsec_next_policy_index; 3201 3202 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3203 ap = ipsec_act_find(a, nacts); 3204 sp = ipsec_find_sel(keys); 3205 3206 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3207 if (ap != NULL) { 3208 IPACT_REFRELE(ap); 3209 } 3210 if (sp != NULL) 3211 ipsec_sel_rel(&sp); 3212 if (ipp != NULL) 3213 kmem_cache_free(ipsec_pol_cache, ipp); 3214 return (NULL); 3215 } 3216 3217 HASH_NULL(ipp, ipsp_hash); 3218 3219 ipp->ipsp_refs = 1; /* caller's reference */ 3220 ipp->ipsp_sel = sp; 3221 ipp->ipsp_act = ap; 3222 ipp->ipsp_prio = prio; /* rule priority */ 3223 ipp->ipsp_index = *index_ptr; 3224 (*index_ptr)++; 3225 3226 return (ipp); 3227 } 3228 3229 static void 3230 ipsec_update_present_flags() 3231 { 3232 boolean_t hashpol = (avl_numnodes(&system_policy.iph_rulebyid) > 0); 3233 3234 if (hashpol) { 3235 ipsec_outbound_v4_policy_present = B_TRUE; 3236 ipsec_outbound_v6_policy_present = B_TRUE; 3237 ipsec_inbound_v4_policy_present = B_TRUE; 3238 ipsec_inbound_v6_policy_present = B_TRUE; 3239 return; 3240 } 3241 3242 ipsec_outbound_v4_policy_present = (NULL != 3243 system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3244 ipr_nonhash[IPSEC_AF_V4]); 3245 ipsec_outbound_v6_policy_present = (NULL != 3246 system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3247 ipr_nonhash[IPSEC_AF_V6]); 3248 ipsec_inbound_v4_policy_present = (NULL != 3249 system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3250 ipr_nonhash[IPSEC_AF_V4]); 3251 ipsec_inbound_v6_policy_present = (NULL != 3252 system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3253 ipr_nonhash[IPSEC_AF_V6]); 3254 } 3255 3256 boolean_t 3257 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir) 3258 { 3259 ipsec_sel_t *sp; 3260 ipsec_policy_t *ip, *nip, *head; 3261 int af; 3262 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3263 3264 sp = ipsec_find_sel(keys); 3265 3266 if (sp == NULL) 3267 return (B_FALSE); 3268 3269 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? IPSEC_AF_V4 : IPSEC_AF_V6; 3270 3271 rw_enter(&php->iph_lock, RW_WRITER); 3272 3273 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) { 3274 head = pr->ipr_nonhash[af]; 3275 } else { 3276 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3277 } 3278 3279 for (ip = head; ip != NULL; ip = nip) { 3280 nip = ip->ipsp_hash.hash_next; 3281 if (ip->ipsp_sel != sp) { 3282 continue; 3283 } 3284 3285 IPPOL_UNCHAIN(php, ip); 3286 3287 php->iph_gen++; 3288 ipsec_update_present_flags(); 3289 3290 rw_exit(&php->iph_lock); 3291 3292 ipsec_sel_rel(&sp); 3293 3294 return (B_TRUE); 3295 } 3296 3297 rw_exit(&php->iph_lock); 3298 ipsec_sel_rel(&sp); 3299 return (B_FALSE); 3300 } 3301 3302 int 3303 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index) 3304 { 3305 boolean_t found = B_FALSE; 3306 ipsec_policy_t ipkey; 3307 ipsec_policy_t *ip; 3308 avl_index_t where; 3309 3310 (void) memset(&ipkey, 0, sizeof (ipkey)); 3311 ipkey.ipsp_index = policy_index; 3312 3313 rw_enter(&php->iph_lock, RW_WRITER); 3314 3315 /* 3316 * We could be cleverer here about the walk. 
3317 * but well, (k+1)*log(N) will do for now (k == number of matches,
3318 * N == number of table entries).
3319 */
3320 for (;;) {
3321 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid,
3322 (void *)&ipkey, &where);
3323 ASSERT(ip == NULL);
3324
3325 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER);
3326
3327 if (ip == NULL)
3328 break;
3329
3330 if (ip->ipsp_index != policy_index) {
3331 ASSERT(ip->ipsp_index > policy_index);
3332 break;
3333 }
3334
3335 IPPOL_UNCHAIN(php, ip);
3336 found = B_TRUE;
3337 }
3338
3339 if (found) {
3340 php->iph_gen++;
3341 ipsec_update_present_flags();
3342 }
3343
3344 rw_exit(&php->iph_lock);
3345
3346 return (found ? 0 : ENOENT);
3347 }
3348
3349 /*
3350 * Given a constructed ipsec_policy_t policy rule, see if it can be entered
3351 * into the correct policy ruleset. As a side-effect, it sets the hash
3352 * entries on "ipp"'s ipsp_pol_hval.
3353 *
3354 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
3355 * duplicate policy exists with exactly the same selectors, or because an
3356 * icmp rule exists with a different encryption/authentication action).
3357 */
3358 boolean_t
3359 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
3360 {
3361 ipsec_policy_root_t *pr = &php->iph_root[direction];
3362 int af = -1;
3363 ipsec_policy_t *p2, *head;
3364 uint8_t check_proto;
3365 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3366 uint32_t valid = selkey->ipsl_valid;
3367
3368 if (valid & IPSL_IPV6) {
3369 ASSERT(!(valid & IPSL_IPV4));
3370 af = IPSEC_AF_V6;
3371 check_proto = IPPROTO_ICMPV6;
3372 } else {
3373 ASSERT(valid & IPSL_IPV4);
3374 af = IPSEC_AF_V4;
3375 check_proto = IPPROTO_ICMP;
3376 }
3377
3378 ASSERT(RW_WRITE_HELD(&php->iph_lock));
3379
3380 /*
3381 * Double-check that we don't have any duplicate selectors here.
3382 * Because selectors are interned below, we need only compare pointers
3383 * for equality.
3384 */
3385 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
3386 head = pr->ipr_nonhash[af];
3387 } else {
3388 selkey->ipsl_pol_hval =
3389 (selkey->ipsl_valid & IPSL_IPV4) ?
3390 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4,
3391 pr->ipr_nchains) :
3392 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6,
3393 pr->ipr_nchains);
3394
3395 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head;
3396 }
3397
3398 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3399 if (p2->ipsp_sel == ipp->ipsp_sel)
3400 return (B_FALSE);
3401 }
3402
3403 /*
3404 * If it's ICMP and not a drop or pass rule, run through the ICMP
3405 * rules and make sure the action is either new or the same as any
3406 * other actions. We don't have to check the full chain because
3407 * discard and bypass will override all other actions.
3408 */
3409
3410 if (valid & IPSL_PROTOCOL &&
3411 selkey->ipsl_proto == check_proto &&
3412 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) {
3413
3414 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3415
3416 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL &&
3417 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto &&
3418 (p2->ipsp_act->ipa_act.ipa_type ==
3419 IPSEC_ACT_APPLY)) {
3420 return (ipsec_compare_action(p2, ipp));
3421 }
3422 }
3423 }
3424
3425 return (B_TRUE);
3426 }
3427
3428 /*
3429 * Compare the action chains of two policies for equality.
3430 * B_TRUE -> effective equality
3431 */
3432
3433 static boolean_t
3434 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2)
3435 {
3436
3437 ipsec_action_t *act1, *act2;
3438
3439 /* We have a valid rule.
Let's compare the actions */ 3440 if (p1->ipsp_act == p2->ipsp_act) { 3441 /* same action. We are good */ 3442 return (B_TRUE); 3443 } 3444 3445 /* we have to walk the chain */ 3446 3447 act1 = p1->ipsp_act; 3448 act2 = p2->ipsp_act; 3449 3450 while (act1 != NULL && act2 != NULL) { 3451 3452 /* otherwise, Are we close enough? */ 3453 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3454 act1->ipa_want_ah != act2->ipa_want_ah || 3455 act1->ipa_want_esp != act2->ipa_want_esp || 3456 act1->ipa_want_se != act2->ipa_want_se) { 3457 /* Nope, we aren't */ 3458 return (B_FALSE); 3459 } 3460 3461 if (act1->ipa_want_ah) { 3462 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3463 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3464 return (B_FALSE); 3465 } 3466 3467 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3468 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3469 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3470 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3471 return (B_FALSE); 3472 } 3473 } 3474 3475 if (act1->ipa_want_esp) { 3476 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3477 act2->ipa_act.ipa_apply.ipp_use_esp || 3478 act1->ipa_act.ipa_apply.ipp_use_espa != 3479 act2->ipa_act.ipa_apply.ipp_use_espa) { 3480 return (B_FALSE); 3481 } 3482 3483 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3484 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3485 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3486 return (B_FALSE); 3487 } 3488 3489 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3490 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3491 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3492 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3493 return (B_FALSE); 3494 } 3495 } 3496 3497 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3498 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3499 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3500 return (B_FALSE); 3501 } 3502 3503 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3504 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3505 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3506 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3507 return (B_FALSE); 3508 } 3509 } 3510 3511 } 3512 3513 act1 = act1->ipa_next; 3514 act2 = act2->ipa_next; 3515 } 3516 3517 if (act1 != NULL || act2 != NULL) { 3518 return (B_FALSE); 3519 } 3520 3521 return (B_TRUE); 3522 } 3523 3524 3525 /* 3526 * Given a constructed ipsec_policy_t policy rule, enter it into 3527 * the correct policy ruleset. 3528 * 3529 * ipsec_check_policy() is assumed to have succeeded first (to check for 3530 * duplicates). 
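*
* Both ipsec_check_policy() and this function assert that the caller
* holds the polhead write lock, so the duplicate check and the
* insertion are atomic with respect to other updaters.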
3531 */ 3532 void 3533 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3534 { 3535 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3536 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3537 uint32_t valid = selkey->ipsl_valid; 3538 uint32_t hval = selkey->ipsl_pol_hval; 3539 int af = -1; 3540 3541 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3542 3543 if (valid & IPSL_IPV6) { 3544 ASSERT(!(valid & IPSL_IPV4)); 3545 af = IPSEC_AF_V6; 3546 } else { 3547 ASSERT(valid & IPSL_IPV4); 3548 af = IPSEC_AF_V4; 3549 } 3550 3551 php->iph_gen++; 3552 3553 if (hval == IPSEC_SEL_NOHASH) { 3554 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3555 } else { 3556 HASH_LOCK(pr->ipr_hash, hval); 3557 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3558 HASH_UNLOCK(pr->ipr_hash, hval); 3559 } 3560 3561 ipsec_insert_always(&php->iph_rulebyid, ipp); 3562 3563 ipsec_update_present_flags(); 3564 } 3565 3566 static void 3567 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr) 3568 { 3569 ipsec_policy_t *ip, *nip; 3570 3571 int af, chain, nchain; 3572 3573 for (af = 0; af < IPSEC_NAF; af++) { 3574 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3575 nip = ip->ipsp_hash.hash_next; 3576 IPPOL_UNCHAIN(php, ip); 3577 } 3578 ipr->ipr_nonhash[af] = NULL; 3579 } 3580 nchain = ipr->ipr_nchains; 3581 3582 for (chain = 0; chain < nchain; chain++) { 3583 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3584 ip = nip) { 3585 nip = ip->ipsp_hash.hash_next; 3586 IPPOL_UNCHAIN(php, ip); 3587 } 3588 ipr->ipr_hash[chain].hash_head = NULL; 3589 } 3590 } 3591 3592 void 3593 ipsec_polhead_flush(ipsec_policy_head_t *php) 3594 { 3595 int dir; 3596 3597 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3598 3599 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3600 ipsec_ipr_flush(php, &php->iph_root[dir]); 3601 3602 ipsec_update_present_flags(); 3603 } 3604 3605 void 3606 ipsec_polhead_free(ipsec_policy_head_t *php) 3607 { 3608 int dir; 3609 3610 ASSERT(php->iph_refs == 0); 3611 rw_enter(&php->iph_lock, RW_WRITER); 3612 ipsec_polhead_flush(php); 3613 rw_exit(&php->iph_lock); 3614 rw_destroy(&php->iph_lock); 3615 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3616 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3617 int chain; 3618 3619 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3620 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3621 3622 } 3623 ipsec_polhead_free_table(php); 3624 kmem_free(php, sizeof (*php)); 3625 } 3626 3627 static void 3628 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3629 { 3630 int af; 3631 3632 ipr->ipr_nchains = 0; 3633 ipr->ipr_hash = NULL; 3634 3635 for (af = 0; af < IPSEC_NAF; af++) { 3636 ipr->ipr_nonhash[af] = NULL; 3637 } 3638 } 3639 3640 ipsec_policy_head_t * 3641 ipsec_polhead_create(void) 3642 { 3643 ipsec_policy_head_t *php; 3644 3645 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3646 if (php == NULL) 3647 return (php); 3648 3649 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3650 php->iph_refs = 1; 3651 php->iph_gen = 0; 3652 3653 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3654 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3655 3656 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3657 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3658 3659 return (php); 3660 } 3661 3662 /* 3663 * Clone the policy head into a new polhead; release one reference to the 3664 * old one and return the only reference to the new one. 3665 * If the old one had a refcount of 1, just return it. 
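*
* This yields copy-on-write behavior for shared policy heads. A
* hypothetical caller (sketch only, not an actual caller in this
* file) that wants a private copy before modifying a conn's policy
* might do:
*
*	php = ipsec_polhead_split(connp->conn_policy);
*	if (php == NULL)
*		return (ENOMEM);	/* our ref on the old head survives */
*	connp->conn_policy = php;
*
* leaving other conns that still share the old head unaffected.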
3666 */
3667 ipsec_policy_head_t *
3668 ipsec_polhead_split(ipsec_policy_head_t *php)
3669 {
3670 ipsec_policy_head_t *nphp;
3671
3672 if (php == NULL)
3673 return (ipsec_polhead_create());
3674 else if (php->iph_refs == 1)
3675 return (php);
3676
3677 nphp = ipsec_polhead_create();
3678 if (nphp == NULL)
3679 return (NULL);
3680
3681 if (ipsec_copy_polhead(php, nphp) != 0) {
3682 ipsec_polhead_free(nphp);
3683 return (NULL);
3684 }
3685 IPPH_REFRELE(php);
3686 return (nphp);
3687 }
3688
3689 /*
3690 * When sending a response to an ICMP request or generating a RST
3691 * in the TCP case, the outbound packets need to go out at the same level
3692 * of protection as the incoming ones, i.e., we associate our outbound
3693 * policy with how the packet came in. We call this after we have
3694 * accepted the incoming packet, which may or may not have been in
3695 * clear, and hence we are sending the reply back with protection
3696 * matching the incoming datagram's policy.
3697 *
3698 * NOTE : This technique serves two purposes :
3699 *
3700 * 1) If we have multiple outbound policies, we send out a reply
3701 * matching how the request came in rather than matching the outbound
3702 * policy.
3703 *
3704 * 2) For asymmetric policies, we want to make sure that incoming
3705 * and outgoing traffic have the same level of protection. Asymmetric
3706 * policies exist only with global policy, where we may not have
3707 * both outbound and inbound policy at the same time.
3708 *
3709 * NOTE2: This function is called by cleartext cases, so it needs to be
3710 * in IP proper.
3711 */
3712 boolean_t
3713 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h)
3714 {
3715 ipsec_in_t *ii;
3716 ipsec_out_t *io;
3717 boolean_t v4;
3718 mblk_t *mp;
3719 boolean_t secure, attach_if;
3720 uint_t ifindex;
3721 ipsec_selector_t sel;
3722 ipsec_action_t *reflect_action = NULL;
3723 zoneid_t zoneid;
3724
3725 ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
3726
3727 bzero((void*)&sel, sizeof (sel));
3728
3729 ii = (ipsec_in_t *)ipsec_mp->b_rptr;
3730
3731 mp = ipsec_mp->b_cont;
3732 ASSERT(mp != NULL);
3733
3734 if (ii->ipsec_in_action != NULL) {
3735 /* transfer reference.. */
3736 reflect_action = ii->ipsec_in_action;
3737 ii->ipsec_in_action = NULL;
3738 } else if (!ii->ipsec_in_loopback)
3739 reflect_action = ipsec_in_to_out_action(ii);
3740 secure = ii->ipsec_in_secure;
3741 attach_if = ii->ipsec_in_attach_if;
3742 ifindex = ii->ipsec_in_ill_index;
3743 zoneid = ii->ipsec_in_zoneid;
3744 ASSERT(zoneid != ALL_ZONES);
3745 v4 = ii->ipsec_in_v4;
3746
3747 ipsec_in_release_refs(ii);
3748
3749 /*
3750 * The caller is going to send the datagram out, which might
3751 * go on the wire or be delivered locally through ip_wput_local.
3752 *
3753 * 1) If it goes out on the wire, new associations will be
3754 * obtained.
3755 * 2) If it is delivered locally, ip_wput_local will convert
3756 * this IPSEC_OUT to an IPSEC_IN, looking at the requests.
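*
* Note that the IPSEC_IN mblk is converted to an IPSEC_OUT in place:
* the ipsec_in_t's references were released above, and the same dblk
* is simply rewritten below as an ipsec_out_t.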
3757 */ 3758 3759 io = (ipsec_out_t *)ipsec_mp->b_rptr; 3760 bzero(io, sizeof (ipsec_out_t)); 3761 io->ipsec_out_type = IPSEC_OUT; 3762 io->ipsec_out_len = sizeof (ipsec_out_t); 3763 io->ipsec_out_frtn.free_func = ipsec_out_free; 3764 io->ipsec_out_frtn.free_arg = (char *)io; 3765 io->ipsec_out_act = reflect_action; 3766 3767 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) 3768 return (B_FALSE); 3769 3770 io->ipsec_out_src_port = sel.ips_local_port; 3771 io->ipsec_out_dst_port = sel.ips_remote_port; 3772 io->ipsec_out_proto = sel.ips_protocol; 3773 io->ipsec_out_icmp_type = sel.ips_icmp_type; 3774 io->ipsec_out_icmp_code = sel.ips_icmp_code; 3775 3776 /* 3777 * Don't use global policy for this, as we want 3778 * to use the same protection that was applied to the inbound packet. 3779 */ 3780 io->ipsec_out_use_global_policy = B_FALSE; 3781 io->ipsec_out_proc_begin = B_FALSE; 3782 io->ipsec_out_secure = secure; 3783 io->ipsec_out_v4 = v4; 3784 io->ipsec_out_attach_if = attach_if; 3785 io->ipsec_out_ill_index = ifindex; 3786 io->ipsec_out_zoneid = zoneid; 3787 return (B_TRUE); 3788 } 3789 3790 mblk_t * 3791 ipsec_in_tag(mblk_t *mp, mblk_t *cont) 3792 { 3793 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 3794 ipsec_in_t *nii; 3795 mblk_t *nmp; 3796 frtn_t nfrtn; 3797 3798 ASSERT(ii->ipsec_in_type == IPSEC_IN); 3799 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 3800 3801 nmp = ipsec_in_alloc(ii->ipsec_in_v4); 3802 3803 ASSERT(nmp->b_datap->db_type == M_CTL); 3804 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 3805 3806 /* 3807 * Bump refcounts. 3808 */ 3809 if (ii->ipsec_in_ah_sa != NULL) 3810 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 3811 if (ii->ipsec_in_esp_sa != NULL) 3812 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 3813 if (ii->ipsec_in_policy != NULL) 3814 IPPH_REFHOLD(ii->ipsec_in_policy); 3815 3816 /* 3817 * Copy everything, but preserve the free routine provided by 3818 * ipsec_in_alloc(). 3819 */ 3820 nii = (ipsec_in_t *)nmp->b_rptr; 3821 nfrtn = nii->ipsec_in_frtn; 3822 bcopy(ii, nii, sizeof (*ii)); 3823 nii->ipsec_in_frtn = nfrtn; 3824 3825 nmp->b_cont = cont; 3826 3827 return (nmp); 3828 } 3829 3830 mblk_t * 3831 ipsec_out_tag(mblk_t *mp, mblk_t *cont) 3832 { 3833 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3834 ipsec_out_t *nio; 3835 mblk_t *nmp; 3836 frtn_t nfrtn; 3837 3838 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3839 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 3840 3841 nmp = ipsec_alloc_ipsec_out(); 3842 if (nmp == NULL) { 3843 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 3844 &ipdrops_spd_nomem, &spd_dropper); 3845 return (NULL); 3846 } 3847 ASSERT(nmp->b_datap->db_type == M_CTL); 3848 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 3849 3850 /* 3851 * Bump refcounts. 3852 */ 3853 if (io->ipsec_out_ah_sa != NULL) 3854 IPSA_REFHOLD(io->ipsec_out_ah_sa); 3855 if (io->ipsec_out_esp_sa != NULL) 3856 IPSA_REFHOLD(io->ipsec_out_esp_sa); 3857 if (io->ipsec_out_polhead != NULL) 3858 IPPH_REFHOLD(io->ipsec_out_polhead); 3859 if (io->ipsec_out_policy != NULL) 3860 IPPOL_REFHOLD(io->ipsec_out_policy); 3861 if (io->ipsec_out_act != NULL) 3862 IPACT_REFHOLD(io->ipsec_out_act); 3863 if (io->ipsec_out_latch != NULL) 3864 IPLATCH_REFHOLD(io->ipsec_out_latch); 3865 if (io->ipsec_out_cred != NULL) 3866 crhold(io->ipsec_out_cred); 3867 3868 /* 3869 * Copy everything, but preserve the free routine provided by 3870 * ipsec_alloc_ipsec_out(). 
3871 */ 3872 nio = (ipsec_out_t *)nmp->b_rptr; 3873 nfrtn = nio->ipsec_out_frtn; 3874 bcopy(io, nio, sizeof (*io)); 3875 nio->ipsec_out_frtn = nfrtn; 3876 3877 nmp->b_cont = cont; 3878 3879 return (nmp); 3880 } 3881 3882 static void 3883 ipsec_out_release_refs(ipsec_out_t *io) 3884 { 3885 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3886 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 3887 3888 /* Note: IPSA_REFRELE is multi-line macro */ 3889 if (io->ipsec_out_ah_sa != NULL) 3890 IPSA_REFRELE(io->ipsec_out_ah_sa); 3891 if (io->ipsec_out_esp_sa != NULL) 3892 IPSA_REFRELE(io->ipsec_out_esp_sa); 3893 if (io->ipsec_out_polhead != NULL) 3894 IPPH_REFRELE(io->ipsec_out_polhead); 3895 if (io->ipsec_out_policy != NULL) 3896 IPPOL_REFRELE(io->ipsec_out_policy); 3897 if (io->ipsec_out_act != NULL) 3898 IPACT_REFRELE(io->ipsec_out_act); 3899 if (io->ipsec_out_cred != NULL) { 3900 crfree(io->ipsec_out_cred); 3901 io->ipsec_out_cred = NULL; 3902 } 3903 if (io->ipsec_out_latch) { 3904 IPLATCH_REFRELE(io->ipsec_out_latch); 3905 io->ipsec_out_latch = NULL; 3906 } 3907 } 3908 3909 static void 3910 ipsec_out_free(void *arg) 3911 { 3912 ipsec_out_t *io = (ipsec_out_t *)arg; 3913 ipsec_out_release_refs(io); 3914 kmem_cache_free(ipsec_info_cache, arg); 3915 } 3916 3917 static void 3918 ipsec_in_release_refs(ipsec_in_t *ii) 3919 { 3920 /* Note: IPSA_REFRELE is multi-line macro */ 3921 if (ii->ipsec_in_ah_sa != NULL) 3922 IPSA_REFRELE(ii->ipsec_in_ah_sa); 3923 if (ii->ipsec_in_esp_sa != NULL) 3924 IPSA_REFRELE(ii->ipsec_in_esp_sa); 3925 if (ii->ipsec_in_policy != NULL) 3926 IPPH_REFRELE(ii->ipsec_in_policy); 3927 if (ii->ipsec_in_da != NULL) { 3928 freeb(ii->ipsec_in_da); 3929 ii->ipsec_in_da = NULL; 3930 } 3931 } 3932 3933 static void 3934 ipsec_in_free(void *arg) 3935 { 3936 ipsec_in_t *ii = (ipsec_in_t *)arg; 3937 ipsec_in_release_refs(ii); 3938 kmem_cache_free(ipsec_info_cache, arg); 3939 } 3940 3941 /* 3942 * This is called only for outbound datagrams if the datagram needs to 3943 * go out secure. A NULL mp can be passed to get an ipsec_out. This 3944 * facility is used by ip_unbind. 3945 * 3946 * NOTE : o As the data part could be modified by ipsec_out_process etc. 3947 * we can't make it fast by calling a dup. 3948 */ 3949 mblk_t * 3950 ipsec_alloc_ipsec_out() 3951 { 3952 mblk_t *ipsec_mp; 3953 3954 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 3955 3956 if (io == NULL) 3957 return (NULL); 3958 3959 bzero(io, sizeof (ipsec_out_t)); 3960 3961 io->ipsec_out_type = IPSEC_OUT; 3962 io->ipsec_out_len = sizeof (ipsec_out_t); 3963 io->ipsec_out_frtn.free_func = ipsec_out_free; 3964 io->ipsec_out_frtn.free_arg = (char *)io; 3965 3966 /* 3967 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 3968 * using ipsec_out_zoneid should assert that the zoneid has been set to 3969 * a sane value. 3970 */ 3971 io->ipsec_out_zoneid = ALL_ZONES; 3972 3973 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 3974 &io->ipsec_out_frtn); 3975 if (ipsec_mp == NULL) { 3976 ipsec_out_free(io); 3977 3978 return (NULL); 3979 } 3980 ipsec_mp->b_datap->db_type = M_CTL; 3981 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 3982 3983 return (ipsec_mp); 3984 } 3985 3986 /* 3987 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 3988 * Otherwise initialize using conn. 3989 * 3990 * If pol is non-null, we consume a reference to it. 
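*
* On allocation failure the packet is dropped (charged to
* ipdrops_spd_nomem) and NULL is returned, so the caller must not
* free mp itself.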
3991 */ 3992 mblk_t * 3993 ipsec_attach_ipsec_out(mblk_t *mp, conn_t *connp, ipsec_policy_t *pol, 3994 uint8_t proto) 3995 { 3996 mblk_t *ipsec_mp; 3997 3998 ASSERT((pol != NULL) || (connp != NULL)); 3999 4000 ipsec_mp = ipsec_alloc_ipsec_out(); 4001 if (ipsec_mp == NULL) { 4002 ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4003 "ipsec_attach_ipsec_out: Allocation failure\n"); 4004 ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem, 4005 &spd_dropper); 4006 return (NULL); 4007 } 4008 ipsec_mp->b_cont = mp; 4009 return (ipsec_init_ipsec_out(ipsec_mp, connp, pol, proto)); 4010 } 4011 4012 /* 4013 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4014 * Otherwise initialize using conn. 4015 * 4016 * If pol is non-null, we consume a reference to it. 4017 */ 4018 mblk_t * 4019 ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, 4020 uint8_t proto) 4021 { 4022 mblk_t *mp; 4023 ipsec_out_t *io; 4024 ipsec_policy_t *p; 4025 ipha_t *ipha; 4026 ip6_t *ip6h; 4027 4028 ASSERT((pol != NULL) || (connp != NULL)); 4029 4030 /* 4031 * If mp is NULL, we won't/should not be using it. 4032 */ 4033 mp = ipsec_mp->b_cont; 4034 4035 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4036 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4037 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4038 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4039 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4040 io->ipsec_out_latch = NULL; 4041 /* 4042 * Set the zoneid when we have the connp. 4043 * Otherwise, we're called from ip_wput_attach_policy() who will take 4044 * care of setting the zoneid. 4045 */ 4046 if (connp != NULL) 4047 io->ipsec_out_zoneid = connp->conn_zoneid; 4048 4049 if (mp != NULL) { 4050 ipha = (ipha_t *)mp->b_rptr; 4051 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4052 io->ipsec_out_v4 = B_TRUE; 4053 ip6h = NULL; 4054 } else { 4055 io->ipsec_out_v4 = B_FALSE; 4056 ip6h = (ip6_t *)ipha; 4057 ipha = NULL; 4058 } 4059 } else { 4060 ASSERT(connp != NULL && connp->conn_policy_cached); 4061 ip6h = NULL; 4062 ipha = NULL; 4063 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4064 } 4065 4066 p = NULL; 4067 4068 /* 4069 * Take latched policies over global policy. Check here again for 4070 * this, in case we had conn_latch set while the packet was flying 4071 * around in IP. 4072 */ 4073 if (connp != NULL && connp->conn_latch != NULL) { 4074 p = connp->conn_latch->ipl_out_policy; 4075 io->ipsec_out_latch = connp->conn_latch; 4076 IPLATCH_REFHOLD(connp->conn_latch); 4077 if (p != NULL) { 4078 IPPOL_REFHOLD(p); 4079 } 4080 io->ipsec_out_src_port = connp->conn_lport; 4081 io->ipsec_out_dst_port = connp->conn_fport; 4082 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4083 if (pol != NULL) 4084 IPPOL_REFRELE(pol); 4085 } else if (pol != NULL) { 4086 ipsec_selector_t sel; 4087 4088 bzero((void*)&sel, sizeof (sel)); 4089 4090 p = pol; 4091 /* 4092 * conn does not have the port information. Get 4093 * it from the packet. 4094 */ 4095 4096 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) { 4097 /* Callee did ip_drop_packet(). 
*/ 4098 return (NULL); 4099 } 4100 io->ipsec_out_src_port = sel.ips_local_port; 4101 io->ipsec_out_dst_port = sel.ips_remote_port; 4102 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4103 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4104 } 4105 4106 io->ipsec_out_proto = proto; 4107 io->ipsec_out_use_global_policy = B_TRUE; 4108 io->ipsec_out_secure = (p != NULL); 4109 io->ipsec_out_policy = p; 4110 4111 if (p == NULL) { 4112 if (connp->conn_policy != NULL) { 4113 io->ipsec_out_secure = B_TRUE; 4114 ASSERT(io->ipsec_out_latch == NULL); 4115 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4116 io->ipsec_out_need_policy = B_TRUE; 4117 ASSERT(io->ipsec_out_polhead == NULL); 4118 IPPH_REFHOLD(connp->conn_policy); 4119 io->ipsec_out_polhead = connp->conn_policy; 4120 } 4121 } else { 4122 /* Handle explicit drop action. */ 4123 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4124 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4125 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4126 &ipdrops_spd_explicit, &spd_dropper); 4127 ipsec_mp = NULL; 4128 } 4129 } 4130 4131 return (ipsec_mp); 4132 } 4133 4134 /* 4135 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4136 * and keep track of what-if-any IPsec processing will be applied to the 4137 * datagram. 4138 */ 4139 mblk_t * 4140 ipsec_in_alloc(boolean_t isv4) 4141 { 4142 mblk_t *ipsec_in; 4143 ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4144 4145 if (ii == NULL) 4146 return (NULL); 4147 4148 bzero(ii, sizeof (ipsec_info_t)); 4149 ii->ipsec_in_type = IPSEC_IN; 4150 ii->ipsec_in_len = sizeof (ipsec_in_t); 4151 4152 ii->ipsec_in_v4 = isv4; 4153 ii->ipsec_in_secure = B_TRUE; 4154 4155 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4156 ii->ipsec_in_frtn.free_arg = (char *)ii; 4157 4158 ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI, 4159 &ii->ipsec_in_frtn); 4160 if (ipsec_in == NULL) { 4161 ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n")); 4162 ipsec_in_free(ii); 4163 return (NULL); 4164 } 4165 4166 ipsec_in->b_datap->db_type = M_CTL; 4167 ipsec_in->b_wptr += sizeof (ipsec_info_t); 4168 4169 return (ipsec_in); 4170 } 4171 4172 /* 4173 * This is called from ip_wput_local when a packet which needs 4174 * security is looped back, to convert the IPSEC_OUT to a IPSEC_IN 4175 * before fanout, where the policy check happens. In most of the 4176 * cases, IPSEC processing has *never* been done. There is one case 4177 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where 4178 * the packet is destined for localhost, IPSEC processing has already 4179 * been done. 4180 * 4181 * Future: This could happen after SA selection has occurred for 4182 * outbound.. which will tell us who the src and dst identities are.. 4183 * Then it's just a matter of splicing the ah/esp SA pointers from the 4184 * ipsec_out_t to the ipsec_in_t. 
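 *
 * An illustrative sketch of the loopback caller (hedged; names and
 * arguments are schematic, not a verbatim ip.c excerpt):
 *
 *	if (mctl_present)
 *		ipsec_out_to_in(first_mp);	(convert M_CTL in place)
 *	ip_fanout_proto(q, first_mp, ...);
 *
 * The conversion reuses the existing M_CTL buffer: the ipsec_out_t's
 * references are released and the same block is re-initialized as an
 * ipsec_in_t, so this path cannot fail for lack of memory.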
4185 */ 4186 void 4187 ipsec_out_to_in(mblk_t *ipsec_mp) 4188 { 4189 ipsec_in_t *ii; 4190 ipsec_out_t *io; 4191 ipsec_policy_t *pol; 4192 ipsec_action_t *act; 4193 boolean_t v4, icmp_loopback; 4194 zoneid_t zoneid; 4195 4196 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4197 4198 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4199 4200 v4 = io->ipsec_out_v4; 4201 zoneid = io->ipsec_out_zoneid; 4202 icmp_loopback = io->ipsec_out_icmp_loopback; 4203 4204 act = io->ipsec_out_act; 4205 if (act == NULL) { 4206 pol = io->ipsec_out_policy; 4207 if (pol != NULL) { 4208 act = pol->ipsp_act; 4209 IPACT_REFHOLD(act); 4210 } 4211 } 4212 io->ipsec_out_act = NULL; 4213 4214 ipsec_out_release_refs(io); 4215 4216 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4217 bzero(ii, sizeof (ipsec_in_t)); 4218 ii->ipsec_in_type = IPSEC_IN; 4219 ii->ipsec_in_len = sizeof (ipsec_in_t); 4220 ii->ipsec_in_loopback = B_TRUE; 4221 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4222 ii->ipsec_in_frtn.free_arg = (char *)ii; 4223 ii->ipsec_in_action = act; 4224 ii->ipsec_in_zoneid = zoneid; 4225 4226 /* 4227 * In most of the cases, we can't look at the ipsec_out_XXX_sa 4228 * because this never went through IPSEC processing. So, look at 4229 * the requests and infer whether it would have gone through 4230 * IPSEC processing or not. Initialize the "done" fields with 4231 * the requests. The possible values for "done" fields are : 4232 * 4233 * 1) zero, indicates that a particular preference was never 4234 * requested. 4235 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/ 4236 * IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that 4237 * IPSEC processing has been completed. 4238 */ 4239 ii->ipsec_in_secure = B_TRUE; 4240 ii->ipsec_in_v4 = v4; 4241 ii->ipsec_in_icmp_loopback = icmp_loopback; 4242 ii->ipsec_in_attach_if = B_FALSE; 4243 } 4244 4245 /* 4246 * Consults global policy to see whether this datagram should 4247 * go out secure. If so it attaches a ipsec_mp in front and 4248 * returns. 4249 */ 4250 mblk_t * 4251 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, 4252 conn_t *connp, boolean_t unspec_src, zoneid_t zoneid) 4253 { 4254 mblk_t *mp; 4255 ipsec_out_t *io = NULL; 4256 ipsec_selector_t sel; 4257 uint_t ill_index; 4258 boolean_t conn_dontroutex; 4259 boolean_t conn_multicast_loopx; 4260 boolean_t policy_present; 4261 4262 ASSERT((ipha != NULL && ip6h == NULL) || 4263 (ip6h != NULL && ipha == NULL)); 4264 4265 bzero((void*)&sel, sizeof (sel)); 4266 4267 if (ipha != NULL) 4268 policy_present = ipsec_outbound_v4_policy_present; 4269 else 4270 policy_present = ipsec_outbound_v6_policy_present; 4271 /* 4272 * Fast Path to see if there is any policy. 4273 */ 4274 if (!policy_present) { 4275 if (ipsec_mp->b_datap->db_type == M_CTL) { 4276 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4277 if (!io->ipsec_out_secure) { 4278 /* 4279 * If there is no global policy and ip_wput 4280 * or ip_wput_multicast has attached this mp 4281 * for multicast case, free the ipsec_mp and 4282 * return the original mp. 
4283 */ 4284 mp = ipsec_mp->b_cont; 4285 freeb(ipsec_mp); 4286 ipsec_mp = mp; 4287 io = NULL; 4288 } 4289 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4290 } 4291 if (((io == NULL) || (io->ipsec_out_polhead == NULL)) && 4292 ((connp == NULL) || (connp->conn_policy == NULL))) 4293 return (ipsec_mp); 4294 } 4295 4296 ill_index = 0; 4297 conn_multicast_loopx = conn_dontroutex = B_FALSE; 4298 mp = ipsec_mp; 4299 if (ipsec_mp->b_datap->db_type == M_CTL) { 4300 mp = ipsec_mp->b_cont; 4301 /* 4302 * This is a connection where we have some per-socket 4303 * policy or ip_wput has attached an ipsec_mp for 4304 * the multicast datagram. 4305 */ 4306 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4307 if (!io->ipsec_out_secure) { 4308 /* 4309 * This ipsec_mp was allocated in ip_wput or 4310 * ip_wput_multicast so that we will know the 4311 * value of ill_index, conn_dontroute, 4312 * conn_multicast_loop in the multicast case if 4313 * we inherit global policy here. 4314 */ 4315 ill_index = io->ipsec_out_ill_index; 4316 conn_dontroutex = io->ipsec_out_dontroute; 4317 conn_multicast_loopx = io->ipsec_out_multicast_loop; 4318 freeb(ipsec_mp); 4319 ipsec_mp = mp; 4320 io = NULL; 4321 } 4322 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4323 } 4324 4325 if (ipha != NULL) { 4326 sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ? 4327 ipha->ipha_src : ire->ire_src_addr); 4328 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4329 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4330 sel.ips_isv4 = B_TRUE; 4331 } else { 4332 ushort_t hdr_len; 4333 uint8_t *nexthdrp; 4334 boolean_t is_fragment; 4335 4336 sel.ips_isv4 = B_FALSE; 4337 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4338 if (!unspec_src) 4339 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4340 } else { 4341 sel.ips_local_addr_v6 = ip6h->ip6_src; 4342 } 4343 4344 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment); 4345 if (is_fragment) { 4346 /* 4347 * It's a packet fragment for a packet that 4348 * we have already processed (since IPsec processing 4349 * is done before fragmentation), so we don't 4350 * have to do policy checks again. Fragments can 4351 * come back to us for processing if they have 4352 * been queued up due to flow control. 4353 */ 4354 if (ipsec_mp->b_datap->db_type == M_CTL) { 4355 mp = ipsec_mp->b_cont; 4356 freeb(ipsec_mp); 4357 ipsec_mp = mp; 4358 } 4359 return (ipsec_mp); 4360 } 4361 4362 /* IPv6 common-case. */ 4363 sel.ips_protocol = ip6h->ip6_nxt; 4364 switch (ip6h->ip6_nxt) { 4365 case IPPROTO_TCP: 4366 case IPPROTO_UDP: 4367 case IPPROTO_SCTP: 4368 case IPPROTO_ICMPV6: 4369 break; 4370 default: 4371 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4372 &hdr_len, &nexthdrp)) { 4373 BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); 4374 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4375 return (NULL); 4376 } 4377 sel.ips_protocol = *nexthdrp; 4378 break; 4379 } 4380 } 4381 4382 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) { 4383 if (ipha != NULL) { 4384 BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); 4385 } else { 4386 BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); 4387 } 4388 4389 /* Callee dropped the packet. */ 4390 return (NULL); 4391 } 4392 4393 if (io != NULL) { 4394 /* 4395 * We seem to have some local policy (we already have 4396 * an ipsec_out). Look at global policy and see 4397 * whether we have to inherit or not. 
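 *
 * (For example, a datagram that already carries per-socket policy may
 * still be subject to a stricter global rule; whether and how global
 * policy applies is decided inside ipsec_apply_global_policy().
 * Hypothetical scenario, for illustration only.)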
4398 */ 4399 io->ipsec_out_need_policy = B_FALSE; 4400 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, &sel); 4401 ASSERT((io->ipsec_out_policy != NULL) || 4402 (io->ipsec_out_act != NULL)); 4403 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4404 return (ipsec_mp); 4405 } 4406 ipsec_mp = ipsec_attach_global_policy(mp, connp, &sel); 4407 if (ipsec_mp == NULL) 4408 return (mp); 4409 4410 /* 4411 * Copy the right port information. 4412 */ 4413 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4414 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4415 4416 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4417 ASSERT((io->ipsec_out_policy != NULL) || 4418 (io->ipsec_out_act != NULL)); 4419 io->ipsec_out_src_port = sel.ips_local_port; 4420 io->ipsec_out_dst_port = sel.ips_remote_port; 4421 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4422 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4423 /* 4424 * Set ill_index, conn_dontroute and conn_multicast_loop 4425 * for multicast datagrams. 4426 */ 4427 io->ipsec_out_ill_index = ill_index; 4428 io->ipsec_out_dontroute = conn_dontroutex; 4429 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4430 4431 if (zoneid == ALL_ZONES) 4432 zoneid = GLOBAL_ZONEID; 4433 io->ipsec_out_zoneid = zoneid; 4434 return (ipsec_mp); 4435 } 4436 4437 /* 4438 * When appropriate, this function caches inbound and outbound policy 4439 * for this connection. 4440 * 4441 * XXX need to work out more details about per-interface policy and 4442 * caching here! 4443 * 4444 * XXX may want to split inbound and outbound caching for ill.. 4445 */ 4446 int 4447 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4448 { 4449 boolean_t global_policy_present; 4450 4451 /* 4452 * There is no policy latching for ICMP sockets because we can't 4453 * decide on which policy to use until we see the packet and get 4454 * type/code selectors. 4455 */ 4456 if (connp->conn_ulp == IPPROTO_ICMP || 4457 connp->conn_ulp == IPPROTO_ICMPV6) { 4458 connp->conn_in_enforce_policy = 4459 connp->conn_out_enforce_policy = B_TRUE; 4460 if (connp->conn_latch != NULL) { 4461 IPLATCH_REFRELE(connp->conn_latch); 4462 connp->conn_latch = NULL; 4463 } 4464 connp->conn_flags |= IPCL_CHECK_POLICY; 4465 return (0); 4466 } 4467 4468 global_policy_present = isv4 ? 
4469 (ipsec_outbound_v4_policy_present || 4470 ipsec_inbound_v4_policy_present) : 4471 (ipsec_outbound_v6_policy_present || 4472 ipsec_inbound_v6_policy_present); 4473 4474 if ((connp->conn_policy != NULL) || global_policy_present) { 4475 ipsec_selector_t sel; 4476 ipsec_policy_t *p; 4477 4478 if (connp->conn_latch == NULL && 4479 (connp->conn_latch = iplatch_create()) == NULL) { 4480 return (ENOMEM); 4481 } 4482 4483 sel.ips_protocol = connp->conn_ulp; 4484 sel.ips_local_port = connp->conn_lport; 4485 sel.ips_remote_port = connp->conn_fport; 4486 sel.ips_is_icmp_inv_acq = 0; 4487 sel.ips_isv4 = isv4; 4488 if (isv4) { 4489 sel.ips_local_addr_v4 = connp->conn_src; 4490 sel.ips_remote_addr_v4 = connp->conn_rem; 4491 } else { 4492 sel.ips_local_addr_v6 = connp->conn_srcv6; 4493 sel.ips_remote_addr_v6 = connp->conn_remv6; 4494 } 4495 4496 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel); 4497 if (connp->conn_latch->ipl_in_policy != NULL) 4498 IPPOL_REFRELE(connp->conn_latch->ipl_in_policy); 4499 connp->conn_latch->ipl_in_policy = p; 4500 connp->conn_in_enforce_policy = (p != NULL); 4501 4502 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel); 4503 if (connp->conn_latch->ipl_out_policy != NULL) 4504 IPPOL_REFRELE(connp->conn_latch->ipl_out_policy); 4505 connp->conn_latch->ipl_out_policy = p; 4506 connp->conn_out_enforce_policy = (p != NULL); 4507 4508 /* Clear the latched actions too, in case we're recaching. */ 4509 if (connp->conn_latch->ipl_out_action != NULL) 4510 IPACT_REFRELE(connp->conn_latch->ipl_out_action); 4511 if (connp->conn_latch->ipl_in_action != NULL) 4512 IPACT_REFRELE(connp->conn_latch->ipl_in_action); 4513 } 4514 4515 /* 4516 * We may or may not have policy for this endpoint. We still set 4517 * conn_policy_cached so that inbound datagrams don't have to look 4518 * at global policy as policy is considered latched for these 4519 * endpoints. We should not set conn_policy_cached until the conn 4520 * reflects the actual policy. If we *set* this before inheriting 4521 * the policy there is a window where the check 4522 * CONN_INBOUND_POLICY_PRESENT, will neither check with the policy 4523 * on the conn (because we have not yet copied the policy on to 4524 * conn and hence not set conn_in_enforce_policy) nor with the 4525 * global policy (because conn_policy_cached is already set). 4526 */ 4527 connp->conn_policy_cached = B_TRUE; 4528 if (connp->conn_in_enforce_policy) 4529 connp->conn_flags |= IPCL_CHECK_POLICY; 4530 return (0); 4531 } 4532 4533 void 4534 iplatch_free(ipsec_latch_t *ipl) 4535 { 4536 if (ipl->ipl_out_policy != NULL) 4537 IPPOL_REFRELE(ipl->ipl_out_policy); 4538 if (ipl->ipl_in_policy != NULL) 4539 IPPOL_REFRELE(ipl->ipl_in_policy); 4540 if (ipl->ipl_in_action != NULL) 4541 IPACT_REFRELE(ipl->ipl_in_action); 4542 if (ipl->ipl_out_action != NULL) 4543 IPACT_REFRELE(ipl->ipl_out_action); 4544 if (ipl->ipl_local_cid != NULL) 4545 IPSID_REFRELE(ipl->ipl_local_cid); 4546 if (ipl->ipl_remote_cid != NULL) 4547 IPSID_REFRELE(ipl->ipl_remote_cid); 4548 if (ipl->ipl_local_id != NULL) 4549 crfree(ipl->ipl_local_id); 4550 mutex_destroy(&ipl->ipl_lock); 4551 kmem_free(ipl, sizeof (*ipl)); 4552 } 4553 4554 ipsec_latch_t * 4555 iplatch_create() 4556 { 4557 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4558 if (ipl == NULL) 4559 return (ipl); 4560 bzero(ipl, sizeof (*ipl)); 4561 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4562 ipl->ipl_refcnt = 1; 4563 return (ipl); 4564 } 4565 4566 /* 4567 * Identity hash table. 
4568 * 4569 * Identities are refcounted and "interned" into the hash table. 4570 * Only references coming from other objects (SA's, latching state) 4571 * are counted in ipsid_refcnt. 4572 * 4573 * Locking: IPSID_REFHOLD is safe only when (a) the object's hash bucket 4574 * is locked, (b) we know that the refcount must be > 0. 4575 * 4576 * The ipsid_next and ipsid_ptpn fields are only to be referenced or 4577 * modified when the bucket lock is held; in particular, we only 4578 * delete objects while holding the bucket lock, and we only increase 4579 * the refcount from 0 to 1 while the bucket lock is held. 4580 */ 4581 4582 #define IPSID_HASHSIZE 64 4583 4584 typedef struct ipsif_s 4585 { 4586 ipsid_t *ipsif_head; 4587 kmutex_t ipsif_lock; 4588 } ipsif_t; 4589 4590 ipsif_t ipsid_buckets[IPSID_HASHSIZE]; 4591 4592 /* 4593 * Hash function for ID hash table. 4594 */ 4595 static uint32_t 4596 ipsid_hash(int idtype, char *idstring) 4597 { 4598 uint32_t hval = idtype; 4599 unsigned char c; 4600 4601 while ((c = *idstring++) != 0) { 4602 hval = (hval << 4) | (hval >> 28); 4603 hval ^= c; 4604 } 4605 hval = hval ^ (hval >> 16); 4606 return (hval & (IPSID_HASHSIZE-1)); 4607 } 4608 4609 /* 4610 * Look up identity string in hash table. Return identity object 4611 * corresponding to the name -- either preexisting, or newly allocated. 4612 * 4613 * Return NULL if we need to allocate a new one and can't get memory. 4614 */ 4615 ipsid_t * 4616 ipsid_lookup(int idtype, char *idstring) 4617 { 4618 ipsid_t *retval; 4619 char *nstr; 4620 int idlen = strlen(idstring) + 1; 4621 4622 ipsif_t *bucket = &ipsid_buckets[ipsid_hash(idtype, idstring)]; 4623 4624 mutex_enter(&bucket->ipsif_lock); 4625 4626 for (retval = bucket->ipsif_head; retval != NULL; 4627 retval = retval->ipsid_next) { 4628 if (idtype != retval->ipsid_type) 4629 continue; 4630 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4631 continue; 4632 4633 IPSID_REFHOLD(retval); 4634 mutex_exit(&bucket->ipsif_lock); 4635 return (retval); 4636 } 4637 4638 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4639 if (!retval) { 4640 mutex_exit(&bucket->ipsif_lock); 4641 return (NULL); 4642 } 4643 4644 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4645 if (!nstr) { 4646 mutex_exit(&bucket->ipsif_lock); 4647 kmem_free(retval, sizeof (*retval)); 4648 return (NULL); 4649 } 4650 4651 retval->ipsid_refcnt = 1; 4652 retval->ipsid_next = bucket->ipsif_head; 4653 if (retval->ipsid_next != NULL) 4654 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4655 retval->ipsid_ptpn = &bucket->ipsif_head; 4656 retval->ipsid_type = idtype; 4657 retval->ipsid_cid = nstr; 4658 bucket->ipsif_head = retval; 4659 bcopy(idstring, nstr, idlen); 4660 mutex_exit(&bucket->ipsif_lock); 4661 4662 return (retval); 4663 } 4664 4665 /* 4666 * Garbage collect the identity hash table. 4667 */ 4668 void 4669 ipsid_gc() 4670 { 4671 int i, len; 4672 ipsid_t *id, *nid; 4673 ipsif_t *bucket; 4674 4675 for (i = 0; i < IPSID_HASHSIZE; i++) { 4676 bucket = &ipsid_buckets[i]; 4677 mutex_enter(&bucket->ipsif_lock); 4678 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4679 nid = id->ipsid_next; 4680 if (id->ipsid_refcnt == 0) { 4681 *id->ipsid_ptpn = nid; 4682 if (nid != NULL) 4683 nid->ipsid_ptpn = id->ipsid_ptpn; 4684 len = strlen(id->ipsid_cid) + 1; 4685 kmem_free(id->ipsid_cid, len); 4686 kmem_free(id, sizeof (*id)); 4687 } 4688 } 4689 mutex_exit(&bucket->ipsif_lock); 4690 } 4691 } 4692 4693 /* 4694 * Return true if two identities are the same. 
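 *
 * Because identities are interned by ipsid_lookup(), equality reduces
 * to pointer equality. A hedged sketch (hypothetical identity string):
 *
 *	ipsid_t *a = ipsid_lookup(SADB_IDENTTYPE_FQDN, "gw.example.com");
 *	ipsid_t *b = ipsid_lookup(SADB_IDENTTYPE_FQDN, "gw.example.com");
 *	ASSERT(a == b && ipsid_equal(a, b));
 *
 * (Each successful lookup holds a reference, so both would eventually
 * need to be released.)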
4695 */ 4696 boolean_t 4697 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4698 { 4699 if (id1 == id2) 4700 return (B_TRUE); 4701 #ifdef DEBUG 4702 if ((id1 == NULL) || (id2 == NULL)) 4703 return (B_FALSE); 4704 /* 4705 * test that we're interning id's correctly.. 4706 */ 4707 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4708 (id1->ipsid_type != id2->ipsid_type)); 4709 #endif 4710 return (B_FALSE); 4711 } 4712 4713 /* 4714 * Initialize identity table; called during module initialization. 4715 */ 4716 static void 4717 ipsid_init() 4718 { 4719 ipsif_t *bucket; 4720 int i; 4721 4722 for (i = 0; i < IPSID_HASHSIZE; i++) { 4723 bucket = &ipsid_buckets[i]; 4724 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4725 } 4726 } 4727 4728 /* 4729 * Free identity table (preparatory to module unload) 4730 */ 4731 static void 4732 ipsid_fini() 4733 { 4734 ipsif_t *bucket; 4735 int i; 4736 4737 for (i = 0; i < IPSID_HASHSIZE; i++) { 4738 bucket = &ipsid_buckets[i]; 4739 mutex_destroy(&bucket->ipsif_lock); 4740 } 4741 } 4742 4743 /* 4744 * Update the minimum and maximum supported key sizes for the 4745 * specified algorithm. Must be called while holding the algorithms lock. 4746 */ 4747 void 4748 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type) 4749 { 4750 size_t crypto_min = (size_t)-1, crypto_max = 0; 4751 size_t cur_crypto_min, cur_crypto_max; 4752 boolean_t is_valid; 4753 crypto_mechanism_info_t *mech_infos; 4754 uint_t nmech_infos; 4755 int crypto_rc, i; 4756 crypto_mech_usage_t mask; 4757 4758 ASSERT(MUTEX_HELD(&alg_lock)); 4759 4760 /* 4761 * Compute the min, max, and default key sizes (in number of 4762 * increments to the default key size in bits) as defined 4763 * by the algorithm mappings. This range of key sizes is used 4764 * for policy related operations. The effective key sizes 4765 * supported by the framework could be more limited than 4766 * those defined for an algorithm. 4767 */ 4768 alg->alg_default_bits = alg->alg_key_sizes[0]; 4769 if (alg->alg_increment != 0) { 4770 /* key sizes are defined by range & increment */ 4771 alg->alg_minbits = alg->alg_key_sizes[1]; 4772 alg->alg_maxbits = alg->alg_key_sizes[2]; 4773 4774 alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits, 4775 alg->alg_increment, alg->alg_default_bits); 4776 } else if (alg->alg_nkey_sizes == 0) { 4777 /* no specified key size for algorithm */ 4778 alg->alg_minbits = alg->alg_maxbits = 0; 4779 } else { 4780 /* key sizes are defined by enumeration */ 4781 alg->alg_minbits = (uint16_t)-1; 4782 alg->alg_maxbits = 0; 4783 4784 for (i = 0; i < alg->alg_nkey_sizes; i++) { 4785 if (alg->alg_key_sizes[i] < alg->alg_minbits) 4786 alg->alg_minbits = alg->alg_key_sizes[i]; 4787 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 4788 alg->alg_maxbits = alg->alg_key_sizes[i]; 4789 } 4790 alg->alg_default = 0; 4791 } 4792 4793 if (!(alg->alg_flags & ALG_FLAG_VALID)) 4794 return; 4795 4796 /* 4797 * Mechanisms do not apply to the NULL encryption 4798 * algorithm, so simply return for this case. 4799 */ 4800 if (alg->alg_id == SADB_EALG_NULL) 4801 return; 4802 4803 /* 4804 * Find the min and max key sizes supported by the cryptographic 4805 * framework providers. 
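 *
 * Worked example (illustrative numbers, not from any real provider):
 * a mechanism entry with mi_min_key_size = 16, mi_max_key_size = 32
 * and CRYPTO_KEYSIZE_UNIT_IN_BYTES gives unit_bits = 8, i.e. a
 * framework range of 128..256 bits. With alg_increment = 64 the code
 * below rounds crypto_min up and crypto_max down to multiples of 64
 * and intersects the result with [alg_minbits, alg_maxbits].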
4806 */ 4807 4808 /* get the key sizes supported by the framework */ 4809 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 4810 &mech_infos, &nmech_infos, KM_SLEEP); 4811 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 4812 alg->alg_flags &= ~ALG_FLAG_VALID; 4813 return; 4814 } 4815 4816 /* min and max key sizes supported by framework */ 4817 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 4818 int unit_bits; 4819 4820 /* 4821 * Ignore entries that do not support the operations 4822 * needed for the algorithm type. 4823 */ 4824 if (alg_type == IPSEC_ALG_AUTH) 4825 mask = CRYPTO_MECH_USAGE_MAC; 4826 else 4827 mask = CRYPTO_MECH_USAGE_ENCRYPT | 4828 CRYPTO_MECH_USAGE_DECRYPT; 4829 if ((mech_infos[i].mi_usage & mask) != mask) 4830 continue; 4831 4832 unit_bits = (mech_infos[i].mi_keysize_unit == 4833 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 4834 /* adjust min/max supported by framework */ 4835 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 4836 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 4837 4838 if (cur_crypto_min < crypto_min) 4839 crypto_min = cur_crypto_min; 4840 4841 /* 4842 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 4843 * the crypto framework which means "no upper limit". 4844 */ 4845 if (mech_infos[i].mi_max_key_size == 4846 CRYPTO_EFFECTIVELY_INFINITE) 4847 crypto_max = (size_t)-1; 4848 else if (cur_crypto_max > crypto_max) 4849 crypto_max = cur_crypto_max; 4850 4851 is_valid = B_TRUE; 4852 } 4853 4854 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 4855 nmech_infos); 4856 4857 if (!is_valid) { 4858 /* no key sizes supported by framework */ 4859 alg->alg_flags &= ~ALG_FLAG_VALID; 4860 return; 4861 } 4862 4863 /* 4864 * Determine min and max key sizes from the alg_key_sizes[] 4865 * defined for the algorithm entry. Adjust key sizes based on 4866 * those supported by the framework. 4867 */ 4868 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 4869 if (alg->alg_increment != 0) { 4870 /* supported key sizes are defined by range & increment */ 4871 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 4872 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 4873 4874 alg->alg_ef_minbits = MAX(alg->alg_minbits, 4875 (uint16_t)crypto_min); 4876 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 4877 (uint16_t)crypto_max); 4878 4879 /* 4880 * If the sizes supported by the framework are outside 4881 * the range of sizes defined by the algorithm mappings, 4882 * the algorithm cannot be used. Check for this 4883 * condition here. 4884 */ 4885 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 4886 alg->alg_flags &= ~ALG_FLAG_VALID; 4887 return; 4888 } 4889 4890 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 4891 alg->alg_ef_default_bits = alg->alg_ef_minbits; 4892 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 4893 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 4894 4895 alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits, 4896 alg->alg_increment, alg->alg_ef_default_bits); 4897 } else if (alg->alg_nkey_sizes == 0) { 4898 /* no specified key size for algorithm */ 4899 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 4900 } else { 4901 /* supported key sizes are defined by enumeration */ 4902 alg->alg_ef_minbits = (uint16_t)-1; 4903 alg->alg_ef_maxbits = 0; 4904 4905 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 4906 /* 4907 * Ignore the current key size if it is not in the 4908 * range of sizes supported by the framework.
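 *
 * (For example, an enumerated 192-bit key size is skipped
 * here if the framework only reported support for up to 128
 * bits; illustrative numbers.)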
4909 */ 4910 if (alg->alg_key_sizes[i] < crypto_min || 4911 alg->alg_key_sizes[i] > crypto_max) 4912 continue; 4913 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 4914 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 4915 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 4916 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 4917 is_valid = B_TRUE; 4918 } 4919 4920 if (!is_valid) { 4921 alg->alg_flags &= ~ALG_FLAG_VALID; 4922 return; 4923 } 4924 alg->alg_ef_default = 0; 4925 } 4926 } 4927 4928 /* 4929 * Free the memory used by the specified algorithm. 4930 */ 4931 void 4932 ipsec_alg_free(ipsec_alginfo_t *alg) 4933 { 4934 if (alg == NULL) 4935 return; 4936 4937 if (alg->alg_key_sizes != NULL) 4938 kmem_free(alg->alg_key_sizes, 4939 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 4940 4941 if (alg->alg_block_sizes != NULL) 4942 kmem_free(alg->alg_block_sizes, 4943 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 4944 4945 kmem_free(alg, sizeof (*alg)); 4946 } 4947 4948 /* 4949 * Check the validity of the specified key size for an algorithm. 4950 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 4951 */ 4952 boolean_t 4953 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 4954 { 4955 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 4956 return (B_FALSE); 4957 4958 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 4959 /* 4960 * If the key sizes are defined by enumeration, the new 4961 * key size must be equal to one of the supported values. 4962 */ 4963 int i; 4964 4965 for (i = 0; i < alg->alg_nkey_sizes; i++) 4966 if (key_size == alg->alg_key_sizes[i]) 4967 break; 4968 if (i == alg->alg_nkey_sizes) 4969 return (B_FALSE); 4970 } 4971 4972 return (B_TRUE); 4973 } 4974 4975 /* 4976 * Callback function invoked by the crypto framework when a provider 4977 * registers or unregisters. This callback updates the algorithms 4978 * tables when a crypto algorithm is no longer available or becomes 4979 * available, and triggers the freeing/creation of context templates 4980 * associated with existing SAs, if needed. 4981 */ 4982 void 4983 ipsec_prov_update_callback(uint32_t event, void *event_arg) 4984 { 4985 crypto_notify_event_change_t *prov_change = 4986 (crypto_notify_event_change_t *)event_arg; 4987 uint_t algidx, algid, algtype, mech_count, mech_idx; 4988 ipsec_alginfo_t *alg; 4989 ipsec_alginfo_t oalg; 4990 crypto_mech_name_t *mechs; 4991 boolean_t alg_changed = B_FALSE; 4992 4993 /* ignore events for which we didn't register */ 4994 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 4995 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 4996 " received from crypto framework\n", event)); 4997 return; 4998 } 4999 5000 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5001 if (mechs == NULL) 5002 return; 5003 5004 /* 5005 * Walk the list of currently defined IPsec algorithm. Update 5006 * the algorithm valid flag and trigger an update of the 5007 * SAs that depend on that algorithm. 5008 */ 5009 mutex_enter(&alg_lock); 5010 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5011 for (algidx = 0; algidx < ipsec_nalgs[algtype]; algidx++) { 5012 5013 algid = ipsec_sortlist[algtype][algidx]; 5014 alg = ipsec_alglists[algtype][algid]; 5015 ASSERT(alg != NULL); 5016 5017 /* 5018 * Skip the algorithms which do not map to the 5019 * crypto framework provider being added or removed. 
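 *
 * (For example, if the change event names the mechanism
 * "CKM_AES_CBC", only algorithm entries whose alg_mech_name
 * matches that string are re-examined; illustrative
 * mechanism name.)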
5020 */ 5021 if (strncmp(alg->alg_mech_name, 5022 prov_change->ec_mech_name, 5023 CRYPTO_MAX_MECH_NAME) != 0) 5024 continue; 5025 5026 /* 5027 * Determine if the mechanism is valid. If it 5028 * is not, mark the algorithm as being invalid. If 5029 * it is, mark the algorithm as being valid. 5030 */ 5031 for (mech_idx = 0; mech_idx < mech_count; mech_idx++) 5032 if (strncmp(alg->alg_mech_name, 5033 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0) 5034 break; 5035 if (mech_idx == mech_count && 5036 alg->alg_flags & ALG_FLAG_VALID) { 5037 alg->alg_flags &= ~ALG_FLAG_VALID; 5038 alg_changed = B_TRUE; 5039 } else if (mech_idx < mech_count && 5040 !(alg->alg_flags & ALG_FLAG_VALID)) { 5041 alg->alg_flags |= ALG_FLAG_VALID; 5042 alg_changed = B_TRUE; 5043 } 5044 5045 /* 5046 * Update the supported key sizes, regardless 5047 * of whether a crypto provider was added or 5048 * removed. 5049 */ 5050 oalg = *alg; 5051 ipsec_alg_fix_min_max(alg, algtype); 5052 if (!alg_changed && 5053 (alg->alg_ef_minbits != oalg.alg_ef_minbits || 5054 alg->alg_ef_maxbits != oalg.alg_ef_maxbits || 5055 alg->alg_ef_default != oalg.alg_ef_default || 5056 alg->alg_ef_default_bits != 5057 oalg.alg_ef_default_bits)) 5058 alg_changed = B_TRUE; 5059 5060 /* 5061 * Update the affected SAs if a software provider is 5062 * being added or removed. 5063 */ 5064 if (prov_change->ec_provider_type == 5065 CRYPTO_SW_PROVIDER) 5066 sadb_alg_update(algtype, alg->alg_id, 5067 prov_change->ec_change == 5068 CRYPTO_MECH_ADDED); 5069 } 5070 } 5071 mutex_exit(&alg_lock); 5072 crypto_free_mech_list(mechs, mech_count); 5073 5074 if (alg_changed) { 5075 /* 5076 * An algorithm has changed, i.e. it became valid or 5077 * invalid, or its supported key sizes have changed. 5078 * Notify ipsecah and ipsecesp of this change so 5079 * that they can send a SADB_REGISTER to their consumers. 5080 */ 5081 ipsecah_algs_changed(); 5082 ipsecesp_algs_changed(); 5083 } 5084 } 5085 5086 /* 5087 * Registers with the crypto framework to be notified of crypto 5088 * provider changes. Used to update the algorithm tables and 5089 * to free or create context templates if needed. Invoked after IPsec 5090 * is loaded successfully. 5091 */ 5092 void 5093 ipsec_register_prov_update(void) 5094 { 5095 prov_update_handle = crypto_notify_events( 5096 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); 5097 } 5098 5099 /* 5100 * Unregisters from the framework to be notified of crypto provider 5101 * changes. Called from ipsec_policy_destroy(). 5102 */ 5103 static void 5104 ipsec_unregister_prov_update(void) 5105 { 5106 if (prov_update_handle != NULL) 5107 crypto_unnotify_events(prov_update_handle); 5108 } 5109 5110 /* 5111 * Tunnel-mode support routines. 5112 */ 5113 5114 /* 5115 * Returns an mblk chain suitable for putnext() if policies match and IPsec 5116 * SAs are available. If there's no per-tunnel policy, or the lookup comes 5117 * back with no match, then still return the packet and have global policy 5118 * take a crack at it in IP. 5119 * 5120 * Remember -> we can be forwarding packets. Keep that in mind w.r.t. 5121 * inner-packet contents.
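 *
 * A hedged sketch of the expected calling pattern from the tun
 * module (names are illustrative only):
 *
 *	mp = ipsec_tun_outbound(mp, atp, inner4, inner6,
 *	    outer4, outer6, 0);
 *	if (mp == NULL)
 *		return;		(dropped, or cached as a fragment)
 *	putnext(q, mp);		(possibly an IPSEC_OUT b_next chain)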
5122 */ 5123 mblk_t * 5124 ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, 5125 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len) 5126 { 5127 ipsec_tun_pol_t *itp = atp->tun_itp; 5128 ipsec_policy_head_t *polhead; 5129 ipsec_selector_t sel; 5130 mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; 5131 mblk_t *spare_mp = NULL; 5132 ipsec_out_t *io; 5133 boolean_t is_fragment; 5134 ipsec_policy_t *pol; 5135 5136 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5137 outer_ipv4 != NULL && outer_ipv6 == NULL); 5138 /* We take care of inners in a bit. */ 5139 5140 /* No policy on this tunnel - let global policy have at it. */ 5141 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) 5142 return (mp); 5143 polhead = itp->itp_policy; 5144 5145 bzero(&sel, sizeof (sel)); 5146 if (inner_ipv4 != NULL) { 5147 ASSERT(inner_ipv6 == NULL); 5148 sel.ips_isv4 = B_TRUE; 5149 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5150 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5151 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5152 is_fragment = 5153 IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags); 5154 } else { 5155 ASSERT(inner_ipv6 != NULL); 5156 sel.ips_isv4 = B_FALSE; 5157 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5158 /* Use ip_get_dst_v6() just for the fragment bit. */ 5159 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, 5160 &is_fragment); 5161 /* 5162 * Reset, because we don't care about routing-header dests 5163 * in the forwarding/tunnel path. 5164 */ 5165 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5166 } 5167 5168 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5169 if (is_fragment) { 5170 ipha_t *oiph; 5171 ipha_t *iph = NULL; 5172 ip6_t *ip6h = NULL; 5173 int hdr_len; 5174 uint16_t ip6_hdr_length; 5175 uint8_t v6_proto; 5176 uint8_t *v6_proto_p; 5177 5178 /* 5179 * We have a fragment we need to track! 5180 */ 5181 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5182 outer_hdr_len); 5183 if (mp == NULL) 5184 return (NULL); 5185 5186 /* 5187 * If we get here, we have a full 5188 * fragment chain 5189 */ 5190 5191 oiph = (ipha_t *)mp->b_rptr; 5192 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5193 hdr_len = ((outer_hdr_len != 0) ? 
5194 IPH_HDR_LENGTH(oiph) : 0); 5195 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5196 } else { 5197 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5198 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 5199 ip_drop_packet_chain(mp, B_FALSE, 5200 NULL, NULL, &ipdrops_spd_nomem, 5201 &spd_dropper); return (NULL); 5202 } 5203 ip6h = (ip6_t *)spare_mp->b_rptr; 5204 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 5205 &ip6_hdr_length, &v6_proto_p); 5206 hdr_len = ip6_hdr_length; 5207 } 5208 outer_hdr_len = hdr_len; 5209 5210 if (sel.ips_isv4) { 5211 if (iph == NULL) { 5212 /* Was v6 outer */ 5213 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5214 } 5215 inner_ipv4 = iph; 5216 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5217 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5218 sel.ips_protocol = 5219 (uint8_t)inner_ipv4->ipha_protocol; 5220 } else { 5221 if ((spare_mp == NULL) && 5222 ((spare_mp = msgpullup(mp, -1)) == NULL)) { 5223 ip_drop_packet_chain(mp, B_FALSE, 5224 NULL, NULL, &ipdrops_spd_nomem, 5225 &spd_dropper); return (NULL); 5226 } 5227 inner_ipv6 = (ip6_t *)(spare_mp->b_rptr + 5228 hdr_len); 5229 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5230 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5231 (void) ip_hdr_length_nexthdr_v6(spare_mp, 5232 inner_ipv6, &ip6_hdr_length, 5233 &v6_proto_p); 5234 v6_proto = *v6_proto_p; 5235 sel.ips_protocol = v6_proto; 5236 #ifdef FRAGCACHE_DEBUG 5237 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5238 sel.ips_protocol); 5239 #endif 5240 } 5241 /* Ports are extracted below */ 5242 } 5243 5244 /* Get ports... */ 5245 if (spare_mp != NULL) { 5246 if (!ipsec_init_outbound_ports(&sel, spare_mp, 5247 inner_ipv4, inner_ipv6, outer_hdr_len)) { 5248 /* 5249 * callee did ip_drop_packet_chain() on 5250 * spare_mp 5251 */ 5252 ipsec_freemsg_chain(mp); 5253 return (NULL); 5254 } 5255 } else { 5256 if (!ipsec_init_outbound_ports(&sel, mp, 5257 inner_ipv4, inner_ipv6, outer_hdr_len)) { 5258 /* callee did ip_drop_packet_chain() on mp. */ 5259 return (NULL); 5260 } 5261 } 5262 #ifdef FRAGCACHE_DEBUG 5263 if (inner_ipv4 != NULL) 5264 cmn_err(CE_WARN, 5265 "(v4) sel.ips_protocol = %d, " 5266 "sel.ips_local_port = %d, " 5267 "sel.ips_remote_port = %d\n", 5268 sel.ips_protocol, ntohs(sel.ips_local_port), 5269 ntohs(sel.ips_remote_port)); 5270 if (inner_ipv6 != NULL) 5271 cmn_err(CE_WARN, 5272 "(v6) sel.ips_protocol = %d, " 5273 "sel.ips_local_port = %d, " 5274 "sel.ips_remote_port = %d\n", 5275 sel.ips_protocol, ntohs(sel.ips_local_port), 5276 ntohs(sel.ips_remote_port)); 5277 #endif 5278 /* Success so far - done with spare_mp */ 5279 ipsec_freemsg_chain(spare_mp); 5280 } 5281 rw_enter(&polhead->iph_lock, RW_READER); 5282 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel); 5283 rw_exit(&polhead->iph_lock); 5284 if (pol == NULL) { 5285 /* 5286 * No matching policy on this tunnel, drop the packet. 5287 * 5288 * NOTE: Tunnel-mode tunnels are different from the 5289 * IP global transport mode policy head. For a tunnel-mode 5290 * tunnel, we drop the packet in lieu of passing it 5291 * along as accepted, the way a global-policy miss would. 5292 * 5293 * NOTE2: "negotiate transport" tunnels should match ALL 5294 * inbound packets, but we do not uncomment the ASSERT() 5295 * below because if/when we open PF_POLICY, a user can 5296 * shoot him/her-self in the foot with a 0 priority.
5297 */ 5298 5299 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5300 #ifdef FRAGCACHE_DEBUG 5301 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5302 "per-port policy\n"); 5303 #endif 5304 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5305 &ipdrops_spd_explicit, &spd_dropper); 5306 return (NULL); 5307 } 5308 5309 #ifdef FRAGCACHE_DEBUG 5310 cmn_err(CE_WARN, "Found matching tunnel per-port policy\n"); 5311 #endif 5312 5313 /* Construct an IPSEC_OUT message. */ 5314 ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(); 5315 if (ipsec_mp == NULL) { 5316 IPPOL_REFRELE(pol); 5317 ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem, 5318 &spd_dropper); 5319 return (NULL); 5320 } 5321 ipsec_mp->b_cont = mp; 5322 io = (ipsec_out_t *)ipsec_mp->b_rptr; 5323 IPPH_REFHOLD(polhead); 5324 /* 5325 * NOTE: free() function of ipsec_out mblk will release polhead and 5326 * pol references. 5327 */ 5328 io->ipsec_out_polhead = polhead; 5329 io->ipsec_out_policy = pol; 5330 io->ipsec_out_zoneid = atp->tun_zoneid; 5331 io->ipsec_out_v4 = (outer_ipv4 != NULL); 5332 io->ipsec_out_secure = B_TRUE; 5333 5334 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5335 /* Set up transport mode for tunnelled packets. */ 5336 io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5337 IPPROTO_IPV6; 5338 return (ipsec_mp); 5339 } 5340 5341 /* Fill in tunnel-mode goodies here. */ 5342 io->ipsec_out_tunnel = B_TRUE; 5343 /* XXX Do I need to fill in all of the goodies here? */ 5344 if (inner_ipv4) { 5345 io->ipsec_out_inaf = AF_INET; 5346 io->ipsec_out_insrc[0] = 5347 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5348 io->ipsec_out_indst[0] = 5349 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5350 } else { 5351 io->ipsec_out_inaf = AF_INET6; 5352 io->ipsec_out_insrc[0] = 5353 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5354 io->ipsec_out_insrc[1] = 5355 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5356 io->ipsec_out_insrc[2] = 5357 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5358 io->ipsec_out_insrc[3] = 5359 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5360 io->ipsec_out_indst[0] = 5361 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5362 io->ipsec_out_indst[1] = 5363 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5364 io->ipsec_out_indst[2] = 5365 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5366 io->ipsec_out_indst[3] = 5367 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5368 } 5369 io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5370 io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5371 /* NOTE: These are used for transport mode too. */ 5372 io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5373 io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5374 io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5375 5376 /* 5377 * The mp pointer is still valid. 5378 * Add an ipsec_out to each fragment.
5379 * The fragment head already has one. 5380 */ 5381 nmp = mp->b_next; 5382 mp->b_next = NULL; 5383 mp = nmp; 5384 ASSERT(ipsec_mp != NULL); 5385 while (mp != NULL) { 5386 nmp = mp->b_next; 5387 ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp); 5388 if (ipsec_mp->b_next == NULL) { 5389 ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL, 5390 &ipdrops_spd_nomem, &spd_dropper); 5391 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5392 &ipdrops_spd_nomem, &spd_dropper); 5393 return (NULL); 5394 } 5395 ipsec_mp = ipsec_mp->b_next; 5396 mp->b_next = NULL; 5397 mp = nmp; 5398 } 5399 return (ipsec_mp_head); 5400 } 5401 5402 /* 5403 * NOTE: The following releases pol's reference and 5404 * calls ip_drop_packet() for me on NULL returns. 5405 */ 5406 mblk_t * 5407 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, 5408 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique) 5409 { 5410 /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */ 5411 mblk_t *data_chain = NULL, *data_tail = NULL; 5412 mblk_t *ii_next; 5413 5414 while (ipsec_mp != NULL) { 5415 ii_next = ipsec_mp->b_next; 5416 ipsec_mp->b_next = NULL; /* No tripping asserts. */ 5417 5418 /* 5419 * Need IPPOL_REFHOLD(pol) for extras because 5420 * ipsecin_policy does the refrele. 5421 */ 5422 IPPOL_REFHOLD(pol); 5423 5424 if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4, 5425 inner_ipv6, pkt_unique) != NULL) { 5426 if (data_tail == NULL) { 5427 /* First one */ 5428 data_chain = data_tail = ipsec_mp->b_cont; 5429 } else { 5430 data_tail->b_next = ipsec_mp->b_cont; 5431 data_tail = data_tail->b_next; 5432 } 5433 freeb(ipsec_mp); 5434 } else { 5435 /* 5436 * ipsec_check_ipsecin_policy() freed ipsec_mp 5437 * already. Need to get rid of any extra pol 5438 * references, and any remaining bits as well. 5439 */ 5440 IPPOL_REFRELE(pol); 5441 ipsec_freemsg_chain(data_chain); 5442 ipsec_freemsg_chain(ii_next); /* ipdrop stats? */ 5443 return (NULL); 5444 } 5445 ipsec_mp = ii_next; 5446 } 5447 /* 5448 * One last release because either the loop bumped it up, or we never 5449 * called ipsec_check_ipsecin_policy(). 5450 */ 5451 IPPOL_REFRELE(pol); 5452 5453 /* data_chain is ready for return to tun module. */ 5454 return (data_chain); 5455 } 5456 5457 5458 /* 5459 * Returns B_TRUE if the inbound packet passed an IPsec policy check. Returns 5460 * B_FALSE if it failed or if it is a fragment needing its friends before a 5461 * policy check can be performed. 5462 * 5463 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead. 5464 * data_mp may be reassigned with a b_next chain of packets if fragments 5465 * needed to be collected for a proper policy check. 5466 * 5467 * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE. This 5468 * function calls ip_drop_packet() on data_mp if need be. 5469 * 5470 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller 5471 * is inspecting an ICMP packet. 5472 */ 5473 boolean_t 5474 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, 5475 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, 5476 ip6_t *outer_ipv6, int outer_hdr_len) 5477 { 5478 ipsec_policy_head_t *polhead; 5479 ipsec_selector_t sel; 5480 mblk_t *message = (ipsec_mp == NULL) ?
*data_mp : ipsec_mp; 5481 ipsec_policy_t *pol; 5482 uint16_t tmpport; 5483 selret_t rc; 5484 boolean_t retval, port_policy_present, is_icmp, global_present; 5485 in6_addr_t tmpaddr; 5486 ipaddr_t tmp4; 5487 uint8_t flags, *holder, *outer_hdr; 5488 5489 sel.ips_is_icmp_inv_acq = 0; 5490 5491 if (outer_ipv4 != NULL) { 5492 ASSERT(outer_ipv6 == NULL); 5493 outer_hdr = (uint8_t *)outer_ipv4; 5494 global_present = ipsec_inbound_v4_policy_present; 5495 } else { 5496 outer_hdr = (uint8_t *)outer_ipv6; 5497 global_present = ipsec_inbound_v6_policy_present; 5498 } 5499 ASSERT(outer_hdr != NULL); 5500 5501 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || 5502 inner_ipv4 == NULL && inner_ipv6 != NULL); 5503 ASSERT(message == *data_mp || message->b_cont == *data_mp); 5504 5505 if (outer_hdr_len < 0) { 5506 outer_hdr_len = (-outer_hdr_len); 5507 is_icmp = B_TRUE; 5508 } else { 5509 is_icmp = B_FALSE; 5510 } 5511 5512 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5513 polhead = itp->itp_policy; 5514 /* 5515 * We need to perform full Tunnel-Mode enforcement, 5516 * and we need to have inner-header data for such enforcement. 5517 * 5518 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5519 * and on return. 5520 */ 5521 5522 port_policy_present = ((itp->itp_flags & 5523 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5524 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5525 (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5526 5527 rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4, 5528 inner_ipv6, flags); 5529 5530 switch (rc) { 5531 case SELRET_NOMEM: 5532 ip_drop_packet(message, B_TRUE, NULL, NULL, 5533 &ipdrops_spd_nomem, &spd_dropper); 5534 return (B_FALSE); 5535 case SELRET_TUNFRAG: 5536 /* 5537 * At this point, if we're cleartext, we don't want 5538 * to go there. 5539 */ 5540 if (ipsec_mp == NULL) { 5541 ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, 5542 &ipdrops_spd_got_clear, &spd_dropper); 5543 *data_mp = NULL; 5544 return (B_FALSE); 5545 } 5546 ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> 5547 ipsec_in_secure); 5548 message = ipsec_fragcache_add(&itp->itp_fragcache, 5549 ipsec_mp, *data_mp, outer_hdr_len); 5550 5551 if (message == NULL) { 5552 /* 5553 * Data is cached, fragment chain is not 5554 * complete. I consume ipsec_mp and data_mp 5555 */ 5556 return (B_FALSE); 5557 } 5558 5559 /* 5560 * If we get here, we have a full fragment chain. 5561 * Reacquire headers and selectors from first fragment. 5562 */ 5563 if (inner_ipv4 != NULL) { 5564 inner_ipv4 = (ipha_t *)message->b_cont->b_rptr; 5565 ASSERT(message->b_cont->b_wptr - 5566 message->b_cont->b_rptr > sizeof (ipha_t)); 5567 } else { 5568 inner_ipv6 = (ip6_t *)message->b_cont->b_rptr; 5569 ASSERT(message->b_cont->b_wptr - 5570 message->b_cont->b_rptr > sizeof (ip6_t)); 5571 } 5572 /* Use SEL_NONE so we always get ports! */ 5573 rc = ipsec_init_inbound_sel(&sel, message->b_cont, 5574 inner_ipv4, inner_ipv6, SEL_NONE); 5575 switch (rc) { 5576 case SELRET_SUCCESS: 5577 /* 5578 * Get to same place as first caller's 5579 * SELRET_SUCCESS case. 
5580 */ 5581 break; 5582 case SELRET_NOMEM: 5583 ip_drop_packet_chain(message, B_TRUE, NULL, 5584 NULL, &ipdrops_spd_nomem, &spd_dropper); 5585 return (B_FALSE); 5586 case SELRET_BADPKT: 5587 ip_drop_packet_chain(message, B_TRUE, NULL, 5588 NULL, &ipdrops_spd_malformed_frag, 5589 &spd_dropper); 5590 return (B_FALSE); 5591 case SELRET_TUNFRAG: 5592 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5593 /* FALLTHRU */ 5594 default: 5595 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5596 " returns bizarro 0x%x", rc); 5597 /* Guaranteed panic! */ 5598 ASSERT(rc == SELRET_NOMEM); 5599 return (B_FALSE); 5600 } 5601 /* FALLTHRU */ 5602 case SELRET_SUCCESS: 5603 /* 5604 * Common case: 5605 * No per-port policy or a non-fragment. Keep going. 5606 */ 5607 break; 5608 case SELRET_BADPKT: 5609 /* 5610 * We may receive ICMP (with IPv6 inner) packets that 5611 * trigger this return value. Send 'em in for 5612 * enforcement checking. 5613 */ 5614 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5615 "sending 'bad packet' in for enforcement"); 5616 break; 5617 default: 5618 cmn_err(CE_WARN, 5619 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5620 rc); 5621 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5622 return (B_FALSE); 5623 } 5624 5625 if (is_icmp) { 5626 /* 5627 * Swap local/remote because this is an ICMP packet. 5628 */ 5629 tmpaddr = sel.ips_local_addr_v6; 5630 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5631 sel.ips_remote_addr_v6 = tmpaddr; 5632 tmpport = sel.ips_local_port; 5633 sel.ips_local_port = sel.ips_remote_port; 5634 sel.ips_remote_port = tmpport; 5635 } 5636 5637 /* find_policy_head() */ 5638 rw_enter(&polhead->iph_lock, RW_READER); 5639 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5640 &sel); 5641 rw_exit(&polhead->iph_lock); 5642 if (pol != NULL) { 5643 if (ipsec_mp == NULL || 5644 !((ipsec_in_t *)ipsec_mp->b_rptr)-> 5645 ipsec_in_secure) { 5646 retval = pol->ipsp_act->ipa_allow_clear; 5647 if (!retval) { 5648 /* 5649 * XXX should never get here with 5650 * tunnel reassembled fragments? 5651 */ 5652 ASSERT(message->b_next == NULL); 5653 ip_drop_packet(message, B_TRUE, NULL, 5654 NULL, &ipdrops_spd_got_clear, 5655 &spd_dropper); 5656 } else if (ipsec_mp != NULL) { 5657 freeb(ipsec_mp); 5658 } 5659 5660 IPPOL_REFRELE(pol); 5661 return (retval); 5662 } 5663 /* 5664 * NOTE: The following releases pol's reference and 5665 * calls ip_drop_packet() for me on NULL returns. 5666 * 5667 * "sel" is still good here, so let's use it! 5668 */ 5669 *data_mp = ipsec_check_ipsecin_policy_reasm(message, 5670 pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( 5671 sel.ips_remote_port, sel.ips_local_port, 5672 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 5673 IPPROTO_ENCAP, sel.ips_protocol)); 5674 return (*data_mp != NULL); 5675 } 5676 5677 /* 5678 * Else fallthru and check the global policy on the outer 5679 * header(s) if this tunnel is an old-style transport-mode 5680 * one. Drop the packet explicitly (no policy entry) for 5681 * a new-style tunnel-mode tunnel. 5682 */ 5683 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 5684 ip_drop_packet_chain(message, B_TRUE, NULL, 5685 NULL, &ipdrops_spd_explicit, &spd_dropper); 5686 return (B_FALSE); 5687 } 5688 } 5689 5690 /* 5691 * NOTE: If we reach here, we will not have packet chains from 5692 * fragcache_add(), because the only way I get chains is on a 5693 * tunnel-mode tunnel, which either returns with a pass, or gets 5694 * hit by the ip_drop_packet_chain() call right above here. 
5695 */ 5696 5697 /* If no per-tunnel security, check global policy now. */ 5698 if (ipsec_mp != NULL && !global_present) { 5699 if (((ipsec_in_t *)(ipsec_mp->b_rptr))-> 5700 ipsec_in_icmp_loopback) { 5701 /* 5702 * This is an ICMP message with an ipsec_mp 5703 * attached. We should accept it. 5704 */ 5705 if (ipsec_mp != NULL) 5706 freeb(ipsec_mp); 5707 return (B_TRUE); 5708 } 5709 5710 ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, 5711 &ipdrops_spd_got_secure, &spd_dropper); 5712 return (B_FALSE); 5713 } 5714 5715 /* 5716 * The following assertion is valid because only the tun module alters 5717 * the mblk chain - stripping the outer header by advancing mp->b_rptr. 5718 */ 5719 ASSERT(is_icmp || 5720 ((*data_mp)->b_datap->db_base <= outer_hdr && 5721 outer_hdr < (*data_mp)->b_rptr)); 5722 holder = (*data_mp)->b_rptr; 5723 (*data_mp)->b_rptr = outer_hdr; 5724 5725 if (is_icmp) { 5726 /* 5727 * For ICMP packets, "outer_ipvN" is set to the outer header 5728 * that is *INSIDE* the ICMP payload. For global policy 5729 * checking, we need to reverse src/dst on the payload in 5730 * order to construct selectors appropriately. See "ripha" 5731 * constructions in ip.c. To avoid a bug like 6478464 (see 5732 * earlier in this file), we will actually exchange src/dst 5733 * in the packet, and reverse it after the call to 5734 * ipsec_check_global_policy(). 5735 */ 5736 if (outer_ipv4 != NULL) { 5737 tmp4 = outer_ipv4->ipha_src; 5738 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 5739 outer_ipv4->ipha_dst = tmp4; 5740 } else { 5741 ASSERT(outer_ipv6 != NULL); 5742 tmpaddr = outer_ipv6->ip6_src; 5743 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 5744 outer_ipv6->ip6_dst = tmpaddr; 5745 } 5746 } 5747 5748 /* NOTE: Frees message if it returns NULL. */ 5749 if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6, 5750 (ipsec_mp != NULL)) == NULL) { 5751 return (B_FALSE); 5752 } 5753 5754 if (is_icmp) { 5755 /* Set things back to normal. */ 5756 if (outer_ipv4 != NULL) { 5757 tmp4 = outer_ipv4->ipha_src; 5758 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 5759 outer_ipv4->ipha_dst = tmp4; 5760 } else { 5761 /* No need for ASSERT()s now. */ 5762 tmpaddr = outer_ipv6->ip6_src; 5763 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 5764 outer_ipv6->ip6_dst = tmpaddr; 5765 } 5766 } 5767 5768 (*data_mp)->b_rptr = holder; 5769 5770 if (ipsec_mp != NULL) 5771 freeb(ipsec_mp); 5772 5773 /* 5774 * At this point, we pretend it's a cleartext accepted 5775 * packet. 5776 */ 5777 return (B_TRUE); 5778 } 5779 5780 /* 5781 * AVL comparison routine for our list of tunnel polheads. 5782 */ 5783 static int 5784 tunnel_compare(const void *arg1, const void *arg2) 5785 { 5786 ipsec_tun_pol_t *left, *right; 5787 int rc; 5788 5789 left = (ipsec_tun_pol_t *)arg1; 5790 right = (ipsec_tun_pol_t *)arg2; 5791 5792 rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ); 5793 return (rc == 0 ? rc : (rc > 0 ? 1 : -1)); 5794 } 5795 5796 /* 5797 * Free a tunnel policy node.
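 *
 * itp_free() is normally reached via the last ITP_REFRELE() rather
 * than called directly. A hedged sketch of the usual lifetime:
 *
 *	itp = get_tunnel_policy(name);	(refcnt bumped on success)
 *	... use itp ...
 *	ITP_REFRELE(itp);		(frees the node on last release)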
5798 */ 5799 void 5800 itp_free(ipsec_tun_pol_t *node) 5801 { 5802 IPPH_REFRELE(node->itp_policy); 5803 IPPH_REFRELE(node->itp_inactive); 5804 mutex_destroy(&node->itp_lock); 5805 kmem_free(node, sizeof (*node)); 5806 } 5807 5808 void 5809 itp_unlink(ipsec_tun_pol_t *node) 5810 { 5811 rw_enter(&tunnel_policy_lock, RW_WRITER); 5812 tunnel_policy_gen++; 5813 ipsec_fragcache_uninit(&node->itp_fragcache); 5814 avl_remove(&tunnel_policies, node); 5815 rw_exit(&tunnel_policy_lock); 5816 ITP_REFRELE(node); 5817 } 5818 5819 /* 5820 * Public interface to look up a tunnel security policy by name. Used by 5821 * spdsock mostly. Returns "node" with a bumped refcnt. 5822 */ 5823 ipsec_tun_pol_t * 5824 get_tunnel_policy(char *name) 5825 { 5826 ipsec_tun_pol_t *node, lookup; 5827 5828 (void) strncpy(lookup.itp_name, name, LIFNAMSIZ); 5829 5830 rw_enter(&tunnel_policy_lock, RW_READER); 5831 node = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, &lookup, NULL); 5832 if (node != NULL) { 5833 ITP_REFHOLD(node); 5834 } 5835 rw_exit(&tunnel_policy_lock); 5836 5837 return (node); 5838 } 5839 5840 /* 5841 * Public interface to walk all tunnel security policies. Useful for spdsock 5842 * DUMP operations. iterator() will not consume a reference. 5843 */ 5844 void 5845 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *), void *arg) 5846 { 5847 ipsec_tun_pol_t *node; 5848 5849 rw_enter(&tunnel_policy_lock, RW_READER); 5850 for (node = avl_first(&tunnel_policies); node != NULL; 5851 node = AVL_NEXT(&tunnel_policies, node)) { 5852 iterator(node, arg); 5853 } 5854 rw_exit(&tunnel_policy_lock); 5855 } 5856 5857 /* 5858 * Initialize policy head. This can only fail if there's a memory problem. 5859 */ 5860 static boolean_t 5861 tunnel_polhead_init(ipsec_policy_head_t *iph) 5862 { 5863 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 5864 iph->iph_refs = 1; 5865 iph->iph_gen = 0; 5866 if (ipsec_alloc_table(iph, tun_spd_hashsize, KM_SLEEP, B_FALSE) != 0) { 5867 ipsec_polhead_free_table(iph); 5868 return (B_FALSE); 5869 } 5870 ipsec_polhead_init(iph, tun_spd_hashsize); 5871 return (B_TRUE); 5872 } 5873 5874 /* 5875 * Create a tunnel policy node with "name". Set *errno to 5876 * ENOMEM if there's a memory problem, and EEXIST if there's an existing 5877 * node. 5878 */ 5879 ipsec_tun_pol_t * 5880 create_tunnel_policy(char *name, int *errno, uint64_t *gen) 5881 { 5882 ipsec_tun_pol_t *newbie, *existing; 5883 avl_index_t where; 5884 5885 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP); 5886 if (newbie == NULL) { 5887 *errno = ENOMEM; 5888 return (NULL); 5889 } 5890 if (!ipsec_fragcache_init(&newbie->itp_fragcache)) { 5891 kmem_free(newbie, sizeof (*newbie)); 5892 *errno = ENOMEM; 5893 return (NULL); 5894 } 5895 5896 (void) strncpy(newbie->itp_name, name, LIFNAMSIZ); 5897 5898 rw_enter(&tunnel_policy_lock, RW_WRITER); 5899 existing = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, newbie, 5900 &where); 5901 if (existing != NULL) { 5902 itp_free(newbie); 5903 *errno = EEXIST; 5904 rw_exit(&tunnel_policy_lock); 5905 return (NULL); 5906 } 5907 tunnel_policy_gen++; 5908 *gen = tunnel_policy_gen; 5909 newbie->itp_refcnt = 2; /* One for the caller, one for the tree.

/*
 * Create a tunnel policy node with "name".  Set errno with
 * ENOMEM if there's a memory problem, and EEXIST if there's an existing
 * node.
 */
ipsec_tun_pol_t *
create_tunnel_policy(char *name, int *errno, uint64_t *gen)
{
	ipsec_tun_pol_t *newbie, *existing;
	avl_index_t where;

	newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
	if (newbie == NULL) {
		*errno = ENOMEM;
		return (NULL);
	}
	if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
		kmem_free(newbie, sizeof (*newbie));
		*errno = ENOMEM;
		return (NULL);
	}

	(void) strncpy(newbie->itp_name, name, LIFNAMSIZ);

	rw_enter(&tunnel_policy_lock, RW_WRITER);
	existing = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, newbie,
	    &where);
	if (existing != NULL) {
		itp_free(newbie);
		*errno = EEXIST;
		rw_exit(&tunnel_policy_lock);
		return (NULL);
	}
	tunnel_policy_gen++;
	*gen = tunnel_policy_gen;
	newbie->itp_refcnt = 2;	/* One for the caller, one for the tree. */
	newbie->itp_next_policy_index = 1;
	avl_insert(&tunnel_policies, newbie, where);
	mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
	newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_policy == NULL)
		goto nomem;
	newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_inactive == NULL) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		goto nomem;
	}

	if (!tunnel_polhead_init(newbie->itp_policy)) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	} else if (!tunnel_polhead_init(newbie->itp_inactive)) {
		IPPH_REFRELE(newbie->itp_policy);
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	}
	rw_exit(&tunnel_policy_lock);

	return (newbie);
nomem:
	/*
	 * Back out the avl_insert() above, drop the lock, and release the
	 * fragcache before freeing; the caller only sees NULL/ENOMEM.
	 */
	avl_remove(&tunnel_policies, newbie);
	rw_exit(&tunnel_policy_lock);
	*errno = ENOMEM;
	mutex_destroy(&newbie->itp_lock);
	ipsec_fragcache_uninit(&newbie->itp_fragcache);
	kmem_free(newbie, sizeof (*newbie));
	return (NULL);
}

/*
 * We can't call the tun_t lookup function until tun is
 * loaded, so create a dummy function to avoid symbol
 * lookup errors on boot.
 */
/* ARGSUSED */
ipsec_tun_pol_t *
itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af)
{
	return (NULL);	/* Always return NULL. */
}

/*
 * Frag cache code, based on SunScreen 3.2 source
 * screen/kernel/common/screen_fragcache.c
 */

#define	IPSEC_FRAG_TTL_MAX	5
/*
 * Note that the following parameters create 256 hash buckets
 * with 1024 free entries to be distributed.  Things are cleaned
 * periodically, and a clean is also attempted whenever the freelist
 * runs out, but this system errs on the side of dropping packets over
 * risking memory exhaustion.  We may decide to make the hash factor a
 * tunable if this proves to be a bad decision.
 */
#define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
#define	IPSEC_FRAG_HASH_FACTOR	4
#define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)

#define	IPSEC_FRAG_HASH_MASK	(IPSEC_FRAG_HASH_SLOTS - 1)
#define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
					    (((id) / \
					    (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
					    IPSEC_FRAG_HASH_MASK))

/* Maximum fragments per packet.  48 bytes payload x 1366 packets > 64KB */
#define	IPSEC_MAX_FRAGS		1366

#define	V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_OFFSET) << 3)
#define	V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_MF)
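
/*
 * Worked example of IPSEC_FRAG_HASH_FUNC above: with 256 slots the
 * function folds the IP ident by XORing its low byte with the next
 * byte up, e.g. for id = 0x1234:
 *
 *	(0x1234 & 0xff) ^ ((0x1234 / 256) & 0xff) = 0x34 ^ 0x12 = 0x26
 *
 * so that ident lands in slot 0x26 (decimal 38).  Only bits 0-15 of
 * the id value influence the chosen slot, even for IPv6's 32-bit ident.
 */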
5989 */ 5990 boolean_t 5991 ipsec_fragcache_init(ipsec_fragcache_t *frag) 5992 { 5993 ipsec_fragcache_entry_t *ftemp; 5994 int i; 5995 5996 mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL); 5997 frag->itpf_ptr = (ipsec_fragcache_entry_t **) 5998 kmem_zalloc( 5999 sizeof (ipsec_fragcache_entry_t *) * 6000 IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP); 6001 if (frag->itpf_ptr == NULL) 6002 return (B_FALSE); 6003 6004 ftemp = (ipsec_fragcache_entry_t *) 6005 kmem_zalloc(sizeof (ipsec_fragcache_entry_t) * 6006 IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP); 6007 if (ftemp == NULL) { 6008 kmem_free(frag->itpf_ptr, 6009 sizeof (ipsec_fragcache_entry_t *) * 6010 IPSEC_FRAG_HASH_SLOTS); 6011 return (B_FALSE); 6012 } 6013 6014 frag->itpf_freelist = NULL; 6015 6016 for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) { 6017 ftemp->itpfe_next = frag->itpf_freelist; 6018 frag->itpf_freelist = ftemp; 6019 ftemp++; 6020 } 6021 6022 frag->itpf_expire_hint = 0; 6023 6024 return (B_TRUE); 6025 } 6026 6027 void 6028 ipsec_fragcache_uninit(ipsec_fragcache_t *frag) 6029 { 6030 ipsec_fragcache_entry_t *fep; 6031 int i; 6032 6033 mutex_enter(&frag->itpf_lock); 6034 if (frag->itpf_ptr) { 6035 /* Delete any existing fragcache entry chains */ 6036 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6037 fep = (frag->itpf_ptr)[i]; 6038 while (fep != NULL) { 6039 /* Returned fep is next in chain or NULL */ 6040 fep = fragcache_delentry(i, fep, frag); 6041 } 6042 } 6043 /* 6044 * Chase the pointers back to the beginning 6045 * of the memory allocation and then 6046 * get rid of the allocated freelist 6047 */ 6048 while (frag->itpf_freelist->itpfe_next != NULL) 6049 frag->itpf_freelist = frag->itpf_freelist->itpfe_next; 6050 /* 6051 * XXX - If we ever dynamically grow the freelist 6052 * then we'll have to free entries individually 6053 * or determine how many entries or chunks we have 6054 * grown since the initial allocation. 6055 */ 6056 kmem_free(frag->itpf_freelist, 6057 sizeof (ipsec_fragcache_entry_t) * 6058 IPSEC_FRAG_HASH_SIZE); 6059 /* Free the fragcache structure */ 6060 kmem_free(frag->itpf_ptr, 6061 sizeof (ipsec_fragcache_entry_t *) * 6062 IPSEC_FRAG_HASH_SLOTS); 6063 } 6064 mutex_exit(&frag->itpf_lock); 6065 mutex_destroy(&frag->itpf_lock); 6066 } 6067 6068 /* 6069 * Add a fragment to the fragment cache. Consumes mp if NULL is returned. 6070 * Returns mp if a whole fragment has been assembled, NULL otherwise 6071 */ 6072 6073 mblk_t * 6074 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, 6075 int outer_hdr_len) 6076 { 6077 boolean_t is_v4; 6078 time_t itpf_time; 6079 ipha_t *iph; 6080 ipha_t *oiph; 6081 ip6_t *ip6h = NULL; 6082 uint8_t v6_proto; 6083 uint8_t *v6_proto_p; 6084 uint16_t ip6_hdr_length; 6085 ip6_pkt_t ipp; 6086 ip6_frag_t *fraghdr; 6087 ipsec_fragcache_entry_t *fep; 6088 int i; 6089 mblk_t *nmp, *prevmp, *spare_mp = NULL; 6090 int firstbyte, lastbyte; 6091 int offset; 6092 int last; 6093 boolean_t inbound = (ipsec_mp != NULL); 6094 mblk_t *first_mp = inbound ? 

/*
 * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
 * Returns the assembled chain if a whole datagram has been gathered,
 * NULL otherwise.
 */
mblk_t *
ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
    int outer_hdr_len)
{
	boolean_t is_v4;
	time_t itpf_time;
	ipha_t *iph;
	ipha_t *oiph;
	ip6_t *ip6h = NULL;
	uint8_t v6_proto;
	uint8_t *v6_proto_p;
	uint16_t ip6_hdr_length;
	ip6_pkt_t ipp;
	ip6_frag_t *fraghdr;
	ipsec_fragcache_entry_t *fep;
	int i;
	mblk_t *nmp, *prevmp, *spare_mp = NULL;
	int firstbyte, lastbyte;
	int offset;
	int last;
	boolean_t inbound = (ipsec_mp != NULL);
	mblk_t *first_mp = inbound ? ipsec_mp : mp;

	mutex_enter(&frag->itpf_lock);

	oiph = (ipha_t *)mp->b_rptr;
	iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);
	if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
		is_v4 = B_TRUE;
	} else {
		ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
		if ((spare_mp = msgpullup(mp, -1)) == NULL) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_nomem, &spd_dropper);
			return (NULL);
		}
		ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len);

		/*
		 * Find the upper-layer protocol.  If that fails, we have
		 * a malformed packet.
		 */
		if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length,
		    &v6_proto_p)) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_malformed_packet, &spd_dropper);
			freemsg(spare_mp);
			return (NULL);
		}
		v6_proto = *v6_proto_p;

		bzero(&ipp, sizeof (ipp));
		(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
		if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
			/*
			 * We think this is a fragment, but didn't find
			 * a fragment header.  Something is wrong.
			 */
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_malformed_frag, &spd_dropper);
			freemsg(spare_mp);
			return (NULL);
		}
		fraghdr = ipp.ipp_fraghdr;
		is_v4 = B_FALSE;
	}
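
	/*
	 * At this point "iph" (v4) or "ip6h"/"fraghdr" (v6, pointing into
	 * the pulled-up copy "spare_mp") identify the arriving fragment.
	 * The v6 path works on msgpullup()'ed data on the assumption that
	 * extension headers may span mblks, which the header walkers above
	 * cannot tolerate.
	 */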
6154 */ 6155 itpf_time = gethrestime_sec(); 6156 if (itpf_time >= frag->itpf_expire_hint) 6157 ipsec_fragcache_clean(frag); 6158 6159 /* Lookup to see if there is an existing entry */ 6160 6161 if (is_v4) 6162 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6163 else 6164 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6165 6166 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6167 if (is_v4) { 6168 ASSERT(iph != NULL); 6169 if ((fep->itpfe_id == iph->ipha_ident) && 6170 (fep->itpfe_src == iph->ipha_src) && 6171 (fep->itpfe_dst == iph->ipha_dst) && 6172 (fep->itpfe_proto == iph->ipha_protocol)) 6173 break; 6174 } else { 6175 ASSERT(fraghdr != NULL); 6176 ASSERT(fep != NULL); 6177 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6178 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6179 &ip6h->ip6_src) && 6180 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6181 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6182 break; 6183 } 6184 } 6185 6186 if (is_v4) { 6187 firstbyte = V4_FRAG_OFFSET(iph); 6188 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6189 IPH_HDR_LENGTH(iph); 6190 last = (V4_MORE_FRAGS(iph) == 0); 6191 #ifdef FRAGCACHE_DEBUG 6192 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6193 "last = %d, id = %d\n", firstbyte, lastbyte, last, 6194 iph->ipha_ident); 6195 #endif 6196 } else { 6197 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6198 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6199 sizeof (ip6_t) - ip6_hdr_length; 6200 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6201 #ifdef FRAGCACHE_DEBUG 6202 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6203 "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n", 6204 firstbyte, lastbyte, last, fraghdr->ip6f_ident, 6205 fraghdr, spare_mp); 6206 #endif 6207 } 6208 6209 /* check for bogus fragments and delete the entry */ 6210 if (firstbyte > 0 && firstbyte <= 8) { 6211 if (fep != NULL) 6212 (void) fragcache_delentry(i, fep, frag); 6213 mutex_exit(&frag->itpf_lock); 6214 ip_drop_packet(first_mp, inbound, NULL, NULL, 6215 &ipdrops_spd_malformed_frag, &spd_dropper); 6216 freemsg(spare_mp); 6217 return (NULL); 6218 } 6219 6220 /* Not found, allocate a new entry */ 6221 if (fep == NULL) { 6222 if (frag->itpf_freelist == NULL) { 6223 /* see if there is some space */ 6224 ipsec_fragcache_clean(frag); 6225 if (frag->itpf_freelist == NULL) { 6226 mutex_exit(&frag->itpf_lock); 6227 ip_drop_packet(first_mp, inbound, NULL, NULL, 6228 &ipdrops_spd_nomem, &spd_dropper); 6229 freemsg(spare_mp); 6230 return (NULL); 6231 } 6232 } 6233 6234 fep = frag->itpf_freelist; 6235 frag->itpf_freelist = fep->itpfe_next; 6236 6237 if (is_v4) { 6238 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6239 sizeof (struct in_addr)); 6240 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6241 sizeof (struct in_addr)); 6242 fep->itpfe_id = iph->ipha_ident; 6243 fep->itpfe_proto = iph->ipha_protocol; 6244 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6245 } else { 6246 bcopy((in6_addr_t *)&ip6h->ip6_src, 6247 (in6_addr_t *)&fep->itpfe_src6, 6248 sizeof (struct in6_addr)); 6249 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6250 (in6_addr_t *)&fep->itpfe_dst6, 6251 sizeof (struct in6_addr)); 6252 fep->itpfe_id = fraghdr->ip6f_ident; 6253 fep->itpfe_proto = v6_proto; 6254 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6255 } 6256 itpf_time = gethrestime_sec(); 6257 fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1; 6258 fep->itpfe_last = 0; 6259 fep->itpfe_fraglist = NULL; 6260 fep->itpfe_depth = 0; 6261 fep->itpfe_next = (frag->itpf_ptr)[i]; 6262 

	/* Not found, allocate a new entry */
	if (fep == NULL) {
		if (frag->itpf_freelist == NULL) {
			/* see if there is some space */
			ipsec_fragcache_clean(frag);
			if (frag->itpf_freelist == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				freemsg(spare_mp);
				return (NULL);
			}
		}

		fep = frag->itpf_freelist;
		frag->itpf_freelist = fep->itpfe_next;

		if (is_v4) {
			bcopy((caddr_t)&iph->ipha_src,
			    (caddr_t)&fep->itpfe_src,
			    sizeof (struct in_addr));
			bcopy((caddr_t)&iph->ipha_dst,
			    (caddr_t)&fep->itpfe_dst,
			    sizeof (struct in_addr));
			fep->itpfe_id = iph->ipha_ident;
			fep->itpfe_proto = iph->ipha_protocol;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		} else {
			bcopy((in6_addr_t *)&ip6h->ip6_src,
			    (in6_addr_t *)&fep->itpfe_src6,
			    sizeof (struct in6_addr));
			bcopy((in6_addr_t *)&ip6h->ip6_dst,
			    (in6_addr_t *)&fep->itpfe_dst6,
			    sizeof (struct in6_addr));
			fep->itpfe_id = fraghdr->ip6f_ident;
			fep->itpfe_proto = v6_proto;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		}
		itpf_time = gethrestime_sec();
		fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1;
		fep->itpfe_last = 0;
		fep->itpfe_fraglist = NULL;
		fep->itpfe_depth = 0;
		fep->itpfe_next = (frag->itpf_ptr)[i];
		(frag->itpf_ptr)[i] = fep;

		if (frag->itpf_expire_hint > fep->itpfe_exp)
			frag->itpf_expire_hint = fep->itpfe_exp;
	}
	freemsg(spare_mp);

	/* Insert it in the frag list */
	/* List is in order by starting offset of fragments */

	prevmp = NULL;
	for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) {
		ipha_t *niph;
		ipha_t *oniph;
		ip6_t *nip6h;
		ip6_pkt_t nipp;
		ip6_frag_t *nfraghdr;
		uint16_t nip6_hdr_length;
		uint8_t *nv6_proto_p;
		int nfirstbyte, nlastbyte;
		char *data, *ndata;
		mblk_t *nspare_mp = NULL;
		mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp);
		int hdr_len;

		oniph = (ipha_t *)mp->b_rptr;
		nip6h = NULL;
		niph = NULL;

		/*
		 * Determine outer header type and length and set
		 * pointers appropriately
		 */

		if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION);
			if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			nip6h = (ip6_t *)nspare_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
			    &nip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0);
		}

		/*
		 * Determine inner header type and length and set
		 * pointers appropriately
		 */

		if (is_v4) {
			if (niph == NULL) {
				/* Was v6 outer */
				niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
			}
			nfirstbyte = V4_FRAG_OFFSET(niph);
			nlastbyte = nfirstbyte + ntohs(niph->ipha_length) -
			    IPH_HDR_LENGTH(niph);
		} else {
			if ((nspare_mp == NULL) &&
			    ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
			    &nip6_hdr_length, &nv6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_malformed_frag, &spd_dropper);
				ipsec_freemsg_chain(nspare_mp);
				return (NULL);
			}
			bzero(&nipp, sizeof (nipp));
			(void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL);
			nfraghdr = nipp.ipp_fraghdr;
			nfirstbyte = ntohs(nfraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) +
			    sizeof (ip6_t) - nip6_hdr_length;
		}
		ipsec_freemsg_chain(nspare_mp);

		/* Check for overlapping fragments */
		if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) {
			/*
			 * Overlap Check:
			 *	~~~~---------		# Check if the newly
			 *	~ ndata_mp |		# received fragment
			 *	~~~~---------		# overlaps with the
			 *	---------~~~~~~		# current fragment.
			 *	|   mp   ~
			 *	---------~~~~~~
			 */
			if (is_v4) {
				data = (char *)iph + IPH_HDR_LENGTH(iph) +
				    firstbyte - nfirstbyte;
				ndata = (char *)niph + IPH_HDR_LENGTH(niph);
			} else {
				data = (char *)ip6h +
				    nip6_hdr_length + firstbyte -
				    nfirstbyte;
				ndata = (char *)nip6h + nip6_hdr_length;
			}
			if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) -
			    firstbyte)) {
				/* Overlapping data does not match */
				(void) fragcache_delentry(i, fep, frag);
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_overlap_frag, &spd_dropper);
				return (NULL);
			}
			/* Part of defense for jolt2.c fragmentation attack */
			if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
				/*
				 * Check for identical or subset fragments:
				 *	----------	    ~~~~--------~~~~~
				 *	|   nmp   |	or  ~    nmp    ~
				 *	----------	    ~~~~--------~~~~~
				 *	----------		  ------
				 *	|   mp    |		  | mp |
				 *	----------		  ------
				 */
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_evil_frag, &spd_dropper);
				return (NULL);
			}
		}

		/* Correct location for this fragment? */
		if (firstbyte <= nfirstbyte) {
			/*
			 * Check if the tail end of the new fragment overlaps
			 * with the head of the current fragment.
			 *	--------~~~~~~~
			 *	|  nmp  ~
			 *	--------~~~~~~~
			 *	~~~~~--------
			 *	~ mp |
			 *	~~~~~--------
			 */
			if (lastbyte > nfirstbyte) {
				/* Fragments overlap */
				if (is_v4) {
					data = (char *)iph +
					    IPH_HDR_LENGTH(iph) + firstbyte -
					    nfirstbyte;
					ndata = (char *)niph +
					    IPH_HDR_LENGTH(niph);
				} else {
					data = (char *)ip6h +
					    nip6_hdr_length + firstbyte -
					    nfirstbyte;
					ndata = (char *)nip6h +
					    nip6_hdr_length;
				}
				if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
				    - nfirstbyte)) {
					/* Overlap mismatch */
					(void) fragcache_delentry(i, fep,
					    frag);
					mutex_exit(&frag->itpf_lock);
					ip_drop_packet(first_mp, inbound, NULL,
					    NULL, &ipdrops_spd_overlap_frag,
					    &spd_dropper);
					return (NULL);
				}
			}

			/*
			 * Fragment does not illegally overlap and can now
			 * be inserted into the chain
			 */
			break;
		}

		prevmp = nmp;
	}
	first_mp->b_next = nmp;

	if (prevmp == NULL) {
		fep->itpfe_fraglist = first_mp;
	} else {
		prevmp->b_next = first_mp;
	}
	if (last)
		fep->itpfe_last = 1;

	/* Part of defense for jolt2.c fragmentation attack */
	if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
		(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    &ipdrops_spd_max_frags, &spd_dropper);
		return (NULL);
	}

	/* Check for complete packet */

	if (!fep->itpfe_last) {
		mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "Fragment cached, not last.\n");
#endif
		return (NULL);
	}

#ifdef FRAGCACHE_DEBUG
	cmn_err(CE_WARN, "Last fragment cached.\n");
	cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp);
#endif

	/*
	 * We have seen the last fragment, so walk the sorted list and see
	 * whether a complete datagram can be stitched together.
	 */
	offset = 0;
	for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
		mblk_t *data_mp = (inbound ? mp->b_cont : mp);
		int hdr_len;

		oiph = (ipha_t *)data_mp->b_rptr;
		ip6h = NULL;
		iph = NULL;

		spare_mp = NULL;
		if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
			if ((spare_mp = msgpullup(data_mp, -1)) == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			ip6h = (ip6_t *)spare_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
		}

		/* Calculate current fragment start/end */
		if (is_v4) {
			if (iph == NULL) {
				/* Was v6 outer */
				iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
			}
			firstbyte = V4_FRAG_OFFSET(iph);
			lastbyte = firstbyte + ntohs(iph->ipha_length) -
			    IPH_HDR_LENGTH(iph);
		} else {
			if ((spare_mp == NULL) &&
			    ((spare_mp = msgpullup(data_mp, -1)) == NULL)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_malformed_frag, &spd_dropper);
				ipsec_freemsg_chain(spare_mp);
				return (NULL);
			}
			v6_proto = *v6_proto_p;
			bzero(&ipp, sizeof (ipp));
			(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
			fraghdr = ipp.ipp_fraghdr;
			firstbyte = ntohs(fraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
			    sizeof (ip6_t) - ip6_hdr_length;
		}
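
		/*
		 * Offset bookkeeping, illustrated (1480-byte data fragments
		 * assumed): the list is sorted by starting offset, so a
		 * complete datagram walks as 0 -> 1480, 1480 -> 2960, and
		 * so on, with the running "offset" always catching up to
		 * each fragment's lastbyte.  A fragment whose firstbyte
		 * exceeds "offset" therefore exposes a hole, which the
		 * check below treats as "not reassembled yet".
		 */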

		/*
		 * If this fragment is greater than current offset,
		 * we have a missing fragment so return NULL
		 */
		if (firstbyte > offset) {
			mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
			/*
			 * Note, this can happen when the last frag
			 * gets sent through because it is smaller
			 * than the MTU.  It is not necessarily an
			 * error condition.
			 */
			cmn_err(CE_WARN, "Frag greater than offset! : "
			    "missing fragment: firstbyte = %d, offset = %d, "
			    "mp = %p\n", firstbyte, offset, mp);
#endif
			ipsec_freemsg_chain(spare_mp);
			return (NULL);
		}

		/*
		 * If we are at the last fragment, we have the complete
		 * packet, so rechain things and return it to caller
		 * for processing
		 */
		if ((is_v4 && !V4_MORE_FRAGS(iph)) ||
		    (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) {
			mp = fep->itpfe_fraglist;
			fep->itpfe_fraglist = NULL;
			(void) fragcache_delentry(i, fep, frag);
			mutex_exit(&frag->itpf_lock);

			if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) >
			    65535)) || (!is_v4 && (firstbyte +
			    ntohs(ip6h->ip6_plen) > 65535))) {
				/* It is an invalid "ping-o-death" packet */
				/* Discard it */
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_evil_frag, &spd_dropper);
				ipsec_freemsg_chain(spare_mp);
				return (NULL);
			}
#ifdef FRAGCACHE_DEBUG
			cmn_err(CE_WARN, "Fragcache returning mp = %p, "
			    "mp->b_next = %p", mp, mp->b_next);
#endif
			ipsec_freemsg_chain(spare_mp);
			/*
			 * For inbound case, mp has ipsec_in b_next'd chain
			 * For outbound case, it is just data mp chain
			 */
			return (mp);
		}
		ipsec_freemsg_chain(spare_mp);

		/*
		 * Update new ending offset if this
		 * fragment extends the packet
		 */
		if (offset < lastbyte)
			offset = lastbyte;
	}

	mutex_exit(&frag->itpf_lock);

	/* Didn't find last fragment, so return NULL */
	return (NULL);
}

static void
ipsec_fragcache_clean(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *fep;
	int i;
	ipsec_fragcache_entry_t *earlyfep = NULL;
	time_t itpf_time;
	int earlyexp;
	int earlyi = 0;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	itpf_time = gethrestime_sec();
	earlyexp = itpf_time + 10000;

	for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
		fep = (frag->itpf_ptr)[i];
		while (fep) {
			if (fep->itpfe_exp < itpf_time) {
				/* found */
				fep = fragcache_delentry(i, fep, frag);
			} else {
				if (fep->itpfe_exp < earlyexp) {
					earlyfep = fep;
					earlyexp = fep->itpfe_exp;
					earlyi = i;
				}
				fep = fep->itpfe_next;
			}
		}
	}

	frag->itpf_expire_hint = earlyexp;

	/* If we still have no free entries, prune the one closest to expiry */
	if (frag->itpf_freelist == NULL)
		(void) fragcache_delentry(earlyi, earlyfep, frag);
}
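
/*
 * fragcache_delentry() below returns the *successor* of the entry it
 * unlinks.  That convention is what lets ipsec_fragcache_uninit() and
 * ipsec_fragcache_clean() above keep walking a hash chain while they
 * delete from it, without restarting at the slot head each time.
 */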

static ipsec_fragcache_entry_t *
fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep,
    ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *targp;
	ipsec_fragcache_entry_t *nextp = fep->itpfe_next;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	/* Free up any fragment list still in cache entry */
	ipsec_freemsg_chain(fep->itpfe_fraglist);

	targp = (frag->itpf_ptr)[slot];
	ASSERT(targp != 0);

	if (targp == fep) {
		/* unlink from head of hash chain */
		(frag->itpf_ptr)[slot] = nextp;
		/* link into free list */
		fep->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = fep;
		return (nextp);
	}

	/* maybe should use double linked list to make update faster */
	/* must be past front of chain */
	while (targp) {
		if (targp->itpfe_next == fep) {
			/* unlink from hash chain */
			targp->itpfe_next = nextp;
			/* link into free list */
			fep->itpfe_next = frag->itpf_freelist;
			frag->itpf_freelist = fep;
			return (nextp);
		}
		targp = targp->itpfe_next;
		ASSERT(targp != 0);
	}
	/* NOTREACHED */
	return (NULL);
}
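
/*
 * Typical use of the cache, as an illustrative sketch only (the caller
 * shown, its variables, and the tunnel-policy source of the cache are
 * assumptions, not part of this file's interfaces):
 *
 *	mp = ipsec_fragcache_add(&itp->itp_fragcache, ipsec_mp, data_mp,
 *	    outer_hdr_len);
 *	if (mp == NULL)
 *		return;		(fragment was cached or dropped; consumed)
 *	for (; mp != NULL; mp = mp->b_next)
 *		... apply per-tunnel policy to each reassembled piece ...
 *
 * On the inbound side each b_next link is an ipsec_in message with the
 * data on b_cont; outbound it is the bare data chain, per the comments
 * in the reassembly path above.
 */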