/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags();
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_reclaim(void *);
static void ipsid_init(void);
static void ipsid_fini(void);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004

/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG}
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Policy rule index generator.  We assume this won't wrap in the
 * lifetime of a system.  If we make 2^20 policy changes per second,
 * this will last 2^44 seconds, or roughly 500,000 years, so we don't
 * have to worry about reusing policy index values.
 *
 * Protected by ipsec_conf_lock.
 */
uint64_t ipsec_next_policy_index = 1;

/*
 * Active & Inactive system policy roots
 */
static ipsec_policy_head_t system_policy;
static ipsec_policy_head_t inactive_policy;

/*
 * Tunnel policies - AVL tree indexed by tunnel name.
 */
krwlock_t tunnel_policy_lock;
uint64_t tunnel_policy_gen;	/* To keep track of updates w/o searches. */
avl_tree_t tunnel_policies;

/* Packet dropper for generic SPD drops. */
ipdropper_t spd_dropper;

/*
 * For now, use a trivially sized hash table for actions.
 * In the future we can add the structure canonicalization necessary
 * to get the hash function to behave correctly..
 */
#define	IPSEC_ACTION_HASH_SIZE 1

/*
 * Selector hash table is statically sized at module load time.
 * we default to 251 buckets, which is the largest prime number under 255
 */

#define	IPSEC_SPDHASH_DEFAULT 251
uint32_t ipsec_spd_hashsize = 0;

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT 5
uint32_t tun_spd_hashsize;


#define	IPSEC_SEL_NOHASH ((uint32_t)(~0))

static HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE];
static HASH_HEAD(ipsec_sel) *ipsec_sel_hash;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

boolean_t ipsec_inbound_v4_policy_present = B_FALSE;
boolean_t ipsec_outbound_v4_policy_present = B_FALSE;
boolean_t ipsec_inbound_v6_policy_present = B_FALSE;
boolean_t ipsec_outbound_v6_policy_present = B_FALSE;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int);

/*
 * Because policy needs to know what algorithms are supported, keep the
 * lists of algorithms here.
 */

kmutex_t alg_lock;
krwlock_t itp_get_byaddr_rw_lock;
ipsec_tun_pol_t *(*itp_get_byaddr)(uint32_t *, uint32_t *, int);
uint8_t ipsec_nalgs[IPSEC_NALGTYPES];
ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
ipsec_algs_exec_mode_t ipsec_algs_exec_mode[IPSEC_NALGTYPES];
static crypto_notify_handle_t prov_update_handle = NULL;

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)
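
/*
 * Integer division truncates, so with an alignment of 8 bits:
 * ALGBITS_ROUND_DOWN(13, 8) == (13/8)*8 == 8, and
 * ALGBITS_ROUND_UP(13, 8) == ALGBITS_ROUND_DOWN(20, 8) == 16.
 * Already-aligned values pass through both macros unchanged.
 */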

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2) 					\
	(((sa1) == NULL) || ((sa2) == NULL) ||			\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&	\
	(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv4 Fragments
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)

/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};
/*
 * Have a counter for every possible policy message in the previous array.
 */
static uint32_t ipsec_policy_failure_count[IPSEC_POLICY_MAX];
/* Time since last ipsec policy failure that printed a message. */
hrtime_t ipsec_policy_failure_last = 0;

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock, ipsec_conf_lock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries..
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * the in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsp_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}
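
/*
 * Concretely: if three duplicate rules share ipsp_index == 5, a
 * template node with ipsp_index == 5 and ipsp_sel == NULL compares
 * less than all of them, so an avl_find() on the template lands just
 * before the first index-5 entry, and a forward (AVL_NEXT) walk then
 * visits each duplicate in turn until the index changes.
 */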

void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));

	}
	ipsec_polhead_free_table(iph);
}

/*
 * Module unload hook.
 */
void
ipsec_policy_destroy(void)
{
	int i;
	void *cookie;
	ipsec_tun_pol_t *node;

	ip_drop_unregister(&spd_dropper);
	ip_drop_destroy();

	rw_enter(&tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&tunnel_policies, &cookie)) != NULL) {
		ITP_REFRELE(node);
	}
	avl_destroy(&tunnel_policies);
	rw_exit(&tunnel_policy_lock);
	rw_destroy(&tunnel_policy_lock);
	ipsec_polhead_destroy(&system_policy);
	ipsec_polhead_destroy(&inactive_policy);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_destroy(&(ipsec_action_hash[i].hash_lock));

	for (i = 0; i < ipsec_spd_hashsize; i++)
		mutex_destroy(&(ipsec_sel_hash[i].hash_lock));

	ipsec_unregister_prov_update();

	mutex_destroy(&alg_lock);

	kmem_cache_destroy(ipsec_action_cache);
	kmem_cache_destroy(ipsec_sel_cache);
	kmem_cache_destroy(ipsec_pol_cache);
	kmem_cache_destroy(ipsec_info_cache);
	ipsid_gc();
	ipsid_fini();
}


/*
 * Called when table allocation fails to free the table.
 */
static int
ipsec_alloc_tables_failed()
{
	if (ipsec_sel_hash != NULL) {
		kmem_free(ipsec_sel_hash, ipsec_spd_hashsize *
		    sizeof (*ipsec_sel_hash));
		ipsec_sel_hash = NULL;
	}
	ipsec_polhead_free_table(&system_policy);
	ipsec_polhead_free_table(&inactive_policy);

	return (ENOMEM);
}

/*
 * Attempt to allocate the tables in a single policy head.
 * Return nonzero on failure after cleaning up any work in progress.
 */
int
ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
    boolean_t global_cleanup)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;
		ipr->ipr_hash = kmem_zalloc(nchains *
		    sizeof (ipsec_policy_hash_t), kmflag);
		if (ipr->ipr_hash == NULL)
			return (global_cleanup ? ipsec_alloc_tables_failed() :
			    ENOMEM);
	}
	return (0);
}

/*
 * Attempt to allocate the various tables.  Return nonzero on failure
 * after cleaning up any work in progress.
 */
static int
ipsec_alloc_tables(int kmflag)
{
	int error;

	error = ipsec_alloc_table(&system_policy, ipsec_spd_hashsize, kmflag,
	    B_TRUE);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&inactive_policy, ipsec_spd_hashsize, kmflag,
	    B_TRUE);
	if (error != 0)
		return (error);

	ipsec_sel_hash = kmem_zalloc(ipsec_spd_hashsize *
	    sizeof (*ipsec_sel_hash), kmflag);

	if (ipsec_sel_hash == NULL)
		return (ipsec_alloc_tables_failed());

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}

/*
 * Module load hook.
 */
void
ipsec_policy_init()
{
	int i;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	if (ipsec_spd_hashsize == 0)
		ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;

	if (ipsec_alloc_tables(KM_NOSLEEP) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipsec_spd_hashsize);
		ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP);
	}

	/* Just set a default for tunnels. */
	if (tun_spd_hashsize == 0)
		tun_spd_hashsize = TUN_SPDHASH_DEFAULT;

	ipsid_init();
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	system_policy.iph_refs = 1;
	inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&system_policy, ipsec_spd_hashsize);
	ipsec_polhead_init(&inactive_policy, ipsec_spd_hashsize);
	rw_init(&tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&tunnel_policies, tunnel_compare, sizeof (ipsec_tun_pol_t),
	    0);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipsec_spd_hashsize; i++)
		mutex_init(&(ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&alg_lock, NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < IPSEC_NALGTYPES; i++)
		ipsec_nalgs[i] = 0;

	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_info_cache = kmem_cache_create("ipsec_info",
	    sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	ip_drop_init();
	ip_drop_register(&spd_dropper, "IPsec SPD");

	/* Set function to dummy until tun is loaded */
	rw_init(&itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
	rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER);
	itp_get_byaddr = itp_get_byaddr_dummy;
	rw_exit(&itp_get_byaddr_rw_lock);
}
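
/*
 * For instance, an administrator wanting more selector hash buckets
 * than IPSEC_SPDHASH_DEFAULT could set the tunable in /etc/system
 * (assuming the usual case of this code living in the ip module):
 *
 *	set ip:ipsec_spd_hashsize = 1021
 *
 * The value is read once here at module load, so a reboot (or module
 * reload) is needed for it to take effect.
 */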

/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid)
{
	ipsec_alginfo_t *ai = ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipsec_nalgs[at];
	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipsec_alglists[at][ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipsec_sortlist[at][i];
			ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		} /* Else just continue. */
	}

	/* Store holder in last slot. */
	ipsec_sortlist[at][i] = holder;
}

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid)
{
	boolean_t copyback = B_FALSE;
	int i;
	int newcount = ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback)
			ipsec_sortlist[at][i-1] = ipsec_sortlist[at][i];
		else if (ipsec_sortlist[at][i] == algid)
			copyback = B_TRUE;
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg)
{
	ASSERT(MUTEX_HELD(&alg_lock));

	ASSERT(ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype);
	ipsec_alglists[algtype][alg->alg_id] = alg;

	ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid)
{
	ASSERT(MUTEX_HELD(&alg_lock));

	ASSERT(ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipsec_alglists[algtype][algid]);
	ipsec_alglists[algtype][algid] = NULL;

	ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid);
}

/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(void)
{
	ipsec_policy_head_t *h = &system_policy;
	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(void)
{
	ipsec_policy_head_t *h = &inactive_policy;
	IPPH_REFHOLD(h);
	return (h);
}

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}

		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags();
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(void)
{
	ipsec_swap_policy(&system_policy, &inactive_policy);
}

/*
 * Clone one policy rule..
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}


static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);
		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}



/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];
		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(void)
{
	return (ipsec_copy_polhead(&system_policy, &inactive_policy));
}

/*
 * Generic "do we have IPvN policy" answer.
 */
boolean_t
iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
{
	int i, hval;
	uint32_t valbit;
	ipsec_policy_root_t *ipr;
	ipsec_policy_t *ipp;

	if (v6) {
		valbit = IPSL_IPV6;
		hval = IPSEC_AF_V6;
	} else {
		valbit = IPSL_IPV4;
		hval = IPSEC_AF_V4;
	}

	ASSERT(RW_LOCK_HELD(&iph->iph_lock));
	for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
		if (ipr->ipr_nonhash[hval] != NULL)
			return (B_TRUE);
		for (i = 0; i < ipr->ipr_nchains; i++) {
			for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
			    ipp = ipp->ipsp_hash.hash_next) {
				if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
					return (B_TRUE);
			}
		}
	}

	return (B_FALSE);
}

/*
 * Extract the string from ipsec_policy_failure_msgs[type] and
 * log it.
 *
 */
void
ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h,
    boolean_t secure)
{
	char	sbuf[INET6_ADDRSTRLEN];
	char	dbuf[INET6_ADDRSTRLEN];
	char	*s;
	char	*d;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL) {
		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
	} else {
		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));

	}

	/* Always bump the policy failure counter. */
	ipsec_policy_failure_count[type]++;

	ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    ipsec_policy_failure_msgs[type], func_name,
	    (secure ? "secure" : "not secure"), s, d);
}

/*
 * Rate-limiting front-end to strlog() for AH and ESP.  Uses the ndd variables
 * in /dev/ip and the same rate-limiting clock so that there's a single
 * knob to turn to throttle the rate of messages.
 */
void
ipsec_rl_strlog(short mid, short sid, char level, ushort_t sl, char *fmt, ...)
{
	va_list adx;
	hrtime_t current = gethrtime();

	sl |= SL_CONSOLE;
	/*
	 * Throttle logging to stop syslog from being swamped.  If variable
	 * 'ipsec_policy_log_interval' is zero, don't log any messages at
	 * all, otherwise log only one message every
	 * 'ipsec_policy_log_interval' msec.  Convert interval (in msec)
	 * to hrtime (in nsec).
	 */

	if (ipsec_policy_log_interval) {
		if (ipsec_policy_failure_last +
		    ((hrtime_t)ipsec_policy_log_interval *
		    (hrtime_t)1000000) <= current) {
			va_start(adx, fmt);
			(void) vstrlog(mid, sid, level, sl, fmt, adx);
			va_end(adx);
			ipsec_policy_failure_last = current;
		}
	}
}

void
ipsec_config_flush()
{
	rw_enter(&system_policy.iph_lock, RW_WRITER);
	ipsec_polhead_flush(&system_policy);
	ipsec_next_policy_index = 1;
	rw_exit(&system_policy.iph_lock);
	ipsec_action_reclaim(0);
}

/*
 * Clip a policy's min/max keybits vs. the capabilities of the
 * algorithm.
 */
static void
act_alg_adjust(uint_t algtype, uint_t algid,
    uint16_t *minbits, uint16_t *maxbits)
{
	ipsec_alginfo_t *algp = ipsec_alglists[algtype][algid];
	if (algp != NULL) {
		/*
		 * If passed-in minbits is zero, we assume the caller trusts
		 * us with setting the minimum key size.  We pick the
		 * algorithm's DEFAULT key size for the minimum in this case.
		 */
		if (*minbits == 0) {
			*minbits = algp->alg_default_bits;
			ASSERT(*minbits >= algp->alg_minbits);
		} else {
			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
			    algp->alg_minbits);
		}
		if (*maxbits == 0)
			*maxbits = algp->alg_maxbits;
		else
			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
			    algp->alg_maxbits);
		ASSERT(*minbits <= *maxbits);
	} else {
		*minbits = 0;
		*maxbits = 0;
	}
}
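
/*
 * A worked example of the clamping above: for an algorithm with
 * alg_minbits = 128, alg_default_bits = 128, and alg_maxbits = 256,
 * a requested minimum of 0 becomes 128 (the default), 40 is clamped up
 * to 128, a requested maximum of 0 becomes 256, and 512 is clamped
 * down to 256.  If the algorithm isn't loaded, both bounds become 0.
 */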

/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag)
{
	ipsec_prot_t *ipp;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max) if (wild) {	\
		int nalgs = ipsec_nalgs[type];			\
		if (ipsec_alglists[type][alg] != NULL)		\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipsec_nalgs[type] - 1;			\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max);

#undef SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
1219 * 1220 * We do encryption outermost since, at this time, there's 1221 * greater difference in security and performance between 1222 * encryption algorithms vs. authentication algorithms. 1223 */ 1224 1225 ai = 0; 1226 1227 #define WHICH_ALG(type, wild, idx) ((wild)?(ipsec_sortlist[type][idx]):(idx)) 1228 1229 for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) { 1230 encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx); 1231 if (wild_encr && encr_alg == SADB_EALG_NONE) 1232 continue; 1233 for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) { 1234 auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth, 1235 auth_idx); 1236 if (wild_auth && auth_alg == SADB_AALG_NONE) 1237 continue; 1238 for (eauth_idx = eauth_min; eauth_idx <= eauth_max; 1239 eauth_idx++) { 1240 eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH, 1241 wild_eauth, eauth_idx); 1242 if (wild_eauth && eauth_alg == SADB_AALG_NONE) 1243 continue; 1244 1245 ipsec_setup_act(&outact[ai], act, 1246 auth_alg, encr_alg, eauth_alg); 1247 ai++; 1248 } 1249 } 1250 } 1251 1252 #undef WHICH_ALG 1253 1254 ASSERT(ai == action_count); 1255 *nact = action_count; 1256 return (outact); 1257 } 1258 1259 /* 1260 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t. 1261 */ 1262 static void 1263 ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp) 1264 { 1265 bzero(ipp, sizeof (*ipp)); 1266 /* 1267 * ipp_use_* are bitfields. Look at "!!" in the following as a 1268 * "boolean canonicalization" operator. 1269 */ 1270 ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED); 1271 ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED); 1272 ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg) || !ipp->ipp_use_ah; 1273 ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED); 1274 ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) & 1275 IPSEC_PREF_UNIQUE); 1276 ipp->ipp_encr_alg = req->ipsr_esp_alg; 1277 ipp->ipp_auth_alg = req->ipsr_auth_alg; 1278 ipp->ipp_esp_auth_alg = req->ipsr_esp_auth_alg; 1279 } 1280 1281 /* 1282 * Extract a new-style action from a request. 1283 */ 1284 void 1285 ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp) 1286 { 1287 struct ipsec_act act; 1288 bzero(&act, sizeof (act)); 1289 if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) && 1290 (req->ipsr_esp_req & IPSEC_PREF_NEVER)) { 1291 act.ipa_type = IPSEC_ACT_BYPASS; 1292 } else { 1293 act.ipa_type = IPSEC_ACT_APPLY; 1294 ipsec_prot_from_req(req, &act.ipa_apply); 1295 } 1296 *actp = ipsec_act_wildcard_expand(&act, nactp); 1297 } 1298 1299 /* 1300 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat). 1301 * We assume caller has already zero'ed *req for us. 1302 */ 1303 static int 1304 ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req) 1305 { 1306 req->ipsr_esp_alg = ipp->ipp_encr_alg; 1307 req->ipsr_auth_alg = ipp->ipp_auth_alg; 1308 req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg; 1309 1310 if (ipp->ipp_use_unique) { 1311 req->ipsr_ah_req |= IPSEC_PREF_UNIQUE; 1312 req->ipsr_esp_req |= IPSEC_PREF_UNIQUE; 1313 } 1314 if (ipp->ipp_use_se) 1315 req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED; 1316 if (ipp->ipp_use_ah) 1317 req->ipsr_ah_req |= IPSEC_PREF_REQUIRED; 1318 if (ipp->ipp_use_esp) 1319 req->ipsr_esp_req |= IPSEC_PREF_REQUIRED; 1320 return (sizeof (*req)); 1321 } 1322 1323 /* 1324 * Convert a new-style action back to an ipsec_req_t (more backwards compat). 1325 * We assume caller has already zero'ed *req for us. 
1326 */ 1327 static int 1328 ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req) 1329 { 1330 switch (ap->ipa_act.ipa_type) { 1331 case IPSEC_ACT_BYPASS: 1332 req->ipsr_ah_req = IPSEC_PREF_NEVER; 1333 req->ipsr_esp_req = IPSEC_PREF_NEVER; 1334 return (sizeof (*req)); 1335 case IPSEC_ACT_APPLY: 1336 return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req)); 1337 } 1338 return (sizeof (*req)); 1339 } 1340 1341 /* 1342 * Convert a new-style action back to an ipsec_req_t (more backwards compat). 1343 * We assume caller has already zero'ed *req for us. 1344 */ 1345 int 1346 ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af) 1347 { 1348 ipsec_policy_t *p; 1349 1350 /* 1351 * FULL-PERSOCK: consult hash table, too? 1352 */ 1353 for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af]; 1354 p != NULL; 1355 p = p->ipsp_hash.hash_next) { 1356 if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0) 1357 return (ipsec_req_from_act(p->ipsp_act, req)); 1358 } 1359 return (sizeof (*req)); 1360 } 1361 1362 /* 1363 * Based on per-socket or latched policy, convert to an appropriate 1364 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can 1365 * be tail-called from ip. 1366 */ 1367 int 1368 ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af) 1369 { 1370 ipsec_latch_t *ipl; 1371 int rv = sizeof (ipsec_req_t); 1372 1373 bzero(req, sizeof (*req)); 1374 1375 mutex_enter(&connp->conn_lock); 1376 ipl = connp->conn_latch; 1377 1378 /* 1379 * Find appropriate policy. First choice is latched action; 1380 * failing that, see latched policy; failing that, 1381 * look at configured policy. 1382 */ 1383 if (ipl != NULL) { 1384 if (ipl->ipl_in_action != NULL) { 1385 rv = ipsec_req_from_act(ipl->ipl_in_action, req); 1386 goto done; 1387 } 1388 if (ipl->ipl_in_policy != NULL) { 1389 rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act, 1390 req); 1391 goto done; 1392 } 1393 } 1394 if (connp->conn_policy != NULL) 1395 rv = ipsec_req_from_head(connp->conn_policy, req, af); 1396 done: 1397 mutex_exit(&connp->conn_lock); 1398 return (rv); 1399 } 1400 1401 void 1402 ipsec_actvec_free(ipsec_act_t *act, uint_t nact) 1403 { 1404 kmem_free(act, nact * sizeof (*act)); 1405 } 1406 1407 /* 1408 * When outbound policy is not cached, look it up the hard way and attach 1409 * an ipsec_out_t to the packet.. 1410 */ 1411 static mblk_t * 1412 ipsec_attach_global_policy(mblk_t *mp, conn_t *connp, ipsec_selector_t *sel) 1413 { 1414 ipsec_policy_t *p; 1415 1416 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel); 1417 1418 if (p == NULL) 1419 return (NULL); 1420 return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol)); 1421 } 1422 1423 /* 1424 * We have an ipsec_out already, but don't have cached policy; fill it in 1425 * with the right actions. 1426 */ 1427 static mblk_t * 1428 ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp, 1429 ipsec_selector_t *sel) 1430 { 1431 ipsec_out_t *io; 1432 ipsec_policy_t *p; 1433 1434 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 1435 ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA); 1436 1437 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1438 1439 if (io->ipsec_out_policy == NULL) { 1440 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel); 1441 io->ipsec_out_policy = p; 1442 } 1443 return (ipsec_mp); 1444 } 1445 1446 1447 /* 1448 * Consumes a reference to ipsp. 
static mblk_t *
ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present,
    ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (first_mp);
}

/*
 * Check that packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;

	ASSERT(ii->ipsec_in_secure);
	ASSERT(!ii->ipsec_in_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */

	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = &ipdrops_spd_ah_innermismatch;
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = &ipdrops_spd_esp_innermismatch;
		return (B_FALSE);
	}
	return (B_TRUE);
}
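
/*
 * To make the mask check concrete: an SA negotiated for a single flow
 * carries a nonzero ipsa_unique_mask covering the port/protocol fields
 * and an ipsa_unique_id recording that flow's values (as packed by
 * SA_UNIQUE_ID).  Masking pkt_unique and comparing asks "is this packet
 * the flow this SA was reserved for?"; any other flow arriving under
 * the SA is dropped with one of the mismatch counters above.
 */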

static boolean_t
ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ii->ipsec_in_loopback) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * icmp_loopback is initialized in ip.c and ip6.c.  (Look for
		 * ipsec_out_icmp_loopback.)
		 */
		if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback)
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = &ipdrops_spd_loopback_mismatch;
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!ii->ipsec_in_icmp_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;

	decaps = ii->ipsec_in_decaps;

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = &ipdrops_spd_explicit;
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = &ipdrops_spd_got_secure;
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = &ipdrops_spd_got_clear;
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = &ipdrops_spd_bad_ahalg;
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = &ipdrops_spd_got_ah;
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = &ipdrops_spd_got_clear;
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = &ipdrops_spd_bad_espealg;
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = &ipdrops_spd_bad_espaalg;
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = &ipdrops_spd_got_esp;
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = &ipdrops_spd_bad_selfencap;
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay.  But we drop to be consistent with the
			 * other cases.
			 */
			*counter = &ipdrops_spd_got_selfencap;
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
}

/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fallback to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}
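
/*
 * Fast-path example: a fully latched TCP connection with
 * conn_fport = 2049 and conn_lport = 32800 skips the packet parse and
 * returns SA_UNIQUE_ID(2049, 32800, IPPROTO_TCP, 0), which callers can
 * then compare against an SA's ipsa_unique_id in
 * ipsec_check_ipsecin_unique() without touching the packet itself.
 */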
1760 */ 1761 if ((ii->ipsec_in_ah_sa != NULL) && 1762 (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) { 1763 *counter = &ipdrops_spd_ah_badid; 1764 *reason = "AH identity mismatch"; 1765 return (B_FALSE); 1766 } 1767 1768 if ((ii->ipsec_in_esp_sa != NULL) && 1769 (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) { 1770 *counter = &ipdrops_spd_esp_badid; 1771 *reason = "ESP identity mismatch"; 1772 return (B_FALSE); 1773 } 1774 1775 /* 1776 * Can fudge pkt_unique from connp because we're latched. 1777 * In DEBUG kernels (see conn_to_unique()'s implementation), 1778 * verify this even if it REALLY slows things down. 1779 */ 1780 if (!ipsec_check_ipsecin_unique(ii, reason, counter, 1781 conn_to_unique(connp, mp, ipha, ip6h))) { 1782 return (B_FALSE); 1783 } 1784 } 1785 1786 return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action, 1787 ipha, ip6h, reason, counter)); 1788 } 1789 1790 /* 1791 * Check to see whether this secured datagram meets the policy 1792 * constraints specified in ipsp. 1793 * 1794 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy. 1795 * 1796 * Consumes a reference to ipsp. 1797 */ 1798 static mblk_t * 1799 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1800 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique) 1801 { 1802 ipsec_in_t *ii; 1803 ipsec_action_t *ap; 1804 const char *reason = "no policy actions found"; 1805 mblk_t *data_mp, *ipsec_mp; 1806 kstat_named_t *counter = &ipdrops_spd_got_secure; 1807 1808 data_mp = first_mp->b_cont; 1809 ipsec_mp = first_mp; 1810 1811 ASSERT(ipsp != NULL); 1812 1813 ASSERT((ipha == NULL && ip6h != NULL) || 1814 (ip6h == NULL && ipha != NULL)); 1815 1816 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1817 1818 if (ii->ipsec_in_loopback) 1819 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1820 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1821 ASSERT(ii->ipsec_in_secure); 1822 1823 if (ii->ipsec_in_action != NULL) { 1824 /* 1825 * this can happen if we do a double policy-check on a packet 1826 * Would be nice to be able to delete this test.. 1827 */ 1828 IPACT_REFRELE(ii->ipsec_in_action); 1829 } 1830 ASSERT(ii->ipsec_in_action == NULL); 1831 1832 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1833 reason = "inbound AH and ESP identities differ"; 1834 counter = &ipdrops_spd_ahesp_diffid; 1835 goto drop; 1836 } 1837 1838 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1839 goto drop; 1840 1841 /* 1842 * Ok, now loop through the possible actions and see if any 1843 * of them work for us. 1844 */ 1845 1846 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 1847 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 1848 ipha, ip6h, &reason, &counter)) { 1849 BUMP_MIB(&ip_mib, ipsecInSucceeded); 1850 IPPOL_REFRELE(ipsp); 1851 return (first_mp); 1852 } 1853 } 1854 drop: 1855 ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 1856 "ipsec inbound policy mismatch: %s, packet dropped\n", 1857 reason); 1858 IPPOL_REFRELE(ipsp); 1859 ASSERT(ii->ipsec_in_action == NULL); 1860 BUMP_MIB(&ip_mib, ipsecInFailed); 1861 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper); 1862 return (NULL); 1863 } 1864 1865 /* 1866 * sleazy prefix-length-based compare. 1867 * another inlining candidate.. 1868 */ 1869 boolean_t 1870 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 1871 { 1872 int offset = pfxlen>>3; 1873 int bitsleft = pfxlen & 7; 1874 uint8_t *addr2 = (uint8_t *)addr2p; 1875 1876 /* 1877 * and there was much evil.. 
	 * XXX should inline-expand the bcmp here and do this 32 bits
	 * or 64 bits at a time..
	 */
	return ((bcmp(addr1, addr2, offset) == 0) &&
	    ((bitsleft == 0) ||
	    (((addr1[offset] ^ addr2[offset]) &
	    (0xff<<(8-bitsleft))) == 0)));
}
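
/*
 * Worked example: with pfxlen = 25, offset is 3 and bitsleft is 1, so
 * the bcmp() covers the first three bytes and the mask compares only
 * the top bit of the fourth byte (0xff << 7 == 0x80).  With pfxlen = 24
 * the mask step disappears (bitsleft == 0); pfxlen = 0 matches any
 * address.
 */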
IPSEC_AF_V4 : IPSEC_AF_V6; 1973 1974 curbest = best; 1975 root = &head->iph_root[direction]; 1976 1977 #ifdef DEBUG 1978 if (is_icmp_inv_acq) { 1979 if (sel->ips_isv4) { 1980 if (sel->ips_protocol != IPPROTO_ICMP) { 1981 cmn_err(CE_WARN, "ipsec_find_policy_head:" 1982 " expecting icmp, got %d", sel->ips_protocol); 1983 } 1984 } else { 1985 if (sel->ips_protocol != IPPROTO_ICMPV6) { 1986 cmn_err(CE_WARN, "ipsec_find_policy_head:" 1987 " expecting icmpv6, got %d", sel->ips_protocol); 1988 } 1989 } 1990 } 1991 #endif 1992 1993 rw_enter(&head->iph_lock, RW_READER); 1994 1995 if (root->ipr_nchains > 0) { 1996 curbest = ipsec_find_policy_chain(curbest, 1997 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 1998 is_icmp_inv_acq); 1999 } 2000 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2001 is_icmp_inv_acq); 2002 2003 /* 2004 * Adjust reference counts if we found anything new. 2005 */ 2006 if (curbest != best) { 2007 ASSERT(curbest != NULL); 2008 IPPOL_REFHOLD(curbest); 2009 2010 if (best != NULL) { 2011 IPPOL_REFRELE(best); 2012 } 2013 } 2014 2015 rw_exit(&head->iph_lock); 2016 2017 return (curbest); 2018 } 2019 2020 /* 2021 * Find the best system policy (either global or per-interface) which 2022 * applies to the given selector; look in all the relevant policy roots 2023 * to figure out which policy wins. 2024 * 2025 * Returns a reference to a policy; caller must release this 2026 * reference when done. 2027 */ 2028 ipsec_policy_t * 2029 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2030 ipsec_selector_t *sel) 2031 { 2032 ipsec_policy_t *p; 2033 2034 p = ipsec_find_policy_head(NULL, &system_policy, direction, sel); 2035 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2036 p = ipsec_find_policy_head(p, connp->conn_policy, 2037 direction, sel); 2038 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2039 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2040 direction, sel); 2041 } 2042 2043 return (p); 2044 } 2045 2046 /* 2047 * Check with global policy and see whether this inbound 2048 * packet meets the policy constraints. 2049 * 2050 * Locate appropriate policy from global policy, supplemented by the 2051 * conn's configured and/or cached policy if the conn is supplied. 2052 * 2053 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2054 * encrypted packet to see if they match. 2055 * 2056 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2057 * floor. 2058 */ 2059 mblk_t * 2060 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2061 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2062 { 2063 ipsec_policy_t *p; 2064 ipsec_selector_t sel; 2065 mblk_t *data_mp, *ipsec_mp; 2066 boolean_t policy_present; 2067 kstat_named_t *counter; 2068 ipsec_in_t *ii = NULL; 2069 uint64_t pkt_unique; 2070 2071 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2072 ipsec_mp = mctl_present ? 
first_mp : NULL; 2073 2074 sel.ips_is_icmp_inv_acq = 0; 2075 2076 ASSERT((ipha == NULL && ip6h != NULL) || 2077 (ip6h == NULL && ipha != NULL)); 2078 2079 if (ipha != NULL) 2080 policy_present = ipsec_inbound_v4_policy_present; 2081 else 2082 policy_present = ipsec_inbound_v6_policy_present; 2083 2084 if (!policy_present && connp == NULL) { 2085 /* 2086 * No global policy and no per-socket policy; 2087 * just pass it back (but we shouldn't get here in that case) 2088 */ 2089 return (first_mp); 2090 } 2091 2092 if (ipsec_mp != NULL) { 2093 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2094 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2095 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2096 } 2097 2098 /* 2099 * If we have cached policy, use it. 2100 * Otherwise consult system policy. 2101 */ 2102 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2103 p = connp->conn_latch->ipl_in_policy; 2104 if (p != NULL) { 2105 IPPOL_REFHOLD(p); 2106 } 2107 /* 2108 * Fudge sel for UNIQUE_ID setting below. 2109 */ 2110 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2111 } else { 2112 /* Initialize the ports in the selector */ 2113 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2114 SEL_NONE) == SELRET_NOMEM) { 2115 /* 2116 * Technically not a policy mismatch, but it is 2117 * an internal failure. 2118 */ 2119 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2120 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE); 2121 counter = &ipdrops_spd_nomem; 2122 goto fail; 2123 } 2124 2125 /* 2126 * Find the policy which best applies. 2127 * 2128 * If we find global policy, we should look at both 2129 * local policy and global policy and see which is 2130 * stronger and match accordingly. 2131 * 2132 * If we don't find a global policy, check with 2133 * local policy alone. 2134 */ 2135 2136 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel); 2137 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2138 sel.ips_local_port, sel.ips_protocol, 0); 2139 } 2140 2141 if (p == NULL) { 2142 if (ipsec_mp == NULL) { 2143 /* 2144 * We have no policy; default to succeeding. 2145 * XXX paranoid system design doesn't do this. 2146 */ 2147 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2148 return (first_mp); 2149 } else { 2150 counter = &ipdrops_spd_got_secure; 2151 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2152 "ipsec_check_global_policy", ipha, ip6h, B_TRUE); 2153 goto fail; 2154 } 2155 } 2156 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2157 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2158 pkt_unique)); 2159 } 2160 if (p->ipsp_act->ipa_allow_clear) { 2161 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2162 IPPOL_REFRELE(p); 2163 return (first_mp); 2164 } 2165 IPPOL_REFRELE(p); 2166 /* 2167 * If we reach here, we will drop the packet because it failed the 2168 * global policy check because the packet was cleartext, and it 2169 * should not have been. 2170 */ 2171 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2172 "ipsec_check_global_policy", ipha, ip6h, B_FALSE); 2173 counter = &ipdrops_spd_got_clear; 2174 2175 fail: 2176 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper); 2177 BUMP_MIB(&ip_mib, ipsecInFailed); 2178 return (NULL); 2179 } 2180 2181 /* 2182 * We check whether an inbound datagram is a valid one 2183 * to accept in clear. If it is secure, it is the job 2184 * of IPSEC to log information appropriately if it 2185 * suspects that it may not be the real one. 
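 *
 * A minimal sketch of the intended caller logic (hypothetical; the
 * real fanout paths are more involved):
 *
 *	if (ulp_wants_clear || ipsec_inbound_accept_clear(mp, ipha, ip6h))
 *		deliver the datagram;
 *	else
 *		drop it and log a policy failure;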
2186 *
2187 * It is called only while fanning out to the ULP, when the ULP
2188 * accepts only secure data and the incoming datagram is clear.
2189 * Usually we never accept clear datagrams in
2190 * such cases. ICMP is the only exception.
2191 *
2192 * NOTE : We don't call this function if the client (ULP)
2193 * is willing to accept things in clear.
2194 */
2195 boolean_t
2196 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
2197 {
2198 ushort_t iph_hdr_length;
2199 icmph_t *icmph;
2200 icmp6_t *icmp6;
2201 uint8_t *nexthdrp;
2202
2203 ASSERT((ipha != NULL && ip6h == NULL) ||
2204 (ipha == NULL && ip6h != NULL));
2205
2206 if (ip6h != NULL) {
2207 iph_hdr_length = ip_hdr_length_v6(mp, ip6h);
2208 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length,
2209 &nexthdrp)) {
2210 return (B_FALSE);
2211 }
2212 if (*nexthdrp != IPPROTO_ICMPV6)
2213 return (B_FALSE);
2214 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]);
2215 /* Match IPv6 ICMP policy to the IPv4 policy as closely as possible. */
2216 switch (icmp6->icmp6_type) {
2217 case ICMP6_PARAM_PROB:
2218 /* Corresponds to port/proto unreach in IPv4. */
2219 case ICMP6_ECHO_REQUEST:
2220 /* Just like IPv4. */
2221 return (B_FALSE);
2222
2223 case MLD_LISTENER_QUERY:
2224 case MLD_LISTENER_REPORT:
2225 case MLD_LISTENER_REDUCTION:
2226 /*
2227 * XXX Separate NDD tunable in IPv4; what about here?
2228 * Plus, mcast is important to ND.
2229 */
2230 case ICMP6_DST_UNREACH:
2231 /* Corresponds to HOST/NET unreachable in IPv4. */
2232 case ICMP6_PACKET_TOO_BIG:
2233 case ICMP6_ECHO_REPLY:
2234 /* These are trusted in IPv4. */
2235 case ND_ROUTER_SOLICIT:
2236 case ND_ROUTER_ADVERT:
2237 case ND_NEIGHBOR_SOLICIT:
2238 case ND_NEIGHBOR_ADVERT:
2239 case ND_REDIRECT:
2240 /* Trust ND messages for now. */
2241 case ICMP6_TIME_EXCEEDED:
2242 default:
2243 return (B_TRUE);
2244 }
2245 } else {
2246 /*
2247 * If it is not ICMP, fail this request.
2248 */
2249 if (ipha->ipha_protocol != IPPROTO_ICMP) {
2250 #ifdef FRAGCACHE_DEBUG
2251 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
2252 ipha->ipha_protocol);
2253 #endif
2254 return (B_FALSE);
2255 }
2256 iph_hdr_length = IPH_HDR_LENGTH(ipha);
2257 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2258 /*
2259 * It is an insecure icmp message. Check to see whether we are
2260 * willing to accept this one.
2261 */
2262
2263 switch (icmph->icmph_type) {
2264 case ICMP_ECHO_REPLY:
2265 case ICMP_TIME_STAMP_REPLY:
2266 case ICMP_INFO_REPLY:
2267 case ICMP_ROUTER_ADVERTISEMENT:
2268 /*
2269 * We should not encourage clear replies if this
2270 * client expects secure. If somebody replies in
2271 * clear, a malicious user watching both the
2272 * request and reply can mount chosen-plaintext attacks.
2273 * With global policy we might be just expecting secure
2274 * but sending out clear. We don't know what the right
2275 * thing is. We can't do much here as we can't control
2276 * the sender here. Till we are sure of what to do,
2277 * accept them.
2278 */
2279 return (B_TRUE);
2280 case ICMP_ECHO_REQUEST:
2281 case ICMP_TIME_STAMP_REQUEST:
2282 case ICMP_INFO_REQUEST:
2283 case ICMP_ADDRESS_MASK_REQUEST:
2284 case ICMP_ROUTER_SOLICITATION:
2285 case ICMP_ADDRESS_MASK_REPLY:
2286 /*
2287 * Don't accept this, as somebody could be sending
2288 * us plaintext to get encrypted data. If we reply,
2289 * it will lead to a chosen-plaintext attack.
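 * (Concretely: an attacker who can get us to echo chosen cleartext
 * into an encrypted reply learns ciphertext for plaintext of the
 * attacker's choosing, which weakens some ciphers.)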
2290 */
2291 return (B_FALSE);
2292 case ICMP_DEST_UNREACHABLE:
2293 switch (icmph->icmph_code) {
2294 case ICMP_FRAGMENTATION_NEEDED:
2295 /*
2296 * Be in sync with icmp_inbound, where we have
2297 * already set ire_max_frag.
2298 */
2299 #ifdef FRAGCACHE_DEBUG
2300 cmn_err(CE_WARN, "ICMP frag needed\n");
2301 #endif
2302 return (B_TRUE);
2303 case ICMP_HOST_UNREACHABLE:
2304 case ICMP_NET_UNREACHABLE:
2305 /*
2306 * By accepting, we could reset a connection.
2307 * How do we solve the problem of some
2308 * intermediate router sending insecure ICMP
2309 * messages?
2310 */
2311 return (B_TRUE);
2312 case ICMP_PORT_UNREACHABLE:
2313 case ICMP_PROTOCOL_UNREACHABLE:
2314 default:
2315 return (B_FALSE);
2316 }
2317 case ICMP_SOURCE_QUENCH:
2318 /*
2319 * If this is an attack, TCP will slow start
2320 * because of this. Is it very harmful?
2321 */
2322 return (B_TRUE);
2323 case ICMP_PARAM_PROBLEM:
2324 return (B_FALSE);
2325 case ICMP_TIME_EXCEEDED:
2326 return (B_TRUE);
2327 case ICMP_REDIRECT:
2328 return (B_FALSE);
2329 default:
2330 return (B_FALSE);
2331 }
2332 }
2333 }
2334
2335 void
2336 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote)
2337 {
2338 mutex_enter(&ipl->ipl_lock);
2339
2340 if (ipl->ipl_ids_latched) {
2341 /* I lost; someone else got here before me */
2342 mutex_exit(&ipl->ipl_lock);
2343 return;
2344 }
2345
2346 if (local != NULL)
2347 IPSID_REFHOLD(local);
2348 if (remote != NULL)
2349 IPSID_REFHOLD(remote);
2350
2351 ipl->ipl_local_cid = local;
2352 ipl->ipl_remote_cid = remote;
2353 ipl->ipl_ids_latched = B_TRUE;
2354 mutex_exit(&ipl->ipl_lock);
2355 }
2356
2357 void
2358 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii)
2359 {
2360 ipsa_t *sa;
2361
2362 if (!ipl->ipl_ids_latched) {
2363 ipsid_t *local = NULL;
2364 ipsid_t *remote = NULL;
2365
2366 if (!ii->ipsec_in_loopback) {
2367 if (ii->ipsec_in_esp_sa != NULL)
2368 sa = ii->ipsec_in_esp_sa;
2369 else
2370 sa = ii->ipsec_in_ah_sa;
2371 ASSERT(sa != NULL);
2372 local = sa->ipsa_dst_cid;
2373 remote = sa->ipsa_src_cid;
2374 }
2375 ipsec_latch_ids(ipl, local, remote);
2376 }
2377 ipl->ipl_in_action = ii->ipsec_in_action;
2378 IPACT_REFHOLD(ipl->ipl_in_action);
2379 }
2380
2381 /*
2382 * Check whether the policy constraints are met for an
2383 * inbound datagram; called from IP in numerous places.
2384 *
2385 * Note that this is not a chokepoint for inbound policy checks;
2386 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy().
2387 */
2388 mblk_t *
2389 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp,
2390 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present)
2391 {
2392 ipsec_in_t *ii;
2393 boolean_t ret;
2394 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp;
2395 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL;
2396 ipsec_latch_t *ipl;
2397 uint64_t unique_id;
2398
2399 ASSERT(connp != NULL);
2400 ipl = connp->conn_latch;
2401
2402 if (ipsec_mp == NULL) {
2403 clear:
2404 /*
2405 * This is the case where the incoming datagram is
2406 * cleartext and we need to see whether this client
2407 * would like to receive such untrustworthy things from
2408 * the wire.
2409 */
2410 ASSERT(mp != NULL);
2411
2412 if (ipl != NULL) {
2413 /*
2414 * Policy is cached in the conn.
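 * If the latched inbound policy does not allow clear packets, we
 * may still accept a narrow set of ICMP messages; see
 * ipsec_inbound_accept_clear() above.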
2415 */
2416 if ((ipl->ipl_in_policy != NULL) &&
2417 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) {
2418 ret = ipsec_inbound_accept_clear(mp,
2419 ipha, ip6h);
2420 if (ret) {
2421 BUMP_MIB(&ip_mib, ipsecInSucceeded);
2422 return (first_mp);
2423 } else {
2424 ipsec_log_policy_failure(
2425 IPSEC_POLICY_MISMATCH,
2426 "ipsec_check_inbound_policy", ipha,
2427 ip6h, B_FALSE);
2428 ip_drop_packet(first_mp, B_TRUE, NULL,
2429 NULL, &ipdrops_spd_got_clear,
2430 &spd_dropper);
2431 BUMP_MIB(&ip_mib, ipsecInFailed);
2432 return (NULL);
2433 }
2434 } else {
2435 BUMP_MIB(&ip_mib, ipsecInSucceeded);
2436 return (first_mp);
2437 }
2438 } else {
2439 /*
2440 * As this is a non-hardbound connection we need
2441 * to look at both per-socket policy and global
2442 * policy. As this is cleartext, mark the mp as
2443 * M_DATA in case it is an ICMP error being
2444 * reported before calling ipsec_check_global_policy
2445 * so that it does not mistake it for IPSEC_IN.
2446 */
2447 uchar_t db_type = mp->b_datap->db_type;
2448 mp->b_datap->db_type = M_DATA;
2449 first_mp = ipsec_check_global_policy(first_mp, connp,
2450 ipha, ip6h, mctl_present);
2451 if (first_mp != NULL)
2452 mp->b_datap->db_type = db_type;
2453 return (first_mp);
2454 }
2455 }
2456 /*
2457 * If it is inbound, check whether the attached message
2458 * is secure or not. We have a special case for ICMP,
2459 * where we have an IPSEC_IN message and the attached
2460 * message is not secure. See icmp_inbound_error_fanout
2461 * for details.
2462 */
2463 ASSERT(ipsec_mp != NULL);
2464 ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
2465 ii = (ipsec_in_t *)ipsec_mp->b_rptr;
2466
2467 if (!ii->ipsec_in_secure)
2468 goto clear;
2469
2470 /*
2471 * mp->b_cont could be either a M_CTL message
2472 * for icmp errors being sent up or a M_DATA message.
2473 */
2474 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA);
2475
2476 ASSERT(ii->ipsec_in_type == IPSEC_IN);
2477
2478 if (ipl == NULL) {
2479 /*
2480 * We don't have policies cached in the conn
2481 * for this stream. So, look at the global
2482 * policy. It will check against conn or global
2483 * depending on whichever is stronger.
2484 */
2485 return (ipsec_check_global_policy(first_mp, connp,
2486 ipha, ip6h, mctl_present));
2487 }
2488
2489 if (ipl->ipl_in_action != NULL) {
2490 /* Policy is cached & latched; fast(er) path */
2491 const char *reason;
2492 kstat_named_t *counter;
2493 if (ipsec_check_ipsecin_latch(ii, mp, ipl,
2494 ipha, ip6h, &reason, &counter, connp)) {
2495 BUMP_MIB(&ip_mib, ipsecInSucceeded);
2496 return (first_mp);
2497 }
2498 ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
2499 "ipsec inbound policy mismatch: %s, packet dropped\n",
2500 reason);
2501 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter,
2502 &spd_dropper);
2503 BUMP_MIB(&ip_mib, ipsecInFailed);
2504 return (NULL);
2505 } else if (ipl->ipl_in_policy == NULL) {
2506 ipsec_weird_null_inbound_policy++;
2507 return (first_mp);
2508 }
2509
2510 unique_id = conn_to_unique(connp, mp, ipha, ip6h);
2511 IPPOL_REFHOLD(ipl->ipl_in_policy);
2512 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy,
2513 ipha, ip6h, unique_id);
2514 /*
2515 * NOTE: ipsecIn{Failed,Succeeded} bumped by
2516 * ipsec_check_ipsecin_policy().
2517 */
2518 if (first_mp != NULL)
2519 ipsec_latch_inbound(ipl, ii);
2520 return (first_mp);
2521 }
2522
2523 /*
2524 * Returns:
2525 *
2526 * SELRET_NOMEM --> the msgpullup() needed to gather things failed.
2527 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment
2528 * gathering, the initial fragment is too short for
2529 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is
2530 * set.
2531 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data.
2532 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller
2533 * should put this packet in a fragment-gathering queue.
2534 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY
2535 * are set.
2536 */
2537 static selret_t
2538 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
2539 ip6_t *ip6h, uint8_t sel_flags)
2540 {
2541 uint16_t *ports;
2542 ushort_t hdr_len;
2543 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases... */
2544 mblk_t *spare_mp = NULL;
2545 uint8_t *nexthdrp;
2546 uint8_t nexthdr;
2547 uint8_t *typecode;
2548 uint8_t check_proto;
2549 ip6_pkt_t ipp;
2550 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY);
2551 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP);
2552 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE);
2553
2554 ASSERT((ipha == NULL && ip6h != NULL) ||
2555 (ipha != NULL && ip6h == NULL));
2556
2557 if (ip6h != NULL) {
2558 if (is_icmp)
2559 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr;
2560
2561 check_proto = IPPROTO_ICMPV6;
2562 sel->ips_isv4 = B_FALSE;
2563 sel->ips_local_addr_v6 = ip6h->ip6_dst;
2564 sel->ips_remote_addr_v6 = ip6h->ip6_src;
2565
2566 bzero(&ipp, sizeof (ipp));
2567 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL);
2568
2569 nexthdr = ip6h->ip6_nxt;
2570 switch (nexthdr) {
2571 case IPPROTO_HOPOPTS:
2572 case IPPROTO_ROUTING:
2573 case IPPROTO_DSTOPTS:
2574 case IPPROTO_FRAGMENT:
2575 /*
2576 * Use ip_hdr_length_nexthdr_v6(), and have a spare
2577 * mblk that's contiguous to feed it.
2578 */
2579 if ((spare_mp = msgpullup(mp, -1)) == NULL)
2580 return (SELRET_NOMEM);
2581 if (!ip_hdr_length_nexthdr_v6(spare_mp,
2582 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len),
2583 &hdr_len, &nexthdrp)) {
2584 /* Malformed packet - caller frees. */
2585 ipsec_freemsg_chain(spare_mp);
2586 return (SELRET_BADPKT);
2587 }
2588 nexthdr = *nexthdrp;
2589 /* We can just extract based on hdr_len now. */
2590 break;
2591 default:
2592 hdr_len = IPV6_HDR_LEN;
2593 break;
2594 }
2595
2596 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) {
2597 /* IPv6 Fragment */
2598 ipsec_freemsg_chain(spare_mp);
2599 return (SELRET_TUNFRAG);
2600 }
2601 } else {
2602 if (is_icmp)
2603 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr;
2604 check_proto = IPPROTO_ICMP;
2605 sel->ips_isv4 = B_TRUE;
2606 sel->ips_local_addr_v4 = ipha->ipha_dst;
2607 sel->ips_remote_addr_v4 = ipha->ipha_src;
2608 nexthdr = ipha->ipha_protocol;
2609 hdr_len = IPH_HDR_LENGTH(ipha);
2610
2611 if (port_policy_present &&
2612 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) &&
2613 !is_icmp) {
2614 /* IPv4 Fragment */
2615 ipsec_freemsg_chain(spare_mp);
2616 return (SELRET_TUNFRAG);
2617 }
2618
2619 }
2620 sel->ips_protocol = nexthdr;
2621
2622 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
2623 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) ||
2624 (!port_policy_present && tunnel_mode)) {
2625 sel->ips_remote_port = sel->ips_local_port = 0;
2626 ipsec_freemsg_chain(spare_mp);
2627 return (SELRET_SUCCESS);
2628 }
2629
2630 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) {
2631 /* If we didn't pullup a copy already, do so now. */
2632 /*
2633 * XXX performance, will upper-layers frequently split TCP/UDP
2634 * apart from IP or options?
If so, perhaps we should revisit 2635 * the spare_mp strategy. 2636 */ 2637 ipsec_hdr_pullup_needed++; 2638 if (spare_mp == NULL && 2639 (spare_mp = msgpullup(mp, -1)) == NULL) { 2640 return (SELRET_NOMEM); 2641 } 2642 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2643 } else { 2644 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2645 } 2646 2647 if (nexthdr == check_proto) { 2648 typecode = (uint8_t *)ports; 2649 sel->ips_icmp_type = *typecode++; 2650 sel->ips_icmp_code = *typecode; 2651 sel->ips_remote_port = sel->ips_local_port = 0; 2652 } else { 2653 sel->ips_remote_port = *ports++; 2654 sel->ips_local_port = *ports; 2655 } 2656 ipsec_freemsg_chain(spare_mp); 2657 return (SELRET_SUCCESS); 2658 } 2659 2660 static boolean_t 2661 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2662 ip6_t *ip6h, int outer_hdr_len) 2663 { 2664 /* 2665 * XXX cut&paste shared with ipsec_init_inbound_sel 2666 */ 2667 uint16_t *ports; 2668 ushort_t hdr_len; 2669 mblk_t *spare_mp = NULL; 2670 uint8_t *nexthdrp; 2671 uint8_t nexthdr; 2672 uint8_t *typecode; 2673 uint8_t check_proto; 2674 2675 ASSERT((ipha == NULL && ip6h != NULL) || 2676 (ipha != NULL && ip6h == NULL)); 2677 2678 if (ip6h != NULL) { 2679 check_proto = IPPROTO_ICMPV6; 2680 nexthdr = ip6h->ip6_nxt; 2681 switch (nexthdr) { 2682 case IPPROTO_HOPOPTS: 2683 case IPPROTO_ROUTING: 2684 case IPPROTO_DSTOPTS: 2685 case IPPROTO_FRAGMENT: 2686 /* 2687 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2688 * mblk that's contiguous to feed it 2689 */ 2690 spare_mp = msgpullup(mp, -1); 2691 if (spare_mp == NULL || 2692 !ip_hdr_length_nexthdr_v6(spare_mp, 2693 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2694 &hdr_len, &nexthdrp)) { 2695 /* Always works, even if NULL. */ 2696 ipsec_freemsg_chain(spare_mp); 2697 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2698 &ipdrops_spd_nomem, &spd_dropper); 2699 return (B_FALSE); 2700 } else { 2701 nexthdr = *nexthdrp; 2702 /* We can just extract based on hdr_len now. */ 2703 } 2704 break; 2705 default: 2706 hdr_len = IPV6_HDR_LEN; 2707 break; 2708 } 2709 } else { 2710 check_proto = IPPROTO_ICMP; 2711 hdr_len = IPH_HDR_LENGTH(ipha); 2712 nexthdr = ipha->ipha_protocol; 2713 } 2714 2715 sel->ips_protocol = nexthdr; 2716 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2717 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2718 sel->ips_local_port = sel->ips_remote_port = 0; 2719 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2720 return (B_TRUE); 2721 } 2722 2723 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2724 /* If we didn't pullup a copy already, do so now. */ 2725 /* 2726 * XXX performance, will upper-layers frequently split TCP/UDP 2727 * apart from IP or options? If so, perhaps we should revisit 2728 * the spare_mp strategy. 2729 * 2730 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
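 * (Presumably yes in spirit: a bounded pullup such as
 * msgpullup(mp, outer_hdr_len + hdr_len + 4) would copy only the
 * bytes through the port/type fields instead of the whole chain; a
 * sketch, assuming no later consumer needs the rest of spare_mp.)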
2731 */ 2732 if (spare_mp == NULL && 2733 (spare_mp = msgpullup(mp, -1)) == NULL) { 2734 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2735 &ipdrops_spd_nomem, &spd_dropper); 2736 return (B_FALSE); 2737 } 2738 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2739 } else { 2740 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2741 } 2742 2743 if (nexthdr == check_proto) { 2744 typecode = (uint8_t *)ports; 2745 sel->ips_icmp_type = *typecode++; 2746 sel->ips_icmp_code = *typecode; 2747 sel->ips_remote_port = sel->ips_local_port = 0; 2748 } else { 2749 sel->ips_local_port = *ports++; 2750 sel->ips_remote_port = *ports; 2751 } 2752 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2753 return (B_TRUE); 2754 } 2755 2756 /* 2757 * Create an ipsec_action_t based on the way an inbound packet was protected. 2758 * Used to reflect traffic back to a sender. 2759 * 2760 * We don't bother interning the action into the hash table. 2761 */ 2762 ipsec_action_t * 2763 ipsec_in_to_out_action(ipsec_in_t *ii) 2764 { 2765 ipsa_t *ah_assoc, *esp_assoc; 2766 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2767 ipsec_action_t *ap; 2768 boolean_t unique; 2769 2770 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2771 2772 if (ap == NULL) 2773 return (NULL); 2774 2775 bzero(ap, sizeof (*ap)); 2776 HASH_NULL(ap, ipa_hash); 2777 ap->ipa_next = NULL; 2778 ap->ipa_refs = 1; 2779 2780 /* 2781 * Get the algorithms that were used for this packet. 2782 */ 2783 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2784 ap->ipa_act.ipa_log = 0; 2785 ah_assoc = ii->ipsec_in_ah_sa; 2786 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 2787 2788 esp_assoc = ii->ipsec_in_esp_sa; 2789 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 2790 2791 if (esp_assoc != NULL) { 2792 encr_alg = esp_assoc->ipsa_encr_alg; 2793 espa_alg = esp_assoc->ipsa_auth_alg; 2794 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 2795 } 2796 if (ah_assoc != NULL) 2797 auth_alg = ah_assoc->ipsa_auth_alg; 2798 2799 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 2800 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 2801 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 2802 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 2803 unique = B_FALSE; 2804 2805 if (esp_assoc != NULL) { 2806 ap->ipa_act.ipa_apply.ipp_espa_minbits = 2807 esp_assoc->ipsa_authkeybits; 2808 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 2809 esp_assoc->ipsa_authkeybits; 2810 ap->ipa_act.ipa_apply.ipp_espe_minbits = 2811 esp_assoc->ipsa_encrkeybits; 2812 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 2813 esp_assoc->ipsa_encrkeybits; 2814 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 2815 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 2816 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 2817 unique = B_TRUE; 2818 } 2819 if (ah_assoc != NULL) { 2820 ap->ipa_act.ipa_apply.ipp_ah_minbits = 2821 ah_assoc->ipsa_authkeybits; 2822 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 2823 ah_assoc->ipsa_authkeybits; 2824 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 2825 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 2826 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 2827 unique = B_TRUE; 2828 } 2829 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 2830 ap->ipa_want_unique = unique; 2831 ap->ipa_allow_clear = B_FALSE; 2832 ap->ipa_want_se = ii->ipsec_in_decaps; 2833 ap->ipa_want_ah = (ah_assoc != NULL); 2834 ap->ipa_want_esp = (esp_assoc != NULL); 2835 2836 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 2837 
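	/*
	 * Note: minbits == maxbits == the inbound SA's actual key size
	 * in the assignments above, so the reflected action demands
	 * exactly the strength that protected the inbound packet.
	 */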
2838 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 2839 2840 return (ap); 2841 } 2842 2843 2844 /* 2845 * Compute the worst-case amount of extra space required by an action. 2846 * Note that, because of the ESP considerations listed below, this is 2847 * actually not the same as the best-case reduction in the MTU; in the 2848 * future, we should pass additional information to this function to 2849 * allow the actual MTU impact to be computed. 2850 * 2851 * AH: Revisit this if we implement algorithms with 2852 * a verifier size of more than 12 bytes. 2853 * 2854 * ESP: A more exact but more messy computation would take into 2855 * account the interaction between the cipher block size and the 2856 * effective MTU, yielding the inner payload size which reflects a 2857 * packet with *minimum* ESP padding.. 2858 */ 2859 int32_t 2860 ipsec_act_ovhd(const ipsec_act_t *act) 2861 { 2862 int32_t overhead = 0; 2863 2864 if (act->ipa_type == IPSEC_ACT_APPLY) { 2865 const ipsec_prot_t *ipp = &act->ipa_apply; 2866 2867 if (ipp->ipp_use_ah) 2868 overhead += IPSEC_MAX_AH_HDR_SIZE; 2869 if (ipp->ipp_use_esp) { 2870 overhead += IPSEC_MAX_ESP_HDR_SIZE; 2871 overhead += sizeof (struct udphdr); 2872 } 2873 if (ipp->ipp_use_se) 2874 overhead += IP_SIMPLE_HDR_LENGTH; 2875 } 2876 return (overhead); 2877 } 2878 2879 /* 2880 * This hash function is used only when creating policies and thus is not 2881 * performance-critical for packet flows. 2882 * 2883 * Future work: canonicalize the structures hashed with this (i.e., 2884 * zeroize padding) so the hash works correctly. 2885 */ 2886 /* ARGSUSED */ 2887 static uint32_t 2888 policy_hash(int size, const void *start, const void *end) 2889 { 2890 return (0); 2891 } 2892 2893 2894 /* 2895 * Hash function macros for each address type. 2896 * 2897 * The IPV6 hash function assumes that the low order 32-bits of the 2898 * address (typically containing the low order 24 bits of the mac 2899 * address) are reasonably well-distributed. Revisit this if we run 2900 * into trouble from lots of collisions on ::1 addresses and the like 2901 * (seems unlikely). 2902 */ 2903 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 2904 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 2905 2906 /* 2907 * These two hash functions should produce coordinated values 2908 * but have slightly different roles. 2909 */ 2910 static uint32_t 2911 selkey_hash(const ipsec_selkey_t *selkey) 2912 { 2913 uint32_t valid = selkey->ipsl_valid; 2914 2915 if (!(valid & IPSL_REMOTE_ADDR)) 2916 return (IPSEC_SEL_NOHASH); 2917 2918 if (valid & IPSL_IPV4) { 2919 if (selkey->ipsl_remote_pfxlen == 32) 2920 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 2921 ipsec_spd_hashsize)); 2922 } 2923 if (valid & IPSL_IPV6) { 2924 if (selkey->ipsl_remote_pfxlen == 128) 2925 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 2926 ipsec_spd_hashsize)); 2927 } 2928 return (IPSEC_SEL_NOHASH); 2929 } 2930 2931 static uint32_t 2932 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 2933 { 2934 if (sel->ips_isv4) { 2935 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 2936 root->ipr_nchains)); 2937 } 2938 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 2939 } 2940 2941 /* 2942 * Intern actions into the action hash table. 
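 *
 * Interning means each distinct action chain is stored once and shared
 * by reference. A hedged sketch of the contract (names from this file):
 *
 *	ap = ipsec_act_find(acts, nacts);	(returns held reference)
 *	...
 *	IPACT_REFRELE(ap);			(drop it when done)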
2943 */ 2944 ipsec_action_t * 2945 ipsec_act_find(const ipsec_act_t *a, int n) 2946 { 2947 int i; 2948 uint32_t hval; 2949 ipsec_action_t *ap; 2950 ipsec_action_t *prev = NULL; 2951 int32_t overhead, maxovhd = 0; 2952 boolean_t allow_clear = B_FALSE; 2953 boolean_t want_ah = B_FALSE; 2954 boolean_t want_esp = B_FALSE; 2955 boolean_t want_se = B_FALSE; 2956 boolean_t want_unique = B_FALSE; 2957 2958 /* 2959 * TODO: should canonicalize a[] (i.e., zeroize any padding) 2960 * so we can use a non-trivial policy_hash function. 2961 */ 2962 for (i = n-1; i >= 0; i--) { 2963 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 2964 2965 HASH_LOCK(ipsec_action_hash, hval); 2966 2967 for (HASH_ITERATE(ap, ipa_hash, ipsec_action_hash, hval)) { 2968 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 2969 continue; 2970 if (ap->ipa_next != prev) 2971 continue; 2972 break; 2973 } 2974 if (ap != NULL) { 2975 HASH_UNLOCK(ipsec_action_hash, hval); 2976 prev = ap; 2977 continue; 2978 } 2979 /* 2980 * need to allocate a new one.. 2981 */ 2982 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2983 if (ap == NULL) { 2984 HASH_UNLOCK(ipsec_action_hash, hval); 2985 if (prev != NULL) 2986 ipsec_action_free(prev); 2987 return (NULL); 2988 } 2989 HASH_INSERT(ap, ipa_hash, ipsec_action_hash, hval); 2990 2991 ap->ipa_next = prev; 2992 ap->ipa_act = a[i]; 2993 2994 overhead = ipsec_act_ovhd(&a[i]); 2995 if (maxovhd < overhead) 2996 maxovhd = overhead; 2997 2998 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 2999 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3000 allow_clear = B_TRUE; 3001 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3002 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3003 3004 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3005 want_ah |= ipp->ipp_use_ah; 3006 want_esp |= ipp->ipp_use_esp; 3007 want_se |= ipp->ipp_use_se; 3008 want_unique |= ipp->ipp_use_unique; 3009 } 3010 ap->ipa_allow_clear = allow_clear; 3011 ap->ipa_want_ah = want_ah; 3012 ap->ipa_want_esp = want_esp; 3013 ap->ipa_want_se = want_se; 3014 ap->ipa_want_unique = want_unique; 3015 ap->ipa_refs = 1; /* from the hash table */ 3016 ap->ipa_ovhd = maxovhd; 3017 if (prev) 3018 prev->ipa_refs++; 3019 prev = ap; 3020 HASH_UNLOCK(ipsec_action_hash, hval); 3021 } 3022 3023 ap->ipa_refs++; /* caller's reference */ 3024 3025 return (ap); 3026 } 3027 3028 /* 3029 * Called when refcount goes to 0, indicating that all references to this 3030 * node are gone. 3031 * 3032 * This does not unchain the action from the hash table. 3033 */ 3034 void 3035 ipsec_action_free(ipsec_action_t *ap) 3036 { 3037 for (;;) { 3038 ipsec_action_t *np = ap->ipa_next; 3039 ASSERT(ap->ipa_refs == 0); 3040 ASSERT(ap->ipa_hash.hash_pp == NULL); 3041 kmem_cache_free(ipsec_action_cache, ap); 3042 ap = np; 3043 /* Inlined IPACT_REFRELE -- avoid recursion */ 3044 if (ap == NULL) 3045 break; 3046 membar_exit(); 3047 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3048 break; 3049 /* End inlined IPACT_REFRELE */ 3050 } 3051 } 3052 3053 /* 3054 * Periodically sweep action hash table for actions with refcount==1, and 3055 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3056 * because we can't close the race between another thread finding the action 3057 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3058 * Instead, we run this function sporadically to clean up after ourselves; 3059 * we also set it as the "reclaim" function for the action kmem_cache. 
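 * (A refcount of exactly 1 means only the hash table itself still
 * references the action, so no packet or policy can be using it;
 * unchaining it and doing IPACT_REFRELE then frees it.)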
3060 *
3061 * Note that it may take several passes of ipsec_action_reclaim() to free
3062 * all "stale" actions.
3063 */
3064 /* ARGSUSED */
3065 static void
3066 ipsec_action_reclaim(void *dummy)
3067 {
3068 int i;
3069
3070 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
3071 ipsec_action_t *ap, *np;
3072
3073 /* skip the lock if nobody home */
3074 if (ipsec_action_hash[i].hash_head == NULL)
3075 continue;
3076
3077 HASH_LOCK(ipsec_action_hash, i);
3078 for (ap = ipsec_action_hash[i].hash_head;
3079 ap != NULL; ap = np) {
3080 ASSERT(ap->ipa_refs > 0);
3081 np = ap->ipa_hash.hash_next;
3082 if (ap->ipa_refs > 1)
3083 continue;
3084 HASH_UNCHAIN(ap, ipa_hash, ipsec_action_hash, i);
3085 IPACT_REFRELE(ap);
3086 }
3087 HASH_UNLOCK(ipsec_action_hash, i);
3088 }
3089 }
3090
3091 /*
3092 * Intern a selector set into the selector set hash table.
3093 * This is simpler than the actions case..
3094 */
3095 static ipsec_sel_t *
3096 ipsec_find_sel(ipsec_selkey_t *selkey)
3097 {
3098 ipsec_sel_t *sp;
3099 uint32_t hval, bucket;
3100
3101 /*
3102 * Exactly one AF bit should be set in selkey.
3103 */
3104 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^
3105 !(selkey->ipsl_valid & IPSL_IPV6));
3106
3107 hval = selkey_hash(selkey);
3108 /* Set pol_hval to uninitialized until we put it in a polhead. */
3109 selkey->ipsl_sel_hval = hval;
3110
3111 bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval;
3112
3113 ASSERT(!HASH_LOCKED(ipsec_sel_hash, bucket));
3114 HASH_LOCK(ipsec_sel_hash, bucket);
3115
3116 for (HASH_ITERATE(sp, ipsl_hash, ipsec_sel_hash, bucket)) {
3117 if (bcmp(&sp->ipsl_key, selkey,
3118 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0)
3119 break;
3120 }
3121 if (sp != NULL) {
3122 sp->ipsl_refs++;
3123
3124 HASH_UNLOCK(ipsec_sel_hash, bucket);
3125 return (sp);
3126 }
3127
3128 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP);
3129 if (sp == NULL) {
3130 HASH_UNLOCK(ipsec_sel_hash, bucket);
3131 return (NULL);
3132 }
3133
3134 HASH_INSERT(sp, ipsl_hash, ipsec_sel_hash, bucket);
3135 sp->ipsl_refs = 2; /* one for hash table, one for caller */
3136 sp->ipsl_key = *selkey;
3137 /* Set to uninitialized and have insertion into polhead fix things. */
3138 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH)
3139 sp->ipsl_key.ipsl_pol_hval = 0;
3140 else
3141 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH;
3142
3143 HASH_UNLOCK(ipsec_sel_hash, bucket);
3144
3145 return (sp);
3146 }
3147
3148 static void
3149 ipsec_sel_rel(ipsec_sel_t **spp)
3150 {
3151 ipsec_sel_t *sp = *spp;
3152 int hval = sp->ipsl_key.ipsl_sel_hval;
3153 *spp = NULL;
3154
3155 if (hval == IPSEC_SEL_NOHASH)
3156 hval = 0;
3157
3158 ASSERT(!HASH_LOCKED(ipsec_sel_hash, hval));
3159 HASH_LOCK(ipsec_sel_hash, hval);
3160 if (--sp->ipsl_refs == 1) {
3161 HASH_UNCHAIN(sp, ipsl_hash, ipsec_sel_hash, hval);
3162 sp->ipsl_refs--;
3163 HASH_UNLOCK(ipsec_sel_hash, hval);
3164 ASSERT(sp->ipsl_refs == 0);
3165 kmem_cache_free(ipsec_sel_cache, sp);
3166 /* Lock was already dropped above. */
3167 return;
3168 }
3169
3170 HASH_UNLOCK(ipsec_sel_hash, hval);
3171 }
3172
3173 /*
3174 * Free a policy rule which we know is no longer being referenced.
3175 */
3176 void
3177 ipsec_policy_free(ipsec_policy_t *ipp)
3178 {
3179 ASSERT(ipp->ipsp_refs == 0);
3180 ASSERT(ipp->ipsp_sel != NULL);
3181 ASSERT(ipp->ipsp_act != NULL);
3182 ipsec_sel_rel(&ipp->ipsp_sel);
3183 IPACT_REFRELE(ipp->ipsp_act);
3184 kmem_cache_free(ipsec_pol_cache, ipp);
3185 }
3186
3187 /*
3188 * Construction of new policy rules; construct a policy, and add it to
3189 * the appropriate tables.
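 *
 * A hedged sketch of the expected calling sequence (error handling
 * elided; all names are from this file):
 *
 *	ipp = ipsec_policy_create(&keys, acts, nacts, prio, NULL);
 *	rw_enter(&php->iph_lock, RW_WRITER);
 *	if (ipsec_check_policy(php, ipp, dir))
 *		ipsec_enter_policy(php, ipp, dir);
 *	rw_exit(&php->iph_lock);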
3190 */ 3191 ipsec_policy_t * 3192 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3193 int nacts, int prio, uint64_t *index_ptr) 3194 { 3195 ipsec_action_t *ap; 3196 ipsec_sel_t *sp; 3197 ipsec_policy_t *ipp; 3198 3199 if (index_ptr == NULL) 3200 index_ptr = &ipsec_next_policy_index; 3201 3202 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3203 ap = ipsec_act_find(a, nacts); 3204 sp = ipsec_find_sel(keys); 3205 3206 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3207 if (ap != NULL) { 3208 IPACT_REFRELE(ap); 3209 } 3210 if (sp != NULL) 3211 ipsec_sel_rel(&sp); 3212 if (ipp != NULL) 3213 kmem_cache_free(ipsec_pol_cache, ipp); 3214 return (NULL); 3215 } 3216 3217 HASH_NULL(ipp, ipsp_hash); 3218 3219 ipp->ipsp_refs = 1; /* caller's reference */ 3220 ipp->ipsp_sel = sp; 3221 ipp->ipsp_act = ap; 3222 ipp->ipsp_prio = prio; /* rule priority */ 3223 ipp->ipsp_index = *index_ptr; 3224 (*index_ptr)++; 3225 3226 return (ipp); 3227 } 3228 3229 static void 3230 ipsec_update_present_flags() 3231 { 3232 boolean_t hashpol = (avl_numnodes(&system_policy.iph_rulebyid) > 0); 3233 3234 if (hashpol) { 3235 ipsec_outbound_v4_policy_present = B_TRUE; 3236 ipsec_outbound_v6_policy_present = B_TRUE; 3237 ipsec_inbound_v4_policy_present = B_TRUE; 3238 ipsec_inbound_v6_policy_present = B_TRUE; 3239 return; 3240 } 3241 3242 ipsec_outbound_v4_policy_present = (NULL != 3243 system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3244 ipr_nonhash[IPSEC_AF_V4]); 3245 ipsec_outbound_v6_policy_present = (NULL != 3246 system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3247 ipr_nonhash[IPSEC_AF_V6]); 3248 ipsec_inbound_v4_policy_present = (NULL != 3249 system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3250 ipr_nonhash[IPSEC_AF_V4]); 3251 ipsec_inbound_v6_policy_present = (NULL != 3252 system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3253 ipr_nonhash[IPSEC_AF_V6]); 3254 } 3255 3256 boolean_t 3257 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir) 3258 { 3259 ipsec_sel_t *sp; 3260 ipsec_policy_t *ip, *nip, *head; 3261 int af; 3262 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3263 3264 sp = ipsec_find_sel(keys); 3265 3266 if (sp == NULL) 3267 return (B_FALSE); 3268 3269 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? IPSEC_AF_V4 : IPSEC_AF_V6; 3270 3271 rw_enter(&php->iph_lock, RW_WRITER); 3272 3273 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) { 3274 head = pr->ipr_nonhash[af]; 3275 } else { 3276 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3277 } 3278 3279 for (ip = head; ip != NULL; ip = nip) { 3280 nip = ip->ipsp_hash.hash_next; 3281 if (ip->ipsp_sel != sp) { 3282 continue; 3283 } 3284 3285 IPPOL_UNCHAIN(php, ip); 3286 3287 php->iph_gen++; 3288 ipsec_update_present_flags(); 3289 3290 rw_exit(&php->iph_lock); 3291 3292 ipsec_sel_rel(&sp); 3293 3294 return (B_TRUE); 3295 } 3296 3297 rw_exit(&php->iph_lock); 3298 ipsec_sel_rel(&sp); 3299 return (B_FALSE); 3300 } 3301 3302 int 3303 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index) 3304 { 3305 boolean_t found = B_FALSE; 3306 ipsec_policy_t ipkey; 3307 ipsec_policy_t *ip; 3308 avl_index_t where; 3309 3310 (void) memset(&ipkey, 0, sizeof (ipkey)); 3311 ipkey.ipsp_index = policy_index; 3312 3313 rw_enter(&php->iph_lock, RW_WRITER); 3314 3315 /* 3316 * We could be cleverer here about the walk. 
3317 * but well, (k+1)*log(N) will do for now (k == number of matches,
3318 * N == number of table entries).
3319 */
3320 for (;;) {
3321 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid,
3322 (void *)&ipkey, &where);
3323 ASSERT(ip == NULL);
3324
3325 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER);
3326
3327 if (ip == NULL)
3328 break;
3329
3330 if (ip->ipsp_index != policy_index) {
3331 ASSERT(ip->ipsp_index > policy_index);
3332 break;
3333 }
3334
3335 IPPOL_UNCHAIN(php, ip);
3336 found = B_TRUE;
3337 }
3338
3339 if (found) {
3340 php->iph_gen++;
3341 ipsec_update_present_flags();
3342 }
3343
3344 rw_exit(&php->iph_lock);
3345
3346 return (found ? 0 : ENOENT);
3347 }
3348
3349 /*
3350 * Given a constructed ipsec_policy_t policy rule, see if it can be entered
3351 * into the correct policy ruleset. As a side-effect, it sets the hash
3352 * entries on "ipp"'s ipsp_pol_hval.
3353 *
3354 * Returns B_TRUE if it can be entered, B_FALSE if it can't be, because a
3355 * duplicate policy exists with exactly the same selectors or an ICMP
3356 * rule exists with a different encryption/authentication action.
3357 */
3358 boolean_t
3359 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
3360 {
3361 ipsec_policy_root_t *pr = &php->iph_root[direction];
3362 int af = -1;
3363 ipsec_policy_t *p2, *head;
3364 uint8_t check_proto;
3365 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3366 uint32_t valid = selkey->ipsl_valid;
3367
3368 if (valid & IPSL_IPV6) {
3369 ASSERT(!(valid & IPSL_IPV4));
3370 af = IPSEC_AF_V6;
3371 check_proto = IPPROTO_ICMPV6;
3372 } else {
3373 ASSERT(valid & IPSL_IPV4);
3374 af = IPSEC_AF_V4;
3375 check_proto = IPPROTO_ICMP;
3376 }
3377
3378 ASSERT(RW_WRITE_HELD(&php->iph_lock));
3379
3380 /*
3381 * Double-check that we don't have any duplicate selectors here.
3382 * Because selectors are interned below, we need only compare pointers
3383 * for equality.
3384 */
3385 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
3386 head = pr->ipr_nonhash[af];
3387 } else {
3388 selkey->ipsl_pol_hval =
3389 (selkey->ipsl_valid & IPSL_IPV4) ?
3390 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4,
3391 pr->ipr_nchains) :
3392 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6,
3393 pr->ipr_nchains);
3394
3395 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head;
3396 }
3397
3398 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3399 if (p2->ipsp_sel == ipp->ipsp_sel)
3400 return (B_FALSE);
3401 }
3402
3403 /*
3404 * If it's ICMP and not a drop or pass rule, run through the ICMP
3405 * rules and make sure the action is either new or the same as any
3406 * other actions. We don't have to check the full chain because
3407 * discard and bypass will override all other actions.
3408 */
3409
3410 if (valid & IPSL_PROTOCOL &&
3411 selkey->ipsl_proto == check_proto &&
3412 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) {
3413
3414 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3415
3416 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL &&
3417 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto &&
3418 (p2->ipsp_act->ipa_act.ipa_type ==
3419 IPSEC_ACT_APPLY)) {
3420 return (ipsec_compare_action(p2, ipp));
3421 }
3422 }
3423 }
3424
3425 return (B_TRUE);
3426 }
3427
3428 /*
3429 * Compare the action chains of two policies for equality.
3430 * B_TRUE -> effective equality
3431 */
3432
3433 static boolean_t
3434 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2)
3435 {
3436
3437 ipsec_action_t *act1, *act2;
3438
3439 /* We have a valid rule.
Let's compare the actions */ 3440 if (p1->ipsp_act == p2->ipsp_act) { 3441 /* same action. We are good */ 3442 return (B_TRUE); 3443 } 3444 3445 /* we have to walk the chain */ 3446 3447 act1 = p1->ipsp_act; 3448 act2 = p2->ipsp_act; 3449 3450 while (act1 != NULL && act2 != NULL) { 3451 3452 /* otherwise, Are we close enough? */ 3453 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3454 act1->ipa_want_ah != act2->ipa_want_ah || 3455 act1->ipa_want_esp != act2->ipa_want_esp || 3456 act1->ipa_want_se != act2->ipa_want_se) { 3457 /* Nope, we aren't */ 3458 return (B_FALSE); 3459 } 3460 3461 if (act1->ipa_want_ah) { 3462 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3463 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3464 return (B_FALSE); 3465 } 3466 3467 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3468 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3469 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3470 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3471 return (B_FALSE); 3472 } 3473 } 3474 3475 if (act1->ipa_want_esp) { 3476 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3477 act2->ipa_act.ipa_apply.ipp_use_esp || 3478 act1->ipa_act.ipa_apply.ipp_use_espa != 3479 act2->ipa_act.ipa_apply.ipp_use_espa) { 3480 return (B_FALSE); 3481 } 3482 3483 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3484 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3485 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3486 return (B_FALSE); 3487 } 3488 3489 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3490 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3491 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3492 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3493 return (B_FALSE); 3494 } 3495 } 3496 3497 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3498 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3499 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3500 return (B_FALSE); 3501 } 3502 3503 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3504 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3505 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3506 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3507 return (B_FALSE); 3508 } 3509 } 3510 3511 } 3512 3513 act1 = act1->ipa_next; 3514 act2 = act2->ipa_next; 3515 } 3516 3517 if (act1 != NULL || act2 != NULL) { 3518 return (B_FALSE); 3519 } 3520 3521 return (B_TRUE); 3522 } 3523 3524 3525 /* 3526 * Given a constructed ipsec_policy_t policy rule, enter it into 3527 * the correct policy ruleset. 3528 * 3529 * ipsec_check_policy() is assumed to have succeeded first (to check for 3530 * duplicates). 
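 * The caller must hold php->iph_lock as writer across both the check
 * and the insertion, as the ASSERT below enforces.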
3531 */ 3532 void 3533 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3534 { 3535 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3536 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3537 uint32_t valid = selkey->ipsl_valid; 3538 uint32_t hval = selkey->ipsl_pol_hval; 3539 int af = -1; 3540 3541 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3542 3543 if (valid & IPSL_IPV6) { 3544 ASSERT(!(valid & IPSL_IPV4)); 3545 af = IPSEC_AF_V6; 3546 } else { 3547 ASSERT(valid & IPSL_IPV4); 3548 af = IPSEC_AF_V4; 3549 } 3550 3551 php->iph_gen++; 3552 3553 if (hval == IPSEC_SEL_NOHASH) { 3554 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3555 } else { 3556 HASH_LOCK(pr->ipr_hash, hval); 3557 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3558 HASH_UNLOCK(pr->ipr_hash, hval); 3559 } 3560 3561 ipsec_insert_always(&php->iph_rulebyid, ipp); 3562 3563 ipsec_update_present_flags(); 3564 } 3565 3566 static void 3567 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr) 3568 { 3569 ipsec_policy_t *ip, *nip; 3570 3571 int af, chain, nchain; 3572 3573 for (af = 0; af < IPSEC_NAF; af++) { 3574 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3575 nip = ip->ipsp_hash.hash_next; 3576 IPPOL_UNCHAIN(php, ip); 3577 } 3578 ipr->ipr_nonhash[af] = NULL; 3579 } 3580 nchain = ipr->ipr_nchains; 3581 3582 for (chain = 0; chain < nchain; chain++) { 3583 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3584 ip = nip) { 3585 nip = ip->ipsp_hash.hash_next; 3586 IPPOL_UNCHAIN(php, ip); 3587 } 3588 ipr->ipr_hash[chain].hash_head = NULL; 3589 } 3590 } 3591 3592 void 3593 ipsec_polhead_flush(ipsec_policy_head_t *php) 3594 { 3595 int dir; 3596 3597 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3598 3599 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3600 ipsec_ipr_flush(php, &php->iph_root[dir]); 3601 3602 ipsec_update_present_flags(); 3603 } 3604 3605 void 3606 ipsec_polhead_free(ipsec_policy_head_t *php) 3607 { 3608 int dir; 3609 3610 ASSERT(php->iph_refs == 0); 3611 rw_enter(&php->iph_lock, RW_WRITER); 3612 ipsec_polhead_flush(php); 3613 rw_exit(&php->iph_lock); 3614 rw_destroy(&php->iph_lock); 3615 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3616 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3617 int chain; 3618 3619 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3620 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3621 3622 } 3623 ipsec_polhead_free_table(php); 3624 kmem_free(php, sizeof (*php)); 3625 } 3626 3627 static void 3628 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3629 { 3630 int af; 3631 3632 ipr->ipr_nchains = 0; 3633 ipr->ipr_hash = NULL; 3634 3635 for (af = 0; af < IPSEC_NAF; af++) { 3636 ipr->ipr_nonhash[af] = NULL; 3637 } 3638 } 3639 3640 ipsec_policy_head_t * 3641 ipsec_polhead_create(void) 3642 { 3643 ipsec_policy_head_t *php; 3644 3645 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3646 if (php == NULL) 3647 return (php); 3648 3649 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3650 php->iph_refs = 1; 3651 php->iph_gen = 0; 3652 3653 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3654 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3655 3656 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3657 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3658 3659 return (php); 3660 } 3661 3662 /* 3663 * Clone the policy head into a new polhead; release one reference to the 3664 * old one and return the only reference to the new one. 3665 * If the old one had a refcount of 1, just return it. 
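 *
 * (Copy-on-write for policy heads. A hypothetical caller about to
 * modify a possibly-shared head would do:
 *
 *	connp->conn_policy = ipsec_polhead_split(connp->conn_policy);
 *
 * and then edit the private copy, leaving other references untouched.)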
3666 */
3667 ipsec_policy_head_t *
3668 ipsec_polhead_split(ipsec_policy_head_t *php)
3669 {
3670 ipsec_policy_head_t *nphp;
3671
3672 if (php == NULL)
3673 return (ipsec_polhead_create());
3674 else if (php->iph_refs == 1)
3675 return (php);
3676
3677 nphp = ipsec_polhead_create();
3678 if (nphp == NULL)
3679 return (NULL);
3680
3681 if (ipsec_copy_polhead(php, nphp) != 0) {
3682 ipsec_polhead_free(nphp);
3683 return (NULL);
3684 }
3685 IPPH_REFRELE(php);
3686 return (nphp);
3687 }
3688
3689 /*
3690 * When sending a response to an ICMP request or generating a RST
3691 * in the TCP case, the outbound packets need to go out at the same level
3692 * of protection as the incoming ones, i.e., we associate our outbound
3693 * policy with how the packet came in. We call this after we have
3694 * accepted the incoming packet, which may or may not have been in
3695 * clear, and hence we send the reply back with a policy
3696 * matching the incoming datagram's policy.
3697 *
3698 * NOTE : This technique serves two purposes :
3699 *
3700 * 1) If we have multiple outbound policies, we send out a reply
3701 * matching how it came in rather than matching the outbound
3702 * policy.
3703 *
3704 * 2) For asymmetric policies, we want to make sure that incoming
3705 * and outgoing have the same level of protection. Asymmetric
3706 * policies exist only with global policy, where we may not have
3707 * both outbound and inbound policies at the same time.
3708 *
3709 * NOTE2: This function is called by cleartext cases, so it needs to be
3710 * in IP proper.
3711 */
3712 boolean_t
3713 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h)
3714 {
3715 ipsec_in_t *ii;
3716 ipsec_out_t *io;
3717 boolean_t v4;
3718 mblk_t *mp;
3719 boolean_t secure, attach_if;
3720 uint_t ifindex;
3721 ipsec_selector_t sel;
3722 ipsec_action_t *reflect_action = NULL;
3723 zoneid_t zoneid;
3724
3725 ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
3726
3727 bzero((void*)&sel, sizeof (sel));
3728
3729 ii = (ipsec_in_t *)ipsec_mp->b_rptr;
3730
3731 mp = ipsec_mp->b_cont;
3732 ASSERT(mp != NULL);
3733
3734 if (ii->ipsec_in_action != NULL) {
3735 /* transfer reference.. */
3736 reflect_action = ii->ipsec_in_action;
3737 ii->ipsec_in_action = NULL;
3738 } else if (!ii->ipsec_in_loopback)
3739 reflect_action = ipsec_in_to_out_action(ii);
3740 secure = ii->ipsec_in_secure;
3741 attach_if = ii->ipsec_in_attach_if;
3742 ifindex = ii->ipsec_in_ill_index;
3743 zoneid = ii->ipsec_in_zoneid;
3744 ASSERT(zoneid != ALL_ZONES);
3745 v4 = ii->ipsec_in_v4;
3746
3747 ipsec_in_release_refs(ii);
3748
3749 /*
3750 * The caller is going to send the datagram out, which might
3751 * go on the wire or be delivered locally through ip_wput_local.
3752 *
3753 * 1) If it goes out on the wire, new associations will be
3754 * obtained.
3755 * 2) If it is delivered locally, ip_wput_local will convert
3756 * this IPSEC_OUT to an IPSEC_IN, looking at the requests.
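 *    (That is, in the local case the fields filled in below are
 *    re-read as inbound requirements once the packet loops back; a
 *    description of the effect, not of ip_wput_local's actual code.)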
3757 */ 3758 3759 io = (ipsec_out_t *)ipsec_mp->b_rptr; 3760 bzero(io, sizeof (ipsec_out_t)); 3761 io->ipsec_out_type = IPSEC_OUT; 3762 io->ipsec_out_len = sizeof (ipsec_out_t); 3763 io->ipsec_out_frtn.free_func = ipsec_out_free; 3764 io->ipsec_out_frtn.free_arg = (char *)io; 3765 io->ipsec_out_act = reflect_action; 3766 3767 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) 3768 return (B_FALSE); 3769 3770 io->ipsec_out_src_port = sel.ips_local_port; 3771 io->ipsec_out_dst_port = sel.ips_remote_port; 3772 io->ipsec_out_proto = sel.ips_protocol; 3773 io->ipsec_out_icmp_type = sel.ips_icmp_type; 3774 io->ipsec_out_icmp_code = sel.ips_icmp_code; 3775 3776 /* 3777 * Don't use global policy for this, as we want 3778 * to use the same protection that was applied to the inbound packet. 3779 */ 3780 io->ipsec_out_use_global_policy = B_FALSE; 3781 io->ipsec_out_proc_begin = B_FALSE; 3782 io->ipsec_out_secure = secure; 3783 io->ipsec_out_v4 = v4; 3784 io->ipsec_out_attach_if = attach_if; 3785 io->ipsec_out_ill_index = ifindex; 3786 io->ipsec_out_zoneid = zoneid; 3787 return (B_TRUE); 3788 } 3789 3790 mblk_t * 3791 ipsec_in_tag(mblk_t *mp, mblk_t *cont) 3792 { 3793 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 3794 ipsec_in_t *nii; 3795 mblk_t *nmp; 3796 frtn_t nfrtn; 3797 3798 ASSERT(ii->ipsec_in_type == IPSEC_IN); 3799 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 3800 3801 nmp = ipsec_in_alloc(ii->ipsec_in_v4); 3802 3803 ASSERT(nmp->b_datap->db_type == M_CTL); 3804 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 3805 3806 /* 3807 * Bump refcounts. 3808 */ 3809 if (ii->ipsec_in_ah_sa != NULL) 3810 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 3811 if (ii->ipsec_in_esp_sa != NULL) 3812 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 3813 if (ii->ipsec_in_policy != NULL) 3814 IPPH_REFHOLD(ii->ipsec_in_policy); 3815 3816 /* 3817 * Copy everything, but preserve the free routine provided by 3818 * ipsec_in_alloc(). 3819 */ 3820 nii = (ipsec_in_t *)nmp->b_rptr; 3821 nfrtn = nii->ipsec_in_frtn; 3822 bcopy(ii, nii, sizeof (*ii)); 3823 nii->ipsec_in_frtn = nfrtn; 3824 3825 nmp->b_cont = cont; 3826 3827 return (nmp); 3828 } 3829 3830 mblk_t * 3831 ipsec_out_tag(mblk_t *mp, mblk_t *cont) 3832 { 3833 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3834 ipsec_out_t *nio; 3835 mblk_t *nmp; 3836 frtn_t nfrtn; 3837 3838 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3839 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 3840 3841 nmp = ipsec_alloc_ipsec_out(); 3842 if (nmp == NULL) { 3843 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 3844 &ipdrops_spd_nomem, &spd_dropper); 3845 return (NULL); 3846 } 3847 ASSERT(nmp->b_datap->db_type == M_CTL); 3848 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 3849 3850 /* 3851 * Bump refcounts. 3852 */ 3853 if (io->ipsec_out_ah_sa != NULL) 3854 IPSA_REFHOLD(io->ipsec_out_ah_sa); 3855 if (io->ipsec_out_esp_sa != NULL) 3856 IPSA_REFHOLD(io->ipsec_out_esp_sa); 3857 if (io->ipsec_out_polhead != NULL) 3858 IPPH_REFHOLD(io->ipsec_out_polhead); 3859 if (io->ipsec_out_policy != NULL) 3860 IPPOL_REFHOLD(io->ipsec_out_policy); 3861 if (io->ipsec_out_act != NULL) 3862 IPACT_REFHOLD(io->ipsec_out_act); 3863 if (io->ipsec_out_latch != NULL) 3864 IPLATCH_REFHOLD(io->ipsec_out_latch); 3865 if (io->ipsec_out_cred != NULL) 3866 crhold(io->ipsec_out_cred); 3867 3868 /* 3869 * Copy everything, but preserve the free routine provided by 3870 * ipsec_alloc_ipsec_out(). 
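 * (The frtn_t must keep pointing at the new buffer's own free routine
 * and argument; if the bcopy() overwrote it with the old one, freeing
 * the new dblk would free the old ipsec_out_t instead.)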
3871 */ 3872 nio = (ipsec_out_t *)nmp->b_rptr; 3873 nfrtn = nio->ipsec_out_frtn; 3874 bcopy(io, nio, sizeof (*io)); 3875 nio->ipsec_out_frtn = nfrtn; 3876 3877 nmp->b_cont = cont; 3878 3879 return (nmp); 3880 } 3881 3882 static void 3883 ipsec_out_release_refs(ipsec_out_t *io) 3884 { 3885 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3886 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 3887 3888 /* Note: IPSA_REFRELE is multi-line macro */ 3889 if (io->ipsec_out_ah_sa != NULL) 3890 IPSA_REFRELE(io->ipsec_out_ah_sa); 3891 if (io->ipsec_out_esp_sa != NULL) 3892 IPSA_REFRELE(io->ipsec_out_esp_sa); 3893 if (io->ipsec_out_polhead != NULL) 3894 IPPH_REFRELE(io->ipsec_out_polhead); 3895 if (io->ipsec_out_policy != NULL) 3896 IPPOL_REFRELE(io->ipsec_out_policy); 3897 if (io->ipsec_out_act != NULL) 3898 IPACT_REFRELE(io->ipsec_out_act); 3899 if (io->ipsec_out_cred != NULL) { 3900 crfree(io->ipsec_out_cred); 3901 io->ipsec_out_cred = NULL; 3902 } 3903 if (io->ipsec_out_latch) { 3904 IPLATCH_REFRELE(io->ipsec_out_latch); 3905 io->ipsec_out_latch = NULL; 3906 } 3907 } 3908 3909 static void 3910 ipsec_out_free(void *arg) 3911 { 3912 ipsec_out_t *io = (ipsec_out_t *)arg; 3913 ipsec_out_release_refs(io); 3914 kmem_cache_free(ipsec_info_cache, arg); 3915 } 3916 3917 static void 3918 ipsec_in_release_refs(ipsec_in_t *ii) 3919 { 3920 /* Note: IPSA_REFRELE is multi-line macro */ 3921 if (ii->ipsec_in_ah_sa != NULL) 3922 IPSA_REFRELE(ii->ipsec_in_ah_sa); 3923 if (ii->ipsec_in_esp_sa != NULL) 3924 IPSA_REFRELE(ii->ipsec_in_esp_sa); 3925 if (ii->ipsec_in_policy != NULL) 3926 IPPH_REFRELE(ii->ipsec_in_policy); 3927 if (ii->ipsec_in_da != NULL) { 3928 freeb(ii->ipsec_in_da); 3929 ii->ipsec_in_da = NULL; 3930 } 3931 } 3932 3933 static void 3934 ipsec_in_free(void *arg) 3935 { 3936 ipsec_in_t *ii = (ipsec_in_t *)arg; 3937 ipsec_in_release_refs(ii); 3938 kmem_cache_free(ipsec_info_cache, arg); 3939 } 3940 3941 /* 3942 * This is called only for outbound datagrams if the datagram needs to 3943 * go out secure. A NULL mp can be passed to get an ipsec_out. This 3944 * facility is used by ip_unbind. 3945 * 3946 * NOTE : o As the data part could be modified by ipsec_out_process etc. 3947 * we can't make it fast by calling a dup. 3948 */ 3949 mblk_t * 3950 ipsec_alloc_ipsec_out() 3951 { 3952 mblk_t *ipsec_mp; 3953 3954 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 3955 3956 if (io == NULL) 3957 return (NULL); 3958 3959 bzero(io, sizeof (ipsec_out_t)); 3960 3961 io->ipsec_out_type = IPSEC_OUT; 3962 io->ipsec_out_len = sizeof (ipsec_out_t); 3963 io->ipsec_out_frtn.free_func = ipsec_out_free; 3964 io->ipsec_out_frtn.free_arg = (char *)io; 3965 3966 /* 3967 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 3968 * using ipsec_out_zoneid should assert that the zoneid has been set to 3969 * a sane value. 3970 */ 3971 io->ipsec_out_zoneid = ALL_ZONES; 3972 3973 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 3974 &io->ipsec_out_frtn); 3975 if (ipsec_mp == NULL) { 3976 ipsec_out_free(io); 3977 3978 return (NULL); 3979 } 3980 ipsec_mp->b_datap->db_type = M_CTL; 3981 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 3982 3983 return (ipsec_mp); 3984 } 3985 3986 /* 3987 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 3988 * Otherwise initialize using conn. 3989 * 3990 * If pol is non-null, we consume a reference to it. 
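 *
 * A hedged usage sketch (hypothetical caller; on failure the packet
 * has already been dropped and counted for us):
 *
 *	ipsec_mp = ipsec_attach_ipsec_out(mp, connp, NULL, IPPROTO_TCP);
 *	if (ipsec_mp == NULL)
 *		return;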
/*
 * Attach an IPSEC_OUT; use pol for policy if it is non-null.
 * Otherwise initialize using conn.
 *
 * If pol is non-null, we consume a reference to it.
 */
mblk_t *
ipsec_attach_ipsec_out(mblk_t *mp, conn_t *connp, ipsec_policy_t *pol,
    uint8_t proto)
{
	mblk_t *ipsec_mp;

	ASSERT((pol != NULL) || (connp != NULL));

	ipsec_mp = ipsec_alloc_ipsec_out();
	if (ipsec_mp == NULL) {
		ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE,
		    "ipsec_attach_ipsec_out: Allocation failure\n");
		BUMP_MIB(&ip_mib, ipOutDiscards);
		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem,
		    &spd_dropper);
		return (NULL);
	}
	ipsec_mp->b_cont = mp;
	return (ipsec_init_ipsec_out(ipsec_mp, connp, pol, proto));
}

/*
 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null.
 * Otherwise initialize using conn.
 *
 * If pol is non-null, we consume a reference to it.
 */
mblk_t *
ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol,
    uint8_t proto)
{
	mblk_t *mp;
	ipsec_out_t *io;
	ipsec_policy_t *p;
	ipha_t *ipha;
	ip6_t *ip6h;

	ASSERT((pol != NULL) || (connp != NULL));

	/*
	 * If mp is NULL, we won't/should not be using it.
	 */
	mp = ipsec_mp->b_cont;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t)));
	io = (ipsec_out_t *)ipsec_mp->b_rptr;
	ASSERT(io->ipsec_out_type == IPSEC_OUT);
	ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t));
	io->ipsec_out_latch = NULL;
	/*
	 * Set the zoneid when we have the connp.
	 * Otherwise, we're called from ip_wput_attach_policy(), which will
	 * take care of setting the zoneid.
	 */
	if (connp != NULL)
		io->ipsec_out_zoneid = connp->conn_zoneid;

	if (mp != NULL) {
		ipha = (ipha_t *)mp->b_rptr;
		if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
			io->ipsec_out_v4 = B_TRUE;
			ip6h = NULL;
		} else {
			io->ipsec_out_v4 = B_FALSE;
			ip6h = (ip6_t *)ipha;
			ipha = NULL;
		}
	} else {
		ASSERT(connp != NULL && connp->conn_policy_cached);
		ip6h = NULL;
		ipha = NULL;
		io->ipsec_out_v4 = !connp->conn_pkt_isv6;
	}

	p = NULL;

	/*
	 * Take latched policies over global policy.  Check here again for
	 * this, in case we had conn_latch set while the packet was flying
	 * around in IP.
	 */
	if (connp != NULL && connp->conn_latch != NULL) {
		p = connp->conn_latch->ipl_out_policy;
		io->ipsec_out_latch = connp->conn_latch;
		IPLATCH_REFHOLD(connp->conn_latch);
		if (p != NULL) {
			IPPOL_REFHOLD(p);
		}
		io->ipsec_out_src_port = connp->conn_lport;
		io->ipsec_out_dst_port = connp->conn_fport;
		io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0;
		if (pol != NULL)
			IPPOL_REFRELE(pol);
	} else if (pol != NULL) {
		ipsec_selector_t sel;

		bzero((void *)&sel, sizeof (sel));

		p = pol;
		/*
		 * conn does not have the port information.  Get
		 * it from the packet.
		 */

		if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) {
			/* Callee did ip_drop_packet(). */
			return (NULL);
		}
		io->ipsec_out_src_port = sel.ips_local_port;
		io->ipsec_out_dst_port = sel.ips_remote_port;
		io->ipsec_out_icmp_type = sel.ips_icmp_type;
		io->ipsec_out_icmp_code = sel.ips_icmp_code;
	}

	io->ipsec_out_proto = proto;
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_secure = (p != NULL);
	io->ipsec_out_policy = p;

	if (p == NULL) {
		if (connp->conn_policy != NULL) {
			io->ipsec_out_secure = B_TRUE;
			ASSERT(io->ipsec_out_latch == NULL);
			ASSERT(io->ipsec_out_use_global_policy == B_TRUE);
			io->ipsec_out_need_policy = B_TRUE;
			ASSERT(io->ipsec_out_polhead == NULL);
			IPPH_REFHOLD(connp->conn_policy);
			io->ipsec_out_polhead = connp->conn_policy;
		}
	} else {
		/* Handle explicit drop action. */
		if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD ||
		    p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) {
			ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
			    &ipdrops_spd_explicit, &spd_dropper);
			ipsec_mp = NULL;
		}
	}

	return (ipsec_mp);
}

/*
 * Allocate an IPSEC_IN mblk.  This will be prepended to an inbound datagram
 * and keep track of what (if any) IPsec processing will be applied to the
 * datagram.
 */
mblk_t *
ipsec_in_alloc(boolean_t isv4)
{
	mblk_t *ipsec_in;
	ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP);

	if (ii == NULL)
		return (NULL);

	bzero(ii, sizeof (ipsec_info_t));
	ii->ipsec_in_type = IPSEC_IN;
	ii->ipsec_in_len = sizeof (ipsec_in_t);

	ii->ipsec_in_v4 = isv4;
	ii->ipsec_in_secure = B_TRUE;

	ii->ipsec_in_frtn.free_func = ipsec_in_free;
	ii->ipsec_in_frtn.free_arg = (char *)ii;

	ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI,
	    &ii->ipsec_in_frtn);
	if (ipsec_in == NULL) {
		ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n"));
		ipsec_in_free(ii);
		return (NULL);
	}

	ipsec_in->b_datap->db_type = M_CTL;
	ipsec_in->b_wptr += sizeof (ipsec_info_t);

	return (ipsec_in);
}
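/*
 * Both ipsec_in_alloc() and ipsec_alloc_ipsec_out() rely on the same
 * desballoc() trick: the ipsec_info_t from ipsec_info_cache is itself
 * the mblk's data buffer, and the frtn_t free routine both drops the
 * held references and returns the buffer to the cache.  A plain
 * freeb() is therefore always the correct way to dispose of one of
 * these M_CTL messages.
 */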
/*
 * This is called from ip_wput_local when a packet which needs
 * security is looped back, to convert the IPSEC_OUT to an IPSEC_IN
 * before fanout, where the policy check happens.  In most cases,
 * IPSEC processing has *never* been done.  There is one case
 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where
 * the packet is destined for localhost and IPSEC processing has
 * already been done.
 *
 * Future: This could happen after SA selection has occurred for
 * outbound.. which will tell us who the src and dst identities are..
 * Then it's just a matter of splicing the ah/esp SA pointers from the
 * ipsec_out_t to the ipsec_in_t.
 */
void
ipsec_out_to_in(mblk_t *ipsec_mp)
{
	ipsec_in_t *ii;
	ipsec_out_t *io;
	ipsec_policy_t *pol;
	ipsec_action_t *act;
	boolean_t v4, icmp_loopback;
	zoneid_t zoneid;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	v4 = io->ipsec_out_v4;
	zoneid = io->ipsec_out_zoneid;
	icmp_loopback = io->ipsec_out_icmp_loopback;

	act = io->ipsec_out_act;
	if (act == NULL) {
		pol = io->ipsec_out_policy;
		if (pol != NULL) {
			act = pol->ipsp_act;
			IPACT_REFHOLD(act);
		}
	}
	io->ipsec_out_act = NULL;

	ipsec_out_release_refs(io);

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	bzero(ii, sizeof (ipsec_in_t));
	ii->ipsec_in_type = IPSEC_IN;
	ii->ipsec_in_len = sizeof (ipsec_in_t);
	ii->ipsec_in_loopback = B_TRUE;
	ii->ipsec_in_frtn.free_func = ipsec_in_free;
	ii->ipsec_in_frtn.free_arg = (char *)ii;
	ii->ipsec_in_action = act;
	ii->ipsec_in_zoneid = zoneid;

	/*
	 * In most cases, we can't look at the ipsec_out_XXX_sa
	 * because this never went through IPSEC processing.  So, look at
	 * the requests and infer whether it would have gone through
	 * IPSEC processing or not.  Initialize the "done" fields with
	 * the requests.  The possible values for "done" fields are:
	 *
	 * 1) zero, indicates that a particular preference was never
	 *    requested.
	 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/
	 *    IPSEC_PREF_NEVER.  If IPSEC_REQ_DONE is set, it means that
	 *    IPSEC processing has been completed.
	 */
	ii->ipsec_in_secure = B_TRUE;
	ii->ipsec_in_v4 = v4;
	ii->ipsec_in_icmp_loopback = icmp_loopback;
	ii->ipsec_in_attach_if = B_FALSE;
}

/*
 * Consults global policy to see whether this datagram should
 * go out secure.  If so it attaches an ipsec_mp in front and
 * returns.
 */
mblk_t *
ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire,
    conn_t *connp, boolean_t unspec_src, zoneid_t zoneid)
{
	mblk_t *mp;
	ipsec_out_t *io = NULL;
	ipsec_selector_t sel;
	uint_t ill_index;
	boolean_t conn_dontroutex;
	boolean_t conn_multicast_loopx;
	boolean_t policy_present;

	ASSERT((ipha != NULL && ip6h == NULL) ||
	    (ip6h != NULL && ipha == NULL));

	bzero((void *)&sel, sizeof (sel));

	if (ipha != NULL)
		policy_present = ipsec_outbound_v4_policy_present;
	else
		policy_present = ipsec_outbound_v6_policy_present;
	/*
	 * Fast Path to see if there is any policy.
	 */
	if (!policy_present) {
		if (ipsec_mp->b_datap->db_type == M_CTL) {
			io = (ipsec_out_t *)ipsec_mp->b_rptr;
			if (!io->ipsec_out_secure) {
				/*
				 * If there is no global policy and ip_wput
				 * or ip_wput_multicast has attached this mp
				 * for the multicast case, free the ipsec_mp
				 * and return the original mp.
				 */
				mp = ipsec_mp->b_cont;
				freeb(ipsec_mp);
				ipsec_mp = mp;
				io = NULL;
			}
			ASSERT(io == NULL || !io->ipsec_out_tunnel);
		}
		if (((io == NULL) || (io->ipsec_out_polhead == NULL)) &&
		    ((connp == NULL) || (connp->conn_policy == NULL)))
			return (ipsec_mp);
	}

	ill_index = 0;
	conn_multicast_loopx = conn_dontroutex = B_FALSE;
	mp = ipsec_mp;
	if (ipsec_mp->b_datap->db_type == M_CTL) {
		mp = ipsec_mp->b_cont;
		/*
		 * This is a connection where we have some per-socket
		 * policy or ip_wput has attached an ipsec_mp for
		 * the multicast datagram.
		 */
		io = (ipsec_out_t *)ipsec_mp->b_rptr;
		if (!io->ipsec_out_secure) {
			/*
			 * This ipsec_mp was allocated in ip_wput or
			 * ip_wput_multicast so that we will know the
			 * value of ill_index, conn_dontroute,
			 * conn_multicast_loop in the multicast case if
			 * we inherit global policy here.
			 */
			ill_index = io->ipsec_out_ill_index;
			conn_dontroutex = io->ipsec_out_dontroute;
			conn_multicast_loopx = io->ipsec_out_multicast_loop;
			freeb(ipsec_mp);
			ipsec_mp = mp;
			io = NULL;
		}
		ASSERT(io == NULL || !io->ipsec_out_tunnel);
	}

	if (ipha != NULL) {
		sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ?
		    ipha->ipha_src : ire->ire_src_addr);
		sel.ips_remote_addr_v4 = ip_get_dst(ipha);
		sel.ips_protocol = (uint8_t)ipha->ipha_protocol;
		sel.ips_isv4 = B_TRUE;
	} else {
		ushort_t hdr_len;
		uint8_t *nexthdrp;
		boolean_t is_fragment;

		sel.ips_isv4 = B_FALSE;
		if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
			if (!unspec_src)
				sel.ips_local_addr_v6 = ire->ire_src_addr_v6;
		} else {
			sel.ips_local_addr_v6 = ip6h->ip6_src;
		}

		sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment);
		if (is_fragment) {
			/*
			 * It's a packet fragment for a packet that
			 * we have already processed (since IPsec processing
			 * is done before fragmentation), so we don't
			 * have to do policy checks again.  Fragments can
			 * come back to us for processing if they have
			 * been queued up due to flow control.
			 */
			if (ipsec_mp->b_datap->db_type == M_CTL) {
				mp = ipsec_mp->b_cont;
				freeb(ipsec_mp);
				ipsec_mp = mp;
			}
			return (ipsec_mp);
		}

		/* IPv6 common-case. */
		sel.ips_protocol = ip6h->ip6_nxt;
		switch (ip6h->ip6_nxt) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_SCTP:
		case IPPROTO_ICMPV6:
			break;
		default:
			if (!ip_hdr_length_nexthdr_v6(mp, ip6h,
			    &hdr_len, &nexthdrp)) {
				BUMP_MIB(&ip6_mib, ipv6OutDiscards);
				/* Not IPsec-related drop. */
				freemsg(ipsec_mp);
				return (NULL);
			}
			sel.ips_protocol = *nexthdrp;
			break;
		}
	}

	if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) {
		if (ipha != NULL) {
			BUMP_MIB(&ip_mib, ipOutDiscards);
		} else {
			BUMP_MIB(&ip6_mib, ipv6OutDiscards);
		}

		/* Callee dropped the packet. */
		return (NULL);
	}
	if (io != NULL) {
		/*
		 * We seem to have some local policy (we already have
		 * an ipsec_out).  Look at global policy and see
		 * whether we have to inherit or not.
		 */
		io->ipsec_out_need_policy = B_FALSE;
		ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, &sel);
		ASSERT((io->ipsec_out_policy != NULL) ||
		    (io->ipsec_out_act != NULL));
		ASSERT(io->ipsec_out_need_policy == B_FALSE);
		return (ipsec_mp);
	}
	ipsec_mp = ipsec_attach_global_policy(mp, connp, &sel);
	if (ipsec_mp == NULL)
		return (mp);

	/*
	 * Copy the right port information.
	 */
	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	ASSERT(io->ipsec_out_need_policy == B_FALSE);
	ASSERT((io->ipsec_out_policy != NULL) ||
	    (io->ipsec_out_act != NULL));
	io->ipsec_out_src_port = sel.ips_local_port;
	io->ipsec_out_dst_port = sel.ips_remote_port;
	io->ipsec_out_icmp_type = sel.ips_icmp_type;
	io->ipsec_out_icmp_code = sel.ips_icmp_code;
	/*
	 * Set ill_index, conn_dontroute and conn_multicast_loop
	 * for multicast datagrams.
	 */
	io->ipsec_out_ill_index = ill_index;
	io->ipsec_out_dontroute = conn_dontroutex;
	io->ipsec_out_multicast_loop = conn_multicast_loopx;

	if (zoneid == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;
	return (ipsec_mp);
}

/*
 * When appropriate, this function caches inbound and outbound policy
 * for this connection.
 *
 * XXX need to work out more details about per-interface policy and
 * caching here!
 *
 * XXX may want to split inbound and outbound caching for ill..
 */
int
ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4)
{
	boolean_t global_policy_present;

	/*
	 * There is no policy latching for ICMP sockets because we can't
	 * decide on which policy to use until we see the packet and get
	 * type/code selectors.
	 */
	if (connp->conn_ulp == IPPROTO_ICMP ||
	    connp->conn_ulp == IPPROTO_ICMPV6) {
		connp->conn_in_enforce_policy =
		    connp->conn_out_enforce_policy = B_TRUE;
		if (connp->conn_latch != NULL) {
			IPLATCH_REFRELE(connp->conn_latch);
			connp->conn_latch = NULL;
		}
		connp->conn_flags |= IPCL_CHECK_POLICY;
		return (0);
	}
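	/*
	 * For example, one ICMP socket may emit both echo requests
	 * (type 8) and timestamp requests (type 13), and each could hit
	 * a different policy rule once the type/code selectors are
	 * known; hence the per-packet IPCL_CHECK_POLICY enforcement
	 * above instead of a latch.
	 */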
	global_policy_present = isv4 ?
	    (ipsec_outbound_v4_policy_present ||
	    ipsec_inbound_v4_policy_present) :
	    (ipsec_outbound_v6_policy_present ||
	    ipsec_inbound_v6_policy_present);

	if ((connp->conn_policy != NULL) || global_policy_present) {
		ipsec_selector_t sel;
		ipsec_policy_t *p;

		if (connp->conn_latch == NULL &&
		    (connp->conn_latch = iplatch_create()) == NULL) {
			return (ENOMEM);
		}

		sel.ips_protocol = connp->conn_ulp;
		sel.ips_local_port = connp->conn_lport;
		sel.ips_remote_port = connp->conn_fport;
		sel.ips_is_icmp_inv_acq = 0;
		sel.ips_isv4 = isv4;
		if (isv4) {
			sel.ips_local_addr_v4 = connp->conn_src;
			sel.ips_remote_addr_v4 = connp->conn_rem;
		} else {
			sel.ips_local_addr_v6 = connp->conn_srcv6;
			sel.ips_remote_addr_v6 = connp->conn_remv6;
		}

		p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel);
		if (connp->conn_latch->ipl_in_policy != NULL)
			IPPOL_REFRELE(connp->conn_latch->ipl_in_policy);
		connp->conn_latch->ipl_in_policy = p;
		connp->conn_in_enforce_policy = (p != NULL);

		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel);
		if (connp->conn_latch->ipl_out_policy != NULL)
			IPPOL_REFRELE(connp->conn_latch->ipl_out_policy);
		connp->conn_latch->ipl_out_policy = p;
		connp->conn_out_enforce_policy = (p != NULL);

		/* Clear the latched actions too, in case we're recaching. */
		if (connp->conn_latch->ipl_out_action != NULL)
			IPACT_REFRELE(connp->conn_latch->ipl_out_action);
		if (connp->conn_latch->ipl_in_action != NULL)
			IPACT_REFRELE(connp->conn_latch->ipl_in_action);
	}

	/*
	 * We may or may not have policy for this endpoint.  We still set
	 * conn_policy_cached so that inbound datagrams don't have to look
	 * at global policy, as policy is considered latched for these
	 * endpoints.  We should not set conn_policy_cached until the conn
	 * reflects the actual policy.  If we *set* this before inheriting
	 * the policy, there is a window where the CONN_INBOUND_POLICY_PRESENT
	 * check will consult neither the policy on the conn (because we have
	 * not yet copied the policy onto the conn, and hence have not set
	 * conn_in_enforce_policy) nor the global policy (because
	 * conn_policy_cached is already set).
	 */
	connp->conn_policy_cached = B_TRUE;
	if (connp->conn_in_enforce_policy)
		connp->conn_flags |= IPCL_CHECK_POLICY;
	return (0);
}

void
iplatch_free(ipsec_latch_t *ipl)
{
	if (ipl->ipl_out_policy != NULL)
		IPPOL_REFRELE(ipl->ipl_out_policy);
	if (ipl->ipl_in_policy != NULL)
		IPPOL_REFRELE(ipl->ipl_in_policy);
	if (ipl->ipl_in_action != NULL)
		IPACT_REFRELE(ipl->ipl_in_action);
	if (ipl->ipl_out_action != NULL)
		IPACT_REFRELE(ipl->ipl_out_action);
	if (ipl->ipl_local_cid != NULL)
		IPSID_REFRELE(ipl->ipl_local_cid);
	if (ipl->ipl_remote_cid != NULL)
		IPSID_REFRELE(ipl->ipl_remote_cid);
	if (ipl->ipl_local_id != NULL)
		crfree(ipl->ipl_local_id);
	mutex_destroy(&ipl->ipl_lock);
	kmem_free(ipl, sizeof (*ipl));
}

ipsec_latch_t *
iplatch_create()
{
	ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP);
	if (ipl == NULL)
		return (ipl);
	bzero(ipl, sizeof (*ipl));
	mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL);
	ipl->ipl_refcnt = 1;
	return (ipl);
}
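/*
 * Latch lifecycle sketch (illustrative; assumes IPLATCH_REFRELE() calls
 * iplatch_free() when the refcount hits zero):
 *
 *	ipsec_latch_t *ipl = iplatch_create();	(ipl_refcnt starts at 1)
 *	connp->conn_latch = ipl;		(conn owns that reference)
 *	...
 *	IPLATCH_REFRELE(ipl);			(final release frees ipl)
 *
 * iplatch_free() must only run once, at refcount zero, since it tears
 * down the mutex and drops the policy/action/identity references.
 */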
/*
 * Identity hash table.
 *
 * Identities are refcounted and "interned" into the hash table.
 * Only references coming from other objects (SA's, latching state)
 * are counted in ipsid_refcnt.
 *
 * Locking: IPSID_REFHOLD is safe only when (a) the object's hash bucket
 * is locked, (b) we know that the refcount must be > 0.
 *
 * The ipsid_next and ipsid_ptpn fields are only to be referenced or
 * modified when the bucket lock is held; in particular, we only
 * delete objects while holding the bucket lock, and we only increase
 * the refcount from 0 to 1 while the bucket lock is held.
 */

#define	IPSID_HASHSIZE 64

typedef struct ipsif_s
{
	ipsid_t *ipsif_head;
	kmutex_t ipsif_lock;
} ipsif_t;

ipsif_t ipsid_buckets[IPSID_HASHSIZE];

/*
 * Hash function for ID hash table.
 */
static uint32_t
ipsid_hash(int idtype, char *idstring)
{
	uint32_t hval = idtype;
	unsigned char c;

	while ((c = *idstring++) != 0) {
		hval = (hval << 4) | (hval >> 28);
		hval ^= c;
	}
	hval = hval ^ (hval >> 16);
	return (hval & (IPSID_HASHSIZE-1));
}

/*
 * Look up identity string in hash table.  Return identity object
 * corresponding to the name -- either preexisting, or newly allocated.
 *
 * Return NULL if we need to allocate a new one and can't get memory.
 */
ipsid_t *
ipsid_lookup(int idtype, char *idstring)
{
	ipsid_t *retval;
	char *nstr;
	int idlen = strlen(idstring) + 1;

	ipsif_t *bucket = &ipsid_buckets[ipsid_hash(idtype, idstring)];

	mutex_enter(&bucket->ipsif_lock);

	for (retval = bucket->ipsif_head; retval != NULL;
	    retval = retval->ipsid_next) {
		if (idtype != retval->ipsid_type)
			continue;
		if (bcmp(idstring, retval->ipsid_cid, idlen) != 0)
			continue;

		IPSID_REFHOLD(retval);
		mutex_exit(&bucket->ipsif_lock);
		return (retval);
	}

	retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP);
	if (!retval) {
		mutex_exit(&bucket->ipsif_lock);
		return (NULL);
	}

	nstr = kmem_alloc(idlen, KM_NOSLEEP);
	if (!nstr) {
		mutex_exit(&bucket->ipsif_lock);
		kmem_free(retval, sizeof (*retval));
		return (NULL);
	}

	retval->ipsid_refcnt = 1;
	retval->ipsid_next = bucket->ipsif_head;
	if (retval->ipsid_next != NULL)
		retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next;
	retval->ipsid_ptpn = &bucket->ipsif_head;
	retval->ipsid_type = idtype;
	retval->ipsid_cid = nstr;
	bucket->ipsif_head = retval;
	bcopy(idstring, nstr, idlen);
	mutex_exit(&bucket->ipsif_lock);

	return (retval);
}

/*
 * Garbage collect the identity hash table.
 */
void
ipsid_gc()
{
	int i, len;
	ipsid_t *id, *nid;
	ipsif_t *bucket;

	for (i = 0; i < IPSID_HASHSIZE; i++) {
		bucket = &ipsid_buckets[i];
		mutex_enter(&bucket->ipsif_lock);
		for (id = bucket->ipsif_head; id != NULL; id = nid) {
			nid = id->ipsid_next;
			if (id->ipsid_refcnt == 0) {
				*id->ipsid_ptpn = nid;
				if (nid != NULL)
					nid->ipsid_ptpn = id->ipsid_ptpn;
				len = strlen(id->ipsid_cid) + 1;
				kmem_free(id->ipsid_cid, len);
				kmem_free(id, sizeof (*id));
			}
		}
		mutex_exit(&bucket->ipsif_lock);
	}
}
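/*
 * Note on the GC above: an identity whose last reference has been
 * dropped simply sits in its bucket with ipsid_refcnt == 0 until
 * ipsid_gc() sweeps it.  That is what lets ipsid_lookup() legally
 * revive such an entry (0 -> 1) under the bucket lock instead of
 * allocating a duplicate.
 */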
/*
 * Return true if two identities are the same.
 */
boolean_t
ipsid_equal(ipsid_t *id1, ipsid_t *id2)
{
	if (id1 == id2)
		return (B_TRUE);
#ifdef DEBUG
	if ((id1 == NULL) || (id2 == NULL))
		return (B_FALSE);
	/*
	 * test that we're interning id's correctly..
	 */
	ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) ||
	    (id1->ipsid_type != id2->ipsid_type));
#endif
	return (B_FALSE);
}

/*
 * Initialize identity table; called during module initialization.
 */
static void
ipsid_init()
{
	ipsif_t *bucket;
	int i;

	for (i = 0; i < IPSID_HASHSIZE; i++) {
		bucket = &ipsid_buckets[i];
		mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL);
	}
}

/*
 * Free identity table (preparatory to module unload)
 */
static void
ipsid_fini()
{
	ipsif_t *bucket;
	int i;

	for (i = 0; i < IPSID_HASHSIZE; i++) {
		bucket = &ipsid_buckets[i];
		mutex_destroy(&bucket->ipsif_lock);
	}
}

/*
 * Update the minimum and maximum supported key sizes for the
 * specified algorithm.  Must be called while holding the algorithms lock.
 */
void
ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type)
{
	size_t crypto_min = (size_t)-1, crypto_max = 0;
	size_t cur_crypto_min, cur_crypto_max;
	boolean_t is_valid;
	crypto_mechanism_info_t *mech_infos;
	uint_t nmech_infos;
	int crypto_rc, i;
	crypto_mech_usage_t mask;

	ASSERT(MUTEX_HELD(&alg_lock));

	/*
	 * Compute the min, max, and default key sizes (in number of
	 * increments to the default key size in bits) as defined
	 * by the algorithm mappings.  This range of key sizes is used
	 * for policy related operations.  The effective key sizes
	 * supported by the framework could be more limited than
	 * those defined for an algorithm.
	 */
	alg->alg_default_bits = alg->alg_key_sizes[0];
	if (alg->alg_increment != 0) {
		/* key sizes are defined by range & increment */
		alg->alg_minbits = alg->alg_key_sizes[1];
		alg->alg_maxbits = alg->alg_key_sizes[2];

		alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits,
		    alg->alg_increment, alg->alg_default_bits);
	} else if (alg->alg_nkey_sizes == 0) {
		/* no specified key size for algorithm */
		alg->alg_minbits = alg->alg_maxbits = 0;
	} else {
		/* key sizes are defined by enumeration */
		alg->alg_minbits = (uint16_t)-1;
		alg->alg_maxbits = 0;

		for (i = 0; i < alg->alg_nkey_sizes; i++) {
			if (alg->alg_key_sizes[i] < alg->alg_minbits)
				alg->alg_minbits = alg->alg_key_sizes[i];
			if (alg->alg_key_sizes[i] > alg->alg_maxbits)
				alg->alg_maxbits = alg->alg_key_sizes[i];
		}
		alg->alg_default = 0;
	}

	if (!(alg->alg_flags & ALG_FLAG_VALID))
		return;

	/*
	 * Mechanisms do not apply to the NULL encryption
	 * algorithm, so simply return for this case.
	 */
	if (alg->alg_id == SADB_EALG_NULL)
		return;
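	/*
	 * Worked example of the mapping-derived range above, for a
	 * hypothetical cipher with alg_key_sizes[] = { 192, 128, 256 }
	 * and alg_increment == 64: alg_default_bits = 192, alg_minbits =
	 * 128, alg_maxbits = 256, and alg_default is 192 expressed as
	 * increments above the minimum.  The ef_ ("effective") fields
	 * computed below then clip this range to what the crypto
	 * framework providers can actually do.
	 */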
	/*
	 * Find the min and max key sizes supported by the cryptographic
	 * framework providers.
	 */

	/* get the key sizes supported by the framework */
	crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type,
	    &mech_infos, &nmech_infos, KM_SLEEP);
	if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) {
		alg->alg_flags &= ~ALG_FLAG_VALID;
		return;
	}

	/* min and max key sizes supported by framework */
	for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) {
		int unit_bits;

		/*
		 * Ignore entries that do not support the operations
		 * needed for the algorithm type.
		 */
		if (alg_type == IPSEC_ALG_AUTH)
			mask = CRYPTO_MECH_USAGE_MAC;
		else
			mask = CRYPTO_MECH_USAGE_ENCRYPT |
			    CRYPTO_MECH_USAGE_DECRYPT;
		if ((mech_infos[i].mi_usage & mask) != mask)
			continue;

		unit_bits = (mech_infos[i].mi_keysize_unit ==
		    CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1;
		/* adjust min/max supported by framework */
		cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits;
		cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits;

		if (cur_crypto_min < crypto_min)
			crypto_min = cur_crypto_min;

		/*
		 * CRYPTO_EFFECTIVELY_INFINITE is a special value of
		 * the crypto framework which means "no upper limit".
		 */
		if (mech_infos[i].mi_max_key_size ==
		    CRYPTO_EFFECTIVELY_INFINITE)
			crypto_max = (size_t)-1;
		else if (cur_crypto_max > crypto_max)
			crypto_max = cur_crypto_max;

		is_valid = B_TRUE;
	}

	kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) *
	    nmech_infos);

	if (!is_valid) {
		/* no key sizes supported by framework */
		alg->alg_flags &= ~ALG_FLAG_VALID;
		return;
	}
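	/*
	 * Example of the reconciliation below (hypothetical numbers):
	 * if providers report 40..4096 bits but the algorithm mapping
	 * says 128..256 in steps of 64, the effective range is 128..256.
	 * If the providers instead top out at 112 bits (rounded down to
	 * 64), alg_ef_minbits (128) exceeds alg_ef_maxbits (64) and the
	 * algorithm is marked invalid.
	 */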
	/*
	 * Determine min and max key sizes from the alg_key_sizes[]
	 * defined for the algorithm entry.  Adjust key sizes based on
	 * those supported by the framework.
	 */
	alg->alg_ef_default_bits = alg->alg_key_sizes[0];
	if (alg->alg_increment != 0) {
		/* supported key sizes are defined by range & increment */
		crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment);
		crypto_max = ALGBITS_ROUND_DOWN(crypto_max,
		    alg->alg_increment);

		alg->alg_ef_minbits = MAX(alg->alg_minbits,
		    (uint16_t)crypto_min);
		alg->alg_ef_maxbits = MIN(alg->alg_maxbits,
		    (uint16_t)crypto_max);

		/*
		 * If the sizes supported by the framework are outside
		 * the range of sizes defined by the algorithm mappings,
		 * the algorithm cannot be used.  Check for this
		 * condition here.
		 */
		if (alg->alg_ef_minbits > alg->alg_ef_maxbits) {
			alg->alg_flags &= ~ALG_FLAG_VALID;
			return;
		}

		if (alg->alg_ef_default_bits < alg->alg_ef_minbits)
			alg->alg_ef_default_bits = alg->alg_ef_minbits;
		if (alg->alg_ef_default_bits > alg->alg_ef_maxbits)
			alg->alg_ef_default_bits = alg->alg_ef_maxbits;

		alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(
		    alg->alg_ef_minbits, alg->alg_increment,
		    alg->alg_ef_default_bits);
	} else if (alg->alg_nkey_sizes == 0) {
		/* no specified key size for algorithm */
		alg->alg_ef_minbits = alg->alg_ef_maxbits = 0;
	} else {
		/* supported key sizes are defined by enumeration */
		alg->alg_ef_minbits = (uint16_t)-1;
		alg->alg_ef_maxbits = 0;

		for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) {
			/*
			 * Ignore the current key size if it is not in the
			 * range of sizes supported by the framework.
			 */
			if (alg->alg_key_sizes[i] < crypto_min ||
			    alg->alg_key_sizes[i] > crypto_max)
				continue;
			if (alg->alg_key_sizes[i] < alg->alg_ef_minbits)
				alg->alg_ef_minbits = alg->alg_key_sizes[i];
			if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits)
				alg->alg_ef_maxbits = alg->alg_key_sizes[i];
			is_valid = B_TRUE;
		}

		if (!is_valid) {
			alg->alg_flags &= ~ALG_FLAG_VALID;
			return;
		}
		alg->alg_ef_default = 0;
	}
}

/*
 * Free the memory used by the specified algorithm.
 */
void
ipsec_alg_free(ipsec_alginfo_t *alg)
{
	if (alg == NULL)
		return;

	if (alg->alg_key_sizes != NULL)
		kmem_free(alg->alg_key_sizes,
		    (alg->alg_nkey_sizes + 1) * sizeof (uint16_t));

	if (alg->alg_block_sizes != NULL)
		kmem_free(alg->alg_block_sizes,
		    (alg->alg_nblock_sizes + 1) * sizeof (uint16_t));

	kmem_free(alg, sizeof (*alg));
}

/*
 * Check the validity of the specified key size for an algorithm.
 * Returns B_TRUE if key size is valid, B_FALSE otherwise.
 */
boolean_t
ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg)
{
	if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits)
		return (B_FALSE);

	if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) {
		/*
		 * If the key sizes are defined by enumeration, the new
		 * key size must be equal to one of the supported values.
		 */
		int i;

		for (i = 0; i < alg->alg_nkey_sizes; i++)
			if (key_size == alg->alg_key_sizes[i])
				break;
		if (i == alg->alg_nkey_sizes)
			return (B_FALSE);
	}

	return (B_TRUE);
}
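/*
 * For example, given a hypothetical enumerated algorithm with
 * alg_key_sizes[] = { 128, 192, 256 } and alg_increment == 0,
 * ipsec_valid_key_size() accepts 192 but rejects 160; an enumerated
 * key size must match a listed value exactly, while a range-and-
 * increment algorithm accepts anything within [ef_minbits, ef_maxbits].
 */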
/*
 * Callback function invoked by the crypto framework when a provider
 * registers or unregisters.  This callback updates the algorithms
 * tables when a crypto algorithm is no longer available or becomes
 * available, and triggers the freeing/creation of context templates
 * associated with existing SAs, if needed.
 */
void
ipsec_prov_update_callback(uint32_t event, void *event_arg)
{
	crypto_notify_event_change_t *prov_change =
	    (crypto_notify_event_change_t *)event_arg;
	uint_t algidx, algid, algtype, mech_count, mech_idx;
	ipsec_alginfo_t *alg;
	ipsec_alginfo_t oalg;
	crypto_mech_name_t *mechs;
	boolean_t alg_changed = B_FALSE;

	/* ignore events for which we didn't register */
	if (event != CRYPTO_EVENT_MECHS_CHANGED) {
		ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x "
		    "received from crypto framework\n", event));
		return;
	}

	mechs = crypto_get_mech_list(&mech_count, KM_SLEEP);
	if (mechs == NULL)
		return;

	/*
	 * Walk the list of currently defined IPsec algorithms.  Update
	 * the algorithm valid flag and trigger an update of the
	 * SAs that depend on that algorithm.
	 */
	mutex_enter(&alg_lock);
	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		for (algidx = 0; algidx < ipsec_nalgs[algtype]; algidx++) {

			algid = ipsec_sortlist[algtype][algidx];
			alg = ipsec_alglists[algtype][algid];
			ASSERT(alg != NULL);

			/*
			 * Skip the algorithms which do not map to the
			 * crypto framework provider being added or removed.
			 */
			if (strncmp(alg->alg_mech_name,
			    prov_change->ec_mech_name,
			    CRYPTO_MAX_MECH_NAME) != 0)
				continue;

			/*
			 * Determine if the mechanism is valid.  If it
			 * is not, mark the algorithm as being invalid.  If
			 * it is, mark the algorithm as being valid.
			 */
			for (mech_idx = 0; mech_idx < mech_count; mech_idx++)
				if (strncmp(alg->alg_mech_name,
				    mechs[mech_idx],
				    CRYPTO_MAX_MECH_NAME) == 0)
					break;
			if (mech_idx == mech_count &&
			    alg->alg_flags & ALG_FLAG_VALID) {
				alg->alg_flags &= ~ALG_FLAG_VALID;
				alg_changed = B_TRUE;
			} else if (mech_idx < mech_count &&
			    !(alg->alg_flags & ALG_FLAG_VALID)) {
				alg->alg_flags |= ALG_FLAG_VALID;
				alg_changed = B_TRUE;
			}

			/*
			 * Update the supported key sizes, regardless
			 * of whether a crypto provider was added or
			 * removed.
			 */
			oalg = *alg;
			ipsec_alg_fix_min_max(alg, algtype);
			if (!alg_changed &&
			    (alg->alg_ef_minbits != oalg.alg_ef_minbits ||
			    alg->alg_ef_maxbits != oalg.alg_ef_maxbits ||
			    alg->alg_ef_default != oalg.alg_ef_default ||
			    alg->alg_ef_default_bits !=
			    oalg.alg_ef_default_bits))
				alg_changed = B_TRUE;

			/*
			 * Update the affected SAs if a software provider is
			 * being added or removed.
			 */
			if (prov_change->ec_provider_type ==
			    CRYPTO_SW_PROVIDER)
				sadb_alg_update(algtype, alg->alg_id,
				    prov_change->ec_change ==
				    CRYPTO_MECH_ADDED);
		}
	}
	mutex_exit(&alg_lock);
	crypto_free_mech_list(mechs, mech_count);

	if (alg_changed) {
		/*
		 * An algorithm has changed, i.e. it became valid or
		 * invalid, or its supported key sizes have changed.
		 * Notify ipsecah and ipsecesp of this change so
		 * that they can send a SADB_REGISTER to their consumers.
		 */
		ipsecah_algs_changed();
		ipsecesp_algs_changed();
	}
}

/*
 * Registers with the crypto framework to be notified of crypto
 * providers changes.  Used to update the algorithm tables and
 * to free or create context templates if needed.  Invoked after IPsec
 * is loaded successfully.
 */
void
ipsec_register_prov_update(void)
{
	prov_update_handle = crypto_notify_events(
	    ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED);
}

/*
 * Unregisters from the framework to be notified of crypto providers
 * changes.  Called from ipsec_policy_destroy().
 */
static void
ipsec_unregister_prov_update(void)
{
	if (prov_update_handle != NULL)
		crypto_unnotify_events(prov_update_handle);
}

/*
 * Tunnel-mode support routines.
 */

/*
 * Returns an mblk chain suitable for putnext() if policies match and IPsec
 * SAs are available.  If there's no per-tunnel policy, or a match comes back
 * with no match, then still return the packet and have global policy take
 * a crack at it in IP.
 *
 * Remember -> we can be forwarding packets.  Keep that in mind w.r.t.
 * inner-packet contents.
 */
mblk_t *
ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4,
    ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6,
    int outer_hdr_len)
{
	ipsec_tun_pol_t *itp = atp->tun_itp;
	ipsec_policy_head_t *polhead;
	ipsec_selector_t sel;
	mblk_t *ipsec_mp, *ipsec_mp_head, *nmp;
	mblk_t *spare_mp = NULL;
	ipsec_out_t *io;
	boolean_t is_fragment;
	ipsec_policy_t *pol;

	ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL ||
	    outer_ipv4 != NULL && outer_ipv6 == NULL);
	/* We take care of inners in a bit. */

	/* No policy on this tunnel - let global policy have at it. */
	if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE))
		return (mp);
	polhead = itp->itp_policy;

	bzero(&sel, sizeof (sel));
	if (inner_ipv4 != NULL) {
		ASSERT(inner_ipv6 == NULL);
		sel.ips_isv4 = B_TRUE;
		sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
		sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
		sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol;
		is_fragment =
		    IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags);
	} else {
		ASSERT(inner_ipv6 != NULL);
		sel.ips_isv4 = B_FALSE;
		sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
		/* Use ip_get_dst_v6() just for the fragment bit. */
		sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6,
		    &is_fragment);
		/*
		 * Reset, because we don't care about routing-header dests
		 * in the forwarding/tunnel path.
		 */
		sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
	}

	if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) {
		if (is_fragment) {
			ipha_t *oiph;
			ipha_t *iph = NULL;
			ip6_t *ip6h = NULL;
			int hdr_len;
			uint16_t ip6_hdr_length;
			uint8_t v6_proto;
			uint8_t *v6_proto_p;

			/*
			 * We have a fragment we need to track!
			 */
			mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp,
			    outer_hdr_len);
			if (mp == NULL)
				return (NULL);

			/*
			 * If we get here, we have a full
			 * fragment chain
			 */

			oiph = (ipha_t *)mp->b_rptr;
			if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
				hdr_len = ((outer_hdr_len != 0) ?
				    IPH_HDR_LENGTH(oiph) : 0);
				iph = (ipha_t *)(mp->b_rptr + hdr_len);
			} else {
				ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
				if ((spare_mp = msgpullup(mp, -1)) == NULL) {
					ip_drop_packet_chain(mp, B_FALSE,
					    NULL, NULL, &ipdrops_spd_nomem,
					    &spd_dropper);
					/* Don't deref a NULL spare_mp. */
					return (NULL);
				}
				ip6h = (ip6_t *)spare_mp->b_rptr;
				(void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
				    &ip6_hdr_length, &v6_proto_p);
				hdr_len = ip6_hdr_length;
			}
			outer_hdr_len = hdr_len;

			if (sel.ips_isv4) {
				if (iph == NULL) {
					/* Was v6 outer */
					iph = (ipha_t *)(mp->b_rptr + hdr_len);
				}
				inner_ipv4 = iph;
				sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
				sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
				sel.ips_protocol =
				    (uint8_t)inner_ipv4->ipha_protocol;
			} else {
				if ((spare_mp == NULL) &&
				    ((spare_mp = msgpullup(mp, -1)) == NULL)) {
					ip_drop_packet_chain(mp, B_FALSE,
					    NULL, NULL, &ipdrops_spd_nomem,
					    &spd_dropper);
					/* Don't deref a NULL spare_mp. */
					return (NULL);
				}
				inner_ipv6 = (ip6_t *)(spare_mp->b_rptr +
				    hdr_len);
				sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
				sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
				(void) ip_hdr_length_nexthdr_v6(spare_mp,
				    inner_ipv6, &ip6_hdr_length,
				    &v6_proto_p);
				v6_proto = *v6_proto_p;
				sel.ips_protocol = v6_proto;
#ifdef FRAGCACHE_DEBUG
				cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n",
				    sel.ips_protocol);
#endif
			}
			/* Ports are extracted below */
		}

		/* Get ports... */
		if (spare_mp != NULL) {
			if (!ipsec_init_outbound_ports(&sel, spare_mp,
			    inner_ipv4, inner_ipv6, outer_hdr_len)) {
				/*
				 * callee did ip_drop_packet_chain() on
				 * spare_mp
				 */
				ipsec_freemsg_chain(mp);
				return (NULL);
			}
		} else {
			if (!ipsec_init_outbound_ports(&sel, mp,
			    inner_ipv4, inner_ipv6, outer_hdr_len)) {
				/* callee did ip_drop_packet_chain() on mp. */
				return (NULL);
			}
		}
#ifdef FRAGCACHE_DEBUG
		if (inner_ipv4 != NULL)
			cmn_err(CE_WARN,
			    "(v4) sel.ips_protocol = %d, "
			    "sel.ips_local_port = %d, "
			    "sel.ips_remote_port = %d\n",
			    sel.ips_protocol, ntohs(sel.ips_local_port),
			    ntohs(sel.ips_remote_port));
		if (inner_ipv6 != NULL)
			cmn_err(CE_WARN,
			    "(v6) sel.ips_protocol = %d, "
			    "sel.ips_local_port = %d, "
			    "sel.ips_remote_port = %d\n",
			    sel.ips_protocol, ntohs(sel.ips_local_port),
			    ntohs(sel.ips_remote_port));
#endif
		/* Success so far - done with spare_mp */
		ipsec_freemsg_chain(spare_mp);
	}
	rw_enter(&polhead->iph_lock, RW_READER);
	pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel);
	rw_exit(&polhead->iph_lock);
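	/*
	 * A non-NULL pol comes back from ipsec_find_policy_head() with
	 * its reference count bumped.  Every path below must either hand
	 * that reference to the ipsec_out (whose free routine releases
	 * it) or IPPOL_REFRELE() it explicitly.
	 */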
	if (pol == NULL) {
		/*
		 * No matching policy on this tunnel, drop the packet.
		 *
		 * NOTE: Tunnel-mode tunnels are different from the
		 * IP global transport mode policy head.  For a tunnel-mode
		 * tunnel, we drop the packet in lieu of passing it
		 * along accepted the way a global-policy miss would.
		 *
		 * NOTE2: "negotiate transport" tunnels should match ALL
		 * inbound packets, but we do not uncomment the ASSERT()
		 * below because if/when we open PF_POLICY, a user can
		 * shoot him/her-self in the foot with a 0 priority.
		 */

		/* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel "
		    "per-port policy\n");
#endif
		ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
		    &ipdrops_spd_explicit, &spd_dropper);
		return (NULL);
	}

#ifdef FRAGCACHE_DEBUG
	cmn_err(CE_WARN, "Having matching tunnel per-port policy\n");
#endif

	/* Construct an IPSEC_OUT message. */
	ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out();
	if (ipsec_mp == NULL) {
		IPPOL_REFRELE(pol);
		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem,
		    &spd_dropper);
		return (NULL);
	}
	ipsec_mp->b_cont = mp;
	io = (ipsec_out_t *)ipsec_mp->b_rptr;
	IPPH_REFHOLD(polhead);
	/*
	 * NOTE: free() function of ipsec_out mblk will release polhead and
	 * pol references.
	 */
	io->ipsec_out_polhead = polhead;
	io->ipsec_out_policy = pol;
	io->ipsec_out_zoneid = atp->tun_zoneid;
	io->ipsec_out_v4 = (outer_ipv4 != NULL);
	io->ipsec_out_secure = B_TRUE;

	if (!(itp->itp_flags & ITPF_P_TUNNEL)) {
		/* Set up transport mode for tunnelled packets. */
		io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP :
		    IPPROTO_IPV6;
		return (ipsec_mp);
	}

	/* Fill in tunnel-mode goodies here. */
	io->ipsec_out_tunnel = B_TRUE;
	/* XXX Do I need to fill in all of the goodies here? */
	if (inner_ipv4) {
		io->ipsec_out_inaf = AF_INET;
		io->ipsec_out_insrc[0] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4;
		io->ipsec_out_indst[0] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4;
	} else {
		io->ipsec_out_inaf = AF_INET6;
		io->ipsec_out_insrc[0] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0];
		io->ipsec_out_insrc[1] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1];
		io->ipsec_out_insrc[2] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2];
		io->ipsec_out_insrc[3] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3];
		io->ipsec_out_indst[0] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0];
		io->ipsec_out_indst[1] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1];
		io->ipsec_out_indst[2] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2];
		io->ipsec_out_indst[3] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3];
	}
	io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen;
	io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen;
	/* NOTE: These are used for transport mode too. */
	io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport;
	io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport;
	io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto;
	/*
	 * The mp pointer is still valid.
	 * Add an ipsec_out to each fragment.
	 * The fragment head already has one.
	 */
	nmp = mp->b_next;
	mp->b_next = NULL;
	mp = nmp;
	ASSERT(ipsec_mp != NULL);
	while (mp != NULL) {
		nmp = mp->b_next;
		ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp);
		if (ipsec_mp->b_next == NULL) {
			ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL,
			    NULL, &ipdrops_spd_nomem, &spd_dropper);
			ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
			    &ipdrops_spd_nomem, &spd_dropper);
			return (NULL);
		}
		ipsec_mp = ipsec_mp->b_next;
		mp->b_next = NULL;
		mp = nmp;
	}
	return (ipsec_mp_head);
}

/*
 * NOTE: The following releases pol's reference and
 * calls ip_drop_packet() for me on NULL returns.
 */
mblk_t *
ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol,
    ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique)
{
	/* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */
	mblk_t *data_chain = NULL, *data_tail = NULL;
	mblk_t *ii_next;

	while (ipsec_mp != NULL) {
		ii_next = ipsec_mp->b_next;
		ipsec_mp->b_next = NULL;	/* No tripping asserts. */

		/*
		 * Need IPPOL_REFHOLD(pol) for extras because
		 * ipsecin_policy does the refrele.
		 */
		IPPOL_REFHOLD(pol);

		if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4,
		    inner_ipv6, pkt_unique) != NULL) {
			if (data_tail == NULL) {
				/* First one */
				data_chain = data_tail = ipsec_mp->b_cont;
			} else {
				data_tail->b_next = ipsec_mp->b_cont;
				data_tail = data_tail->b_next;
			}
			freeb(ipsec_mp);
		} else {
			/*
			 * ipsec_check_ipsecin_policy() freed ipsec_mp
			 * already.  Need to get rid of any extra pol
			 * references, and any remaining bits as well.
			 */
			IPPOL_REFRELE(pol);
			ipsec_freemsg_chain(data_chain);
			ipsec_freemsg_chain(ii_next);	/* ipdrop stats? */
			return (NULL);
		}
		ipsec_mp = ii_next;
	}
	/*
	 * One last release because either the loop bumped it up, or we never
	 * called ipsec_check_ipsecin_policy().
	 */
	IPPOL_REFRELE(pol);

	/* data_chain is ready for return to tun module. */
	return (data_chain);
}

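/*
 * The pkt_unique argument above is an SA_UNIQUE_ID() cookie: the flow's
 * ports and protocols packed into a single uint64_t so the per-fragment
 * policy checks can match SAs latched to one flow.  See the caller in
 * ipsec_tun_inbound() below for how it is composed.
 */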
/*
 * Returns B_TRUE if the inbound packet passed an IPsec policy check.  Returns
 * B_FALSE if it failed or if it is a fragment needing its friends before a
 * policy check can be performed.
 *
 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead.
 * data_mp may be reassigned with a b_next chain of packets if fragments
 * needed to be collected for a proper policy check.
 *
 * Always frees ipsec_mp, but only frees data_mp if returns B_FALSE.  This
 * function calls ip_drop_packet() on data_mp if need be.
 *
 * NOTE: outer_hdr_len is signed.  If it's a negative value, the caller
 * is inspecting an ICMP packet.
 */
boolean_t
ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp,
    ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
    ip6_t *outer_ipv6, int outer_hdr_len)
{
	ipsec_policy_head_t *polhead;
	ipsec_selector_t sel;
	mblk_t *message = (ipsec_mp == NULL) ? *data_mp : ipsec_mp;
	ipsec_policy_t *pol;
	uint16_t tmpport;
	selret_t rc;
	boolean_t retval, port_policy_present, is_icmp, global_present;
	in6_addr_t tmpaddr;
	ipaddr_t tmp4;
	uint8_t flags, *holder, *outer_hdr;

	sel.ips_is_icmp_inv_acq = 0;

	if (outer_ipv4 != NULL) {
		ASSERT(outer_ipv6 == NULL);
		outer_hdr = (uint8_t *)outer_ipv4;
		global_present = ipsec_inbound_v4_policy_present;
	} else {
		outer_hdr = (uint8_t *)outer_ipv6;
		global_present = ipsec_inbound_v6_policy_present;
	}
	ASSERT(outer_hdr != NULL);

	ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL ||
	    inner_ipv4 == NULL && inner_ipv6 != NULL);
	ASSERT(message == *data_mp || message->b_cont == *data_mp);

	if (outer_hdr_len < 0) {
		outer_hdr_len = (-outer_hdr_len);
		is_icmp = B_TRUE;
	} else {
		is_icmp = B_FALSE;
	}

	if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
		polhead = itp->itp_policy;
		/*
		 * We need to perform full Tunnel-Mode enforcement,
		 * and we need to have inner-header data for such enforcement.
		 *
		 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound
		 * and on return.
		 */

		port_policy_present = ((itp->itp_flags &
		    ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE);
		flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) |
		    (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE);

		rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4,
		    inner_ipv6, flags);

		switch (rc) {
		case SELRET_NOMEM:
			ip_drop_packet(message, B_TRUE, NULL, NULL,
			    &ipdrops_spd_nomem, &spd_dropper);
			return (B_FALSE);
		case SELRET_TUNFRAG:
			/*
			 * At this point, if we're cleartext, we don't want
			 * to go there.
			 */
			if (ipsec_mp == NULL) {
				ip_drop_packet(*data_mp, B_TRUE, NULL, NULL,
				    &ipdrops_spd_got_clear, &spd_dropper);
				*data_mp = NULL;
				return (B_FALSE);
			}
			ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)->
			    ipsec_in_secure);
			message = ipsec_fragcache_add(&itp->itp_fragcache,
			    ipsec_mp, *data_mp, outer_hdr_len);

			if (message == NULL) {
				/*
				 * Data is cached, fragment chain is not
				 * complete.  I consume ipsec_mp and data_mp
				 */
				return (B_FALSE);
			}

			/*
			 * If we get here, we have a full fragment chain.
			 * Reacquire headers and selectors from first fragment.
			 */
			if (inner_ipv4 != NULL) {
				inner_ipv4 = (ipha_t *)message->b_cont->b_rptr;
				ASSERT(message->b_cont->b_wptr -
				    message->b_cont->b_rptr > sizeof (ipha_t));
			} else {
				inner_ipv6 = (ip6_t *)message->b_cont->b_rptr;
				ASSERT(message->b_cont->b_wptr -
				    message->b_cont->b_rptr > sizeof (ip6_t));
			}
			/* Use SEL_NONE so we always get ports! */
			rc = ipsec_init_inbound_sel(&sel, message->b_cont,
			    inner_ipv4, inner_ipv6, SEL_NONE);
			switch (rc) {
			case SELRET_SUCCESS:
				/*
				 * Get to same place as first caller's
				 * SELRET_SUCCESS case.
				 */
				break;
			case SELRET_NOMEM:
				ip_drop_packet_chain(message, B_TRUE, NULL,
				    NULL, &ipdrops_spd_nomem, &spd_dropper);
				return (B_FALSE);
			case SELRET_BADPKT:
				ip_drop_packet_chain(message, B_TRUE, NULL,
				    NULL, &ipdrops_spd_malformed_frag,
				    &spd_dropper);
				return (B_FALSE);
			case SELRET_TUNFRAG:
				cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)");
				/* FALLTHRU */
			default:
				cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)"
				    " returns bizarro 0x%x", rc);
				/* Guaranteed panic! */
				ASSERT(rc == SELRET_NOMEM);
				return (B_FALSE);
			}
			/* FALLTHRU */
		case SELRET_SUCCESS:
			/*
			 * Common case:
			 * No per-port policy or a non-fragment.  Keep going.
			 */
			break;
		case SELRET_BADPKT:
			/*
			 * We may receive ICMP (with IPv6 inner) packets that
			 * trigger this return value.  Send 'em in for
			 * enforcement checking.
			 */
			cmn_err(CE_NOTE, "ipsec_tun_inbound(): "
			    "sending 'bad packet' in for enforcement");
			break;
		default:
			cmn_err(CE_WARN,
			    "ipsec_init_inbound_sel() returns bizarro 0x%x",
			    rc);
			ASSERT(rc == SELRET_NOMEM);	/* Guaranteed panic! */
			return (B_FALSE);
		}

		if (is_icmp) {
			/*
			 * Swap local/remote because this is an ICMP packet.
			 */
			tmpaddr = sel.ips_local_addr_v6;
			sel.ips_local_addr_v6 = sel.ips_remote_addr_v6;
			sel.ips_remote_addr_v6 = tmpaddr;
			tmpport = sel.ips_local_port;
			sel.ips_local_port = sel.ips_remote_port;
			sel.ips_remote_port = tmpport;
		}

		/* find_policy_head() */
		rw_enter(&polhead->iph_lock, RW_READER);
		pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND,
		    &sel);
		rw_exit(&polhead->iph_lock);
		if (pol != NULL) {
			if (ipsec_mp == NULL ||
			    !((ipsec_in_t *)ipsec_mp->b_rptr)->
			    ipsec_in_secure) {
				retval = pol->ipsp_act->ipa_allow_clear;
				if (!retval) {
					/*
					 * XXX should never get here with
					 * tunnel reassembled fragments?
					 */
					ASSERT(message->b_next == NULL);
					ip_drop_packet(message, B_TRUE, NULL,
					    NULL, &ipdrops_spd_got_clear,
					    &spd_dropper);
				} else if (ipsec_mp != NULL) {
					freeb(ipsec_mp);
				}

				IPPOL_REFRELE(pol);
				return (retval);
			}
			/*
			 * NOTE: The following releases pol's reference and
			 * calls ip_drop_packet() for me on NULL returns.
			 *
			 * "sel" is still good here, so let's use it!
			 */
			*data_mp = ipsec_check_ipsecin_policy_reasm(message,
			    pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID(
			    sel.ips_remote_port, sel.ips_local_port,
			    (inner_ipv4 == NULL) ? IPPROTO_IPV6 :
			    IPPROTO_ENCAP, sel.ips_protocol));
			return (*data_mp != NULL);
		}

		/*
		 * Else fallthru and check the global policy on the outer
		 * header(s) if this tunnel is an old-style transport-mode
		 * one.  Drop the packet explicitly (no policy entry) for
		 * a new-style tunnel-mode tunnel.
		 */
		if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) {
			ip_drop_packet_chain(message, B_TRUE, NULL,
			    NULL, &ipdrops_spd_explicit, &spd_dropper);
			return (B_FALSE);
		}
	}
	/*
	 * NOTE: If we reach here, we will not have packet chains from
	 * fragcache_add(), because the only way I get chains is on a
	 * tunnel-mode tunnel, which either returns with a pass, or gets
	 * hit by the ip_drop_packet_chain() call right above here.
	 */

	/* If no per-tunnel security, check global policy now. */
	if (ipsec_mp != NULL && !global_present) {
		if (((ipsec_in_t *)(ipsec_mp->b_rptr))->
		    ipsec_in_icmp_loopback) {
			/*
			 * This is an ICMP message with an ipsec_mp
			 * attached.  We should accept it.
			 */
			if (ipsec_mp != NULL)
				freeb(ipsec_mp);
			return (B_TRUE);
		}

		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
		    &ipdrops_spd_got_secure, &spd_dropper);
		return (B_FALSE);
	}

	/*
	 * The following assertion is valid because only the tun module alters
	 * the mblk chain - stripping the outer header by advancing mp->b_rptr.
	 */
	ASSERT(is_icmp ||
	    ((*data_mp)->b_datap->db_base <= outer_hdr &&
	    outer_hdr < (*data_mp)->b_rptr));
	holder = (*data_mp)->b_rptr;
	(*data_mp)->b_rptr = outer_hdr;

	if (is_icmp) {
		/*
		 * For ICMP packets, "outer_ipvN" is set to the outer header
		 * that is *INSIDE* the ICMP payload.  For global policy
		 * checking, we need to reverse src/dst on the payload in
		 * order to construct selectors appropriately.  See "ripha"
		 * constructions in ip.c.  To avoid a bug like 6478464 (see
		 * earlier in this file), we will actually exchange src/dst
		 * in the packet, and reverse it after the call to
		 * ipsec_check_global_policy().
		 */
		if (outer_ipv4 != NULL) {
			tmp4 = outer_ipv4->ipha_src;
			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
			outer_ipv4->ipha_dst = tmp4;
		} else {
			ASSERT(outer_ipv6 != NULL);
			tmpaddr = outer_ipv6->ip6_src;
			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
			outer_ipv6->ip6_dst = tmpaddr;
		}
	}

	/* NOTE: Frees message if it returns NULL. */
	if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6,
	    (ipsec_mp != NULL)) == NULL) {
		return (B_FALSE);
	}

	if (is_icmp) {
		/* Set things back to normal. */
		if (outer_ipv4 != NULL) {
			tmp4 = outer_ipv4->ipha_src;
			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
			outer_ipv4->ipha_dst = tmp4;
		} else {
			/* No need for ASSERT()s now. */
			tmpaddr = outer_ipv6->ip6_src;
			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
			outer_ipv6->ip6_dst = tmpaddr;
		}
	}

	(*data_mp)->b_rptr = holder;

	if (ipsec_mp != NULL)
		freeb(ipsec_mp);

	/*
	 * At this point, we pretend it's a cleartext accepted
	 * packet.
	 */
	return (B_TRUE);
}

/*
 * AVL comparison routine for our list of tunnel polheads.
 */
static int
tunnel_compare(const void *arg1, const void *arg2)
{
	ipsec_tun_pol_t *left, *right;
	int rc;

	left = (ipsec_tun_pol_t *)arg1;
	right = (ipsec_tun_pol_t *)arg2;

	rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ);
	return (rc == 0 ? rc : (rc > 0 ? 1 : -1));
}
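/*
 * The AVL framework expects comparators to return exactly -1, 0, or 1,
 * which is why tunnel_compare() normalizes the raw strncmp() result
 * instead of returning it directly.
 */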
/*
 * Free a tunnel policy node.
 */
void
itp_free(ipsec_tun_pol_t *node)
{
	IPPH_REFRELE(node->itp_policy);
	IPPH_REFRELE(node->itp_inactive);
	mutex_destroy(&node->itp_lock);
	kmem_free(node, sizeof (*node));
}

void
itp_unlink(ipsec_tun_pol_t *node)
{
	rw_enter(&tunnel_policy_lock, RW_WRITER);
	tunnel_policy_gen++;
	ipsec_fragcache_uninit(&node->itp_fragcache);
	avl_remove(&tunnel_policies, node);
	rw_exit(&tunnel_policy_lock);
	ITP_REFRELE(node);
}

/*
 * Public interface to look up a tunnel security policy by name.  Used by
 * spdsock mostly.  Returns "node" with a bumped refcnt.
 */
ipsec_tun_pol_t *
get_tunnel_policy(char *name)
{
	ipsec_tun_pol_t *node, lookup;

	(void) strncpy(lookup.itp_name, name, LIFNAMSIZ);

	rw_enter(&tunnel_policy_lock, RW_READER);
	node = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, &lookup, NULL);
	if (node != NULL) {
		ITP_REFHOLD(node);
	}
	rw_exit(&tunnel_policy_lock);

	return (node);
}

/*
 * Public interface to walk all tunnel security policies.  Useful for spdsock
 * DUMP operations.  iterator() will not consume a reference.
 */
void
itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *), void *arg)
{
	ipsec_tun_pol_t *node;

	rw_enter(&tunnel_policy_lock, RW_READER);
	for (node = avl_first(&tunnel_policies); node != NULL;
	    node = AVL_NEXT(&tunnel_policies, node)) {
		iterator(node, arg);
	}
	rw_exit(&tunnel_policy_lock);
}
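/*
 * Illustrative itp_walk() iterator (hypothetical; spdsock's real DUMP
 * callback is more involved):
 *
 *	static void
 *	count_itp(ipsec_tun_pol_t *itp, void *arg)
 *	{
 *		(*(uint_t *)arg)++;
 *	}
 *
 *	uint_t n = 0;
 *	itp_walk(count_itp, &n);
 *
 * No ITP_REFHOLD is needed inside the iterator; the walker holds
 * tunnel_policy_lock across every callback.
 */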
/*
 * Public interface to walk all tunnel security policies.  Useful for
 * spdsock DUMP operations.  iterator() will not consume a reference.
 */
void
itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *), void *arg)
{
	ipsec_tun_pol_t *node;

	rw_enter(&tunnel_policy_lock, RW_READER);
	for (node = avl_first(&tunnel_policies); node != NULL;
	    node = AVL_NEXT(&tunnel_policies, node)) {
		iterator(node, arg);
	}
	rw_exit(&tunnel_policy_lock);
}

/*
 * Initialize policy head.  This can only fail if there's a memory problem.
 */
static boolean_t
tunnel_polhead_init(ipsec_policy_head_t *iph)
{
	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	iph->iph_refs = 1;
	iph->iph_gen = 0;
	if (ipsec_alloc_table(iph, tun_spd_hashsize, KM_SLEEP, B_FALSE) != 0) {
		ipsec_polhead_free_table(iph);
		return (B_FALSE);
	}
	ipsec_polhead_init(iph, tun_spd_hashsize);
	return (B_TRUE);
}

/*
 * Create a tunnel policy node with "name".  Set *errno to ENOMEM if there's
 * a memory problem, and to EEXIST if there's an existing node.
 */
ipsec_tun_pol_t *
create_tunnel_policy(char *name, int *errno, uint64_t *gen)
{
	ipsec_tun_pol_t *newbie, *existing;
	avl_index_t where;

	newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
	if (newbie == NULL) {
		*errno = ENOMEM;
		return (NULL);
	}
	if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
		kmem_free(newbie, sizeof (*newbie));
		*errno = ENOMEM;
		return (NULL);
	}

	(void) strncpy(newbie->itp_name, name, LIFNAMSIZ);

	rw_enter(&tunnel_policy_lock, RW_WRITER);
	existing = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, newbie,
	    &where);
	if (existing != NULL) {
		ipsec_fragcache_uninit(&newbie->itp_fragcache);
		itp_free(newbie);
		*errno = EEXIST;
		rw_exit(&tunnel_policy_lock);
		return (NULL);
	}
	tunnel_policy_gen++;
	*gen = tunnel_policy_gen;
	newbie->itp_refcnt = 2;	/* One for the caller, one for the tree. */
	newbie->itp_next_policy_index = 1;
	avl_insert(&tunnel_policies, newbie, where);
	mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
	newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_policy == NULL)
		goto nomem;
	newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_inactive == NULL) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		goto nomem;
	}

	if (!tunnel_polhead_init(newbie->itp_policy)) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	} else if (!tunnel_polhead_init(newbie->itp_inactive)) {
		IPPH_REFRELE(newbie->itp_policy);
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	}
	rw_exit(&tunnel_policy_lock);

	return (newbie);
nomem:
	/*
	 * Undo the insertion and drop the lock before freeing, lest the
	 * tree reference freed memory and the writer lock leak.
	 */
	*errno = ENOMEM;
	avl_remove(&tunnel_policies, newbie);
	rw_exit(&tunnel_policy_lock);
	mutex_destroy(&newbie->itp_lock);
	ipsec_fragcache_uninit(&newbie->itp_fragcache);
	kmem_free(newbie, sizeof (*newbie));
	return (NULL);
}

/*
 * We can't call the tun_t lookup function until tun is
 * loaded, so create a dummy function to avoid symbol
 * lookup errors on boot.
 */
/* ARGSUSED */
ipsec_tun_pol_t *
itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af)
{
	return (NULL);	/* Always return NULL. */
}

/*
 * Frag cache code, based on SunScreen 3.2 source
 * screen/kernel/common/screen_fragcache.c
 */

#define	IPSEC_FRAG_TTL_MAX	5
/*
 * Note that the following parameters create 256 hash buckets
 * with 1024 free entries to be distributed.  Things are cleaned
 * periodically and are attempted to be cleaned when there is no
 * free space, but this system errs on the side of dropping packets
 * over creating memory exhaustion.  We may decide to make hash
 * factor a tunable if this proves to be a bad decision.
 */
#define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
#define	IPSEC_FRAG_HASH_FACTOR	4
#define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)

#define	IPSEC_FRAG_HASH_MASK	(IPSEC_FRAG_HASH_SLOTS - 1)
#define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
					    (((id) / \
					    (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
					    IPSEC_FRAG_HASH_MASK))
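
/*
 * For example, an IPv4 ident of 0x1234 hashes to (0x34 ^ 0x12) == 0x26,
 * i.e. bucket 38 of the 256 slots.
 */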
5990 */ 5991 boolean_t 5992 ipsec_fragcache_init(ipsec_fragcache_t *frag) 5993 { 5994 ipsec_fragcache_entry_t *ftemp; 5995 int i; 5996 5997 mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL); 5998 frag->itpf_ptr = (ipsec_fragcache_entry_t **) 5999 kmem_zalloc( 6000 sizeof (ipsec_fragcache_entry_t *) * 6001 IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP); 6002 if (frag->itpf_ptr == NULL) 6003 return (B_FALSE); 6004 6005 ftemp = (ipsec_fragcache_entry_t *) 6006 kmem_zalloc(sizeof (ipsec_fragcache_entry_t) * 6007 IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP); 6008 if (ftemp == NULL) { 6009 kmem_free(frag->itpf_ptr, 6010 sizeof (ipsec_fragcache_entry_t *) * 6011 IPSEC_FRAG_HASH_SLOTS); 6012 return (B_FALSE); 6013 } 6014 6015 frag->itpf_freelist = NULL; 6016 6017 for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) { 6018 ftemp->itpfe_next = frag->itpf_freelist; 6019 frag->itpf_freelist = ftemp; 6020 ftemp++; 6021 } 6022 6023 frag->itpf_expire_hint = 0; 6024 6025 return (B_TRUE); 6026 } 6027 6028 void 6029 ipsec_fragcache_uninit(ipsec_fragcache_t *frag) 6030 { 6031 ipsec_fragcache_entry_t *fep; 6032 int i; 6033 6034 mutex_enter(&frag->itpf_lock); 6035 if (frag->itpf_ptr) { 6036 /* Delete any existing fragcache entry chains */ 6037 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6038 fep = (frag->itpf_ptr)[i]; 6039 while (fep != NULL) { 6040 /* Returned fep is next in chain or NULL */ 6041 fep = fragcache_delentry(i, fep, frag); 6042 } 6043 } 6044 /* 6045 * Chase the pointers back to the beginning 6046 * of the memory allocation and then 6047 * get rid of the allocated freelist 6048 */ 6049 while (frag->itpf_freelist->itpfe_next != NULL) 6050 frag->itpf_freelist = frag->itpf_freelist->itpfe_next; 6051 /* 6052 * XXX - If we ever dynamically grow the freelist 6053 * then we'll have to free entries individually 6054 * or determine how many entries or chunks we have 6055 * grown since the initial allocation. 6056 */ 6057 kmem_free(frag->itpf_freelist, 6058 sizeof (ipsec_fragcache_entry_t) * 6059 IPSEC_FRAG_HASH_SIZE); 6060 /* Free the fragcache structure */ 6061 kmem_free(frag->itpf_ptr, 6062 sizeof (ipsec_fragcache_entry_t *) * 6063 IPSEC_FRAG_HASH_SLOTS); 6064 } 6065 mutex_exit(&frag->itpf_lock); 6066 mutex_destroy(&frag->itpf_lock); 6067 } 6068 6069 /* 6070 * Add a fragment to the fragment cache. Consumes mp if NULL is returned. 6071 * Returns mp if a whole fragment has been assembled, NULL otherwise 6072 */ 6073 6074 mblk_t * 6075 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, 6076 int outer_hdr_len) 6077 { 6078 boolean_t is_v4; 6079 time_t itpf_time; 6080 ipha_t *iph; 6081 ipha_t *oiph; 6082 ip6_t *ip6h = NULL; 6083 uint8_t v6_proto; 6084 uint8_t *v6_proto_p; 6085 uint16_t ip6_hdr_length; 6086 ip6_pkt_t ipp; 6087 ip6_frag_t *fraghdr; 6088 ipsec_fragcache_entry_t *fep; 6089 int i; 6090 mblk_t *nmp, *prevmp, *spare_mp = NULL; 6091 int firstbyte, lastbyte; 6092 int offset; 6093 int last; 6094 boolean_t inbound = (ipsec_mp != NULL); 6095 mblk_t *first_mp = inbound ? 
void
ipsec_fragcache_uninit(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *fep;
	int i;

	mutex_enter(&frag->itpf_lock);
	if (frag->itpf_ptr) {
		/* Delete any existing fragcache entry chains */
		for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
			fep = (frag->itpf_ptr)[i];
			while (fep != NULL) {
				/* Returned fep is next in chain or NULL */
				fep = fragcache_delentry(i, fep, frag);
			}
		}
		/*
		 * Chase the pointers back to the beginning
		 * of the memory allocation and then
		 * get rid of the allocated freelist
		 */
		while (frag->itpf_freelist->itpfe_next != NULL)
			frag->itpf_freelist = frag->itpf_freelist->itpfe_next;
		/*
		 * XXX - If we ever dynamically grow the freelist
		 * then we'll have to free entries individually
		 * or determine how many entries or chunks we have
		 * grown since the initial allocation.
		 */
		kmem_free(frag->itpf_freelist,
		    sizeof (ipsec_fragcache_entry_t) *
		    IPSEC_FRAG_HASH_SIZE);
		/* Free the fragcache structure */
		kmem_free(frag->itpf_ptr,
		    sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
	}
	mutex_exit(&frag->itpf_lock);
	mutex_destroy(&frag->itpf_lock);
}

/*
 * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
 * Returns mp if a whole packet has been assembled, NULL otherwise.
 */
mblk_t *
ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
    int outer_hdr_len)
{
	boolean_t is_v4;
	time_t itpf_time;
	ipha_t *iph;
	ipha_t *oiph;
	ip6_t *ip6h = NULL;
	uint8_t v6_proto;
	uint8_t *v6_proto_p;
	uint16_t ip6_hdr_length;
	ip6_pkt_t ipp;
	ip6_frag_t *fraghdr;
	ipsec_fragcache_entry_t *fep;
	int i;
	mblk_t *nmp, *prevmp, *spare_mp = NULL;
	int firstbyte, lastbyte;
	int offset;
	int last;
	boolean_t inbound = (ipsec_mp != NULL);
	mblk_t *first_mp = inbound ? ipsec_mp : mp;

	mutex_enter(&frag->itpf_lock);

	oiph = (ipha_t *)mp->b_rptr;
	iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);
	if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
		is_v4 = B_TRUE;
	} else {
		ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
		if ((spare_mp = msgpullup(mp, -1)) == NULL) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_nomem, &spd_dropper);
			return (NULL);
		}
		ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len);

		/*
		 * Find the upper-layer protocol.  If this fails, we have
		 * a malformed packet.
		 */
		if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length,
		    &v6_proto_p)) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_malformed_packet, &spd_dropper);
			freemsg(spare_mp);
			return (NULL);
		}
		v6_proto = *v6_proto_p;

		bzero(&ipp, sizeof (ipp));
		(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
		if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
			/*
			 * We think this is a fragment, but didn't find
			 * a fragment header.  Something is wrong.
			 */
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_malformed_frag, &spd_dropper);
			freemsg(spare_mp);
			return (NULL);
		}
		fraghdr = ipp.ipp_fraghdr;
		is_v4 = B_FALSE;
	}
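
	/*
	 * At this point the fragment's flow identity is known: for v4, the
	 * (ident, src, dst, proto) tuple in iph; for v6, the fragment
	 * header's ident plus the addresses in ip6h.  That tuple is the
	 * cache lookup key used below.
	 */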
	/* Anything to clean up? */

	/*
	 * This cleanup call could be put in a timer loop, but it may
	 * actually be just as reasonable a decision to leave it here.  The
	 * disadvantage is that this only gets called when frags are added.
	 * The advantage is that it is not susceptible to race conditions
	 * the way a time-based cleanup may be.
	 */
	itpf_time = gethrestime_sec();
	if (itpf_time >= frag->itpf_expire_hint)
		ipsec_fragcache_clean(frag);

	/* Lookup to see if there is an existing entry */
	if (is_v4)
		i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident);
	else
		i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident);

	for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) {
		if (is_v4) {
			ASSERT(iph != NULL);
			if ((fep->itpfe_id == iph->ipha_ident) &&
			    (fep->itpfe_src == iph->ipha_src) &&
			    (fep->itpfe_dst == iph->ipha_dst) &&
			    (fep->itpfe_proto == iph->ipha_protocol))
				break;
		} else {
			ASSERT(fraghdr != NULL);
			ASSERT(fep != NULL);
			if ((fep->itpfe_id == fraghdr->ip6f_ident) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6,
			    &ip6h->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6,
			    &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto))
				break;
		}
	}

	if (is_v4) {
		firstbyte = V4_FRAG_OFFSET(iph);
		lastbyte = firstbyte + ntohs(iph->ipha_length) -
		    IPH_HDR_LENGTH(iph);
		last = (V4_MORE_FRAGS(iph) == 0);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, "
		    "lastbyte = %d, last = %d, id = %d\n", firstbyte,
		    lastbyte, last, iph->ipha_ident);
#endif
	} else {
		firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
		lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
		    sizeof (ip6_t) - ip6_hdr_length;
		last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0;
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, "
		    "lastbyte = %d, last = %d, id = %d, fraghdr = %p, "
		    "spare_mp = %p\n", firstbyte, lastbyte, last,
		    fraghdr->ip6f_ident, fraghdr, spare_mp);
#endif
	}

	/* Check for bogus fragments and delete the entry */
	if (firstbyte > 0 && firstbyte <= 8) {
		if (fep != NULL)
			(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    &ipdrops_spd_malformed_frag, &spd_dropper);
		freemsg(spare_mp);
		return (NULL);
	}

	/* Not found, allocate a new entry */
	if (fep == NULL) {
		if (frag->itpf_freelist == NULL) {
			/* see if there is some space */
			ipsec_fragcache_clean(frag);
			if (frag->itpf_freelist == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				freemsg(spare_mp);
				return (NULL);
			}
		}

		fep = frag->itpf_freelist;
		frag->itpf_freelist = fep->itpfe_next;

		if (is_v4) {
			bcopy((caddr_t)&iph->ipha_src,
			    (caddr_t)&fep->itpfe_src,
			    sizeof (struct in_addr));
			bcopy((caddr_t)&iph->ipha_dst,
			    (caddr_t)&fep->itpfe_dst,
			    sizeof (struct in_addr));
			fep->itpfe_id = iph->ipha_ident;
			fep->itpfe_proto = iph->ipha_protocol;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		} else {
			bcopy((in6_addr_t *)&ip6h->ip6_src,
			    (in6_addr_t *)&fep->itpfe_src6,
			    sizeof (struct in6_addr));
			bcopy((in6_addr_t *)&ip6h->ip6_dst,
			    (in6_addr_t *)&fep->itpfe_dst6,
			    sizeof (struct in6_addr));
			fep->itpfe_id = fraghdr->ip6f_ident;
			fep->itpfe_proto = v6_proto;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		}
		itpf_time = gethrestime_sec();
		fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1;
		fep->itpfe_last = 0;
		fep->itpfe_fraglist = NULL;
		fep->itpfe_depth = 0;
		fep->itpfe_next = (frag->itpf_ptr)[i];
		(frag->itpf_ptr)[i] = fep;

		if (frag->itpf_expire_hint > fep->itpfe_exp)
			frag->itpf_expire_hint = fep->itpfe_exp;
	}
	freemsg(spare_mp);
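
	/*
	 * The pulled-up copy (spare_mp) was only needed for v6 header
	 * parsing above; the original mblk chain is what gets cached in
	 * the fragment list below.
	 */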
	/* Insert it in the frag list */
	/* List is in order by starting offset of fragments */
	prevmp = NULL;
	for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) {
		ipha_t *niph;
		ipha_t *oniph;
		ip6_t *nip6h;
		ip6_pkt_t nipp;
		ip6_frag_t *nfraghdr;
		uint16_t nip6_hdr_length;
		uint8_t *nv6_proto_p;
		int nfirstbyte, nlastbyte;
		char *data, *ndata;
		mblk_t *nspare_mp = NULL;
		mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp);
		int hdr_len;

		oniph = (ipha_t *)mp->b_rptr;
		nip6h = NULL;
		niph = NULL;

		/*
		 * Determine outer header type and length and set
		 * pointers appropriately
		 */
		if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION);
			if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			nip6h = (ip6_t *)nspare_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
			    &nip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0);
		}

		/*
		 * Determine inner header type and length and set
		 * pointers appropriately
		 */
		if (is_v4) {
			if (niph == NULL) {
				/* Was v6 outer */
				niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
			}
			nfirstbyte = V4_FRAG_OFFSET(niph);
			nlastbyte = nfirstbyte + ntohs(niph->ipha_length) -
			    IPH_HDR_LENGTH(niph);
		} else {
			if ((nspare_mp == NULL) &&
			    ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
			    &nip6_hdr_length, &nv6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_malformed_frag, &spd_dropper);
				ipsec_freemsg_chain(nspare_mp);
				return (NULL);
			}
			bzero(&nipp, sizeof (nipp));
			(void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL);
			nfraghdr = nipp.ipp_fraghdr;
			nfirstbyte = ntohs(nfraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) +
			    sizeof (ip6_t) - nip6_hdr_length;
		}
		ipsec_freemsg_chain(nspare_mp);

		/* Check for overlapping fragments */
		if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) {
			/*
			 * Overlap Check:
			 *	~~~~---------	# Check if the newly
			 *	~  ndata_mp |	# received fragment
			 *	~~~~---------	# overlaps with the
			 *	---------~~~~~~	# current fragment.
			 *	|    mp	      ~
			 *	---------~~~~~~
			 */
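			/*
			 * data/ndata point at the first overlapping byte in
			 * the new and cached fragments respectively, so the
			 * bcmp() below can verify that the overlapping
			 * bytes match.
			 */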
			if (is_v4) {
				data = (char *)iph + IPH_HDR_LENGTH(iph) +
				    firstbyte - nfirstbyte;
				ndata = (char *)niph + IPH_HDR_LENGTH(niph);
			} else {
				data = (char *)ip6h +
				    nip6_hdr_length + firstbyte -
				    nfirstbyte;
				ndata = (char *)nip6h + nip6_hdr_length;
			}
			if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
			    - firstbyte)) {
				/* Overlapping data does not match */
				(void) fragcache_delentry(i, fep, frag);
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_overlap_frag, &spd_dropper);
				return (NULL);
			}
			/* Part of defense for jolt2.c fragmentation attack */
			if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
				/*
				 * Check for identical or subset fragments:
				 *	----------	~~~~--------~~~~~
				 *	|   nmp   | or	~    nmp    ~
				 *	----------	~~~~--------~~~~~
				 *	----------	    ------
				 *	|   mp    |	    | mp  |
				 *	----------	    ------
				 */
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_evil_frag, &spd_dropper);
				return (NULL);
			}
		}

		/* Correct location for this fragment? */
		if (firstbyte <= nfirstbyte) {
			/*
			 * Check if the tail end of the new fragment overlaps
			 * with the head of the current fragment.
			 *	--------~~~~~~~
			 *	|  nmp	      ~
			 *	--------~~~~~~~
			 *	~~~~~--------
			 *	~   mp	    |
			 *	~~~~~--------
			 */
			if (lastbyte > nfirstbyte) {
				/* Fragments overlap */
				if (is_v4) {
					data = (char *)iph +
					    IPH_HDR_LENGTH(iph) + firstbyte -
					    nfirstbyte;
					ndata = (char *)niph +
					    IPH_HDR_LENGTH(niph);
				} else {
					data = (char *)ip6h +
					    nip6_hdr_length + firstbyte -
					    nfirstbyte;
					ndata = (char *)nip6h +
					    nip6_hdr_length;
				}
				if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
				    - nfirstbyte)) {
					/* Overlap mismatch */
					(void) fragcache_delentry(i, fep,
					    frag);
					mutex_exit(&frag->itpf_lock);
					ip_drop_packet(first_mp, inbound, NULL,
					    NULL, &ipdrops_spd_overlap_frag,
					    &spd_dropper);
					return (NULL);
				}
			}

			/*
			 * Fragment does not illegally overlap and can now
			 * be inserted into the chain
			 */
			break;
		}

		prevmp = nmp;
	}
	first_mp->b_next = nmp;

	if (prevmp == NULL) {
		fep->itpfe_fraglist = first_mp;
	} else {
		prevmp->b_next = first_mp;
	}
	if (last)
		fep->itpfe_last = 1;

	/* Part of defense for jolt2.c fragmentation attack */
	if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
		(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    &ipdrops_spd_max_frags, &spd_dropper);
		return (NULL);
	}

	/* Check for complete packet */
	if (!fep->itpfe_last) {
		mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "Fragment cached, not last.\n");
#endif
		return (NULL);
	}

#ifdef FRAGCACHE_DEBUG
	cmn_err(CE_WARN, "Last fragment cached.\n");
	cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp);
#endif
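
	/*
	 * A "last" fragment has been seen.  Walk the sorted fragment list
	 * and check that the pieces tile a complete packet with no holes
	 * before handing it back.
	 */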
	offset = 0;
	for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
		mblk_t *data_mp = (inbound ? mp->b_cont : mp);
		int hdr_len;

		oiph = (ipha_t *)data_mp->b_rptr;
		ip6h = NULL;
		iph = NULL;

		spare_mp = NULL;
		if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
			if ((spare_mp = msgpullup(data_mp, -1)) == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			ip6h = (ip6_t *)spare_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
		}

		/* Calculate current fragment start/end */
		if (is_v4) {
			if (iph == NULL) {
				/* Was v6 outer */
				iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
			}
			firstbyte = V4_FRAG_OFFSET(iph);
			lastbyte = firstbyte + ntohs(iph->ipha_length) -
			    IPH_HDR_LENGTH(iph);
		} else {
			if ((spare_mp == NULL) &&
			    ((spare_mp = msgpullup(data_mp, -1)) == NULL)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_malformed_frag, &spd_dropper);
				ipsec_freemsg_chain(spare_mp);
				return (NULL);
			}
			v6_proto = *v6_proto_p;
			bzero(&ipp, sizeof (ipp));
			(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
			fraghdr = ipp.ipp_fraghdr;
			firstbyte = ntohs(fraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
			    sizeof (ip6_t) - ip6_hdr_length;
		}

		/*
		 * If this fragment is greater than current offset,
		 * we have a missing fragment so return NULL
		 */
		if (firstbyte > offset) {
			mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
			/*
			 * Note, this can happen when the last frag
			 * gets sent through because it is smaller
			 * than the MTU.  It is not necessarily an
			 * error condition.
			 */
			cmn_err(CE_WARN, "Frag greater than offset! : "
			    "missing fragment: firstbyte = %d, offset = %d, "
			    "mp = %p\n", firstbyte, offset, mp);
#endif
			ipsec_freemsg_chain(spare_mp);
			return (NULL);
		}

		/*
		 * If we are at the last fragment, we have the complete
		 * packet, so rechain things and return it to caller
		 * for processing
		 */
		if ((is_v4 && !V4_MORE_FRAGS(iph)) ||
		    (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) {
			mp = fep->itpfe_fraglist;
			fep->itpfe_fraglist = NULL;
			(void) fragcache_delentry(i, fep, frag);
			mutex_exit(&frag->itpf_lock);
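			/*
			 * E.g. (illustrative) a final v4 fragment at offset
			 * 65528 carrying an ipha_length of 1480 would imply
			 * a 67008-byte packet, which the 64KB check below
			 * rejects.
			 */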
			if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) >
			    65535)) || (!is_v4 && (firstbyte +
			    ntohs(ip6h->ip6_plen) > 65535))) {
				/* It is an invalid "ping-o-death" packet */
				/* Discard it */
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_evil_frag, &spd_dropper);
				ipsec_freemsg_chain(spare_mp);
				return (NULL);
			}
#ifdef FRAGCACHE_DEBUG
			cmn_err(CE_WARN, "Fragcache returning mp = %p, "
			    "mp->b_next = %p", mp, mp->b_next);
#endif
			ipsec_freemsg_chain(spare_mp);
			/*
			 * For the inbound case, mp has an ipsec_in b_next'd
			 * chain.  For the outbound case, it is just a data
			 * mp chain.
			 */
			return (mp);
		}
		ipsec_freemsg_chain(spare_mp);

		/*
		 * Update the new ending offset if this
		 * fragment extends the packet
		 */
		if (offset < lastbyte)
			offset = lastbyte;
	}

	mutex_exit(&frag->itpf_lock);

	/* Didn't find the last fragment, so return NULL */
	return (NULL);
}

static void
ipsec_fragcache_clean(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *fep;
	int i;
	ipsec_fragcache_entry_t *earlyfep = NULL;
	time_t itpf_time;
	int earlyexp;
	int earlyi = 0;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	itpf_time = gethrestime_sec();
	earlyexp = itpf_time + 10000;

	for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
		fep = (frag->itpf_ptr)[i];
		while (fep) {
			if (fep->itpfe_exp < itpf_time) {
				/* found */
				fep = fragcache_delentry(i, fep, frag);
			} else {
				if (fep->itpfe_exp < earlyexp) {
					earlyfep = fep;
					earlyexp = fep->itpfe_exp;
					earlyi = i;
				}
				fep = fep->itpfe_next;
			}
		}
	}

	frag->itpf_expire_hint = earlyexp;

	/* If no space was freed, evict the entry expiring soonest. */
	if (frag->itpf_freelist == NULL)
		(void) fragcache_delentry(earlyi, earlyfep, frag);
}
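
/*
 * Note the two-level policy in ipsec_fragcache_clean(): entries past their
 * expiry are always reaped, and if the freelist is still empty afterward,
 * the earliest-expiring survivor is evicted to make room for a new entry.
 */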
static ipsec_fragcache_entry_t *
fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep,
    ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *targp;
	ipsec_fragcache_entry_t *nextp = fep->itpfe_next;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	/* Free up any fragment list still in the cache entry */
	ipsec_freemsg_chain(fep->itpfe_fraglist);

	targp = (frag->itpf_ptr)[slot];
	ASSERT(targp != 0);

	if (targp == fep) {
		/* unlink from head of hash chain */
		(frag->itpf_ptr)[slot] = nextp;
		/* link into free list */
		fep->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = fep;
		return (nextp);
	}

	/* maybe should use double linked list to make update faster */
	/* must be past front of chain */
	while (targp) {
		if (targp->itpfe_next == fep) {
			/* unlink from hash chain */
			targp->itpfe_next = nextp;
			/* link into free list */
			fep->itpfe_next = frag->itpf_freelist;
			frag->itpf_freelist = fep;
			return (nextp);
		}
		targp = targp->itpfe_next;
		ASSERT(targp != 0);
	}
	/* NOTREACHED */
	return (NULL);
}
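
#ifdef FRAGCACHE_DEBUG
/*
 * Illustrative sanity check of the fragment-cache hash spreader.  This
 * helper is an addition for exposition (it is not part of the original
 * SunScreen-derived code); it is compiled only under FRAGCACHE_DEBUG and
 * is not called from anywhere.
 */
static void
ipsec_fragcache_hash_selftest(void)
{
	/* 0x1234: low byte 0x34 XOR high byte 0x12 == bucket 0x26 (38). */
	ASSERT(IPSEC_FRAG_HASH_FUNC(0x1234) == 0x26);

	/* Idents differing only in the high byte land in distinct buckets. */
	ASSERT(IPSEC_FRAG_HASH_FUNC(0x0134) != IPSEC_FRAG_HASH_FUNC(0x0234));
}
#endif /* FRAGCACHE_DEBUG */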