/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>
#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/tun.h>

static void ipsec_update_present_flags();
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *);
static mblk_t *ipsec_check_ipsecin_policy(queue_t *, mblk_t *,
    ipsec_policy_t *, ipha_t *, ip6_t *, uint64_t);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_reclaim(void *);
static void ipsid_init(void);
static void ipsid_fini(void);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
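
/*
 * Illustrative note (not in the original source): callers OR these bits
 * together when asking ipsec_init_inbound_sel() for a selector.  E.g. a
 * hypothetical tunnel-mode caller that also wants per-port policy would
 * pass (SEL_TUNNEL_MODE | SEL_PORT_POLICY), while a simple transport-mode
 * caller passes SEL_NONE.
 */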

/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG}
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Policy rule index generator.  We assume this won't wrap in the
 * lifetime of a system.  If we make 2^20 policy changes per second,
 * this will last 2^44 seconds, or roughly 500,000 years, so we don't
 * have to worry about reusing policy index values.
 *
 * Protected by ipsec_conf_lock.
 */
uint64_t ipsec_next_policy_index = 1;

/*
 * Active & Inactive system policy roots
 */
static ipsec_policy_head_t system_policy;
static ipsec_policy_head_t inactive_policy;

/*
 * Tunnel policies - AVL tree indexed by tunnel name.
 */
krwlock_t tunnel_policy_lock;
uint64_t tunnel_policy_gen;	/* To keep track of updates w/o searches. */
avl_tree_t tunnel_policies;

/* Packet dropper for generic SPD drops. */
ipdropper_t spd_dropper;

/*
 * For now, use a trivially sized hash table for actions.
 * In the future we can add the structure canonicalization necessary
 * to get the hash function to behave correctly..
 */
#define	IPSEC_ACTION_HASH_SIZE 1

/*
 * Selector hash table is statically sized at module load time.
 * we default to 251 buckets, which is the largest prime number under 255
 */

#define	IPSEC_SPDHASH_DEFAULT 251
uint32_t ipsec_spd_hashsize = 0;

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT 5
uint32_t tun_spd_hashsize;


#define	IPSEC_SEL_NOHASH ((uint32_t)(~0))

static HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE];
static HASH_HEAD(ipsec_sel) *ipsec_sel_hash;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

boolean_t ipsec_inbound_v4_policy_present = B_FALSE;
boolean_t ipsec_outbound_v4_policy_present = B_FALSE;
boolean_t ipsec_inbound_v6_policy_present = B_FALSE;
boolean_t ipsec_outbound_v6_policy_present = B_FALSE;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int);

/*
 * Because policy needs to know what algorithms are supported, keep the
 * lists of algorithms here.
 */

kmutex_t alg_lock;
krwlock_t itp_get_byaddr_rw_lock;
ipsec_tun_pol_t *(*itp_get_byaddr)(uint32_t *, uint32_t *, int);
uint8_t ipsec_nalgs[IPSEC_NALGTYPES];
ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
ipsec_algs_exec_mode_t ipsec_algs_exec_mode[IPSEC_NALGTYPES];
static crypto_notify_handle_t prov_update_handle = NULL;

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2)						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv4 Fragments
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)

/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};
/*
 * Have a counter for every possible policy message in the previous array.
 */
static uint32_t ipsec_policy_failure_count[IPSEC_POLICY_MAX];
/* Time since last ipsec policy failure that printed a message. */
hrtime_t ipsec_policy_failure_last = 0;

/*
 * General overviews:
 *
 * Locking:
 *
 * All of the system policy structures are protected by a single
 * rwlock, ipsec_conf_lock.  These structures are threaded in a
 * fairly complex fashion and are not expected to change on a
 * regular basis, so this should not cause scaling/contention
 * problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 * We use custom kmem cache types for the various
 * bits & pieces of the policy data structures.  All allocations
 * use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 * policy table is of potentially unbounded size, so we don't
 * want to provide a way to hog all system memory with policy
 * entries..
 */
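
/*
 * Sketch of the allocation pattern described above (illustrative only;
 * the SPD_EXAMPLE guard is an assumption and is never defined, so none
 * of this is part of the build): policy objects come from the kmem
 * caches with KM_NOSLEEP, so every caller must be prepared for NULL and
 * map it to ENOMEM rather than blocking.
 */
#ifdef SPD_EXAMPLE
static int
spd_example_alloc(ipsec_policy_t **out)
{
	ipsec_policy_t *p;

	p = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);
	if (p == NULL)
		return (ENOMEM);	/* never block holding policy locks */
	*out = p;
	return (0);
}
#endif	/* SPD_EXAMPLE */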

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * the in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsp_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}
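
/*
 * Sketch (illustrative only, never compiled) of how tie-breaker #1 above
 * is exploited: a stack "template" node with a NULL ipsp_sel sorts just
 * before the first real rule carrying the same index, so avl_find() plus
 * avl_nearest()/AVL_NEXT() visits every rule sharing that index.  This
 * mirrors the technique the comment attributes to ipsec_policy_delete().
 */
#ifdef SPD_EXAMPLE
static void
spd_example_walk_by_index(avl_tree_t *tree, uint64_t index)
{
	ipsec_policy_t tmpl, *p;
	avl_index_t where;

	bzero(&tmpl, sizeof (tmpl));
	tmpl.ipsp_index = index;
	tmpl.ipsp_sel = NULL;		/* sorts before all real entries */

	(void) avl_find(tree, &tmpl, &where);	/* never matches exactly */
	for (p = avl_nearest(tree, where, AVL_AFTER);
	    p != NULL && p->ipsp_index == index;
	    p = AVL_NEXT(tree, p)) {
		/* visit p */
	}
}
#endif	/* SPD_EXAMPLE */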
398 */ 399 void 400 ipsec_policy_destroy(void) 401 { 402 int i; 403 void *cookie; 404 ipsec_tun_pol_t *node; 405 406 ip_drop_unregister(&spd_dropper); 407 ip_drop_destroy(); 408 409 rw_enter(&tunnel_policy_lock, RW_WRITER); 410 /* 411 * It's possible we can just ASSERT() the tree is empty. After all, 412 * we aren't called until IP is ready to unload (and presumably all 413 * tunnels have been unplumbed). But we'll play it safe for now, the 414 * loop will just exit immediately if it's empty. 415 */ 416 cookie = NULL; 417 while ((node = (ipsec_tun_pol_t *) 418 avl_destroy_nodes(&tunnel_policies, &cookie)) != NULL) { 419 ITP_REFRELE(node); 420 } 421 avl_destroy(&tunnel_policies); 422 rw_exit(&tunnel_policy_lock); 423 rw_destroy(&tunnel_policy_lock); 424 ipsec_polhead_destroy(&system_policy); 425 ipsec_polhead_destroy(&inactive_policy); 426 427 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) 428 mutex_destroy(&(ipsec_action_hash[i].hash_lock)); 429 430 for (i = 0; i < ipsec_spd_hashsize; i++) 431 mutex_destroy(&(ipsec_sel_hash[i].hash_lock)); 432 433 ipsec_unregister_prov_update(); 434 435 mutex_destroy(&alg_lock); 436 437 kmem_cache_destroy(ipsec_action_cache); 438 kmem_cache_destroy(ipsec_sel_cache); 439 kmem_cache_destroy(ipsec_pol_cache); 440 kmem_cache_destroy(ipsec_info_cache); 441 ipsid_gc(); 442 ipsid_fini(); 443 } 444 445 446 /* 447 * Called when table allocation fails to free the table. 448 */ 449 static int 450 ipsec_alloc_tables_failed() 451 { 452 if (ipsec_sel_hash != NULL) { 453 kmem_free(ipsec_sel_hash, ipsec_spd_hashsize * 454 sizeof (*ipsec_sel_hash)); 455 ipsec_sel_hash = NULL; 456 } 457 ipsec_polhead_free_table(&system_policy); 458 ipsec_polhead_free_table(&inactive_policy); 459 460 return (ENOMEM); 461 } 462 463 /* 464 * Attempt to allocate the tables in a single policy head. 465 * Return nonzero on failure after cleaning up any work in progress. 466 */ 467 int 468 ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, 469 boolean_t global_cleanup) 470 { 471 int dir; 472 473 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 474 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 475 476 ipr->ipr_nchains = nchains; 477 ipr->ipr_hash = kmem_zalloc(nchains * 478 sizeof (ipsec_policy_hash_t), kmflag); 479 if (ipr->ipr_hash == NULL) 480 return (global_cleanup ? ipsec_alloc_tables_failed() : 481 ENOMEM); 482 } 483 return (0); 484 } 485 486 /* 487 * Attempt to allocate the various tables. Return nonzero on failure 488 * after cleaning up any work in progress. 489 */ 490 static int 491 ipsec_alloc_tables(int kmflag) 492 { 493 int error; 494 495 error = ipsec_alloc_table(&system_policy, ipsec_spd_hashsize, kmflag, 496 B_TRUE); 497 if (error != 0) 498 return (error); 499 500 error = ipsec_alloc_table(&inactive_policy, ipsec_spd_hashsize, kmflag, 501 B_TRUE); 502 if (error != 0) 503 return (error); 504 505 ipsec_sel_hash = kmem_zalloc(ipsec_spd_hashsize * 506 sizeof (*ipsec_sel_hash), kmflag); 507 508 if (ipsec_sel_hash == NULL) 509 return (ipsec_alloc_tables_failed()); 510 511 return (0); 512 } 513 514 /* 515 * After table allocation, initialize a policy head. 
516 */ 517 void 518 ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains) 519 { 520 int dir, chain; 521 522 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 523 avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid, 524 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 525 526 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 527 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 528 ipr->ipr_nchains = nchains; 529 530 for (chain = 0; chain < nchains; chain++) { 531 mutex_init(&(ipr->ipr_hash[chain].hash_lock), 532 NULL, MUTEX_DEFAULT, NULL); 533 } 534 } 535 } 536 537 /* 538 * Module load hook. 539 */ 540 void 541 ipsec_policy_init() 542 { 543 int i; 544 545 /* 546 * Make two attempts to allocate policy hash tables; try it at 547 * the "preferred" size (may be set in /etc/system) first, 548 * then fall back to the default size. 549 */ 550 if (ipsec_spd_hashsize == 0) 551 ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; 552 553 if (ipsec_alloc_tables(KM_NOSLEEP) != 0) { 554 cmn_err(CE_WARN, 555 "Unable to allocate %d entry IPsec policy hash table", 556 ipsec_spd_hashsize); 557 ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; 558 cmn_err(CE_WARN, "Falling back to %d entries", 559 ipsec_spd_hashsize); 560 (void) ipsec_alloc_tables(KM_SLEEP); 561 } 562 563 /* Just set a default for tunnels. */ 564 if (tun_spd_hashsize == 0) 565 tun_spd_hashsize = TUN_SPDHASH_DEFAULT; 566 567 ipsid_init(); 568 /* 569 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting 570 * to free them. 571 */ 572 system_policy.iph_refs = 1; 573 inactive_policy.iph_refs = 1; 574 ipsec_polhead_init(&system_policy, ipsec_spd_hashsize); 575 ipsec_polhead_init(&inactive_policy, ipsec_spd_hashsize); 576 rw_init(&tunnel_policy_lock, NULL, RW_DEFAULT, NULL); 577 avl_create(&tunnel_policies, tunnel_compare, sizeof (ipsec_tun_pol_t), 578 0); 579 580 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) 581 mutex_init(&(ipsec_action_hash[i].hash_lock), 582 NULL, MUTEX_DEFAULT, NULL); 583 584 for (i = 0; i < ipsec_spd_hashsize; i++) 585 mutex_init(&(ipsec_sel_hash[i].hash_lock), 586 NULL, MUTEX_DEFAULT, NULL); 587 588 mutex_init(&alg_lock, NULL, MUTEX_DEFAULT, NULL); 589 590 for (i = 0; i < IPSEC_NALGTYPES; i++) 591 ipsec_nalgs[i] = 0; 592 593 ipsec_action_cache = kmem_cache_create("ipsec_actions", 594 sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL, 595 ipsec_action_reclaim, NULL, NULL, 0); 596 ipsec_sel_cache = kmem_cache_create("ipsec_selectors", 597 sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL, 598 NULL, NULL, NULL, 0); 599 ipsec_pol_cache = kmem_cache_create("ipsec_policy", 600 sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL, 601 NULL, NULL, NULL, 0); 602 ipsec_info_cache = kmem_cache_create("ipsec_info", 603 sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL, 604 NULL, NULL, NULL, 0); 605 606 ip_drop_init(); 607 ip_drop_register(&spd_dropper, "IPsec SPD"); 608 609 /* Set function to dummy until tun is loaded */ 610 rw_init(&itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL); 611 rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER); 612 itp_get_byaddr = itp_get_byaddr_dummy; 613 rw_exit(&itp_get_byaddr_rw_lock); 614 } 615 616 /* 617 * Sort algorithm lists. 618 * 619 * I may need to split this based on 620 * authentication/encryption, and I may wish to have an administrator 621 * configure this list. Hold on to some NDD variables... 622 * 623 * XXX For now, sort on minimum key size (GAG!). While minimum key size is 624 * not the ideal metric, it's the only quantifiable measure available. 

/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid)
{
	ipsec_alginfo_t *ai = ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipsec_nalgs[at];
	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipsec_alglists[at][ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipsec_sortlist[at][i];
			ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		} /* Else just continue. */
	}

	/* Store holder in last slot. */
	ipsec_sortlist[at][i] = holder;
}

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid)
{
	boolean_t copyback = B_FALSE;
	int i;
	int newcount = ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback)
			ipsec_sortlist[at][i-1] = ipsec_sortlist[at][i];
		else if (ipsec_sortlist[at][i] == algid)
			copyback = B_TRUE;
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg)
{
	ASSERT(MUTEX_HELD(&alg_lock));

	ASSERT(ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype);
	ipsec_alglists[algtype][alg->alg_id] = alg;

	ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid)
{
	ASSERT(MUTEX_HELD(&alg_lock));

	ASSERT(ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipsec_alglists[algtype][algid]);
	ipsec_alglists[algtype][algid] = NULL;

	ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid);
}

/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(void)
{
	ipsec_policy_head_t *h = &system_policy;
	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(void)
{
	ipsec_policy_head_t *h = &inactive_policy;
	IPPH_REFHOLD(h);
	return (h);
}
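
/*
 * Sketch (illustrative only, never compiled) of the intended use of the
 * two hooks above: take a hold, read under iph_lock, then release with
 * IPPH_REFRELE() so the ref == 1 floor on the globals is preserved.
 */
#ifdef SPD_EXAMPLE
static uint64_t
spd_example_read_gen(void)
{
	ipsec_policy_head_t *h = ipsec_system_policy();	/* refheld */
	uint64_t gen;

	rw_enter(&h->iph_lock, RW_READER);
	gen = h->iph_gen;		/* e.g. detect rule-set changes */
	rw_exit(&h->iph_lock);
	IPPH_REFRELE(h);
	return (gen);
}
#endif	/* SPD_EXAMPLE */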

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}

		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags();
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(void)
{
	ipsec_swap_policy(&system_policy, &inactive_policy);
}

/*
 * Clone one policy rule..
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}


static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);
		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}


/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];
		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(void)
{
	return (ipsec_copy_polhead(&system_policy, &inactive_policy));
}
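
/*
 * Sketch (illustrative only, never compiled) of the "edit a copy, then
 * commit" transaction that ipsec_clone_system_policy() and
 * ipsec_swap_global_policy() enable, roughly what a spdsock consumer
 * does: clone active -> inactive, stage changes on the inactive head,
 * then swap the roots to commit.
 */
#ifdef SPD_EXAMPLE
static int
spd_example_commit_new_rules(void)
{
	if (ipsec_clone_system_policy() != 0)
		return (ENOMEM);
	/* ... stage rule additions/deletions on the inactive head ... */
	ipsec_swap_global_policy();	/* readers see old or new, never half */
	return (0);
}
#endif	/* SPD_EXAMPLE */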

/*
 * Generic "do we have IPvN policy" answer.
 */
boolean_t
iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
{
	int i, hval;
	uint32_t valbit;
	ipsec_policy_root_t *ipr;
	ipsec_policy_t *ipp;

	if (v6) {
		valbit = IPSL_IPV6;
		hval = IPSEC_AF_V6;
	} else {
		valbit = IPSL_IPV4;
		hval = IPSEC_AF_V4;
	}

	ASSERT(RW_LOCK_HELD(&iph->iph_lock));
	for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
		if (ipr->ipr_nonhash[hval] != NULL)
			return (B_TRUE);
		for (i = 0; i < ipr->ipr_nchains; i++) {
			for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
			    ipp = ipp->ipsp_hash.hash_next) {
				if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
					return (B_TRUE);
			}
		}
	}

	return (B_FALSE);
}

/*
 * Extract the string from ipsec_policy_failure_msgs[type] and
 * log it.
 *
 */
void
ipsec_log_policy_failure(queue_t *q, int type, char *func_name, ipha_t *ipha,
    ip6_t *ip6h, boolean_t secure)
{
	char sbuf[INET6_ADDRSTRLEN];
	char dbuf[INET6_ADDRSTRLEN];
	char *s;
	char *d;
	short mid = 0;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL) {
		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
	} else {
		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));

	}

	/* Always bump the policy failure counter. */
	ipsec_policy_failure_count[type]++;

	if (q != NULL) {
		mid = q->q_qinfo->qi_minfo->mi_idnum;
	}
	ipsec_rl_strlog(mid, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    ipsec_policy_failure_msgs[type],
	    func_name,
	    (secure ? "secure" : "not secure"), s, d);
}

/*
 * Rate-limiting front-end to strlog() for AH and ESP.  Uses the ndd variables
 * in /dev/ip and the same rate-limiting clock so that there's a single
 * knob to turn to throttle the rate of messages.
 */
void
ipsec_rl_strlog(short mid, short sid, char level, ushort_t sl, char *fmt, ...)
{
	va_list adx;
	hrtime_t current = gethrtime();

	sl |= SL_CONSOLE;
	/*
	 * Throttle logging to stop syslog from being swamped.  If variable
	 * 'ipsec_policy_log_interval' is zero, don't log any messages at
	 * all, otherwise log only one message every 'ipsec_policy_log_interval'
	 * msec.  Convert interval (in msec) to hrtime (in nsec).
	 */

	if (ipsec_policy_log_interval) {
		if (ipsec_policy_failure_last +
		    ((hrtime_t)ipsec_policy_log_interval * (hrtime_t)1000000) <=
		    current) {
			va_start(adx, fmt);
			(void) vstrlog(mid, sid, level, sl, fmt, adx);
			va_end(adx);
			ipsec_policy_failure_last = current;
		}
	}
}

void
ipsec_config_flush()
{
	rw_enter(&system_policy.iph_lock, RW_WRITER);
	ipsec_polhead_flush(&system_policy);
	ipsec_next_policy_index = 1;
	rw_exit(&system_policy.iph_lock);
	ipsec_action_reclaim(0);
}

/*
 * Clip a policy's min/max keybits vs. the capabilities of the
 * algorithm.
 */
static void
act_alg_adjust(uint_t algtype, uint_t algid,
    uint16_t *minbits, uint16_t *maxbits)
{
	ipsec_alginfo_t *algp = ipsec_alglists[algtype][algid];
	if (algp != NULL) {
		/*
		 * If passed-in minbits is zero, we assume the caller trusts
		 * us with setting the minimum key size.  We pick the
		 * algorithm's DEFAULT key size for the minimum in this case.
		 */
		if (*minbits == 0) {
			*minbits = algp->alg_default_bits;
			ASSERT(*minbits >= algp->alg_minbits);
		} else {
			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
			    algp->alg_minbits);
		}
		if (*maxbits == 0)
			*maxbits = algp->alg_maxbits;
		else
			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
			    algp->alg_maxbits);
		ASSERT(*minbits <= *maxbits);
	} else {
		*minbits = 0;
		*maxbits = 0;
	}
}
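
/*
 * Worked example for act_alg_adjust() above (illustrative numbers):
 * for an algorithm with alg_minbits = 128, alg_default_bits = 128 and
 * alg_maxbits = 256:
 *
 *	request (0, 0)    -> (128, 256)  defaults fill both ends
 *	request (64, 512) -> (128, 256)  clipped into the legal range
 *	request (192, 0)  -> (192, 256)  min honored, max defaulted
 *
 * An unloaded algorithm yields (0, 0).
 */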

/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag)
{
	ipsec_prot_t *ipp;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t  encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max)	if (wild) {	\
		int nalgs = ipsec_nalgs[type];				\
		if (ipsec_alglists[type][alg] != NULL)			\
			nalgs--;					\
		action_count *= nalgs;					\
		min = 0;						\
		max = ipsec_nalgs[type] - 1;				\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx) ((wild)?(ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg);
				ai++;
			}
		}
	}

#undef	WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}
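
/*
 * Worked example (illustrative): a request of "use ESP with any
 * encryption and any ESP authentication", with, say, three encryption
 * algorithms and two authentication algorithms loaded, expands to
 * action_count = 3 * 2 = 6 concrete actions, one per (encr, eauth)
 * pair, each emitted by ipsec_setup_act() in sortlist order (the "none"
 * placeholder, when registered, is excluded from the count).
 */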

/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg) || !ipp->ipp_use_ah;
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	ipp->ipp_auth_alg = req->ipsr_auth_alg;
	ipp->ipp_esp_auth_alg = req->ipsr_esp_auth_alg;
}

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp)
{
	struct ipsec_act act;
	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}
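
/*
 * Sketch (illustrative only, never compiled) of the expected lifetime
 * pairing for action vectors built from an ipsec_req_t:
 * ipsec_actvec_from_req() hands back a KM_NOSLEEP array that the caller
 * must release with ipsec_actvec_free() (defined below).
 */
#ifdef SPD_EXAMPLE
static int
spd_example_actvec(ipsec_req_t *req)
{
	ipsec_act_t *actp;
	uint_t nact;

	ipsec_actvec_from_req(req, &actp, &nact);
	if (actp == NULL)
		return (ENOMEM);
	/* ... hand actp/nact to policy construction ... */
	ipsec_actvec_free(actp, nact);
	return (0);
}
#endif	/* SPD_EXAMPLE */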

/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	mutex_enter(&connp->conn_lock);
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (ipl->ipl_in_action != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
			goto done;
		}
		if (ipl->ipl_in_policy != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
			    req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	mutex_exit(&connp->conn_lock);
	return (rv);
}

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * When outbound policy is not cached, look it up the hard way and attach
 * an ipsec_out_t to the packet..
 */
static mblk_t *
ipsec_attach_global_policy(mblk_t *mp, conn_t *connp, ipsec_selector_t *sel)
{
	ipsec_policy_t *p;

	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel);

	if (p == NULL)
		return (NULL);
	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol));
}

/*
 * We have an ipsec_out already, but don't have cached policy; fill it in
 * with the right actions.
 */
static mblk_t *
ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
    ipsec_selector_t *sel)
{
	ipsec_out_t *io;
	ipsec_policy_t *p;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	if (io->ipsec_out_policy == NULL) {
		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel);
		io->ipsec_out_policy = p;
	}
	return (ipsec_mp);
}


/* ARGSUSED */
/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(queue_t *q, mblk_t *first_mp,
    boolean_t mctl_present, ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.
	 */

	return (first_mp);
}

/*
 * Check that packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;

	ASSERT(ii->ipsec_in_secure);
	ASSERT(!ii->ipsec_in_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */

	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = &ipdrops_spd_ah_innermismatch;
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = &ipdrops_spd_esp_innermismatch;
		return (B_FALSE);
	}
	return (B_TRUE);
}

static boolean_t
ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ii->ipsec_in_loopback) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * icmp_loopback is initialized in ip.c and ip6.c.  (Look for
		 * ipsec_out_icmp_loopback.)
		 */
		if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback)
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = &ipdrops_spd_loopback_mismatch;
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!ii->ipsec_in_icmp_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;

	decaps = ii->ipsec_in_decaps;

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = &ipdrops_spd_explicit;
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = &ipdrops_spd_got_secure;
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = &ipdrops_spd_got_clear;
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = &ipdrops_spd_bad_ahalg;
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = &ipdrops_spd_got_ah;
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = &ipdrops_spd_got_clear;
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = &ipdrops_spd_bad_espealg;
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = &ipdrops_spd_bad_espaalg;
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = &ipdrops_spd_got_esp;
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = &ipdrops_spd_bad_selfencap;
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay. But we drop to be consistent with the
			 * other cases.
			 */
			*counter = &ipdrops_spd_got_selfencap;
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
}

/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fall back to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_ulp;

	ASSERT(connp->conn_latch->ipl_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_ulp);
	}
	ASSERT(connp->conn_ulp != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}

/*
 * Called to check policy on a latched connection, both from this file
 * and from tcp.c
 */
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    conn_t *connp)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);

	if (!ii->ipsec_in_loopback) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if ((ii->ipsec_in_ah_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
			*counter = &ipdrops_spd_ah_badid;
			*reason = "AH identity mismatch";
			return (B_FALSE);
		}

		if ((ii->ipsec_in_esp_sa != NULL) &&
		    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
			*counter = &ipdrops_spd_esp_badid;
			*reason = "ESP identity mismatch";
			return (B_FALSE);
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h))) {
			return (B_FALSE);
		}
	}

	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
	    ipha, ip6h, reason, counter));
}
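
/*
 * Sketch (illustrative only, never compiled) of the unique-id rule the
 * functions above rely on: an SA negotiated for one flow carries a
 * nonzero ipsa_unique_mask covering ports/protocol, while a shared SA
 * has a zero mask and matches any flow.  This mirrors the test in
 * ipsec_check_ipsecin_unique().
 */
#ifdef SPD_EXAMPLE
static boolean_t
spd_example_unique_ok(ipsa_t *sa, uint64_t pkt_unique)
{
	return (sa->ipsa_unique_mask == 0 ||
	    sa->ipsa_unique_id == (pkt_unique & sa->ipsa_unique_mask));
}
#endif	/* SPD_EXAMPLE */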

/*
 * Check to see whether this secured datagram meets the policy
 * constraints specified in ipsp.
 *
 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy.
 *
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_ipsecin_policy(queue_t *q, mblk_t *first_mp, ipsec_policy_t *ipsp,
    ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique)
{
	ipsec_in_t *ii;
	ipsec_action_t *ap;
	const char *reason = "no policy actions found";
	mblk_t *data_mp, *ipsec_mp;
	short mid = 0;
	kstat_named_t *counter = &ipdrops_spd_got_secure;

	data_mp = first_mp->b_cont;
	ipsec_mp = first_mp;

	ASSERT(ipsp != NULL);

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;

	if (ii->ipsec_in_loopback)
		return (ipsec_check_loopback_policy(q, first_mp, B_TRUE, ipsp));
	ASSERT(ii->ipsec_in_type == IPSEC_IN);
	ASSERT(ii->ipsec_in_secure);

	if (ii->ipsec_in_action != NULL) {
		/*
		 * this can happen if we do a double policy-check on a packet
		 * Would be nice to be able to delete this test..
		 */
		IPACT_REFRELE(ii->ipsec_in_action);
	}
	ASSERT(ii->ipsec_in_action == NULL);

	if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) {
		reason = "inbound AH and ESP identities differ";
		counter = &ipdrops_spd_ahesp_diffid;
		goto drop;
	}

	if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique))
		goto drop;

	/*
	 * Ok, now loop through the possible actions and see if any
	 * of them work for us.
	 */

	for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) {
		if (ipsec_check_ipsecin_action(ii, data_mp, ap,
		    ipha, ip6h, &reason, &counter)) {
			BUMP_MIB(&ip_mib, ipsecInSucceeded);
			IPPOL_REFRELE(ipsp);
			return (first_mp);
		}
	}
drop:
	if (q != NULL) {
		mid = q->q_qinfo->qi_minfo->mi_idnum;
	}
	ipsec_rl_strlog(mid, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    "ipsec inbound policy mismatch: %s, packet dropped\n",
	    reason);
	IPPOL_REFRELE(ipsp);
	ASSERT(ii->ipsec_in_action == NULL);
	BUMP_MIB(&ip_mib, ipsecInFailed);
	ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper);
	return (NULL);
}

/*
 * sleazy prefix-length-based compare.
 * another inlining candidate..
 */
boolean_t
ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p)
{
	int offset = pfxlen>>3;
	int bitsleft = pfxlen & 7;
	uint8_t *addr2 = (uint8_t *)addr2p;

	/*
	 * and there was much evil..
	 * XXX should inline-expand the bcmp here and do this 32 bits
	 * or 64 bits at a time..
	 */
	return ((bcmp(addr1, addr2, offset) == 0) &&
	    ((bitsleft == 0) ||
	    (((addr1[offset] ^ addr2[offset]) &
	    (0xff<<(8-bitsleft))) == 0)));
}
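
/*
 * Worked example for ip_addr_match() above (illustrative): pfxlen = 20
 * gives offset = 2 and bitsleft = 4, so the first two bytes of the
 * addresses are compared with bcmp() and the third byte is compared
 * under the mask 0xff << (8 - 4) == 0xf0.  A pfxlen of 0 matches
 * everything, since bcmp(..., 0) == 0 and bitsleft == 0.
 */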

static ipsec_policy_t *
ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain,
    ipsec_selector_t *sel, boolean_t is_icmp_inv_acq)
{
	ipsec_selkey_t *isel;
	ipsec_policy_t *p;
	int bpri = best ? best->ipsp_prio : 0;

	for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) {
		uint32_t valid;

		if (p->ipsp_prio <= bpri)
			continue;
		isel = &p->ipsp_sel->ipsl_key;
		valid = isel->ipsl_valid;

		if ((valid & IPSL_PROTOCOL) &&
		    (isel->ipsl_proto != sel->ips_protocol))
			continue;

		if ((valid & IPSL_REMOTE_ADDR) &&
		    !ip_addr_match((uint8_t *)&isel->ipsl_remote,
		    isel->ipsl_remote_pfxlen,
		    &sel->ips_remote_addr_v6))
			continue;

		if ((valid & IPSL_LOCAL_ADDR) &&
		    !ip_addr_match((uint8_t *)&isel->ipsl_local,
		    isel->ipsl_local_pfxlen,
		    &sel->ips_local_addr_v6))
			continue;

		if ((valid & IPSL_REMOTE_PORT) &&
		    isel->ipsl_rport != sel->ips_remote_port)
			continue;

		if ((valid & IPSL_LOCAL_PORT) &&
		    isel->ipsl_lport != sel->ips_local_port)
			continue;

		if (!is_icmp_inv_acq) {
			if ((valid & IPSL_ICMP_TYPE) &&
			    (isel->ipsl_icmp_type > sel->ips_icmp_type ||
			    isel->ipsl_icmp_type_end < sel->ips_icmp_type)) {
				continue;
			}

			if ((valid & IPSL_ICMP_CODE) &&
			    (isel->ipsl_icmp_code > sel->ips_icmp_code ||
			    isel->ipsl_icmp_code_end <
			    sel->ips_icmp_code)) {
				continue;
			}
		} else {
			/*
			 * special case for icmp inverse acquire
			 * we only want policies that aren't drop/pass
			 */
			if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY)
				continue;
		}

		/* we matched all the packet-port-field selectors! */
		best = p;
		bpri = p->ipsp_prio;
	}

	return (best);
}

/*
 * Try to find and return the best policy entry under a given policy
 * root for a given set of selectors; the first parameter "best" is
 * the current best policy so far.  If "best" is non-null, we have a
 * reference to it.  We return a reference to a policy; if that policy
 * is not the original "best", we need to release that reference
 * before returning.
 */
ipsec_policy_t *
ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head,
    int direction, ipsec_selector_t *sel)
{
	ipsec_policy_t *curbest;
	ipsec_policy_root_t *root;
	uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq;
	int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6;

	curbest = best;
	root = &head->iph_root[direction];

#ifdef DEBUG
	if (is_icmp_inv_acq) {
		if (sel->ips_isv4) {
			if (sel->ips_protocol != IPPROTO_ICMP) {
				cmn_err(CE_WARN, "ipsec_find_policy_head:"
				    " expecting icmp, got %d",
				    sel->ips_protocol);
			}
		} else {
			if (sel->ips_protocol != IPPROTO_ICMPV6) {
				cmn_err(CE_WARN, "ipsec_find_policy_head:"
				    " expecting icmpv6, got %d",
				    sel->ips_protocol);
			}
		}
	}
#endif

	rw_enter(&head->iph_lock, RW_READER);

	if (root->ipr_nchains > 0) {
		curbest = ipsec_find_policy_chain(curbest,
		    root->ipr_hash[selector_hash(sel, root)].hash_head, sel,
		    is_icmp_inv_acq);
	}
	curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel,
	    is_icmp_inv_acq);

	/*
	 * Adjust reference counts if we found anything new.
2011 */ 2012 if (curbest != best) { 2013 ASSERT(curbest != NULL); 2014 IPPOL_REFHOLD(curbest); 2015 2016 if (best != NULL) { 2017 IPPOL_REFRELE(best); 2018 } 2019 } 2020 2021 rw_exit(&head->iph_lock); 2022 2023 return (curbest); 2024 } 2025 2026 /* 2027 * Find the best system policy (either global or per-interface) which 2028 * applies to the given selector; look in all the relevant policy roots 2029 * to figure out which policy wins. 2030 * 2031 * Returns a reference to a policy; caller must release this 2032 * reference when done. 2033 */ 2034 ipsec_policy_t * 2035 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2036 ipsec_selector_t *sel) 2037 { 2038 ipsec_policy_t *p; 2039 2040 p = ipsec_find_policy_head(NULL, &system_policy, direction, sel); 2041 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2042 p = ipsec_find_policy_head(p, connp->conn_policy, 2043 direction, sel); 2044 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2045 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2046 direction, sel); 2047 } 2048 2049 return (p); 2050 } 2051 2052 /* 2053 * Check with global policy and see whether this inbound 2054 * packet meets the policy constraints. 2055 * 2056 * Locate appropriate policy from global policy, supplemented by the 2057 * conn's configured and/or cached policy if the conn is supplied. 2058 * 2059 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2060 * encrypted packet to see if they match. 2061 * 2062 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2063 * floor. 2064 */ 2065 mblk_t * 2066 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2067 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2068 { 2069 ipsec_policy_t *p; 2070 ipsec_selector_t sel; 2071 queue_t *q = NULL; 2072 mblk_t *data_mp, *ipsec_mp; 2073 boolean_t policy_present; 2074 kstat_named_t *counter; 2075 ipsec_in_t *ii = NULL; 2076 uint64_t pkt_unique; 2077 2078 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2079 ipsec_mp = mctl_present ? first_mp : NULL; 2080 2081 sel.ips_is_icmp_inv_acq = 0; 2082 2083 ASSERT((ipha == NULL && ip6h != NULL) || 2084 (ip6h == NULL && ipha != NULL)); 2085 2086 if (ipha != NULL) 2087 policy_present = ipsec_inbound_v4_policy_present; 2088 else 2089 policy_present = ipsec_inbound_v6_policy_present; 2090 2091 if (!policy_present && connp == NULL) { 2092 /* 2093 * No global policy and no per-socket policy; 2094 * just pass it back (but we shouldn't get here in that case) 2095 */ 2096 return (first_mp); 2097 } 2098 2099 if (connp != NULL) 2100 q = CONNP_TO_WQ(connp); 2101 2102 if (ipsec_mp != NULL) { 2103 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2104 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2105 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2106 } 2107 2108 /* 2109 * If we have cached policy, use it. 2110 * Otherwise consult system policy. 2111 */ 2112 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2113 p = connp->conn_latch->ipl_in_policy; 2114 if (p != NULL) { 2115 IPPOL_REFHOLD(p); 2116 } 2117 /* 2118 * Fudge sel for UNIQUE_ID setting below. 2119 */ 2120 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2121 } else { 2122 /* Initialize the ports in the selector */ 2123 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2124 SEL_NONE) == SELRET_NOMEM) { 2125 /* 2126 * Technically not a policy mismatch, but it is 2127 * an internal failure. 
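 * We log it like a policy mismatch below, but account for it
 * separately with the allocation-failure drop counter
 * (ipdrops_spd_nomem).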
2128 */ 2129 ipsec_log_policy_failure(q, IPSEC_POLICY_MISMATCH, 2130 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE); 2131 counter = &ipdrops_spd_nomem; 2132 goto fail; 2133 } 2134 2135 /* 2136 * Find the policy which best applies. 2137 * 2138 * If we find global policy, we should look at both 2139 * local policy and global policy and see which is 2140 * stronger and match accordingly. 2141 * 2142 * If we don't find a global policy, check with 2143 * local policy alone. 2144 */ 2145 2146 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel); 2147 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2148 sel.ips_local_port, sel.ips_protocol, 0); 2149 } 2150 2151 if (p == NULL) { 2152 if (ipsec_mp == NULL) { 2153 /* 2154 * We have no policy; default to succeeding. 2155 * XXX paranoid system design doesn't do this. 2156 */ 2157 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2158 return (first_mp); 2159 } else { 2160 counter = &ipdrops_spd_got_secure; 2161 ipsec_log_policy_failure(q, IPSEC_POLICY_NOT_NEEDED, 2162 "ipsec_check_global_policy", ipha, ip6h, B_TRUE); 2163 goto fail; 2164 } 2165 } 2166 if ((ii != NULL) && (ii->ipsec_in_secure)) 2167 return (ipsec_check_ipsecin_policy(q, ipsec_mp, p, ipha, ip6h, 2168 pkt_unique)); 2169 if (p->ipsp_act->ipa_allow_clear) { 2170 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2171 IPPOL_REFRELE(p); 2172 return (first_mp); 2173 } 2174 IPPOL_REFRELE(p); 2175 /* 2176 * If we reach here, we drop the packet: it failed the global 2177 * policy check because it arrived as cleartext when it 2178 * should not have. 2179 */ 2180 ipsec_log_policy_failure(q, IPSEC_POLICY_MISMATCH, 2181 "ipsec_check_global_policy", ipha, ip6h, B_FALSE); 2182 counter = &ipdrops_spd_got_clear; 2183 2184 fail: 2185 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper); 2186 BUMP_MIB(&ip_mib, ipsecInFailed); 2187 return (NULL); 2188 } 2189 2190 /* 2191 * Check whether an inbound datagram is a valid one 2192 * to accept in the clear. If it is secure, it is the job 2193 * of IPsec to log information appropriately if it 2194 * suspects that it may not be genuine. 2195 * 2196 * It is called only while fanning out to the ULP, 2197 * where the ULP accepts only secure data and the incoming 2198 * datagram is clear. Usually we never accept clear datagrams in 2199 * such cases. ICMP is the only exception. 2200 * 2201 * NOTE: We don't call this function if the client (ULP) 2202 * is willing to accept things in the clear. 2203 */ 2204 boolean_t 2205 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2206 { 2207 ushort_t iph_hdr_length; 2208 icmph_t *icmph; 2209 icmp6_t *icmp6; 2210 uint8_t *nexthdrp; 2211 2212 ASSERT((ipha != NULL && ip6h == NULL) || 2213 (ipha == NULL && ip6h != NULL)); 2214 2215 if (ip6h != NULL) { 2216 iph_hdr_length = ip_hdr_length_v6(mp, ip6h); 2217 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, 2218 &nexthdrp)) { 2219 return (B_FALSE); 2220 } 2221 if (*nexthdrp != IPPROTO_ICMPV6) 2222 return (B_FALSE); 2223 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]); 2224 /* Match IPv6 ICMP policy as closely to IPv4 as possible. */ 2225 switch (icmp6->icmp6_type) { 2226 case ICMP6_PARAM_PROB: 2227 /* Corresponds to port/proto unreach in IPv4. */ 2228 case ICMP6_ECHO_REQUEST: 2229 /* Just like IPv4. */ 2230 return (B_FALSE); 2231 2232 case MLD_LISTENER_QUERY: 2233 case MLD_LISTENER_REPORT: 2234 case MLD_LISTENER_REDUCTION: 2235 /* 2236 * XXX Separate NDD in IPv4; what about here? 2237 * Plus, mcast is important to ND.
2238 */ 2239 case ICMP6_DST_UNREACH: 2240 /* Corresponds to HOST/NET unreachable in IPv4. */ 2241 case ICMP6_PACKET_TOO_BIG: 2242 case ICMP6_ECHO_REPLY: 2243 /* These are trusted in IPv4. */ 2244 case ND_ROUTER_SOLICIT: 2245 case ND_ROUTER_ADVERT: 2246 case ND_NEIGHBOR_SOLICIT: 2247 case ND_NEIGHBOR_ADVERT: 2248 case ND_REDIRECT: 2249 /* Trust ND messages for now. */ 2250 case ICMP6_TIME_EXCEEDED: 2251 default: 2252 return (B_TRUE); 2253 } 2254 } else { 2255 /* 2256 * If it is not ICMP, fail this request. 2257 */ 2258 if (ipha->ipha_protocol != IPPROTO_ICMP) { 2259 #ifdef FRAGCACHE_DEBUG 2260 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n", 2261 ipha->ipha_protocol); 2262 #endif 2263 return (B_FALSE); 2264 } 2265 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2266 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2267 /* 2268 * It is an insecure ICMP message. Check to see whether we are 2269 * willing to accept this one. 2270 */ 2271 2272 switch (icmph->icmph_type) { 2273 case ICMP_ECHO_REPLY: 2274 case ICMP_TIME_STAMP_REPLY: 2275 case ICMP_INFO_REPLY: 2276 case ICMP_ROUTER_ADVERTISEMENT: 2277 /* 2278 * We should not encourage clear replies if this 2279 * client expects secure ones. If somebody is replying 2280 * in the clear, a malicious user watching both the 2281 * request and reply can mount chosen-plaintext attacks. 2282 * With global policy we might be just expecting secure 2283 * but sending out clear. We don't know what the right 2284 * thing is, and we can't do much here as we can't 2285 * control the sender. Till we are sure of what to do, 2286 * accept them. 2287 */ 2288 return (B_TRUE); 2289 case ICMP_ECHO_REQUEST: 2290 case ICMP_TIME_STAMP_REQUEST: 2291 case ICMP_INFO_REQUEST: 2292 case ICMP_ADDRESS_MASK_REQUEST: 2293 case ICMP_ROUTER_SOLICITATION: 2294 case ICMP_ADDRESS_MASK_REPLY: 2295 /* 2296 * Don't accept these, as somebody could be sending 2297 * us plaintext to elicit encrypted data. If we reply, 2298 * it opens up a chosen-plaintext attack. 2299 */ 2300 return (B_FALSE); 2301 case ICMP_DEST_UNREACHABLE: 2302 switch (icmph->icmph_code) { 2303 case ICMP_FRAGMENTATION_NEEDED: 2304 /* 2305 * Be in sync with icmp_inbound, where we have 2306 * already set ire_max_frag. 2307 */ 2308 #ifdef FRAGCACHE_DEBUG 2309 cmn_err(CE_WARN, "ICMP frag needed\n"); 2310 #endif 2311 return (B_TRUE); 2312 case ICMP_HOST_UNREACHABLE: 2313 case ICMP_NET_UNREACHABLE: 2314 /* 2315 * By accepting, we could reset a connection. 2316 * How do we solve the problem of some 2317 * intermediate router sending insecure ICMP 2318 * messages? 2319 */ 2320 return (B_TRUE); 2321 case ICMP_PORT_UNREACHABLE: 2322 case ICMP_PROTOCOL_UNREACHABLE: 2323 default: 2324 return (B_FALSE); 2325 } 2326 case ICMP_SOURCE_QUENCH: 2327 /* 2328 * If this is an attack, TCP will slow-start 2329 * because of it. Is that very harmful?
2330 */ 2331 return (B_TRUE); 2332 case ICMP_PARAM_PROBLEM: 2333 return (B_FALSE); 2334 case ICMP_TIME_EXCEEDED: 2335 return (B_TRUE); 2336 case ICMP_REDIRECT: 2337 return (B_FALSE); 2338 default: 2339 return (B_FALSE); 2340 } 2341 } 2342 } 2343 2344 void 2345 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2346 { 2347 mutex_enter(&ipl->ipl_lock); 2348 2349 if (ipl->ipl_ids_latched) { 2350 /* I lost; someone else got here before me */ 2351 mutex_exit(&ipl->ipl_lock); 2352 return; 2353 } 2354 2355 if (local != NULL) 2356 IPSID_REFHOLD(local); 2357 if (remote != NULL) 2358 IPSID_REFHOLD(remote); 2359 2360 ipl->ipl_local_cid = local; 2361 ipl->ipl_remote_cid = remote; 2362 ipl->ipl_ids_latched = B_TRUE; 2363 mutex_exit(&ipl->ipl_lock); 2364 } 2365 2366 void 2367 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii) 2368 { 2369 ipsa_t *sa; 2370 2371 if (!ipl->ipl_ids_latched) { 2372 ipsid_t *local = NULL; 2373 ipsid_t *remote = NULL; 2374 2375 if (!ii->ipsec_in_loopback) { 2376 if (ii->ipsec_in_esp_sa != NULL) 2377 sa = ii->ipsec_in_esp_sa; 2378 else 2379 sa = ii->ipsec_in_ah_sa; 2380 ASSERT(sa != NULL); 2381 local = sa->ipsa_dst_cid; 2382 remote = sa->ipsa_src_cid; 2383 } 2384 ipsec_latch_ids(ipl, local, remote); 2385 } 2386 ipl->ipl_in_action = ii->ipsec_in_action; 2387 IPACT_REFHOLD(ipl->ipl_in_action); 2388 } 2389 2390 /* 2391 * Check whether the policy constraints are met for an 2392 * inbound datagram; called from IP in numerous places. 2393 * 2394 * Note that this is not a chokepoint for inbound policy checks; 2395 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy(). 2396 */ 2397 mblk_t * 2398 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, 2399 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2400 { 2401 ipsec_in_t *ii; 2402 boolean_t ret; 2403 queue_t *q; 2404 short mid = 0; 2405 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp; 2406 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL; 2407 ipsec_latch_t *ipl; 2408 uint64_t unique_id; 2409 2410 ASSERT(connp != NULL); 2411 ipl = connp->conn_latch; 2412 2413 if (ipsec_mp == NULL) { 2414 clear: 2415 /* 2416 * This is the case where the incoming datagram is 2417 * cleartext and we need to see whether this client 2418 * would like to receive such untrustworthy things from 2419 * the wire. 2420 */ 2421 ASSERT(mp != NULL); 2422 2423 if (ipl != NULL) { 2424 /* 2425 * Policy is cached in the conn. 2426 */ 2427 if ((ipl->ipl_in_policy != NULL) && 2428 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) { 2429 ret = ipsec_inbound_accept_clear(mp, 2430 ipha, ip6h); 2431 if (ret) { 2432 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2433 return (first_mp); 2434 } else { 2435 ipsec_log_policy_failure( 2436 CONNP_TO_WQ(connp), 2437 IPSEC_POLICY_MISMATCH, 2438 "ipsec_check_inbound_policy", ipha, 2439 ip6h, B_FALSE); 2440 ip_drop_packet(first_mp, B_TRUE, NULL, 2441 NULL, &ipdrops_spd_got_clear, 2442 &spd_dropper); 2443 BUMP_MIB(&ip_mib, ipsecInFailed); 2444 return (NULL); 2445 } 2446 } else { 2447 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2448 return (first_mp); 2449 } 2450 } else { 2451 /* 2452 * As this is a non-hardbound connection, we need 2453 * to look at both per-socket policy and global 2454 * policy. As this is cleartext, mark the mp as 2455 * M_DATA in case it is an ICMP error being 2456 * reported, before calling ipsec_check_global_policy 2457 * so that it does not mistake it for an IPSEC_IN.
2458 */ 2459 uchar_t db_type = mp->b_datap->db_type; 2460 mp->b_datap->db_type = M_DATA; 2461 first_mp = ipsec_check_global_policy(first_mp, connp, 2462 ipha, ip6h, mctl_present); 2463 if (first_mp != NULL) 2464 mp->b_datap->db_type = db_type; 2465 return (first_mp); 2466 } 2467 } 2468 /* 2469 * If it is inbound, check whether the attached message 2470 * is secure or not. We have a special case for ICMP, 2471 * where we have an IPSEC_IN message and the attached 2472 * message is not secure. See icmp_inbound_error_fanout 2473 * for details. 2474 */ 2475 ASSERT(ipsec_mp != NULL); 2476 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2477 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2478 2479 if (!ii->ipsec_in_secure) 2480 goto clear; 2481 2482 /* 2483 * mp->b_cont could be either an M_CTL message 2484 * for icmp errors being sent up or an M_DATA message. 2485 */ 2486 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA); 2487 2488 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2489 2490 if (ipl == NULL) { 2491 /* 2492 * We don't have policies cached in the conn 2493 * for this stream. So, look at the global 2494 * policy. It will check against conn or global 2495 * depending on whichever is stronger. 2496 */ 2497 return (ipsec_check_global_policy(first_mp, connp, 2498 ipha, ip6h, mctl_present)); 2499 } 2500 2501 if (ipl->ipl_in_action != NULL) { 2502 /* Policy is cached & latched; fast(er) path */ 2503 const char *reason; 2504 kstat_named_t *counter; 2505 if (ipsec_check_ipsecin_latch(ii, mp, ipl, 2506 ipha, ip6h, &reason, &counter, connp)) { 2507 BUMP_MIB(&ip_mib, ipsecInSucceeded); 2508 return (first_mp); 2509 } 2510 q = CONNP_TO_WQ(connp); 2511 if (q != NULL) { 2512 mid = q->q_qinfo->qi_minfo->mi_idnum; 2513 } 2514 ipsec_rl_strlog(mid, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 2515 "ipsec inbound policy mismatch: %s, packet dropped\n", 2516 reason); 2517 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2518 &spd_dropper); 2519 BUMP_MIB(&ip_mib, ipsecInFailed); 2520 return (NULL); 2521 } else if (ipl->ipl_in_policy == NULL) { 2522 ipsec_weird_null_inbound_policy++; 2523 return (first_mp); 2524 } 2525 2526 unique_id = conn_to_unique(connp, mp, ipha, ip6h); 2527 IPPOL_REFHOLD(ipl->ipl_in_policy); 2528 first_mp = ipsec_check_ipsecin_policy(CONNP_TO_WQ(connp), first_mp, 2529 ipl->ipl_in_policy, ipha, ip6h, unique_id); 2530 /* 2531 * NOTE: ipsecIn{Failed,Succeeded} bumped by 2532 * ipsec_check_ipsecin_policy(). 2533 */ 2534 if (first_mp != NULL) 2535 ipsec_latch_inbound(ipl, ii); 2536 return (first_mp); 2537 } 2538 2539 /* 2540 * Returns: 2541 * 2542 * SELRET_NOMEM --> The msgpullup() needed to gather things failed. 2543 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2544 * gathering, the initial fragment is too short for 2545 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2546 * set. 2547 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2548 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2549 * should put this packet in a fragment-gathering queue. 2550 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2551 * are set. 2552 */ 2553 static selret_t 2554 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2555 ip6_t *ip6h, uint8_t sel_flags) 2556 { 2557 uint16_t *ports; 2558 ushort_t hdr_len; 2559 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases...
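 * (when is_icmp is set, the header we were handed sits
 * behind an outer IP header, and this becomes the inner
 * header's offset from b_rptr)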
*/ 2560 mblk_t *spare_mp = NULL; 2561 uint8_t *nexthdrp; 2562 uint8_t nexthdr; 2563 uint8_t *typecode; 2564 uint8_t check_proto; 2565 ip6_pkt_t ipp; 2566 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2567 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2568 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2569 2570 ASSERT((ipha == NULL && ip6h != NULL) || 2571 (ipha != NULL && ip6h == NULL)); 2572 2573 if (ip6h != NULL) { 2574 if (is_icmp) 2575 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2576 2577 check_proto = IPPROTO_ICMPV6; 2578 sel->ips_isv4 = B_FALSE; 2579 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2580 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2581 2582 bzero(&ipp, sizeof (ipp)); 2583 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2584 2585 nexthdr = ip6h->ip6_nxt; 2586 switch (nexthdr) { 2587 case IPPROTO_HOPOPTS: 2588 case IPPROTO_ROUTING: 2589 case IPPROTO_DSTOPTS: 2590 case IPPROTO_FRAGMENT: 2591 /* 2592 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2593 * mblk that's contiguous to feed it 2594 */ 2595 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2596 return (SELRET_NOMEM); 2597 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2598 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2599 &hdr_len, &nexthdrp)) { 2600 /* Malformed packet - caller frees. */ 2601 ipsec_freemsg_chain(spare_mp); 2602 return (SELRET_BADPKT); 2603 } 2604 nexthdr = *nexthdrp; 2605 /* We can just extract based on hdr_len now. */ 2606 break; 2607 default: 2608 hdr_len = IPV6_HDR_LEN; 2609 break; 2610 } 2611 2612 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2613 /* IPv6 Fragment */ 2614 ipsec_freemsg_chain(spare_mp); 2615 return (SELRET_TUNFRAG); 2616 } 2617 } else { 2618 if (is_icmp) 2619 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2620 check_proto = IPPROTO_ICMP; 2621 sel->ips_isv4 = B_TRUE; 2622 sel->ips_local_addr_v4 = ipha->ipha_dst; 2623 sel->ips_remote_addr_v4 = ipha->ipha_src; 2624 nexthdr = ipha->ipha_protocol; 2625 hdr_len = IPH_HDR_LENGTH(ipha); 2626 2627 if (port_policy_present && 2628 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2629 !is_icmp) { 2630 /* IPv4 Fragment */ 2631 ipsec_freemsg_chain(spare_mp); 2632 return (SELRET_TUNFRAG); 2633 } 2634 2635 } 2636 sel->ips_protocol = nexthdr; 2637 2638 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2639 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2640 (!port_policy_present && tunnel_mode)) { 2641 sel->ips_remote_port = sel->ips_local_port = 0; 2642 ipsec_freemsg_chain(spare_mp); 2643 return (SELRET_SUCCESS); 2644 } 2645 2646 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2647 /* If we didn't pullup a copy already, do so now. */ 2648 /* 2649 * XXX performance, will upper-layers frequently split TCP/UDP 2650 * apart from IP or options? If so, perhaps we should revisit 2651 * the spare_mp strategy. 
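 *
 * Note that msgpullup(mp, -1) allocates a fresh mblk with the whole
 * message pulled up contiguously and leaves the original chain
 * intact, which is why spare_mp is freed separately once the fields
 * have been extracted.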
2652 */ 2653 ipsec_hdr_pullup_needed++; 2654 if (spare_mp == NULL && 2655 (spare_mp = msgpullup(mp, -1)) == NULL) { 2656 return (SELRET_NOMEM); 2657 } 2658 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2659 } else { 2660 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2661 } 2662 2663 if (nexthdr == check_proto) { 2664 typecode = (uint8_t *)ports; 2665 sel->ips_icmp_type = *typecode++; 2666 sel->ips_icmp_code = *typecode; 2667 sel->ips_remote_port = sel->ips_local_port = 0; 2668 } else { 2669 sel->ips_remote_port = *ports++; 2670 sel->ips_local_port = *ports; 2671 } 2672 ipsec_freemsg_chain(spare_mp); 2673 return (SELRET_SUCCESS); 2674 } 2675 2676 static boolean_t 2677 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2678 ip6_t *ip6h, int outer_hdr_len) 2679 { 2680 /* 2681 * XXX cut&paste shared with ipsec_init_inbound_sel 2682 */ 2683 uint16_t *ports; 2684 ushort_t hdr_len; 2685 mblk_t *spare_mp = NULL; 2686 uint8_t *nexthdrp; 2687 uint8_t nexthdr; 2688 uint8_t *typecode; 2689 uint8_t check_proto; 2690 2691 ASSERT((ipha == NULL && ip6h != NULL) || 2692 (ipha != NULL && ip6h == NULL)); 2693 2694 if (ip6h != NULL) { 2695 check_proto = IPPROTO_ICMPV6; 2696 nexthdr = ip6h->ip6_nxt; 2697 switch (nexthdr) { 2698 case IPPROTO_HOPOPTS: 2699 case IPPROTO_ROUTING: 2700 case IPPROTO_DSTOPTS: 2701 case IPPROTO_FRAGMENT: 2702 /* 2703 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2704 * mblk that's contiguous to feed it 2705 */ 2706 spare_mp = msgpullup(mp, -1); 2707 if (spare_mp == NULL || 2708 !ip_hdr_length_nexthdr_v6(spare_mp, 2709 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2710 &hdr_len, &nexthdrp)) { 2711 /* Always works, even if NULL. */ 2712 ipsec_freemsg_chain(spare_mp); 2713 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2714 &ipdrops_spd_nomem, &spd_dropper); 2715 return (B_FALSE); 2716 } else { 2717 nexthdr = *nexthdrp; 2718 /* We can just extract based on hdr_len now. */ 2719 } 2720 break; 2721 default: 2722 hdr_len = IPV6_HDR_LEN; 2723 break; 2724 } 2725 } else { 2726 check_proto = IPPROTO_ICMP; 2727 hdr_len = IPH_HDR_LENGTH(ipha); 2728 nexthdr = ipha->ipha_protocol; 2729 } 2730 2731 sel->ips_protocol = nexthdr; 2732 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2733 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2734 sel->ips_local_port = sel->ips_remote_port = 0; 2735 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2736 return (B_TRUE); 2737 } 2738 2739 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2740 /* If we didn't pullup a copy already, do so now. */ 2741 /* 2742 * XXX performance, will upper-layers frequently split TCP/UDP 2743 * apart from IP or options? If so, perhaps we should revisit 2744 * the spare_mp strategy. 2745 * 2746 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2747 */ 2748 if (spare_mp == NULL && 2749 (spare_mp = msgpullup(mp, -1)) == NULL) { 2750 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2751 &ipdrops_spd_nomem, &spd_dropper); 2752 return (B_FALSE); 2753 } 2754 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2755 } else { 2756 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2757 } 2758 2759 if (nexthdr == check_proto) { 2760 typecode = (uint8_t *)ports; 2761 sel->ips_icmp_type = *typecode++; 2762 sel->ips_icmp_code = *typecode; 2763 sel->ips_remote_port = sel->ips_local_port = 0; 2764 } else { 2765 sel->ips_local_port = *ports++; 2766 sel->ips_remote_port = *ports; 2767 } 2768 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2769 return (B_TRUE); 2770 } 2771 2772 /* 2773 * Create an ipsec_action_t based on the way an inbound packet was protected. 2774 * Used to reflect traffic back to a sender. 2775 * 2776 * We don't bother interning the action into the hash table. 2777 */ 2778 ipsec_action_t * 2779 ipsec_in_to_out_action(ipsec_in_t *ii) 2780 { 2781 ipsa_t *ah_assoc, *esp_assoc; 2782 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2783 ipsec_action_t *ap; 2784 boolean_t unique; 2785 2786 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2787 2788 if (ap == NULL) 2789 return (NULL); 2790 2791 bzero(ap, sizeof (*ap)); 2792 HASH_NULL(ap, ipa_hash); 2793 ap->ipa_next = NULL; 2794 ap->ipa_refs = 1; 2795 2796 /* 2797 * Get the algorithms that were used for this packet. 2798 */ 2799 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2800 ap->ipa_act.ipa_log = 0; 2801 ah_assoc = ii->ipsec_in_ah_sa; 2802 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 2803 2804 esp_assoc = ii->ipsec_in_esp_sa; 2805 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 2806 2807 if (esp_assoc != NULL) { 2808 encr_alg = esp_assoc->ipsa_encr_alg; 2809 espa_alg = esp_assoc->ipsa_auth_alg; 2810 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 2811 } 2812 if (ah_assoc != NULL) 2813 auth_alg = ah_assoc->ipsa_auth_alg; 2814 2815 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 2816 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 2817 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 2818 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 2819 unique = B_FALSE; 2820 2821 if (esp_assoc != NULL) { 2822 ap->ipa_act.ipa_apply.ipp_espa_minbits = 2823 esp_assoc->ipsa_authkeybits; 2824 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 2825 esp_assoc->ipsa_authkeybits; 2826 ap->ipa_act.ipa_apply.ipp_espe_minbits = 2827 esp_assoc->ipsa_encrkeybits; 2828 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 2829 esp_assoc->ipsa_encrkeybits; 2830 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 2831 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 2832 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 2833 unique = B_TRUE; 2834 } 2835 if (ah_assoc != NULL) { 2836 ap->ipa_act.ipa_apply.ipp_ah_minbits = 2837 ah_assoc->ipsa_authkeybits; 2838 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 2839 ah_assoc->ipsa_authkeybits; 2840 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 2841 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 2842 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 2843 unique = B_TRUE; 2844 } 2845 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 2846 ap->ipa_want_unique = unique; 2847 ap->ipa_allow_clear = B_FALSE; 2848 ap->ipa_want_se = ii->ipsec_in_decaps; 2849 ap->ipa_want_ah = (ah_assoc != NULL); 2850 ap->ipa_want_esp = (esp_assoc != NULL); 2851 2852 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 2853 
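	/*
	 * A reflected action built here is typically hung off an
	 * IPSEC_OUT by ipsec_in_to_out() below, which takes over our
	 * single reference when it stores the pointer in
	 * ipsec_out_act.
	 */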
2854 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 2855 2856 return (ap); 2857 } 2858 2859 2860 /* 2861 * Compute the worst-case amount of extra space required by an action. 2862 * Note that, because of the ESP considerations listed below, this is 2863 * actually not the same as the best-case reduction in the MTU; in the 2864 * future, we should pass additional information to this function to 2865 * allow the actual MTU impact to be computed. 2866 * 2867 * AH: Revisit this if we implement algorithms with 2868 * a verifier size of more than 12 bytes. 2869 * 2870 * ESP: A more exact but more messy computation would take into 2871 * account the interaction between the cipher block size and the 2872 * effective MTU, yielding the inner payload size which reflects a 2873 * packet with *minimum* ESP padding.. 2874 */ 2875 int32_t 2876 ipsec_act_ovhd(const ipsec_act_t *act) 2877 { 2878 int32_t overhead = 0; 2879 2880 if (act->ipa_type == IPSEC_ACT_APPLY) { 2881 const ipsec_prot_t *ipp = &act->ipa_apply; 2882 2883 if (ipp->ipp_use_ah) 2884 overhead += IPSEC_MAX_AH_HDR_SIZE; 2885 if (ipp->ipp_use_esp) { 2886 overhead += IPSEC_MAX_ESP_HDR_SIZE; 2887 overhead += sizeof (struct udphdr); 2888 } 2889 if (ipp->ipp_use_se) 2890 overhead += IP_SIMPLE_HDR_LENGTH; 2891 } 2892 return (overhead); 2893 } 2894 2895 /* 2896 * This hash function is used only when creating policies and thus is not 2897 * performance-critical for packet flows. 2898 * 2899 * Future work: canonicalize the structures hashed with this (i.e., 2900 * zeroize padding) so the hash works correctly. 2901 */ 2902 /* ARGSUSED */ 2903 static uint32_t 2904 policy_hash(int size, const void *start, const void *end) 2905 { 2906 return (0); 2907 } 2908 2909 2910 /* 2911 * Hash function macros for each address type. 2912 * 2913 * The IPV6 hash function assumes that the low order 32-bits of the 2914 * address (typically containing the low order 24 bits of the mac 2915 * address) are reasonably well-distributed. Revisit this if we run 2916 * into trouble from lots of collisions on ::1 addresses and the like 2917 * (seems unlikely). 2918 */ 2919 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 2920 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 2921 2922 /* 2923 * These two hash functions should produce coordinated values 2924 * but have slightly different roles. 2925 */ 2926 static uint32_t 2927 selkey_hash(const ipsec_selkey_t *selkey) 2928 { 2929 uint32_t valid = selkey->ipsl_valid; 2930 2931 if (!(valid & IPSL_REMOTE_ADDR)) 2932 return (IPSEC_SEL_NOHASH); 2933 2934 if (valid & IPSL_IPV4) { 2935 if (selkey->ipsl_remote_pfxlen == 32) 2936 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 2937 ipsec_spd_hashsize)); 2938 } 2939 if (valid & IPSL_IPV6) { 2940 if (selkey->ipsl_remote_pfxlen == 128) 2941 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 2942 ipsec_spd_hashsize)); 2943 } 2944 return (IPSEC_SEL_NOHASH); 2945 } 2946 2947 static uint32_t 2948 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 2949 { 2950 if (sel->ips_isv4) { 2951 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 2952 root->ipr_nchains)); 2953 } 2954 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 2955 } 2956 2957 /* 2958 * Intern actions into the action hash table. 
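 *
 * For illustration (a hypothetical caller, not code from this file):
 * an array of actions is interned once and the resulting chain shared
 * thereafter; the caller owns exactly one reference on the head
 * (error paths omitted):
 *
 *	ipsec_action_t *ap = ipsec_act_find(acts, nact);
 *	if (ap == NULL)
 *		return (ENOMEM);
 *	... hang ap off a policy rule ...
 *	IPACT_REFRELE(ap);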
2959 */ 2960 ipsec_action_t * 2961 ipsec_act_find(const ipsec_act_t *a, int n) 2962 { 2963 int i; 2964 uint32_t hval; 2965 ipsec_action_t *ap; 2966 ipsec_action_t *prev = NULL; 2967 int32_t overhead, maxovhd = 0; 2968 boolean_t allow_clear = B_FALSE; 2969 boolean_t want_ah = B_FALSE; 2970 boolean_t want_esp = B_FALSE; 2971 boolean_t want_se = B_FALSE; 2972 boolean_t want_unique = B_FALSE; 2973 2974 /* 2975 * TODO: should canonicalize a[] (i.e., zeroize any padding) 2976 * so we can use a non-trivial policy_hash function. 2977 */ 2978 for (i = n-1; i >= 0; i--) { 2979 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 2980 2981 HASH_LOCK(ipsec_action_hash, hval); 2982 2983 for (HASH_ITERATE(ap, ipa_hash, ipsec_action_hash, hval)) { 2984 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 2985 continue; 2986 if (ap->ipa_next != prev) 2987 continue; 2988 break; 2989 } 2990 if (ap != NULL) { 2991 HASH_UNLOCK(ipsec_action_hash, hval); 2992 prev = ap; 2993 continue; 2994 } 2995 /* 2996 * need to allocate a new one.. 2997 */ 2998 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2999 if (ap == NULL) { 3000 HASH_UNLOCK(ipsec_action_hash, hval); 3001 if (prev != NULL) 3002 ipsec_action_free(prev); 3003 return (NULL); 3004 } 3005 HASH_INSERT(ap, ipa_hash, ipsec_action_hash, hval); 3006 3007 ap->ipa_next = prev; 3008 ap->ipa_act = a[i]; 3009 3010 overhead = ipsec_act_ovhd(&a[i]); 3011 if (maxovhd < overhead) 3012 maxovhd = overhead; 3013 3014 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3015 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3016 allow_clear = B_TRUE; 3017 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3018 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3019 3020 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3021 want_ah |= ipp->ipp_use_ah; 3022 want_esp |= ipp->ipp_use_esp; 3023 want_se |= ipp->ipp_use_se; 3024 want_unique |= ipp->ipp_use_unique; 3025 } 3026 ap->ipa_allow_clear = allow_clear; 3027 ap->ipa_want_ah = want_ah; 3028 ap->ipa_want_esp = want_esp; 3029 ap->ipa_want_se = want_se; 3030 ap->ipa_want_unique = want_unique; 3031 ap->ipa_refs = 1; /* from the hash table */ 3032 ap->ipa_ovhd = maxovhd; 3033 if (prev) 3034 prev->ipa_refs++; 3035 prev = ap; 3036 HASH_UNLOCK(ipsec_action_hash, hval); 3037 } 3038 3039 ap->ipa_refs++; /* caller's reference */ 3040 3041 return (ap); 3042 } 3043 3044 /* 3045 * Called when refcount goes to 0, indicating that all references to this 3046 * node are gone. 3047 * 3048 * This does not unchain the action from the hash table. 3049 */ 3050 void 3051 ipsec_action_free(ipsec_action_t *ap) 3052 { 3053 for (;;) { 3054 ipsec_action_t *np = ap->ipa_next; 3055 ASSERT(ap->ipa_refs == 0); 3056 ASSERT(ap->ipa_hash.hash_pp == NULL); 3057 kmem_cache_free(ipsec_action_cache, ap); 3058 ap = np; 3059 /* Inlined IPACT_REFRELE -- avoid recursion */ 3060 if (ap == NULL) 3061 break; 3062 membar_exit(); 3063 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3064 break; 3065 /* End inlined IPACT_REFRELE */ 3066 } 3067 } 3068 3069 /* 3070 * Periodically sweep action hash table for actions with refcount==1, and 3071 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3072 * because we can't close the race between another thread finding the action 3073 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3074 * Instead, we run this function sporadically to clean up after ourselves; 3075 * we also set it as the "reclaim" function for the action kmem_cache. 
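 *
 * For illustration, the registration presumably happens where the
 * cache is created elsewhere in this file; a hypothetical sketch
 * (consult the actual kmem_cache_create() call for the real
 * arguments):
 *
 *	ipsec_action_cache = kmem_cache_create("ipsec_actions",
 *	    sizeof (ipsec_action_t), 0, NULL, NULL,
 *	    ipsec_action_reclaim, NULL, NULL, 0);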
3076 * 3077 * Note that it may take several passes of ipsec_action_reclaim() to free 3078 * all "stale" actions. 3079 */ 3080 /* ARGSUSED */ 3081 static void 3082 ipsec_action_reclaim(void *dummy) 3083 { 3084 int i; 3085 3086 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3087 ipsec_action_t *ap, *np; 3088 3089 /* skip the lock if nobody home */ 3090 if (ipsec_action_hash[i].hash_head == NULL) 3091 continue; 3092 3093 HASH_LOCK(ipsec_action_hash, i); 3094 for (ap = ipsec_action_hash[i].hash_head; 3095 ap != NULL; ap = np) { 3096 ASSERT(ap->ipa_refs > 0); 3097 np = ap->ipa_hash.hash_next; 3098 if (ap->ipa_refs > 1) 3099 continue; 3100 HASH_UNCHAIN(ap, ipa_hash, ipsec_action_hash, i); 3101 IPACT_REFRELE(ap); 3102 } 3103 HASH_UNLOCK(ipsec_action_hash, i); 3104 } 3105 } 3106 3107 /* 3108 * Intern a selector set into the selector set hash table. 3109 * This is simpler than the actions case.. 3110 */ 3111 static ipsec_sel_t * 3112 ipsec_find_sel(ipsec_selkey_t *selkey) 3113 { 3114 ipsec_sel_t *sp; 3115 uint32_t hval, bucket; 3116 3117 /* 3118 * Exactly one AF bit should be set in selkey. 3119 */ 3120 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3121 !(selkey->ipsl_valid & IPSL_IPV6)); 3122 3123 hval = selkey_hash(selkey); 3124 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3125 selkey->ipsl_sel_hval = hval; 3126 3127 bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval; 3128 3129 ASSERT(!HASH_LOCKED(ipsec_sel_hash, bucket)); 3130 HASH_LOCK(ipsec_sel_hash, bucket); 3131 3132 for (HASH_ITERATE(sp, ipsl_hash, ipsec_sel_hash, bucket)) { 3133 if (bcmp(&sp->ipsl_key, selkey, 3134 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3135 break; 3136 } 3137 if (sp != NULL) { 3138 sp->ipsl_refs++; 3139 3140 HASH_UNLOCK(ipsec_sel_hash, bucket); 3141 return (sp); 3142 } 3143 3144 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3145 if (sp == NULL) { 3146 HASH_UNLOCK(ipsec_sel_hash, bucket); 3147 return (NULL); 3148 } 3149 3150 HASH_INSERT(sp, ipsl_hash, ipsec_sel_hash, bucket); 3151 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3152 sp->ipsl_key = *selkey; 3153 /* Set to uninitialized and have insertion into polhead fix things. */ 3154 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3155 sp->ipsl_key.ipsl_pol_hval = 0; 3156 else 3157 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3158 3159 HASH_UNLOCK(ipsec_sel_hash, bucket); 3160 3161 return (sp); 3162 } 3163 3164 static void 3165 ipsec_sel_rel(ipsec_sel_t **spp) 3166 { 3167 ipsec_sel_t *sp = *spp; 3168 int hval = sp->ipsl_key.ipsl_sel_hval; 3169 *spp = NULL; 3170 3171 if (hval == IPSEC_SEL_NOHASH) 3172 hval = 0; 3173 3174 ASSERT(!HASH_LOCKED(ipsec_sel_hash, hval)); 3175 HASH_LOCK(ipsec_sel_hash, hval); 3176 if (--sp->ipsl_refs == 1) { 3177 HASH_UNCHAIN(sp, ipsl_hash, ipsec_sel_hash, hval); 3178 sp->ipsl_refs--; 3179 HASH_UNLOCK(ipsec_sel_hash, hval); 3180 ASSERT(sp->ipsl_refs == 0); 3181 kmem_cache_free(ipsec_sel_cache, sp); 3182 3183 return; 3184 } 3185 3186 HASH_UNLOCK(ipsec_sel_hash, hval); 3187 } 3188 3189 /* 3190 * Free a policy rule which we know is no longer being referenced. 3191 */ 3192 void 3193 ipsec_policy_free(ipsec_policy_t *ipp) 3194 { 3195 ASSERT(ipp->ipsp_refs == 0); 3196 ASSERT(ipp->ipsp_sel != NULL); 3197 ASSERT(ipp->ipsp_act != NULL); 3198 ipsec_sel_rel(&ipp->ipsp_sel); 3199 IPACT_REFRELE(ipp->ipsp_act); 3200 kmem_cache_free(ipsec_pol_cache, ipp); 3201 } 3202 3203 /* 3204 * Construction of new policy rules; construct a policy, and add it to 3205 * the appropriate tables.
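 *
 * For illustration (a hypothetical caller; error paths omitted), the
 * usual sequence is to construct the rule, then check and enter it
 * under the polhead's write lock so the duplicate check and the
 * insertion are atomic:
 *
 *	ipp = ipsec_policy_create(&keys, acts, nacts, prio, NULL);
 *	rw_enter(&php->iph_lock, RW_WRITER);
 *	if (ipp != NULL && ipsec_check_policy(php, ipp, dir))
 *		ipsec_enter_policy(php, ipp, dir);
 *	rw_exit(&php->iph_lock);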
3206 */ 3207 ipsec_policy_t * 3208 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3209 int nacts, int prio, uint64_t *index_ptr) 3210 { 3211 ipsec_action_t *ap; 3212 ipsec_sel_t *sp; 3213 ipsec_policy_t *ipp; 3214 3215 if (index_ptr == NULL) 3216 index_ptr = &ipsec_next_policy_index; 3217 3218 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3219 ap = ipsec_act_find(a, nacts); 3220 sp = ipsec_find_sel(keys); 3221 3222 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3223 if (ap != NULL) { 3224 IPACT_REFRELE(ap); 3225 } 3226 if (sp != NULL) 3227 ipsec_sel_rel(&sp); 3228 if (ipp != NULL) 3229 kmem_cache_free(ipsec_pol_cache, ipp); 3230 return (NULL); 3231 } 3232 3233 HASH_NULL(ipp, ipsp_hash); 3234 3235 ipp->ipsp_refs = 1; /* caller's reference */ 3236 ipp->ipsp_sel = sp; 3237 ipp->ipsp_act = ap; 3238 ipp->ipsp_prio = prio; /* rule priority */ 3239 ipp->ipsp_index = *index_ptr; 3240 (*index_ptr)++; 3241 3242 return (ipp); 3243 } 3244 3245 static void 3246 ipsec_update_present_flags() 3247 { 3248 boolean_t hashpol = (avl_numnodes(&system_policy.iph_rulebyid) > 0); 3249 3250 if (hashpol) { 3251 ipsec_outbound_v4_policy_present = B_TRUE; 3252 ipsec_outbound_v6_policy_present = B_TRUE; 3253 ipsec_inbound_v4_policy_present = B_TRUE; 3254 ipsec_inbound_v6_policy_present = B_TRUE; 3255 return; 3256 } 3257 3258 ipsec_outbound_v4_policy_present = (NULL != 3259 system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3260 ipr_nonhash[IPSEC_AF_V4]); 3261 ipsec_outbound_v6_policy_present = (NULL != 3262 system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3263 ipr_nonhash[IPSEC_AF_V6]); 3264 ipsec_inbound_v4_policy_present = (NULL != 3265 system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3266 ipr_nonhash[IPSEC_AF_V4]); 3267 ipsec_inbound_v6_policy_present = (NULL != 3268 system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3269 ipr_nonhash[IPSEC_AF_V6]); 3270 } 3271 3272 boolean_t 3273 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir) 3274 { 3275 ipsec_sel_t *sp; 3276 ipsec_policy_t *ip, *nip, *head; 3277 int af; 3278 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3279 3280 sp = ipsec_find_sel(keys); 3281 3282 if (sp == NULL) 3283 return (B_FALSE); 3284 3285 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? IPSEC_AF_V4 : IPSEC_AF_V6; 3286 3287 rw_enter(&php->iph_lock, RW_WRITER); 3288 3289 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) { 3290 head = pr->ipr_nonhash[af]; 3291 } else { 3292 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3293 } 3294 3295 for (ip = head; ip != NULL; ip = nip) { 3296 nip = ip->ipsp_hash.hash_next; 3297 if (ip->ipsp_sel != sp) { 3298 continue; 3299 } 3300 3301 IPPOL_UNCHAIN(php, ip); 3302 3303 php->iph_gen++; 3304 ipsec_update_present_flags(); 3305 3306 rw_exit(&php->iph_lock); 3307 3308 ipsec_sel_rel(&sp); 3309 3310 return (B_TRUE); 3311 } 3312 3313 rw_exit(&php->iph_lock); 3314 ipsec_sel_rel(&sp); 3315 return (B_FALSE); 3316 } 3317 3318 int 3319 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index) 3320 { 3321 boolean_t found = B_FALSE; 3322 ipsec_policy_t ipkey; 3323 ipsec_policy_t *ip; 3324 avl_index_t where; 3325 3326 (void) memset(&ipkey, 0, sizeof (ipkey)); 3327 ipkey.ipsp_index = policy_index; 3328 3329 rw_enter(&php->iph_lock, RW_WRITER); 3330 3331 /* 3332 * We could be cleverer here about the walk. 
3333 * but well, (k+1)*log(N) will do for now (k==number of matches, 3334 * N==number of table entries). 3335 */ 3336 for (;;) { 3337 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid, 3338 (void *)&ipkey, &where); 3339 ASSERT(ip == NULL); 3340 3341 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER); 3342 3343 if (ip == NULL) 3344 break; 3345 3346 if (ip->ipsp_index != policy_index) { 3347 ASSERT(ip->ipsp_index > policy_index); 3348 break; 3349 } 3350 3351 IPPOL_UNCHAIN(php, ip); 3352 found = B_TRUE; 3353 } 3354 3355 if (found) { 3356 php->iph_gen++; 3357 ipsec_update_present_flags(); 3358 } 3359 3360 rw_exit(&php->iph_lock); 3361 3362 return (found ? 0 : ENOENT); 3363 } 3364 3365 /* 3366 * Given a constructed ipsec_policy_t policy rule, see if it can be entered 3367 * into the correct policy ruleset. As a side effect, it sets the hash 3368 * entry on "ipp"'s selector key (ipsl_pol_hval). 3369 * 3370 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a 3371 * duplicate policy exists with exactly the same selectors, or an icmp 3372 * rule exists with a different encryption/authentication action). 3373 */ 3374 boolean_t 3375 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3376 { 3377 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3378 int af = -1; 3379 ipsec_policy_t *p2, *head; 3380 uint8_t check_proto; 3381 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3382 uint32_t valid = selkey->ipsl_valid; 3383 3384 if (valid & IPSL_IPV6) { 3385 ASSERT(!(valid & IPSL_IPV4)); 3386 af = IPSEC_AF_V6; 3387 check_proto = IPPROTO_ICMPV6; 3388 } else { 3389 ASSERT(valid & IPSL_IPV4); 3390 af = IPSEC_AF_V4; 3391 check_proto = IPPROTO_ICMP; 3392 } 3393 3394 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3395 3396 /* 3397 * Double-check that we don't have any duplicate selectors here. 3398 * Because selectors are interned below, we need only compare pointers 3399 * for equality. 3400 */ 3401 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) { 3402 head = pr->ipr_nonhash[af]; 3403 } else { 3404 selkey->ipsl_pol_hval = 3405 (selkey->ipsl_valid & IPSL_IPV4) ? 3406 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3407 pr->ipr_nchains) : 3408 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3409 pr->ipr_nchains); 3410 3411 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3412 } 3413 3414 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3415 if (p2->ipsp_sel == ipp->ipsp_sel) 3416 return (B_FALSE); 3417 } 3418 3419 /* 3420 * If it's ICMP and not a drop or pass rule, run through the ICMP 3421 * rules and make sure the action is either new or the same as any 3422 * other actions. We don't have to check the full chain because 3423 * discard and bypass will override all other actions. 3424 */ 3425 3426 if (valid & IPSL_PROTOCOL && 3427 selkey->ipsl_proto == check_proto && 3428 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3429 3430 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3431 3432 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3433 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3434 (p2->ipsp_act->ipa_act.ipa_type == 3435 IPSEC_ACT_APPLY)) { 3436 return (ipsec_compare_action(p2, ipp)); 3437 } 3438 } 3439 } 3440 3441 return (B_TRUE); 3442 } 3443 3444 /* 3445 * Compare the action chains of two policies for equality; 3446 * B_TRUE -> effective equality. 3447 */ 3448 3449 static boolean_t 3450 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3451 { 3452 3453 ipsec_action_t *act1, *act2; 3454 3455 /* We have a valid rule.
Let's compare the actions */ 3456 if (p1->ipsp_act == p2->ipsp_act) { 3457 /* Same action; we are good. */ 3458 return (B_TRUE); 3459 } 3460 3461 /* we have to walk the chain */ 3462 3463 act1 = p1->ipsp_act; 3464 act2 = p2->ipsp_act; 3465 3466 while (act1 != NULL && act2 != NULL) { 3467 3468 /* Otherwise, are we close enough? */ 3469 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3470 act1->ipa_want_ah != act2->ipa_want_ah || 3471 act1->ipa_want_esp != act2->ipa_want_esp || 3472 act1->ipa_want_se != act2->ipa_want_se) { 3473 /* Nope, we aren't. */ 3474 return (B_FALSE); 3475 } 3476 3477 if (act1->ipa_want_ah) { 3478 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3479 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3480 return (B_FALSE); 3481 } 3482 3483 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3484 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3485 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3486 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3487 return (B_FALSE); 3488 } 3489 } 3490 3491 if (act1->ipa_want_esp) { 3492 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3493 act2->ipa_act.ipa_apply.ipp_use_esp || 3494 act1->ipa_act.ipa_apply.ipp_use_espa != 3495 act2->ipa_act.ipa_apply.ipp_use_espa) { 3496 return (B_FALSE); 3497 } 3498 3499 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3500 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3501 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3502 return (B_FALSE); 3503 } 3504 3505 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3506 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3507 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3508 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3509 return (B_FALSE); 3510 } 3511 } 3512 3513 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3514 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3515 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3516 return (B_FALSE); 3517 } 3518 3519 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3520 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3521 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3522 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3523 return (B_FALSE); 3524 } 3525 } 3526 3527 } 3528 3529 act1 = act1->ipa_next; 3530 act2 = act2->ipa_next; 3531 } 3532 3533 if (act1 != NULL || act2 != NULL) { 3534 return (B_FALSE); 3535 } 3536 3537 return (B_TRUE); 3538 } 3539 3540 3541 /* 3542 * Given a constructed ipsec_policy_t policy rule, enter it into 3543 * the correct policy ruleset. 3544 * 3545 * ipsec_check_policy() is assumed to have succeeded first (to check for 3546 * duplicates).
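 *
 * The caller must hold php->iph_lock as RW_WRITER across both the
 * ipsec_check_policy() call and this one, so that the duplicate
 * check and the insertion are atomic (both functions ASSERT this).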
3547 */ 3548 void 3549 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3550 { 3551 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3552 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3553 uint32_t valid = selkey->ipsl_valid; 3554 uint32_t hval = selkey->ipsl_pol_hval; 3555 int af = -1; 3556 3557 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3558 3559 if (valid & IPSL_IPV6) { 3560 ASSERT(!(valid & IPSL_IPV4)); 3561 af = IPSEC_AF_V6; 3562 } else { 3563 ASSERT(valid & IPSL_IPV4); 3564 af = IPSEC_AF_V4; 3565 } 3566 3567 php->iph_gen++; 3568 3569 if (hval == IPSEC_SEL_NOHASH) { 3570 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3571 } else { 3572 HASH_LOCK(pr->ipr_hash, hval); 3573 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3574 HASH_UNLOCK(pr->ipr_hash, hval); 3575 } 3576 3577 ipsec_insert_always(&php->iph_rulebyid, ipp); 3578 3579 ipsec_update_present_flags(); 3580 } 3581 3582 static void 3583 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr) 3584 { 3585 ipsec_policy_t *ip, *nip; 3586 3587 int af, chain, nchain; 3588 3589 for (af = 0; af < IPSEC_NAF; af++) { 3590 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3591 nip = ip->ipsp_hash.hash_next; 3592 IPPOL_UNCHAIN(php, ip); 3593 } 3594 ipr->ipr_nonhash[af] = NULL; 3595 } 3596 nchain = ipr->ipr_nchains; 3597 3598 for (chain = 0; chain < nchain; chain++) { 3599 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3600 ip = nip) { 3601 nip = ip->ipsp_hash.hash_next; 3602 IPPOL_UNCHAIN(php, ip); 3603 } 3604 ipr->ipr_hash[chain].hash_head = NULL; 3605 } 3606 } 3607 3608 void 3609 ipsec_polhead_flush(ipsec_policy_head_t *php) 3610 { 3611 int dir; 3612 3613 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3614 3615 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3616 ipsec_ipr_flush(php, &php->iph_root[dir]); 3617 3618 ipsec_update_present_flags(); 3619 } 3620 3621 void 3622 ipsec_polhead_free(ipsec_policy_head_t *php) 3623 { 3624 int dir; 3625 3626 ASSERT(php->iph_refs == 0); 3627 rw_enter(&php->iph_lock, RW_WRITER); 3628 ipsec_polhead_flush(php); 3629 rw_exit(&php->iph_lock); 3630 rw_destroy(&php->iph_lock); 3631 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3632 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3633 int chain; 3634 3635 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3636 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3637 3638 } 3639 ipsec_polhead_free_table(php); 3640 kmem_free(php, sizeof (*php)); 3641 } 3642 3643 static void 3644 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3645 { 3646 int af; 3647 3648 ipr->ipr_nchains = 0; 3649 ipr->ipr_hash = NULL; 3650 3651 for (af = 0; af < IPSEC_NAF; af++) { 3652 ipr->ipr_nonhash[af] = NULL; 3653 } 3654 } 3655 3656 ipsec_policy_head_t * 3657 ipsec_polhead_create(void) 3658 { 3659 ipsec_policy_head_t *php; 3660 3661 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3662 if (php == NULL) 3663 return (php); 3664 3665 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3666 php->iph_refs = 1; 3667 php->iph_gen = 0; 3668 3669 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3670 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3671 3672 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3673 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3674 3675 return (php); 3676 } 3677 3678 /* 3679 * Clone the policy head into a new polhead; release one reference to the 3680 * old one and return the only reference to the new one. 3681 * If the old one had a refcount of 1, just return it. 
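 *
 * This gives copy-on-write behavior for shared policy heads; a
 * hypothetical caller about to modify per-socket policy would do:
 *
 *	ipsec_policy_head_t *nphp;
 *
 *	nphp = ipsec_polhead_split(connp->conn_policy);
 *	if (nphp == NULL)
 *		return (ENOMEM);	/* old head is still intact */
 *	connp->conn_policy = nphp;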
*/ 3683 ipsec_policy_head_t * 3684 ipsec_polhead_split(ipsec_policy_head_t *php) 3685 { 3686 ipsec_policy_head_t *nphp; 3687 3688 if (php == NULL) 3689 return (ipsec_polhead_create()); 3690 else if (php->iph_refs == 1) 3691 return (php); 3692 3693 nphp = ipsec_polhead_create(); 3694 if (nphp == NULL) 3695 return (NULL); 3696 3697 if (ipsec_copy_polhead(php, nphp) != 0) { 3698 ipsec_polhead_free(nphp); 3699 return (NULL); 3700 } 3701 IPPH_REFRELE(php); 3702 return (nphp); 3703 } 3704 3705 /* 3706 * When sending a response to an ICMP request or generating a RST 3707 * in the TCP case, the outbound packets need to go out at the same level 3708 * of protection as the incoming ones, i.e. we associate our outbound 3709 * policy with how the packet came in. We call this after we have 3710 * accepted the incoming packet, which may or may not have been in the 3711 * clear, and hence we send the reply back with a policy matching the 3712 * incoming datagram's policy. 3713 * 3714 * NOTE: This technique serves two purposes: 3715 * 3716 * 1) If we have multiple outbound policies, we send out a reply 3717 * matching how it came in rather than matching the outbound 3718 * policy. 3719 * 3720 * 2) For asymmetric policies, we want to make sure that incoming 3721 * and outgoing have the same level of protection. Asymmetric 3722 * policies exist only with global policy, where we may not have 3723 * both outbound and inbound at the same time. 3724 * 3725 * NOTE2: This function is called by cleartext cases, so it needs to be 3726 * in IP proper. 3727 */ 3728 boolean_t 3729 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) 3730 { 3731 ipsec_in_t *ii; 3732 ipsec_out_t *io; 3733 boolean_t v4; 3734 mblk_t *mp; 3735 boolean_t secure, attach_if; 3736 uint_t ifindex; 3737 ipsec_selector_t sel; 3738 ipsec_action_t *reflect_action = NULL; 3739 zoneid_t zoneid; 3740 3741 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 3742 3743 bzero((void*)&sel, sizeof (sel)); 3744 3745 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 3746 3747 mp = ipsec_mp->b_cont; 3748 ASSERT(mp != NULL); 3749 3750 if (ii->ipsec_in_action != NULL) { 3751 /* transfer reference.. */ 3752 reflect_action = ii->ipsec_in_action; 3753 ii->ipsec_in_action = NULL; 3754 } else if (!ii->ipsec_in_loopback) 3755 reflect_action = ipsec_in_to_out_action(ii); 3756 secure = ii->ipsec_in_secure; 3757 attach_if = ii->ipsec_in_attach_if; 3758 ifindex = ii->ipsec_in_ill_index; 3759 zoneid = ii->ipsec_in_zoneid; 3760 ASSERT(zoneid != ALL_ZONES); 3761 v4 = ii->ipsec_in_v4; 3762 3763 ipsec_in_release_refs(ii); 3764 3765 /* 3766 * The caller is going to send the datagram out, which might 3767 * go on the wire or be delivered locally through ip_wput_local. 3768 * 3769 * 1) If it goes out on the wire, new associations will be 3770 * obtained. 3771 * 2) If it is delivered locally, ip_wput_local will convert 3772 * this IPSEC_OUT to an IPSEC_IN looking at the requests.
3773 */ 3774 3775 io = (ipsec_out_t *)ipsec_mp->b_rptr; 3776 bzero(io, sizeof (ipsec_out_t)); 3777 io->ipsec_out_type = IPSEC_OUT; 3778 io->ipsec_out_len = sizeof (ipsec_out_t); 3779 io->ipsec_out_frtn.free_func = ipsec_out_free; 3780 io->ipsec_out_frtn.free_arg = (char *)io; 3781 io->ipsec_out_act = reflect_action; 3782 3783 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) 3784 return (B_FALSE); 3785 3786 io->ipsec_out_src_port = sel.ips_local_port; 3787 io->ipsec_out_dst_port = sel.ips_remote_port; 3788 io->ipsec_out_proto = sel.ips_protocol; 3789 io->ipsec_out_icmp_type = sel.ips_icmp_type; 3790 io->ipsec_out_icmp_code = sel.ips_icmp_code; 3791 3792 /* 3793 * Don't use global policy for this, as we want 3794 * to use the same protection that was applied to the inbound packet. 3795 */ 3796 io->ipsec_out_use_global_policy = B_FALSE; 3797 io->ipsec_out_proc_begin = B_FALSE; 3798 io->ipsec_out_secure = secure; 3799 io->ipsec_out_v4 = v4; 3800 io->ipsec_out_attach_if = attach_if; 3801 io->ipsec_out_ill_index = ifindex; 3802 io->ipsec_out_zoneid = zoneid; 3803 return (B_TRUE); 3804 } 3805 3806 mblk_t * 3807 ipsec_in_tag(mblk_t *mp, mblk_t *cont) 3808 { 3809 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 3810 ipsec_in_t *nii; 3811 mblk_t *nmp; 3812 frtn_t nfrtn; 3813 3814 ASSERT(ii->ipsec_in_type == IPSEC_IN); 3815 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 3816 3817 nmp = ipsec_in_alloc(ii->ipsec_in_v4); 3818 3819 ASSERT(nmp->b_datap->db_type == M_CTL); 3820 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 3821 3822 /* 3823 * Bump refcounts. 3824 */ 3825 if (ii->ipsec_in_ah_sa != NULL) 3826 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 3827 if (ii->ipsec_in_esp_sa != NULL) 3828 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 3829 if (ii->ipsec_in_policy != NULL) 3830 IPPH_REFHOLD(ii->ipsec_in_policy); 3831 3832 /* 3833 * Copy everything, but preserve the free routine provided by 3834 * ipsec_in_alloc(). 3835 */ 3836 nii = (ipsec_in_t *)nmp->b_rptr; 3837 nfrtn = nii->ipsec_in_frtn; 3838 bcopy(ii, nii, sizeof (*ii)); 3839 nii->ipsec_in_frtn = nfrtn; 3840 3841 nmp->b_cont = cont; 3842 3843 return (nmp); 3844 } 3845 3846 mblk_t * 3847 ipsec_out_tag(mblk_t *mp, mblk_t *cont) 3848 { 3849 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3850 ipsec_out_t *nio; 3851 mblk_t *nmp; 3852 frtn_t nfrtn; 3853 3854 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3855 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 3856 3857 nmp = ipsec_alloc_ipsec_out(); 3858 if (nmp == NULL) { 3859 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 3860 &ipdrops_spd_nomem, &spd_dropper); 3861 return (NULL); 3862 } 3863 ASSERT(nmp->b_datap->db_type == M_CTL); 3864 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 3865 3866 /* 3867 * Bump refcounts. 3868 */ 3869 if (io->ipsec_out_ah_sa != NULL) 3870 IPSA_REFHOLD(io->ipsec_out_ah_sa); 3871 if (io->ipsec_out_esp_sa != NULL) 3872 IPSA_REFHOLD(io->ipsec_out_esp_sa); 3873 if (io->ipsec_out_polhead != NULL) 3874 IPPH_REFHOLD(io->ipsec_out_polhead); 3875 if (io->ipsec_out_policy != NULL) 3876 IPPOL_REFHOLD(io->ipsec_out_policy); 3877 if (io->ipsec_out_act != NULL) 3878 IPACT_REFHOLD(io->ipsec_out_act); 3879 if (io->ipsec_out_latch != NULL) 3880 IPLATCH_REFHOLD(io->ipsec_out_latch); 3881 if (io->ipsec_out_cred != NULL) 3882 crhold(io->ipsec_out_cred); 3883 3884 /* 3885 * Copy everything, but preserve the free routine provided by 3886 * ipsec_alloc_ipsec_out(). 
3887 */ 3888 nio = (ipsec_out_t *)nmp->b_rptr; 3889 nfrtn = nio->ipsec_out_frtn; 3890 bcopy(io, nio, sizeof (*io)); 3891 nio->ipsec_out_frtn = nfrtn; 3892 3893 nmp->b_cont = cont; 3894 3895 return (nmp); 3896 } 3897 3898 static void 3899 ipsec_out_release_refs(ipsec_out_t *io) 3900 { 3901 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3902 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 3903 3904 /* Note: IPSA_REFRELE is multi-line macro */ 3905 if (io->ipsec_out_ah_sa != NULL) 3906 IPSA_REFRELE(io->ipsec_out_ah_sa); 3907 if (io->ipsec_out_esp_sa != NULL) 3908 IPSA_REFRELE(io->ipsec_out_esp_sa); 3909 if (io->ipsec_out_polhead != NULL) 3910 IPPH_REFRELE(io->ipsec_out_polhead); 3911 if (io->ipsec_out_policy != NULL) 3912 IPPOL_REFRELE(io->ipsec_out_policy); 3913 if (io->ipsec_out_act != NULL) 3914 IPACT_REFRELE(io->ipsec_out_act); 3915 if (io->ipsec_out_cred != NULL) { 3916 crfree(io->ipsec_out_cred); 3917 io->ipsec_out_cred = NULL; 3918 } 3919 if (io->ipsec_out_latch) { 3920 IPLATCH_REFRELE(io->ipsec_out_latch); 3921 io->ipsec_out_latch = NULL; 3922 } 3923 } 3924 3925 static void 3926 ipsec_out_free(void *arg) 3927 { 3928 ipsec_out_t *io = (ipsec_out_t *)arg; 3929 ipsec_out_release_refs(io); 3930 kmem_cache_free(ipsec_info_cache, arg); 3931 } 3932 3933 static void 3934 ipsec_in_release_refs(ipsec_in_t *ii) 3935 { 3936 /* Note: IPSA_REFRELE is multi-line macro */ 3937 if (ii->ipsec_in_ah_sa != NULL) 3938 IPSA_REFRELE(ii->ipsec_in_ah_sa); 3939 if (ii->ipsec_in_esp_sa != NULL) 3940 IPSA_REFRELE(ii->ipsec_in_esp_sa); 3941 if (ii->ipsec_in_policy != NULL) 3942 IPPH_REFRELE(ii->ipsec_in_policy); 3943 if (ii->ipsec_in_da != NULL) { 3944 freeb(ii->ipsec_in_da); 3945 ii->ipsec_in_da = NULL; 3946 } 3947 } 3948 3949 static void 3950 ipsec_in_free(void *arg) 3951 { 3952 ipsec_in_t *ii = (ipsec_in_t *)arg; 3953 ipsec_in_release_refs(ii); 3954 kmem_cache_free(ipsec_info_cache, arg); 3955 } 3956 3957 /* 3958 * This is called only for outbound datagrams if the datagram needs to 3959 * go out secure. A NULL mp can be passed to get an ipsec_out. This 3960 * facility is used by ip_unbind. 3961 * 3962 * NOTE : o As the data part could be modified by ipsec_out_process etc. 3963 * we can't make it fast by calling a dup. 3964 */ 3965 mblk_t * 3966 ipsec_alloc_ipsec_out() 3967 { 3968 mblk_t *ipsec_mp; 3969 3970 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 3971 3972 if (io == NULL) 3973 return (NULL); 3974 3975 bzero(io, sizeof (ipsec_out_t)); 3976 3977 io->ipsec_out_type = IPSEC_OUT; 3978 io->ipsec_out_len = sizeof (ipsec_out_t); 3979 io->ipsec_out_frtn.free_func = ipsec_out_free; 3980 io->ipsec_out_frtn.free_arg = (char *)io; 3981 3982 /* 3983 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 3984 * using ipsec_out_zoneid should assert that the zoneid has been set to 3985 * a sane value. 3986 */ 3987 io->ipsec_out_zoneid = ALL_ZONES; 3988 3989 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 3990 &io->ipsec_out_frtn); 3991 if (ipsec_mp == NULL) { 3992 ipsec_out_free(io); 3993 3994 return (NULL); 3995 } 3996 ipsec_mp->b_datap->db_type = M_CTL; 3997 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 3998 3999 return (ipsec_mp); 4000 } 4001 4002 /* 4003 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4004 * Otherwise initialize using conn. 4005 * 4006 * If pol is non-null, we consume a reference to it. 
4007 */ 4008 mblk_t * 4009 ipsec_attach_ipsec_out(mblk_t *mp, conn_t *connp, ipsec_policy_t *pol, 4010 uint8_t proto) 4011 { 4012 mblk_t *ipsec_mp; 4013 queue_t *q; 4014 short mid = 0; 4015 4016 ASSERT((pol != NULL) || (connp != NULL)); 4017 4018 ipsec_mp = ipsec_alloc_ipsec_out(); 4019 if (ipsec_mp == NULL) { 4020 q = CONNP_TO_WQ(connp); 4021 if (q != NULL) { 4022 mid = q->q_qinfo->qi_minfo->mi_idnum; 4023 } 4024 ipsec_rl_strlog(mid, 0, 0, SL_ERROR|SL_NOTE, 4025 "ipsec_attach_ipsec_out: Allocation failure\n"); 4026 BUMP_MIB(&ip_mib, ipOutDiscards); 4027 ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem, 4028 &spd_dropper); 4029 return (NULL); 4030 } 4031 ipsec_mp->b_cont = mp; 4032 return (ipsec_init_ipsec_out(ipsec_mp, connp, pol, proto)); 4033 } 4034 4035 /* 4036 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4037 * Otherwise initialize using conn. 4038 * 4039 * If pol is non-null, we consume a reference to it. 4040 */ 4041 mblk_t * 4042 ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, 4043 uint8_t proto) 4044 { 4045 mblk_t *mp; 4046 ipsec_out_t *io; 4047 ipsec_policy_t *p; 4048 ipha_t *ipha; 4049 ip6_t *ip6h; 4050 4051 ASSERT((pol != NULL) || (connp != NULL)); 4052 4053 /* 4054 * If mp is NULL, we won't/should not be using it. 4055 */ 4056 mp = ipsec_mp->b_cont; 4057 4058 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4059 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4060 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4061 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4062 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4063 io->ipsec_out_latch = NULL; 4064 /* 4065 * Set the zoneid when we have the connp. 4066 * Otherwise, we're called from ip_wput_attach_policy() who will take 4067 * care of setting the zoneid. 4068 */ 4069 if (connp != NULL) 4070 io->ipsec_out_zoneid = connp->conn_zoneid; 4071 4072 if (mp != NULL) { 4073 ipha = (ipha_t *)mp->b_rptr; 4074 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4075 io->ipsec_out_v4 = B_TRUE; 4076 ip6h = NULL; 4077 } else { 4078 io->ipsec_out_v4 = B_FALSE; 4079 ip6h = (ip6_t *)ipha; 4080 ipha = NULL; 4081 } 4082 } else { 4083 ASSERT(connp != NULL && connp->conn_policy_cached); 4084 ip6h = NULL; 4085 ipha = NULL; 4086 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4087 } 4088 4089 p = NULL; 4090 4091 /* 4092 * Take latched policies over global policy. Check here again for 4093 * this, in case we had conn_latch set while the packet was flying 4094 * around in IP. 4095 */ 4096 if (connp != NULL && connp->conn_latch != NULL) { 4097 p = connp->conn_latch->ipl_out_policy; 4098 io->ipsec_out_latch = connp->conn_latch; 4099 IPLATCH_REFHOLD(connp->conn_latch); 4100 if (p != NULL) { 4101 IPPOL_REFHOLD(p); 4102 } 4103 io->ipsec_out_src_port = connp->conn_lport; 4104 io->ipsec_out_dst_port = connp->conn_fport; 4105 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4106 if (pol != NULL) 4107 IPPOL_REFRELE(pol); 4108 } else if (pol != NULL) { 4109 ipsec_selector_t sel; 4110 4111 bzero((void*)&sel, sizeof (sel)); 4112 4113 p = pol; 4114 /* 4115 * conn does not have the port information. Get 4116 * it from the packet. 4117 */ 4118 4119 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) { 4120 /* Callee did ip_drop_packet(). 
*/ 4121 return (NULL); 4122 } 4123 io->ipsec_out_src_port = sel.ips_local_port; 4124 io->ipsec_out_dst_port = sel.ips_remote_port; 4125 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4126 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4127 } 4128 4129 io->ipsec_out_proto = proto; 4130 io->ipsec_out_use_global_policy = B_TRUE; 4131 io->ipsec_out_secure = (p != NULL); 4132 io->ipsec_out_policy = p; 4133 4134 if (p == NULL) { 4135 if (connp->conn_policy != NULL) { 4136 io->ipsec_out_secure = B_TRUE; 4137 ASSERT(io->ipsec_out_latch == NULL); 4138 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4139 io->ipsec_out_need_policy = B_TRUE; 4140 ASSERT(io->ipsec_out_polhead == NULL); 4141 IPPH_REFHOLD(connp->conn_policy); 4142 io->ipsec_out_polhead = connp->conn_policy; 4143 } 4144 } else { 4145 /* Handle explicit drop action. */ 4146 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4147 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4148 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4149 &ipdrops_spd_explicit, &spd_dropper); 4150 ipsec_mp = NULL; 4151 } 4152 } 4153 4154 return (ipsec_mp); 4155 } 4156 4157 /* 4158 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4159 * and keeps track of what IPsec processing, if any, will be applied to the 4160 * datagram. 4161 */ 4162 mblk_t * 4163 ipsec_in_alloc(boolean_t isv4) 4164 { 4165 mblk_t *ipsec_in; 4166 ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4167 4168 if (ii == NULL) 4169 return (NULL); 4170 4171 bzero(ii, sizeof (ipsec_info_t)); 4172 ii->ipsec_in_type = IPSEC_IN; 4173 ii->ipsec_in_len = sizeof (ipsec_in_t); 4174 4175 ii->ipsec_in_v4 = isv4; 4176 ii->ipsec_in_secure = B_TRUE; 4177 4178 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4179 ii->ipsec_in_frtn.free_arg = (char *)ii; 4180 4181 ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI, 4182 &ii->ipsec_in_frtn); 4183 if (ipsec_in == NULL) { 4184 ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n")); 4185 ipsec_in_free(ii); 4186 return (NULL); 4187 } 4188 4189 ipsec_in->b_datap->db_type = M_CTL; 4190 ipsec_in->b_wptr += sizeof (ipsec_info_t); 4191 4192 return (ipsec_in); 4193 } 4194 4195 /* 4196 * This is called from ip_wput_local when a packet which needs 4197 * security is looped back, to convert the IPSEC_OUT to an IPSEC_IN 4198 * before fanout, where the policy check happens. In most cases, 4199 * IPSEC processing has *never* been done. There is one case 4200 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where 4201 * the packet is destined for localhost and IPSEC processing has 4202 * already been done. 4203 * 4204 * Future: This could happen after SA selection has occurred for 4205 * outbound packets, which will tell us who the src and dst identities 4206 * are. Then it's just a matter of splicing the ah/esp SA pointers from 4207 * the ipsec_out_t to the ipsec_in_t.
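 *
 * A hypothetical sketch of that splice (not implemented here):
 *
 *	ii->ipsec_in_ah_sa = io->ipsec_out_ah_sa;
 *	ii->ipsec_in_esp_sa = io->ipsec_out_esp_sa;
 *
 * with the SA reference counts transferred rather than re-held.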
4208 */ 4209 void 4210 ipsec_out_to_in(mblk_t *ipsec_mp) 4211 { 4212 ipsec_in_t *ii; 4213 ipsec_out_t *io; 4214 ipsec_policy_t *pol; 4215 ipsec_action_t *act; 4216 boolean_t v4, icmp_loopback; 4217 zoneid_t zoneid; 4218 4219 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4220 4221 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4222 4223 v4 = io->ipsec_out_v4; 4224 zoneid = io->ipsec_out_zoneid; 4225 icmp_loopback = io->ipsec_out_icmp_loopback; 4226 4227 act = io->ipsec_out_act; 4228 if (act == NULL) { 4229 pol = io->ipsec_out_policy; 4230 if (pol != NULL) { 4231 act = pol->ipsp_act; 4232 IPACT_REFHOLD(act); 4233 } 4234 } 4235 io->ipsec_out_act = NULL; 4236 4237 ipsec_out_release_refs(io); 4238 4239 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4240 bzero(ii, sizeof (ipsec_in_t)); 4241 ii->ipsec_in_type = IPSEC_IN; 4242 ii->ipsec_in_len = sizeof (ipsec_in_t); 4243 ii->ipsec_in_loopback = B_TRUE; 4244 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4245 ii->ipsec_in_frtn.free_arg = (char *)ii; 4246 ii->ipsec_in_action = act; 4247 ii->ipsec_in_zoneid = zoneid; 4248 4249 /* 4250 * In most of the cases, we can't look at the ipsec_out_XXX_sa 4251 * because this never went through IPSEC processing. So, look at 4252 * the requests and infer whether it would have gone through 4253 * IPSEC processing or not. Initialize the "done" fields with 4254 * the requests. The possible values for "done" fields are : 4255 * 4256 * 1) zero, indicates that a particular preference was never 4257 * requested. 4258 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/ 4259 * IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that 4260 * IPSEC processing has been completed. 4261 */ 4262 ii->ipsec_in_secure = B_TRUE; 4263 ii->ipsec_in_v4 = v4; 4264 ii->ipsec_in_icmp_loopback = icmp_loopback; 4265 ii->ipsec_in_attach_if = B_FALSE; 4266 } 4267 4268 /* 4269 * Consults global policy to see whether this datagram should 4270 * go out secure. If so it attaches a ipsec_mp in front and 4271 * returns. 4272 */ 4273 mblk_t * 4274 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, 4275 conn_t *connp, boolean_t unspec_src, zoneid_t zoneid) 4276 { 4277 mblk_t *mp; 4278 ipsec_out_t *io = NULL; 4279 ipsec_selector_t sel; 4280 uint_t ill_index; 4281 boolean_t conn_dontroutex; 4282 boolean_t conn_multicast_loopx; 4283 boolean_t policy_present; 4284 4285 ASSERT((ipha != NULL && ip6h == NULL) || 4286 (ip6h != NULL && ipha == NULL)); 4287 4288 bzero((void*)&sel, sizeof (sel)); 4289 4290 if (ipha != NULL) 4291 policy_present = ipsec_outbound_v4_policy_present; 4292 else 4293 policy_present = ipsec_outbound_v6_policy_present; 4294 /* 4295 * Fast Path to see if there is any policy. 4296 */ 4297 if (!policy_present) { 4298 if (ipsec_mp->b_datap->db_type == M_CTL) { 4299 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4300 if (!io->ipsec_out_secure) { 4301 /* 4302 * If there is no global policy and ip_wput 4303 * or ip_wput_multicast has attached this mp 4304 * for multicast case, free the ipsec_mp and 4305 * return the original mp. 
4306 */ 4307 mp = ipsec_mp->b_cont; 4308 freeb(ipsec_mp); 4309 ipsec_mp = mp; 4310 io = NULL; 4311 } 4312 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4313 } 4314 if (((io == NULL) || (io->ipsec_out_polhead == NULL)) && 4315 ((connp == NULL) || (connp->conn_policy == NULL))) 4316 return (ipsec_mp); 4317 } 4318 4319 ill_index = 0; 4320 conn_multicast_loopx = conn_dontroutex = B_FALSE; 4321 mp = ipsec_mp; 4322 if (ipsec_mp->b_datap->db_type == M_CTL) { 4323 mp = ipsec_mp->b_cont; 4324 /* 4325 * This is a connection where we have some per-socket 4326 * policy or ip_wput has attached an ipsec_mp for 4327 * the multicast datagram. 4328 */ 4329 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4330 if (!io->ipsec_out_secure) { 4331 /* 4332 * This ipsec_mp was allocated in ip_wput or 4333 * ip_wput_multicast so that we will know the 4334 * value of ill_index, conn_dontroute, 4335 * conn_multicast_loop in the multicast case if 4336 * we inherit global policy here. 4337 */ 4338 ill_index = io->ipsec_out_ill_index; 4339 conn_dontroutex = io->ipsec_out_dontroute; 4340 conn_multicast_loopx = io->ipsec_out_multicast_loop; 4341 freeb(ipsec_mp); 4342 ipsec_mp = mp; 4343 io = NULL; 4344 } 4345 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4346 } 4347 4348 if (ipha != NULL) { 4349 sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ? 4350 ipha->ipha_src : ire->ire_src_addr); 4351 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4352 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4353 sel.ips_isv4 = B_TRUE; 4354 } else { 4355 ushort_t hdr_len; 4356 uint8_t *nexthdrp; 4357 boolean_t is_fragment; 4358 4359 sel.ips_isv4 = B_FALSE; 4360 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4361 if (!unspec_src) 4362 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4363 } else { 4364 sel.ips_local_addr_v6 = ip6h->ip6_src; 4365 } 4366 4367 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment); 4368 if (is_fragment) { 4369 /* 4370 * It's a packet fragment for a packet that 4371 * we have already processed (since IPsec processing 4372 * is done before fragmentation), so we don't 4373 * have to do policy checks again. Fragments can 4374 * come back to us for processing if they have 4375 * been queued up due to flow control. 4376 */ 4377 if (ipsec_mp->b_datap->db_type == M_CTL) { 4378 mp = ipsec_mp->b_cont; 4379 freeb(ipsec_mp); 4380 ipsec_mp = mp; 4381 } 4382 return (ipsec_mp); 4383 } 4384 4385 /* IPv6 common-case. */ 4386 sel.ips_protocol = ip6h->ip6_nxt; 4387 switch (ip6h->ip6_nxt) { 4388 case IPPROTO_TCP: 4389 case IPPROTO_UDP: 4390 case IPPROTO_SCTP: 4391 case IPPROTO_ICMPV6: 4392 break; 4393 default: 4394 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4395 &hdr_len, &nexthdrp)) { 4396 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 4397 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4398 return (NULL); 4399 } 4400 sel.ips_protocol = *nexthdrp; 4401 break; 4402 } 4403 } 4404 4405 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) { 4406 if (ipha != NULL) { 4407 BUMP_MIB(&ip_mib, ipOutDiscards); 4408 } else { 4409 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 4410 } 4411 4412 /* Callee dropped the packet. */ 4413 return (NULL); 4414 } 4415 4416 if (io != NULL) { 4417 /* 4418 * We seem to have some local policy (we already have 4419 * an ipsec_out). Look at global policy and see 4420 * whether we have to inherit or not. 
4421 */ 4422 io->ipsec_out_need_policy = B_FALSE; 4423 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, &sel); 4424 ASSERT((io->ipsec_out_policy != NULL) || 4425 (io->ipsec_out_act != NULL)); 4426 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4427 return (ipsec_mp); 4428 } 4429 ipsec_mp = ipsec_attach_global_policy(mp, connp, &sel); 4430 if (ipsec_mp == NULL) 4431 return (mp); 4432 4433 /* 4434 * Copy the right port information. 4435 */ 4436 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4437 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4438 4439 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4440 ASSERT((io->ipsec_out_policy != NULL) || 4441 (io->ipsec_out_act != NULL)); 4442 io->ipsec_out_src_port = sel.ips_local_port; 4443 io->ipsec_out_dst_port = sel.ips_remote_port; 4444 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4445 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4446 /* 4447 * Set ill_index, conn_dontroute and conn_multicast_loop 4448 * for multicast datagrams. 4449 */ 4450 io->ipsec_out_ill_index = ill_index; 4451 io->ipsec_out_dontroute = conn_dontroutex; 4452 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4453 4454 if (zoneid == ALL_ZONES) 4455 zoneid = GLOBAL_ZONEID; 4456 io->ipsec_out_zoneid = zoneid; 4457 return (ipsec_mp); 4458 } 4459 4460 /* 4461 * When appropriate, this function caches inbound and outbound policy 4462 * for this connection. 4463 * 4464 * XXX need to work out more details about per-interface policy and 4465 * caching here! 4466 * 4467 * XXX may want to split inbound and outbound caching for ill.. 4468 */ 4469 int 4470 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4471 { 4472 boolean_t global_policy_present; 4473 4474 /* 4475 * There is no policy latching for ICMP sockets because we can't 4476 * decide on which policy to use until we see the packet and get 4477 * type/code selectors. 4478 */ 4479 if (connp->conn_ulp == IPPROTO_ICMP || 4480 connp->conn_ulp == IPPROTO_ICMPV6) { 4481 connp->conn_in_enforce_policy = 4482 connp->conn_out_enforce_policy = B_TRUE; 4483 if (connp->conn_latch != NULL) { 4484 IPLATCH_REFRELE(connp->conn_latch); 4485 connp->conn_latch = NULL; 4486 } 4487 connp->conn_flags |= IPCL_CHECK_POLICY; 4488 return (0); 4489 } 4490 4491 global_policy_present = isv4 ? 
4492 (ipsec_outbound_v4_policy_present || 4493 ipsec_inbound_v4_policy_present) : 4494 (ipsec_outbound_v6_policy_present || 4495 ipsec_inbound_v6_policy_present); 4496 4497 if ((connp->conn_policy != NULL) || global_policy_present) { 4498 ipsec_selector_t sel; 4499 ipsec_policy_t *p; 4500 4501 if (connp->conn_latch == NULL && 4502 (connp->conn_latch = iplatch_create()) == NULL) { 4503 return (ENOMEM); 4504 } 4505 4506 sel.ips_protocol = connp->conn_ulp; 4507 sel.ips_local_port = connp->conn_lport; 4508 sel.ips_remote_port = connp->conn_fport; 4509 sel.ips_is_icmp_inv_acq = 0; 4510 sel.ips_isv4 = isv4; 4511 if (isv4) { 4512 sel.ips_local_addr_v4 = connp->conn_src; 4513 sel.ips_remote_addr_v4 = connp->conn_rem; 4514 } else { 4515 sel.ips_local_addr_v6 = connp->conn_srcv6; 4516 sel.ips_remote_addr_v6 = connp->conn_remv6; 4517 } 4518 4519 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel); 4520 if (connp->conn_latch->ipl_in_policy != NULL) 4521 IPPOL_REFRELE(connp->conn_latch->ipl_in_policy); 4522 connp->conn_latch->ipl_in_policy = p; 4523 connp->conn_in_enforce_policy = (p != NULL); 4524 4525 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel); 4526 if (connp->conn_latch->ipl_out_policy != NULL) 4527 IPPOL_REFRELE(connp->conn_latch->ipl_out_policy); 4528 connp->conn_latch->ipl_out_policy = p; 4529 connp->conn_out_enforce_policy = (p != NULL); 4530 4531 /* Clear the latched actions too, in case we're recaching. */ 4532 if (connp->conn_latch->ipl_out_action != NULL) 4533 IPACT_REFRELE(connp->conn_latch->ipl_out_action); 4534 if (connp->conn_latch->ipl_in_action != NULL) 4535 IPACT_REFRELE(connp->conn_latch->ipl_in_action); 4536 } 4537 4538 /* 4539 * We may or may not have policy for this endpoint. We still set 4540 * conn_policy_cached so that inbound datagrams don't have to look 4541 * at global policy as policy is considered latched for these 4542 * endpoints. We should not set conn_policy_cached until the conn 4543 * reflects the actual policy. If we *set* this before inheriting 4544 * the policy there is a window where the check 4545 * CONN_INBOUND_POLICY_PRESENT will consult neither the policy 4546 * on the conn (because we have not yet copied the policy onto the 4547 * conn and hence have not set conn_in_enforce_policy) nor the 4548 * global policy (because conn_policy_cached is already set). 4549 */ 4550 connp->conn_policy_cached = B_TRUE; 4551 if (connp->conn_in_enforce_policy) 4552 connp->conn_flags |= IPCL_CHECK_POLICY; 4553 return (0); 4554 } 4555 4556 void 4557 iplatch_free(ipsec_latch_t *ipl) 4558 { 4559 if (ipl->ipl_out_policy != NULL) 4560 IPPOL_REFRELE(ipl->ipl_out_policy); 4561 if (ipl->ipl_in_policy != NULL) 4562 IPPOL_REFRELE(ipl->ipl_in_policy); 4563 if (ipl->ipl_in_action != NULL) 4564 IPACT_REFRELE(ipl->ipl_in_action); 4565 if (ipl->ipl_out_action != NULL) 4566 IPACT_REFRELE(ipl->ipl_out_action); 4567 if (ipl->ipl_local_cid != NULL) 4568 IPSID_REFRELE(ipl->ipl_local_cid); 4569 if (ipl->ipl_remote_cid != NULL) 4570 IPSID_REFRELE(ipl->ipl_remote_cid); 4571 if (ipl->ipl_local_id != NULL) 4572 crfree(ipl->ipl_local_id); 4573 mutex_destroy(&ipl->ipl_lock); 4574 kmem_free(ipl, sizeof (*ipl)); 4575 } 4576 4577 ipsec_latch_t * 4578 iplatch_create() 4579 { 4580 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4581 if (ipl == NULL) 4582 return (ipl); 4583 bzero(ipl, sizeof (*ipl)); 4584 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4585 ipl->ipl_refcnt = 1; 4586 return (ipl); 4587 } 4588 4589 /* 4590 * Identity hash table.
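 *
 * Typical lifecycle (illustrative; the name below is made up):
 *
 *	ipsid_t *id = ipsid_lookup(SADB_IDENTTYPE_FQDN, "gw.example.com");
 *	if (id == NULL)
 *		... KM_NOSLEEP allocation failed ...
 *	... store id in an SA or in latching state ...
 *	IPSID_REFRELE(id);	(ipsid_gc() later reaps zero-ref entries)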
4591 * 4592 * Identities are refcounted and "interned" into the hash table. 4593 * Only references coming from other objects (SA's, latching state) 4594 * are counted in ipsid_refcnt. 4595 * 4596 * Locking: IPSID_REFHOLD is safe only when (a) the object's hash bucket 4597 * is locked, (b) we know that the refcount must be > 0. 4598 * 4599 * The ipsid_next and ipsid_ptpn fields are only to be referenced or 4600 * modified when the bucket lock is held; in particular, we only 4601 * delete objects while holding the bucket lock, and we only increase 4602 * the refcount from 0 to 1 while the bucket lock is held. 4603 */ 4604 4605 #define IPSID_HASHSIZE 64 4606 4607 typedef struct ipsif_s 4608 { 4609 ipsid_t *ipsif_head; 4610 kmutex_t ipsif_lock; 4611 } ipsif_t; 4612 4613 ipsif_t ipsid_buckets[IPSID_HASHSIZE]; 4614 4615 /* 4616 * Hash function for ID hash table. 4617 */ 4618 static uint32_t 4619 ipsid_hash(int idtype, char *idstring) 4620 { 4621 uint32_t hval = idtype; 4622 unsigned char c; 4623 4624 while ((c = *idstring++) != 0) { 4625 hval = (hval << 4) | (hval >> 28); 4626 hval ^= c; 4627 } 4628 hval = hval ^ (hval >> 16); 4629 return (hval & (IPSID_HASHSIZE-1)); 4630 } 4631 4632 /* 4633 * Look up identity string in hash table. Return identity object 4634 * corresponding to the name -- either preexisting, or newly allocated. 4635 * 4636 * Return NULL if we need to allocate a new one and can't get memory. 4637 */ 4638 ipsid_t * 4639 ipsid_lookup(int idtype, char *idstring) 4640 { 4641 ipsid_t *retval; 4642 char *nstr; 4643 int idlen = strlen(idstring) + 1; 4644 4645 ipsif_t *bucket = &ipsid_buckets[ipsid_hash(idtype, idstring)]; 4646 4647 mutex_enter(&bucket->ipsif_lock); 4648 4649 for (retval = bucket->ipsif_head; retval != NULL; 4650 retval = retval->ipsid_next) { 4651 if (idtype != retval->ipsid_type) 4652 continue; 4653 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4654 continue; 4655 4656 IPSID_REFHOLD(retval); 4657 mutex_exit(&bucket->ipsif_lock); 4658 return (retval); 4659 } 4660 4661 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4662 if (!retval) { 4663 mutex_exit(&bucket->ipsif_lock); 4664 return (NULL); 4665 } 4666 4667 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4668 if (!nstr) { 4669 mutex_exit(&bucket->ipsif_lock); 4670 kmem_free(retval, sizeof (*retval)); 4671 return (NULL); 4672 } 4673 4674 retval->ipsid_refcnt = 1; 4675 retval->ipsid_next = bucket->ipsif_head; 4676 if (retval->ipsid_next != NULL) 4677 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4678 retval->ipsid_ptpn = &bucket->ipsif_head; 4679 retval->ipsid_type = idtype; 4680 retval->ipsid_cid = nstr; 4681 bucket->ipsif_head = retval; 4682 bcopy(idstring, nstr, idlen); 4683 mutex_exit(&bucket->ipsif_lock); 4684 4685 return (retval); 4686 } 4687 4688 /* 4689 * Garbage collect the identity hash table. 4690 */ 4691 void 4692 ipsid_gc() 4693 { 4694 int i, len; 4695 ipsid_t *id, *nid; 4696 ipsif_t *bucket; 4697 4698 for (i = 0; i < IPSID_HASHSIZE; i++) { 4699 bucket = &ipsid_buckets[i]; 4700 mutex_enter(&bucket->ipsif_lock); 4701 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4702 nid = id->ipsid_next; 4703 if (id->ipsid_refcnt == 0) { 4704 *id->ipsid_ptpn = nid; 4705 if (nid != NULL) 4706 nid->ipsid_ptpn = id->ipsid_ptpn; 4707 len = strlen(id->ipsid_cid) + 1; 4708 kmem_free(id->ipsid_cid, len); 4709 kmem_free(id, sizeof (*id)); 4710 } 4711 } 4712 mutex_exit(&bucket->ipsif_lock); 4713 } 4714 } 4715 4716 /* 4717 * Return true if two identities are the same. 
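 * Because identities are interned by ipsid_lookup(), two lookups of the
 * same (type, string) pair return the same object, so pointer equality
 * suffices; the DEBUG code below merely cross-checks that property.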
4718 */ 4719 boolean_t 4720 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4721 { 4722 if (id1 == id2) 4723 return (B_TRUE); 4724 #ifdef DEBUG 4725 if ((id1 == NULL) || (id2 == NULL)) 4726 return (B_FALSE); 4727 /* 4728 * test that we're interning id's correctly.. 4729 */ 4730 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4731 (id1->ipsid_type != id2->ipsid_type)); 4732 #endif 4733 return (B_FALSE); 4734 } 4735 4736 /* 4737 * Initialize identity table; called during module initialization. 4738 */ 4739 static void 4740 ipsid_init() 4741 { 4742 ipsif_t *bucket; 4743 int i; 4744 4745 for (i = 0; i < IPSID_HASHSIZE; i++) { 4746 bucket = &ipsid_buckets[i]; 4747 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4748 } 4749 } 4750 4751 /* 4752 * Free identity table (preparatory to module unload) 4753 */ 4754 static void 4755 ipsid_fini() 4756 { 4757 ipsif_t *bucket; 4758 int i; 4759 4760 for (i = 0; i < IPSID_HASHSIZE; i++) { 4761 bucket = &ipsid_buckets[i]; 4762 mutex_destroy(&bucket->ipsif_lock); 4763 } 4764 } 4765 4766 /* 4767 * Update the minimum and maximum supported key sizes for the 4768 * specified algorithm. Must be called while holding the algorithms lock. 4769 */ 4770 void 4771 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type) 4772 { 4773 size_t crypto_min = (size_t)-1, crypto_max = 0; 4774 size_t cur_crypto_min, cur_crypto_max; 4775 boolean_t is_valid; 4776 crypto_mechanism_info_t *mech_infos; 4777 uint_t nmech_infos; 4778 int crypto_rc, i; 4779 crypto_mech_usage_t mask; 4780 4781 ASSERT(MUTEX_HELD(&alg_lock)); 4782 4783 /* 4784 * Compute the min, max, and default key sizes (in number of 4785 * increments to the default key size in bits) as defined 4786 * by the algorithm mappings. This range of key sizes is used 4787 * for policy related operations. The effective key sizes 4788 * supported by the framework could be more limited than 4789 * those defined for an algorithm. 4790 */ 4791 alg->alg_default_bits = alg->alg_key_sizes[0]; 4792 if (alg->alg_increment != 0) { 4793 /* key sizes are defined by range & increment */ 4794 alg->alg_minbits = alg->alg_key_sizes[1]; 4795 alg->alg_maxbits = alg->alg_key_sizes[2]; 4796 4797 alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits, 4798 alg->alg_increment, alg->alg_default_bits); 4799 } else if (alg->alg_nkey_sizes == 0) { 4800 /* no specified key size for algorithm */ 4801 alg->alg_minbits = alg->alg_maxbits = 0; 4802 } else { 4803 /* key sizes are defined by enumeration */ 4804 alg->alg_minbits = (uint16_t)-1; 4805 alg->alg_maxbits = 0; 4806 4807 for (i = 0; i < alg->alg_nkey_sizes; i++) { 4808 if (alg->alg_key_sizes[i] < alg->alg_minbits) 4809 alg->alg_minbits = alg->alg_key_sizes[i]; 4810 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 4811 alg->alg_maxbits = alg->alg_key_sizes[i]; 4812 } 4813 alg->alg_default = 0; 4814 } 4815 4816 if (!(alg->alg_flags & ALG_FLAG_VALID)) 4817 return; 4818 4819 /* 4820 * Mechanisms do not apply to the NULL encryption 4821 * algorithm, so simply return for this case. 4822 */ 4823 if (alg->alg_id == SADB_EALG_NULL) 4824 return; 4825 4826 /* 4827 * Find the min and max key sizes supported by the cryptographic 4828 * framework providers. 
4829 */ 4830 4831 /* get the key sizes supported by the framework */ 4832 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 4833 &mech_infos, &nmech_infos, KM_SLEEP); 4834 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 4835 alg->alg_flags &= ~ALG_FLAG_VALID; 4836 return; 4837 } 4838 4839 /* min and max key sizes supported by framework */ 4840 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 4841 int unit_bits; 4842 4843 /* 4844 * Ignore entries that do not support the operations 4845 * needed for the algorithm type. 4846 */ 4847 if (alg_type == IPSEC_ALG_AUTH) 4848 mask = CRYPTO_MECH_USAGE_MAC; 4849 else 4850 mask = CRYPTO_MECH_USAGE_ENCRYPT | 4851 CRYPTO_MECH_USAGE_DECRYPT; 4852 if ((mech_infos[i].mi_usage & mask) != mask) 4853 continue; 4854 4855 unit_bits = (mech_infos[i].mi_keysize_unit == 4856 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 4857 /* adjust min/max supported by framework */ 4858 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 4859 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 4860 4861 if (cur_crypto_min < crypto_min) 4862 crypto_min = cur_crypto_min; 4863 4864 /* 4865 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 4866 * the crypto framework which means "no upper limit". 4867 */ 4868 if (mech_infos[i].mi_max_key_size == 4869 CRYPTO_EFFECTIVELY_INFINITE) 4870 crypto_max = (size_t)-1; 4871 else if (cur_crypto_max > crypto_max) 4872 crypto_max = cur_crypto_max; 4873 4874 is_valid = B_TRUE; 4875 } 4876 4877 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 4878 nmech_infos); 4879 4880 if (!is_valid) { 4881 /* no key sizes supported by framework */ 4882 alg->alg_flags &= ~ALG_FLAG_VALID; 4883 return; 4884 } 4885 4886 /* 4887 * Determine min and max key sizes from alg_key_sizes[]. 4888 * defined for the algorithm entry. Adjust key sizes based on 4889 * those supported by the framework. 4890 */ 4891 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 4892 if (alg->alg_increment != 0) { 4893 /* supported key sizes are defined by range & increment */ 4894 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 4895 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 4896 4897 alg->alg_ef_minbits = MAX(alg->alg_minbits, 4898 (uint16_t)crypto_min); 4899 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 4900 (uint16_t)crypto_max); 4901 4902 /* 4903 * If the sizes supported by the framework are outside 4904 * the range of sizes defined by the algorithm mappings, 4905 * the algorithm cannot be used. Check for this 4906 * condition here. 4907 */ 4908 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 4909 alg->alg_flags &= ~ALG_FLAG_VALID; 4910 return; 4911 } 4912 4913 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 4914 alg->alg_ef_default_bits = alg->alg_ef_minbits; 4915 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 4916 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 4917 4918 alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits, 4919 alg->alg_increment, alg->alg_ef_default_bits); 4920 } else if (alg->alg_nkey_sizes == 0) { 4921 /* no specified key size for algorithm */ 4922 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 4923 } else { 4924 /* supported key sizes are defined by enumeration */ 4925 alg->alg_ef_minbits = (uint16_t)-1; 4926 alg->alg_ef_maxbits = 0; 4927 4928 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 4929 /* 4930 * Ignore the current key size if it is not in the 4931 * range of sizes supported by the framework. 
4932 */ 4933 if (alg->alg_key_sizes[i] < crypto_min || 4934 alg->alg_key_sizes[i] > crypto_max) 4935 continue; 4936 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 4937 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 4938 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 4939 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 4940 is_valid = B_TRUE; 4941 } 4942 4943 if (!is_valid) { 4944 alg->alg_flags &= ~ALG_FLAG_VALID; 4945 return; 4946 } 4947 alg->alg_ef_default = 0; 4948 } 4949 } 4950 4951 /* 4952 * Free the memory used by the specified algorithm. 4953 */ 4954 void 4955 ipsec_alg_free(ipsec_alginfo_t *alg) 4956 { 4957 if (alg == NULL) 4958 return; 4959 4960 if (alg->alg_key_sizes != NULL) 4961 kmem_free(alg->alg_key_sizes, 4962 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 4963 4964 if (alg->alg_block_sizes != NULL) 4965 kmem_free(alg->alg_block_sizes, 4966 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 4967 4968 kmem_free(alg, sizeof (*alg)); 4969 } 4970 4971 /* 4972 * Check the validity of the specified key size for an algorithm. 4973 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 4974 */ 4975 boolean_t 4976 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 4977 { 4978 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 4979 return (B_FALSE); 4980 4981 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 4982 /* 4983 * If the key sizes are defined by enumeration, the new 4984 * key size must be equal to one of the supported values. 4985 */ 4986 int i; 4987 4988 for (i = 0; i < alg->alg_nkey_sizes; i++) 4989 if (key_size == alg->alg_key_sizes[i]) 4990 break; 4991 if (i == alg->alg_nkey_sizes) 4992 return (B_FALSE); 4993 } 4994 4995 return (B_TRUE); 4996 } 4997 4998 /* 4999 * Callback function invoked by the crypto framework when a provider 5000 * registers or unregisters. This callback updates the algorithms 5001 * tables when a crypto algorithm is no longer available or becomes 5002 * available, and triggers the freeing/creation of context templates 5003 * associated with existing SAs, if needed. 5004 */ 5005 void 5006 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5007 { 5008 crypto_notify_event_change_t *prov_change = 5009 (crypto_notify_event_change_t *)event_arg; 5010 uint_t algidx, algid, algtype, mech_count, mech_idx; 5011 ipsec_alginfo_t *alg; 5012 ipsec_alginfo_t oalg; 5013 crypto_mech_name_t *mechs; 5014 boolean_t alg_changed = B_FALSE; 5015 5016 /* ignore events for which we didn't register */ 5017 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5018 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5019 " received from crypto framework\n", event)); 5020 return; 5021 } 5022 5023 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5024 if (mechs == NULL) 5025 return; 5026 5027 /* 5028 * Walk the list of currently defined IPsec algorithm. Update 5029 * the algorithm valid flag and trigger an update of the 5030 * SAs that depend on that algorithm. 5031 */ 5032 mutex_enter(&alg_lock); 5033 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5034 for (algidx = 0; algidx < ipsec_nalgs[algtype]; algidx++) { 5035 5036 algid = ipsec_sortlist[algtype][algidx]; 5037 alg = ipsec_alglists[algtype][algid]; 5038 ASSERT(alg != NULL); 5039 5040 /* 5041 * Skip the algorithms which do not map to the 5042 * crypto framework provider being added or removed. 
5043 */ 5044 if (strncmp(alg->alg_mech_name, 5045 prov_change->ec_mech_name, 5046 CRYPTO_MAX_MECH_NAME) != 0) 5047 continue; 5048 5049 /* 5050 * Determine if the mechanism is valid. If it 5051 * is not, mark the algorithm as being invalid. If 5052 * it is, mark the algorithm as being valid. 5053 */ 5054 for (mech_idx = 0; mech_idx < mech_count; mech_idx++) 5055 if (strncmp(alg->alg_mech_name, 5056 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0) 5057 break; 5058 if (mech_idx == mech_count && 5059 alg->alg_flags & ALG_FLAG_VALID) { 5060 alg->alg_flags &= ~ALG_FLAG_VALID; 5061 alg_changed = B_TRUE; 5062 } else if (mech_idx < mech_count && 5063 !(alg->alg_flags & ALG_FLAG_VALID)) { 5064 alg->alg_flags |= ALG_FLAG_VALID; 5065 alg_changed = B_TRUE; 5066 } 5067 5068 /* 5069 * Update the supported key sizes, regardless 5070 * of whether a crypto provider was added or 5071 * removed. 5072 */ 5073 oalg = *alg; 5074 ipsec_alg_fix_min_max(alg, algtype); 5075 if (!alg_changed && 5076 alg->alg_ef_minbits != oalg.alg_ef_minbits || 5077 alg->alg_ef_maxbits != oalg.alg_ef_maxbits || 5078 alg->alg_ef_default != oalg.alg_ef_default || 5079 alg->alg_ef_default_bits != 5080 oalg.alg_ef_default_bits) 5081 alg_changed = B_TRUE; 5082 5083 /* 5084 * Update the affected SAs if a software provider is 5085 * being added or removed. 5086 */ 5087 if (prov_change->ec_provider_type == 5088 CRYPTO_SW_PROVIDER) 5089 sadb_alg_update(algtype, alg->alg_id, 5090 prov_change->ec_change == 5091 CRYPTO_MECH_ADDED); 5092 } 5093 } 5094 mutex_exit(&alg_lock); 5095 crypto_free_mech_list(mechs, mech_count); 5096 5097 if (alg_changed) { 5098 /* 5099 * An algorithm has changed, i.e. it became valid or 5100 * invalid, or its support key sizes have changed. 5101 * Notify ipsecah and ipsecesp of this change so 5102 * that they can send a SADB_REGISTER to their consumers. 5103 */ 5104 ipsecah_algs_changed(); 5105 ipsecesp_algs_changed(); 5106 } 5107 } 5108 5109 /* 5110 * Registers with the crypto framework to be notified of crypto 5111 * providers changes. Used to update the algorithm tables and 5112 * to free or create context templates if needed. Invoked after IPsec 5113 * is loaded successfully. 5114 */ 5115 void 5116 ipsec_register_prov_update(void) 5117 { 5118 prov_update_handle = crypto_notify_events( 5119 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); 5120 } 5121 5122 /* 5123 * Unregisters from the framework to be notified of crypto providers 5124 * changes. Called from ipsec_policy_destroy(). 5125 */ 5126 static void 5127 ipsec_unregister_prov_update(void) 5128 { 5129 if (prov_update_handle != NULL) 5130 crypto_unnotify_events(prov_update_handle); 5131 } 5132 5133 /* 5134 * Tunnel-mode support routines. 5135 */ 5136 5137 /* 5138 * Returns an mblk chain suitable for putnext() if policies match and IPsec 5139 * SAs are available. If there's no per-tunnel policy, or a match comes back 5140 * with no match, then still return the packet and have global policy take 5141 * a crack at it in IP. 5142 * 5143 * Remember -> we can be forwarding packets. Keep that in mind w.r.t. 5144 * inner-packet contents. 
5145 */ 5146 mblk_t * 5147 ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, 5148 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len) 5149 { 5150 ipsec_tun_pol_t *itp = atp->tun_itp; 5151 ipsec_policy_head_t *polhead; 5152 ipsec_selector_t sel; 5153 mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; 5154 mblk_t *spare_mp = NULL; 5155 ipsec_out_t *io; 5156 boolean_t is_fragment; 5157 ipsec_policy_t *pol; 5158 5159 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5160 outer_ipv4 != NULL && outer_ipv6 == NULL); 5161 /* We take care of inners in a bit. */ 5162 5163 /* No policy on this tunnel - let global policy have at it. */ 5164 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) 5165 return (mp); 5166 polhead = itp->itp_policy; 5167 5168 bzero(&sel, sizeof (sel)); 5169 if (inner_ipv4 != NULL) { 5170 ASSERT(inner_ipv6 == NULL); 5171 sel.ips_isv4 = B_TRUE; 5172 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5173 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5174 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5175 is_fragment = 5176 IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags); 5177 } else { 5178 ASSERT(inner_ipv6 != NULL); 5179 sel.ips_isv4 = B_FALSE; 5180 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5181 /* Use ip_get_dst_v6() just for the fragment bit. */ 5182 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, 5183 &is_fragment); 5184 /* 5185 * Reset, because we don't care about routing-header dests 5186 * in the forwarding/tunnel path. 5187 */ 5188 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5189 } 5190 5191 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5192 if (is_fragment) { 5193 ipha_t *oiph; 5194 ipha_t *iph = NULL; 5195 ip6_t *ip6h = NULL; 5196 int hdr_len; 5197 uint16_t ip6_hdr_length; 5198 uint8_t v6_proto; 5199 uint8_t *v6_proto_p; 5200 5201 /* 5202 * We have a fragment we need to track! 5203 */ 5204 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5205 outer_hdr_len); 5206 if (mp == NULL) 5207 return (NULL); 5208 5209 /* 5210 * If we get here, we have a full 5211 * fragment chain 5212 */ 5213 5214 oiph = (ipha_t *)mp->b_rptr; 5215 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5216 hdr_len = ((outer_hdr_len != 0) ? 
5217 IPH_HDR_LENGTH(oiph) : 0); 5218 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5219 } else { 5220 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5221 if ((spare_mp = msgpullup(mp, -1)) == NULL) { 5222 ip_drop_packet_chain(mp, B_FALSE, 5223 NULL, NULL, &ipdrops_spd_nomem, 5224 &spd_dropper); return (NULL); 5225 } 5226 ip6h = (ip6_t *)spare_mp->b_rptr; 5227 (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, 5228 &ip6_hdr_length, &v6_proto_p); 5229 hdr_len = ip6_hdr_length; 5230 } 5231 outer_hdr_len = hdr_len; 5232 5233 if (sel.ips_isv4) { 5234 if (iph == NULL) { 5235 /* Was v6 outer */ 5236 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5237 } 5238 inner_ipv4 = iph; 5239 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5240 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5241 sel.ips_protocol = 5242 (uint8_t)inner_ipv4->ipha_protocol; 5243 } else { 5244 if ((spare_mp == NULL) && 5245 ((spare_mp = msgpullup(mp, -1)) == NULL)) { 5246 ip_drop_packet_chain(mp, B_FALSE, 5247 NULL, NULL, &ipdrops_spd_nomem, 5248 &spd_dropper); return (NULL); 5249 } 5250 inner_ipv6 = (ip6_t *)(spare_mp->b_rptr + 5251 hdr_len); 5252 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5253 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5254 (void) ip_hdr_length_nexthdr_v6(spare_mp, 5255 inner_ipv6, &ip6_hdr_length, 5256 &v6_proto_p); 5257 v6_proto = *v6_proto_p; 5258 sel.ips_protocol = v6_proto; 5259 #ifdef FRAGCACHE_DEBUG 5260 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5261 sel.ips_protocol); 5262 #endif 5263 } 5264 /* Ports are extracted below */ 5265 } 5266 5267 /* Get ports... */ 5268 if (spare_mp != NULL) { 5269 if (!ipsec_init_outbound_ports(&sel, spare_mp, 5270 inner_ipv4, inner_ipv6, outer_hdr_len)) { 5271 /* 5272 * callee did ip_drop_packet_chain() on 5273 * spare_mp 5274 */ 5275 ipsec_freemsg_chain(mp); 5276 return (NULL); 5277 } 5278 } else { 5279 if (!ipsec_init_outbound_ports(&sel, mp, 5280 inner_ipv4, inner_ipv6, outer_hdr_len)) { 5281 /* callee did ip_drop_packet_chain() on mp. */ 5282 return (NULL); 5283 } 5284 } 5285 #ifdef FRAGCACHE_DEBUG 5286 if (inner_ipv4 != NULL) 5287 cmn_err(CE_WARN, 5288 "(v4) sel.ips_protocol = %d, " 5289 "sel.ips_local_port = %d, " 5290 "sel.ips_remote_port = %d\n", 5291 sel.ips_protocol, ntohs(sel.ips_local_port), 5292 ntohs(sel.ips_remote_port)); 5293 if (inner_ipv6 != NULL) 5294 cmn_err(CE_WARN, 5295 "(v6) sel.ips_protocol = %d, " 5296 "sel.ips_local_port = %d, " 5297 "sel.ips_remote_port = %d\n", 5298 sel.ips_protocol, ntohs(sel.ips_local_port), 5299 ntohs(sel.ips_remote_port)); 5300 #endif 5301 /* Success so far - done with spare_mp */ 5302 ipsec_freemsg_chain(spare_mp); 5303 } 5304 rw_enter(&polhead->iph_lock, RW_READER); 5305 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel); 5306 rw_exit(&polhead->iph_lock); 5307 if (pol == NULL) { 5308 /* 5309 * No matching policy on this tunnel, drop the packet. 5310 * 5311 * NOTE: Tunnel-mode tunnels are different from the 5312 * IP global transport mode policy head. For a tunnel-mode 5313 * tunnel, we drop the packet instead of passing it 5314 * along accepted, the way a global-policy miss would. 5315 * 5316 * NOTE2: "negotiate transport" tunnels should match ALL 5317 * inbound packets, but we do not uncomment the ASSERT() 5318 * below because if/when we open PF_POLICY, a user can 5319 * shoot him/her-self in the foot with a 0 priority.
5320 */ 5321 5322 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5323 #ifdef FRAGCACHE_DEBUG 5324 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5325 "per-port policy\n"); 5326 #endif 5327 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5328 &ipdrops_spd_explicit, &spd_dropper); 5329 return (NULL); 5330 } 5331 5332 #ifdef FRAGCACHE_DEBUG 5333 cmn_err(CE_WARN, "Found matching tunnel per-port policy\n"); 5334 #endif 5335 5336 /* Construct an IPSEC_OUT message. */ 5337 ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(); 5338 if (ipsec_mp == NULL) { 5339 IPPOL_REFRELE(pol); 5340 ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem, 5341 &spd_dropper); 5342 return (NULL); 5343 } 5344 ipsec_mp->b_cont = mp; 5345 io = (ipsec_out_t *)ipsec_mp->b_rptr; 5346 IPPH_REFHOLD(polhead); 5347 /* 5348 * NOTE: free() function of ipsec_out mblk will release polhead and 5349 * pol references. 5350 */ 5351 io->ipsec_out_polhead = polhead; 5352 io->ipsec_out_policy = pol; 5353 io->ipsec_out_zoneid = atp->tun_zoneid; 5354 io->ipsec_out_v4 = (outer_ipv4 != NULL); 5355 io->ipsec_out_secure = B_TRUE; 5356 5357 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5358 /* Set up transport mode for tunnelled packets. */ 5359 io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5360 IPPROTO_IPV6; 5361 return (ipsec_mp); 5362 } 5363 5364 /* Fill in tunnel-mode goodies here. */ 5365 io->ipsec_out_tunnel = B_TRUE; 5366 /* XXX Do I need to fill in all of the goodies here? */ 5367 if (inner_ipv4) { 5368 io->ipsec_out_inaf = AF_INET; 5369 io->ipsec_out_insrc[0] = 5370 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5371 io->ipsec_out_indst[0] = 5372 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5373 } else { 5374 io->ipsec_out_inaf = AF_INET6; 5375 io->ipsec_out_insrc[0] = 5376 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5377 io->ipsec_out_insrc[1] = 5378 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5379 io->ipsec_out_insrc[2] = 5380 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5381 io->ipsec_out_insrc[3] = 5382 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5383 io->ipsec_out_indst[0] = 5384 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5385 io->ipsec_out_indst[1] = 5386 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5387 io->ipsec_out_indst[2] = 5388 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5389 io->ipsec_out_indst[3] = 5390 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5391 } 5392 io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5393 io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5394 /* NOTE: These are used for transport mode too. */ 5395 io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5396 io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5397 io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5398 5399 /* 5400 * The mp pointer is still valid. Add an ipsec_out to each fragment;
* the fragment head already has one. 5403 */ 5404 nmp = mp->b_next; 5405 mp->b_next = NULL; 5406 mp = nmp; 5407 ASSERT(ipsec_mp != NULL); 5408 while (mp != NULL) { 5409 nmp = mp->b_next; 5410 ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp); 5411 if (ipsec_mp->b_next == NULL) { 5412 ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL, 5413 &ipdrops_spd_nomem, &spd_dropper); 5414 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5415 &ipdrops_spd_nomem, &spd_dropper); 5416 return (NULL); 5417 } 5418 ipsec_mp = ipsec_mp->b_next; 5419 mp->b_next = NULL; 5420 mp = nmp; 5421 } 5422 return (ipsec_mp_head); 5423 } 5424 5425 /* 5426 * NOTE: The following releases pol's reference and 5427 * calls ip_drop_packet() for me on NULL returns. 5428 */ 5429 mblk_t * 5430 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, 5431 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique) 5432 { 5433 /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */ 5434 mblk_t *data_chain = NULL, *data_tail = NULL; 5435 mblk_t *ii_next; 5436 5437 while (ipsec_mp != NULL) { 5438 ii_next = ipsec_mp->b_next; 5439 ipsec_mp->b_next = NULL; /* No tripping asserts. */ 5440 5441 /* 5442 * Need IPPOL_REFHOLD(pol) for extras because 5443 * ipsecin_policy does the refrele. 5444 */ 5445 IPPOL_REFHOLD(pol); 5446 5447 if (ipsec_check_ipsecin_policy(NULL, ipsec_mp, pol, 5448 inner_ipv4, inner_ipv6, pkt_unique) != NULL) { 5449 if (data_tail == NULL) { 5450 /* First one */ 5451 data_chain = data_tail = ipsec_mp->b_cont; 5452 } else { 5453 data_tail->b_next = ipsec_mp->b_cont; 5454 data_tail = data_tail->b_next; 5455 } 5456 freeb(ipsec_mp); 5457 } else { 5458 /* 5459 * ipsec_check_ipsecin_policy() freed ipsec_mp 5460 * already. Need to get rid of any extra pol 5461 * references, and any remaining bits as well. 5462 */ 5463 IPPOL_REFRELE(pol); 5464 ipsec_freemsg_chain(data_chain); 5465 ipsec_freemsg_chain(ii_next); /* ipdrop stats? */ 5466 return (NULL); 5467 } 5468 ipsec_mp = ii_next; 5469 } 5470 /* 5471 * One last release because either the loop bumped it up, or we never 5472 * called ipsec_check_ipsecin_policy(). 5473 */ 5474 IPPOL_REFRELE(pol); 5475 5476 /* data_chain is ready for return to tun module. */ 5477 return (data_chain); 5478 } 5479 5480 5481 /* 5482 * Returns B_TRUE if the inbound packet passed an IPsec policy check. Returns 5483 * B_FALSE if it failed or if it is a fragment needing its friends before a 5484 * policy check can be performed. 5485 * 5486 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead. 5487 * data_mp may be reassigned with a b_next chain of packets if fragments 5488 * needed to be collected for a proper policy check. 5489 * 5490 * Always frees ipsec_mp, but only frees data_mp if it returns B_FALSE. This 5491 * function calls ip_drop_packet() on data_mp if need be. 5492 * 5493 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller 5494 * is inspecting an ICMP packet. 5495 */ 5496 boolean_t 5497 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, 5498 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, 5499 ip6_t *outer_ipv6, int outer_hdr_len) 5500 { 5501 ipsec_policy_head_t *polhead; 5502 ipsec_selector_t sel; 5503 mblk_t *message = (ipsec_mp == NULL) ?
*data_mp : ipsec_mp; 5504 ipsec_policy_t *pol; 5505 uint16_t tmpport; 5506 selret_t rc; 5507 boolean_t retval, port_policy_present, is_icmp; 5508 in6_addr_t tmpaddr; 5509 uint8_t flags; 5510 5511 sel.ips_is_icmp_inv_acq = 0; 5512 5513 ASSERT(outer_ipv4 != NULL && outer_ipv6 == NULL || 5514 outer_ipv4 == NULL && outer_ipv6 != NULL); 5515 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || 5516 inner_ipv4 == NULL && inner_ipv6 != NULL); 5517 ASSERT(message == *data_mp || message->b_cont == *data_mp); 5518 5519 if (outer_hdr_len < 0) { 5520 outer_hdr_len = (-outer_hdr_len); 5521 is_icmp = B_TRUE; 5522 } else { 5523 is_icmp = B_FALSE; 5524 } 5525 5526 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5527 polhead = itp->itp_policy; 5528 /* 5529 * We need to perform full Tunnel-Mode enforcement, 5530 * and we need to have inner-header data for such enforcement. 5531 * 5532 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5533 * and on return. 5534 */ 5535 5536 port_policy_present = ((itp->itp_flags & 5537 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5538 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5539 (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5540 5541 rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4, 5542 inner_ipv6, flags); 5543 5544 switch (rc) { 5545 case SELRET_NOMEM: 5546 ip_drop_packet(message, B_TRUE, NULL, NULL, 5547 &ipdrops_spd_nomem, &spd_dropper); 5548 return (B_FALSE); 5549 case SELRET_TUNFRAG: 5550 /* 5551 * At this point, if we're cleartext, we don't want 5552 * to go there. 5553 */ 5554 if (ipsec_mp == NULL) { 5555 ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, 5556 &ipdrops_spd_got_clear, &spd_dropper); 5557 *data_mp = NULL; 5558 return (B_FALSE); 5559 } 5560 ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> 5561 ipsec_in_secure); 5562 message = ipsec_fragcache_add(&itp->itp_fragcache, 5563 ipsec_mp, *data_mp, outer_hdr_len); 5564 5565 if (message == NULL) { 5566 /* 5567 * Data is cached, fragment chain is not 5568 * complete. I consume ipsec_mp and data_mp 5569 */ 5570 return (B_FALSE); 5571 } 5572 5573 /* 5574 * If we get here, we have a full fragment chain. 5575 * Reacquire headers and selectors from first fragment. 5576 */ 5577 if (inner_ipv4 != NULL) { 5578 inner_ipv4 = (ipha_t *)message->b_cont->b_rptr; 5579 ASSERT(message->b_cont->b_wptr - 5580 message->b_cont->b_rptr > sizeof (ipha_t)); 5581 } else { 5582 inner_ipv6 = (ip6_t *)message->b_cont->b_rptr; 5583 ASSERT(message->b_cont->b_wptr - 5584 message->b_cont->b_rptr > sizeof (ip6_t)); 5585 } 5586 /* Use SEL_NONE so we always get ports! */ 5587 rc = ipsec_init_inbound_sel(&sel, message->b_cont, 5588 inner_ipv4, inner_ipv6, SEL_NONE); 5589 switch (rc) { 5590 case SELRET_SUCCESS: 5591 /* 5592 * Get to same place as first caller's 5593 * SELRET_SUCCESS case. 5594 */ 5595 break; 5596 case SELRET_NOMEM: 5597 ip_drop_packet_chain(message, B_TRUE, NULL, 5598 NULL, &ipdrops_spd_nomem, &spd_dropper); 5599 return (B_FALSE); 5600 case SELRET_BADPKT: 5601 ip_drop_packet_chain(message, B_TRUE, NULL, 5602 NULL, &ipdrops_spd_malformed_frag, 5603 &spd_dropper); 5604 return (B_FALSE); 5605 case SELRET_TUNFRAG: 5606 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5607 /* FALLTHRU */ 5608 default: 5609 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5610 " returns bizarro 0x%x", rc); 5611 /* Guaranteed panic! */ 5612 ASSERT(rc == SELRET_NOMEM); 5613 return (B_FALSE); 5614 } 5615 /* FALLTHRU */ 5616 case SELRET_SUCCESS: 5617 /* 5618 * Common case: 5619 * No per-port policy or a non-fragment. 
Keep going. 5620 */ 5621 break; 5622 case SELRET_BADPKT: 5623 /* 5624 * We may receive ICMP (with IPv6 inner) packets that 5625 * trigger this return value. Send 'em in for 5626 * enforcement checking. 5627 */ 5628 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5629 "sending 'bad packet' in for enforcement"); 5630 break; 5631 default: 5632 cmn_err(CE_WARN, 5633 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5634 rc); 5635 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5636 return (B_FALSE); 5637 } 5638 5639 if (is_icmp) { 5640 /* 5641 * Swap local/remote because this is an ICMP packet. 5642 */ 5643 tmpaddr = sel.ips_local_addr_v6; 5644 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5645 sel.ips_remote_addr_v6 = tmpaddr; 5646 tmpport = sel.ips_local_port; 5647 sel.ips_local_port = sel.ips_remote_port; 5648 sel.ips_remote_port = tmpport; 5649 } 5650 5651 /* find_policy_head() */ 5652 rw_enter(&polhead->iph_lock, RW_READER); 5653 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5654 &sel); 5655 rw_exit(&polhead->iph_lock); 5656 if (pol != NULL) { 5657 if (ipsec_mp == NULL || 5658 !((ipsec_in_t *)ipsec_mp->b_rptr)-> 5659 ipsec_in_secure) { 5660 retval = pol->ipsp_act->ipa_allow_clear; 5661 if (!retval) { 5662 /* 5663 * XXX should never get here with 5664 * tunnel reassembled fragments? 5665 */ 5666 ASSERT(message->b_next == NULL); 5667 ip_drop_packet(message, B_TRUE, NULL, 5668 NULL, &ipdrops_spd_got_clear, 5669 &spd_dropper); 5670 } else if (ipsec_mp != NULL) { 5671 freeb(ipsec_mp); 5672 } 5673 5674 IPPOL_REFRELE(pol); 5675 return (retval); 5676 } 5677 /* 5678 * NOTE: The following releases pol's reference and 5679 * calls ip_drop_packet() for me on NULL returns. 5680 * 5681 * "sel" is still good here, so let's use it! 5682 */ 5683 *data_mp = ipsec_check_ipsecin_policy_reasm(message, 5684 pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( 5685 sel.ips_remote_port, sel.ips_local_port, 5686 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 5687 IPPROTO_ENCAP, sel.ips_protocol)); 5688 return (*data_mp != NULL); 5689 } 5690 5691 /* 5692 * Else fallthru and check the global policy on the outer 5693 * header(s) if this tunnel is an old-style transport-mode 5694 * one. Drop the packet explicitly (no policy entry) for 5695 * a new-style tunnel-mode tunnel. 5696 */ 5697 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 5698 ip_drop_packet_chain(message, B_TRUE, NULL, 5699 NULL, &ipdrops_spd_explicit, &spd_dropper); 5700 return (B_FALSE); 5701 } 5702 } 5703 5704 /* 5705 * NOTE: If we reach here, we will not have packet chains from 5706 * fragcache_add(), because the only way I get chains is on a 5707 * tunnel-mode tunnel, which either returns with a pass, or gets 5708 * hit by the ip_drop_packet_chain() call right above here. 5709 */ 5710 5711 /* If no per-tunnel security, check global policy now. */ 5712 if (ipsec_mp != NULL && 5713 (((outer_ipv4 != NULL) && !ipsec_inbound_v4_policy_present) || 5714 ((outer_ipv6 != NULL) && !ipsec_inbound_v6_policy_present))) { 5715 if (((ipsec_in_t *)(ipsec_mp->b_rptr))-> 5716 ipsec_in_icmp_loopback) { 5717 /* 5718 * This is an ICMP message with an ipsec_mp 5719 * attached. We should accept it. 5720 */ 5721 if (ipsec_mp != NULL) 5722 freeb(ipsec_mp); 5723 return (B_TRUE); 5724 } 5725 5726 ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, 5727 &ipdrops_spd_got_secure, &spd_dropper); 5728 return (B_FALSE); 5729 } 5730 5731 /* NOTE: Frees message if it returns NULL. 
*/ 5732 if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6, 5733 (ipsec_mp != NULL)) == NULL) { 5734 return (B_FALSE); 5735 } 5736 5737 if (ipsec_mp != NULL) 5738 freeb(ipsec_mp); 5739 5740 /* 5741 * At this point, we pretend it's a cleartext accepted 5742 * packet. 5743 */ 5744 return (B_TRUE); 5745 } 5746 5747 /* 5748 * AVL comparison routine for our list of tunnel polheads. 5749 */ 5750 static int 5751 tunnel_compare(const void *arg1, const void *arg2) 5752 { 5753 ipsec_tun_pol_t *left, *right; 5754 int rc; 5755 5756 left = (ipsec_tun_pol_t *)arg1; 5757 right = (ipsec_tun_pol_t *)arg2; 5758 5759 rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ); 5760 return (rc == 0 ? rc : (rc > 0 ? 1 : -1)); 5761 } 5762 5763 /* 5764 * Free a tunnel policy node. 5765 */ 5766 void 5767 itp_free(ipsec_tun_pol_t *node) 5768 { 5769 IPPH_REFRELE(node->itp_policy); 5770 IPPH_REFRELE(node->itp_inactive); 5771 mutex_destroy(&node->itp_lock); 5772 kmem_free(node, sizeof (*node)); 5773 } 5774 5775 void 5776 itp_unlink(ipsec_tun_pol_t *node) 5777 { 5778 rw_enter(&tunnel_policy_lock, RW_WRITER); 5779 tunnel_policy_gen++; 5780 ipsec_fragcache_uninit(&node->itp_fragcache); 5781 avl_remove(&tunnel_policies, node); 5782 rw_exit(&tunnel_policy_lock); 5783 ITP_REFRELE(node); 5784 } 5785 5786 /* 5787 * Public interface to look up a tunnel security policy by name. Used by 5788 * spdsock mostly. Returns "node" with a bumped refcnt. 5789 */ 5790 ipsec_tun_pol_t * 5791 get_tunnel_policy(char *name) 5792 { 5793 ipsec_tun_pol_t *node, lookup; 5794 5795 (void) strncpy(lookup.itp_name, name, LIFNAMSIZ); 5796 5797 rw_enter(&tunnel_policy_lock, RW_READER); 5798 node = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, &lookup, NULL); 5799 if (node != NULL) { 5800 ITP_REFHOLD(node); 5801 } 5802 rw_exit(&tunnel_policy_lock); 5803 5804 return (node); 5805 } 5806 5807 /* 5808 * Public interface to walk all tunnel security policies. Useful for spdsock 5809 * DUMP operations. iterator() will not consume a reference. 5810 */ 5811 void 5812 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *), void *arg) 5813 { 5814 ipsec_tun_pol_t *node; 5815 5816 rw_enter(&tunnel_policy_lock, RW_READER); 5817 for (node = avl_first(&tunnel_policies); node != NULL; 5818 node = AVL_NEXT(&tunnel_policies, node)) { 5819 iterator(node, arg); 5820 } 5821 rw_exit(&tunnel_policy_lock); 5822 } 5823 5824 /* 5825 * Initialize policy head. This can only fail if there's a memory problem. 5826 */ 5827 static boolean_t 5828 tunnel_polhead_init(ipsec_policy_head_t *iph) 5829 { 5830 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 5831 iph->iph_refs = 1; 5832 iph->iph_gen = 0; 5833 if (ipsec_alloc_table(iph, tun_spd_hashsize, KM_SLEEP, B_FALSE) != 0) { 5834 ipsec_polhead_free_table(iph); 5835 return (B_FALSE); 5836 } 5837 ipsec_polhead_init(iph, tun_spd_hashsize); 5838 return (B_TRUE); 5839 } 5840 5841 /* 5842 * Create a tunnel policy node with "name". Set *errno to 5843 * ENOMEM if there's a memory problem, and to EEXIST if there's an existing 5844 * node.
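 *
 * Hypothetical usage sketch ("ip.tun0" is just an example name):
 *
 *	int rc;
 *	uint64_t gen;
 *	ipsec_tun_pol_t *itp = create_tunnel_policy("ip.tun0", &rc, &gen);
 *	if (itp == NULL)
 *		return (rc);	(rc is ENOMEM or EEXIST)
 *	...
 *	ITP_REFRELE(itp);	(drops the caller's hold; the tree keeps its own)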
/*
 * Create a tunnel policy node with "name".  Sets *errno to ENOMEM if
 * there's a memory problem, and to EEXIST if a node with that name
 * already exists.
 */
ipsec_tun_pol_t *
create_tunnel_policy(char *name, int *errno, uint64_t *gen)
{
	ipsec_tun_pol_t *newbie, *existing;
	avl_index_t where;

	newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
	if (newbie == NULL) {
		*errno = ENOMEM;
		return (NULL);
	}
	if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
		kmem_free(newbie, sizeof (*newbie));
		*errno = ENOMEM;
		return (NULL);
	}

	(void) strncpy(newbie->itp_name, name, LIFNAMSIZ);

	rw_enter(&tunnel_policy_lock, RW_WRITER);
	existing = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, newbie,
	    &where);
	if (existing != NULL) {
		itp_free(newbie);
		*errno = EEXIST;
		rw_exit(&tunnel_policy_lock);
		return (NULL);
	}
	tunnel_policy_gen++;
	*gen = tunnel_policy_gen;
	newbie->itp_refcnt = 2;	/* One for the caller, one for the tree. */
	newbie->itp_next_policy_index = 1;
	avl_insert(&tunnel_policies, newbie, where);
	mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
	newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_policy == NULL)
		goto nomem;
	newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_inactive == NULL) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		goto nomem;
	}

	if (!tunnel_polhead_init(newbie->itp_policy)) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	} else if (!tunnel_polhead_init(newbie->itp_inactive)) {
		IPPH_REFRELE(newbie->itp_policy);
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	}
	rw_exit(&tunnel_policy_lock);

	return (newbie);
nomem:
	/*
	 * Undo the AVL insertion and drop the write lock before freeing,
	 * so we neither leave a freed node in the tree nor return with
	 * tunnel_policy_lock still held.
	 */
	avl_remove(&tunnel_policies, newbie);
	rw_exit(&tunnel_policy_lock);
	*errno = ENOMEM;
	mutex_destroy(&newbie->itp_lock);
	ipsec_fragcache_uninit(&newbie->itp_fragcache);
	kmem_free(newbie, sizeof (*newbie));
	return (NULL);
}

/*
 * We can't call the tun_t lookup function until tun is
 * loaded, so create a dummy function to avoid symbol
 * lookup errors on boot.
 */
/* ARGSUSED */
ipsec_tun_pol_t *
itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af)
{
	return (NULL);	/* Always return NULL. */
}
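/*
 * Illustrative sketch (compiled out): a typical caller handshake with
 * the two-reference convention above.  create_tunnel_policy() hands
 * back one reference for the caller and keeps one for the tree.  The
 * tunnel name used here is hypothetical.
 */
#ifdef IPSEC_SPD_EXAMPLE
static void
tunnel_policy_usage_sketch(void)
{
	ipsec_tun_pol_t *itp;
	uint64_t gen;
	int err;

	itp = create_tunnel_policy("ip.tun0", &err, &gen);
	if (itp == NULL)
		return;		/* err is ENOMEM or EEXIST */

	/* ... install rules under itp->itp_lock ... */

	ITP_REFRELE(itp);	/* drop the caller's ref; the tree keeps its own */
}
#endif	/* IPSEC_SPD_EXAMPLE */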
/*
 * Frag cache code, based on SunScreen 3.2 source
 *	screen/kernel/common/screen_fragcache.c
 */

#define	IPSEC_FRAG_TTL_MAX	5
/*
 * Note that the following parameters create 256 hash buckets
 * with 1024 free entries to be distributed.  The cache is cleaned
 * periodically, and a cleaning pass is also attempted when no free
 * entries remain; the design errs on the side of dropping packets
 * rather than exhausting memory.  We may decide to make the hash
 * factor a tunable if this proves to be a bad decision.
 */
#define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
#define	IPSEC_FRAG_HASH_FACTOR	4
#define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)

#define	IPSEC_FRAG_HASH_MASK	(IPSEC_FRAG_HASH_SLOTS - 1)
#define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
	(((id) / (ushort_t)IPSEC_FRAG_HASH_SLOTS) & IPSEC_FRAG_HASH_MASK))

/* Maximum fragments per packet.  48 bytes payload x 1366 packets > 64KB */
#define	IPSEC_MAX_FRAGS		1366

#define	V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_OFFSET) << 3)
#define	V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_MF)
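/*
 * Worked examples (hypothetical values) for the macros above:
 *
 *   IPSEC_FRAG_HASH_FUNC(0x1234) = 0x34 ^ 0x12 = 0x26, i.e. both bytes
 *   of a 16-bit IPv4 ident contribute, so sequentially assigned idents
 *   do not all pile into neighboring buckets.
 *
 *   A v4 header whose fragment field is 0x20B9 has IPH_MF (0x2000) set
 *   and an offset of 0xB9 = 185 eight-byte units, so V4_FRAG_OFFSET()
 *   yields 185 << 3 = 1480 bytes and V4_MORE_FRAGS() is nonzero.
 */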
/*
 * Initialize an ipsec fragcache instance.
 * Returns B_FALSE if memory allocation fails.
 */
boolean_t
ipsec_fragcache_init(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *ftemp;
	int i;

	mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL);
	frag->itpf_ptr = (ipsec_fragcache_entry_t **)
	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) *
	    IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP);
	if (frag->itpf_ptr == NULL)
		return (B_FALSE);

	ftemp = (ipsec_fragcache_entry_t *)
	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t) *
	    IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP);
	if (ftemp == NULL) {
		kmem_free(frag->itpf_ptr,
		    sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
		return (B_FALSE);
	}

	frag->itpf_freelist = NULL;

	for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) {
		ftemp->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = ftemp;
		ftemp++;
	}

	frag->itpf_expire_hint = 0;

	return (B_TRUE);
}

void
ipsec_fragcache_uninit(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *fep, *base;
	int i;

	mutex_enter(&frag->itpf_lock);
	if (frag->itpf_ptr) {
		/* Delete any existing fragcache entry chains. */
		for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
			fep = (frag->itpf_ptr)[i];
			while (fep != NULL) {
				/* Returned fep is next in chain or NULL */
				fep = fragcache_delentry(i, fep, frag);
			}
		}
		/*
		 * Every entry is now back on the freelist.  Since the
		 * freelist is LIFO, list order no longer tells us which
		 * entry began the original allocation, so walk it for
		 * the lowest address and free from there.
		 *
		 * XXX - If we ever dynamically grow the freelist
		 * then we'll have to free entries individually
		 * or determine how many entries or chunks we have
		 * grown since the initial allocation.
		 */
		base = frag->itpf_freelist;
		for (fep = base; fep != NULL; fep = fep->itpfe_next) {
			if (fep < base)
				base = fep;
		}
		kmem_free(base,
		    sizeof (ipsec_fragcache_entry_t) *
		    IPSEC_FRAG_HASH_SIZE);
		/* Free the hash bucket array. */
		kmem_free(frag->itpf_ptr,
		    sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
	}
	mutex_exit(&frag->itpf_lock);
	mutex_destroy(&frag->itpf_lock);
}
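/*
 * Illustrative sketch (compiled out) of the pullup pattern
 * ipsec_fragcache_add() below uses for IPv6: extension-header parsing
 * wants contiguous data, so a fully pulled-up copy of the message is
 * parsed and the result copied out before the copy is freed.  The
 * helper name is hypothetical.
 */
#ifdef IPSEC_SPD_EXAMPLE
static boolean_t
inner_v6_proto_sketch(mblk_t *mp, int outer_hdr_len, uint8_t *protop)
{
	mblk_t *pulled;
	ip6_t *ip6h;
	uint16_t hdr_len;
	uint8_t *nexthdrp;

	if ((pulled = msgpullup(mp, -1)) == NULL)
		return (B_FALSE);	/* allocation failure */
	ip6h = (ip6_t *)(pulled->b_rptr + outer_hdr_len);
	if (!ip_hdr_length_nexthdr_v6(pulled, ip6h, &hdr_len, &nexthdrp)) {
		freemsg(pulled);	/* malformed packet */
		return (B_FALSE);
	}
	*protop = *nexthdrp;	/* copy out before freeing the pullup */
	freemsg(pulled);
	return (B_TRUE);
}
#endif	/* IPSEC_SPD_EXAMPLE */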
/*
 * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
 * Returns mp if a whole fragment has been assembled, NULL otherwise.
 */
mblk_t *
ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
    int outer_hdr_len)
{
	boolean_t is_v4;
	time_t itpf_time;
	ipha_t *iph;
	ipha_t *oiph;
	ip6_t *ip6h = NULL;
	uint8_t v6_proto;
	uint8_t *v6_proto_p;
	uint16_t ip6_hdr_length;
	ip6_pkt_t ipp;
	ip6_frag_t *fraghdr;
	ipsec_fragcache_entry_t *fep;
	int i;
	mblk_t *nmp, *prevmp, *spare_mp = NULL;
	int firstbyte, lastbyte;
	int offset;
	int last;
	boolean_t inbound = (ipsec_mp != NULL);
	mblk_t *first_mp = inbound ? ipsec_mp : mp;

	mutex_enter(&frag->itpf_lock);

	oiph = (ipha_t *)mp->b_rptr;
	iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);
	if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
		is_v4 = B_TRUE;
	} else {
		ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
		if ((spare_mp = msgpullup(mp, -1)) == NULL) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_nomem, &spd_dropper);
			return (NULL);
		}
		ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len);

		/*
		 * Find the upper-layer protocol; failure here means a
		 * malformed packet.
		 */
		if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length,
		    &v6_proto_p)) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_malformed_packet, &spd_dropper);
			freemsg(spare_mp);
			return (NULL);
		}
		v6_proto = *v6_proto_p;

		bzero(&ipp, sizeof (ipp));
		(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
		if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
			/*
			 * We think this is a fragment, but didn't find
			 * a fragment header.  Something is wrong.
			 */
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    &ipdrops_spd_malformed_frag, &spd_dropper);
			freemsg(spare_mp);
			return (NULL);
		}
		fraghdr = ipp.ipp_fraghdr;
		is_v4 = B_FALSE;
	}

	/*
	 * Clean the cache if the oldest entry may have expired.  This
	 * could be driven from a timer instead, but doing it on the add
	 * path keeps it free of the race conditions a time-based
	 * cleanup is susceptible to; the tradeoff is that cleaning only
	 * happens when fragments arrive.
	 */
	itpf_time = gethrestime_sec();
	if (itpf_time >= frag->itpf_expire_hint)
		ipsec_fragcache_clean(frag);

	/* Look up to see if there is an existing entry. */
	if (is_v4)
		i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident);
	else
		i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident);

	for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) {
		if (is_v4) {
			ASSERT(iph != NULL);
			if ((fep->itpfe_id == iph->ipha_ident) &&
			    (fep->itpfe_src == iph->ipha_src) &&
			    (fep->itpfe_dst == iph->ipha_dst) &&
			    (fep->itpfe_proto == iph->ipha_protocol))
				break;
		} else {
			ASSERT(fraghdr != NULL);
			ASSERT(fep != NULL);
			if ((fep->itpfe_id == fraghdr->ip6f_ident) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6,
			    &ip6h->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6,
			    &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto))
				break;
		}
	}

	if (is_v4) {
		firstbyte = V4_FRAG_OFFSET(iph);
		lastbyte = firstbyte + ntohs(iph->ipha_length) -
		    IPH_HDR_LENGTH(iph);
		last = (V4_MORE_FRAGS(iph) == 0);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, "
		    "last = %d, id = %d\n", firstbyte, lastbyte, last,
		    iph->ipha_ident);
#endif
	} else {
		firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
		lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
		    sizeof (ip6_t) - ip6_hdr_length;
		last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0;
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, "
		    "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n",
		    firstbyte, lastbyte, last, fraghdr->ip6f_ident,
		    fraghdr, spare_mp);
#endif
	}

	/* Check for bogus fragments and delete the entry. */
	if (firstbyte > 0 && firstbyte <= 8) {
		if (fep != NULL)
			(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    &ipdrops_spd_malformed_frag, &spd_dropper);
		freemsg(spare_mp);
		return (NULL);
	}
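	/*
	 * Rationale for the check above: fragment offsets are multiples
	 * of eight, so a nonzero firstbyte of 8 or less means fragment
	 * offset 1 -- a fragment evidently crafted to overwrite the TCP
	 * flags of fragment zero (the tiny/overlapping-fragment evasion
	 * described in RFC 1858).  The whole reassembly entry is
	 * discarded along with the packet.
	 */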
	/* Not found, so allocate a new entry. */
	if (fep == NULL) {
		if (frag->itpf_freelist == NULL) {
			/* See if cleaning yields some space. */
			ipsec_fragcache_clean(frag);
			if (frag->itpf_freelist == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				freemsg(spare_mp);
				return (NULL);
			}
		}

		fep = frag->itpf_freelist;
		frag->itpf_freelist = fep->itpfe_next;

		if (is_v4) {
			bcopy((caddr_t)&iph->ipha_src,
			    (caddr_t)&fep->itpfe_src,
			    sizeof (struct in_addr));
			bcopy((caddr_t)&iph->ipha_dst,
			    (caddr_t)&fep->itpfe_dst,
			    sizeof (struct in_addr));
			fep->itpfe_id = iph->ipha_ident;
			fep->itpfe_proto = iph->ipha_protocol;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		} else {
			bcopy((in6_addr_t *)&ip6h->ip6_src,
			    (in6_addr_t *)&fep->itpfe_src6,
			    sizeof (struct in6_addr));
			bcopy((in6_addr_t *)&ip6h->ip6_dst,
			    (in6_addr_t *)&fep->itpfe_dst6,
			    sizeof (struct in6_addr));
			fep->itpfe_id = fraghdr->ip6f_ident;
			fep->itpfe_proto = v6_proto;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		}
		itpf_time = gethrestime_sec();
		fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1;
		fep->itpfe_last = 0;
		fep->itpfe_fraglist = NULL;
		fep->itpfe_depth = 0;
		fep->itpfe_next = (frag->itpf_ptr)[i];
		(frag->itpf_ptr)[i] = fep;

		if (frag->itpf_expire_hint > fep->itpfe_exp)
			frag->itpf_expire_hint = fep->itpfe_exp;
	}
	freemsg(spare_mp);

	/*
	 * Insert the fragment into the list, which is kept ordered by
	 * the starting offset of the fragments.
	 */
	prevmp = NULL;
	for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) {
		ipha_t *niph;
		ipha_t *oniph;
		ip6_t *nip6h;
		ip6_pkt_t nipp;
		ip6_frag_t *nfraghdr;
		uint16_t nip6_hdr_length;
		uint8_t *nv6_proto_p;
		int nfirstbyte, nlastbyte;
		char *data, *ndata;
		mblk_t *nspare_mp = NULL;
		mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp);
		int hdr_len;

		oniph = (ipha_t *)mp->b_rptr;
		nip6h = NULL;
		niph = NULL;

		/*
		 * Determine outer header type and length and set
		 * pointers appropriately.
		 */
		if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION);
			if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			nip6h = (ip6_t *)nspare_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
			    &nip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0);
		}

		/*
		 * Determine inner header type and length and set
		 * pointers appropriately.
		 */
		if (is_v4) {
			if (niph == NULL) {
				/* Was v6 outer. */
				niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
			}
			nfirstbyte = V4_FRAG_OFFSET(niph);
			nlastbyte = nfirstbyte + ntohs(niph->ipha_length) -
			    IPH_HDR_LENGTH(niph);
		} else {
			if ((nspare_mp == NULL) &&
			    ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
			    &nip6_hdr_length, &nv6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    &ipdrops_spd_malformed_frag, &spd_dropper);
				ipsec_freemsg_chain(nspare_mp);
				return (NULL);
			}
			bzero(&nipp, sizeof (nipp));
			(void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL);
			nfraghdr = nipp.ipp_fraghdr;
			nfirstbyte = ntohs(nfraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) +
			    sizeof (ip6_t) - nip6_hdr_length;
		}
		ipsec_freemsg_chain(nspare_mp);

		/* Check for overlapping fragments. */
		if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) {
			/*
			 * Overlap Check:
			 *  ~~~~---------	# Check if the newly
			 *  ~   ndata_mp|	# received fragment
			 *  ~~~~---------	# overlaps with the
			 *  ---------~~~~~~	# current fragment.
			 *  |   mp	~
			 *  ---------~~~~~~
			 */
			if (is_v4) {
				data = (char *)iph + IPH_HDR_LENGTH(iph) +
				    firstbyte - nfirstbyte;
				ndata = (char *)niph + IPH_HDR_LENGTH(niph);
			} else {
				data = (char *)ip6h +
				    nip6_hdr_length + firstbyte -
				    nfirstbyte;
				ndata = (char *)nip6h + nip6_hdr_length;
			}
			if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) -
			    firstbyte)) {
				/* Overlapping data does not match. */
				(void) fragcache_delentry(i, fep, frag);
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_overlap_frag, &spd_dropper);
				return (NULL);
			}
			/* Part of defense for jolt2.c fragmentation attack */
			if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
				/*
				 * Check for identical or subset fragments:
				 *  ----------	    ~~~~--------~~~~~
				 *  |	nmp   |	 or ~	 nmp	~
				 *  ----------	    ~~~~--------~~~~~
				 *  ----------		  ------
				 *  |	mp    |		  | mp |
				 *  ----------		  ------
				 */
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    &ipdrops_spd_evil_frag, &spd_dropper);
				return (NULL);
			}
		}
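		/*
		 * Worked example for the window above (hypothetical
		 * numbers): if cached fragment nmp covers [1480, 2960)
		 * and the new fragment covers [2200, 2960), then
		 * firstbyte (2200) lands inside [nfirstbyte, nlastbyte),
		 * so the MIN(lastbyte, nlastbyte) - firstbyte = 760
		 * shared bytes are bcmp()ed; a data mismatch kills the
		 * whole reassembly, and since [2200, 2960) is also a
		 * subset of [1480, 2960), the jolt2 defense drops it as
		 * an evil duplicate rather than let two versions of the
		 * same payload coexist.
		 */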
		/* Correct location for this fragment? */
		if (firstbyte <= nfirstbyte) {
			/*
			 * Check if the tail end of the new fragment overlaps
			 * with the head of the current fragment.
			 *  --------~~~~~~~
			 *  |  nmp	  ~
			 *  --------~~~~~~~
			 *  ~~~~~--------
			 *  ~	mp	|
			 *  ~~~~~--------
			 */
			if (lastbyte > nfirstbyte) {
				/* Fragments overlap. */
				if (is_v4) {
					data = (char *)iph +
					    IPH_HDR_LENGTH(iph) + firstbyte -
					    nfirstbyte;
					ndata = (char *)niph +
					    IPH_HDR_LENGTH(niph);
				} else {
					data = (char *)ip6h +
					    nip6_hdr_length + firstbyte -
					    nfirstbyte;
					ndata = (char *)nip6h + nip6_hdr_length;
				}
				if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
				    - nfirstbyte)) {
					/* Overlap mismatch */
					(void) fragcache_delentry(i, fep, frag);
					mutex_exit(&frag->itpf_lock);
					ip_drop_packet(first_mp, inbound, NULL,
					    NULL, &ipdrops_spd_overlap_frag,
					    &spd_dropper);
					return (NULL);
				}
			}

			/*
			 * Fragment does not illegally overlap and can now
			 * be inserted into the chain.
			 */
			break;
		}

		prevmp = nmp;
	}
	first_mp->b_next = nmp;

	if (prevmp == NULL) {
		fep->itpfe_fraglist = first_mp;
	} else {
		prevmp->b_next = first_mp;
	}
	if (last)
		fep->itpfe_last = 1;

	/* Part of defense for jolt2.c fragmentation attack */
	if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
		(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    &ipdrops_spd_max_frags, &spd_dropper);
		return (NULL);
	}

	/* Check for a complete packet. */
	if (!fep->itpfe_last) {
		mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "Fragment cached, not last.\n");
#endif
		return (NULL);
	}

#ifdef FRAGCACHE_DEBUG
	cmn_err(CE_WARN, "Last fragment cached.\n");
	cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp);
#endif
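	/*
	 * Worked example for the completeness walk below (hypothetical
	 * numbers): with cached fragments covering [0, 1480),
	 * [1480, 2960) and a final fragment [2960, 3400) with MF clear,
	 * every firstbyte is <= the running offset (0, then 1480, then
	 * 2960), so the walk reaches the last fragment and returns the
	 * whole chain.  A chain of [0, 1480), [2960, 3400) instead
	 * stops at firstbyte 2960 > offset 1480: a hole, so NULL.
	 */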
	offset = 0;
	for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
		mblk_t *data_mp = (inbound ? mp->b_cont : mp);
		int hdr_len;

		oiph = (ipha_t *)data_mp->b_rptr;
		ip6h = NULL;
		iph = NULL;

		spare_mp = NULL;
		if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
			if ((spare_mp = msgpullup(data_mp, -1)) == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			ip6h = (ip6_t *)spare_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
		}

		/* Calculate the current fragment's start/end. */
		if (is_v4) {
			if (iph == NULL) {
				/* Was v6 outer. */
				iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
			}
			firstbyte = V4_FRAG_OFFSET(iph);
			lastbyte = firstbyte + ntohs(iph->ipha_length) -
			    IPH_HDR_LENGTH(iph);
		} else {
			if ((spare_mp == NULL) &&
			    ((spare_mp = msgpullup(data_mp, -1)) == NULL)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_nomem, &spd_dropper);
				return (NULL);
			}
			ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_malformed_frag, &spd_dropper);
				ipsec_freemsg_chain(spare_mp);
				return (NULL);
			}
			v6_proto = *v6_proto_p;
			bzero(&ipp, sizeof (ipp));
			(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
			fraghdr = ipp.ipp_fraghdr;
			firstbyte = ntohs(fraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
			    sizeof (ip6_t) - ip6_hdr_length;
		}

		/*
		 * If this fragment starts beyond the current offset, we
		 * have a missing fragment, so return NULL.
		 */
		if (firstbyte > offset) {
			mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
			/*
			 * Note, this can happen when the last frag
			 * gets sent through because it is smaller
			 * than the MTU.  It is not necessarily an
			 * error condition.
			 */
			cmn_err(CE_WARN, "Frag greater than offset! : "
			    "missing fragment: firstbyte = %d, offset = %d, "
			    "mp = %p\n", firstbyte, offset, mp);
#endif
			ipsec_freemsg_chain(spare_mp);
			return (NULL);
		}
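		/*
		 * Note on the length check below (hypothetical
		 * numbers): a reassembled datagram is bounded by the
		 * 16-bit IP length field, so a final v4 fragment at
		 * firstbyte 65528 carrying ipha_length 528 claims data
		 * through byte 66056 > 65535 -- the classic
		 * "ping-o-death" overflow -- and the chain is dropped.
		 */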
		/*
		 * If we are at the last fragment, we have the complete
		 * packet, so rechain things and return it to the caller
		 * for processing.
		 */
		if ((is_v4 && !V4_MORE_FRAGS(iph)) ||
		    (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) {
			mp = fep->itpfe_fraglist;
			fep->itpfe_fraglist = NULL;
			(void) fragcache_delentry(i, fep, frag);
			mutex_exit(&frag->itpf_lock);

			if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) >
			    65535)) || (!is_v4 && (firstbyte +
			    ntohs(ip6h->ip6_plen) > 65535))) {
				/* It is an invalid "ping-o-death" packet. */
				/* Discard it. */
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    &ipdrops_spd_evil_frag, &spd_dropper);
				ipsec_freemsg_chain(spare_mp);
				return (NULL);
			}
#ifdef FRAGCACHE_DEBUG
			cmn_err(CE_WARN, "Fragcache returning mp = %p, "
			    "mp->b_next = %p", mp, mp->b_next);
#endif
			ipsec_freemsg_chain(spare_mp);
			/*
			 * For the inbound case, mp has an ipsec_in
			 * b_next'd chain; for the outbound case, it is
			 * just a data mp chain.
			 */
			return (mp);
		}
		ipsec_freemsg_chain(spare_mp);

		/*
		 * Update the new ending offset if this fragment extends
		 * the packet.
		 */
		if (offset < lastbyte)
			offset = lastbyte;
	}

	mutex_exit(&frag->itpf_lock);

	/* Didn't find the last fragment, so return NULL. */
	return (NULL);
}

static void
ipsec_fragcache_clean(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *fep;
	int i;
	ipsec_fragcache_entry_t *earlyfep = NULL;
	time_t itpf_time;
	int earlyexp;
	int earlyi = 0;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	itpf_time = gethrestime_sec();
	earlyexp = itpf_time + 10000;

	for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
		fep = (frag->itpf_ptr)[i];
		while (fep) {
			if (fep->itpfe_exp < itpf_time) {
				/* Found an expired entry. */
				fep = fragcache_delentry(i, fep, frag);
			} else {
				if (fep->itpfe_exp < earlyexp) {
					earlyfep = fep;
					earlyexp = fep->itpfe_exp;
					earlyi = i;
				}
				fep = fep->itpfe_next;
			}
		}
	}

	frag->itpf_expire_hint = earlyexp;

	/*
	 * If nothing expired and the freelist is still empty, evict the
	 * live entry that is closest to expiring.
	 */
	if (frag->itpf_freelist == NULL)
		(void) fragcache_delentry(earlyi, earlyfep, frag);
}
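/*
 * Illustrative sketch (compiled out): the eviction policy above,
 * restated.  When nothing has expired but the cache is full, the
 * victim is simply the live entry with the smallest itpfe_exp.  The
 * function name is hypothetical.
 */
#ifdef IPSEC_SPD_EXAMPLE
static ipsec_fragcache_entry_t *
fragcache_pick_victim(ipsec_fragcache_t *frag, int *slotp)
{
	ipsec_fragcache_entry_t *fep, *victim = NULL;
	int i;

	for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
		for (fep = frag->itpf_ptr[i]; fep != NULL;
		    fep = fep->itpfe_next) {
			if (victim == NULL ||
			    fep->itpfe_exp < victim->itpfe_exp) {
				victim = fep;
				*slotp = i;
			}
		}
	}
	return (victim);
}
#endif	/* IPSEC_SPD_EXAMPLE */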
static ipsec_fragcache_entry_t *
fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep,
    ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *targp;
	ipsec_fragcache_entry_t *nextp = fep->itpfe_next;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	/* Free up any fragment list still in the cache entry. */
	ipsec_freemsg_chain(fep->itpfe_fraglist);

	targp = (frag->itpf_ptr)[slot];
	ASSERT(targp != NULL);

	if (targp == fep) {
		/* Unlink from the head of the hash chain. */
		(frag->itpf_ptr)[slot] = nextp;
		/* Link into the free list. */
		fep->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = fep;
		return (nextp);
	}

	/* Maybe we should use a doubly linked list to make updates faster. */
	/* Must be past the front of the chain. */
	while (targp) {
		if (targp->itpfe_next == fep) {
			/* Unlink from the hash chain. */
			targp->itpfe_next = nextp;
			/* Link into the free list. */
			fep->itpfe_next = frag->itpf_freelist;
			frag->itpf_freelist = fep;
			return (nextp);
		}
		targp = targp->itpfe_next;
		ASSERT(targp != NULL);
	}
	/* NOTREACHED */
	return (NULL);
}