/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/iptun.h>
#include <inet/iptun/iptun_impl.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, ip_recv_attr_t *, netstack_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(ipsec_stack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
#define	SEL_POST_FRAG	0x0008
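
/*
 * These are independent bit-flags, so callers may OR them together,
 * e.g. (SEL_PORT_POLICY | SEL_IS_ICMP) to request port-sensitive
 * selector extraction from an ICMP packet.
 */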

/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG }
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(ip_recv_attr_t *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **, netstack_t *);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

uint32_t ipsec_spd_hashsize;
uint32_t tun_spd_hashsize;

#define	IPSEC_SEL_NOHASH ((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *, ipsec_stack_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *, ipsec_stack_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *, ipsec_stack_t *ipss);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *,
    int, ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)
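
/*
 * Example: with align == 64, ALGBITS_ROUND_DOWN(130, 64) == 128 and
 * ALGBITS_ROUND_UP(130, 64) == 192; key-size bit counts get clamped to
 * whole "align"-sized steps.
 */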

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2)						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	    (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)

/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries.
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;

	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);
		mp = mpnext;
	}
}

/*
 * ip_drop all messages in an mblk chain
 * Can handle a b_next chain of ip_recv_attr_t mblks, or just a b_next chain
 * of data.
 */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *ill,
    struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;

	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		if (ip_recv_attr_is_mblk(mp))
			mp = ip_recv_attr_free_mblk(mp);
		ip_drop_packet(mp, inbound, ill, counter, who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * The in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsp_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().
	 * This gives us a placeholder in the ordering just before the
	 * first entry with a key >= the one we're looking for, so we
	 * can walk forward from that point to get the remaining entries
	 * with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}

/*
 * Free what ipsec_alloc_table allocated.
 */
void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;
	int i;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		for (i = 0; i < ipr->ipr_nchains; i++) {
			ASSERT(ipr->ipr_hash[i].hash_head == NULL);
		}
		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
		ipr->ipr_hash = NULL;
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
	}
	ipsec_polhead_free_table(iph);
}

/*
 * Free the IPsec stack instance.
 */
/* ARGSUSED */
static void
ipsec_stack_fini(netstackid_t stackid, void *arg)
{
	ipsec_stack_t *ipss = (ipsec_stack_t *)arg;
	void *cookie;
	ipsec_tun_pol_t *node;
	netstack_t *ns = ipss->ipsec_netstack;
	int i;
	ipsec_algtype_t algtype;

	ipsec_loader_destroy(ipss);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&ipss->ipsec_tunnel_policies,
	    &cookie)) != NULL) {
		ITP_REFRELE(node, ns);
	}
	avl_destroy(&ipss->ipsec_tunnel_policies);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	rw_destroy(&ipss->ipsec_tunnel_policy_lock);

	ipsec_config_flush(ns);

	ipsec_kstat_destroy(ipss);

	ip_drop_unregister(&ipss->ipsec_dropper);

	ip_drop_unregister(&ipss->ipsec_spd_dropper);
	ip_drop_destroy(ipss);
	/*
	 * Globals start with ref == 1 to prevent IPPH_REFRELE() from
	 * attempting to free them, hence they should have 1 now.
	 */
	ipsec_polhead_destroy(&ipss->ipsec_system_policy);
	ASSERT(ipss->ipsec_system_policy.iph_refs == 1);
	ipsec_polhead_destroy(&ipss->ipsec_inactive_policy);
	ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
		ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head);
		ipss->ipsec_action_hash[i].hash_head = NULL;
		mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock));
	}

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
		ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock));
	}

	mutex_enter(&ipss->ipsec_alg_lock);
	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		int nalgs = ipss->ipsec_nalgs[algtype];

		for (i = 0; i < nalgs; i++) {
			if (ipss->ipsec_alglists[algtype][i] != NULL)
				ipsec_alg_unreg(algtype, i, ns);
		}
	}
	mutex_exit(&ipss->ipsec_alg_lock);
	mutex_destroy(&ipss->ipsec_alg_lock);

	ipsid_gc(ns);
	ipsid_fini(ns);

	(void) ipsec_free_tables(ipss);
	kmem_free(ipss, sizeof (*ipss));
}

void
ipsec_policy_g_destroy(void)
{
	kmem_cache_destroy(ipsec_action_cache);
	kmem_cache_destroy(ipsec_sel_cache);
	kmem_cache_destroy(ipsec_pol_cache);

	ipsec_unregister_prov_update();

	netstack_unregister(NS_IPSEC);
}


/*
 * Free what ipsec_alloc_tables allocated.
 * Called when table allocation fails to free the table.
 */
static int
ipsec_free_tables(ipsec_stack_t *ipss)
{
	int i;

	if (ipss->ipsec_sel_hash != NULL) {
		for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
			ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		}
		kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize *
		    sizeof (*ipss->ipsec_sel_hash));
		ipss->ipsec_sel_hash = NULL;
		ipss->ipsec_spd_hashsize = 0;
	}
	ipsec_polhead_free_table(&ipss->ipsec_system_policy);
	ipsec_polhead_free_table(&ipss->ipsec_inactive_policy);

	return (ENOMEM);
}

/*
 * Attempt to allocate the tables in a single policy head.
 * Return nonzero on failure after cleaning up any work in progress.
 */
int
ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
    boolean_t global_cleanup, netstack_t *ns)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;
		ipr->ipr_hash = kmem_zalloc(nchains *
		    sizeof (ipsec_policy_hash_t), kmflag);
		if (ipr->ipr_hash == NULL)
			return (global_cleanup ?
			    ipsec_free_tables(ns->netstack_ipsec) :
			    ENOMEM);
	}
	return (0);
}

/*
 * Attempt to allocate the various tables.  Return nonzero on failure
 * after cleaning up any work in progress.
 */
static int
ipsec_alloc_tables(int kmflag, netstack_t *ns)
{
	int error;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	error = ipsec_alloc_table(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize *
	    sizeof (*ipss->ipsec_sel_hash), kmflag);

	if (ipss->ipsec_sel_hash == NULL)
		return (ipsec_free_tables(ipss));

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}

static boolean_t
ipsec_kstat_init(ipsec_stack_t *ipss)
{
	ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net",
	    KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid);

	if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL)
		return (B_FALSE);

	ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data;

#define	KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64)
	KI(esp_stat_in_requests);
	KI(esp_stat_in_discards);
	KI(esp_stat_lookup_failure);
	KI(ah_stat_in_requests);
	KI(ah_stat_in_discards);
	KI(ah_stat_lookup_failure);
	KI(sadb_acquire_maxpackets);
	KI(sadb_acquire_qhiwater);
#undef KI

	kstat_install(ipss->ipsec_ksp);
	return (B_TRUE);
}

static void
ipsec_kstat_destroy(ipsec_stack_t *ipss)
{
	kstat_delete_netstack(ipss->ipsec_ksp,
	    ipss->ipsec_netstack->netstack_stackid);
	ipss->ipsec_kstats = NULL;
}

/*
 * Initialize the IPsec stack instance.
 */
/* ARGSUSED */
static void *
ipsec_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsec_stack_t *ipss;
	int i;

	ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP);
	ipss->ipsec_netstack = ns;

	/*
	 * FIXME: netstack_ipsec is used by some of the routines we call
	 * below, but it isn't set until this routine returns.
	 * Either we introduce optional xxx_stack_alloc() functions
	 * that will be called by the netstack framework before xxx_stack_init,
	 * or we switch spd.c and sadb.c to operate on ipsec_stack_t
	 * (latter has some include file order issues for sadb.h, but makes
	 * sense if we merge some of the ipsec related stack_t's together.)
	 */
	ns->netstack_ipsec = ipss;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ?
	    IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize;

	if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipss->ipsec_spd_hashsize);
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipss->ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP, ns);
	}

	/* Just set a default for tunnels. */
	ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ?
	    TUN_SPDHASH_DEFAULT : tun_spd_hashsize;

	ipsid_init(ns);
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	ipss->ipsec_system_policy.iph_refs = 1;
	ipss->ipsec_inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize);
	ipsec_polhead_init(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize);
	rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), 0);

	ipss->ipsec_next_policy_index = 1;

	rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipss->ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++)
		mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL);
	for (i = 0; i < IPSEC_NALGTYPES; i++) {
		ipss->ipsec_nalgs[i] = 0;
	}

	ip_drop_init(ipss);
	ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD");

	/* IP's IPsec code calls the packet dropper */
	ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing");

	(void) ipsec_kstat_init(ipss);

	ipsec_loader_init(ipss);
	ipsec_loader_start(ipss);

	return (ipss);
}

/* Global across all stack instances */
void
ipsec_policy_g_init(void)
{
	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsec_stack_t's.
	 */
	netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini);
}

/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipss->ipsec_nalgs[at];

	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipss->ipsec_sortlist[at][i];
			ipss->ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		} /* Else just continue. */
	}

	/* Store holder in last slot. */
	ipss->ipsec_sortlist[at][i] = holder;
}

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	boolean_t copyback = B_FALSE;
	int i;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	int newcount = ipss->ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback) {
			ipss->ipsec_sortlist[at][i-1] =
			    ipss->ipsec_sortlist[at][i];
		} else if (ipss->ipsec_sortlist[at][i] == algid) {
			copyback = B_TRUE;
		}
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype, ns);
	ipss->ipsec_alglists[algtype][alg->alg_id] = alg;

	ipss->ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id, ns);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]);
	ipss->ipsec_alglists[algtype][algid] = NULL;

	ipss->ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid, ns);
}

/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_system_policy;

	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy;

	IPPH_REFHOLD(h);
	return (h);
}
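
/*
 * Both functions above return a held reference (IPPH_REFHOLD); callers
 * are expected to IPPH_REFRELE the policy head when done with it.  The
 * global heads start life with iph_refs == 1 precisely so that such a
 * release never frees them.
 */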

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive,
    netstack_t *ns)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}
		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags(ns->netstack_ipsec);
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipsec_swap_policy(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns);
}

/*
 * Clone one policy rule..
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_netstack = src->ipsp_netstack;
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}
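
/*
 * ipsec_insert_always() relies on the tie-breakers in
 * ipsec_policy_cmpbyid(): a new node is always distinct from every node
 * already in the tree (tie-breaker #2 falls back to comparing virtual
 * addresses), so the avl_find() above must fail and avl_insert() gets a
 * valid insertion point.
 */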

static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);

		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}

/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph,
    netstack_t *ns)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph, ns);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];

		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph, ns);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	return (ipsec_copy_polhead(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns));
}

/*
 * Extract the string from ipsec_policy_failure_msgs[type] and
 * log it.
 */
void
ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h,
    boolean_t secure, netstack_t *ns)
{
	char sbuf[INET6_ADDRSTRLEN];
	char dbuf[INET6_ADDRSTRLEN];
	char *s;
	char *d;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL) {
		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
	} else {
		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));
	}

	/* Always bump the policy failure counter. */
	ipss->ipsec_policy_failure_count[type]++;

	ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    ipsec_policy_failure_msgs[type], func_name,
	    (secure ? "secure" : "not secure"), s, d);
}
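
/*
 * Note that the console/syslog output above is funneled through
 * ipsec_rl_strlog() below, so a flood of policy failures produces at
 * most one message per 'ipsec_policy_log_interval'; the failure counter,
 * however, is always bumped.
 */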

/*
 * Rate-limiting front-end to strlog() for AH and ESP.  Uses the ndd variables
 * in /dev/ip and the same rate-limiting clock so that there's a single
 * knob to turn to throttle the rate of messages.
 */
void
ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl,
    char *fmt, ...)
{
	va_list adx;
	hrtime_t current = gethrtime();
	ip_stack_t *ipst = ns->netstack_ip;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	sl |= SL_CONSOLE;
	/*
	 * Throttle logging to stop syslog from being swamped.  If variable
	 * 'ipsec_policy_log_interval' is zero, don't log any messages at
	 * all, otherwise log only one message every
	 * 'ipsec_policy_log_interval' msec.  Convert interval (in msec) to
	 * hrtime (in nsec).
	 */

	if (ipst->ips_ipsec_policy_log_interval) {
		if (ipss->ipsec_policy_failure_last +
		    MSEC2NSEC(ipst->ips_ipsec_policy_log_interval) <=
		    current) {
			va_start(adx, fmt);
			(void) vstrlog(mid, sid, level, sl, fmt, adx);
			va_end(adx);
			ipss->ipsec_policy_failure_last = current;
		}
	}
}

void
ipsec_config_flush(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER);
	ipsec_polhead_flush(&ipss->ipsec_system_policy, ns);
	ipss->ipsec_next_policy_index = 1;
	rw_exit(&ipss->ipsec_system_policy.iph_lock);
	ipsec_action_reclaim_stack(ipss);
}

/*
 * Clip a policy's min/max keybits vs. the capabilities of the
 * algorithm.
 */
static void
act_alg_adjust(uint_t algtype, uint_t algid,
    uint16_t *minbits, uint16_t *maxbits, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid];

	if (algp != NULL) {
		/*
		 * If passed-in minbits is zero, we assume the caller trusts
		 * us with setting the minimum key size.  We pick the
		 * algorithm's DEFAULT key size for the minimum in this case.
		 */
		if (*minbits == 0) {
			*minbits = algp->alg_default_bits;
			ASSERT(*minbits >= algp->alg_minbits);
		} else {
			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
			    algp->alg_minbits);
		}
		if (*maxbits == 0)
			*maxbits = algp->alg_maxbits;
		else
			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
			    algp->alg_maxbits);
		ASSERT(*minbits <= *maxbits);
	} else {
		*minbits = 0;
		*maxbits = 0;
	}
}

/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}
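
/*
 * Example of the clamping done by act_alg_adjust() above: for an
 * algorithm with alg_minbits == 128, alg_maxbits == 256 and
 * alg_default_bits == 128, a request of min == 0, max == 0 becomes
 * 128/256, while a requested min of 512 is clamped down to 256.
 */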

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX
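
	/*
	 * Example: if only encryption is wildcarded and four ESP
	 * encryption algorithms are loaded alongside the SADB_EALG_NONE
	 * placeholder, the "nalgs--" above leaves action_count == 4; the
	 * none/any placeholder is skipped again in the loops below.
	 */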

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss) \
	((wild) ? (ipss->ipsec_sortlist[type][idx]) : (idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx,
		    ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}

/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(const ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg);
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	/*
	 * SADB_AALG_ANY is a placeholder to distinguish "any" from
	 * "none" above.  If auth is required, as determined above,
	 * SADB_AALG_ANY becomes 0, which is the representation
	 * of "any" and "none" in PF_KEY v2.
	 */
	ipp->ipp_auth_alg = (req->ipsr_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_auth_alg : 0;
	ipp->ipp_esp_auth_alg = (req->ipsr_esp_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_esp_auth_alg : 0;
}

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(const ipsec_req_t *req, ipsec_act_t **actp,
    uint_t *nactp, netstack_t *ns)
{
	struct ipsec_act act;

	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp, ns);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}

/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (connp->conn_latch_in_action != NULL) {
			rv = ipsec_req_from_act(connp->conn_latch_in_action,
			    req);
			goto done;
		}
		if (connp->conn_latch_in_policy != NULL) {
			rv = ipsec_req_from_act(
			    connp->conn_latch_in_policy->ipsp_act, req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	return (rv);
}

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *data_mp, ip_recv_attr_t *ira,
    ipsec_policy_t *ipsp)
{
	if (!(ira->ira_flags & IRAF_IPSEC_SECURE))
		return (data_mp);

	ASSERT(ira->ira_flags & IRAF_LOOPBACK);

	IPPOL_REFRELE(ipsp);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (data_mp);
}

/*
 * Check that packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ip_recv_attr_t *ira, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique, netstack_t *ns)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
	ASSERT(!(ira->ira_flags & IRAF_LOOPBACK));

	ah_assoc = ira->ira_ipsec_ah_sa;
	esp_assoc = ira->ira_ipsec_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */

	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_ah_innermismatch);
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_esp_innermismatch);
		return (B_FALSE);
	}
	return (B_TRUE);
}
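
/*
 * In ipsec_check_ipsecin_unique() above, an SA whose ipsa_unique_mask is
 * zero matches any packet; otherwise the packed port/protocol value for
 * this packet (pkt_unique) must equal the SA's ipsa_unique_id under that
 * mask.
 */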

static boolean_t
ipsec_check_ipsecin_action(ip_recv_attr_t *ira, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    netstack_t *ns)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ira->ira_flags & IRAF_LOOPBACK) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * IXAF_TRUSTED_ICMP is initialized in ip.c and ip6.c.
		 */
		if (ap == ira->ira_ipsec_action ||
		    (ira->ira_flags & IRAF_TRUSTED_ICMP))
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = DROPPER(ipss, ipds_spd_loopback_mismatch);
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!(ira->ira_flags & IRAF_TRUSTED_ICMP));
	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);

	ah_assoc = ira->ira_ipsec_ah_sa;
	esp_assoc = ira->ira_ipsec_esp_sa;

	decaps = (ira->ira_flags & IRAF_IPSEC_DECAPS);

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = DROPPER(ipss, ipds_spd_explicit);
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = DROPPER(ipss, ipds_spd_got_secure);
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_ahalg);
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_ah);
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay. But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ira->ira_ipsec_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ira->ira_ipsec_action);
		}
		ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
		ASSERT(ira->ira_ipsec_action == NULL);
		IPACT_REFHOLD(ap);
		ira->ira_ipsec_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return (ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid));
}

/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fallback to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_proto;

	ASSERT(connp->conn_latch_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_proto);
	}
	ASSERT(connp->conn_proto != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}
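
/*
 * The value built above with SA_UNIQUE_ID() packs the remote port, local
 * port and upper-layer protocol into a single 64-bit quantity; it is the
 * pkt_unique value that ipsec_check_ipsecin_unique() masks against each
 * SA's ipsa_unique_mask/ipsa_unique_id.
 */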

/*
 * Called to check policy on a latched connection.
 * Note that we don't dereference conn_latch or conn_ihere since the conn might
 * be closing.  The caller passes a held ipsec_latch_t instead.
 */
static boolean_t
ipsec_check_ipsecin_latch(ip_recv_attr_t *ira, mblk_t *mp, ipsec_latch_t *ipl,
    ipsec_action_t *ap, ipha_t *ipha, ip6_t *ip6h, const char **reason,
    kstat_named_t **counter, conn_t *connp, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);

	if (!(ira->ira_flags & IRAF_LOOPBACK)) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if (ira->ira_ipsec_ah_sa != NULL) {
			if (!spd_match_inbound_ids(ipl,
			    ira->ira_ipsec_ah_sa)) {
				*counter = DROPPER(ipss, ipds_spd_ah_badid);
				*reason = "AH identity mismatch";
				return (B_FALSE);
			}
		}

		if (ira->ira_ipsec_esp_sa != NULL) {
			if (!spd_match_inbound_ids(ipl,
			    ira->ira_ipsec_esp_sa)) {
				*counter = DROPPER(ipss, ipds_spd_esp_badid);
				*reason = "ESP identity mismatch";
				return (B_FALSE);
			}
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ira, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h), ns)) {
			return (B_FALSE);
		}
	}
	return (ipsec_check_ipsecin_action(ira, mp, ap, ipha, ip6h, reason,
	    counter, ns));
}

/*
 * Check to see whether this secured datagram meets the policy
 * constraints specified in ipsp.
 *
 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy.
 *
 * Consumes a reference to ipsp.
 * Returns the mblk if ok.
 */
static mblk_t *
ipsec_check_ipsecin_policy(mblk_t *data_mp, ipsec_policy_t *ipsp,
    ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, ip_recv_attr_t *ira,
    netstack_t *ns)
{
	ipsec_action_t *ap;
	const char *reason = "no policy actions found";
	ip_stack_t *ipst = ns->netstack_ip;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	kstat_named_t *counter;

	counter = DROPPER(ipss, ipds_spd_got_secure);

	ASSERT(ipsp != NULL);

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ira->ira_flags & IRAF_LOOPBACK)
		return (ipsec_check_loopback_policy(data_mp, ira, ipsp));

	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);

	if (ira->ira_ipsec_action != NULL) {
		/*
		 * this can happen if we do a double policy-check on a packet
		 * Would be nice to be able to delete this test..
		 */
		IPACT_REFRELE(ira->ira_ipsec_action);
	}
	ASSERT(ira->ira_ipsec_action == NULL);

	if (!SA_IDS_MATCH(ira->ira_ipsec_ah_sa, ira->ira_ipsec_esp_sa)) {
		reason = "inbound AH and ESP identities differ";
		counter = DROPPER(ipss, ipds_spd_ahesp_diffid);
		goto drop;
	}

	if (!ipsec_check_ipsecin_unique(ira, &reason, &counter, pkt_unique,
	    ns))
		goto drop;

	/*
	 * Ok, now loop through the possible actions and see if any
	 * of them work for us.
	 */

	for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) {
		if (ipsec_check_ipsecin_action(ira, data_mp, ap,
		    ipha, ip6h, &reason, &counter, ns)) {
			BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
			IPPOL_REFRELE(ipsp);
			return (data_mp);
		}
	}
drop:
	ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    "ipsec inbound policy mismatch: %s, packet dropped\n",
	    reason);
	IPPOL_REFRELE(ipsp);
	ASSERT(ira->ira_ipsec_action == NULL);
	BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
	ip_drop_packet(data_mp, B_TRUE, NULL, counter,
	    &ipss->ipsec_spd_dropper);
	return (NULL);
}

/*
 * sleazy prefix-length-based compare.
 * another inlining candidate..
 */
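/*
 * For example, pfxlen == 20 compares the first two whole bytes
 * (offset == 2) and then the top four bits (bitsleft == 4) of the third
 * byte, i.e. under the mask (0xff << 4) == 0xf0.
 */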
1925 */
1926 boolean_t
1927 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p)
1928 {
1929 int offset = pfxlen>>3;
1930 int bitsleft = pfxlen & 7;
1931 uint8_t *addr2 = (uint8_t *)addr2p;
1932
1933 /*
1934 * and there was much evil..
1935 * XXX should inline-expand the bcmp here and do this 32 bits
1936 * or 64 bits at a time..
1937 */
1938 return ((bcmp(addr1, addr2, offset) == 0) &&
1939 ((bitsleft == 0) ||
1940 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0)));
1941 }
1942
1943 static ipsec_policy_t *
1944 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain,
1945 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq)
1946 {
1947 ipsec_selkey_t *isel;
1948 ipsec_policy_t *p;
1949 int bpri = best ? best->ipsp_prio : 0;
1950
1951 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) {
1952 uint32_t valid;
1953
1954 if (p->ipsp_prio <= bpri)
1955 continue;
1956 isel = &p->ipsp_sel->ipsl_key;
1957 valid = isel->ipsl_valid;
1958
1959 if ((valid & IPSL_PROTOCOL) &&
1960 (isel->ipsl_proto != sel->ips_protocol))
1961 continue;
1962
1963 if ((valid & IPSL_REMOTE_ADDR) &&
1964 !ip_addr_match((uint8_t *)&isel->ipsl_remote,
1965 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6))
1966 continue;
1967
1968 if ((valid & IPSL_LOCAL_ADDR) &&
1969 !ip_addr_match((uint8_t *)&isel->ipsl_local,
1970 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6))
1971 continue;
1972
1973 if ((valid & IPSL_REMOTE_PORT) &&
1974 isel->ipsl_rport != sel->ips_remote_port)
1975 continue;
1976
1977 if ((valid & IPSL_LOCAL_PORT) &&
1978 isel->ipsl_lport != sel->ips_local_port)
1979 continue;
1980
1981 if (!is_icmp_inv_acq) {
1982 if ((valid & IPSL_ICMP_TYPE) &&
1983 (isel->ipsl_icmp_type > sel->ips_icmp_type ||
1984 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) {
1985 continue;
1986 }
1987
1988 if ((valid & IPSL_ICMP_CODE) &&
1989 (isel->ipsl_icmp_code > sel->ips_icmp_code ||
1990 isel->ipsl_icmp_code_end <
1991 sel->ips_icmp_code)) {
1992 continue;
1993 }
1994 } else {
1995 /*
1996 * Special case for ICMP inverse acquire:
1997 * we only want policies that aren't drop/pass.
1998 */
1999 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY)
2000 continue;
2001 }
2002
2003 /* We matched all the packet-port-field selectors! */
2004 best = p;
2005 bpri = p->ipsp_prio;
2006 }
2007
2008 return (best);
2009 }
2010
2011 /*
2012 * Try to find and return the best policy entry under a given policy
2013 * root for a given set of selectors; the first parameter "best" is
2014 * the current best policy so far. If "best" is non-null, we have a
2015 * reference to it. We return a reference to a policy; if that policy
2016 * is not the original "best", we need to release that reference
2017 * before returning.
2018 */
2019 ipsec_policy_t *
2020 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head,
2021 int direction, ipsec_selector_t *sel)
2022 {
2023 ipsec_policy_t *curbest;
2024 ipsec_policy_root_t *root;
2025 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq;
2026 int af = sel->ips_isv4 ?
IPSEC_AF_V4 : IPSEC_AF_V6; 2027 2028 curbest = best; 2029 root = &head->iph_root[direction]; 2030 2031 #ifdef DEBUG 2032 if (is_icmp_inv_acq) { 2033 if (sel->ips_isv4) { 2034 if (sel->ips_protocol != IPPROTO_ICMP) { 2035 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2036 " expecting icmp, got %d", 2037 sel->ips_protocol); 2038 } 2039 } else { 2040 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2041 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2042 " expecting icmpv6, got %d", 2043 sel->ips_protocol); 2044 } 2045 } 2046 } 2047 #endif 2048 2049 rw_enter(&head->iph_lock, RW_READER); 2050 2051 if (root->ipr_nchains > 0) { 2052 curbest = ipsec_find_policy_chain(curbest, 2053 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2054 is_icmp_inv_acq); 2055 } 2056 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2057 is_icmp_inv_acq); 2058 2059 /* 2060 * Adjust reference counts if we found anything new. 2061 */ 2062 if (curbest != best) { 2063 ASSERT(curbest != NULL); 2064 IPPOL_REFHOLD(curbest); 2065 2066 if (best != NULL) { 2067 IPPOL_REFRELE(best); 2068 } 2069 } 2070 2071 rw_exit(&head->iph_lock); 2072 2073 return (curbest); 2074 } 2075 2076 /* 2077 * Find the best system policy (either global or per-interface) which 2078 * applies to the given selector; look in all the relevant policy roots 2079 * to figure out which policy wins. 2080 * 2081 * Returns a reference to a policy; caller must release this 2082 * reference when done. 2083 */ 2084 ipsec_policy_t * 2085 ipsec_find_policy(int direction, const conn_t *connp, ipsec_selector_t *sel, 2086 netstack_t *ns) 2087 { 2088 ipsec_policy_t *p; 2089 ipsec_stack_t *ipss = ns->netstack_ipsec; 2090 2091 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2092 direction, sel); 2093 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2094 p = ipsec_find_policy_head(p, connp->conn_policy, 2095 direction, sel); 2096 } 2097 2098 return (p); 2099 } 2100 2101 /* 2102 * Check with global policy and see whether this inbound 2103 * packet meets the policy constraints. 2104 * 2105 * Locate appropriate policy from global policy, supplemented by the 2106 * conn's configured and/or cached policy if the conn is supplied. 2107 * 2108 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2109 * encrypted packet to see if they match. 2110 * 2111 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2112 * floor. 2113 */ 2114 mblk_t * 2115 ipsec_check_global_policy(mblk_t *data_mp, conn_t *connp, 2116 ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, netstack_t *ns) 2117 { 2118 ipsec_policy_t *p; 2119 ipsec_selector_t sel; 2120 boolean_t policy_present; 2121 kstat_named_t *counter; 2122 uint64_t pkt_unique; 2123 ip_stack_t *ipst = ns->netstack_ip; 2124 ipsec_stack_t *ipss = ns->netstack_ipsec; 2125 2126 sel.ips_is_icmp_inv_acq = 0; 2127 2128 ASSERT((ipha == NULL && ip6h != NULL) || 2129 (ip6h == NULL && ipha != NULL)); 2130 2131 if (ipha != NULL) 2132 policy_present = ipss->ipsec_inbound_v4_policy_present; 2133 else 2134 policy_present = ipss->ipsec_inbound_v6_policy_present; 2135 2136 if (!policy_present && connp == NULL) { 2137 /* 2138 * No global policy and no per-socket policy; 2139 * just pass it back (but we shouldn't get here in that case) 2140 */ 2141 return (data_mp); 2142 } 2143 2144 /* 2145 * If we have cached policy, use it. 2146 * Otherwise consult system policy. 
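*
* In both branches below we also compute pkt_unique, the packed
* (remote port, local port, protocol) tuple that
* ipsec_check_ipsecin_unique() compares against an SA's unique ID,
* e.g. (mirroring the non-latched branch):
*
*	pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port,
*	    sel.ips_local_port, sel.ips_protocol, 0);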
2147 */
2148 if ((connp != NULL) && (connp->conn_latch != NULL)) {
2149 p = connp->conn_latch_in_policy;
2150 if (p != NULL) {
2151 IPPOL_REFHOLD(p);
2152 }
2153 /*
2154 * Fudge sel for UNIQUE_ID setting below.
2155 */
2156 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h);
2157 } else {
2158 /* Initialize the ports in the selector */
2159 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
2160 SEL_NONE) == SELRET_NOMEM) {
2161 /*
2162 * Technically not a policy mismatch, but it is
2163 * an internal failure.
2164 */
2165 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH,
2166 "ipsec_init_inbound_sel", ipha, ip6h, B_TRUE, ns);
2167 counter = DROPPER(ipss, ipds_spd_nomem);
2168 goto fail;
2169 }
2170
2171 /*
2172 * Find the policy which best applies.
2173 *
2174 * If we find global policy, we should look at both
2175 * local policy and global policy and see which is
2176 * stronger and match accordingly.
2177 *
2178 * If we don't find a global policy, check with
2179 * local policy alone.
2180 */
2181
2182 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, &sel, ns);
2183 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port,
2184 sel.ips_local_port, sel.ips_protocol, 0);
2185 }
2186
2187 if (p == NULL) {
2188 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
2189 /*
2190 * We have no policy; default to succeeding.
2191 * XXX paranoid system design doesn't do this.
2192 */
2193 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2194 return (data_mp);
2195 } else {
2196 counter = DROPPER(ipss, ipds_spd_got_secure);
2197 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED,
2198 "ipsec_check_global_policy", ipha, ip6h, B_TRUE,
2199 ns);
2200 goto fail;
2201 }
2202 }
2203 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
2204 return (ipsec_check_ipsecin_policy(data_mp, p, ipha, ip6h,
2205 pkt_unique, ira, ns));
2206 }
2207 if (p->ipsp_act->ipa_allow_clear) {
2208 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2209 IPPOL_REFRELE(p);
2210 return (data_mp);
2211 }
2212 IPPOL_REFRELE(p);
2213 /*
2214 * If we reach here, we drop the packet: it failed the global
2215 * policy check because it was cleartext when it should not
2216 * have been.
2217 */
2218 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH,
2219 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns);
2220 counter = DROPPER(ipss, ipds_spd_got_clear);
2221
2222 fail:
2223 ip_drop_packet(data_mp, B_TRUE, NULL, counter,
2224 &ipss->ipsec_spd_dropper);
2225 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
2226 return (NULL);
2227 }
2228
2229 /*
2230 * We check whether an inbound cleartext datagram is a valid one
2231 * to accept. If it is secure, it is the job
2232 * of IPsec to log information appropriately if it
2233 * suspects that it may not be the real one.
2234 *
2235 * This is called only while fanning out to a ULP
2236 * that accepts only secure data when the incoming
2237 * datagram is clear. Usually we never accept clear datagrams in
2238 * such cases; ICMP is the only exception.
2239 *
2240 * NOTE: We don't call this function if the client (ULP)
2241 * is willing to accept things in clear.
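*
* To summarize the IPv4 switch below: echo/timestamp/info replies,
* router advertisements, host/net unreachables, fragmentation-needed,
* source quench and time exceeded are accepted in the clear; requests,
* router solicitations, address-mask replies, port/protocol
* unreachables, parameter problems and redirects are not.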
2242 */
2243 boolean_t
2244 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
2245 {
2246 ushort_t iph_hdr_length;
2247 icmph_t *icmph;
2248 icmp6_t *icmp6;
2249 uint8_t *nexthdrp;
2250
2251 ASSERT((ipha != NULL && ip6h == NULL) ||
2252 (ipha == NULL && ip6h != NULL));
2253
2254 if (ip6h != NULL) {
2255 iph_hdr_length = ip_hdr_length_v6(mp, ip6h);
2256 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length,
2257 &nexthdrp)) {
2258 return (B_FALSE);
2259 }
2260 if (*nexthdrp != IPPROTO_ICMPV6)
2261 return (B_FALSE);
2262 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]);
2263 /* Match IPv6 ICMP policy as closely to IPv4 as possible. */
2264 switch (icmp6->icmp6_type) {
2265 case ICMP6_PARAM_PROB:
2266 /* Corresponds to port/proto unreach in IPv4. */
2267 case ICMP6_ECHO_REQUEST:
2268 /* Just like IPv4. */
2269 return (B_FALSE);
2270
2271 case MLD_LISTENER_QUERY:
2272 case MLD_LISTENER_REPORT:
2273 case MLD_LISTENER_REDUCTION:
2274 /*
2275 * XXX Separate NDD in IPv4; what about here?
2276 * Plus, mcast is important to ND.
2277 */
2278 case ICMP6_DST_UNREACH:
2279 /* Corresponds to HOST/NET unreachable in IPv4. */
2280 case ICMP6_PACKET_TOO_BIG:
2281 case ICMP6_ECHO_REPLY:
2282 /* These are trusted in IPv4. */
2283 case ND_ROUTER_SOLICIT:
2284 case ND_ROUTER_ADVERT:
2285 case ND_NEIGHBOR_SOLICIT:
2286 case ND_NEIGHBOR_ADVERT:
2287 case ND_REDIRECT:
2288 /* Trust ND messages for now. */
2289 case ICMP6_TIME_EXCEEDED:
2290 default:
2291 return (B_TRUE);
2292 }
2293 } else {
2294 /*
2295 * If it is not ICMP, fail this request.
2296 */
2297 if (ipha->ipha_protocol != IPPROTO_ICMP) {
2298 #ifdef FRAGCACHE_DEBUG
2299 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
2300 ipha->ipha_protocol);
2301 #endif
2302 return (B_FALSE);
2303 }
2304 iph_hdr_length = IPH_HDR_LENGTH(ipha);
2305 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2306 /*
2307 * It is an insecure ICMP message. Check to see whether we are
2308 * willing to accept this one.
2309 */
2310
2311 switch (icmph->icmph_type) {
2312 case ICMP_ECHO_REPLY:
2313 case ICMP_TIME_STAMP_REPLY:
2314 case ICMP_INFO_REPLY:
2315 case ICMP_ROUTER_ADVERTISEMENT:
2316 /*
2317 * We should not encourage clear replies if this
2318 * client expects secure. If somebody replies
2319 * in the clear, a malicious user watching both the
2320 * request and the reply can mount chosen-plaintext attacks.
2321 * With global policy we might just be expecting secure
2322 * but sending out clear. We don't know what the right
2323 * thing is; we can't do much here as we can't control
2324 * the sender. Until we are sure of what to do,
2325 * accept them.
2326 */
2327 return (B_TRUE);
2328 case ICMP_ECHO_REQUEST:
2329 case ICMP_TIME_STAMP_REQUEST:
2330 case ICMP_INFO_REQUEST:
2331 case ICMP_ADDRESS_MASK_REQUEST:
2332 case ICMP_ROUTER_SOLICITATION:
2333 case ICMP_ADDRESS_MASK_REPLY:
2334 /*
2335 * Don't accept this, as somebody could be sending
2336 * us plaintext to get encrypted data back. If we
2337 * reply, it could lead to a chosen-plaintext attack.
2338 */
2339 return (B_FALSE);
2340 case ICMP_DEST_UNREACHABLE:
2341 switch (icmph->icmph_code) {
2342 case ICMP_FRAGMENTATION_NEEDED:
2343 /*
2344 * Be in sync with icmp_inbound, where we have
2345 * already set dce_pmtu.
2346 */
2347 #ifdef FRAGCACHE_DEBUG
2348 cmn_err(CE_WARN, "ICMP frag needed\n");
2349 #endif
2350 return (B_TRUE);
2351 case ICMP_HOST_UNREACHABLE:
2352 case ICMP_NET_UNREACHABLE:
2353 /*
2354 * By accepting, we could reset a connection.
2355 * How do we solve the problem of some
2356 * intermediate router sending insecure ICMP
2357 * messages?
2358 */
2359 return (B_TRUE);
2360 case ICMP_PORT_UNREACHABLE:
2361 case ICMP_PROTOCOL_UNREACHABLE:
2362 default:
2363 return (B_FALSE);
2364 }
2365 case ICMP_SOURCE_QUENCH:
2366 /*
2367 * If this is an attack, TCP will slow-start
2368 * because of this. Is it very harmful?
2369 */
2370 return (B_TRUE);
2371 case ICMP_PARAM_PROBLEM:
2372 return (B_FALSE);
2373 case ICMP_TIME_EXCEEDED:
2374 return (B_TRUE);
2375 case ICMP_REDIRECT:
2376 return (B_FALSE);
2377 default:
2378 return (B_FALSE);
2379 }
2380 }
2381 }
2382
2383 void
2384 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote)
2385 {
2386 mutex_enter(&ipl->ipl_lock);
2387
2388 if (ipl->ipl_ids_latched) {
2389 /* I lost; someone else got here before me */
2390 mutex_exit(&ipl->ipl_lock);
2391 return;
2392 }
2393
2394 if (local != NULL)
2395 IPSID_REFHOLD(local);
2396 if (remote != NULL)
2397 IPSID_REFHOLD(remote);
2398
2399 ipl->ipl_local_cid = local;
2400 ipl->ipl_remote_cid = remote;
2401 ipl->ipl_ids_latched = B_TRUE;
2402 mutex_exit(&ipl->ipl_lock);
2403 }
2404
2405 void
2406 ipsec_latch_inbound(conn_t *connp, ip_recv_attr_t *ira)
2407 {
2408 ipsa_t *sa;
2409 ipsec_latch_t *ipl = connp->conn_latch;
2410
2411 if (!ipl->ipl_ids_latched) {
2412 ipsid_t *local = NULL;
2413 ipsid_t *remote = NULL;
2414
2415 if (!(ira->ira_flags & IRAF_LOOPBACK)) {
2416 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2417 if (ira->ira_ipsec_esp_sa != NULL)
2418 sa = ira->ira_ipsec_esp_sa;
2419 else
2420 sa = ira->ira_ipsec_ah_sa;
2421 ASSERT(sa != NULL);
2422 local = sa->ipsa_dst_cid;
2423 remote = sa->ipsa_src_cid;
2424 }
2425 ipsec_latch_ids(ipl, local, remote);
2426 }
2427 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
2428 if (connp->conn_latch_in_action != NULL) {
2429 /*
2430 * Previously cached action. This is probably
2431 * harmless, but in DEBUG kernels, check for
2432 * action equality.
2433 *
2434 * Preserve the existing action to preserve latch
2435 * invariance.
2436 */
2437 ASSERT(connp->conn_latch_in_action ==
2438 ira->ira_ipsec_action);
2439 return;
2440 }
2441 connp->conn_latch_in_action = ira->ira_ipsec_action;
2442 IPACT_REFHOLD(connp->conn_latch_in_action);
2443 }
2444 }
2445
2446 /*
2447 * Check whether the policy constraints are met for an
2448 * inbound datagram; called from IP in numerous places.
2449 *
2450 * Note that this is not a chokepoint for inbound policy checks;
2451 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy()
2452 */
2453 mblk_t *
2454 ipsec_check_inbound_policy(mblk_t *mp, conn_t *connp,
2455 ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira)
2456 {
2457 boolean_t ret;
2458 ipsec_latch_t *ipl;
2459 ipsec_action_t *ap;
2460 uint64_t unique_id;
2461 ipsec_stack_t *ipss;
2462 ip_stack_t *ipst;
2463 netstack_t *ns;
2464 ipsec_policy_head_t *policy_head;
2465 ipsec_policy_t *p = NULL;
2466
2467 ASSERT(connp != NULL);
2468 ns = connp->conn_netstack;
2469 ipss = ns->netstack_ipsec;
2470 ipst = ns->netstack_ip;
2471
2472 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
2473 /*
2474 * This is the case where the incoming datagram is
2475 * cleartext and we need to see whether this client
2476 * would like to receive such untrustworthy things from
2477 * the wire.
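* (We consult the latched policy if one is present, and otherwise
* fall back to per-socket/global policy via
* ipsec_check_global_policy().)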
2478 */
2479 ASSERT(mp != NULL);
2480
2481 mutex_enter(&connp->conn_lock);
2482 if (connp->conn_state_flags & CONN_CONDEMNED) {
2483 mutex_exit(&connp->conn_lock);
2484 ip_drop_packet(mp, B_TRUE, NULL,
2485 DROPPER(ipss, ipds_spd_got_clear),
2486 &ipss->ipsec_spd_dropper);
2487 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
2488 return (NULL);
2489 }
2490 if (connp->conn_latch != NULL) {
2491 /* Hold a reference in case the conn is closing */
2492 p = connp->conn_latch_in_policy;
2493 if (p != NULL)
2494 IPPOL_REFHOLD(p);
2495 mutex_exit(&connp->conn_lock);
2496 /*
2497 * Policy is cached in the conn.
2498 */
2499 if (p != NULL && !p->ipsp_act->ipa_allow_clear) {
2500 ret = ipsec_inbound_accept_clear(mp,
2501 ipha, ip6h);
2502 if (ret) {
2503 BUMP_MIB(&ipst->ips_ip_mib,
2504 ipsecInSucceeded);
2505 IPPOL_REFRELE(p);
2506 return (mp);
2507 } else {
2508 ipsec_log_policy_failure(
2509 IPSEC_POLICY_MISMATCH,
2510 "ipsec_check_inbound_policy", ipha,
2511 ip6h, B_FALSE, ns);
2512 ip_drop_packet(mp, B_TRUE, NULL,
2513 DROPPER(ipss, ipds_spd_got_clear),
2514 &ipss->ipsec_spd_dropper);
2515 BUMP_MIB(&ipst->ips_ip_mib,
2516 ipsecInFailed);
2517 IPPOL_REFRELE(p);
2518 return (NULL);
2519 }
2520 } else {
2521 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2522 if (p != NULL)
2523 IPPOL_REFRELE(p);
2524 return (mp);
2525 }
2526 } else {
2527 policy_head = connp->conn_policy;
2528
2529 /* Hold a reference in case the conn is closing */
2530 if (policy_head != NULL)
2531 IPPH_REFHOLD(policy_head);
2532 mutex_exit(&connp->conn_lock);
2533 /*
2534 * As this is a non-hardbound connection we need
2535 * to look at both per-socket policy and global
2536 * policy.
2537 */
2538 mp = ipsec_check_global_policy(mp, connp,
2539 ipha, ip6h, ira, ns);
2540 if (policy_head != NULL)
2541 IPPH_REFRELE(policy_head, ns);
2542 return (mp);
2543 }
2544 }
2545
2546 mutex_enter(&connp->conn_lock);
2547 /* Connection is closing */
2548 if (connp->conn_state_flags & CONN_CONDEMNED) {
2549 mutex_exit(&connp->conn_lock);
2550 ip_drop_packet(mp, B_TRUE, NULL,
2551 DROPPER(ipss, ipds_spd_got_clear),
2552 &ipss->ipsec_spd_dropper);
2553 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
2554 return (NULL);
2555 }
2556
2557 /*
2558 * Once a connection is latched it remains so for life; the conn_latch
2559 * pointer on the conn does not change. We simply initialize ipl here
2560 * because the earlier initialization was done only in the cleartext case.
2561 */
2562 if ((ipl = connp->conn_latch) == NULL) {
2563 mblk_t *retmp;
2564 policy_head = connp->conn_policy;
2565
2566 /* Hold a reference in case the conn is closing */
2567 if (policy_head != NULL)
2568 IPPH_REFHOLD(policy_head);
2569 mutex_exit(&connp->conn_lock);
2570 /*
2571 * We don't have policies cached in the conn
2572 * for this stream, so look at the global
2573 * policy. It will check against conn or global
2574 * policy, whichever is stronger.
2575 */
2576 retmp = ipsec_check_global_policy(mp, connp,
2577 ipha, ip6h, ira, ns);
2578 if (policy_head != NULL)
2579 IPPH_REFRELE(policy_head, ns);
2580 return (retmp);
2581 }
2582
2583 IPLATCH_REFHOLD(ipl);
2584 /* Hold reference on conn_latch_in_action in case conn is closing */
2585 ap = connp->conn_latch_in_action;
2586 if (ap != NULL)
2587 IPACT_REFHOLD(ap);
2588 mutex_exit(&connp->conn_lock);
2589
2590 if (ap != NULL) {
2591 /* Policy is cached & latched; fast(er) path */
2592 const char *reason;
2593 kstat_named_t *counter;
2594
2595 if (ipsec_check_ipsecin_latch(ira, mp, ipl, ap,
2596 ipha, ip6h, &reason, &counter, connp, ns)) {
2597 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2598 IPLATCH_REFRELE(ipl);
2599 IPACT_REFRELE(ap);
2600 return (mp);
2601 }
2602 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0,
2603 SL_ERROR|SL_WARN|SL_CONSOLE,
2604 "ipsec inbound policy mismatch: %s, packet dropped\n",
2605 reason);
2606 ip_drop_packet(mp, B_TRUE, NULL, counter,
2607 &ipss->ipsec_spd_dropper);
2608 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
2609 IPLATCH_REFRELE(ipl);
2610 IPACT_REFRELE(ap);
2611 return (NULL);
2612 }
2613 if ((p = connp->conn_latch_in_policy) == NULL) {
2614 ipsec_weird_null_inbound_policy++;
2615 IPLATCH_REFRELE(ipl);
2616 return (mp);
2617 }
2618
2619 unique_id = conn_to_unique(connp, mp, ipha, ip6h);
2620 IPPOL_REFHOLD(p);
2621 mp = ipsec_check_ipsecin_policy(mp, p, ipha, ip6h, unique_id, ira, ns);
2622 /*
2623 * NOTE: ipsecIn{Failed,Succeeded} bumped by
2624 * ipsec_check_ipsecin_policy().
2625 */
2626 if (mp != NULL)
2627 ipsec_latch_inbound(connp, ira);
2628 IPLATCH_REFRELE(ipl);
2629 return (mp);
2630 }
2631
2632 /*
2633 * Compute how many bytes precede the header at hptr in the message;
2634 * handles all sorts of cases like tunnel-mode and ICMP.
2635 */
2635 static int
2636 prepended_length(mblk_t *mp, uintptr_t hptr)
2637 {
2638 int rc = 0;
2639
2640 while (mp != NULL) {
2641 if (hptr >= (uintptr_t)mp->b_rptr && hptr <
2642 (uintptr_t)mp->b_wptr) {
2643 rc += (int)(hptr - (uintptr_t)mp->b_rptr);
2644 break; /* out of while loop */
2645 }
2646 rc += (int)MBLKL(mp);
2647 mp = mp->b_cont;
2648 }
2649
2650 if (mp == NULL) {
2651 /*
2652 * IF (big IF) we make it here by naturally exiting the loop,
2653 * then the header at hptr isn't in the mblk chain "mp" at all.
2654 *
2655 * The only case where this happens is with a reversed IP
2656 * header that gets passed up by inbound ICMP processing.
2657 * This unfortunately triggers longstanding bug 6478464. For
2658 * now, just pass up 0 for the answer.
2659 */
2660 #ifdef DEBUG_NOT_UNTIL_6478464
2661 ASSERT(mp != NULL);
2662 #endif
2663 rc = 0;
2664 }
2665
2666 return (rc);
2667 }
2668
2669 /*
2670 * Returns:
2671 *
2672 * SELRET_NOMEM --> msgpullup() needed to gather things failed.
2673 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment
2674 * gathering, the initial fragment is too short for
2675 * useful data. Only returned if SEL_TUNNEL_MODE is
2676 * set.
2677 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data.
2678 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller
2679 * should put this packet in a fragment-gathering queue.
2680 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY
2681 * are set.
2682 *
2683 * Note that ipha/ip6h can be in a different mblk (mp->b_cont) in the case
2684 * of tunneled packets.
2685 * Also, mp->b_rptr can be an ICMP error where ipha/ip6h is the packet in
2686 * error past the ICMP error.
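*
* For instance (a rough sketch), an ICMP error may look like this,
* with ipha/ip6h pointing into the middle of the message:
*
*	b_rptr                                        b_wptr
*	v                                             v
*	+--------------+------------+-----------------+
*	| outer IP hdr | ICMP error | packet in error |
*	+--------------+------------+-----------------+
*	                            ^
*	                            ipha/ip6h
*
* prepended_length() above computes exactly that gap: the number of
* bytes between b_rptr and ipha/ip6h.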
2687 */ 2688 static selret_t 2689 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2690 ip6_t *ip6h, uint8_t sel_flags) 2691 { 2692 uint16_t *ports; 2693 int outer_hdr_len = 0; /* For ICMP or tunnel-mode cases... */ 2694 ushort_t hdr_len; 2695 mblk_t *spare_mp = NULL; 2696 uint8_t *nexthdrp, *transportp; 2697 uint8_t nexthdr; 2698 uint8_t icmp_proto; 2699 ip_pkt_t ipp; 2700 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2701 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2702 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2703 boolean_t post_frag = (sel_flags & SEL_POST_FRAG); 2704 2705 ASSERT((ipha == NULL && ip6h != NULL) || 2706 (ipha != NULL && ip6h == NULL)); 2707 2708 if (ip6h != NULL) { 2709 outer_hdr_len = prepended_length(mp, (uintptr_t)ip6h); 2710 nexthdr = ip6h->ip6_nxt; 2711 icmp_proto = IPPROTO_ICMPV6; 2712 sel->ips_isv4 = B_FALSE; 2713 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2714 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2715 2716 bzero(&ipp, sizeof (ipp)); 2717 2718 switch (nexthdr) { 2719 case IPPROTO_HOPOPTS: 2720 case IPPROTO_ROUTING: 2721 case IPPROTO_DSTOPTS: 2722 case IPPROTO_FRAGMENT: 2723 /* 2724 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2725 * mblk that's contiguous to feed it 2726 */ 2727 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2728 return (SELRET_NOMEM); 2729 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2730 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2731 &hdr_len, &nexthdrp)) { 2732 /* Malformed packet - caller frees. */ 2733 ipsec_freemsg_chain(spare_mp); 2734 return (SELRET_BADPKT); 2735 } 2736 /* Repopulate now that we have the whole packet */ 2737 ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len); 2738 (void) ip_find_hdr_v6(spare_mp, ip6h, B_FALSE, &ipp, 2739 NULL); 2740 nexthdr = *nexthdrp; 2741 /* We can just extract based on hdr_len now. */ 2742 break; 2743 default: 2744 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &ipp, NULL); 2745 hdr_len = IPV6_HDR_LEN; 2746 break; 2747 } 2748 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2749 /* IPv6 Fragment */ 2750 ipsec_freemsg_chain(spare_mp); 2751 return (SELRET_TUNFRAG); 2752 } 2753 transportp = (uint8_t *)ip6h + hdr_len; 2754 } else { 2755 outer_hdr_len = prepended_length(mp, (uintptr_t)ipha); 2756 icmp_proto = IPPROTO_ICMP; 2757 sel->ips_isv4 = B_TRUE; 2758 sel->ips_local_addr_v4 = ipha->ipha_dst; 2759 sel->ips_remote_addr_v4 = ipha->ipha_src; 2760 nexthdr = ipha->ipha_protocol; 2761 hdr_len = IPH_HDR_LENGTH(ipha); 2762 2763 if (port_policy_present && 2764 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2765 !is_icmp) { 2766 /* IPv4 Fragment */ 2767 ipsec_freemsg_chain(spare_mp); 2768 return (SELRET_TUNFRAG); 2769 } 2770 transportp = (uint8_t *)ipha + hdr_len; 2771 } 2772 sel->ips_protocol = nexthdr; 2773 2774 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2775 nexthdr != IPPROTO_SCTP && nexthdr != icmp_proto) || 2776 (!port_policy_present && !post_frag && tunnel_mode)) { 2777 sel->ips_remote_port = sel->ips_local_port = 0; 2778 ipsec_freemsg_chain(spare_mp); 2779 return (SELRET_SUCCESS); 2780 } 2781 2782 if (transportp + 4 > mp->b_wptr) { 2783 /* If we didn't pullup a copy already, do so now. */ 2784 /* 2785 * XXX performance, will upper-layers frequently split TCP/UDP 2786 * apart from IP or options? If so, perhaps we should revisit 2787 * the spare_mp strategy. 
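* (The global ipsec_hdr_pullup_needed counter bumped below is a
* crude statistic of how often this pullup path is taken.)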
2788 */ 2789 ipsec_hdr_pullup_needed++; 2790 if (spare_mp == NULL && 2791 (spare_mp = msgpullup(mp, -1)) == NULL) { 2792 return (SELRET_NOMEM); 2793 } 2794 transportp = &spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2795 } 2796 2797 if (nexthdr == icmp_proto) { 2798 sel->ips_icmp_type = *transportp++; 2799 sel->ips_icmp_code = *transportp; 2800 sel->ips_remote_port = sel->ips_local_port = 0; 2801 } else { 2802 ports = (uint16_t *)transportp; 2803 sel->ips_remote_port = *ports++; 2804 sel->ips_local_port = *ports; 2805 } 2806 ipsec_freemsg_chain(spare_mp); 2807 return (SELRET_SUCCESS); 2808 } 2809 2810 /* 2811 * This is called with a b_next chain of messages from the fragcache code, 2812 * hence it needs to discard a chain on error. 2813 */ 2814 static boolean_t 2815 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2816 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2817 { 2818 /* 2819 * XXX cut&paste shared with ipsec_init_inbound_sel 2820 */ 2821 uint16_t *ports; 2822 ushort_t hdr_len; 2823 mblk_t *spare_mp = NULL; 2824 uint8_t *nexthdrp; 2825 uint8_t nexthdr; 2826 uint8_t *typecode; 2827 uint8_t check_proto; 2828 2829 ASSERT((ipha == NULL && ip6h != NULL) || 2830 (ipha != NULL && ip6h == NULL)); 2831 2832 if (ip6h != NULL) { 2833 check_proto = IPPROTO_ICMPV6; 2834 nexthdr = ip6h->ip6_nxt; 2835 switch (nexthdr) { 2836 case IPPROTO_HOPOPTS: 2837 case IPPROTO_ROUTING: 2838 case IPPROTO_DSTOPTS: 2839 case IPPROTO_FRAGMENT: 2840 /* 2841 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2842 * mblk that's contiguous to feed it 2843 */ 2844 spare_mp = msgpullup(mp, -1); 2845 if (spare_mp == NULL || 2846 !ip_hdr_length_nexthdr_v6(spare_mp, 2847 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2848 &hdr_len, &nexthdrp)) { 2849 /* Always works, even if NULL. */ 2850 ipsec_freemsg_chain(spare_mp); 2851 ip_drop_packet_chain(mp, B_FALSE, NULL, 2852 DROPPER(ipss, ipds_spd_nomem), 2853 &ipss->ipsec_spd_dropper); 2854 return (B_FALSE); 2855 } else { 2856 nexthdr = *nexthdrp; 2857 /* We can just extract based on hdr_len now. */ 2858 } 2859 break; 2860 default: 2861 hdr_len = IPV6_HDR_LEN; 2862 break; 2863 } 2864 } else { 2865 check_proto = IPPROTO_ICMP; 2866 hdr_len = IPH_HDR_LENGTH(ipha); 2867 nexthdr = ipha->ipha_protocol; 2868 } 2869 2870 sel->ips_protocol = nexthdr; 2871 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2872 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2873 sel->ips_local_port = sel->ips_remote_port = 0; 2874 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2875 return (B_TRUE); 2876 } 2877 2878 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2879 /* If we didn't pullup a copy already, do so now. */ 2880 /* 2881 * XXX performance, will upper-layers frequently split TCP/UDP 2882 * apart from IP or options? If so, perhaps we should revisit 2883 * the spare_mp strategy. 2884 * 2885 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2886 */ 2887 if (spare_mp == NULL && 2888 (spare_mp = msgpullup(mp, -1)) == NULL) { 2889 ip_drop_packet_chain(mp, B_FALSE, NULL, 2890 DROPPER(ipss, ipds_spd_nomem), 2891 &ipss->ipsec_spd_dropper); 2892 return (B_FALSE); 2893 } 2894 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2895 } else { 2896 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2897 } 2898 2899 if (nexthdr == check_proto) { 2900 typecode = (uint8_t *)ports; 2901 sel->ips_icmp_type = *typecode++; 2902 sel->ips_icmp_code = *typecode; 2903 sel->ips_remote_port = sel->ips_local_port = 0; 2904 } else { 2905 sel->ips_local_port = *ports++; 2906 sel->ips_remote_port = *ports; 2907 } 2908 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2909 return (B_TRUE); 2910 } 2911 2912 /* 2913 * Prepend an mblk with a ipsec_crypto_t to the message chain. 2914 * Frees the argument and returns NULL should the allocation fail. 2915 * Returns the pointer to the crypto data part. 2916 */ 2917 mblk_t * 2918 ipsec_add_crypto_data(mblk_t *data_mp, ipsec_crypto_t **icp) 2919 { 2920 mblk_t *mp; 2921 2922 mp = allocb(sizeof (ipsec_crypto_t), BPRI_MED); 2923 if (mp == NULL) { 2924 freemsg(data_mp); 2925 return (NULL); 2926 } 2927 bzero(mp->b_rptr, sizeof (ipsec_crypto_t)); 2928 mp->b_wptr += sizeof (ipsec_crypto_t); 2929 mp->b_cont = data_mp; 2930 mp->b_datap->db_type = M_EVENT; /* For ASSERT */ 2931 *icp = (ipsec_crypto_t *)mp->b_rptr; 2932 return (mp); 2933 } 2934 2935 /* 2936 * Remove what was prepended above. Return b_cont and a pointer to the 2937 * crypto data. 2938 * The caller must call ipsec_free_crypto_data for mblk once it is done 2939 * with the crypto data. 2940 */ 2941 mblk_t * 2942 ipsec_remove_crypto_data(mblk_t *crypto_mp, ipsec_crypto_t **icp) 2943 { 2944 ASSERT(crypto_mp->b_datap->db_type == M_EVENT); 2945 ASSERT(MBLKL(crypto_mp) == sizeof (ipsec_crypto_t)); 2946 2947 *icp = (ipsec_crypto_t *)crypto_mp->b_rptr; 2948 return (crypto_mp->b_cont); 2949 } 2950 2951 /* 2952 * Free what was prepended above. Return b_cont. 2953 */ 2954 mblk_t * 2955 ipsec_free_crypto_data(mblk_t *crypto_mp) 2956 { 2957 mblk_t *mp; 2958 2959 ASSERT(crypto_mp->b_datap->db_type == M_EVENT); 2960 ASSERT(MBLKL(crypto_mp) == sizeof (ipsec_crypto_t)); 2961 2962 mp = crypto_mp->b_cont; 2963 freeb(crypto_mp); 2964 return (mp); 2965 } 2966 2967 /* 2968 * Create an ipsec_action_t based on the way an inbound packet was protected. 2969 * Used to reflect traffic back to a sender. 2970 * 2971 * We don't bother interning the action into the hash table. 2972 */ 2973 ipsec_action_t * 2974 ipsec_in_to_out_action(ip_recv_attr_t *ira) 2975 { 2976 ipsa_t *ah_assoc, *esp_assoc; 2977 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2978 ipsec_action_t *ap; 2979 boolean_t unique; 2980 2981 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2982 2983 if (ap == NULL) 2984 return (NULL); 2985 2986 bzero(ap, sizeof (*ap)); 2987 HASH_NULL(ap, ipa_hash); 2988 ap->ipa_next = NULL; 2989 ap->ipa_refs = 1; 2990 2991 /* 2992 * Get the algorithms that were used for this packet. 
2993 */ 2994 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2995 ap->ipa_act.ipa_log = 0; 2996 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2997 2998 ah_assoc = ira->ira_ipsec_ah_sa; 2999 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 3000 3001 esp_assoc = ira->ira_ipsec_esp_sa; 3002 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 3003 3004 if (esp_assoc != NULL) { 3005 encr_alg = esp_assoc->ipsa_encr_alg; 3006 espa_alg = esp_assoc->ipsa_auth_alg; 3007 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 3008 } 3009 if (ah_assoc != NULL) 3010 auth_alg = ah_assoc->ipsa_auth_alg; 3011 3012 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 3013 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 3014 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 3015 ap->ipa_act.ipa_apply.ipp_use_se = 3016 !!(ira->ira_flags & IRAF_IPSEC_DECAPS); 3017 unique = B_FALSE; 3018 3019 if (esp_assoc != NULL) { 3020 ap->ipa_act.ipa_apply.ipp_espa_minbits = 3021 esp_assoc->ipsa_authkeybits; 3022 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 3023 esp_assoc->ipsa_authkeybits; 3024 ap->ipa_act.ipa_apply.ipp_espe_minbits = 3025 esp_assoc->ipsa_encrkeybits; 3026 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 3027 esp_assoc->ipsa_encrkeybits; 3028 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 3029 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 3030 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 3031 unique = B_TRUE; 3032 } 3033 if (ah_assoc != NULL) { 3034 ap->ipa_act.ipa_apply.ipp_ah_minbits = 3035 ah_assoc->ipsa_authkeybits; 3036 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 3037 ah_assoc->ipsa_authkeybits; 3038 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 3039 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 3040 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 3041 unique = B_TRUE; 3042 } 3043 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3044 ap->ipa_want_unique = unique; 3045 ap->ipa_allow_clear = B_FALSE; 3046 ap->ipa_want_se = !!(ira->ira_flags & IRAF_IPSEC_DECAPS); 3047 ap->ipa_want_ah = (ah_assoc != NULL); 3048 ap->ipa_want_esp = (esp_assoc != NULL); 3049 3050 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 3051 3052 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3053 3054 return (ap); 3055 } 3056 3057 3058 /* 3059 * Compute the worst-case amount of extra space required by an action. 3060 * Note that, because of the ESP considerations listed below, this is 3061 * actually not the same as the best-case reduction in the MTU; in the 3062 * future, we should pass additional information to this function to 3063 * allow the actual MTU impact to be computed. 3064 * 3065 * AH: Revisit this if we implement algorithms with 3066 * a verifier size of more than 12 bytes. 3067 * 3068 * ESP: A more exact but more messy computation would take into 3069 * account the interaction between the cipher block size and the 3070 * effective MTU, yielding the inner payload size which reflects a 3071 * packet with *minimum* ESP padding.. 
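*
* Worst case of the sums below (a sketch): an action applying AH,
* ESP and self-encapsulation is charged
*
*	IPSEC_MAX_AH_HDR_SIZE + IPSEC_MAX_ESP_HDR_SIZE +
*	    sizeof (struct udphdr) + IP_SIMPLE_HDR_LENGTH
*
* bytes, where the udphdr term allows for UDP encapsulation of ESP
* (NAT traversal).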
3072 */ 3073 int32_t 3074 ipsec_act_ovhd(const ipsec_act_t *act) 3075 { 3076 int32_t overhead = 0; 3077 3078 if (act->ipa_type == IPSEC_ACT_APPLY) { 3079 const ipsec_prot_t *ipp = &act->ipa_apply; 3080 3081 if (ipp->ipp_use_ah) 3082 overhead += IPSEC_MAX_AH_HDR_SIZE; 3083 if (ipp->ipp_use_esp) { 3084 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3085 overhead += sizeof (struct udphdr); 3086 } 3087 if (ipp->ipp_use_se) 3088 overhead += IP_SIMPLE_HDR_LENGTH; 3089 } 3090 return (overhead); 3091 } 3092 3093 /* 3094 * This hash function is used only when creating policies and thus is not 3095 * performance-critical for packet flows. 3096 * 3097 * Future work: canonicalize the structures hashed with this (i.e., 3098 * zeroize padding) so the hash works correctly. 3099 */ 3100 /* ARGSUSED */ 3101 static uint32_t 3102 policy_hash(int size, const void *start, const void *end) 3103 { 3104 return (0); 3105 } 3106 3107 3108 /* 3109 * Hash function macros for each address type. 3110 * 3111 * The IPV6 hash function assumes that the low order 32-bits of the 3112 * address (typically containing the low order 24 bits of the mac 3113 * address) are reasonably well-distributed. Revisit this if we run 3114 * into trouble from lots of collisions on ::1 addresses and the like 3115 * (seems unlikely). 3116 */ 3117 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3118 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3119 3120 /* 3121 * These two hash functions should produce coordinated values 3122 * but have slightly different roles. 3123 */ 3124 static uint32_t 3125 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3126 { 3127 uint32_t valid = selkey->ipsl_valid; 3128 ipsec_stack_t *ipss = ns->netstack_ipsec; 3129 3130 if (!(valid & IPSL_REMOTE_ADDR)) 3131 return (IPSEC_SEL_NOHASH); 3132 3133 if (valid & IPSL_IPV4) { 3134 if (selkey->ipsl_remote_pfxlen == 32) { 3135 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3136 ipss->ipsec_spd_hashsize)); 3137 } 3138 } 3139 if (valid & IPSL_IPV6) { 3140 if (selkey->ipsl_remote_pfxlen == 128) { 3141 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3142 ipss->ipsec_spd_hashsize)); 3143 } 3144 } 3145 return (IPSEC_SEL_NOHASH); 3146 } 3147 3148 static uint32_t 3149 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3150 { 3151 if (sel->ips_isv4) { 3152 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3153 root->ipr_nchains)); 3154 } 3155 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3156 } 3157 3158 /* 3159 * Intern actions into the action hash table. 3160 */ 3161 ipsec_action_t * 3162 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3163 { 3164 int i; 3165 uint32_t hval; 3166 ipsec_action_t *ap; 3167 ipsec_action_t *prev = NULL; 3168 int32_t overhead, maxovhd = 0; 3169 boolean_t allow_clear = B_FALSE; 3170 boolean_t want_ah = B_FALSE; 3171 boolean_t want_esp = B_FALSE; 3172 boolean_t want_se = B_FALSE; 3173 boolean_t want_unique = B_FALSE; 3174 ipsec_stack_t *ipss = ns->netstack_ipsec; 3175 3176 /* 3177 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3178 * so we can use a non-trivial policy_hash function. 
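*
* A sketch of the interning below: for a[] = { A, B }, the chain
* B -> NULL is looked up (or created) first, then A -> B, so two
* policies with identical action lists end up sharing a single
* ipsec_action_t chain; the chain head carries the caller's
* reference.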
3179 */ 3180 for (i = n-1; i >= 0; i--) { 3181 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3182 3183 HASH_LOCK(ipss->ipsec_action_hash, hval); 3184 3185 for (HASH_ITERATE(ap, ipa_hash, 3186 ipss->ipsec_action_hash, hval)) { 3187 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3188 continue; 3189 if (ap->ipa_next != prev) 3190 continue; 3191 break; 3192 } 3193 if (ap != NULL) { 3194 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3195 prev = ap; 3196 continue; 3197 } 3198 /* 3199 * need to allocate a new one.. 3200 */ 3201 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3202 if (ap == NULL) { 3203 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3204 if (prev != NULL) 3205 ipsec_action_free(prev); 3206 return (NULL); 3207 } 3208 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3209 3210 ap->ipa_next = prev; 3211 ap->ipa_act = a[i]; 3212 3213 overhead = ipsec_act_ovhd(&a[i]); 3214 if (maxovhd < overhead) 3215 maxovhd = overhead; 3216 3217 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3218 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3219 allow_clear = B_TRUE; 3220 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3221 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3222 3223 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3224 want_ah |= ipp->ipp_use_ah; 3225 want_esp |= ipp->ipp_use_esp; 3226 want_se |= ipp->ipp_use_se; 3227 want_unique |= ipp->ipp_use_unique; 3228 } 3229 ap->ipa_allow_clear = allow_clear; 3230 ap->ipa_want_ah = want_ah; 3231 ap->ipa_want_esp = want_esp; 3232 ap->ipa_want_se = want_se; 3233 ap->ipa_want_unique = want_unique; 3234 ap->ipa_refs = 1; /* from the hash table */ 3235 ap->ipa_ovhd = maxovhd; 3236 if (prev) 3237 prev->ipa_refs++; 3238 prev = ap; 3239 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3240 } 3241 3242 ap->ipa_refs++; /* caller's reference */ 3243 3244 return (ap); 3245 } 3246 3247 /* 3248 * Called when refcount goes to 0, indicating that all references to this 3249 * node are gone. 3250 * 3251 * This does not unchain the action from the hash table. 3252 */ 3253 void 3254 ipsec_action_free(ipsec_action_t *ap) 3255 { 3256 for (;;) { 3257 ipsec_action_t *np = ap->ipa_next; 3258 ASSERT(ap->ipa_refs == 0); 3259 ASSERT(ap->ipa_hash.hash_pp == NULL); 3260 kmem_cache_free(ipsec_action_cache, ap); 3261 ap = np; 3262 /* Inlined IPACT_REFRELE -- avoid recursion */ 3263 if (ap == NULL) 3264 break; 3265 membar_exit(); 3266 if (atomic_dec_32_nv(&(ap)->ipa_refs) != 0) 3267 break; 3268 /* End inlined IPACT_REFRELE */ 3269 } 3270 } 3271 3272 /* 3273 * Called when the action hash table goes away. 3274 * 3275 * The actions can be queued on an mblk with ipsec_in or 3276 * ipsec_out, hence the actions might still be around. 3277 * But we decrement ipa_refs here since we no longer have 3278 * a reference to the action from the hash table. 3279 */ 3280 static void 3281 ipsec_action_free_table(ipsec_action_t *ap) 3282 { 3283 while (ap != NULL) { 3284 ipsec_action_t *np = ap->ipa_next; 3285 3286 /* FIXME: remove? 
*/
3287 (void) printf("ipsec_action_free_table(%p) ref %d\n",
3288 (void *)ap, ap->ipa_refs);
3289 ASSERT(ap->ipa_refs > 0);
3290 IPACT_REFRELE(ap);
3291 ap = np;
3292 }
3293 }
3294
3295 /*
3296 * Need to walk all stack instances since the reclaim function
3297 * is global for all instances.
3298 */
3299 /* ARGSUSED */
3300 static void
3301 ipsec_action_reclaim(void *arg)
3302 {
3303 netstack_handle_t nh;
3304 netstack_t *ns;
3305 ipsec_stack_t *ipss;
3306
3307 netstack_next_init(&nh);
3308 while ((ns = netstack_next(&nh)) != NULL) {
3309 /*
3310 * netstack_next() can return a netstack_t with a NULL
3311 * netstack_ipsec at boot time.
3312 */
3313 if ((ipss = ns->netstack_ipsec) == NULL) {
3314 netstack_rele(ns);
3315 continue;
3316 }
3317 ipsec_action_reclaim_stack(ipss);
3318 netstack_rele(ns);
3319 }
3320 netstack_next_fini(&nh);
3321 }
3322
3323 /*
3324 * Periodically sweep action hash table for actions with refcount==1, and
3325 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE)
3326 * because we can't close the race between another thread finding the action
3327 * in the hash table without holding the bucket lock during IPACT_REFRELE.
3328 * Instead, we run this function sporadically to clean up after ourselves;
3329 * we also set it as the "reclaim" function for the action kmem_cache.
3330 *
3331 * Note that it may take several passes of ipsec_action_reclaim_stack() to
3332 * free all "stale" actions.
3333 */
3334 static void
3335 ipsec_action_reclaim_stack(ipsec_stack_t *ipss)
3336 {
3337 int i;
3338
3339 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
3340 ipsec_action_t *ap, *np;
3341
3342 /* skip the lock if nobody home */
3343 if (ipss->ipsec_action_hash[i].hash_head == NULL)
3344 continue;
3345
3346 HASH_LOCK(ipss->ipsec_action_hash, i);
3347 for (ap = ipss->ipsec_action_hash[i].hash_head;
3348 ap != NULL; ap = np) {
3349 ASSERT(ap->ipa_refs > 0);
3350 np = ap->ipa_hash.hash_next;
3351 if (ap->ipa_refs > 1)
3352 continue;
3353 HASH_UNCHAIN(ap, ipa_hash,
3354 ipss->ipsec_action_hash, i);
3355 IPACT_REFRELE(ap);
3356 }
3357 HASH_UNLOCK(ipss->ipsec_action_hash, i);
3358 }
3359 }
3360
3361 /*
3362 * Intern a selector set into the selector set hash table.
3363 * This is simpler than the actions case...
3364 */
3365 static ipsec_sel_t *
3366 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns)
3367 {
3368 ipsec_sel_t *sp;
3369 uint32_t hval, bucket;
3370 ipsec_stack_t *ipss = ns->netstack_ipsec;
3371
3372 /*
3373 * Exactly one AF bit should be set in selkey.
3374 */
3375 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^
3376 !(selkey->ipsl_valid & IPSL_IPV6));
3377
3378 hval = selkey_hash(selkey, ns);
3379 /* ipsl_pol_hval stays uninitialized until insertion into a polhead. */
3380 selkey->ipsl_sel_hval = hval;
3381
3382 bucket = (hval == IPSEC_SEL_NOHASH) ?
0 : hval;
3383
3384 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket));
3385 HASH_LOCK(ipss->ipsec_sel_hash, bucket);
3386
3387 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) {
3388 if (bcmp(&sp->ipsl_key, selkey,
3389 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0)
3390 break;
3391 }
3392 if (sp != NULL) {
3393 sp->ipsl_refs++;
3394
3395 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);
3396 return (sp);
3397 }
3398
3399 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP);
3400 if (sp == NULL) {
3401 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);
3402 return (NULL);
3403 }
3404
3405 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket);
3406 sp->ipsl_refs = 2; /* one for hash table, one for caller */
3407 sp->ipsl_key = *selkey;
3408 /* Set to uninitialized and have insertion into polhead fix things. */
3409 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH)
3410 sp->ipsl_key.ipsl_pol_hval = 0;
3411 else
3412 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH;
3413
3414 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);
3415
3416 return (sp);
3417 }
3418
3419 static void
3420 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns)
3421 {
3422 ipsec_sel_t *sp = *spp;
3423 int hval = sp->ipsl_key.ipsl_sel_hval;
3424 ipsec_stack_t *ipss = ns->netstack_ipsec;
3425
3426 *spp = NULL;
3427
3428 if (hval == IPSEC_SEL_NOHASH)
3429 hval = 0;
3430
3431 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval));
3432 HASH_LOCK(ipss->ipsec_sel_hash, hval);
3433 if (--sp->ipsl_refs == 1) {
3434 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval);
3435 sp->ipsl_refs--;
3436 HASH_UNLOCK(ipss->ipsec_sel_hash, hval);
3437 ASSERT(sp->ipsl_refs == 0);
3438 kmem_cache_free(ipsec_sel_cache, sp);
3439
3440 return;
3441 }
3442
3443 HASH_UNLOCK(ipss->ipsec_sel_hash, hval);
3444 }
3445
3446 /*
3447 * Free a policy rule which we know is no longer being referenced.
3448 */
3449 void
3450 ipsec_policy_free(ipsec_policy_t *ipp)
3451 {
3452 ASSERT(ipp->ipsp_refs == 0);
3453 ASSERT(ipp->ipsp_sel != NULL);
3454 ASSERT(ipp->ipsp_act != NULL);
3455 ASSERT(ipp->ipsp_netstack != NULL);
3456
3457 ipsec_sel_rel(&ipp->ipsp_sel, ipp->ipsp_netstack);
3458 IPACT_REFRELE(ipp->ipsp_act);
3459 kmem_cache_free(ipsec_pol_cache, ipp);
3460 }
3461
3462 /*
3463 * Construction of new policy rules; construct a policy, and add it to
3464 * the appropriate tables.
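*
* A typical usage sketch (hypothetical caller; error handling
* elided) pairs this with ipsec_check_policy() and
* ipsec_enter_policy() under the polhead write lock:
*
*	pol = ipsec_policy_create(&keys, acts, nacts, prio, NULL, ns);
*	rw_enter(&php->iph_lock, RW_WRITER);
*	if (pol != NULL && ipsec_check_policy(php, pol, dir))
*		ipsec_enter_policy(php, pol, dir, ns);
*	rw_exit(&php->iph_lock);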
3465 */ 3466 ipsec_policy_t * 3467 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3468 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3469 { 3470 ipsec_action_t *ap; 3471 ipsec_sel_t *sp; 3472 ipsec_policy_t *ipp; 3473 ipsec_stack_t *ipss = ns->netstack_ipsec; 3474 3475 if (index_ptr == NULL) 3476 index_ptr = &ipss->ipsec_next_policy_index; 3477 3478 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3479 ap = ipsec_act_find(a, nacts, ns); 3480 sp = ipsec_find_sel(keys, ns); 3481 3482 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3483 if (ap != NULL) { 3484 IPACT_REFRELE(ap); 3485 } 3486 if (sp != NULL) 3487 ipsec_sel_rel(&sp, ns); 3488 if (ipp != NULL) 3489 kmem_cache_free(ipsec_pol_cache, ipp); 3490 return (NULL); 3491 } 3492 3493 HASH_NULL(ipp, ipsp_hash); 3494 3495 ipp->ipsp_netstack = ns; /* Needed for ipsec_policy_free */ 3496 ipp->ipsp_refs = 1; /* caller's reference */ 3497 ipp->ipsp_sel = sp; 3498 ipp->ipsp_act = ap; 3499 ipp->ipsp_prio = prio; /* rule priority */ 3500 ipp->ipsp_index = *index_ptr; 3501 (*index_ptr)++; 3502 3503 return (ipp); 3504 } 3505 3506 static void 3507 ipsec_update_present_flags(ipsec_stack_t *ipss) 3508 { 3509 boolean_t hashpol; 3510 3511 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3512 3513 if (hashpol) { 3514 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3515 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3516 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3517 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3518 return; 3519 } 3520 3521 ipss->ipsec_outbound_v4_policy_present = (NULL != 3522 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3523 ipr_nonhash[IPSEC_AF_V4]); 3524 ipss->ipsec_outbound_v6_policy_present = (NULL != 3525 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3526 ipr_nonhash[IPSEC_AF_V6]); 3527 ipss->ipsec_inbound_v4_policy_present = (NULL != 3528 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3529 ipr_nonhash[IPSEC_AF_V4]); 3530 ipss->ipsec_inbound_v6_policy_present = (NULL != 3531 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3532 ipr_nonhash[IPSEC_AF_V6]); 3533 } 3534 3535 boolean_t 3536 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3537 netstack_t *ns) 3538 { 3539 ipsec_sel_t *sp; 3540 ipsec_policy_t *ip, *nip, *head; 3541 int af; 3542 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3543 3544 sp = ipsec_find_sel(keys, ns); 3545 3546 if (sp == NULL) 3547 return (B_FALSE); 3548 3549 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6;
3550
3551 rw_enter(&php->iph_lock, RW_WRITER);
3552
3553 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) {
3554 head = pr->ipr_nonhash[af];
3555 } else {
3556 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head;
3557 }
3558
3559 for (ip = head; ip != NULL; ip = nip) {
3560 nip = ip->ipsp_hash.hash_next;
3561 if (ip->ipsp_sel != sp) {
3562 continue;
3563 }
3564
3565 IPPOL_UNCHAIN(php, ip);
3566
3567 php->iph_gen++;
3568 ipsec_update_present_flags(ns->netstack_ipsec);
3569
3570 rw_exit(&php->iph_lock);
3571
3572 ipsec_sel_rel(&sp, ns);
3573
3574 return (B_TRUE);
3575 }
3576
3577 rw_exit(&php->iph_lock);
3578 ipsec_sel_rel(&sp, ns);
3579 return (B_FALSE);
3580 }
3581
3582 int
3583 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index,
3584 netstack_t *ns)
3585 {
3586 boolean_t found = B_FALSE;
3587 ipsec_policy_t ipkey;
3588 ipsec_policy_t *ip;
3589 avl_index_t where;
3590
3591 bzero(&ipkey, sizeof (ipkey));
3592 ipkey.ipsp_index = policy_index;
3593
3594 rw_enter(&php->iph_lock, RW_WRITER);
3595
3596 /*
3597 * We could be cleverer here about the walk, but (k+1)*log(N)
3598 * will do for now (k == number of matches, N == number of
3599 * table entries).
3600 */
3601 for (;;) {
3602 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid,
3603 (void *)&ipkey, &where);
3604 ASSERT(ip == NULL);
3605
3606 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER);
3607
3608 if (ip == NULL)
3609 break;
3610
3611 if (ip->ipsp_index != policy_index) {
3612 ASSERT(ip->ipsp_index > policy_index);
3613 break;
3614 }
3615
3616 IPPOL_UNCHAIN(php, ip);
3617 found = B_TRUE;
3618 }
3619
3620 if (found) {
3621 php->iph_gen++;
3622 ipsec_update_present_flags(ns->netstack_ipsec);
3623 }
3624
3625 rw_exit(&php->iph_lock);
3626
3627 return (found ? 0 : ENOENT);
3628 }
3629
3630 /*
3631 * Given a constructed ipsec_policy_t policy rule, see if it can be entered
3632 * into the correct policy ruleset. As a side-effect, it sets the hash
3633 * entries on "ipp"'s ipsp_pol_hval.
3634 *
3635 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
3636 * duplicate policy exists with exactly the same selectors, or an ICMP
3637 * rule exists with a different encryption/authentication action).
3638 */
3639 boolean_t
3640 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
3641 {
3642 ipsec_policy_root_t *pr = &php->iph_root[direction];
3643 int af = -1;
3644 ipsec_policy_t *p2, *head;
3645 uint8_t check_proto;
3646 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3647 uint32_t valid = selkey->ipsl_valid;
3648
3649 if (valid & IPSL_IPV6) {
3650 ASSERT(!(valid & IPSL_IPV4));
3651 af = IPSEC_AF_V6;
3652 check_proto = IPPROTO_ICMPV6;
3653 } else {
3654 ASSERT(valid & IPSL_IPV4);
3655 af = IPSEC_AF_V4;
3656 check_proto = IPPROTO_ICMP;
3657 }
3658
3659 ASSERT(RW_WRITE_HELD(&php->iph_lock));
3660
3661 /*
3662 * Double-check that we don't have any duplicate selectors here.
3663 * Because selectors are interned below, we need only compare pointers
3664 * for equality.
3665 */
3666 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
3667 head = pr->ipr_nonhash[af];
3668 } else {
3669 selkey->ipsl_pol_hval =
3670 (selkey->ipsl_valid & IPSL_IPV4) ?
IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4,
3672 pr->ipr_nchains) :
3673 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6,
3674 pr->ipr_nchains);
3675
3676 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head;
3677 }
3678
3679 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3680 if (p2->ipsp_sel == ipp->ipsp_sel)
3681 return (B_FALSE);
3682 }
3683
3684 /*
3685 * If it's ICMP and not a drop or pass rule, run through the ICMP
3686 * rules and make sure the action is either new or the same as any
3687 * other action. We don't have to check the full chain because
3688 * discard and bypass will override all other actions.
3689 */
3690
3691 if (valid & IPSL_PROTOCOL &&
3692 selkey->ipsl_proto == check_proto &&
3693 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) {
3694
3695 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3696
3697 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL &&
3698 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto &&
3699 (p2->ipsp_act->ipa_act.ipa_type ==
3700 IPSEC_ACT_APPLY)) {
3701 return (ipsec_compare_action(p2, ipp));
3702 }
3703 }
3704 }
3705
3706 return (B_TRUE);
3707 }
3708
3709 /*
3710 * Compare the action chains of two policies for equality.
3711 * B_TRUE -> effective equality.
3712 */
3713
3714 static boolean_t
3715 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2)
3716 {
3717
3718 ipsec_action_t *act1, *act2;
3719
3720 /* We have a valid rule. Let's compare the actions. */
3721 if (p1->ipsp_act == p2->ipsp_act) {
3722 /* Same action. We are good. */
3723 return (B_TRUE);
3724 }
3725
3726 /* We have to walk the chain. */
3727
3728 act1 = p1->ipsp_act;
3729 act2 = p2->ipsp_act;
3730
3731 while (act1 != NULL && act2 != NULL) {
3732
3733 /* Otherwise, are we close enough? */
3734 if (act1->ipa_allow_clear != act2->ipa_allow_clear ||
3735 act1->ipa_want_ah != act2->ipa_want_ah ||
3736 act1->ipa_want_esp != act2->ipa_want_esp ||
3737 act1->ipa_want_se != act2->ipa_want_se) {
3738 /* Nope, we aren't. */
3739 return (B_FALSE);
3740 }
3741
3742 if (act1->ipa_want_ah) {
3743 if (act1->ipa_act.ipa_apply.ipp_auth_alg !=
3744 act2->ipa_act.ipa_apply.ipp_auth_alg) {
3745 return (B_FALSE);
3746 }
3747
3748 if (act1->ipa_act.ipa_apply.ipp_ah_minbits !=
3749 act2->ipa_act.ipa_apply.ipp_ah_minbits ||
3750 act1->ipa_act.ipa_apply.ipp_ah_maxbits !=
3751 act2->ipa_act.ipa_apply.ipp_ah_maxbits) {
3752 return (B_FALSE);
3753 }
3754 }
3755
3756 if (act1->ipa_want_esp) {
3757 if (act1->ipa_act.ipa_apply.ipp_use_esp !=
3758 act2->ipa_act.ipa_apply.ipp_use_esp ||
3759 act1->ipa_act.ipa_apply.ipp_use_espa !=
3760 act2->ipa_act.ipa_apply.ipp_use_espa) {
3761 return (B_FALSE);
3762 }
3763
3764 if (act1->ipa_act.ipa_apply.ipp_use_esp) {
3765 if (act1->ipa_act.ipa_apply.ipp_encr_alg !=
3766 act2->ipa_act.ipa_apply.ipp_encr_alg) {
3767 return (B_FALSE);
3768 }
3769
3770 if (act1->ipa_act.ipa_apply.ipp_espe_minbits !=
3771 act2->ipa_act.ipa_apply.ipp_espe_minbits ||
3772 act1->ipa_act.ipa_apply.ipp_espe_maxbits !=
3773 act2->ipa_act.ipa_apply.ipp_espe_maxbits) {
3774 return (B_FALSE);
3775 }
3776 }
3777
3778 if (act1->ipa_act.ipa_apply.ipp_use_espa) {
3779 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg !=
3780 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) {
3781 return (B_FALSE);
3782 }
3783
3784 if (act1->ipa_act.ipa_apply.ipp_espa_minbits !=
3785 act2->ipa_act.ipa_apply.ipp_espa_minbits ||
3786 act1->ipa_act.ipa_apply.ipp_espa_maxbits !=
3787 act2->ipa_act.ipa_apply.ipp_espa_maxbits) {
3788 return (B_FALSE);
3789 }
3790 }
3791
3792 }
3793
3794 act1 =
act1->ipa_next; 3795 act2 = act2->ipa_next; 3796 } 3797 3798 if (act1 != NULL || act2 != NULL) { 3799 return (B_FALSE); 3800 } 3801 3802 return (B_TRUE); 3803 } 3804 3805 3806 /* 3807 * Given a constructed ipsec_policy_t policy rule, enter it into 3808 * the correct policy ruleset. 3809 * 3810 * ipsec_check_policy() is assumed to have succeeded first (to check for 3811 * duplicates). 3812 */ 3813 void 3814 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3815 netstack_t *ns) 3816 { 3817 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3818 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3819 uint32_t valid = selkey->ipsl_valid; 3820 uint32_t hval = selkey->ipsl_pol_hval; 3821 int af = -1; 3822 3823 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3824 3825 if (valid & IPSL_IPV6) { 3826 ASSERT(!(valid & IPSL_IPV4)); 3827 af = IPSEC_AF_V6; 3828 } else { 3829 ASSERT(valid & IPSL_IPV4); 3830 af = IPSEC_AF_V4; 3831 } 3832 3833 php->iph_gen++; 3834 3835 if (hval == IPSEC_SEL_NOHASH) { 3836 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3837 } else { 3838 HASH_LOCK(pr->ipr_hash, hval); 3839 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3840 HASH_UNLOCK(pr->ipr_hash, hval); 3841 } 3842 3843 ipsec_insert_always(&php->iph_rulebyid, ipp); 3844 3845 ipsec_update_present_flags(ns->netstack_ipsec); 3846 } 3847 3848 static void 3849 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr) 3850 { 3851 ipsec_policy_t *ip, *nip; 3852 int af, chain, nchain; 3853 3854 for (af = 0; af < IPSEC_NAF; af++) { 3855 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3856 nip = ip->ipsp_hash.hash_next; 3857 IPPOL_UNCHAIN(php, ip); 3858 } 3859 ipr->ipr_nonhash[af] = NULL; 3860 } 3861 nchain = ipr->ipr_nchains; 3862 3863 for (chain = 0; chain < nchain; chain++) { 3864 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3865 ip = nip) { 3866 nip = ip->ipsp_hash.hash_next; 3867 IPPOL_UNCHAIN(php, ip); 3868 } 3869 ipr->ipr_hash[chain].hash_head = NULL; 3870 } 3871 } 3872 3873 /* 3874 * Create and insert inbound or outbound policy associated with actp for the 3875 * address family fam into the policy head ph. Returns B_TRUE if policy was 3876 * inserted, and B_FALSE otherwise. 3877 */ 3878 boolean_t 3879 ipsec_polhead_insert(ipsec_policy_head_t *ph, ipsec_act_t *actp, uint_t nact, 3880 int fam, int ptype, netstack_t *ns) 3881 { 3882 ipsec_selkey_t sel; 3883 ipsec_policy_t *pol; 3884 ipsec_policy_root_t *pr; 3885 3886 bzero(&sel, sizeof (sel)); 3887 sel.ipsl_valid = (fam == IPSEC_AF_V4 ? 
IPSL_IPV4 : IPSL_IPV6);
3888 if ((pol = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET,
3889 NULL, ns)) != NULL) {
3890 pr = &ph->iph_root[ptype];
3891 HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[fam]);
3892 ipsec_insert_always(&ph->iph_rulebyid, pol);
3893 }
3894 return (pol != NULL);
3895 }
3896
3897 void
3898 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns)
3899 {
3900 int dir;
3901
3902 ASSERT(RW_WRITE_HELD(&php->iph_lock));
3903
3904 for (dir = 0; dir < IPSEC_NTYPES; dir++)
3905 ipsec_ipr_flush(php, &php->iph_root[dir]);
3906
3907 php->iph_gen++;
3908 ipsec_update_present_flags(ns->netstack_ipsec);
3909 }
3910
3911 void
3912 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns)
3913 {
3914 int dir;
3915
3916 ASSERT(php->iph_refs == 0);
3917
3918 rw_enter(&php->iph_lock, RW_WRITER);
3919 ipsec_polhead_flush(php, ns);
3920 rw_exit(&php->iph_lock);
3921 rw_destroy(&php->iph_lock);
3922 for (dir = 0; dir < IPSEC_NTYPES; dir++) {
3923 ipsec_policy_root_t *ipr = &php->iph_root[dir];
3924 int chain;
3925
3926 for (chain = 0; chain < ipr->ipr_nchains; chain++)
3927 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
3928
3929 }
3930 ipsec_polhead_free_table(php);
3931 kmem_free(php, sizeof (*php));
3932 }
3933
3934 static void
3935 ipsec_ipr_init(ipsec_policy_root_t *ipr)
3936 {
3937 int af;
3938
3939 ipr->ipr_nchains = 0;
3940 ipr->ipr_hash = NULL;
3941
3942 for (af = 0; af < IPSEC_NAF; af++) {
3943 ipr->ipr_nonhash[af] = NULL;
3944 }
3945 }
3946
3947 ipsec_policy_head_t *
3948 ipsec_polhead_create(void)
3949 {
3950 ipsec_policy_head_t *php;
3951
3952 php = kmem_alloc(sizeof (*php), KM_NOSLEEP);
3953 if (php == NULL)
3954 return (php);
3955
3956 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL);
3957 php->iph_refs = 1;
3958 php->iph_gen = 0;
3959
3960 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]);
3961 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]);
3962
3963 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid,
3964 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));
3965
3966 return (php);
3967 }
3968
3969 /*
3970 * Clone the policy head into a new polhead; release one reference to the
3971 * old one and return the only reference to the new one.
3972 * If the old one had a refcount of 1, just return it.
3973 */
3974 ipsec_policy_head_t *
3975 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns)
3976 {
3977 ipsec_policy_head_t *nphp;
3978
3979 if (php == NULL)
3980 return (ipsec_polhead_create());
3981 else if (php->iph_refs == 1)
3982 return (php);
3983
3984 nphp = ipsec_polhead_create();
3985 if (nphp == NULL)
3986 return (NULL);
3987
3988 if (ipsec_copy_polhead(php, nphp, ns) != 0) {
3989 ipsec_polhead_free(nphp, ns);
3990 return (NULL);
3991 }
3992 IPPH_REFRELE(php, ns);
3993 return (nphp);
3994 }
3995
3996 /*
3997 * When sending a response to an ICMP request or generating a RST
3998 * in the TCP case, the outbound packets need to go out at the same level
3999 * of protection as the incoming ones, i.e., we associate our outbound
4000 * policy with how the packet came in. We call this after we have
4001 * accepted the incoming packet, which may or may not have been in
4002 * the clear, and hence we are sending the reply back with the policy
4003 * matching the incoming datagram's policy.
4004 *
4005 * NOTE: This technique serves two purposes:
4006 *
4007 * 1) If we have multiple outbound policies, we send out a reply
4008 * matching how it came in rather than matching the outbound
4009 * policy.
4010 *
4011 * 2) For asymmetric policies, we want to make sure that incoming
4012 * and outgoing traffic have the same level of protection. Asymmetric
4013 * policies exist only with global policy, where we may not have
4014 * both outbound and inbound at the same time.
4015 *
4016 * NOTE2: This function is called by cleartext cases, so it needs to be
4017 * in IP proper.
4018 *
4019 * Note: the caller has moved other parts of ira into ixa already.
4020 */
4021 boolean_t
4022 ipsec_in_to_out(ip_recv_attr_t *ira, ip_xmit_attr_t *ixa, mblk_t *data_mp,
4023 ipha_t *ipha, ip6_t *ip6h)
4024 {
4025 ipsec_selector_t sel;
4026 ipsec_action_t *reflect_action = NULL;
4027 netstack_t *ns = ixa->ixa_ipst->ips_netstack;
4028
4029 bzero((void*)&sel, sizeof (sel));
4030
4031 if (ira->ira_ipsec_action != NULL) {
4032 /* transfer reference. */
4033 reflect_action = ira->ira_ipsec_action;
4034 ira->ira_ipsec_action = NULL;
4035 } else if (!(ira->ira_flags & IRAF_LOOPBACK))
4036 reflect_action = ipsec_in_to_out_action(ira);
4037
4038 /*
4039 * The caller is going to send the datagram out, which might
4040 * go on the wire or be delivered locally through ire_send_local.
4041 *
4042 * 1) If it goes out on the wire, new associations will be
4043 * obtained.
4044 * 2) If it is delivered locally, ire_send_local will convert
4045 * this ip_xmit_attr_t back to an ip_recv_attr_t, looking at the
4046 * requests.
4047 */
4048 ixa->ixa_ipsec_action = reflect_action;
4049
4050 if (!ipsec_init_outbound_ports(&sel, data_mp, ipha, ip6h, 0,
4051 ns->netstack_ipsec)) {
4052 /* Note: data_mp already consumed and ip_drop_packet done */
4053 return (B_FALSE);
4054 }
4055 ixa->ixa_ipsec_src_port = sel.ips_local_port;
4056 ixa->ixa_ipsec_dst_port = sel.ips_remote_port;
4057 ixa->ixa_ipsec_proto = sel.ips_protocol;
4058 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type;
4059 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code;
4060
4061 /*
4062 * Don't use global policy for this, as we want
4063 * to use the same protection that was applied to the inbound packet.
4064 * Thus we set IXAF_NO_IPSEC if it arrived in the clear, so that
4065 * it is sent in the clear.
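 *
 * For example, a RST generated in response to a cleartext SYN is
 * itself sent in the clear, while a RST answering a SYN that
 * arrived under ESP goes back out under the reflected action set
 * up above.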
4066 */ 4067 if (ira->ira_flags & IRAF_IPSEC_SECURE) 4068 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4069 else 4070 ixa->ixa_flags |= IXAF_NO_IPSEC; 4071 4072 return (B_TRUE); 4073 } 4074 4075 void 4076 ipsec_out_release_refs(ip_xmit_attr_t *ixa) 4077 { 4078 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) 4079 return; 4080 4081 if (ixa->ixa_ipsec_ah_sa != NULL) { 4082 IPSA_REFRELE(ixa->ixa_ipsec_ah_sa); 4083 ixa->ixa_ipsec_ah_sa = NULL; 4084 } 4085 if (ixa->ixa_ipsec_esp_sa != NULL) { 4086 IPSA_REFRELE(ixa->ixa_ipsec_esp_sa); 4087 ixa->ixa_ipsec_esp_sa = NULL; 4088 } 4089 if (ixa->ixa_ipsec_policy != NULL) { 4090 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4091 ixa->ixa_ipsec_policy = NULL; 4092 } 4093 if (ixa->ixa_ipsec_action != NULL) { 4094 IPACT_REFRELE(ixa->ixa_ipsec_action); 4095 ixa->ixa_ipsec_action = NULL; 4096 } 4097 if (ixa->ixa_ipsec_latch) { 4098 IPLATCH_REFRELE(ixa->ixa_ipsec_latch); 4099 ixa->ixa_ipsec_latch = NULL; 4100 } 4101 /* Clear the soft references to the SAs */ 4102 ixa->ixa_ipsec_ref[0].ipsr_sa = NULL; 4103 ixa->ixa_ipsec_ref[0].ipsr_bucket = NULL; 4104 ixa->ixa_ipsec_ref[0].ipsr_gen = 0; 4105 ixa->ixa_ipsec_ref[1].ipsr_sa = NULL; 4106 ixa->ixa_ipsec_ref[1].ipsr_bucket = NULL; 4107 ixa->ixa_ipsec_ref[1].ipsr_gen = 0; 4108 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4109 } 4110 4111 void 4112 ipsec_in_release_refs(ip_recv_attr_t *ira) 4113 { 4114 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) 4115 return; 4116 4117 if (ira->ira_ipsec_ah_sa != NULL) { 4118 IPSA_REFRELE(ira->ira_ipsec_ah_sa); 4119 ira->ira_ipsec_ah_sa = NULL; 4120 } 4121 if (ira->ira_ipsec_esp_sa != NULL) { 4122 IPSA_REFRELE(ira->ira_ipsec_esp_sa); 4123 ira->ira_ipsec_esp_sa = NULL; 4124 } 4125 ira->ira_flags &= ~IRAF_IPSEC_SECURE; 4126 } 4127 4128 /* 4129 * This is called from ire_send_local when a packet 4130 * is looped back. We setup the ip_recv_attr_t "borrowing" the references 4131 * held by the callers. 4132 * Note that we don't do any IPsec but we carry the actions and IPSEC flags 4133 * across so that the fanout policy checks see that IPsec was applied. 4134 * 4135 * The caller should do ipsec_in_release_refs() on the ira by calling 4136 * ira_cleanup(). 
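 *
 * A minimal usage sketch (illustrative only; the caller context and
 * the exact cleanup flag are assumed, not taken from this function):
 *
 *	ip_recv_attr_t iras;
 *
 *	ipsec_out_to_in(ixa, ill, &iras);	borrows ixa's references
 *	(fan the looped-back packet out using &iras)
 *	ira_cleanup(&iras, ...);		releases the borrowed refs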
4137 */ 4138 void 4139 ipsec_out_to_in(ip_xmit_attr_t *ixa, ill_t *ill, ip_recv_attr_t *ira) 4140 { 4141 ipsec_policy_t *pol; 4142 ipsec_action_t *act; 4143 4144 /* Non-IPsec operations */ 4145 ira->ira_free_flags = 0; 4146 ira->ira_zoneid = ixa->ixa_zoneid; 4147 ira->ira_cred = ixa->ixa_cred; 4148 ira->ira_cpid = ixa->ixa_cpid; 4149 ira->ira_tsl = ixa->ixa_tsl; 4150 ira->ira_ill = ira->ira_rill = ill; 4151 ira->ira_flags = ixa->ixa_flags & IAF_MASK; 4152 ira->ira_no_loop_zoneid = ixa->ixa_no_loop_zoneid; 4153 ira->ira_pktlen = ixa->ixa_pktlen; 4154 ira->ira_ip_hdr_length = ixa->ixa_ip_hdr_length; 4155 ira->ira_protocol = ixa->ixa_protocol; 4156 ira->ira_mhip = NULL; 4157 4158 ira->ira_flags |= IRAF_LOOPBACK | IRAF_L2SRC_LOOPBACK; 4159 4160 ira->ira_sqp = ixa->ixa_sqp; 4161 ira->ira_ring = NULL; 4162 4163 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 4164 ira->ira_rifindex = ira->ira_ruifindex; 4165 4166 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) 4167 return; 4168 4169 ira->ira_flags |= IRAF_IPSEC_SECURE; 4170 4171 ira->ira_ipsec_ah_sa = NULL; 4172 ira->ira_ipsec_esp_sa = NULL; 4173 4174 act = ixa->ixa_ipsec_action; 4175 if (act == NULL) { 4176 pol = ixa->ixa_ipsec_policy; 4177 if (pol != NULL) { 4178 act = pol->ipsp_act; 4179 IPACT_REFHOLD(act); 4180 } 4181 } 4182 ixa->ixa_ipsec_action = NULL; 4183 ira->ira_ipsec_action = act; 4184 } 4185 4186 /* 4187 * Consults global policy and per-socket policy to see whether this datagram 4188 * should go out secure. If so it updates the ip_xmit_attr_t 4189 * Should not be used when connecting, since then we want to latch the policy. 4190 * 4191 * If connp is NULL we just look at the global policy. 4192 * 4193 * Returns NULL if the packet was dropped, in which case the MIB has 4194 * been incremented and ip_drop_packet done. 
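 *
 * Typical call pattern (a sketch; the surrounding output-path code is
 * assumed):
 *
 *	mp = ip_output_attach_policy(mp, ipha, NULL, connp, ixa);
 *	if (mp == NULL)
 *		return;		dropped, MIB already bumped
 *	(continue down the output path; ixa now carries any policy)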
4195 */ 4196 mblk_t * 4197 ip_output_attach_policy(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, 4198 const conn_t *connp, ip_xmit_attr_t *ixa) 4199 { 4200 ipsec_selector_t sel; 4201 boolean_t policy_present; 4202 ip_stack_t *ipst = ixa->ixa_ipst; 4203 netstack_t *ns = ipst->ips_netstack; 4204 ipsec_stack_t *ipss = ns->netstack_ipsec; 4205 ipsec_policy_t *p; 4206 4207 ixa->ixa_ipsec_policy_gen = ipss->ipsec_system_policy.iph_gen; 4208 ASSERT((ipha != NULL && ip6h == NULL) || 4209 (ip6h != NULL && ipha == NULL)); 4210 4211 if (ipha != NULL) 4212 policy_present = ipss->ipsec_outbound_v4_policy_present; 4213 else 4214 policy_present = ipss->ipsec_outbound_v6_policy_present; 4215 4216 if (!policy_present && (connp == NULL || connp->conn_policy == NULL)) 4217 return (mp); 4218 4219 bzero((void*)&sel, sizeof (sel)); 4220 4221 if (ipha != NULL) { 4222 sel.ips_local_addr_v4 = ipha->ipha_src; 4223 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4224 sel.ips_isv4 = B_TRUE; 4225 } else { 4226 sel.ips_isv4 = B_FALSE; 4227 sel.ips_local_addr_v6 = ip6h->ip6_src; 4228 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, mp, NULL); 4229 } 4230 sel.ips_protocol = ixa->ixa_protocol; 4231 4232 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4233 if (ipha != NULL) { 4234 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4235 } else { 4236 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4237 } 4238 /* Note: mp already consumed and ip_drop_packet done */ 4239 return (NULL); 4240 } 4241 4242 ASSERT(ixa->ixa_ipsec_policy == NULL); 4243 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4244 ixa->ixa_ipsec_policy = p; 4245 if (p != NULL) { 4246 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4247 if (connp == NULL || connp->conn_policy == NULL) 4248 ixa->ixa_flags |= IXAF_IPSEC_GLOBAL_POLICY; 4249 } else { 4250 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4251 } 4252 4253 /* 4254 * Copy the right port information. 4255 */ 4256 ixa->ixa_ipsec_src_port = sel.ips_local_port; 4257 ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4258 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4259 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4260 ixa->ixa_ipsec_proto = sel.ips_protocol; 4261 return (mp); 4262 } 4263 4264 /* 4265 * When appropriate, this function caches inbound and outbound policy 4266 * for this connection. The outbound policy is stored in conn_ixa. 4267 * Note that it can not be used for SCTP since conn_faddr isn't set for SCTP. 4268 * 4269 * XXX need to work out more details about per-interface policy and 4270 * caching here! 4271 * 4272 * XXX may want to split inbound and outbound caching for ill.. 4273 */ 4274 int 4275 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4276 { 4277 boolean_t global_policy_present; 4278 netstack_t *ns = connp->conn_netstack; 4279 ipsec_stack_t *ipss = ns->netstack_ipsec; 4280 4281 connp->conn_ixa->ixa_ipsec_policy_gen = 4282 ipss->ipsec_system_policy.iph_gen; 4283 /* 4284 * There is no policy latching for ICMP sockets because we can't 4285 * decide on which policy to use until we see the packet and get 4286 * type/code selectors. 
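 *
 * For example, echo requests and timestamp requests seen on the same
 * ICMP socket may be covered by different rules, so nothing can be
 * latched up front.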
4287 */ 4288 if (connp->conn_proto == IPPROTO_ICMP || 4289 connp->conn_proto == IPPROTO_ICMPV6) { 4290 connp->conn_in_enforce_policy = 4291 connp->conn_out_enforce_policy = B_TRUE; 4292 if (connp->conn_latch != NULL) { 4293 IPLATCH_REFRELE(connp->conn_latch); 4294 connp->conn_latch = NULL; 4295 } 4296 if (connp->conn_latch_in_policy != NULL) { 4297 IPPOL_REFRELE(connp->conn_latch_in_policy); 4298 connp->conn_latch_in_policy = NULL; 4299 } 4300 if (connp->conn_latch_in_action != NULL) { 4301 IPACT_REFRELE(connp->conn_latch_in_action); 4302 connp->conn_latch_in_action = NULL; 4303 } 4304 if (connp->conn_ixa->ixa_ipsec_policy != NULL) { 4305 IPPOL_REFRELE(connp->conn_ixa->ixa_ipsec_policy); 4306 connp->conn_ixa->ixa_ipsec_policy = NULL; 4307 } 4308 if (connp->conn_ixa->ixa_ipsec_action != NULL) { 4309 IPACT_REFRELE(connp->conn_ixa->ixa_ipsec_action); 4310 connp->conn_ixa->ixa_ipsec_action = NULL; 4311 } 4312 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4313 return (0); 4314 } 4315 4316 global_policy_present = isv4 ? 4317 (ipss->ipsec_outbound_v4_policy_present || 4318 ipss->ipsec_inbound_v4_policy_present) : 4319 (ipss->ipsec_outbound_v6_policy_present || 4320 ipss->ipsec_inbound_v6_policy_present); 4321 4322 if ((connp->conn_policy != NULL) || global_policy_present) { 4323 ipsec_selector_t sel; 4324 ipsec_policy_t *p; 4325 4326 if (connp->conn_latch == NULL && 4327 (connp->conn_latch = iplatch_create()) == NULL) { 4328 return (ENOMEM); 4329 } 4330 4331 bzero((void*)&sel, sizeof (sel)); 4332 4333 sel.ips_protocol = connp->conn_proto; 4334 sel.ips_local_port = connp->conn_lport; 4335 sel.ips_remote_port = connp->conn_fport; 4336 sel.ips_is_icmp_inv_acq = 0; 4337 sel.ips_isv4 = isv4; 4338 if (isv4) { 4339 sel.ips_local_addr_v4 = connp->conn_laddr_v4; 4340 sel.ips_remote_addr_v4 = connp->conn_faddr_v4; 4341 } else { 4342 sel.ips_local_addr_v6 = connp->conn_laddr_v6; 4343 sel.ips_remote_addr_v6 = connp->conn_faddr_v6; 4344 } 4345 4346 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, &sel, ns); 4347 if (connp->conn_latch_in_policy != NULL) 4348 IPPOL_REFRELE(connp->conn_latch_in_policy); 4349 connp->conn_latch_in_policy = p; 4350 connp->conn_in_enforce_policy = (p != NULL); 4351 4352 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4353 if (connp->conn_ixa->ixa_ipsec_policy != NULL) 4354 IPPOL_REFRELE(connp->conn_ixa->ixa_ipsec_policy); 4355 connp->conn_ixa->ixa_ipsec_policy = p; 4356 connp->conn_out_enforce_policy = (p != NULL); 4357 if (p != NULL) { 4358 connp->conn_ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4359 if (connp->conn_policy == NULL) { 4360 connp->conn_ixa->ixa_flags |= 4361 IXAF_IPSEC_GLOBAL_POLICY; 4362 } 4363 } else { 4364 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4365 } 4366 /* Clear the latched actions too, in case we're recaching. 
*/
4367 if (connp->conn_ixa->ixa_ipsec_action != NULL) {
4368 IPACT_REFRELE(connp->conn_ixa->ixa_ipsec_action);
4369 connp->conn_ixa->ixa_ipsec_action = NULL;
4370 }
4371 if (connp->conn_latch_in_action != NULL) {
4372 IPACT_REFRELE(connp->conn_latch_in_action);
4373 connp->conn_latch_in_action = NULL;
4374 }
4375 connp->conn_ixa->ixa_ipsec_src_port = sel.ips_local_port;
4376 connp->conn_ixa->ixa_ipsec_dst_port = sel.ips_remote_port;
4377 connp->conn_ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type;
4378 connp->conn_ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code;
4379 connp->conn_ixa->ixa_ipsec_proto = sel.ips_protocol;
4380 } else {
4381 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;
4382 }
4383
4384 /*
4385 * We may or may not have policy for this endpoint. We still set
4386 * conn_policy_cached so that inbound datagrams don't have to look
4387 * at global policy, as policy is considered latched for these
4388 * endpoints. We should not set conn_policy_cached until the conn
4389 * reflects the actual policy. If we *set* this before inheriting
4390 * the policy, there is a window where the check
4391 * CONN_INBOUND_POLICY_PRESENT will neither check with the policy
4392 * on the conn (because we have not yet copied the policy on to
4393 * conn and hence not set conn_in_enforce_policy) nor with the
4394 * global policy (because conn_policy_cached is already set).
4395 */
4396 connp->conn_policy_cached = B_TRUE;
4397 return (0);
4398 }
4399
4400 /*
4401 * When appropriate, this function caches outbound policy for faddr/fport.
4402 * It is used when we are not connected, i.e., when we cannot latch the
4403 * policy.
4404 */
4405 void
4406 ipsec_cache_outbound_policy(const conn_t *connp, const in6_addr_t *v6src,
4407 const in6_addr_t *v6dst, in_port_t dstport, ip_xmit_attr_t *ixa)
4408 {
4409 boolean_t isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
4410 boolean_t global_policy_present;
4411 netstack_t *ns = connp->conn_netstack;
4412 ipsec_stack_t *ipss = ns->netstack_ipsec;
4413
4414 ixa->ixa_ipsec_policy_gen = ipss->ipsec_system_policy.iph_gen;
4415
4416 /*
4417 * There is no policy caching for ICMP sockets because we can't
4418 * decide on which policy to use until we see the packet and get
4419 * type/code selectors.
4420 */
4421 if (connp->conn_proto == IPPROTO_ICMP ||
4422 connp->conn_proto == IPPROTO_ICMPV6) {
4423 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;
4424 if (ixa->ixa_ipsec_policy != NULL) {
4425 IPPOL_REFRELE(ixa->ixa_ipsec_policy);
4426 ixa->ixa_ipsec_policy = NULL;
4427 }
4428 if (ixa->ixa_ipsec_action != NULL) {
4429 IPACT_REFRELE(ixa->ixa_ipsec_action);
4430 ixa->ixa_ipsec_action = NULL;
4431 }
4432 return;
4433 }
4434
4435 global_policy_present = isv4 ?
4436 (ipss->ipsec_outbound_v4_policy_present || 4437 ipss->ipsec_inbound_v4_policy_present) : 4438 (ipss->ipsec_outbound_v6_policy_present || 4439 ipss->ipsec_inbound_v6_policy_present); 4440 4441 if ((connp->conn_policy != NULL) || global_policy_present) { 4442 ipsec_selector_t sel; 4443 ipsec_policy_t *p; 4444 4445 bzero((void*)&sel, sizeof (sel)); 4446 4447 sel.ips_protocol = connp->conn_proto; 4448 sel.ips_local_port = connp->conn_lport; 4449 sel.ips_remote_port = dstport; 4450 sel.ips_is_icmp_inv_acq = 0; 4451 sel.ips_isv4 = isv4; 4452 if (isv4) { 4453 IN6_V4MAPPED_TO_IPADDR(v6src, sel.ips_local_addr_v4); 4454 IN6_V4MAPPED_TO_IPADDR(v6dst, sel.ips_remote_addr_v4); 4455 } else { 4456 sel.ips_local_addr_v6 = *v6src; 4457 sel.ips_remote_addr_v6 = *v6dst; 4458 } 4459 4460 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4461 if (ixa->ixa_ipsec_policy != NULL) 4462 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4463 ixa->ixa_ipsec_policy = p; 4464 if (p != NULL) { 4465 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4466 if (connp->conn_policy == NULL) 4467 ixa->ixa_flags |= IXAF_IPSEC_GLOBAL_POLICY; 4468 } else { 4469 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4470 } 4471 /* Clear the latched actions too, in case we're recaching. */ 4472 if (ixa->ixa_ipsec_action != NULL) { 4473 IPACT_REFRELE(ixa->ixa_ipsec_action); 4474 ixa->ixa_ipsec_action = NULL; 4475 } 4476 4477 ixa->ixa_ipsec_src_port = sel.ips_local_port; 4478 ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4479 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4480 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4481 ixa->ixa_ipsec_proto = sel.ips_protocol; 4482 } else { 4483 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4484 if (ixa->ixa_ipsec_policy != NULL) { 4485 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4486 ixa->ixa_ipsec_policy = NULL; 4487 } 4488 if (ixa->ixa_ipsec_action != NULL) { 4489 IPACT_REFRELE(ixa->ixa_ipsec_action); 4490 ixa->ixa_ipsec_action = NULL; 4491 } 4492 } 4493 } 4494 4495 /* 4496 * Returns B_FALSE if the policy has gone stale. 4497 */ 4498 boolean_t 4499 ipsec_outbound_policy_current(ip_xmit_attr_t *ixa) 4500 { 4501 ipsec_stack_t *ipss = ixa->ixa_ipst->ips_netstack->netstack_ipsec; 4502 4503 if (!(ixa->ixa_flags & IXAF_IPSEC_GLOBAL_POLICY)) 4504 return (B_TRUE); 4505 4506 return (ixa->ixa_ipsec_policy_gen == ipss->ipsec_system_policy.iph_gen); 4507 } 4508 4509 void 4510 iplatch_free(ipsec_latch_t *ipl) 4511 { 4512 if (ipl->ipl_local_cid != NULL) 4513 IPSID_REFRELE(ipl->ipl_local_cid); 4514 if (ipl->ipl_remote_cid != NULL) 4515 IPSID_REFRELE(ipl->ipl_remote_cid); 4516 mutex_destroy(&ipl->ipl_lock); 4517 kmem_free(ipl, sizeof (*ipl)); 4518 } 4519 4520 ipsec_latch_t * 4521 iplatch_create() 4522 { 4523 ipsec_latch_t *ipl = kmem_zalloc(sizeof (*ipl), KM_NOSLEEP); 4524 if (ipl == NULL) 4525 return (ipl); 4526 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4527 ipl->ipl_refcnt = 1; 4528 return (ipl); 4529 } 4530 4531 /* 4532 * Hash function for ID hash table. 4533 */ 4534 static uint32_t 4535 ipsid_hash(int idtype, char *idstring) 4536 { 4537 uint32_t hval = idtype; 4538 unsigned char c; 4539 4540 while ((c = *idstring++) != 0) { 4541 hval = (hval << 4) | (hval >> 28); 4542 hval ^= c; 4543 } 4544 hval = hval ^ (hval >> 16); 4545 return (hval & (IPSID_HASHSIZE-1)); 4546 } 4547 4548 /* 4549 * Look up identity string in hash table. Return identity object 4550 * corresponding to the name -- either preexisting, or newly allocated. 4551 * 4552 * Return NULL if we need to allocate a new one and can't get memory. 
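 *
 * Usage sketch (illustrative only; the identity type shown is just an
 * assumed example value):
 *
 *	ipsid_t *cid = ipsid_lookup(SADB_IDENTTYPE_USER_FQDN,
 *	    "user@host.example.com", ns);
 *	if (cid == NULL)
 *		return (ENOMEM);
 *	(use the interned, held identity...)
 *	IPSID_REFRELE(cid);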
4553 */ 4554 ipsid_t * 4555 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4556 { 4557 ipsid_t *retval; 4558 char *nstr; 4559 int idlen = strlen(idstring) + 1; 4560 ipsec_stack_t *ipss = ns->netstack_ipsec; 4561 ipsif_t *bucket; 4562 4563 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4564 4565 mutex_enter(&bucket->ipsif_lock); 4566 4567 for (retval = bucket->ipsif_head; retval != NULL; 4568 retval = retval->ipsid_next) { 4569 if (idtype != retval->ipsid_type) 4570 continue; 4571 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4572 continue; 4573 4574 IPSID_REFHOLD(retval); 4575 mutex_exit(&bucket->ipsif_lock); 4576 return (retval); 4577 } 4578 4579 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4580 if (!retval) { 4581 mutex_exit(&bucket->ipsif_lock); 4582 return (NULL); 4583 } 4584 4585 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4586 if (!nstr) { 4587 mutex_exit(&bucket->ipsif_lock); 4588 kmem_free(retval, sizeof (*retval)); 4589 return (NULL); 4590 } 4591 4592 retval->ipsid_refcnt = 1; 4593 retval->ipsid_next = bucket->ipsif_head; 4594 if (retval->ipsid_next != NULL) 4595 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4596 retval->ipsid_ptpn = &bucket->ipsif_head; 4597 retval->ipsid_type = idtype; 4598 retval->ipsid_cid = nstr; 4599 bucket->ipsif_head = retval; 4600 bcopy(idstring, nstr, idlen); 4601 mutex_exit(&bucket->ipsif_lock); 4602 4603 return (retval); 4604 } 4605 4606 /* 4607 * Garbage collect the identity hash table. 4608 */ 4609 void 4610 ipsid_gc(netstack_t *ns) 4611 { 4612 int i, len; 4613 ipsid_t *id, *nid; 4614 ipsif_t *bucket; 4615 ipsec_stack_t *ipss = ns->netstack_ipsec; 4616 4617 for (i = 0; i < IPSID_HASHSIZE; i++) { 4618 bucket = &ipss->ipsec_ipsid_buckets[i]; 4619 mutex_enter(&bucket->ipsif_lock); 4620 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4621 nid = id->ipsid_next; 4622 if (id->ipsid_refcnt == 0) { 4623 *id->ipsid_ptpn = nid; 4624 if (nid != NULL) 4625 nid->ipsid_ptpn = id->ipsid_ptpn; 4626 len = strlen(id->ipsid_cid) + 1; 4627 kmem_free(id->ipsid_cid, len); 4628 kmem_free(id, sizeof (*id)); 4629 } 4630 } 4631 mutex_exit(&bucket->ipsif_lock); 4632 } 4633 } 4634 4635 /* 4636 * Return true if two identities are the same. 4637 */ 4638 boolean_t 4639 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4640 { 4641 if (id1 == id2) 4642 return (B_TRUE); 4643 #ifdef DEBUG 4644 if ((id1 == NULL) || (id2 == NULL)) 4645 return (B_FALSE); 4646 /* 4647 * test that we're interning id's correctly.. 4648 */ 4649 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4650 (id1->ipsid_type != id2->ipsid_type)); 4651 #endif 4652 return (B_FALSE); 4653 } 4654 4655 /* 4656 * Initialize identity table; called during module initialization. 
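 *
 * All buckets start out empty; entries are interned on demand by
 * ipsid_lookup() and reclaimed by ipsid_gc() once their reference
 * counts drop to zero.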
4657 */ 4658 static void 4659 ipsid_init(netstack_t *ns) 4660 { 4661 ipsif_t *bucket; 4662 int i; 4663 ipsec_stack_t *ipss = ns->netstack_ipsec; 4664 4665 for (i = 0; i < IPSID_HASHSIZE; i++) { 4666 bucket = &ipss->ipsec_ipsid_buckets[i]; 4667 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4668 } 4669 } 4670 4671 /* 4672 * Free identity table (preparatory to module unload) 4673 */ 4674 static void 4675 ipsid_fini(netstack_t *ns) 4676 { 4677 ipsif_t *bucket; 4678 int i; 4679 ipsec_stack_t *ipss = ns->netstack_ipsec; 4680 4681 for (i = 0; i < IPSID_HASHSIZE; i++) { 4682 bucket = &ipss->ipsec_ipsid_buckets[i]; 4683 ASSERT(bucket->ipsif_head == NULL); 4684 mutex_destroy(&bucket->ipsif_lock); 4685 } 4686 } 4687 4688 /* 4689 * Update the minimum and maximum supported key sizes for the 4690 * specified algorithm. Must be called while holding the algorithms lock. 4691 */ 4692 void 4693 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 4694 netstack_t *ns) 4695 { 4696 size_t crypto_min = (size_t)-1, crypto_max = 0; 4697 size_t cur_crypto_min, cur_crypto_max; 4698 boolean_t is_valid; 4699 crypto_mechanism_info_t *mech_infos; 4700 uint_t nmech_infos; 4701 int crypto_rc, i; 4702 crypto_mech_usage_t mask; 4703 ipsec_stack_t *ipss = ns->netstack_ipsec; 4704 4705 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 4706 4707 /* 4708 * Compute the min, max, and default key sizes (in number of 4709 * increments to the default key size in bits) as defined 4710 * by the algorithm mappings. This range of key sizes is used 4711 * for policy related operations. The effective key sizes 4712 * supported by the framework could be more limited than 4713 * those defined for an algorithm. 4714 */ 4715 alg->alg_default_bits = alg->alg_key_sizes[0]; 4716 alg->alg_default = 0; 4717 if (alg->alg_increment != 0) { 4718 /* key sizes are defined by range & increment */ 4719 alg->alg_minbits = alg->alg_key_sizes[1]; 4720 alg->alg_maxbits = alg->alg_key_sizes[2]; 4721 } else if (alg->alg_nkey_sizes == 0) { 4722 /* no specified key size for algorithm */ 4723 alg->alg_minbits = alg->alg_maxbits = 0; 4724 } else { 4725 /* key sizes are defined by enumeration */ 4726 alg->alg_minbits = (uint16_t)-1; 4727 alg->alg_maxbits = 0; 4728 4729 for (i = 0; i < alg->alg_nkey_sizes; i++) { 4730 if (alg->alg_key_sizes[i] < alg->alg_minbits) 4731 alg->alg_minbits = alg->alg_key_sizes[i]; 4732 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 4733 alg->alg_maxbits = alg->alg_key_sizes[i]; 4734 } 4735 } 4736 4737 if (!(alg->alg_flags & ALG_FLAG_VALID)) 4738 return; 4739 4740 /* 4741 * Mechanisms do not apply to the NULL encryption 4742 * algorithm, so simply return for this case. 4743 */ 4744 if (alg->alg_id == SADB_EALG_NULL) 4745 return; 4746 4747 /* 4748 * Find the min and max key sizes supported by the cryptographic 4749 * framework providers. 4750 */ 4751 4752 /* get the key sizes supported by the framework */ 4753 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 4754 &mech_infos, &nmech_infos, KM_SLEEP); 4755 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 4756 alg->alg_flags &= ~ALG_FLAG_VALID; 4757 return; 4758 } 4759 4760 /* min and max key sizes supported by framework */ 4761 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 4762 int unit_bits; 4763 4764 /* 4765 * Ignore entries that do not support the operations 4766 * needed for the algorithm type. 
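 * For example, a mechanism entry that advertises only
 * CRYPTO_MECH_USAGE_ENCRYPT for a cipher is of no use here, since
 * ESP needs both encrypt and decrypt.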
4767 */
4768 if (alg_type == IPSEC_ALG_AUTH) {
4769 mask = CRYPTO_MECH_USAGE_MAC;
4770 } else {
4771 mask = CRYPTO_MECH_USAGE_ENCRYPT |
4772 CRYPTO_MECH_USAGE_DECRYPT;
4773 }
4774 if ((mech_infos[i].mi_usage & mask) != mask)
4775 continue;
4776
4777 unit_bits = (mech_infos[i].mi_keysize_unit ==
4778 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1;
4779 /* adjust min/max supported by framework */
4780 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits;
4781 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits;
4782
4783 if (cur_crypto_min < crypto_min)
4784 crypto_min = cur_crypto_min;
4785
4786 /*
4787 * CRYPTO_EFFECTIVELY_INFINITE is a special value of
4788 * the crypto framework which means "no upper limit".
4789 */
4790 if (mech_infos[i].mi_max_key_size ==
4791 CRYPTO_EFFECTIVELY_INFINITE) {
4792 crypto_max = (size_t)-1;
4793 } else if (cur_crypto_max > crypto_max) {
4794 crypto_max = cur_crypto_max;
4795 }
4796
4797 is_valid = B_TRUE;
4798 }
4799
4800 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) *
4801 nmech_infos);
4802
4803 if (!is_valid) {
4804 /* no key sizes supported by framework */
4805 alg->alg_flags &= ~ALG_FLAG_VALID;
4806 return;
4807 }
4808
4809 /*
4810 * Determine min and max key sizes from the alg_key_sizes[]
4811 * defined for the algorithm entry. Adjust key sizes based on
4812 * those supported by the framework.
4813 */
4814 alg->alg_ef_default_bits = alg->alg_key_sizes[0];
4815
4816 /*
4817 * For backwards compatibility, assume that the IV length
4818 * is the same as the data length.
4819 */
4820 alg->alg_ivlen = alg->alg_datalen;
4821
4822 /*
4823 * Copy any algorithm parameters (if provided) into dedicated
4824 * elements in the ipsec_alginfo_t structure.
4825 * There may be a better place to put this code.
4826 */
4827 for (i = 0; i < alg->alg_nparams; i++) {
4828 switch (i) {
4829 case 0:
4830 /* Initialisation Vector length (bytes) */
4831 alg->alg_ivlen = alg->alg_params[0];
4832 break;
4833 case 1:
4834 /* Integrity Check Vector length (bytes) */
4835 alg->alg_icvlen = alg->alg_params[1];
4836 break;
4837 case 2:
4838 /* Salt length (bytes) */
4839 alg->alg_saltlen = (uint8_t)alg->alg_params[2];
4840 break;
4841 default:
4842 break;
4843 }
4844 }
4845
4846 /* Default if the IV length is not specified. */
4847 if (alg_type == IPSEC_ALG_ENCR && alg->alg_ivlen == 0)
4848 alg->alg_ivlen = alg->alg_datalen;
4849
4850 alg_flag_check(alg);
4851
4852 if (alg->alg_increment != 0) {
4853 /* supported key sizes are defined by range & increment */
4854 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment);
4855 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment);
4856
4857 alg->alg_ef_minbits = MAX(alg->alg_minbits,
4858 (uint16_t)crypto_min);
4859 alg->alg_ef_maxbits = MIN(alg->alg_maxbits,
4860 (uint16_t)crypto_max);
4861
4862 /*
4863 * If the sizes supported by the framework are outside
4864 * the range of sizes defined by the algorithm mappings,
4865 * the algorithm cannot be used. Check for this
4866 * condition here.
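 *
 * For example, if the mapping defines 128..256 bits with an
 * increment of 64 but the providers only go up to 96 bits,
 * crypto_max rounds down to 64 and alg_ef_minbits (128) exceeds
 * alg_ef_maxbits (64), so the algorithm is invalidated.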
4867 */
4868 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) {
4869 alg->alg_flags &= ~ALG_FLAG_VALID;
4870 return;
4871 }
4872 if (alg->alg_ef_default_bits < alg->alg_ef_minbits)
4873 alg->alg_ef_default_bits = alg->alg_ef_minbits;
4874 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits)
4875 alg->alg_ef_default_bits = alg->alg_ef_maxbits;
4876 } else if (alg->alg_nkey_sizes == 0) {
4877 /* no specified key size for algorithm */
4878 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0;
4879 } else {
4880 /* supported key sizes are defined by enumeration */
4881 alg->alg_ef_minbits = (uint16_t)-1;
4882 alg->alg_ef_maxbits = 0;
4883
4884 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) {
4885 /*
4886 * Ignore the current key size if it is not in the
4887 * range of sizes supported by the framework.
4888 */
4889 if (alg->alg_key_sizes[i] < crypto_min ||
4890 alg->alg_key_sizes[i] > crypto_max)
4891 continue;
4892 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits)
4893 alg->alg_ef_minbits = alg->alg_key_sizes[i];
4894 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits)
4895 alg->alg_ef_maxbits = alg->alg_key_sizes[i];
4896 is_valid = B_TRUE;
4897 }
4898
4899 if (!is_valid) {
4900 alg->alg_flags &= ~ALG_FLAG_VALID;
4901 return;
4902 }
4903 alg->alg_ef_default = 0;
4904 }
4905 }
4906
4907 /*
4908 * Sanity check parameters provided by ipsecalgs(1m). The valid flag
4909 * is cleared at the top of this function and only set again once all
4910 * of the checks below pass; if any of them fail, the algorithm
4911 * entry stays invalid.
4912 */
4913 void
4914 alg_flag_check(ipsec_alginfo_t *alg)
4915 {
4916 alg->alg_flags &= ~ALG_FLAG_VALID;
4917
4918 /*
4919 * Can't have the algorithm marked as both CCM and GCM.
4920 * Check that the ALG_FLAG_COMBINED and ALG_FLAG_COUNTERMODE
4921 * flags are set for CCM & GCM.
4922 */
4923 if ((alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) ==
4924 (ALG_FLAG_CCM|ALG_FLAG_GCM))
4925 return;
4926 if (alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) {
4927 if (!(alg->alg_flags & ALG_FLAG_COUNTERMODE))
4928 return;
4929 if (!(alg->alg_flags & ALG_FLAG_COMBINED))
4930 return;
4931 }
4932
4933 /*
4934 * For ALG_FLAG_COUNTERMODE, check that the parameters
4935 * fit in the ipsec_nonce_t structure.
4936 */
4937 if (alg->alg_flags & ALG_FLAG_COUNTERMODE) {
4938 if (alg->alg_ivlen != sizeof (((ipsec_nonce_t *)NULL)->iv))
4939 return;
4940 if (alg->alg_saltlen > sizeof (((ipsec_nonce_t *)NULL)->salt))
4941 return;
4942 }
4943 if ((alg->alg_flags & ALG_FLAG_COMBINED) &&
4944 (alg->alg_icvlen == 0))
4945 return;
4946
4947 /* all is well. */
4948 alg->alg_flags |= ALG_FLAG_VALID;
4949 }
4950
4951 /*
4952 * Free the memory used by the specified algorithm.
4953 */
4954 void
4955 ipsec_alg_free(ipsec_alginfo_t *alg)
4956 {
4957 if (alg == NULL)
4958 return;
4959
4960 if (alg->alg_key_sizes != NULL) {
4961 kmem_free(alg->alg_key_sizes,
4962 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t));
4963 alg->alg_key_sizes = NULL;
4964 }
4965 if (alg->alg_block_sizes != NULL) {
4966 kmem_free(alg->alg_block_sizes,
4967 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t));
4968 alg->alg_block_sizes = NULL;
4969 }
4970 if (alg->alg_params != NULL) {
4971 kmem_free(alg->alg_params,
4972 (alg->alg_nparams + 1) * sizeof (uint16_t));
4973 alg->alg_params = NULL;
4974 }
4975 kmem_free(alg, sizeof (*alg));
4976 }
4977
4978 /*
4979 * Check the validity of the specified key size for an algorithm.
4980 * Returns B_TRUE if key size is valid, B_FALSE otherwise.
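 *
 * For example (assuming the framework supports the whole range), an
 * enumerated algorithm whose alg_key_sizes[] lists { 128, 192, 256 }
 * with alg_increment == 0 gives:
 *
 *	ipsec_valid_key_size(192, alg)	-> B_TRUE, exact match
 *	ipsec_valid_key_size(160, alg)	-> B_FALSE, not enumerated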
4981 */ 4982 boolean_t 4983 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 4984 { 4985 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 4986 return (B_FALSE); 4987 4988 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 4989 /* 4990 * If the key sizes are defined by enumeration, the new 4991 * key size must be equal to one of the supported values. 4992 */ 4993 int i; 4994 4995 for (i = 0; i < alg->alg_nkey_sizes; i++) 4996 if (key_size == alg->alg_key_sizes[i]) 4997 break; 4998 if (i == alg->alg_nkey_sizes) 4999 return (B_FALSE); 5000 } 5001 5002 return (B_TRUE); 5003 } 5004 5005 /* 5006 * Callback function invoked by the crypto framework when a provider 5007 * registers or unregisters. This callback updates the algorithms 5008 * tables when a crypto algorithm is no longer available or becomes 5009 * available, and triggers the freeing/creation of context templates 5010 * associated with existing SAs, if needed. 5011 * 5012 * Need to walk all stack instances since the callback is global 5013 * for all instances 5014 */ 5015 void 5016 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5017 { 5018 netstack_handle_t nh; 5019 netstack_t *ns; 5020 5021 netstack_next_init(&nh); 5022 while ((ns = netstack_next(&nh)) != NULL) { 5023 ipsec_prov_update_callback_stack(event, event_arg, ns); 5024 netstack_rele(ns); 5025 } 5026 netstack_next_fini(&nh); 5027 } 5028 5029 static void 5030 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5031 netstack_t *ns) 5032 { 5033 crypto_notify_event_change_t *prov_change = 5034 (crypto_notify_event_change_t *)event_arg; 5035 uint_t algidx, algid, algtype, mech_count, mech_idx; 5036 ipsec_alginfo_t *alg; 5037 ipsec_alginfo_t oalg; 5038 crypto_mech_name_t *mechs; 5039 boolean_t alg_changed = B_FALSE; 5040 ipsec_stack_t *ipss = ns->netstack_ipsec; 5041 5042 /* ignore events for which we didn't register */ 5043 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5044 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5045 " received from crypto framework\n", event)); 5046 return; 5047 } 5048 5049 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5050 if (mechs == NULL) 5051 return; 5052 5053 /* 5054 * Walk the list of currently defined IPsec algorithm. Update 5055 * the algorithm valid flag and trigger an update of the 5056 * SAs that depend on that algorithm. 5057 */ 5058 mutex_enter(&ipss->ipsec_alg_lock); 5059 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5060 for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; 5061 algidx++) { 5062 5063 algid = ipss->ipsec_sortlist[algtype][algidx]; 5064 alg = ipss->ipsec_alglists[algtype][algid]; 5065 ASSERT(alg != NULL); 5066 5067 /* 5068 * Skip the algorithms which do not map to the 5069 * crypto framework provider being added or removed. 5070 */ 5071 if (strncmp(alg->alg_mech_name, 5072 prov_change->ec_mech_name, 5073 CRYPTO_MAX_MECH_NAME) != 0) 5074 continue; 5075 5076 /* 5077 * Determine if the mechanism is valid. If it 5078 * is not, mark the algorithm as being invalid. If 5079 * it is, mark the algorithm as being valid. 
5080 */
5081 for (mech_idx = 0; mech_idx < mech_count; mech_idx++)
5082 if (strncmp(alg->alg_mech_name,
5083 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0)
5084 break;
5085 if (mech_idx == mech_count &&
5086 alg->alg_flags & ALG_FLAG_VALID) {
5087 alg->alg_flags &= ~ALG_FLAG_VALID;
5088 alg_changed = B_TRUE;
5089 } else if (mech_idx < mech_count &&
5090 !(alg->alg_flags & ALG_FLAG_VALID)) {
5091 alg->alg_flags |= ALG_FLAG_VALID;
5092 alg_changed = B_TRUE;
5093 }
5094
5095 /*
5096 * Update the supported key sizes, regardless
5097 * of whether a crypto provider was added or
5098 * removed.
5099 */
5100 oalg = *alg;
5101 ipsec_alg_fix_min_max(alg, algtype, ns);
5102 if (!alg_changed &&
5103 (alg->alg_ef_minbits != oalg.alg_ef_minbits ||
5104 alg->alg_ef_maxbits != oalg.alg_ef_maxbits ||
5105 alg->alg_ef_default != oalg.alg_ef_default ||
5106 alg->alg_ef_default_bits !=
5107 oalg.alg_ef_default_bits))
5108 alg_changed = B_TRUE;
5109
5110 /*
5111 * Update the affected SAs if a software provider is
5112 * being added or removed.
5113 */
5114 if (prov_change->ec_provider_type ==
5115 CRYPTO_SW_PROVIDER)
5116 sadb_alg_update(algtype, alg->alg_id,
5117 prov_change->ec_change ==
5118 CRYPTO_MECH_ADDED, ns);
5119 }
5120 }
5121 mutex_exit(&ipss->ipsec_alg_lock);
5122 crypto_free_mech_list(mechs, mech_count);
5123
5124 if (alg_changed) {
5125 /*
5126 * An algorithm has changed, i.e., it became valid or
5127 * invalid, or its supported key sizes have changed.
5128 * Notify ipsecah and ipsecesp of this change so
5129 * that they can send a SADB_REGISTER to their consumers.
5130 */
5131 ipsecah_algs_changed(ns);
5132 ipsecesp_algs_changed(ns);
5133 }
5134 }
5135
5136 /*
5137 * Registers with the crypto framework to be notified of crypto
5138 * provider changes. Used to update the algorithm tables and
5139 * to free or create context templates if needed. Invoked after IPsec
5140 * is loaded successfully.
5141 *
5142 * This is called separately for each IP instance, so we ensure we only
5143 * register once.
5144 */
5145 void
5146 ipsec_register_prov_update(void)
5147 {
5148 if (prov_update_handle != NULL)
5149 return;
5150
5151 prov_update_handle = crypto_notify_events(
5152 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED);
5153 }
5154
5155 /*
5156 * Unregisters from the crypto framework so we are no longer notified
5157 * of provider changes. Called from ipsec_policy_g_destroy().
5158 */
5159 static void
5160 ipsec_unregister_prov_update(void)
5161 {
5162 if (prov_update_handle != NULL)
5163 crypto_unnotify_events(prov_update_handle);
5164 }
5165
5166 /*
5167 * Tunnel-mode support routines.
5168 */
5169
5170 /*
5171 * Returns an mblk chain suitable for putnext() if policies match and IPsec
5172 * SAs are available. If there's no per-tunnel policy, or the policy check
5173 * comes back with no match, then still return the packet and have global
5174 * policy take a crack at it in IP.
5175 * This updates the ip_xmit_attr with the IPsec policy.
5176 *
5177 * Remember -> we can be forwarding packets. Keep that in mind w.r.t.
5178 * inner-packet contents.
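 *
 * Sketch of the expected calling pattern (illustrative only; variable
 * names are assumed):
 *
 *	mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4,
 *	    outer6, outer_hdr_len, ixa);
 *	if (mp == NULL)
 *		return;		dropped, or cached as an incomplete fragment
 *	(otherwise putnext() the result; ixa now carries the policy)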
5179 */ 5180 mblk_t * 5181 ipsec_tun_outbound(mblk_t *mp, iptun_t *iptun, ipha_t *inner_ipv4, 5182 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, 5183 ip_xmit_attr_t *ixa) 5184 { 5185 ipsec_policy_head_t *polhead; 5186 ipsec_selector_t sel; 5187 mblk_t *nmp; 5188 boolean_t is_fragment; 5189 ipsec_policy_t *pol; 5190 ipsec_tun_pol_t *itp = iptun->iptun_itp; 5191 netstack_t *ns = iptun->iptun_ns; 5192 ipsec_stack_t *ipss = ns->netstack_ipsec; 5193 5194 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5195 outer_ipv4 != NULL && outer_ipv6 == NULL); 5196 /* We take care of inners in a bit. */ 5197 5198 /* Are the IPsec fields initialized at all? */ 5199 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) { 5200 ASSERT(ixa->ixa_ipsec_policy == NULL); 5201 ASSERT(ixa->ixa_ipsec_latch == NULL); 5202 ASSERT(ixa->ixa_ipsec_action == NULL); 5203 ASSERT(ixa->ixa_ipsec_ah_sa == NULL); 5204 ASSERT(ixa->ixa_ipsec_esp_sa == NULL); 5205 } 5206 5207 ASSERT(itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)); 5208 polhead = itp->itp_policy; 5209 5210 bzero(&sel, sizeof (sel)); 5211 if (inner_ipv4 != NULL) { 5212 ASSERT(inner_ipv6 == NULL); 5213 sel.ips_isv4 = B_TRUE; 5214 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5215 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5216 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5217 } else { 5218 ASSERT(inner_ipv6 != NULL); 5219 sel.ips_isv4 = B_FALSE; 5220 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5221 /* 5222 * We don't care about routing-header dests in the 5223 * forwarding/tunnel path, so just grab ip6_dst. 5224 */ 5225 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5226 } 5227 5228 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5229 /* 5230 * Caller can prepend the outer header, which means 5231 * inner_ipv[46] may be stuck in the middle. Pullup the whole 5232 * mess now if need-be, for easier processing later. Don't 5233 * forget to rewire the outer header too. 5234 */ 5235 if (mp->b_cont != NULL) { 5236 nmp = msgpullup(mp, -1); 5237 if (nmp == NULL) { 5238 ip_drop_packet(mp, B_FALSE, NULL, 5239 DROPPER(ipss, ipds_spd_nomem), 5240 &ipss->ipsec_spd_dropper); 5241 return (NULL); 5242 } 5243 freemsg(mp); 5244 mp = nmp; 5245 if (outer_ipv4 != NULL) 5246 outer_ipv4 = (ipha_t *)mp->b_rptr; 5247 else 5248 outer_ipv6 = (ip6_t *)mp->b_rptr; 5249 if (inner_ipv4 != NULL) { 5250 inner_ipv4 = 5251 (ipha_t *)(mp->b_rptr + outer_hdr_len); 5252 } else { 5253 inner_ipv6 = 5254 (ip6_t *)(mp->b_rptr + outer_hdr_len); 5255 } 5256 } 5257 if (inner_ipv4 != NULL) { 5258 is_fragment = IS_V4_FRAGMENT( 5259 inner_ipv4->ipha_fragment_offset_and_flags); 5260 } else { 5261 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, mp, 5262 &is_fragment); 5263 } 5264 5265 if (is_fragment) { 5266 ipha_t *oiph; 5267 ipha_t *iph = NULL; 5268 ip6_t *ip6h = NULL; 5269 int hdr_len; 5270 uint16_t ip6_hdr_length; 5271 uint8_t v6_proto; 5272 uint8_t *v6_proto_p; 5273 5274 /* 5275 * We have a fragment we need to track! 5276 */ 5277 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5278 outer_hdr_len, ipss); 5279 if (mp == NULL) 5280 return (NULL); 5281 ASSERT(mp->b_cont == NULL); 5282 5283 /* 5284 * If we get here, we have a full fragment chain 5285 */ 5286 5287 oiph = (ipha_t *)mp->b_rptr; 5288 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5289 hdr_len = ((outer_hdr_len != 0) ? 
5290 IPH_HDR_LENGTH(oiph) : 0);
5291 iph = (ipha_t *)(mp->b_rptr + hdr_len);
5292 } else {
5293 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
5294 ip6h = (ip6_t *)mp->b_rptr;
5295 if (!ip_hdr_length_nexthdr_v6(mp, ip6h,
5296 &ip6_hdr_length, &v6_proto_p)) {
5297 ip_drop_packet_chain(mp, B_FALSE, NULL,
5298 DROPPER(ipss,
5299 ipds_spd_malformed_packet),
5300 &ipss->ipsec_spd_dropper);
5301 return (NULL);
5302 }
5303 hdr_len = ip6_hdr_length;
5304 }
5305 outer_hdr_len = hdr_len;
5306
5307 if (sel.ips_isv4) {
5308 if (iph == NULL) {
5309 /* Was v6 outer */
5310 iph = (ipha_t *)(mp->b_rptr + hdr_len);
5311 }
5312 inner_ipv4 = iph;
5313 sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
5314 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
5315 sel.ips_protocol =
5316 (uint8_t)inner_ipv4->ipha_protocol;
5317 } else {
5318 inner_ipv6 = (ip6_t *)(mp->b_rptr +
5319 hdr_len);
5320 sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
5321 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
5322 if (!ip_hdr_length_nexthdr_v6(mp,
5323 inner_ipv6, &ip6_hdr_length, &v6_proto_p)) {
5324 ip_drop_packet_chain(mp, B_FALSE, NULL,
5325 DROPPER(ipss,
5326 ipds_spd_malformed_frag),
5327 &ipss->ipsec_spd_dropper);
5328 return (NULL);
5329 }
5330 v6_proto = *v6_proto_p;
5331 sel.ips_protocol = v6_proto;
5332 #ifdef FRAGCACHE_DEBUG
5333 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n",
5334 sel.ips_protocol);
5335 #endif
5336 }
5337 /* Ports are extracted below */
5338 }
5339
5340 /* Get ports... */
5341 if (!ipsec_init_outbound_ports(&sel, mp,
5342 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) {
5343 /* callee did ip_drop_packet_chain() on mp. */
5344 return (NULL);
5345 }
5346 #ifdef FRAGCACHE_DEBUG
5347 if (inner_ipv4 != NULL)
5348 cmn_err(CE_WARN,
5349 "(v4) sel.ips_protocol = %d, "
5350 "sel.ips_local_port = %d, "
5351 "sel.ips_remote_port = %d\n",
5352 sel.ips_protocol, ntohs(sel.ips_local_port),
5353 ntohs(sel.ips_remote_port));
5354 if (inner_ipv6 != NULL)
5355 cmn_err(CE_WARN,
5356 "(v6) sel.ips_protocol = %d, "
5357 "sel.ips_local_port = %d, "
5358 "sel.ips_remote_port = %d\n",
5359 sel.ips_protocol, ntohs(sel.ips_local_port),
5360 ntohs(sel.ips_remote_port));
5361 #endif
5362 /* Success so far! */
5363 }
5364 rw_enter(&polhead->iph_lock, RW_READER);
5365 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel);
5366 rw_exit(&polhead->iph_lock);
5367 if (pol == NULL) {
5368 /*
5369 * No matching policy on this tunnel, drop the packet.
5370 *
5371 * NOTE: Tunnel-mode tunnels are different from the
5372 * IP global transport mode policy head. For a tunnel-mode
5373 * tunnel, we drop the packet instead of passing it along
5374 * as accepted, as a global-policy miss would.
5375 *
5376 * NOTE2: "negotiate transport" tunnels should match ALL
5377 * inbound packets, but we do not uncomment the ASSERT()
5378 * below because if/when we open PF_POLICY, a user can
5379 * shoot themselves in the foot with a 0 priority.
5380 */
5381
5382 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */
5383 #ifdef FRAGCACHE_DEBUG
5384 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel "
5385 "per-port policy\n");
5386 #endif
5387 ip_drop_packet_chain(mp, B_FALSE, NULL,
5388 DROPPER(ipss, ipds_spd_explicit),
5389 &ipss->ipsec_spd_dropper);
5390 return (NULL);
5391 }
5392
5393 #ifdef FRAGCACHE_DEBUG
5394 cmn_err(CE_WARN, "Found a matching tunnel per-port policy\n");
5395 #endif
5396
5397 /*
5398 * NOTE: ixa_cleanup() will release the pol references.
5399 */ 5400 ixa->ixa_ipsec_policy = pol; 5401 /* 5402 * NOTE: There is a subtle difference between iptun_zoneid and 5403 * iptun_connp->conn_zoneid explained in iptun_conn_create(). When 5404 * interacting with the ip module, we must use conn_zoneid. 5405 */ 5406 ixa->ixa_zoneid = iptun->iptun_connp->conn_zoneid; 5407 5408 ASSERT((outer_ipv4 != NULL) ? (ixa->ixa_flags & IXAF_IS_IPV4) : 5409 !(ixa->ixa_flags & IXAF_IS_IPV4)); 5410 ASSERT(ixa->ixa_ipsec_policy != NULL); 5411 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 5412 5413 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5414 /* Set up transport mode for tunnelled packets. */ 5415 ixa->ixa_ipsec_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5416 IPPROTO_IPV6; 5417 return (mp); 5418 } 5419 5420 /* Fill in tunnel-mode goodies here. */ 5421 ixa->ixa_flags |= IXAF_IPSEC_TUNNEL; 5422 /* XXX Do I need to fill in all of the goodies here? */ 5423 if (inner_ipv4) { 5424 ixa->ixa_ipsec_inaf = AF_INET; 5425 ixa->ixa_ipsec_insrc[0] = 5426 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5427 ixa->ixa_ipsec_indst[0] = 5428 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5429 } else { 5430 ixa->ixa_ipsec_inaf = AF_INET6; 5431 ixa->ixa_ipsec_insrc[0] = 5432 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5433 ixa->ixa_ipsec_insrc[1] = 5434 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5435 ixa->ixa_ipsec_insrc[2] = 5436 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5437 ixa->ixa_ipsec_insrc[3] = 5438 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5439 ixa->ixa_ipsec_indst[0] = 5440 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5441 ixa->ixa_ipsec_indst[1] = 5442 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5443 ixa->ixa_ipsec_indst[2] = 5444 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5445 ixa->ixa_ipsec_indst[3] = 5446 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5447 } 5448 ixa->ixa_ipsec_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5449 ixa->ixa_ipsec_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5450 /* NOTE: These are used for transport mode too. */ 5451 ixa->ixa_ipsec_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5452 ixa->ixa_ipsec_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5453 ixa->ixa_ipsec_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5454 5455 return (mp); 5456 } 5457 5458 /* 5459 * NOTE: The following releases pol's reference and 5460 * calls ip_drop_packet() for me on NULL returns. 5461 */ 5462 mblk_t * 5463 ipsec_check_ipsecin_policy_reasm(mblk_t *attr_mp, ipsec_policy_t *pol, 5464 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns) 5465 { 5466 /* Assume attr_mp is a chain of b_next-linked ip_recv_attr mblk. */ 5467 mblk_t *data_chain = NULL, *data_tail = NULL; 5468 mblk_t *next; 5469 mblk_t *data_mp; 5470 ip_recv_attr_t iras; 5471 5472 while (attr_mp != NULL) { 5473 ASSERT(ip_recv_attr_is_mblk(attr_mp)); 5474 next = attr_mp->b_next; 5475 attr_mp->b_next = NULL; /* No tripping asserts. */ 5476 5477 data_mp = attr_mp->b_cont; 5478 attr_mp->b_cont = NULL; 5479 if (!ip_recv_attr_from_mblk(attr_mp, &iras)) { 5480 /* The ill or ip_stack_t disappeared on us */ 5481 freemsg(data_mp); /* ip_drop_packet?? */ 5482 ira_cleanup(&iras, B_TRUE); 5483 goto fail; 5484 } 5485 5486 /* 5487 * Need IPPOL_REFHOLD(pol) for extras because 5488 * ipsecin_policy does the refrele. 
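 * Each iteration hands one reference to
 * ipsec_check_ipsecin_policy(), so the hold below keeps the
 * caller's original reference alive until the final release after
 * the loop.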
5489 */
5490 IPPOL_REFHOLD(pol);
5491
5492 data_mp = ipsec_check_ipsecin_policy(data_mp, pol, inner_ipv4,
5493 inner_ipv6, pkt_unique, &iras, ns);
5494 ira_cleanup(&iras, B_TRUE);
5495
5496 if (data_mp == NULL)
5497 goto fail;
5498
5499 if (data_tail == NULL) {
5500 /* First one */
5501 data_chain = data_tail = data_mp;
5502 } else {
5503 data_tail->b_next = data_mp;
5504 data_tail = data_mp;
5505 }
5506 attr_mp = next;
5507 }
5508 /*
5509 * One last release because either the loop bumped it up, or we never
5510 * called ipsec_check_ipsecin_policy().
5511 */
5512 IPPOL_REFRELE(pol);
5513
5514 /* data_chain is ready for return to tun module. */
5515 return (data_chain);
5516
5517 fail:
5518 /*
5519 * Need to get rid of any extra pol
5520 * references, and any remaining bits as well.
5521 */
5522 IPPOL_REFRELE(pol);
5523 ipsec_freemsg_chain(data_chain);
5524 ipsec_freemsg_chain(next); /* ipdrop stats? */
5525 return (NULL);
5526 }
5527
5528 /*
5529 * Returns a message if the inbound packet passed an IPsec policy check.
5530 * Returns NULL if it failed or if it is a fragment needing its friends
5531 * before a policy check can be performed.
5532 *
5533 * Expects a non-NULL data_mp and a non-NULL polhead.
5534 * The returned mblk may be a b_next chain of packets if fragments
5535 * needed to be collected for a proper policy check.
5536 *
5537 * This function calls ip_drop_packet() on data_mp if need be.
5538 *
5539 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller
5540 * is inspecting an ICMP packet.
5541 */
5542 mblk_t *
5543 ipsec_tun_inbound(ip_recv_attr_t *ira, mblk_t *data_mp, ipsec_tun_pol_t *itp,
5544 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
5545 ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns)
5546 {
5547 ipsec_policy_head_t *polhead;
5548 ipsec_selector_t sel;
5549 ipsec_policy_t *pol;
5550 uint16_t tmpport;
5551 selret_t rc;
5552 boolean_t port_policy_present, is_icmp, global_present;
5553 in6_addr_t tmpaddr;
5554 ipaddr_t tmp4;
5555 uint8_t flags, *inner_hdr;
5556 ipsec_stack_t *ipss = ns->netstack_ipsec;
5557
5558 sel.ips_is_icmp_inv_acq = 0;
5559
5560 if (outer_ipv4 != NULL) {
5561 ASSERT(outer_ipv6 == NULL);
5562 global_present = ipss->ipsec_inbound_v4_policy_present;
5563 } else {
5564 ASSERT(outer_ipv6 != NULL);
5565 global_present = ipss->ipsec_inbound_v6_policy_present;
5566 }
5567
5568 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL ||
5569 inner_ipv4 == NULL && inner_ipv6 != NULL);
5570
5571 if (outer_hdr_len < 0) {
5572 outer_hdr_len = (-outer_hdr_len);
5573 is_icmp = B_TRUE;
5574 } else {
5575 is_icmp = B_FALSE;
5576 }
5577
5578 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
5579 mblk_t *mp = data_mp;
5580
5581 polhead = itp->itp_policy;
5582 /*
5583 * We need to perform full Tunnel-Mode enforcement,
5584 * and we need to have inner-header data for such enforcement.
5585 *
5586 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound
5587 * and on return.
5588 */
5589
5590 port_policy_present = ((itp->itp_flags &
5591 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE);
5592 /*
5593 * NOTE: Even if our policy is transport mode, set the
5594 * SEL_TUNNEL_MODE flag so ipsec_init_inbound_sel() can
5595 * do the right thing w.r.t. outer headers.
5596 */
5597 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) |
5598 (is_icmp ?
SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE);
5599
5600 rc = ipsec_init_inbound_sel(&sel, data_mp, inner_ipv4,
5601 inner_ipv6, flags);
5602
5603 switch (rc) {
5604 case SELRET_NOMEM:
5605 ip_drop_packet(data_mp, B_TRUE, NULL,
5606 DROPPER(ipss, ipds_spd_nomem),
5607 &ipss->ipsec_spd_dropper);
5608 return (NULL);
5609 case SELRET_TUNFRAG:
5610 /*
5611 * At this point, if we're cleartext, we don't want
5612 * to go there.
5613 */
5614 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
5615 ip_drop_packet(data_mp, B_TRUE, NULL,
5616 DROPPER(ipss, ipds_spd_got_clear),
5617 &ipss->ipsec_spd_dropper);
5618 return (NULL);
5619 }
5620
5621 /*
5622 * Inner and outer headers may not be contiguous.
5623 * Pullup the data_mp now to satisfy assumptions of
5624 * ipsec_fragcache_add().
5625 */
5626 if (data_mp->b_cont != NULL) {
5627 mblk_t *nmp;
5628
5629 nmp = msgpullup(data_mp, -1);
5630 if (nmp == NULL) {
5631 ip_drop_packet(data_mp, B_TRUE, NULL,
5632 DROPPER(ipss, ipds_spd_nomem),
5633 &ipss->ipsec_spd_dropper);
5634 return (NULL);
5635 }
5636 freemsg(data_mp);
5637 data_mp = nmp;
5638 if (outer_ipv4 != NULL)
5639 outer_ipv4 =
5640 (ipha_t *)data_mp->b_rptr;
5641 else
5642 outer_ipv6 =
5643 (ip6_t *)data_mp->b_rptr;
5644 if (inner_ipv4 != NULL) {
5645 inner_ipv4 =
5646 (ipha_t *)(data_mp->b_rptr +
5647 outer_hdr_len);
5648 } else {
5649 inner_ipv6 =
5650 (ip6_t *)(data_mp->b_rptr +
5651 outer_hdr_len);
5652 }
5653 }
5654
5655 /*
5656 * If we need to queue the packet, we first
5657 * get an mblk with the attributes. ipsec_fragcache_add()
5658 * will prepend that to the queued data and return
5659 * a list of b_next messages, each of which starts with
5660 * the attribute mblk.
5661 */
5662 mp = ip_recv_attr_to_mblk(ira);
5663 if (mp == NULL) {
5664 ip_drop_packet(data_mp, B_TRUE, NULL,
5665 DROPPER(ipss, ipds_spd_nomem),
5666 &ipss->ipsec_spd_dropper);
5667 return (NULL);
5668 }
5669
5670 mp = ipsec_fragcache_add(&itp->itp_fragcache,
5671 mp, data_mp, outer_hdr_len, ipss);
5672
5673 if (mp == NULL) {
5674 /*
5675 * Data is cached, fragment chain is not
5676 * complete.
5677 */
5678 return (NULL);
5679 }
5680
5681 /*
5682 * If we get here, we have a full fragment chain.
5683 * Reacquire headers and selectors from first fragment.
5684 */
5685 ASSERT(ip_recv_attr_is_mblk(mp));
5686 data_mp = mp->b_cont;
5687 inner_hdr = data_mp->b_rptr;
5688 if (outer_ipv4 != NULL) {
5689 inner_hdr += IPH_HDR_LENGTH(
5690 (ipha_t *)data_mp->b_rptr);
5691 } else {
5692 inner_hdr += ip_hdr_length_v6(data_mp,
5693 (ip6_t *)data_mp->b_rptr);
5694 }
5695 ASSERT(inner_hdr <= data_mp->b_wptr);
5696
5697 if (inner_ipv4 != NULL) {
5698 inner_ipv4 = (ipha_t *)inner_hdr;
5699 inner_ipv6 = NULL;
5700 } else {
5701 inner_ipv6 = (ip6_t *)inner_hdr;
5702 inner_ipv4 = NULL;
5703 }
5704
5705 /*
5706 * Use SEL_TUNNEL_MODE to take into account the outer
5707 * header. Use SEL_POST_FRAG so we always get ports.
5708 */
5709 rc = ipsec_init_inbound_sel(&sel, data_mp,
5710 inner_ipv4, inner_ipv6,
5711 SEL_TUNNEL_MODE | SEL_POST_FRAG);
5712 switch (rc) {
5713 case SELRET_SUCCESS:
5714 /*
5715 * Get to same place as first caller's
5716 * SELRET_SUCCESS case.
			switch (rc) {
			case SELRET_SUCCESS:
				/*
				 * Get to same place as first caller's
				 * SELRET_SUCCESS case.
				 */
				break;
			case SELRET_NOMEM:
				ip_drop_packet_chain(mp, B_TRUE, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			case SELRET_BADPKT:
				ip_drop_packet_chain(mp, B_TRUE, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			case SELRET_TUNFRAG:
				cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)");
				/* FALLTHRU */
			default:
				cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)"
				    " returns bizarro 0x%x", rc);
				/* Guaranteed panic! */
				ASSERT(rc == SELRET_NOMEM);
				return (NULL);
			}
			/* FALLTHRU */
		case SELRET_SUCCESS:
			/*
			 * Common case:
			 * No per-port policy or a non-fragment.  Keep going.
			 */
			break;
		case SELRET_BADPKT:
			/*
			 * We may receive ICMP (with IPv6 inner) packets that
			 * trigger this return value.  Send 'em in for
			 * enforcement checking.
			 */
			cmn_err(CE_NOTE, "ipsec_tun_inbound(): "
			    "sending 'bad packet' in for enforcement");
			break;
		default:
			cmn_err(CE_WARN,
			    "ipsec_init_inbound_sel() returns bizarro 0x%x",
			    rc);
			ASSERT(rc == SELRET_NOMEM);	/* Guaranteed panic! */
			return (NULL);
		}

		if (is_icmp) {
			/*
			 * Swap local/remote because this is an ICMP packet.
			 */
			tmpaddr = sel.ips_local_addr_v6;
			sel.ips_local_addr_v6 = sel.ips_remote_addr_v6;
			sel.ips_remote_addr_v6 = tmpaddr;
			tmpport = sel.ips_local_port;
			sel.ips_local_port = sel.ips_remote_port;
			sel.ips_remote_port = tmpport;
		}

		/* find_policy_head() */
		rw_enter(&polhead->iph_lock, RW_READER);
		pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND,
		    &sel);
		rw_exit(&polhead->iph_lock);
		if (pol != NULL) {
			uint64_t pkt_unique;

			if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
				if (!pol->ipsp_act->ipa_allow_clear) {
					/*
					 * XXX should never get here with
					 * tunnel reassembled fragments?
					 */
					ASSERT(mp == data_mp);
					ip_drop_packet(data_mp, B_TRUE, NULL,
					    DROPPER(ipss, ipds_spd_got_clear),
					    &ipss->ipsec_spd_dropper);
					IPPOL_REFRELE(pol);
					return (NULL);
				} else {
					IPPOL_REFRELE(pol);
					return (mp);
				}
			}
			pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port,
			    sel.ips_local_port,
			    (inner_ipv4 == NULL) ? IPPROTO_IPV6 :
			    IPPROTO_ENCAP, sel.ips_protocol);

			/*
			 * NOTE: The following releases pol's reference and
			 * calls ip_drop_packet() for me on NULL returns.
			 *
			 * "sel" is still good here, so let's use it!
			 */
			if (data_mp == mp) {
				/* A single packet without attributes */
				data_mp = ipsec_check_ipsecin_policy(data_mp,
				    pol, inner_ipv4, inner_ipv6, pkt_unique,
				    ira, ns);
			} else {
				/*
				 * We pass in the b_next chain of attr_mp's
				 * and get back a b_next chain of data_mp's.
				 */
				data_mp = ipsec_check_ipsecin_policy_reasm(mp,
				    pol, inner_ipv4, inner_ipv6, pkt_unique,
				    ns);
			}
			return (data_mp);
		}

		/*
		 * Else fallthru and check the global policy on the outer
		 * header(s) if this tunnel is an old-style transport-mode
		 * one.  Drop the packet explicitly (no policy entry) for
		 * a new-style tunnel-mode tunnel.
		 */
		if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) {
			ip_drop_packet_chain(data_mp, B_TRUE, NULL,
			    DROPPER(ipss, ipds_spd_explicit),
			    &ipss->ipsec_spd_dropper);
			return (NULL);
		}
	}

	/*
	 * NOTE: If we reach here, we will not have packet chains from
	 * fragcache_add(), because the only way I get chains is on a
	 * tunnel-mode tunnel, which either returns with a pass, or gets
	 * hit by the ip_drop_packet_chain() call right above here.
	 */
	ASSERT(data_mp->b_next == NULL);

	/* If no per-tunnel security, check global policy now. */
	if ((ira->ira_flags & IRAF_IPSEC_SECURE) && !global_present) {
		if (ira->ira_flags & IRAF_TRUSTED_ICMP) {
			/*
			 * This is an ICMP message that was generated locally.
			 * We should accept it.
			 */
			return (data_mp);
		}

		ip_drop_packet(data_mp, B_TRUE, NULL,
		    DROPPER(ipss, ipds_spd_got_secure),
		    &ipss->ipsec_spd_dropper);
		return (NULL);
	}

	if (is_icmp) {
		/*
		 * For ICMP packets, "outer_ipvN" is set to the outer header
		 * that is *INSIDE* the ICMP payload.  For global policy
		 * checking, we need to reverse src/dst on the payload in
		 * order to construct selectors appropriately.  See "ripha"
		 * constructions in ip.c.  To avoid a bug like 6478464 (see
		 * earlier in this file), we will actually exchange src/dst
		 * in the packet, and reverse it after the call to
		 * ipsec_check_global_policy().
		 */
		if (outer_ipv4 != NULL) {
			tmp4 = outer_ipv4->ipha_src;
			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
			outer_ipv4->ipha_dst = tmp4;
		} else {
			ASSERT(outer_ipv6 != NULL);
			tmpaddr = outer_ipv6->ip6_src;
			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
			outer_ipv6->ip6_dst = tmpaddr;
		}
	}

	data_mp = ipsec_check_global_policy(data_mp, NULL, outer_ipv4,
	    outer_ipv6, ira, ns);
	if (data_mp == NULL)
		return (NULL);

	if (is_icmp) {
		/* Set things back to normal. */
		if (outer_ipv4 != NULL) {
			tmp4 = outer_ipv4->ipha_src;
			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
			outer_ipv4->ipha_dst = tmp4;
		} else {
			/* No need for ASSERT()s now. */
			tmpaddr = outer_ipv6->ip6_src;
			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
			outer_ipv6->ip6_dst = tmpaddr;
		}
	}

	/*
	 * At this point, we pretend it's a cleartext accepted
	 * packet.
	 */
	return (data_mp);
}
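/*
 * Editorial example (not part of the original source): a sketch of how a
 * tunnel driver might call ipsec_tun_inbound() for a decapsulating
 * IPv4-in-IPv4 packet.  The function name and "my_itp" are hypothetical;
 * the real caller lives in the iptun module.  A negative outer_hdr_len
 * would signal that the outer header sits inside an ICMP payload.
 */
#ifdef EXAMPLE_SKETCH
static mblk_t *
example_tun_receive(ip_recv_attr_t *ira, mblk_t *mp, ipsec_tun_pol_t *my_itp,
    netstack_t *ns)
{
	ipha_t *outer = (ipha_t *)mp->b_rptr;
	int outer_len = IPH_HDR_LENGTH(outer);
	ipha_t *inner = (ipha_t *)(mp->b_rptr + outer_len);

	/*
	 * NULL means the packet was consumed (dropped, or cached for
	 * reassembly); otherwise we get the packet back, possibly as a
	 * b_next chain of reassembled fragments.
	 */
	return (ipsec_tun_inbound(ira, mp, my_itp, inner, NULL, outer, NULL,
	    outer_len, ns));
}
#endif	/* EXAMPLE_SKETCH */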
/*
 * AVL comparison routine for our list of tunnel polheads.
 */
static int
tunnel_compare(const void *arg1, const void *arg2)
{
	ipsec_tun_pol_t *left, *right;
	int rc;

	left = (ipsec_tun_pol_t *)arg1;
	right = (ipsec_tun_pol_t *)arg2;

	rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ);
	return (rc == 0 ? rc : (rc > 0 ? 1 : -1));
}
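/*
 * Editorial note: tunnel_compare() is the comparator handed to avl_create()
 * when the per-stack tree of tunnel polheads is built during stack
 * initialization (earlier in this file).  A minimal sketch of that setup,
 * assuming the AVL linkage field in ipsec_tun_pol_t is named itp_node:
 */
#ifdef EXAMPLE_SKETCH
static void
example_tunnel_tree_init(ipsec_stack_t *ipss)
{
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), offsetof(ipsec_tun_pol_t, itp_node));
}
#endif	/* EXAMPLE_SKETCH */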
/*
 * Free a tunnel policy node.
 */
void
itp_free(ipsec_tun_pol_t *node, netstack_t *ns)
{
	if (node->itp_policy != NULL) {
		IPPH_REFRELE(node->itp_policy, ns);
		node->itp_policy = NULL;
	}
	if (node->itp_inactive != NULL) {
		IPPH_REFRELE(node->itp_inactive, ns);
		node->itp_inactive = NULL;
	}
	mutex_destroy(&node->itp_lock);
	kmem_free(node, sizeof (*node));
}

void
itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	ipss->ipsec_tunnel_policy_gen++;
	ipsec_fragcache_uninit(&node->itp_fragcache, ipss);
	avl_remove(&ipss->ipsec_tunnel_policies, node);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	ITP_REFRELE(node, ns);
}

/*
 * Public interface to look up a tunnel security policy by name.  Used by
 * spdsock mostly.  Returns "node" with a bumped refcnt.
 */
ipsec_tun_pol_t *
get_tunnel_policy(char *name, netstack_t *ns)
{
	ipsec_tun_pol_t *node, lookup;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	(void) strncpy(lookup.itp_name, name, LIFNAMSIZ);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
	node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
	    &lookup, NULL);
	if (node != NULL) {
		ITP_REFHOLD(node);
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);

	return (node);
}

/*
 * Public interface to walk all tunnel security policies.  Useful for spdsock
 * DUMP operations.  iterator() will not consume a reference.
 */
void
itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *),
    void *arg, netstack_t *ns)
{
	ipsec_tun_pol_t *node;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
	for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL;
	    node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) {
		iterator(node, arg, ns);
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
}
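/*
 * Editorial example (not from the original source): a trivial itp_walk()
 * iterator that counts the tunnel policies in a stack.  Because itp_walk()
 * holds ipsec_tunnel_policy_lock as RW_READER across the walk, the callback
 * must not block or try to acquire that lock itself.
 */
#ifdef EXAMPLE_SKETCH
/* ARGSUSED */
static void
example_count_itp(ipsec_tun_pol_t *itp, void *arg, netstack_t *ns)
{
	(*(uint_t *)arg)++;
}

static uint_t
example_count_tunnel_policies(netstack_t *ns)
{
	uint_t count = 0;

	itp_walk(example_count_itp, &count, ns);
	return (count);
}
#endif	/* EXAMPLE_SKETCH */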
/*
 * Initialize policy head.  This can only fail if there's a memory problem.
 */
static boolean_t
tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	iph->iph_refs = 1;
	iph->iph_gen = 0;
	if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize,
	    KM_SLEEP, B_FALSE, ns) != 0) {
		ipsec_polhead_free_table(iph);
		return (B_FALSE);
	}
	ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize);
	return (B_TRUE);
}

/*
 * Create a tunnel policy node with "name".  Set errno with
 * ENOMEM if there's a memory problem, and EEXIST if there's an existing
 * node.
 */
ipsec_tun_pol_t *
create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns)
{
	ipsec_tun_pol_t *newbie, *existing;
	avl_index_t where;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
	if (newbie == NULL) {
		*errno = ENOMEM;
		return (NULL);
	}
	if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
		kmem_free(newbie, sizeof (*newbie));
		*errno = ENOMEM;
		return (NULL);
	}

	(void) strncpy(newbie->itp_name, name, LIFNAMSIZ);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
	    newbie, &where);
	if (existing != NULL) {
		rw_exit(&ipss->ipsec_tunnel_policy_lock);
		/* The fragcache was initialized above; tear it down too. */
		ipsec_fragcache_uninit(&newbie->itp_fragcache, ipss);
		itp_free(newbie, ns);
		*errno = EEXIST;
		return (NULL);
	}
	ipss->ipsec_tunnel_policy_gen++;
	*gen = ipss->ipsec_tunnel_policy_gen;
	newbie->itp_refcnt = 2;	/* One for the caller, one for the tree. */
	newbie->itp_next_policy_index = 1;
	avl_insert(&ipss->ipsec_tunnel_policies, newbie, where);
	mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
	newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_policy == NULL)
		goto nomem;
	newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_inactive == NULL) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		goto nomem;
	}

	if (!tunnel_polhead_init(newbie->itp_policy, ns)) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	} else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) {
		IPPH_REFRELE(newbie->itp_policy, ns);
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);

	return (newbie);
nomem:
	/*
	 * At this point the node has already been inserted into the tree
	 * and the write lock is still held, so unwind both before freeing.
	 */
	*errno = ENOMEM;
	avl_remove(&ipss->ipsec_tunnel_policies, newbie);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	mutex_destroy(&newbie->itp_lock);
	ipsec_fragcache_uninit(&newbie->itp_fragcache, ipss);
	kmem_free(newbie, sizeof (*newbie));
	return (NULL);
}
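/*
 * Editorial example (not from the original source): typical creation and
 * teardown of a named tunnel policy node.  "ip.tun0" is a hypothetical
 * tunnel name; error and generation values come back via out-parameters.
 */
#ifdef EXAMPLE_SKETCH
static void
example_create_and_drop(netstack_t *ns)
{
	ipsec_tun_pol_t *itp;
	uint64_t gen;
	int err;

	itp = create_tunnel_policy("ip.tun0", &err, &gen, ns);
	if (itp == NULL) {
		/* err is now ENOMEM or EEXIST. */
		return;
	}
	/* ... install policy entries under itp->itp_lock ... */
	itp_unlink(itp, ns);	/* Drops the tree's reference. */
	ITP_REFRELE(itp, ns);	/* Drops the caller's reference. */
}
#endif	/* EXAMPLE_SKETCH */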
/*
 * Given two addresses, find a tunnel instance's IPsec policy heads.
 * Returns NULL on failure.
 */
ipsec_tun_pol_t *
itp_get_byaddr(uint32_t *laddr, uint32_t *faddr, int af, ip_stack_t *ipst)
{
	conn_t *connp;
	iptun_t *iptun;
	ipsec_tun_pol_t *itp = NULL;

	/* The classifier expects "src" to be the foreign address. */
	if (af == AF_INET) {
		connp = ipcl_iptun_classify_v4((ipaddr_t *)faddr,
		    (ipaddr_t *)laddr, ipst);
	} else {
		ASSERT(af == AF_INET6);
		ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)laddr));
		ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)faddr));
		connp = ipcl_iptun_classify_v6((in6_addr_t *)faddr,
		    (in6_addr_t *)laddr, ipst);
	}

	if (connp == NULL)
		return (NULL);

	if (IPCL_IS_IPTUN(connp)) {
		iptun = connp->conn_iptun;
		if (iptun != NULL) {
			itp = iptun->iptun_itp;
			if (itp != NULL) {
				/* Braces due to the macro's nature... */
				ITP_REFHOLD(itp);
			}
		}	/* Else itp is already NULL. */
	}

	CONN_DEC_REF(connp);
	return (itp);
}

/*
 * Frag cache code, based on SunScreen 3.2 source
 * screen/kernel/common/screen_fragcache.c
 */

#define	IPSEC_FRAG_TTL_MAX	5
/*
 * Note that the following parameters create 256 hash buckets
 * with 1024 free entries to be distributed.  Entries are cleaned
 * periodically, and a cleanup is also attempted when there is no
 * free space, but this system errs on the side of dropping packets
 * over creating memory exhaustion.  We may decide to make the hash
 * factor a tunable if this proves to be a bad decision.
 */
#define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
#define	IPSEC_FRAG_HASH_FACTOR	4
#define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)

#define	IPSEC_FRAG_HASH_MASK		(IPSEC_FRAG_HASH_SLOTS - 1)
#define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
					    (((id) / \
					    (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
					    IPSEC_FRAG_HASH_MASK))

/* Maximum fragments per packet.  48 bytes payload x 1366 packets > 64KB */
#define	IPSEC_MAX_FRAGS		1366

#define	V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_OFFSET) << 3)
#define	V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_MF)
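/*
 * Editorial worked example (not from the original source): for an IP ident
 * of 0x1234, IPSEC_FRAG_HASH_FUNC() folds the two bytes of the ident
 * together:
 *
 *	(0x1234 & 0xff) ^ ((0x1234 / 256) & 0xff) = 0x34 ^ 0x12 = 0x26
 *
 * so the entry hashes to slot 0x26 of the 256 slots.  Likewise,
 * V4_FRAG_OFFSET() converts the 13-bit fragment-offset field to a byte
 * offset by shifting left 3, since offsets are carried in 8-byte units.
 */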
/*
 * Initialize an ipsec fragcache instance.
 * Returns B_FALSE if memory allocation fails.
 */
boolean_t
ipsec_fragcache_init(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *ftemp;
	int i;

	mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL);
	frag->itpf_ptr = (ipsec_fragcache_entry_t **)
	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) *
	    IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP);
	if (frag->itpf_ptr == NULL)
		return (B_FALSE);

	ftemp = (ipsec_fragcache_entry_t *)
	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t) *
	    IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP);
	if (ftemp == NULL) {
		kmem_free(frag->itpf_ptr, sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
		return (B_FALSE);
	}

	frag->itpf_freelist = NULL;

	for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) {
		ftemp->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = ftemp;
		ftemp++;
	}

	frag->itpf_expire_hint = 0;

	return (B_TRUE);
}

void
ipsec_fragcache_uninit(ipsec_fragcache_t *frag, ipsec_stack_t *ipss)
{
	ipsec_fragcache_entry_t *fep;
	int i;

	mutex_enter(&frag->itpf_lock);
	if (frag->itpf_ptr) {
		/* Delete any existing fragcache entry chains */
		for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
			fep = (frag->itpf_ptr)[i];
			while (fep != NULL) {
				/* Returned fep is next in chain or NULL */
				fep = fragcache_delentry(i, fep, frag, ipss);
			}
		}
		/*
		 * Chase the pointers back to the beginning
		 * of the memory allocation and then
		 * get rid of the allocated freelist
		 */
		while (frag->itpf_freelist->itpfe_next != NULL)
			frag->itpf_freelist = frag->itpf_freelist->itpfe_next;
		/*
		 * XXX - If we ever dynamically grow the freelist
		 * then we'll have to free entries individually
		 * or determine how many entries or chunks we have
		 * grown since the initial allocation.
		 */
		kmem_free(frag->itpf_freelist,
		    sizeof (ipsec_fragcache_entry_t) *
		    IPSEC_FRAG_HASH_SIZE);
		/* Free the fragcache structure */
		kmem_free(frag->itpf_ptr,
		    sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
	}
	mutex_exit(&frag->itpf_lock);
	mutex_destroy(&frag->itpf_lock);
}
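/*
 * Editorial example (not from the original source): the lifecycle pairing
 * used by the tunnel-policy code above.  Each ipsec_tun_pol_t embeds one
 * fragcache; init may fail under memory pressure and must be checked.
 */
#ifdef EXAMPLE_SKETCH
static boolean_t
example_fragcache_lifecycle(ipsec_tun_pol_t *itp, ipsec_stack_t *ipss)
{
	if (!ipsec_fragcache_init(&itp->itp_fragcache))
		return (B_FALSE);	/* KM_NOSLEEP allocation failed. */

	/* ... ipsec_fragcache_add() calls happen while the policy lives ... */

	ipsec_fragcache_uninit(&itp->itp_fragcache, ipss);
	return (B_TRUE);
}
#endif	/* EXAMPLE_SKETCH */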
/*
 * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
 * Returns mp if a whole fragment has been assembled, NULL otherwise
 * The returned mp could be a b_next chain of fragments.
 *
 * The iramp argument is set on inbound; NULL if outbound.
 */
mblk_t *
ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *iramp, mblk_t *mp,
    int outer_hdr_len, ipsec_stack_t *ipss)
{
	boolean_t is_v4;
	time_t itpf_time;
	ipha_t *iph;
	ipha_t *oiph;
	ip6_t *ip6h = NULL;
	uint8_t v6_proto;
	uint8_t *v6_proto_p;
	uint16_t ip6_hdr_length;
	ip_pkt_t ipp;
	ip6_frag_t *fraghdr;
	ipsec_fragcache_entry_t *fep;
	int i;
	mblk_t *nmp, *prevmp;
	int firstbyte, lastbyte;
	int offset;
	int last;
	boolean_t inbound = (iramp != NULL);

#ifdef FRAGCACHE_DEBUG
	cmn_err(CE_WARN, "Fragcache: %s\n", inbound ? "INBOUND" : "OUTBOUND");
#endif
	/*
	 * You're on the slow path, so ensure that every packet in the
	 * cache is a single-mblk one.
	 */
	if (mp->b_cont != NULL) {
		nmp = msgpullup(mp, -1);
		if (nmp == NULL) {
			ip_drop_packet(mp, inbound, NULL,
			    DROPPER(ipss, ipds_spd_nomem),
			    &ipss->ipsec_spd_dropper);
			if (inbound)
				(void) ip_recv_attr_free_mblk(iramp);
			return (NULL);
		}
		freemsg(mp);
		mp = nmp;
	}

	mutex_enter(&frag->itpf_lock);

	oiph = (ipha_t *)mp->b_rptr;
	iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);

	if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
		is_v4 = B_TRUE;
	} else {
		ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
		ip6h = (ip6_t *)(mp->b_rptr + outer_hdr_len);

		/*
		 * Find the upper layer protocol.
		 * If this fails, we have a malformed packet.
		 */
		if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip6_hdr_length,
		    &v6_proto_p)) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(mp, inbound, NULL,
			    DROPPER(ipss, ipds_spd_malformed_packet),
			    &ipss->ipsec_spd_dropper);
			if (inbound)
				(void) ip_recv_attr_free_mblk(iramp);
			return (NULL);
		} else {
			v6_proto = *v6_proto_p;
		}

		bzero(&ipp, sizeof (ipp));
		(void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &ipp, NULL);
		if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
			/*
			 * We think this is a fragment, but didn't find
			 * a fragment header.  Something is wrong.
			 */
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(mp, inbound, NULL,
			    DROPPER(ipss, ipds_spd_malformed_frag),
			    &ipss->ipsec_spd_dropper);
			if (inbound)
				(void) ip_recv_attr_free_mblk(iramp);
			return (NULL);
		}
		fraghdr = ipp.ipp_fraghdr;
		is_v4 = B_FALSE;
	}

	/* Anything to cleanup? */

	/*
	 * This cleanup call could be put in a timer loop
	 * but it may actually be just as reasonable a decision to
	 * leave it here.  The disadvantage is this only gets called when
	 * frags are added.  The advantage is that it is not
	 * susceptible to race conditions like a time-based cleanup
	 * may be.
	 */
	itpf_time = gethrestime_sec();
	if (itpf_time >= frag->itpf_expire_hint)
		ipsec_fragcache_clean(frag, ipss);

	/* Lookup to see if there is an existing entry */

	if (is_v4)
		i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident);
	else
		i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident);

	for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) {
		if (is_v4) {
			ASSERT(iph != NULL);
			if ((fep->itpfe_id == iph->ipha_ident) &&
			    (fep->itpfe_src == iph->ipha_src) &&
			    (fep->itpfe_dst == iph->ipha_dst) &&
			    (fep->itpfe_proto == iph->ipha_protocol))
				break;
		} else {
			ASSERT(fraghdr != NULL);
			ASSERT(fep != NULL);
			if ((fep->itpfe_id == fraghdr->ip6f_ident) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6,
			    &ip6h->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6,
			    &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto))
				break;
		}
	}

	if (is_v4) {
		firstbyte = V4_FRAG_OFFSET(iph);
		lastbyte = firstbyte + ntohs(iph->ipha_length) -
		    IPH_HDR_LENGTH(iph);
		last = (V4_MORE_FRAGS(iph) == 0);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, "
		    "is_last_frag = %d, id = %d, mp = %p\n", firstbyte,
		    lastbyte, last, iph->ipha_ident, mp);
#endif
	} else {
		firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
		lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
		    sizeof (ip6_t) - ip6_hdr_length;
		last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0;
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, "
		    "is_last_frag = %d, id = %d, fraghdr = %p, mp = %p\n",
		    firstbyte, lastbyte, last, fraghdr->ip6f_ident, fraghdr,
		    mp);
#endif
	}
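	/*
	 * Editorial note (not from the original source): the check below
	 * rejects any fragment whose starting byte offset falls in [1, 8].
	 * Offsets are multiples of 8, so in practice this rejects offset 8:
	 * such a fragment implies a first fragment carrying only 8 bytes of
	 * payload, too small to hold a complete transport header, which is
	 * a classic tiny-fragment evasion pattern.  The whole cache entry
	 * is deleted as a precaution.
	 */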
	/* check for bogus fragments and delete the entry */
	if (firstbyte > 0 && firstbyte <= 8) {
		if (fep != NULL)
			(void) fragcache_delentry(i, fep, frag, ipss);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(mp, inbound, NULL,
		    DROPPER(ipss, ipds_spd_malformed_frag),
		    &ipss->ipsec_spd_dropper);
		if (inbound)
			(void) ip_recv_attr_free_mblk(iramp);
		return (NULL);
	}

	/* Not found, allocate a new entry */
	if (fep == NULL) {
		if (frag->itpf_freelist == NULL) {
			/* see if there is some space */
			ipsec_fragcache_clean(frag, ipss);
			if (frag->itpf_freelist == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(mp, inbound, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				if (inbound)
					(void) ip_recv_attr_free_mblk(iramp);
				return (NULL);
			}
		}

		fep = frag->itpf_freelist;
		frag->itpf_freelist = fep->itpfe_next;

		if (is_v4) {
			bcopy((caddr_t)&iph->ipha_src,
			    (caddr_t)&fep->itpfe_src,
			    sizeof (struct in_addr));
			bcopy((caddr_t)&iph->ipha_dst,
			    (caddr_t)&fep->itpfe_dst,
			    sizeof (struct in_addr));
			fep->itpfe_id = iph->ipha_ident;
			fep->itpfe_proto = iph->ipha_protocol;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		} else {
			bcopy((in6_addr_t *)&ip6h->ip6_src,
			    (in6_addr_t *)&fep->itpfe_src6,
			    sizeof (struct in6_addr));
			bcopy((in6_addr_t *)&ip6h->ip6_dst,
			    (in6_addr_t *)&fep->itpfe_dst6,
			    sizeof (struct in6_addr));
			fep->itpfe_id = fraghdr->ip6f_ident;
			fep->itpfe_proto = v6_proto;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		}
		itpf_time = gethrestime_sec();
		fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1;
		fep->itpfe_last = 0;
		fep->itpfe_fraglist = NULL;
		fep->itpfe_depth = 0;
		fep->itpfe_next = (frag->itpf_ptr)[i];
		(frag->itpf_ptr)[i] = fep;

		if (frag->itpf_expire_hint > fep->itpfe_exp)
			frag->itpf_expire_hint = fep->itpfe_exp;

	}

	/* Insert it in the frag list */
	/* List is in order by starting offset of fragments */

	prevmp = NULL;
	for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) {
		ipha_t *niph;
		ipha_t *oniph;
		ip6_t *nip6h;
		ip_pkt_t nipp;
		ip6_frag_t *nfraghdr;
		uint16_t nip6_hdr_length;
		uint8_t *nv6_proto_p;
		int nfirstbyte, nlastbyte;
		char *data, *ndata;
		mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp);
		int hdr_len;

		oniph = (ipha_t *)mp->b_rptr;
		nip6h = NULL;
		niph = NULL;

		/*
		 * Determine outer header type and length and set
		 * pointers appropriately
		 */

		if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION);
			ASSERT(ndata_mp->b_cont == NULL);
			nip6h = (ip6_t *)ndata_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(ndata_mp, nip6h,
			    &nip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0);
		}

		/*
		 * Determine inner header type and length and set
		 * pointers appropriately
		 */

		if (is_v4) {
			if (niph == NULL) {
				/* Was v6 outer */
				niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
			}
			nfirstbyte = V4_FRAG_OFFSET(niph);
			nlastbyte = nfirstbyte + ntohs(niph->ipha_length) -
			    IPH_HDR_LENGTH(niph);
		} else {
			ASSERT(ndata_mp->b_cont == NULL);
			nip6h = (ip6_t *)(ndata_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(ndata_mp, nip6h,
			    &nip6_hdr_length, &nv6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				ipsec_freemsg_chain(ndata_mp);
				if (inbound)
					(void) ip_recv_attr_free_mblk(iramp);
				return (NULL);
			}
			bzero(&nipp, sizeof (nipp));
			(void) ip_find_hdr_v6(ndata_mp, nip6h, B_FALSE, &nipp,
			    NULL);
			nfraghdr = nipp.ipp_fraghdr;
			nfirstbyte = ntohs(nfraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) +
			    sizeof (ip6_t) - nip6_hdr_length;
		}
		/* Check for overlapping fragments */
		if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) {
			/*
			 * Overlap Check:
			 *	~~~~---------		# Check if the newly
			 *	~   ndata_mp|		# received fragment
			 *	~~~~---------		# overlaps with the
			 *	---------~~~~~~		# current fragment.
			 *	|  mp	      ~
			 *	---------~~~~~~
			 */
			if (is_v4) {
				data = (char *)iph + IPH_HDR_LENGTH(iph) +
				    firstbyte - nfirstbyte;
				ndata = (char *)niph + IPH_HDR_LENGTH(niph);
			} else {
				data = (char *)ip6h +
				    nip6_hdr_length + firstbyte -
				    nfirstbyte;
				ndata = (char *)nip6h + nip6_hdr_length;
			}
			if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) -
			    firstbyte)) {
				/* Overlapping data does not match */
				(void) fragcache_delentry(i, fep, frag, ipss);
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(mp, inbound, NULL,
				    DROPPER(ipss, ipds_spd_overlap_frag),
				    &ipss->ipsec_spd_dropper);
				if (inbound)
					(void) ip_recv_attr_free_mblk(iramp);
				return (NULL);
			}
			/* Part of defense for jolt2.c fragmentation attack */
			if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
				/*
				 * Check for identical or subset fragments:
				 *	----------	~~~~--------~~~~~
				 *	|   nmp	 |  or	~     nmp	~
				 *	----------	~~~~--------~~~~~
				 *	----------	    --------
				 *	|   mp	 |	    |  mp  |
				 *	----------	    --------
				 */
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(mp, inbound, NULL,
				    DROPPER(ipss, ipds_spd_evil_frag),
				    &ipss->ipsec_spd_dropper);
				if (inbound)
					(void) ip_recv_attr_free_mblk(iramp);
				return (NULL);
			}

		}

		/* Correct location for this fragment? */
		if (firstbyte <= nfirstbyte) {
			/*
			 * Check if the tail end of the new fragment overlaps
			 * with the head of the current fragment.
			 *	--------~~~~~~~
			 *	|  nmp	      ~
			 *	--------~~~~~~~
			 *	~~~~~--------
			 *	~    mp	    |
			 *	~~~~~--------
			 */
			if (lastbyte > nfirstbyte) {
				/* Fragments overlap */
				if (is_v4) {
					data = (char *)iph +
					    IPH_HDR_LENGTH(iph) + firstbyte -
					    nfirstbyte;
					ndata = (char *)niph +
					    IPH_HDR_LENGTH(niph);
				} else {
					data = (char *)ip6h +
					    nip6_hdr_length + firstbyte -
					    nfirstbyte;
					ndata = (char *)nip6h + nip6_hdr_length;
				}
				if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
				    - nfirstbyte)) {
					/* Overlap mismatch */
					(void) fragcache_delentry(i, fep, frag,
					    ipss);
					mutex_exit(&frag->itpf_lock);
					ip_drop_packet(mp, inbound, NULL,
					    DROPPER(ipss,
					    ipds_spd_overlap_frag),
					    &ipss->ipsec_spd_dropper);
					if (inbound) {
						(void) ip_recv_attr_free_mblk(
						    iramp);
					}
					return (NULL);
				}
			}

			/*
			 * Fragment does not illegally overlap and can now
			 * be inserted into the chain
			 */
			break;
		}

		prevmp = nmp;
	}
	/* Prepend the attributes before we link it in */
	if (iramp != NULL) {
		ASSERT(iramp->b_cont == NULL);
		iramp->b_cont = mp;
		mp = iramp;
		iramp = NULL;
	}
	mp->b_next = nmp;

	if (prevmp == NULL) {
		fep->itpfe_fraglist = mp;
	} else {
		prevmp->b_next = mp;
	}
	if (last)
		fep->itpfe_last = 1;

	/* Part of defense for jolt2.c fragmentation attack */
	if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
		(void) fragcache_delentry(i, fep, frag, ipss);
		mutex_exit(&frag->itpf_lock);
		if (inbound)
			mp = ip_recv_attr_free_mblk(mp);

		ip_drop_packet(mp, inbound, NULL,
		    DROPPER(ipss, ipds_spd_max_frags),
		    &ipss->ipsec_spd_dropper);
		return (NULL);
	}
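	/*
	 * Editorial worked example (not from the original source) of the
	 * completeness walk below.  Suppose three fragments with 1480-byte
	 * payloads arrived; the sorted list then carries byte ranges
	 *
	 *	[0, 1480)  [1480, 2960)  [2960, 4440), MF clear on the last.
	 *
	 * The walk keeps a running "offset": each fragment must start at or
	 * below it (no hole) and extends it to its lastbyte.  If a fragment
	 * started above the running offset, say [2960, ...) while offset is
	 * still 1480, a piece is missing and the chain stays cached.
	 */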
	/* Check for complete packet */

	if (!fep->itpfe_last) {
		mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "Fragment cached, last not yet seen.\n");
#endif
		return (NULL);
	}

	offset = 0;
	for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
		mblk_t *data_mp = (inbound ? mp->b_cont : mp);
		int hdr_len;

		oiph = (ipha_t *)data_mp->b_rptr;
		ip6h = NULL;
		iph = NULL;

		if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
			ASSERT(data_mp->b_cont == NULL);
			ip6h = (ip6_t *)data_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(data_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
		}

		/* Calculate current fragment start/end */
		if (is_v4) {
			if (iph == NULL) {
				/* Was v6 outer */
				iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
			}
			firstbyte = V4_FRAG_OFFSET(iph);
			lastbyte = firstbyte + ntohs(iph->ipha_length) -
			    IPH_HDR_LENGTH(iph);
		} else {
			ASSERT(data_mp->b_cont == NULL);
			ip6h = (ip6_t *)(data_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(data_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			v6_proto = *v6_proto_p;
			bzero(&ipp, sizeof (ipp));
			(void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp,
			    NULL);
			fraghdr = ipp.ipp_fraghdr;
			firstbyte = ntohs(fraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
			    sizeof (ip6_t) - ip6_hdr_length;
		}

		/*
		 * If this fragment is greater than current offset,
		 * we have a missing fragment so return NULL
		 */
		if (firstbyte > offset) {
			mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
			/*
			 * Note, this can happen when the last frag
			 * gets sent through because it is smaller
			 * than the MTU.  It is not necessarily an
			 * error condition.
			 */
			cmn_err(CE_WARN, "Frag greater than offset! : "
			    "missing fragment: firstbyte = %d, offset = %d, "
			    "mp = %p\n", firstbyte, offset, mp);
#endif
			return (NULL);
		}
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "Frag offsets : "
		    "firstbyte = %d, offset = %d, mp = %p\n",
		    firstbyte, offset, mp);
#endif

		/*
		 * If we are at the last fragment, we have the complete
		 * packet, so rechain things and return it to caller
		 * for processing
		 */

		if ((is_v4 && !V4_MORE_FRAGS(iph)) ||
		    (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) {
			mp = fep->itpfe_fraglist;
			fep->itpfe_fraglist = NULL;
			(void) fragcache_delentry(i, fep, frag, ipss);
			mutex_exit(&frag->itpf_lock);

			if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) >
			    65535)) || (!is_v4 && (firstbyte +
			    ntohs(ip6h->ip6_plen) > 65535))) {
				/* It is an invalid "ping-o-death" packet */
				/* Discard it */
				ip_drop_packet_chain(mp, inbound, NULL,
				    DROPPER(ipss, ipds_spd_evil_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
#ifdef FRAGCACHE_DEBUG
			cmn_err(CE_WARN, "Fragcache returning mp = %p, "
			    "mp->b_next = %p", mp, mp->b_next);
#endif
			/*
			 * For inbound case, mp has attrmp b_next'd chain
			 * For outbound case, it is just data mp chain
			 */
			return (mp);
		}

		/*
		 * Update new ending offset if this
		 * fragment extends the packet
		 */
		if (offset < lastbyte)
			offset = lastbyte;
	}

	mutex_exit(&frag->itpf_lock);

	/* Didn't find last fragment, so return NULL */
	return (NULL);
}
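/*
 * Editorial example (not from the original source): the two calling
 * conventions of ipsec_fragcache_add().  Inbound callers (as in
 * ipsec_tun_inbound() above) pass the packet's attributes as a separate
 * mblk; outbound callers pass NULL for iramp and hand in bare data.
 */
#ifdef EXAMPLE_SKETCH
static mblk_t *
example_cache_inbound_frag(ipsec_tun_pol_t *itp, ip_recv_attr_t *ira,
    mblk_t *data_mp, int outer_hdr_len, ipsec_stack_t *ipss)
{
	mblk_t *attrmp = ip_recv_attr_to_mblk(ira);

	if (attrmp == NULL)
		return (NULL);	/* Caller must still drop data_mp. */

	/*
	 * NULL means "consumed": dropped, or cached while waiting for the
	 * rest of the fragments.  Non-NULL is the complete b_next chain,
	 * each element led by its attribute mblk.
	 */
	return (ipsec_fragcache_add(&itp->itp_fragcache, attrmp, data_mp,
	    outer_hdr_len, ipss));
}
#endif	/* EXAMPLE_SKETCH */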
: " 6766 "missing fragment: firstbyte = %d, offset = %d, " 6767 "mp = %p\n", firstbyte, offset, mp); 6768 #endif 6769 return (NULL); 6770 } 6771 #ifdef FRAGCACHE_DEBUG 6772 cmn_err(CE_WARN, "Frag offsets : " 6773 "firstbyte = %d, offset = %d, mp = %p\n", 6774 firstbyte, offset, mp); 6775 #endif 6776 6777 /* 6778 * If we are at the last fragment, we have the complete 6779 * packet, so rechain things and return it to caller 6780 * for processing 6781 */ 6782 6783 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6784 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6785 mp = fep->itpfe_fraglist; 6786 fep->itpfe_fraglist = NULL; 6787 (void) fragcache_delentry(i, fep, frag, ipss); 6788 mutex_exit(&frag->itpf_lock); 6789 6790 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6791 65535)) || (!is_v4 && (firstbyte + 6792 ntohs(ip6h->ip6_plen) > 65535))) { 6793 /* It is an invalid "ping-o-death" packet */ 6794 /* Discard it */ 6795 ip_drop_packet_chain(mp, inbound, NULL, 6796 DROPPER(ipss, ipds_spd_evil_frag), 6797 &ipss->ipsec_spd_dropper); 6798 return (NULL); 6799 } 6800 #ifdef FRAGCACHE_DEBUG 6801 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6802 "mp->b_next = %p", mp, mp->b_next); 6803 #endif 6804 /* 6805 * For inbound case, mp has attrmp b_next'd chain 6806 * For outbound case, it is just data mp chain 6807 */ 6808 return (mp); 6809 } 6810 6811 /* 6812 * Update new ending offset if this 6813 * fragment extends the packet 6814 */ 6815 if (offset < lastbyte) 6816 offset = lastbyte; 6817 } 6818 6819 mutex_exit(&frag->itpf_lock); 6820 6821 /* Didn't find last fragment, so return NULL */ 6822 return (NULL); 6823 } 6824 6825 static void 6826 ipsec_fragcache_clean(ipsec_fragcache_t *frag, ipsec_stack_t *ipss) 6827 { 6828 ipsec_fragcache_entry_t *fep; 6829 int i; 6830 ipsec_fragcache_entry_t *earlyfep = NULL; 6831 time_t itpf_time; 6832 int earlyexp; 6833 int earlyi = 0; 6834 6835 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6836 6837 itpf_time = gethrestime_sec(); 6838 earlyexp = itpf_time + 10000; 6839 6840 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6841 fep = (frag->itpf_ptr)[i]; 6842 while (fep) { 6843 if (fep->itpfe_exp < itpf_time) { 6844 /* found */ 6845 fep = fragcache_delentry(i, fep, frag, ipss); 6846 } else { 6847 if (fep->itpfe_exp < earlyexp) { 6848 earlyfep = fep; 6849 earlyexp = fep->itpfe_exp; 6850 earlyi = i; 6851 } 6852 fep = fep->itpfe_next; 6853 } 6854 } 6855 } 6856 6857 frag->itpf_expire_hint = earlyexp; 6858 6859 /* if (!found) */ 6860 if (frag->itpf_freelist == NULL) 6861 (void) fragcache_delentry(earlyi, earlyfep, frag, ipss); 6862 } 6863 6864 static ipsec_fragcache_entry_t * 6865 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 6866 ipsec_fragcache_t *frag, ipsec_stack_t *ipss) 6867 { 6868 ipsec_fragcache_entry_t *targp; 6869 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 6870 6871 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6872 6873 /* Free up any fragment list still in cache entry */ 6874 if (fep->itpfe_fraglist != NULL) { 6875 ip_drop_packet_chain(fep->itpfe_fraglist, 6876 ip_recv_attr_is_mblk(fep->itpfe_fraglist), NULL, 6877 DROPPER(ipss, ipds_spd_expired_frags), 6878 &ipss->ipsec_spd_dropper); 6879 } 6880 fep->itpfe_fraglist = NULL; 6881 6882 targp = (frag->itpf_ptr)[slot]; 6883 ASSERT(targp != 0); 6884 6885 if (targp == fep) { 6886 /* unlink from head of hash chain */ 6887 (frag->itpf_ptr)[slot] = nextp; 6888 /* link into free list */ 6889 fep->itpfe_next = frag->itpf_freelist; 6890 frag->itpf_freelist = fep; 6891 return (nextp); 6892 } 6893 6894 /* maybe should 
static ipsec_fragcache_entry_t *
fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep,
    ipsec_fragcache_t *frag, ipsec_stack_t *ipss)
{
	ipsec_fragcache_entry_t *targp;
	ipsec_fragcache_entry_t *nextp = fep->itpfe_next;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	/* Free up any fragment list still in cache entry */
	if (fep->itpfe_fraglist != NULL) {
		ip_drop_packet_chain(fep->itpfe_fraglist,
		    ip_recv_attr_is_mblk(fep->itpfe_fraglist), NULL,
		    DROPPER(ipss, ipds_spd_expired_frags),
		    &ipss->ipsec_spd_dropper);
	}
	fep->itpfe_fraglist = NULL;

	targp = (frag->itpf_ptr)[slot];
	ASSERT(targp != 0);

	if (targp == fep) {
		/* unlink from head of hash chain */
		(frag->itpf_ptr)[slot] = nextp;
		/* link into free list */
		fep->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = fep;
		return (nextp);
	}

	/* maybe should use double linked list to make update faster */
	/* must be past front of chain */
	while (targp) {
		if (targp->itpfe_next == fep) {
			/* unlink from hash chain */
			targp->itpfe_next = nextp;
			/* link into free list */
			fep->itpfe_next = frag->itpf_freelist;
			frag->itpf_freelist = fep;
			return (nextp);
		}
		targp = targp->itpfe_next;
		ASSERT(targp != 0);
	}
	/* NOTREACHED */
	return (NULL);
}