1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2016 by Delphix. All rights reserved. 26 * Copyright (c) 2018, Joyent, Inc. 27 */ 28 29 /* 30 * IPsec Security Policy Database. 31 * 32 * This module maintains the SPD and provides routines used by ip and ip6 33 * to apply IPsec policy to inbound and outbound datagrams. 
34 */ 35 36 #include <sys/types.h> 37 #include <sys/stream.h> 38 #include <sys/stropts.h> 39 #include <sys/sysmacros.h> 40 #include <sys/strsubr.h> 41 #include <sys/strsun.h> 42 #include <sys/strlog.h> 43 #include <sys/strsun.h> 44 #include <sys/cmn_err.h> 45 #include <sys/zone.h> 46 47 #include <sys/systm.h> 48 #include <sys/param.h> 49 #include <sys/kmem.h> 50 #include <sys/ddi.h> 51 52 #include <sys/crypto/api.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 64 #include <net/pfkeyv2.h> 65 #include <net/pfpolicy.h> 66 #include <inet/sadb.h> 67 #include <inet/ipsec_impl.h> 68 69 #include <inet/ip_impl.h> /* For IP_MOD_ID */ 70 71 #include <inet/ipsecah.h> 72 #include <inet/ipsecesp.h> 73 #include <inet/ipdrop.h> 74 #include <inet/ipclassifier.h> 75 #include <inet/iptun.h> 76 #include <inet/iptun/iptun_impl.h> 77 78 static void ipsec_update_present_flags(ipsec_stack_t *); 79 static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *, 80 netstack_t *); 81 static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *, 82 ipha_t *, ip6_t *, uint64_t, ip_recv_attr_t *, netstack_t *); 83 static void ipsec_action_free_table(ipsec_action_t *); 84 static void ipsec_action_reclaim(void *); 85 static void ipsec_action_reclaim_stack(ipsec_stack_t *); 86 static void ipsid_init(netstack_t *); 87 static void ipsid_fini(netstack_t *); 88 89 /* sel_flags values for ipsec_init_inbound_sel(). */ 90 #define SEL_NONE 0x0000 91 #define SEL_PORT_POLICY 0x0001 92 #define SEL_IS_ICMP 0x0002 93 #define SEL_TUNNEL_MODE 0x0004 94 #define SEL_POST_FRAG 0x0008 95 96 /* Return values for ipsec_init_inbound_sel(). 
*/ 97 typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG} 98 selret_t; 99 100 static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *, 101 ipha_t *, ip6_t *, uint8_t); 102 103 static boolean_t ipsec_check_ipsecin_action(ip_recv_attr_t *, mblk_t *, 104 struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **, 105 kstat_named_t **, netstack_t *); 106 static void ipsec_unregister_prov_update(void); 107 static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *); 108 static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *); 109 static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *); 110 static boolean_t ipsec_kstat_init(ipsec_stack_t *); 111 static void ipsec_kstat_destroy(ipsec_stack_t *); 112 static int ipsec_free_tables(ipsec_stack_t *); 113 static int tunnel_compare(const void *, const void *); 114 static void ipsec_freemsg_chain(mblk_t *); 115 static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, 116 struct kstat_named *, ipdropper_t *); 117 static boolean_t ipsec_kstat_init(ipsec_stack_t *); 118 static void ipsec_kstat_destroy(ipsec_stack_t *); 119 static int ipsec_free_tables(ipsec_stack_t *); 120 static int tunnel_compare(const void *, const void *); 121 static void ipsec_freemsg_chain(mblk_t *); 122 123 /* 124 * Selector hash table is statically sized at module load time. 125 * we default to 251 buckets, which is the largest prime number under 255 126 */ 127 128 #define IPSEC_SPDHASH_DEFAULT 251 129 130 /* SPD hash-size tunable per tunnel. 
*/ 131 #define TUN_SPDHASH_DEFAULT 5 132 133 uint32_t ipsec_spd_hashsize; 134 uint32_t tun_spd_hashsize; 135 136 #define IPSEC_SEL_NOHASH ((uint32_t)(~0)) 137 138 /* 139 * Handle global across all stack instances 140 */ 141 static crypto_notify_handle_t prov_update_handle = NULL; 142 143 static kmem_cache_t *ipsec_action_cache; 144 static kmem_cache_t *ipsec_sel_cache; 145 static kmem_cache_t *ipsec_pol_cache; 146 147 /* Frag cache prototypes */ 148 static void ipsec_fragcache_clean(ipsec_fragcache_t *, ipsec_stack_t *); 149 static ipsec_fragcache_entry_t *fragcache_delentry(int, 150 ipsec_fragcache_entry_t *, ipsec_fragcache_t *, ipsec_stack_t *); 151 boolean_t ipsec_fragcache_init(ipsec_fragcache_t *); 152 void ipsec_fragcache_uninit(ipsec_fragcache_t *, ipsec_stack_t *ipss); 153 mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, 154 int, ipsec_stack_t *); 155 156 int ipsec_hdr_pullup_needed = 0; 157 int ipsec_weird_null_inbound_policy = 0; 158 159 #define ALGBITS_ROUND_DOWN(x, align) (((x)/(align))*(align)) 160 #define ALGBITS_ROUND_UP(x, align) ALGBITS_ROUND_DOWN((x)+(align)-1, align) 161 162 /* 163 * Inbound traffic should have matching identities for both SA's. 164 */ 165 166 #define SA_IDS_MATCH(sa1, sa2) \ 167 (((sa1) == NULL) || ((sa2) == NULL) || \ 168 (((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) && \ 169 (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid)))) 170 171 /* 172 * IPv6 Fragments 173 */ 174 #define IS_V6_FRAGMENT(ipp) (ipp.ipp_fields & IPPF_FRAGHDR) 175 176 /* 177 * Policy failure messages. 
178 */ 179 static char *ipsec_policy_failure_msgs[] = { 180 181 /* IPSEC_POLICY_NOT_NEEDED */ 182 "%s: Dropping the datagram because the incoming packet " 183 "is %s, but the recipient expects clear; Source %s, " 184 "Destination %s.\n", 185 186 /* IPSEC_POLICY_MISMATCH */ 187 "%s: Policy Failure for the incoming packet (%s); Source %s, " 188 "Destination %s.\n", 189 190 /* IPSEC_POLICY_AUTH_NOT_NEEDED */ 191 "%s: Authentication present while not expected in the " 192 "incoming %s packet; Source %s, Destination %s.\n", 193 194 /* IPSEC_POLICY_ENCR_NOT_NEEDED */ 195 "%s: Encryption present while not expected in the " 196 "incoming %s packet; Source %s, Destination %s.\n", 197 198 /* IPSEC_POLICY_SE_NOT_NEEDED */ 199 "%s: Self-Encapsulation present while not expected in the " 200 "incoming %s packet; Source %s, Destination %s.\n", 201 }; 202 203 /* 204 * General overviews: 205 * 206 * Locking: 207 * 208 * All of the system policy structures are protected by a single 209 * rwlock. These structures are threaded in a 210 * fairly complex fashion and are not expected to change on a 211 * regular basis, so this should not cause scaling/contention 212 * problems. As a result, policy checks should (hopefully) be MT-hot. 213 * 214 * Allocation policy: 215 * 216 * We use custom kmem cache types for the various 217 * bits & pieces of the policy data structures. All allocations 218 * use KM_NOSLEEP instead of KM_SLEEP for policy allocation. The 219 * policy table is of potentially unbounded size, so we don't 220 * want to provide a way to hog all system memory with policy 221 * entries.. 
222 */ 223 224 /* Convenient functions for freeing or dropping a b_next linked mblk chain */ 225 226 /* Free all messages in an mblk chain */ 227 static void 228 ipsec_freemsg_chain(mblk_t *mp) 229 { 230 mblk_t *mpnext; 231 while (mp != NULL) { 232 ASSERT(mp->b_prev == NULL); 233 mpnext = mp->b_next; 234 mp->b_next = NULL; 235 freemsg(mp); 236 mp = mpnext; 237 } 238 } 239 240 /* 241 * ip_drop all messages in an mblk chain 242 * Can handle a b_next chain of ip_recv_attr_t mblks, or just a b_next chain 243 * of data. 244 */ 245 static void 246 ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *ill, 247 struct kstat_named *counter, ipdropper_t *who_called) 248 { 249 mblk_t *mpnext; 250 while (mp != NULL) { 251 ASSERT(mp->b_prev == NULL); 252 mpnext = mp->b_next; 253 mp->b_next = NULL; 254 if (ip_recv_attr_is_mblk(mp)) 255 mp = ip_recv_attr_free_mblk(mp); 256 ip_drop_packet(mp, inbound, ill, counter, who_called); 257 mp = mpnext; 258 } 259 } 260 261 /* 262 * AVL tree comparison function. 263 * the in-kernel avl assumes unique keys for all objects. 264 * Since sometimes policy will duplicate rules, we may insert 265 * multiple rules with the same rule id, so we need a tie-breaker. 266 */ 267 static int 268 ipsec_policy_cmpbyid(const void *a, const void *b) 269 { 270 const ipsec_policy_t *ipa, *ipb; 271 uint64_t idxa, idxb; 272 273 ipa = (const ipsec_policy_t *)a; 274 ipb = (const ipsec_policy_t *)b; 275 idxa = ipa->ipsp_index; 276 idxb = ipb->ipsp_index; 277 278 if (idxa < idxb) 279 return (-1); 280 if (idxa > idxb) 281 return (1); 282 /* 283 * Tie-breaker #1: All installed policy rules have a non-NULL 284 * ipsl_sel (selector set), so an entry with a NULL ipsp_sel is not 285 * actually in-tree but rather a template node being used in 286 * an avl_find query; see ipsec_policy_delete(). 
This gives us 287 * a placeholder in the ordering just before the first entry with 288 * a key >= the one we're looking for, so we can walk forward from 289 * that point to get the remaining entries with the same id. 290 */ 291 if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL)) 292 return (-1); 293 if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL)) 294 return (1); 295 /* 296 * At most one of the arguments to the comparison should have a 297 * NULL selector pointer; if not, the tree is broken. 298 */ 299 ASSERT(ipa->ipsp_sel != NULL); 300 ASSERT(ipb->ipsp_sel != NULL); 301 /* 302 * Tie-breaker #2: use the virtual address of the policy node 303 * to arbitrarily break ties. Since we use the new tree node in 304 * the avl_find() in ipsec_insert_always, the new node will be 305 * inserted into the tree in the right place in the sequence. 306 */ 307 if (ipa < ipb) 308 return (-1); 309 if (ipa > ipb) 310 return (1); 311 return (0); 312 } 313 314 /* 315 * Free what ipsec_alloc_table allocated. 316 */ 317 void 318 ipsec_polhead_free_table(ipsec_policy_head_t *iph) 319 { 320 int dir; 321 int i; 322 323 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 324 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 325 326 if (ipr->ipr_hash == NULL) 327 continue; 328 329 for (i = 0; i < ipr->ipr_nchains; i++) { 330 ASSERT(ipr->ipr_hash[i].hash_head == NULL); 331 } 332 kmem_free(ipr->ipr_hash, ipr->ipr_nchains * 333 sizeof (ipsec_policy_hash_t)); 334 ipr->ipr_hash = NULL; 335 } 336 } 337 338 void 339 ipsec_polhead_destroy(ipsec_policy_head_t *iph) 340 { 341 int dir; 342 343 avl_destroy(&iph->iph_rulebyid); 344 rw_destroy(&iph->iph_lock); 345 346 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 347 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 348 int chain; 349 350 for (chain = 0; chain < ipr->ipr_nchains; chain++) 351 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 352 353 } 354 ipsec_polhead_free_table(iph); 355 } 356 357 /* 358 * Free the IPsec stack instance. 
359 */ 360 /* ARGSUSED */ 361 static void 362 ipsec_stack_fini(netstackid_t stackid, void *arg) 363 { 364 ipsec_stack_t *ipss = (ipsec_stack_t *)arg; 365 void *cookie; 366 ipsec_tun_pol_t *node; 367 netstack_t *ns = ipss->ipsec_netstack; 368 int i; 369 ipsec_algtype_t algtype; 370 371 ipsec_loader_destroy(ipss); 372 373 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 374 /* 375 * It's possible we can just ASSERT() the tree is empty. After all, 376 * we aren't called until IP is ready to unload (and presumably all 377 * tunnels have been unplumbed). But we'll play it safe for now, the 378 * loop will just exit immediately if it's empty. 379 */ 380 cookie = NULL; 381 while ((node = (ipsec_tun_pol_t *) 382 avl_destroy_nodes(&ipss->ipsec_tunnel_policies, 383 &cookie)) != NULL) { 384 ITP_REFRELE(node, ns); 385 } 386 avl_destroy(&ipss->ipsec_tunnel_policies); 387 rw_exit(&ipss->ipsec_tunnel_policy_lock); 388 rw_destroy(&ipss->ipsec_tunnel_policy_lock); 389 390 ipsec_config_flush(ns); 391 392 ipsec_kstat_destroy(ipss); 393 394 ip_drop_unregister(&ipss->ipsec_dropper); 395 396 ip_drop_unregister(&ipss->ipsec_spd_dropper); 397 ip_drop_destroy(ipss); 398 /* 399 * Globals start with ref == 1 to prevent IPPH_REFRELE() from 400 * attempting to free them, hence they should have 1 now. 
401 */ 402 ipsec_polhead_destroy(&ipss->ipsec_system_policy); 403 ASSERT(ipss->ipsec_system_policy.iph_refs == 1); 404 ipsec_polhead_destroy(&ipss->ipsec_inactive_policy); 405 ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1); 406 407 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 408 ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head); 409 ipss->ipsec_action_hash[i].hash_head = NULL; 410 mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock)); 411 } 412 413 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 414 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 415 mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock)); 416 } 417 418 rw_enter(&ipss->ipsec_alg_lock, RW_WRITER); 419 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype ++) { 420 for (i = 0; i < IPSEC_MAX_ALGS; i++) { 421 if (ipss->ipsec_alglists[algtype][i] != NULL) 422 ipsec_alg_unreg(algtype, i, ns); 423 } 424 } 425 rw_exit(&ipss->ipsec_alg_lock); 426 rw_destroy(&ipss->ipsec_alg_lock); 427 428 ipsid_gc(ns); 429 ipsid_fini(ns); 430 431 (void) ipsec_free_tables(ipss); 432 kmem_free(ipss, sizeof (*ipss)); 433 } 434 435 void 436 ipsec_policy_g_destroy(void) 437 { 438 kmem_cache_destroy(ipsec_action_cache); 439 kmem_cache_destroy(ipsec_sel_cache); 440 kmem_cache_destroy(ipsec_pol_cache); 441 442 ipsec_unregister_prov_update(); 443 444 netstack_unregister(NS_IPSEC); 445 } 446 447 448 /* 449 * Free what ipsec_alloc_tables allocated. 450 * Called when table allocation fails to free the table. 
451 */ 452 static int 453 ipsec_free_tables(ipsec_stack_t *ipss) 454 { 455 int i; 456 457 if (ipss->ipsec_sel_hash != NULL) { 458 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 459 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 460 } 461 kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize * 462 sizeof (*ipss->ipsec_sel_hash)); 463 ipss->ipsec_sel_hash = NULL; 464 ipss->ipsec_spd_hashsize = 0; 465 } 466 ipsec_polhead_free_table(&ipss->ipsec_system_policy); 467 ipsec_polhead_free_table(&ipss->ipsec_inactive_policy); 468 469 return (ENOMEM); 470 } 471 472 /* 473 * Attempt to allocate the tables in a single policy head. 474 * Return nonzero on failure after cleaning up any work in progress. 475 */ 476 int 477 ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, 478 boolean_t global_cleanup, netstack_t *ns) 479 { 480 int dir; 481 482 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 483 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 484 485 ipr->ipr_nchains = nchains; 486 ipr->ipr_hash = kmem_zalloc(nchains * 487 sizeof (ipsec_policy_hash_t), kmflag); 488 if (ipr->ipr_hash == NULL) 489 return (global_cleanup ? 490 ipsec_free_tables(ns->netstack_ipsec) : 491 ENOMEM); 492 } 493 return (0); 494 } 495 496 /* 497 * Attempt to allocate the various tables. Return nonzero on failure 498 * after cleaning up any work in progress. 
499 */ 500 static int 501 ipsec_alloc_tables(int kmflag, netstack_t *ns) 502 { 503 int error; 504 ipsec_stack_t *ipss = ns->netstack_ipsec; 505 506 error = ipsec_alloc_table(&ipss->ipsec_system_policy, 507 ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); 508 if (error != 0) 509 return (error); 510 511 error = ipsec_alloc_table(&ipss->ipsec_inactive_policy, 512 ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); 513 if (error != 0) 514 return (error); 515 516 ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize * 517 sizeof (*ipss->ipsec_sel_hash), kmflag); 518 519 if (ipss->ipsec_sel_hash == NULL) 520 return (ipsec_free_tables(ipss)); 521 522 return (0); 523 } 524 525 /* 526 * After table allocation, initialize a policy head. 527 */ 528 void 529 ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains) 530 { 531 int dir, chain; 532 533 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 534 avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid, 535 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 536 537 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 538 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 539 ipr->ipr_nchains = nchains; 540 541 for (chain = 0; chain < nchains; chain++) { 542 mutex_init(&(ipr->ipr_hash[chain].hash_lock), 543 NULL, MUTEX_DEFAULT, NULL); 544 } 545 } 546 } 547 548 static boolean_t 549 ipsec_kstat_init(ipsec_stack_t *ipss) 550 { 551 ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net", 552 KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t), 553 KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid); 554 555 if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL) 556 return (B_FALSE); 557 558 ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data; 559 560 #define KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64) 561 KI(esp_stat_in_requests); 562 KI(esp_stat_in_discards); 563 KI(esp_stat_lookup_failure); 564 KI(ah_stat_in_requests); 565 KI(ah_stat_in_discards); 566 
KI(ah_stat_lookup_failure); 567 KI(sadb_acquire_maxpackets); 568 KI(sadb_acquire_qhiwater); 569 #undef KI 570 571 kstat_install(ipss->ipsec_ksp); 572 return (B_TRUE); 573 } 574 575 static void 576 ipsec_kstat_destroy(ipsec_stack_t *ipss) 577 { 578 kstat_delete_netstack(ipss->ipsec_ksp, 579 ipss->ipsec_netstack->netstack_stackid); 580 ipss->ipsec_kstats = NULL; 581 582 } 583 584 /* 585 * Initialize the IPsec stack instance. 586 */ 587 /* ARGSUSED */ 588 static void * 589 ipsec_stack_init(netstackid_t stackid, netstack_t *ns) 590 { 591 ipsec_stack_t *ipss; 592 int i; 593 594 ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP); 595 ipss->ipsec_netstack = ns; 596 597 /* 598 * FIXME: netstack_ipsec is used by some of the routines we call 599 * below, but it isn't set until this routine returns. 600 * Either we introduce optional xxx_stack_alloc() functions 601 * that will be called by the netstack framework before xxx_stack_init, 602 * or we switch spd.c and sadb.c to operate on ipsec_stack_t 603 * (latter has some include file order issues for sadb.h, but makes 604 * sense if we merge some of the ipsec related stack_t's together. 605 */ 606 ns->netstack_ipsec = ipss; 607 608 /* 609 * Make two attempts to allocate policy hash tables; try it at 610 * the "preferred" size (may be set in /etc/system) first, 611 * then fall back to the default size. 612 */ 613 ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ? 614 IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize; 615 616 if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) { 617 cmn_err(CE_WARN, 618 "Unable to allocate %d entry IPsec policy hash table", 619 ipss->ipsec_spd_hashsize); 620 ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; 621 cmn_err(CE_WARN, "Falling back to %d entries", 622 ipss->ipsec_spd_hashsize); 623 (void) ipsec_alloc_tables(KM_SLEEP, ns); 624 } 625 626 /* Just set a default for tunnels. */ 627 ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ? 
628 TUN_SPDHASH_DEFAULT : tun_spd_hashsize; 629 630 ipsid_init(ns); 631 /* 632 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting 633 * to free them. 634 */ 635 ipss->ipsec_system_policy.iph_refs = 1; 636 ipss->ipsec_inactive_policy.iph_refs = 1; 637 ipsec_polhead_init(&ipss->ipsec_system_policy, 638 ipss->ipsec_spd_hashsize); 639 ipsec_polhead_init(&ipss->ipsec_inactive_policy, 640 ipss->ipsec_spd_hashsize); 641 rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL); 642 avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare, 643 sizeof (ipsec_tun_pol_t), 0); 644 645 ipss->ipsec_next_policy_index = 1; 646 647 rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL); 648 rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL); 649 650 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) 651 mutex_init(&(ipss->ipsec_action_hash[i].hash_lock), 652 NULL, MUTEX_DEFAULT, NULL); 653 654 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) 655 mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock), 656 NULL, MUTEX_DEFAULT, NULL); 657 658 rw_init(&ipss->ipsec_alg_lock, NULL, RW_DEFAULT, NULL); 659 for (i = 0; i < IPSEC_NALGTYPES; i++) { 660 ipss->ipsec_nalgs[i] = 0; 661 } 662 663 ip_drop_init(ipss); 664 ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD"); 665 666 /* IP's IPsec code calls the packet dropper */ 667 ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing"); 668 669 (void) ipsec_kstat_init(ipss); 670 671 ipsec_loader_init(ipss); 672 ipsec_loader_start(ipss); 673 674 return (ipss); 675 } 676 677 /* Global across all stack instances */ 678 void 679 ipsec_policy_g_init(void) 680 { 681 ipsec_action_cache = kmem_cache_create("ipsec_actions", 682 sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL, 683 ipsec_action_reclaim, NULL, NULL, 0); 684 ipsec_sel_cache = kmem_cache_create("ipsec_selectors", 685 sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL, 686 NULL, NULL, NULL, 0); 687 ipsec_pol_cache = 
kmem_cache_create("ipsec_policy", 688 sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL, 689 NULL, NULL, NULL, 0); 690 691 /* 692 * We want to be informed each time a stack is created or 693 * destroyed in the kernel, so we can maintain the 694 * set of ipsec_stack_t's. 695 */ 696 netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini); 697 } 698 699 /* 700 * Sort algorithm lists. 701 * 702 * I may need to split this based on 703 * authentication/encryption, and I may wish to have an administrator 704 * configure this list. Hold on to some NDD variables... 705 * 706 * XXX For now, sort on minimum key size (GAG!). While minimum key size is 707 * not the ideal metric, it's the only quantifiable measure available. 708 * We need a better metric for sorting algorithms by preference. 709 */ 710 static void 711 alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns) 712 { 713 ipsec_stack_t *ipss = ns->netstack_ipsec; 714 ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid]; 715 uint8_t holder, swap; 716 uint_t i; 717 uint_t count = ipss->ipsec_nalgs[at]; 718 ASSERT(ai != NULL); 719 ASSERT(algid == ai->alg_id); 720 721 ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock)); 722 723 holder = algid; 724 725 for (i = 0; i < count - 1; i++) { 726 ipsec_alginfo_t *alt; 727 728 alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]]; 729 /* 730 * If you want to give precedence to newly added algs, 731 * add the = in the > comparison. 732 */ 733 if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) { 734 /* Swap sortlist[i] and holder. */ 735 swap = ipss->ipsec_sortlist[at][i]; 736 ipss->ipsec_sortlist[at][i] = holder; 737 holder = swap; 738 ai = alt; 739 } /* Else just continue. */ 740 } 741 742 /* Store holder in last slot. */ 743 ipss->ipsec_sortlist[at][i] = holder; 744 } 745 746 /* 747 * Remove an algorithm from a sorted algorithm list. 748 * This should be considerably easier, even with complex sorting. 
749 */ 750 static void 751 alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns) 752 { 753 boolean_t copyback = B_FALSE; 754 int i; 755 ipsec_stack_t *ipss = ns->netstack_ipsec; 756 int newcount = ipss->ipsec_nalgs[at]; 757 758 ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock)); 759 760 for (i = 0; i <= newcount; i++) { 761 if (copyback) { 762 ipss->ipsec_sortlist[at][i-1] = 763 ipss->ipsec_sortlist[at][i]; 764 } else if (ipss->ipsec_sortlist[at][i] == algid) { 765 copyback = B_TRUE; 766 } 767 } 768 } 769 770 /* 771 * Add the specified algorithm to the algorithm tables. 772 * Must be called while holding the algorithm table writer lock. 773 */ 774 void 775 ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns) 776 { 777 ipsec_stack_t *ipss = ns->netstack_ipsec; 778 779 ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock)); 780 781 ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL); 782 ipsec_alg_fix_min_max(alg, algtype, ns); 783 ipss->ipsec_alglists[algtype][alg->alg_id] = alg; 784 785 ipss->ipsec_nalgs[algtype]++; 786 alg_insert_sortlist(algtype, alg->alg_id, ns); 787 } 788 789 /* 790 * Remove the specified algorithm from the algorithm tables. 791 * Must be called while holding the algorithm table writer lock. 792 */ 793 void 794 ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns) 795 { 796 ipsec_stack_t *ipss = ns->netstack_ipsec; 797 798 ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock)); 799 800 ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL); 801 ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]); 802 ipss->ipsec_alglists[algtype][algid] = NULL; 803 804 ipss->ipsec_nalgs[algtype]--; 805 alg_remove_sortlist(algtype, algid, ns); 806 } 807 808 /* 809 * Hooks for spdsock to get a grip on system policy. 
810 */ 811 812 ipsec_policy_head_t * 813 ipsec_system_policy(netstack_t *ns) 814 { 815 ipsec_stack_t *ipss = ns->netstack_ipsec; 816 ipsec_policy_head_t *h = &ipss->ipsec_system_policy; 817 818 IPPH_REFHOLD(h); 819 return (h); 820 } 821 822 ipsec_policy_head_t * 823 ipsec_inactive_policy(netstack_t *ns) 824 { 825 ipsec_stack_t *ipss = ns->netstack_ipsec; 826 ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy; 827 828 IPPH_REFHOLD(h); 829 return (h); 830 } 831 832 /* 833 * Lock inactive policy, then active policy, then exchange policy root 834 * pointers. 835 */ 836 void 837 ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive, 838 netstack_t *ns) 839 { 840 int af, dir; 841 avl_tree_t r1, r2; 842 843 rw_enter(&inactive->iph_lock, RW_WRITER); 844 rw_enter(&active->iph_lock, RW_WRITER); 845 846 r1 = active->iph_rulebyid; 847 r2 = inactive->iph_rulebyid; 848 active->iph_rulebyid = r2; 849 inactive->iph_rulebyid = r1; 850 851 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 852 ipsec_policy_hash_t *h1, *h2; 853 854 h1 = active->iph_root[dir].ipr_hash; 855 h2 = inactive->iph_root[dir].ipr_hash; 856 active->iph_root[dir].ipr_hash = h2; 857 inactive->iph_root[dir].ipr_hash = h1; 858 859 for (af = 0; af < IPSEC_NAF; af++) { 860 ipsec_policy_t *t1, *t2; 861 862 t1 = active->iph_root[dir].ipr_nonhash[af]; 863 t2 = inactive->iph_root[dir].ipr_nonhash[af]; 864 active->iph_root[dir].ipr_nonhash[af] = t2; 865 inactive->iph_root[dir].ipr_nonhash[af] = t1; 866 if (t1 != NULL) { 867 t1->ipsp_hash.hash_pp = 868 &(inactive->iph_root[dir].ipr_nonhash[af]); 869 } 870 if (t2 != NULL) { 871 t2->ipsp_hash.hash_pp = 872 &(active->iph_root[dir].ipr_nonhash[af]); 873 } 874 875 } 876 } 877 active->iph_gen++; 878 inactive->iph_gen++; 879 ipsec_update_present_flags(ns->netstack_ipsec); 880 rw_exit(&active->iph_lock); 881 rw_exit(&inactive->iph_lock); 882 } 883 884 /* 885 * Swap global policy primary/secondary. 
886 */ 887 void 888 ipsec_swap_global_policy(netstack_t *ns) 889 { 890 ipsec_stack_t *ipss = ns->netstack_ipsec; 891 892 ipsec_swap_policy(&ipss->ipsec_system_policy, 893 &ipss->ipsec_inactive_policy, ns); 894 } 895 896 /* 897 * Clone one policy rule.. 898 */ 899 static ipsec_policy_t * 900 ipsec_copy_policy(const ipsec_policy_t *src) 901 { 902 ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 903 904 if (dst == NULL) 905 return (NULL); 906 907 /* 908 * Adjust refcounts of cloned state. 909 */ 910 IPACT_REFHOLD(src->ipsp_act); 911 src->ipsp_sel->ipsl_refs++; 912 913 HASH_NULL(dst, ipsp_hash); 914 dst->ipsp_netstack = src->ipsp_netstack; 915 dst->ipsp_refs = 1; 916 dst->ipsp_sel = src->ipsp_sel; 917 dst->ipsp_act = src->ipsp_act; 918 dst->ipsp_prio = src->ipsp_prio; 919 dst->ipsp_index = src->ipsp_index; 920 921 return (dst); 922 } 923 924 void 925 ipsec_insert_always(avl_tree_t *tree, void *new_node) 926 { 927 void *node; 928 avl_index_t where; 929 930 node = avl_find(tree, new_node, &where); 931 ASSERT(node == NULL); 932 avl_insert(tree, new_node, where); 933 } 934 935 936 static int 937 ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src, 938 ipsec_policy_t **dstp) 939 { 940 for (; src != NULL; src = src->ipsp_hash.hash_next) { 941 ipsec_policy_t *dst = ipsec_copy_policy(src); 942 if (dst == NULL) 943 return (ENOMEM); 944 945 HASHLIST_INSERT(dst, ipsp_hash, *dstp); 946 ipsec_insert_always(&dph->iph_rulebyid, dst); 947 } 948 return (0); 949 } 950 951 952 953 /* 954 * Make one policy head look exactly like another. 955 * 956 * As with ipsec_swap_policy, we lock the destination policy head first, then 957 * the source policy head. Note that we only need to read-lock the source 958 * policy head as we are not changing it. 
959 */ 960 int 961 ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph, 962 netstack_t *ns) 963 { 964 int af, dir, chain, nchains; 965 966 rw_enter(&dph->iph_lock, RW_WRITER); 967 968 ipsec_polhead_flush(dph, ns); 969 970 rw_enter(&sph->iph_lock, RW_READER); 971 972 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 973 ipsec_policy_root_t *dpr = &dph->iph_root[dir]; 974 ipsec_policy_root_t *spr = &sph->iph_root[dir]; 975 nchains = dpr->ipr_nchains; 976 977 ASSERT(dpr->ipr_nchains == spr->ipr_nchains); 978 979 for (af = 0; af < IPSEC_NAF; af++) { 980 if (ipsec_copy_chain(dph, spr->ipr_nonhash[af], 981 &dpr->ipr_nonhash[af])) 982 goto abort_copy; 983 } 984 985 for (chain = 0; chain < nchains; chain++) { 986 if (ipsec_copy_chain(dph, 987 spr->ipr_hash[chain].hash_head, 988 &dpr->ipr_hash[chain].hash_head)) 989 goto abort_copy; 990 } 991 } 992 993 dph->iph_gen++; 994 995 rw_exit(&sph->iph_lock); 996 rw_exit(&dph->iph_lock); 997 return (0); 998 999 abort_copy: 1000 ipsec_polhead_flush(dph, ns); 1001 rw_exit(&sph->iph_lock); 1002 rw_exit(&dph->iph_lock); 1003 return (ENOMEM); 1004 } 1005 1006 /* 1007 * Clone currently active policy to the inactive policy list. 1008 */ 1009 int 1010 ipsec_clone_system_policy(netstack_t *ns) 1011 { 1012 ipsec_stack_t *ipss = ns->netstack_ipsec; 1013 1014 return (ipsec_copy_polhead(&ipss->ipsec_system_policy, 1015 &ipss->ipsec_inactive_policy, ns)); 1016 } 1017 1018 /* 1019 * Extract the string from ipsec_policy_failure_msgs[type] and 1020 * log it. 
1021 * 1022 */ 1023 void 1024 ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h, 1025 boolean_t secure, netstack_t *ns) 1026 { 1027 char sbuf[INET6_ADDRSTRLEN]; 1028 char dbuf[INET6_ADDRSTRLEN]; 1029 char *s; 1030 char *d; 1031 ipsec_stack_t *ipss = ns->netstack_ipsec; 1032 1033 ASSERT((ipha == NULL && ip6h != NULL) || 1034 (ip6h == NULL && ipha != NULL)); 1035 1036 if (ipha != NULL) { 1037 s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf)); 1038 d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf)); 1039 } else { 1040 s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf)); 1041 d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf)); 1042 1043 } 1044 1045 /* Always bump the policy failure counter. */ 1046 ipss->ipsec_policy_failure_count[type]++; 1047 1048 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 1049 ipsec_policy_failure_msgs[type], func_name, 1050 (secure ? "secure" : "not secure"), s, d); 1051 } 1052 1053 /* 1054 * Rate-limiting front-end to strlog() for AH and ESP. Uses the ndd variables 1055 * in /dev/ip and the same rate-limiting clock so that there's a single 1056 * knob to turn to throttle the rate of messages. 1057 */ 1058 void 1059 ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl, 1060 char *fmt, ...) 1061 { 1062 va_list adx; 1063 hrtime_t current = gethrtime(); 1064 ip_stack_t *ipst = ns->netstack_ip; 1065 ipsec_stack_t *ipss = ns->netstack_ipsec; 1066 1067 sl |= SL_CONSOLE; 1068 /* 1069 * Throttle logging to stop syslog from being swamped. If variable 1070 * 'ipsec_policy_log_interval' is zero, don't log any messages at 1071 * all, otherwise log only one message every 'ipsec_policy_log_interval' 1072 * msec. Convert interval (in msec) to hrtime (in nsec). 
1073 */ 1074 1075 if (ipst->ips_ipsec_policy_log_interval) { 1076 if (ipss->ipsec_policy_failure_last + 1077 MSEC2NSEC(ipst->ips_ipsec_policy_log_interval) <= current) { 1078 va_start(adx, fmt); 1079 (void) vstrlog(mid, sid, level, sl, fmt, adx); 1080 va_end(adx); 1081 ipss->ipsec_policy_failure_last = current; 1082 } 1083 } 1084 } 1085 1086 void 1087 ipsec_config_flush(netstack_t *ns) 1088 { 1089 ipsec_stack_t *ipss = ns->netstack_ipsec; 1090 1091 rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER); 1092 ipsec_polhead_flush(&ipss->ipsec_system_policy, ns); 1093 ipss->ipsec_next_policy_index = 1; 1094 rw_exit(&ipss->ipsec_system_policy.iph_lock); 1095 ipsec_action_reclaim_stack(ipss); 1096 } 1097 1098 /* 1099 * Clip a policy's min/max keybits vs. the capabilities of the 1100 * algorithm. 1101 */ 1102 static void 1103 act_alg_adjust(uint_t algtype, uint_t algid, 1104 uint16_t *minbits, uint16_t *maxbits, netstack_t *ns) 1105 { 1106 ipsec_stack_t *ipss = ns->netstack_ipsec; 1107 ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid]; 1108 1109 if (algp != NULL) { 1110 /* 1111 * If passed-in minbits is zero, we assume the caller trusts 1112 * us with setting the minimum key size. We pick the 1113 * algorithms DEFAULT key size for the minimum in this case. 1114 */ 1115 if (*minbits == 0) { 1116 *minbits = algp->alg_default_bits; 1117 ASSERT(*minbits >= algp->alg_minbits); 1118 } else { 1119 *minbits = MAX(MIN(*minbits, algp->alg_maxbits), 1120 algp->alg_minbits); 1121 } 1122 if (*maxbits == 0) 1123 *maxbits = algp->alg_maxbits; 1124 else 1125 *maxbits = MIN(MAX(*maxbits, algp->alg_minbits), 1126 algp->alg_maxbits); 1127 ASSERT(*minbits <= *maxbits); 1128 } else { 1129 *minbits = 0; 1130 *maxbits = 0; 1131 } 1132 } 1133 1134 /* 1135 * Check an action's requested algorithms against the algorithms currently 1136 * loaded in the system. 
1137 */ 1138 boolean_t 1139 ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns) 1140 { 1141 ipsec_prot_t *ipp; 1142 ipsec_stack_t *ipss = ns->netstack_ipsec; 1143 1144 ipp = &act->ipa_apply; 1145 1146 if (ipp->ipp_use_ah && 1147 ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) { 1148 *diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG; 1149 return (B_FALSE); 1150 } 1151 if (ipp->ipp_use_espa && 1152 ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] == 1153 NULL) { 1154 *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG; 1155 return (B_FALSE); 1156 } 1157 if (ipp->ipp_use_esp && 1158 ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) { 1159 *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG; 1160 return (B_FALSE); 1161 } 1162 1163 act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg, 1164 &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns); 1165 act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg, 1166 &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns); 1167 act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg, 1168 &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns); 1169 1170 if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) { 1171 *diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE; 1172 return (B_FALSE); 1173 } 1174 if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) { 1175 *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE; 1176 return (B_FALSE); 1177 } 1178 if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) { 1179 *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE; 1180 return (B_FALSE); 1181 } 1182 /* TODO: sanity check lifetimes */ 1183 return (B_TRUE); 1184 } 1185 1186 /* 1187 * Set up a single action during wildcard expansion.. 
*/
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	/*
	 * Start from a full copy of the template action, then overwrite
	 * only the three algorithm ids with the concrete choices.
	 */
	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	/* Clip the copied key-size bounds to each chosen algorithm. */
	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 *
 * The array is allocated with KM_NOSLEEP, so NULL may be returned.  Note
 * the asymmetry on failure: for a non-APPLY action *nact is set to 1 even
 * when NULL is returned, while for an APPLY action *nact is left untouched.
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t	*outact;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	/* Only APPLY actions carry algorithms; anything else is copied 1:1. */
	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	/* A dimension is wildcarded when it is in use but its alg id is 0. */
	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	/*
	 * Default for a non-wildcarded dimension: a one-element range whose
	 * only "index" is the algorithm id itself.
	 */
	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

	/*
	 * For a wildcarded dimension: multiply action_count by the number of
	 * algorithms of that type (less one if the table slot named by "alg"
	 * is populated, since the loops below skip that id) and widen the
	 * index range to cover 0 .. ipsec_nalgs[type] - 1.
	 */
#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

	/*
	 * Map a loop index to an algorithm id: through the per-type sortlist
	 * for a wildcarded dimension, otherwise the index is the id.
	 */
#define	WHICH_ALG(type, wild, idx, ipss) \
	((wild)?(ipss->ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		/* Skip the "none" id only when it came from a wildcard. */
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	/* The precomputed count must match the entries actually emitted. */
	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}

/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(const ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	/* ESP auth is "in use" whenever any ESP auth algorithm is named. */
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg);
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	/* UNIQUE on either the AH or the ESP request sets the single flag. */
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	/*
	 * SADB_AALG_ANY is a placeholder to distinguish "any" from
	 * "none" above.  If auth is required, as determined above,
	 * SADB_AALG_ANY becomes 0, which is the representation
	 * of "any" and "none" in PF_KEY v2.
	 */
	ipp->ipp_auth_alg = (req->ipsr_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_auth_alg : 0;
	ipp->ipp_esp_auth_alg = (req->ipsr_esp_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_esp_auth_alg : 0;
}

/*
 * Extract a new-style action from a request.
 *
 * A request with PREF_NEVER set for both AH and ESP becomes a BYPASS
 * action; anything else becomes an APPLY action.  The result of
 * ipsec_act_wildcard_expand() is stored in *actp, so *actp is NULL if
 * that allocation failed.
 */
void
ipsec_actvec_from_req(const ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp,
    netstack_t *ns)
{
	struct ipsec_act act;

	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp, ns);
}

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
1390 */ 1391 static int 1392 ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req) 1393 { 1394 req->ipsr_esp_alg = ipp->ipp_encr_alg; 1395 req->ipsr_auth_alg = ipp->ipp_auth_alg; 1396 req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg; 1397 1398 if (ipp->ipp_use_unique) { 1399 req->ipsr_ah_req |= IPSEC_PREF_UNIQUE; 1400 req->ipsr_esp_req |= IPSEC_PREF_UNIQUE; 1401 } 1402 if (ipp->ipp_use_se) 1403 req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED; 1404 if (ipp->ipp_use_ah) 1405 req->ipsr_ah_req |= IPSEC_PREF_REQUIRED; 1406 if (ipp->ipp_use_esp) 1407 req->ipsr_esp_req |= IPSEC_PREF_REQUIRED; 1408 return (sizeof (*req)); 1409 } 1410 1411 /* 1412 * Convert a new-style action back to an ipsec_req_t (more backwards compat). 1413 * We assume caller has already zero'ed *req for us. 1414 */ 1415 static int 1416 ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req) 1417 { 1418 switch (ap->ipa_act.ipa_type) { 1419 case IPSEC_ACT_BYPASS: 1420 req->ipsr_ah_req = IPSEC_PREF_NEVER; 1421 req->ipsr_esp_req = IPSEC_PREF_NEVER; 1422 return (sizeof (*req)); 1423 case IPSEC_ACT_APPLY: 1424 return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req)); 1425 } 1426 return (sizeof (*req)); 1427 } 1428 1429 /* 1430 * Convert a new-style action back to an ipsec_req_t (more backwards compat). 1431 * We assume caller has already zero'ed *req for us. 1432 */ 1433 int 1434 ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af) 1435 { 1436 ipsec_policy_t *p; 1437 1438 /* 1439 * FULL-PERSOCK: consult hash table, too? 1440 */ 1441 for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af]; 1442 p != NULL; 1443 p = p->ipsp_hash.hash_next) { 1444 if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0) 1445 return (ipsec_req_from_act(p->ipsp_act, req)); 1446 } 1447 return (sizeof (*req)); 1448 } 1449 1450 /* 1451 * Based on per-socket or latched policy, convert to an appropriate 1452 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can 1453 * be tail-called from ip. 
1454 */ 1455 int 1456 ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af) 1457 { 1458 ipsec_latch_t *ipl; 1459 int rv = sizeof (ipsec_req_t); 1460 1461 bzero(req, sizeof (*req)); 1462 1463 ASSERT(MUTEX_HELD(&connp->conn_lock)); 1464 ipl = connp->conn_latch; 1465 1466 /* 1467 * Find appropriate policy. First choice is latched action; 1468 * failing that, see latched policy; failing that, 1469 * look at configured policy. 1470 */ 1471 if (ipl != NULL) { 1472 if (connp->conn_latch_in_action != NULL) { 1473 rv = ipsec_req_from_act(connp->conn_latch_in_action, 1474 req); 1475 goto done; 1476 } 1477 if (connp->conn_latch_in_policy != NULL) { 1478 rv = ipsec_req_from_act( 1479 connp->conn_latch_in_policy->ipsp_act, req); 1480 goto done; 1481 } 1482 } 1483 if (connp->conn_policy != NULL) 1484 rv = ipsec_req_from_head(connp->conn_policy, req, af); 1485 done: 1486 return (rv); 1487 } 1488 1489 void 1490 ipsec_actvec_free(ipsec_act_t *act, uint_t nact) 1491 { 1492 kmem_free(act, nact * sizeof (*act)); 1493 } 1494 1495 /* 1496 * Consumes a reference to ipsp. 1497 */ 1498 static mblk_t * 1499 ipsec_check_loopback_policy(mblk_t *data_mp, ip_recv_attr_t *ira, 1500 ipsec_policy_t *ipsp) 1501 { 1502 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) 1503 return (data_mp); 1504 1505 ASSERT(ira->ira_flags & IRAF_LOOPBACK); 1506 1507 IPPOL_REFRELE(ipsp); 1508 1509 /* 1510 * We should do an actual policy check here. Revisit this 1511 * when we revisit the IPsec API. (And pass a conn_t in when we 1512 * get there.) 1513 */ 1514 1515 return (data_mp); 1516 } 1517 1518 /* 1519 * Check that packet's inbound ports & proto match the selectors 1520 * expected by the SAs it traversed on the way in. 
*/
static boolean_t
ipsec_check_ipsecin_unique(ip_recv_attr_t *ira, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique, netstack_t *ns)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
	ASSERT(!(ira->ira_flags & IRAF_LOOPBACK));

	ah_assoc = ira->ira_ipsec_ah_sa;
	esp_assoc = ira->ira_ipsec_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	/* A missing SA contributes a zero mask, i.e. no constraint. */
	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	/* Neither SA constrains the packet: nothing to compare. */
	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */

	/*
	 * On a mismatch, *reason and *counter are filled in for the caller
	 * to use when logging and dropping; on success they are not touched.
	 */
	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_ah_innermismatch);
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_esp_innermismatch);
		return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * Check one candidate action "ap" against what the inbound packet actually
 * went through (as recorded in "ira").
 *
 * Returns B_TRUE if the action accepts the packet.  In the non-loopback
 * APPLY case a fully successful match also takes a hold on "ap" and stores
 * it in ira->ira_ipsec_action.  On rejection *reason and *counter describe
 * the failure; note that the "got clear" paths set them even when
 * ipsec_inbound_accept_clear() lets the packet through.
 */
static boolean_t
ipsec_check_ipsecin_action(ip_recv_attr_t *ira, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    netstack_t *ns)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/* Exactly one of ipha / ip6h must be supplied. */
	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ira->ira_flags & IRAF_LOOPBACK) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * IXAF_TRUSTED_ICMP is initialized in ip.c and ip6.c.
		 */
		if (ap == ira->ira_ipsec_action ||
		    (ira->ira_flags & IRAF_TRUSTED_ICMP))
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = DROPPER(ipss, ipds_spd_loopback_mismatch);
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!(ira->ira_flags & IRAF_TRUSTED_ICMP));
	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);

	ah_assoc = ira->ira_ipsec_ah_sa;
	esp_assoc = ira->ira_ipsec_esp_sa;

	/* Holds the raw masked flag value; only tested for zero/non-zero. */
	decaps = (ira->ira_flags & IRAF_IPSEC_DECAPS);

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = DROPPER(ipss, ipds_spd_explicit);
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		/* Policy wanted cleartext but the packet arrived protected. */
		*counter = DROPPER(ipss, ipds_spd_got_secure);
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_ahalg);
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this. Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_ah);
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this. Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay. But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ira->ira_ipsec_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ira->ira_ipsec_action);
		}
		ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
		/*
		 * NOTE(review): this ASSERT relies on IPACT_REFRELE() also
		 * clearing the pointer it is handed -- confirm against the
		 * macro's definition.
		 */
		ASSERT(ira->ira_ipsec_action == NULL);
		/* Record the action that matched, with its own hold. */
		IPACT_REFHOLD(ap);
		ira->ira_ipsec_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

/*
 * Both of the latch's identities must equal the SA's: the latched remote
 * identity against the SA's source identity, and the latched local
 * identity against the SA's destination identity.
 */
static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
}

/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fallback to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
1760 */ 1761 static uint64_t 1762 conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h) 1763 { 1764 ipsec_selector_t sel; 1765 uint8_t ulp = connp->conn_proto; 1766 1767 ASSERT(connp->conn_latch_in_policy != NULL); 1768 1769 if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) && 1770 (connp->conn_fport == 0 || connp->conn_lport == 0)) { 1771 /* Slow path - we gotta grab from the packet. */ 1772 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 1773 SEL_NONE) != SELRET_SUCCESS) { 1774 /* Failure -> have caller free packet with ENOMEM. */ 1775 return (0); 1776 } 1777 return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port, 1778 sel.ips_protocol, 0)); 1779 } 1780 1781 #ifdef DEBUG_NOT_UNTIL_6478464 1782 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) == 1783 SELRET_SUCCESS) { 1784 ASSERT(sel.ips_local_port == connp->conn_lport); 1785 ASSERT(sel.ips_remote_port == connp->conn_fport); 1786 ASSERT(sel.ips_protocol == connp->conn_proto); 1787 } 1788 ASSERT(connp->conn_proto != 0); 1789 #endif 1790 1791 return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0)); 1792 } 1793 1794 /* 1795 * Called to check policy on a latched connection. 1796 * Note that we don't dereference conn_latch or conn_ihere since the conn might 1797 * be closing. The caller passes a held ipsec_latch_t instead. 1798 */ 1799 static boolean_t 1800 ipsec_check_ipsecin_latch(ip_recv_attr_t *ira, mblk_t *mp, ipsec_latch_t *ipl, 1801 ipsec_action_t *ap, ipha_t *ipha, ip6_t *ip6h, const char **reason, 1802 kstat_named_t **counter, conn_t *connp, netstack_t *ns) 1803 { 1804 ipsec_stack_t *ipss = ns->netstack_ipsec; 1805 1806 ASSERT(ipl->ipl_ids_latched == B_TRUE); 1807 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 1808 1809 if (!(ira->ira_flags & IRAF_LOOPBACK)) { 1810 /* 1811 * Over loopback, there aren't real security associations, 1812 * so there are neither identities nor "unique" values 1813 * for us to check the packet against. 
1814 */ 1815 if (ira->ira_ipsec_ah_sa != NULL) { 1816 if (!spd_match_inbound_ids(ipl, 1817 ira->ira_ipsec_ah_sa)) { 1818 *counter = DROPPER(ipss, ipds_spd_ah_badid); 1819 *reason = "AH identity mismatch"; 1820 return (B_FALSE); 1821 } 1822 } 1823 1824 if (ira->ira_ipsec_esp_sa != NULL) { 1825 if (!spd_match_inbound_ids(ipl, 1826 ira->ira_ipsec_esp_sa)) { 1827 *counter = DROPPER(ipss, ipds_spd_esp_badid); 1828 *reason = "ESP identity mismatch"; 1829 return (B_FALSE); 1830 } 1831 } 1832 1833 /* 1834 * Can fudge pkt_unique from connp because we're latched. 1835 * In DEBUG kernels (see conn_to_unique()'s implementation), 1836 * verify this even if it REALLY slows things down. 1837 */ 1838 if (!ipsec_check_ipsecin_unique(ira, reason, counter, 1839 conn_to_unique(connp, mp, ipha, ip6h), ns)) { 1840 return (B_FALSE); 1841 } 1842 } 1843 return (ipsec_check_ipsecin_action(ira, mp, ap, ipha, ip6h, reason, 1844 counter, ns)); 1845 } 1846 1847 /* 1848 * Check to see whether this secured datagram meets the policy 1849 * constraints specified in ipsp. 1850 * 1851 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy. 1852 * 1853 * Consumes a reference to ipsp. 1854 * Returns the mblk if ok. 
1855 */ 1856 static mblk_t * 1857 ipsec_check_ipsecin_policy(mblk_t *data_mp, ipsec_policy_t *ipsp, 1858 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, ip_recv_attr_t *ira, 1859 netstack_t *ns) 1860 { 1861 ipsec_action_t *ap; 1862 const char *reason = "no policy actions found"; 1863 ip_stack_t *ipst = ns->netstack_ip; 1864 ipsec_stack_t *ipss = ns->netstack_ipsec; 1865 kstat_named_t *counter; 1866 1867 counter = DROPPER(ipss, ipds_spd_got_secure); 1868 1869 ASSERT(ipsp != NULL); 1870 1871 ASSERT((ipha == NULL && ip6h != NULL) || 1872 (ip6h == NULL && ipha != NULL)); 1873 1874 if (ira->ira_flags & IRAF_LOOPBACK) 1875 return (ipsec_check_loopback_policy(data_mp, ira, ipsp)); 1876 1877 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 1878 1879 if (ira->ira_ipsec_action != NULL) { 1880 /* 1881 * this can happen if we do a double policy-check on a packet 1882 * Would be nice to be able to delete this test.. 1883 */ 1884 IPACT_REFRELE(ira->ira_ipsec_action); 1885 } 1886 ASSERT(ira->ira_ipsec_action == NULL); 1887 1888 if (!SA_IDS_MATCH(ira->ira_ipsec_ah_sa, ira->ira_ipsec_esp_sa)) { 1889 reason = "inbound AH and ESP identities differ"; 1890 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1891 goto drop; 1892 } 1893 1894 if (!ipsec_check_ipsecin_unique(ira, &reason, &counter, pkt_unique, 1895 ns)) 1896 goto drop; 1897 1898 /* 1899 * Ok, now loop through the possible actions and see if any 1900 * of them work for us. 
1901 */ 1902 1903 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 1904 if (ipsec_check_ipsecin_action(ira, data_mp, ap, 1905 ipha, ip6h, &reason, &counter, ns)) { 1906 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 1907 IPPOL_REFRELE(ipsp); 1908 return (data_mp); 1909 } 1910 } 1911 drop: 1912 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 1913 "ipsec inbound policy mismatch: %s, packet dropped\n", 1914 reason); 1915 IPPOL_REFRELE(ipsp); 1916 ASSERT(ira->ira_ipsec_action == NULL); 1917 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 1918 ip_drop_packet(data_mp, B_TRUE, NULL, counter, 1919 &ipss->ipsec_spd_dropper); 1920 return (NULL); 1921 } 1922 1923 /* 1924 * sleazy prefix-length-based compare. 1925 * another inlining candidate.. 1926 */ 1927 boolean_t 1928 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 1929 { 1930 int offset = pfxlen>>3; 1931 int bitsleft = pfxlen & 7; 1932 uint8_t *addr2 = (uint8_t *)addr2p; 1933 1934 /* 1935 * and there was much evil.. 1936 * XXX should inline-expand the bcmp here and do this 32 bits 1937 * or 64 bits at a time.. 1938 */ 1939 return ((bcmp(addr1, addr2, offset) == 0) && 1940 ((bitsleft == 0) || 1941 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0))); 1942 } 1943 1944 static ipsec_policy_t * 1945 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 1946 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 1947 { 1948 ipsec_selkey_t *isel; 1949 ipsec_policy_t *p; 1950 int bpri = best ? 
best->ipsp_prio : 0; 1951 1952 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 1953 uint32_t valid; 1954 1955 if (p->ipsp_prio <= bpri) 1956 continue; 1957 isel = &p->ipsp_sel->ipsl_key; 1958 valid = isel->ipsl_valid; 1959 1960 if ((valid & IPSL_PROTOCOL) && 1961 (isel->ipsl_proto != sel->ips_protocol)) 1962 continue; 1963 1964 if ((valid & IPSL_REMOTE_ADDR) && 1965 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 1966 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6)) 1967 continue; 1968 1969 if ((valid & IPSL_LOCAL_ADDR) && 1970 !ip_addr_match((uint8_t *)&isel->ipsl_local, 1971 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6)) 1972 continue; 1973 1974 if ((valid & IPSL_REMOTE_PORT) && 1975 isel->ipsl_rport != sel->ips_remote_port) 1976 continue; 1977 1978 if ((valid & IPSL_LOCAL_PORT) && 1979 isel->ipsl_lport != sel->ips_local_port) 1980 continue; 1981 1982 if (!is_icmp_inv_acq) { 1983 if ((valid & IPSL_ICMP_TYPE) && 1984 (isel->ipsl_icmp_type > sel->ips_icmp_type || 1985 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 1986 continue; 1987 } 1988 1989 if ((valid & IPSL_ICMP_CODE) && 1990 (isel->ipsl_icmp_code > sel->ips_icmp_code || 1991 isel->ipsl_icmp_code_end < 1992 sel->ips_icmp_code)) { 1993 continue; 1994 } 1995 } else { 1996 /* 1997 * special case for icmp inverse acquire 1998 * we only want policies that aren't drop/pass 1999 */ 2000 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2001 continue; 2002 } 2003 2004 /* we matched all the packet-port-field selectors! */ 2005 best = p; 2006 bpri = p->ipsp_prio; 2007 } 2008 2009 return (best); 2010 } 2011 2012 /* 2013 * Try to find and return the best policy entry under a given policy 2014 * root for a given set of selectors; the first parameter "best" is 2015 * the current best policy so far. If "best" is non-null, we have a 2016 * reference to it. We return a reference to a policy; if that policy 2017 * is not the original "best", we need to release that reference 2018 * before returning. 
2019 */ 2020 ipsec_policy_t * 2021 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2022 int direction, ipsec_selector_t *sel) 2023 { 2024 ipsec_policy_t *curbest; 2025 ipsec_policy_root_t *root; 2026 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2027 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2028 2029 curbest = best; 2030 root = &head->iph_root[direction]; 2031 2032 #ifdef DEBUG 2033 if (is_icmp_inv_acq) { 2034 if (sel->ips_isv4) { 2035 if (sel->ips_protocol != IPPROTO_ICMP) { 2036 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2037 " expecting icmp, got %d", 2038 sel->ips_protocol); 2039 } 2040 } else { 2041 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2042 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2043 " expecting icmpv6, got %d", 2044 sel->ips_protocol); 2045 } 2046 } 2047 } 2048 #endif 2049 2050 rw_enter(&head->iph_lock, RW_READER); 2051 2052 if (root->ipr_nchains > 0) { 2053 curbest = ipsec_find_policy_chain(curbest, 2054 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2055 is_icmp_inv_acq); 2056 } 2057 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2058 is_icmp_inv_acq); 2059 2060 /* 2061 * Adjust reference counts if we found anything new. 2062 */ 2063 if (curbest != best) { 2064 ASSERT(curbest != NULL); 2065 IPPOL_REFHOLD(curbest); 2066 2067 if (best != NULL) { 2068 IPPOL_REFRELE(best); 2069 } 2070 } 2071 2072 rw_exit(&head->iph_lock); 2073 2074 return (curbest); 2075 } 2076 2077 /* 2078 * Find the best system policy (either global or per-interface) which 2079 * applies to the given selector; look in all the relevant policy roots 2080 * to figure out which policy wins. 2081 * 2082 * Returns a reference to a policy; caller must release this 2083 * reference when done. 
*/
ipsec_policy_t *
ipsec_find_policy(int direction, const conn_t *connp, ipsec_selector_t *sel,
    netstack_t *ns)
{
	ipsec_policy_t *p;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/* Start with the stack-wide system policy... */
	p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy,
	    direction, sel);
	/*
	 * ...then let the conn's own policy head, if any, override it with a
	 * higher-priority match.
	 */
	if ((connp != NULL) && (connp->conn_policy != NULL)) {
		p = ipsec_find_policy_head(p, connp->conn_policy,
		    direction, sel);
	}

	return (p);
}

/*
 * Check with global policy and see whether this inbound
 * packet meets the policy constraints.
 *
 * Locate appropriate policy from global policy, supplemented by the
 * conn's configured and/or cached policy if the conn is supplied.
 *
 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an
 * encrypted packet to see if they match.
 *
 * Otherwise, see if the policy allows cleartext; if not, drop it on the
 * floor.
 *
 * Returns data_mp if the packet is accepted.  On rejection the packet is
 * dropped here and NULL is returned.
 */
mblk_t *
ipsec_check_global_policy(mblk_t *data_mp, conn_t *connp,
    ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, netstack_t *ns)
{
	ipsec_policy_t *p;
	ipsec_selector_t sel;
	boolean_t policy_present;
	kstat_named_t *counter;
	uint64_t pkt_unique;
	ip_stack_t *ipst = ns->netstack_ip;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	sel.ips_is_icmp_inv_acq = 0;

	/* Exactly one of ipha / ip6h must be supplied. */
	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL)
		policy_present = ipss->ipsec_inbound_v4_policy_present;
	else
		policy_present = ipss->ipsec_inbound_v6_policy_present;

	if (!policy_present && connp == NULL) {
		/*
		 * No global policy and no per-socket policy;
		 * just pass it back (but we shouldn't get here in that case)
		 */
		return (data_mp);
	}

	/*
	 * If we have cached policy, use it.
	 * Otherwise consult system policy.
	 */
	if ((connp != NULL) && (connp->conn_latch != NULL)) {
		p = connp->conn_latch_in_policy;
		if (p != NULL) {
			IPPOL_REFHOLD(p);
		}
		/*
		 * Fudge sel for UNIQUE_ID setting below.
		 *
		 * NOTE(review): conn_to_unique() returns 0 on failure and
		 * asserts conn_latch_in_policy != NULL; neither case is
		 * handled specially here.
		 */
		pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h);
	} else {
		/* Initialize the ports in the selector */
		/* Only SELRET_NOMEM is treated as a failure here. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) == SELRET_NOMEM) {
			/*
			 * Technically not a policy mismatch, but it is
			 * an internal failure.
			 */
			ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH,
			    "ipsec_init_inbound_sel", ipha, ip6h, B_TRUE, ns);
			counter = DROPPER(ipss, ipds_spd_nomem);
			goto fail;
		}

		/*
		 * Find the policy which best applies.
		 *
		 * If we find global policy, we should look at both
		 * local policy and global policy and see which is
		 * stronger and match accordingly.
		 *
		 * If we don't find a global policy, check with
		 * local policy alone.
		 */

		p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, &sel, ns);
		pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port,
		    sel.ips_local_port, sel.ips_protocol, 0);
	}

	if (p == NULL) {
		if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
			/*
			 * We have no policy; default to succeeding.
			 * XXX paranoid system design doesn't do this.
			 */
			BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
			return (data_mp);
		} else {
			/* Protected packet, but no policy asked for it. */
			counter = DROPPER(ipss, ipds_spd_got_secure);
			ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED,
			    "ipsec_check_global_policy", ipha, ip6h, B_TRUE,
			    ns);
			goto fail;
		}
	}
	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		/* The callee consumes our reference on p. */
		return (ipsec_check_ipsecin_policy(data_mp, p, ipha, ip6h,
		    pkt_unique, ira, ns));
	}
	/* Cleartext packet: accept only if the policy's action allows it. */
	if (p->ipsp_act->ipa_allow_clear) {
		BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
		IPPOL_REFRELE(p);
		return (data_mp);
	}
	IPPOL_REFRELE(p);
	/*
	 * If we reach here, we will drop the packet because it failed the
	 * global policy check because the packet was cleartext, and it
	 * should not have been.
	 */
	ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH,
	    "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns);
	counter = DROPPER(ipss, ipds_spd_got_clear);

fail:
	/* Every path that jumps here has already set "counter". */
	ip_drop_packet(data_mp, B_TRUE, NULL, counter,
	    &ipss->ipsec_spd_dropper);
	BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
	return (NULL);
}

/*
 * We check whether an inbound datagram is a valid one
 * to accept in clear. If it is secure, it is the job
 * of IPSEC to log information appropriately if it
 * suspects that it may not be the real one.
 *
 * It is called only while fanning out to the ULP
 * where ULP accepts only secure data and the incoming
 * is clear. Usually we never accept clear datagrams in
 * such cases. ICMP is the only exception.
 *
 * NOTE : We don't call this function if the client (ULP)
 * is willing to accept things in clear.
2243 */ 2244 boolean_t 2245 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2246 { 2247 ushort_t iph_hdr_length; 2248 icmph_t *icmph; 2249 icmp6_t *icmp6; 2250 uint8_t *nexthdrp; 2251 2252 ASSERT((ipha != NULL && ip6h == NULL) || 2253 (ipha == NULL && ip6h != NULL)); 2254 2255 if (ip6h != NULL) { 2256 iph_hdr_length = ip_hdr_length_v6(mp, ip6h); 2257 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, 2258 &nexthdrp)) { 2259 return (B_FALSE); 2260 } 2261 if (*nexthdrp != IPPROTO_ICMPV6) 2262 return (B_FALSE); 2263 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]); 2264 /* Match IPv6 ICMP policy as closely as IPv4 as possible. */ 2265 switch (icmp6->icmp6_type) { 2266 case ICMP6_PARAM_PROB: 2267 /* Corresponds to port/proto unreach in IPv4. */ 2268 case ICMP6_ECHO_REQUEST: 2269 /* Just like IPv4. */ 2270 return (B_FALSE); 2271 2272 case MLD_LISTENER_QUERY: 2273 case MLD_LISTENER_REPORT: 2274 case MLD_LISTENER_REDUCTION: 2275 /* 2276 * XXX Seperate NDD in IPv4 what about here? 2277 * Plus, mcast is important to ND. 2278 */ 2279 case ICMP6_DST_UNREACH: 2280 /* Corresponds to HOST/NET unreachable in IPv4. */ 2281 case ICMP6_PACKET_TOO_BIG: 2282 case ICMP6_ECHO_REPLY: 2283 /* These are trusted in IPv4. */ 2284 case ND_ROUTER_SOLICIT: 2285 case ND_ROUTER_ADVERT: 2286 case ND_NEIGHBOR_SOLICIT: 2287 case ND_NEIGHBOR_ADVERT: 2288 case ND_REDIRECT: 2289 /* Trust ND messages for now. */ 2290 case ICMP6_TIME_EXCEEDED: 2291 default: 2292 return (B_TRUE); 2293 } 2294 } else { 2295 /* 2296 * If it is not ICMP, fail this request. 2297 */ 2298 if (ipha->ipha_protocol != IPPROTO_ICMP) { 2299 #ifdef FRAGCACHE_DEBUG 2300 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n", 2301 ipha->ipha_protocol); 2302 #endif 2303 return (B_FALSE); 2304 } 2305 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2306 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2307 /* 2308 * It is an insecure icmp message. Check to see whether we are 2309 * willing to accept this one. 
2310 */ 2311 2312 switch (icmph->icmph_type) { 2313 case ICMP_ECHO_REPLY: 2314 case ICMP_TIME_STAMP_REPLY: 2315 case ICMP_INFO_REPLY: 2316 case ICMP_ROUTER_ADVERTISEMENT: 2317 /* 2318 * We should not encourage clear replies if this 2319 * client expects secure. If somebody is replying 2320 * in clear some mailicious user watching both the 2321 * request and reply, can do chosen-plain-text attacks. 2322 * With global policy we might be just expecting secure 2323 * but sending out clear. We don't know what the right 2324 * thing is. We can't do much here as we can't control 2325 * the sender here. Till we are sure of what to do, 2326 * accept them. 2327 */ 2328 return (B_TRUE); 2329 case ICMP_ECHO_REQUEST: 2330 case ICMP_TIME_STAMP_REQUEST: 2331 case ICMP_INFO_REQUEST: 2332 case ICMP_ADDRESS_MASK_REQUEST: 2333 case ICMP_ROUTER_SOLICITATION: 2334 case ICMP_ADDRESS_MASK_REPLY: 2335 /* 2336 * Don't accept this as somebody could be sending 2337 * us plain text to get encrypted data. If we reply, 2338 * it will lead to chosen plain text attack. 2339 */ 2340 return (B_FALSE); 2341 case ICMP_DEST_UNREACHABLE: 2342 switch (icmph->icmph_code) { 2343 case ICMP_FRAGMENTATION_NEEDED: 2344 /* 2345 * Be in sync with icmp_inbound, where we have 2346 * already set dce_pmtu 2347 */ 2348 #ifdef FRAGCACHE_DEBUG 2349 cmn_err(CE_WARN, "ICMP frag needed\n"); 2350 #endif 2351 return (B_TRUE); 2352 case ICMP_HOST_UNREACHABLE: 2353 case ICMP_NET_UNREACHABLE: 2354 /* 2355 * By accepting, we could reset a connection. 2356 * How do we solve the problem of some 2357 * intermediate router sending in-secure ICMP 2358 * messages ? 2359 */ 2360 return (B_TRUE); 2361 case ICMP_PORT_UNREACHABLE: 2362 case ICMP_PROTOCOL_UNREACHABLE: 2363 default : 2364 return (B_FALSE); 2365 } 2366 case ICMP_SOURCE_QUENCH: 2367 /* 2368 * If this is an attack, TCP will slow start 2369 * because of this. Is it very harmful ? 
2370 */ 2371 return (B_TRUE); 2372 case ICMP_PARAM_PROBLEM: 2373 return (B_FALSE); 2374 case ICMP_TIME_EXCEEDED: 2375 return (B_TRUE); 2376 case ICMP_REDIRECT: 2377 return (B_FALSE); 2378 default : 2379 return (B_FALSE); 2380 } 2381 } 2382 } 2383 2384 void 2385 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2386 { 2387 mutex_enter(&ipl->ipl_lock); 2388 2389 if (ipl->ipl_ids_latched) { 2390 /* I lost, someone else got here before me */ 2391 mutex_exit(&ipl->ipl_lock); 2392 return; 2393 } 2394 2395 if (local != NULL) 2396 IPSID_REFHOLD(local); 2397 if (remote != NULL) 2398 IPSID_REFHOLD(remote); 2399 2400 ipl->ipl_local_cid = local; 2401 ipl->ipl_remote_cid = remote; 2402 ipl->ipl_ids_latched = B_TRUE; 2403 mutex_exit(&ipl->ipl_lock); 2404 } 2405 2406 void 2407 ipsec_latch_inbound(conn_t *connp, ip_recv_attr_t *ira) 2408 { 2409 ipsa_t *sa; 2410 ipsec_latch_t *ipl = connp->conn_latch; 2411 2412 if (!ipl->ipl_ids_latched) { 2413 ipsid_t *local = NULL; 2414 ipsid_t *remote = NULL; 2415 2416 if (!(ira->ira_flags & IRAF_LOOPBACK)) { 2417 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2418 if (ira->ira_ipsec_esp_sa != NULL) 2419 sa = ira->ira_ipsec_esp_sa; 2420 else 2421 sa = ira->ira_ipsec_ah_sa; 2422 ASSERT(sa != NULL); 2423 local = sa->ipsa_dst_cid; 2424 remote = sa->ipsa_src_cid; 2425 } 2426 ipsec_latch_ids(ipl, local, remote); 2427 } 2428 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 2429 if (connp->conn_latch_in_action != NULL) { 2430 /* 2431 * Previously cached action. This is probably 2432 * harmless, but in DEBUG kernels, check for 2433 * action equality. 2434 * 2435 * Preserve the existing action to preserve latch 2436 * invariance. 
2437 */ 2438 ASSERT(connp->conn_latch_in_action == 2439 ira->ira_ipsec_action); 2440 return; 2441 } 2442 connp->conn_latch_in_action = ira->ira_ipsec_action; 2443 IPACT_REFHOLD(connp->conn_latch_in_action); 2444 } 2445 } 2446 2447 /* 2448 * Check whether the policy constraints are met either for an 2449 * inbound datagram; called from IP in numerous places. 2450 * 2451 * Note that this is not a chokepoint for inbound policy checks; 2452 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy() 2453 */ 2454 mblk_t * 2455 ipsec_check_inbound_policy(mblk_t *mp, conn_t *connp, 2456 ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira) 2457 { 2458 boolean_t ret; 2459 ipsec_latch_t *ipl; 2460 ipsec_action_t *ap; 2461 uint64_t unique_id; 2462 ipsec_stack_t *ipss; 2463 ip_stack_t *ipst; 2464 netstack_t *ns; 2465 ipsec_policy_head_t *policy_head; 2466 ipsec_policy_t *p = NULL; 2467 2468 ASSERT(connp != NULL); 2469 ns = connp->conn_netstack; 2470 ipss = ns->netstack_ipsec; 2471 ipst = ns->netstack_ip; 2472 2473 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 2474 /* 2475 * This is the case where the incoming datagram is 2476 * cleartext and we need to see whether this client 2477 * would like to receive such untrustworthy things from 2478 * the wire. 2479 */ 2480 ASSERT(mp != NULL); 2481 2482 mutex_enter(&connp->conn_lock); 2483 if (connp->conn_state_flags & CONN_CONDEMNED) { 2484 mutex_exit(&connp->conn_lock); 2485 ip_drop_packet(mp, B_TRUE, NULL, 2486 DROPPER(ipss, ipds_spd_got_clear), 2487 &ipss->ipsec_spd_dropper); 2488 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2489 return (NULL); 2490 } 2491 if (connp->conn_latch != NULL) { 2492 /* Hold a reference in case the conn is closing */ 2493 p = connp->conn_latch_in_policy; 2494 if (p != NULL) 2495 IPPOL_REFHOLD(p); 2496 mutex_exit(&connp->conn_lock); 2497 /* 2498 * Policy is cached in the conn. 
2499 */ 2500 if (p != NULL && !p->ipsp_act->ipa_allow_clear) { 2501 ret = ipsec_inbound_accept_clear(mp, 2502 ipha, ip6h); 2503 if (ret) { 2504 BUMP_MIB(&ipst->ips_ip_mib, 2505 ipsecInSucceeded); 2506 IPPOL_REFRELE(p); 2507 return (mp); 2508 } else { 2509 ipsec_log_policy_failure( 2510 IPSEC_POLICY_MISMATCH, 2511 "ipsec_check_inbound_policy", ipha, 2512 ip6h, B_FALSE, ns); 2513 ip_drop_packet(mp, B_TRUE, NULL, 2514 DROPPER(ipss, ipds_spd_got_clear), 2515 &ipss->ipsec_spd_dropper); 2516 BUMP_MIB(&ipst->ips_ip_mib, 2517 ipsecInFailed); 2518 IPPOL_REFRELE(p); 2519 return (NULL); 2520 } 2521 } else { 2522 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2523 if (p != NULL) 2524 IPPOL_REFRELE(p); 2525 return (mp); 2526 } 2527 } else { 2528 policy_head = connp->conn_policy; 2529 2530 /* Hold a reference in case the conn is closing */ 2531 if (policy_head != NULL) 2532 IPPH_REFHOLD(policy_head); 2533 mutex_exit(&connp->conn_lock); 2534 /* 2535 * As this is a non-hardbound connection we need 2536 * to look at both per-socket policy and global 2537 * policy. 2538 */ 2539 mp = ipsec_check_global_policy(mp, connp, 2540 ipha, ip6h, ira, ns); 2541 if (policy_head != NULL) 2542 IPPH_REFRELE(policy_head, ns); 2543 return (mp); 2544 } 2545 } 2546 2547 mutex_enter(&connp->conn_lock); 2548 /* Connection is closing */ 2549 if (connp->conn_state_flags & CONN_CONDEMNED) { 2550 mutex_exit(&connp->conn_lock); 2551 ip_drop_packet(mp, B_TRUE, NULL, 2552 DROPPER(ipss, ipds_spd_got_clear), 2553 &ipss->ipsec_spd_dropper); 2554 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2555 return (NULL); 2556 } 2557 2558 /* 2559 * Once a connection is latched it remains so for life, the conn_latch 2560 * pointer on the conn has not changed, simply initializing ipl here 2561 * as the earlier initialization was done only in the cleartext case. 
2562 */ 2563 if ((ipl = connp->conn_latch) == NULL) { 2564 mblk_t *retmp; 2565 policy_head = connp->conn_policy; 2566 2567 /* Hold a reference in case the conn is closing */ 2568 if (policy_head != NULL) 2569 IPPH_REFHOLD(policy_head); 2570 mutex_exit(&connp->conn_lock); 2571 /* 2572 * We don't have policies cached in the conn 2573 * for this stream. So, look at the global 2574 * policy. It will check against conn or global 2575 * depending on whichever is stronger. 2576 */ 2577 retmp = ipsec_check_global_policy(mp, connp, 2578 ipha, ip6h, ira, ns); 2579 if (policy_head != NULL) 2580 IPPH_REFRELE(policy_head, ns); 2581 return (retmp); 2582 } 2583 2584 IPLATCH_REFHOLD(ipl); 2585 /* Hold reference on conn_latch_in_action in case conn is closing */ 2586 ap = connp->conn_latch_in_action; 2587 if (ap != NULL) 2588 IPACT_REFHOLD(ap); 2589 mutex_exit(&connp->conn_lock); 2590 2591 if (ap != NULL) { 2592 /* Policy is cached & latched; fast(er) path */ 2593 const char *reason; 2594 kstat_named_t *counter; 2595 2596 if (ipsec_check_ipsecin_latch(ira, mp, ipl, ap, 2597 ipha, ip6h, &reason, &counter, connp, ns)) { 2598 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2599 IPLATCH_REFRELE(ipl); 2600 IPACT_REFRELE(ap); 2601 return (mp); 2602 } 2603 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, 2604 SL_ERROR|SL_WARN|SL_CONSOLE, 2605 "ipsec inbound policy mismatch: %s, packet dropped\n", 2606 reason); 2607 ip_drop_packet(mp, B_TRUE, NULL, counter, 2608 &ipss->ipsec_spd_dropper); 2609 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2610 IPLATCH_REFRELE(ipl); 2611 IPACT_REFRELE(ap); 2612 return (NULL); 2613 } 2614 if ((p = connp->conn_latch_in_policy) == NULL) { 2615 ipsec_weird_null_inbound_policy++; 2616 IPLATCH_REFRELE(ipl); 2617 return (mp); 2618 } 2619 2620 unique_id = conn_to_unique(connp, mp, ipha, ip6h); 2621 IPPOL_REFHOLD(p); 2622 mp = ipsec_check_ipsecin_policy(mp, p, ipha, ip6h, unique_id, ira, ns); 2623 /* 2624 * NOTE: ipsecIn{Failed,Succeeeded} bumped by 2625 * 
ipsec_check_ipsecin_policy(). 2626 */ 2627 if (mp != NULL) 2628 ipsec_latch_inbound(connp, ira); 2629 IPLATCH_REFRELE(ipl); 2630 return (mp); 2631 } 2632 2633 /* 2634 * Handle all sorts of cases like tunnel-mode and ICMP. 2635 */ 2636 static int 2637 prepended_length(mblk_t *mp, uintptr_t hptr) 2638 { 2639 int rc = 0; 2640 2641 while (mp != NULL) { 2642 if (hptr >= (uintptr_t)mp->b_rptr && hptr < 2643 (uintptr_t)mp->b_wptr) { 2644 rc += (int)(hptr - (uintptr_t)mp->b_rptr); 2645 break; /* out of while loop */ 2646 } 2647 rc += (int)MBLKL(mp); 2648 mp = mp->b_cont; 2649 } 2650 2651 if (mp == NULL) { 2652 /* 2653 * IF (big IF) we make it here by naturally exiting the loop, 2654 * then ip6h isn't in the mblk chain "mp" at all. 2655 * 2656 * The only case where this happens is with a reversed IP 2657 * header that gets passed up by inbound ICMP processing. 2658 * This unfortunately triggers longstanding bug 6478464. For 2659 * now, just pass up 0 for the answer. 2660 */ 2661 #ifdef DEBUG_NOT_UNTIL_6478464 2662 ASSERT(mp != NULL); 2663 #endif 2664 rc = 0; 2665 } 2666 2667 return (rc); 2668 } 2669 2670 /* 2671 * Returns: 2672 * 2673 * SELRET_NOMEM --> msgpullup() needed to gather things failed. 2674 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2675 * gathering, the initial fragment is too short for 2676 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2677 * set. 2678 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2679 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2680 * should put this packet in a fragment-gathering queue. 2681 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2682 * is set. 2683 * 2684 * Note that ipha/ip6h can be in a different mblk (mp->b_cont) in the case 2685 * of tunneled packets. 2686 * Also, mp->b_rptr can be an ICMP error where ipha/ip6h is the packet in 2687 * error past the ICMP error. 
2688 */ 2689 static selret_t 2690 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2691 ip6_t *ip6h, uint8_t sel_flags) 2692 { 2693 uint16_t *ports; 2694 int outer_hdr_len = 0; /* For ICMP or tunnel-mode cases... */ 2695 ushort_t hdr_len; 2696 mblk_t *spare_mp = NULL; 2697 uint8_t *nexthdrp, *transportp; 2698 uint8_t nexthdr; 2699 uint8_t icmp_proto; 2700 ip_pkt_t ipp; 2701 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2702 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2703 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2704 boolean_t post_frag = (sel_flags & SEL_POST_FRAG); 2705 2706 ASSERT((ipha == NULL && ip6h != NULL) || 2707 (ipha != NULL && ip6h == NULL)); 2708 2709 if (ip6h != NULL) { 2710 outer_hdr_len = prepended_length(mp, (uintptr_t)ip6h); 2711 nexthdr = ip6h->ip6_nxt; 2712 icmp_proto = IPPROTO_ICMPV6; 2713 sel->ips_isv4 = B_FALSE; 2714 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2715 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2716 2717 bzero(&ipp, sizeof (ipp)); 2718 2719 switch (nexthdr) { 2720 case IPPROTO_HOPOPTS: 2721 case IPPROTO_ROUTING: 2722 case IPPROTO_DSTOPTS: 2723 case IPPROTO_FRAGMENT: 2724 /* 2725 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2726 * mblk that's contiguous to feed it 2727 */ 2728 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2729 return (SELRET_NOMEM); 2730 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2731 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2732 &hdr_len, &nexthdrp)) { 2733 /* Malformed packet - caller frees. */ 2734 ipsec_freemsg_chain(spare_mp); 2735 return (SELRET_BADPKT); 2736 } 2737 /* Repopulate now that we have the whole packet */ 2738 ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len); 2739 (void) ip_find_hdr_v6(spare_mp, ip6h, B_FALSE, &ipp, 2740 NULL); 2741 nexthdr = *nexthdrp; 2742 /* We can just extract based on hdr_len now. 
*/ 2743 break; 2744 default: 2745 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &ipp, NULL); 2746 hdr_len = IPV6_HDR_LEN; 2747 break; 2748 } 2749 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2750 /* IPv6 Fragment */ 2751 ipsec_freemsg_chain(spare_mp); 2752 return (SELRET_TUNFRAG); 2753 } 2754 transportp = (uint8_t *)ip6h + hdr_len; 2755 } else { 2756 outer_hdr_len = prepended_length(mp, (uintptr_t)ipha); 2757 icmp_proto = IPPROTO_ICMP; 2758 sel->ips_isv4 = B_TRUE; 2759 sel->ips_local_addr_v4 = ipha->ipha_dst; 2760 sel->ips_remote_addr_v4 = ipha->ipha_src; 2761 nexthdr = ipha->ipha_protocol; 2762 hdr_len = IPH_HDR_LENGTH(ipha); 2763 2764 if (port_policy_present && 2765 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2766 !is_icmp) { 2767 /* IPv4 Fragment */ 2768 ipsec_freemsg_chain(spare_mp); 2769 return (SELRET_TUNFRAG); 2770 } 2771 transportp = (uint8_t *)ipha + hdr_len; 2772 } 2773 sel->ips_protocol = nexthdr; 2774 2775 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2776 nexthdr != IPPROTO_SCTP && nexthdr != icmp_proto) || 2777 (!port_policy_present && !post_frag && tunnel_mode)) { 2778 sel->ips_remote_port = sel->ips_local_port = 0; 2779 ipsec_freemsg_chain(spare_mp); 2780 return (SELRET_SUCCESS); 2781 } 2782 2783 if (transportp + 4 > mp->b_wptr) { 2784 /* If we didn't pullup a copy already, do so now. */ 2785 /* 2786 * XXX performance, will upper-layers frequently split TCP/UDP 2787 * apart from IP or options? If so, perhaps we should revisit 2788 * the spare_mp strategy. 
2789 */ 2790 ipsec_hdr_pullup_needed++; 2791 if (spare_mp == NULL && 2792 (spare_mp = msgpullup(mp, -1)) == NULL) { 2793 return (SELRET_NOMEM); 2794 } 2795 transportp = &spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2796 } 2797 2798 if (nexthdr == icmp_proto) { 2799 sel->ips_icmp_type = *transportp++; 2800 sel->ips_icmp_code = *transportp; 2801 sel->ips_remote_port = sel->ips_local_port = 0; 2802 } else { 2803 ports = (uint16_t *)transportp; 2804 sel->ips_remote_port = *ports++; 2805 sel->ips_local_port = *ports; 2806 } 2807 ipsec_freemsg_chain(spare_mp); 2808 return (SELRET_SUCCESS); 2809 } 2810 2811 /* 2812 * This is called with a b_next chain of messages from the fragcache code, 2813 * hence it needs to discard a chain on error. 2814 */ 2815 static boolean_t 2816 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2817 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2818 { 2819 /* 2820 * XXX cut&paste shared with ipsec_init_inbound_sel 2821 */ 2822 uint16_t *ports; 2823 ushort_t hdr_len; 2824 mblk_t *spare_mp = NULL; 2825 uint8_t *nexthdrp; 2826 uint8_t nexthdr; 2827 uint8_t *typecode; 2828 uint8_t check_proto; 2829 2830 ASSERT((ipha == NULL && ip6h != NULL) || 2831 (ipha != NULL && ip6h == NULL)); 2832 2833 if (ip6h != NULL) { 2834 check_proto = IPPROTO_ICMPV6; 2835 nexthdr = ip6h->ip6_nxt; 2836 switch (nexthdr) { 2837 case IPPROTO_HOPOPTS: 2838 case IPPROTO_ROUTING: 2839 case IPPROTO_DSTOPTS: 2840 case IPPROTO_FRAGMENT: 2841 /* 2842 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2843 * mblk that's contiguous to feed it 2844 */ 2845 spare_mp = msgpullup(mp, -1); 2846 if (spare_mp == NULL || 2847 !ip_hdr_length_nexthdr_v6(spare_mp, 2848 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2849 &hdr_len, &nexthdrp)) { 2850 /* Always works, even if NULL. 
*/ 2851 ipsec_freemsg_chain(spare_mp); 2852 ip_drop_packet_chain(mp, B_FALSE, NULL, 2853 DROPPER(ipss, ipds_spd_nomem), 2854 &ipss->ipsec_spd_dropper); 2855 return (B_FALSE); 2856 } else { 2857 nexthdr = *nexthdrp; 2858 /* We can just extract based on hdr_len now. */ 2859 } 2860 break; 2861 default: 2862 hdr_len = IPV6_HDR_LEN; 2863 break; 2864 } 2865 } else { 2866 check_proto = IPPROTO_ICMP; 2867 hdr_len = IPH_HDR_LENGTH(ipha); 2868 nexthdr = ipha->ipha_protocol; 2869 } 2870 2871 sel->ips_protocol = nexthdr; 2872 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2873 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2874 sel->ips_local_port = sel->ips_remote_port = 0; 2875 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2876 return (B_TRUE); 2877 } 2878 2879 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2880 /* If we didn't pullup a copy already, do so now. */ 2881 /* 2882 * XXX performance, will upper-layers frequently split TCP/UDP 2883 * apart from IP or options? If so, perhaps we should revisit 2884 * the spare_mp strategy. 2885 * 2886 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2887 */ 2888 if (spare_mp == NULL && 2889 (spare_mp = msgpullup(mp, -1)) == NULL) { 2890 ip_drop_packet_chain(mp, B_FALSE, NULL, 2891 DROPPER(ipss, ipds_spd_nomem), 2892 &ipss->ipsec_spd_dropper); 2893 return (B_FALSE); 2894 } 2895 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2896 } else { 2897 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2898 } 2899 2900 if (nexthdr == check_proto) { 2901 typecode = (uint8_t *)ports; 2902 sel->ips_icmp_type = *typecode++; 2903 sel->ips_icmp_code = *typecode; 2904 sel->ips_remote_port = sel->ips_local_port = 0; 2905 } else { 2906 sel->ips_local_port = *ports++; 2907 sel->ips_remote_port = *ports; 2908 } 2909 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2910 return (B_TRUE); 2911 } 2912 2913 /* 2914 * Prepend an mblk with a ipsec_crypto_t to the message chain. 2915 * Frees the argument and returns NULL should the allocation fail. 2916 * Returns the pointer to the crypto data part. 2917 */ 2918 mblk_t * 2919 ipsec_add_crypto_data(mblk_t *data_mp, ipsec_crypto_t **icp) 2920 { 2921 mblk_t *mp; 2922 2923 mp = allocb(sizeof (ipsec_crypto_t), BPRI_MED); 2924 if (mp == NULL) { 2925 freemsg(data_mp); 2926 return (NULL); 2927 } 2928 bzero(mp->b_rptr, sizeof (ipsec_crypto_t)); 2929 mp->b_wptr += sizeof (ipsec_crypto_t); 2930 mp->b_cont = data_mp; 2931 mp->b_datap->db_type = M_EVENT; /* For ASSERT */ 2932 *icp = (ipsec_crypto_t *)mp->b_rptr; 2933 return (mp); 2934 } 2935 2936 /* 2937 * Remove what was prepended above. Return b_cont and a pointer to the 2938 * crypto data. 2939 * The caller must call ipsec_free_crypto_data for mblk once it is done 2940 * with the crypto data. 
2941 */ 2942 mblk_t * 2943 ipsec_remove_crypto_data(mblk_t *crypto_mp, ipsec_crypto_t **icp) 2944 { 2945 ASSERT(crypto_mp->b_datap->db_type == M_EVENT); 2946 ASSERT(MBLKL(crypto_mp) == sizeof (ipsec_crypto_t)); 2947 2948 *icp = (ipsec_crypto_t *)crypto_mp->b_rptr; 2949 return (crypto_mp->b_cont); 2950 } 2951 2952 /* 2953 * Free what was prepended above. Return b_cont. 2954 */ 2955 mblk_t * 2956 ipsec_free_crypto_data(mblk_t *crypto_mp) 2957 { 2958 mblk_t *mp; 2959 2960 ASSERT(crypto_mp->b_datap->db_type == M_EVENT); 2961 ASSERT(MBLKL(crypto_mp) == sizeof (ipsec_crypto_t)); 2962 2963 mp = crypto_mp->b_cont; 2964 freeb(crypto_mp); 2965 return (mp); 2966 } 2967 2968 /* 2969 * Create an ipsec_action_t based on the way an inbound packet was protected. 2970 * Used to reflect traffic back to a sender. 2971 * 2972 * We don't bother interning the action into the hash table. 2973 */ 2974 ipsec_action_t * 2975 ipsec_in_to_out_action(ip_recv_attr_t *ira) 2976 { 2977 ipsa_t *ah_assoc, *esp_assoc; 2978 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2979 ipsec_action_t *ap; 2980 boolean_t unique; 2981 2982 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2983 2984 if (ap == NULL) 2985 return (NULL); 2986 2987 bzero(ap, sizeof (*ap)); 2988 HASH_NULL(ap, ipa_hash); 2989 ap->ipa_next = NULL; 2990 ap->ipa_refs = 1; 2991 2992 /* 2993 * Get the algorithms that were used for this packet. 
2994 */ 2995 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2996 ap->ipa_act.ipa_log = 0; 2997 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2998 2999 ah_assoc = ira->ira_ipsec_ah_sa; 3000 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 3001 3002 esp_assoc = ira->ira_ipsec_esp_sa; 3003 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 3004 3005 if (esp_assoc != NULL) { 3006 encr_alg = esp_assoc->ipsa_encr_alg; 3007 espa_alg = esp_assoc->ipsa_auth_alg; 3008 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 3009 } 3010 if (ah_assoc != NULL) 3011 auth_alg = ah_assoc->ipsa_auth_alg; 3012 3013 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 3014 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 3015 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 3016 ap->ipa_act.ipa_apply.ipp_use_se = 3017 !!(ira->ira_flags & IRAF_IPSEC_DECAPS); 3018 unique = B_FALSE; 3019 3020 if (esp_assoc != NULL) { 3021 ap->ipa_act.ipa_apply.ipp_espa_minbits = 3022 esp_assoc->ipsa_authkeybits; 3023 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 3024 esp_assoc->ipsa_authkeybits; 3025 ap->ipa_act.ipa_apply.ipp_espe_minbits = 3026 esp_assoc->ipsa_encrkeybits; 3027 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 3028 esp_assoc->ipsa_encrkeybits; 3029 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 3030 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 3031 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 3032 unique = B_TRUE; 3033 } 3034 if (ah_assoc != NULL) { 3035 ap->ipa_act.ipa_apply.ipp_ah_minbits = 3036 ah_assoc->ipsa_authkeybits; 3037 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 3038 ah_assoc->ipsa_authkeybits; 3039 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 3040 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 3041 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 3042 unique = B_TRUE; 3043 } 3044 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3045 ap->ipa_want_unique = unique; 3046 ap->ipa_allow_clear = B_FALSE; 3047 ap->ipa_want_se = 
!!(ira->ira_flags & IRAF_IPSEC_DECAPS); 3048 ap->ipa_want_ah = (ah_assoc != NULL); 3049 ap->ipa_want_esp = (esp_assoc != NULL); 3050 3051 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 3052 3053 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3054 3055 return (ap); 3056 } 3057 3058 3059 /* 3060 * Compute the worst-case amount of extra space required by an action. 3061 * Note that, because of the ESP considerations listed below, this is 3062 * actually not the same as the best-case reduction in the MTU; in the 3063 * future, we should pass additional information to this function to 3064 * allow the actual MTU impact to be computed. 3065 * 3066 * AH: Revisit this if we implement algorithms with 3067 * a verifier size of more than 12 bytes. 3068 * 3069 * ESP: A more exact but more messy computation would take into 3070 * account the interaction between the cipher block size and the 3071 * effective MTU, yielding the inner payload size which reflects a 3072 * packet with *minimum* ESP padding.. 3073 */ 3074 int32_t 3075 ipsec_act_ovhd(const ipsec_act_t *act) 3076 { 3077 int32_t overhead = 0; 3078 3079 if (act->ipa_type == IPSEC_ACT_APPLY) { 3080 const ipsec_prot_t *ipp = &act->ipa_apply; 3081 3082 if (ipp->ipp_use_ah) 3083 overhead += IPSEC_MAX_AH_HDR_SIZE; 3084 if (ipp->ipp_use_esp) { 3085 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3086 overhead += sizeof (struct udphdr); 3087 } 3088 if (ipp->ipp_use_se) 3089 overhead += IP_SIMPLE_HDR_LENGTH; 3090 } 3091 return (overhead); 3092 } 3093 3094 /* 3095 * This hash function is used only when creating policies and thus is not 3096 * performance-critical for packet flows. 3097 * 3098 * Future work: canonicalize the structures hashed with this (i.e., 3099 * zeroize padding) so the hash works correctly. 3100 */ 3101 /* ARGSUSED */ 3102 static uint32_t 3103 policy_hash(int size, const void *start, const void *end) 3104 { 3105 return (0); 3106 } 3107 3108 3109 /* 3110 * Hash function macros for each address type. 
3111 * 3112 * The IPV6 hash function assumes that the low order 32-bits of the 3113 * address (typically containing the low order 24 bits of the mac 3114 * address) are reasonably well-distributed. Revisit this if we run 3115 * into trouble from lots of collisions on ::1 addresses and the like 3116 * (seems unlikely). 3117 */ 3118 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3119 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3120 3121 /* 3122 * These two hash functions should produce coordinated values 3123 * but have slightly different roles. 3124 */ 3125 static uint32_t 3126 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3127 { 3128 uint32_t valid = selkey->ipsl_valid; 3129 ipsec_stack_t *ipss = ns->netstack_ipsec; 3130 3131 if (!(valid & IPSL_REMOTE_ADDR)) 3132 return (IPSEC_SEL_NOHASH); 3133 3134 if (valid & IPSL_IPV4) { 3135 if (selkey->ipsl_remote_pfxlen == 32) { 3136 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3137 ipss->ipsec_spd_hashsize)); 3138 } 3139 } 3140 if (valid & IPSL_IPV6) { 3141 if (selkey->ipsl_remote_pfxlen == 128) { 3142 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3143 ipss->ipsec_spd_hashsize)); 3144 } 3145 } 3146 return (IPSEC_SEL_NOHASH); 3147 } 3148 3149 static uint32_t 3150 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3151 { 3152 if (sel->ips_isv4) { 3153 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3154 root->ipr_nchains)); 3155 } 3156 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3157 } 3158 3159 /* 3160 * Intern actions into the action hash table. 
3161 */ 3162 ipsec_action_t * 3163 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3164 { 3165 int i; 3166 uint32_t hval; 3167 ipsec_action_t *ap; 3168 ipsec_action_t *prev = NULL; 3169 int32_t overhead, maxovhd = 0; 3170 boolean_t allow_clear = B_FALSE; 3171 boolean_t want_ah = B_FALSE; 3172 boolean_t want_esp = B_FALSE; 3173 boolean_t want_se = B_FALSE; 3174 boolean_t want_unique = B_FALSE; 3175 ipsec_stack_t *ipss = ns->netstack_ipsec; 3176 3177 /* 3178 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3179 * so we can use a non-trivial policy_hash function. 3180 */ 3181 ap = NULL; 3182 for (i = n-1; i >= 0; i--) { 3183 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3184 3185 HASH_LOCK(ipss->ipsec_action_hash, hval); 3186 3187 for (HASH_ITERATE(ap, ipa_hash, 3188 ipss->ipsec_action_hash, hval)) { 3189 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3190 continue; 3191 if (ap->ipa_next != prev) 3192 continue; 3193 break; 3194 } 3195 if (ap != NULL) { 3196 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3197 prev = ap; 3198 continue; 3199 } 3200 /* 3201 * need to allocate a new one.. 
3202 */ 3203 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3204 if (ap == NULL) { 3205 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3206 if (prev != NULL) 3207 ipsec_action_free(prev); 3208 return (NULL); 3209 } 3210 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3211 3212 ap->ipa_next = prev; 3213 ap->ipa_act = a[i]; 3214 3215 overhead = ipsec_act_ovhd(&a[i]); 3216 if (maxovhd < overhead) 3217 maxovhd = overhead; 3218 3219 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3220 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3221 allow_clear = B_TRUE; 3222 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3223 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3224 3225 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3226 want_ah |= ipp->ipp_use_ah; 3227 want_esp |= ipp->ipp_use_esp; 3228 want_se |= ipp->ipp_use_se; 3229 want_unique |= ipp->ipp_use_unique; 3230 } 3231 ap->ipa_allow_clear = allow_clear; 3232 ap->ipa_want_ah = want_ah; 3233 ap->ipa_want_esp = want_esp; 3234 ap->ipa_want_se = want_se; 3235 ap->ipa_want_unique = want_unique; 3236 ap->ipa_refs = 1; /* from the hash table */ 3237 ap->ipa_ovhd = maxovhd; 3238 if (prev) 3239 prev->ipa_refs++; 3240 prev = ap; 3241 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3242 } 3243 3244 ap->ipa_refs++; /* caller's reference */ 3245 3246 return (ap); 3247 } 3248 3249 /* 3250 * Called when refcount goes to 0, indicating that all references to this 3251 * node are gone. 3252 * 3253 * This does not unchain the action from the hash table. 
3254 */ 3255 void 3256 ipsec_action_free(ipsec_action_t *ap) 3257 { 3258 for (;;) { 3259 ipsec_action_t *np = ap->ipa_next; 3260 ASSERT(ap->ipa_refs == 0); 3261 ASSERT(ap->ipa_hash.hash_pp == NULL); 3262 kmem_cache_free(ipsec_action_cache, ap); 3263 ap = np; 3264 /* Inlined IPACT_REFRELE -- avoid recursion */ 3265 if (ap == NULL) 3266 break; 3267 membar_exit(); 3268 if (atomic_dec_32_nv(&(ap)->ipa_refs) != 0) 3269 break; 3270 /* End inlined IPACT_REFRELE */ 3271 } 3272 } 3273 3274 /* 3275 * Called when the action hash table goes away. 3276 * 3277 * The actions can be queued on an mblk with ipsec_in or 3278 * ipsec_out, hence the actions might still be around. 3279 * But we decrement ipa_refs here since we no longer have 3280 * a reference to the action from the hash table. 3281 */ 3282 static void 3283 ipsec_action_free_table(ipsec_action_t *ap) 3284 { 3285 while (ap != NULL) { 3286 ipsec_action_t *np = ap->ipa_next; 3287 3288 /* FIXME: remove? */ 3289 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3290 (void *)ap, ap->ipa_refs); 3291 ASSERT(ap->ipa_refs > 0); 3292 IPACT_REFRELE(ap); 3293 ap = np; 3294 } 3295 } 3296 3297 /* 3298 * Need to walk all stack instances since the reclaim function 3299 * is global for all instances 3300 */ 3301 /* ARGSUSED */ 3302 static void 3303 ipsec_action_reclaim(void *arg) 3304 { 3305 netstack_handle_t nh; 3306 netstack_t *ns; 3307 ipsec_stack_t *ipss; 3308 3309 netstack_next_init(&nh); 3310 while ((ns = netstack_next(&nh)) != NULL) { 3311 /* 3312 * netstack_next() can return a netstack_t with a NULL 3313 * netstack_ipsec at boot time. 3314 */ 3315 if ((ipss = ns->netstack_ipsec) == NULL) { 3316 netstack_rele(ns); 3317 continue; 3318 } 3319 ipsec_action_reclaim_stack(ipss); 3320 netstack_rele(ns); 3321 } 3322 netstack_next_fini(&nh); 3323 } 3324 3325 /* 3326 * Periodically sweep action hash table for actions with refcount==1, and 3327 * nuke them. 
We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3328 * because we can't close the race between another thread finding the action 3329 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3330 * Instead, we run this function sporadically to clean up after ourselves; 3331 * we also set it as the "reclaim" function for the action kmem_cache. 3332 * 3333 * Note that it may take several passes of ipsec_action_gc() to free all 3334 * "stale" actions. 3335 */ 3336 static void 3337 ipsec_action_reclaim_stack(ipsec_stack_t *ipss) 3338 { 3339 int i; 3340 3341 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3342 ipsec_action_t *ap, *np; 3343 3344 /* skip the lock if nobody home */ 3345 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3346 continue; 3347 3348 HASH_LOCK(ipss->ipsec_action_hash, i); 3349 for (ap = ipss->ipsec_action_hash[i].hash_head; 3350 ap != NULL; ap = np) { 3351 ASSERT(ap->ipa_refs > 0); 3352 np = ap->ipa_hash.hash_next; 3353 if (ap->ipa_refs > 1) 3354 continue; 3355 HASH_UNCHAIN(ap, ipa_hash, 3356 ipss->ipsec_action_hash, i); 3357 IPACT_REFRELE(ap); 3358 } 3359 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3360 } 3361 } 3362 3363 /* 3364 * Intern a selector set into the selector set hash table. 3365 * This is simpler than the actions case.. 3366 */ 3367 static ipsec_sel_t * 3368 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3369 { 3370 ipsec_sel_t *sp; 3371 uint32_t hval, bucket; 3372 ipsec_stack_t *ipss = ns->netstack_ipsec; 3373 3374 /* 3375 * Exactly one AF bit should be set in selkey. 3376 */ 3377 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3378 !(selkey->ipsl_valid & IPSL_IPV6)); 3379 3380 hval = selkey_hash(selkey, ns); 3381 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3382 selkey->ipsl_sel_hval = hval; 3383 3384 bucket = (hval == IPSEC_SEL_NOHASH) ? 
0 : hval; 3385 3386 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); 3387 HASH_LOCK(ipss->ipsec_sel_hash, bucket); 3388 3389 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { 3390 if (bcmp(&sp->ipsl_key, selkey, 3391 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3392 break; 3393 } 3394 if (sp != NULL) { 3395 sp->ipsl_refs++; 3396 3397 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3398 return (sp); 3399 } 3400 3401 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3402 if (sp == NULL) { 3403 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3404 return (NULL); 3405 } 3406 3407 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); 3408 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3409 sp->ipsl_key = *selkey; 3410 /* Set to uninitalized and have insertion into polhead fix things. */ 3411 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3412 sp->ipsl_key.ipsl_pol_hval = 0; 3413 else 3414 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3415 3416 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3417 3418 return (sp); 3419 } 3420 3421 static void 3422 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) 3423 { 3424 ipsec_sel_t *sp = *spp; 3425 int hval = sp->ipsl_key.ipsl_sel_hval; 3426 ipsec_stack_t *ipss = ns->netstack_ipsec; 3427 3428 *spp = NULL; 3429 3430 if (hval == IPSEC_SEL_NOHASH) 3431 hval = 0; 3432 3433 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); 3434 HASH_LOCK(ipss->ipsec_sel_hash, hval); 3435 if (--sp->ipsl_refs == 1) { 3436 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); 3437 sp->ipsl_refs--; 3438 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3439 ASSERT(sp->ipsl_refs == 0); 3440 kmem_cache_free(ipsec_sel_cache, sp); 3441 /* Caller unlocks */ 3442 return; 3443 } 3444 3445 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3446 } 3447 3448 /* 3449 * Free a policy rule which we know is no longer being referenced. 
3450 */ 3451 void 3452 ipsec_policy_free(ipsec_policy_t *ipp) 3453 { 3454 ASSERT(ipp->ipsp_refs == 0); 3455 ASSERT(ipp->ipsp_sel != NULL); 3456 ASSERT(ipp->ipsp_act != NULL); 3457 ASSERT(ipp->ipsp_netstack != NULL); 3458 3459 ipsec_sel_rel(&ipp->ipsp_sel, ipp->ipsp_netstack); 3460 IPACT_REFRELE(ipp->ipsp_act); 3461 kmem_cache_free(ipsec_pol_cache, ipp); 3462 } 3463 3464 /* 3465 * Construction of new policy rules; construct a policy, and add it to 3466 * the appropriate tables. 3467 */ 3468 ipsec_policy_t * 3469 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3470 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3471 { 3472 ipsec_action_t *ap; 3473 ipsec_sel_t *sp; 3474 ipsec_policy_t *ipp; 3475 ipsec_stack_t *ipss = ns->netstack_ipsec; 3476 3477 if (index_ptr == NULL) 3478 index_ptr = &ipss->ipsec_next_policy_index; 3479 3480 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3481 ap = ipsec_act_find(a, nacts, ns); 3482 sp = ipsec_find_sel(keys, ns); 3483 3484 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3485 if (ap != NULL) { 3486 IPACT_REFRELE(ap); 3487 } 3488 if (sp != NULL) 3489 ipsec_sel_rel(&sp, ns); 3490 if (ipp != NULL) 3491 kmem_cache_free(ipsec_pol_cache, ipp); 3492 return (NULL); 3493 } 3494 3495 HASH_NULL(ipp, ipsp_hash); 3496 3497 ipp->ipsp_netstack = ns; /* Needed for ipsec_policy_free */ 3498 ipp->ipsp_refs = 1; /* caller's reference */ 3499 ipp->ipsp_sel = sp; 3500 ipp->ipsp_act = ap; 3501 ipp->ipsp_prio = prio; /* rule priority */ 3502 ipp->ipsp_index = *index_ptr; 3503 (*index_ptr)++; 3504 3505 return (ipp); 3506 } 3507 3508 static void 3509 ipsec_update_present_flags(ipsec_stack_t *ipss) 3510 { 3511 boolean_t hashpol; 3512 3513 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3514 3515 if (hashpol) { 3516 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3517 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3518 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3519 
ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3520 return; 3521 } 3522 3523 ipss->ipsec_outbound_v4_policy_present = (NULL != 3524 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3525 ipr_nonhash[IPSEC_AF_V4]); 3526 ipss->ipsec_outbound_v6_policy_present = (NULL != 3527 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3528 ipr_nonhash[IPSEC_AF_V6]); 3529 ipss->ipsec_inbound_v4_policy_present = (NULL != 3530 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3531 ipr_nonhash[IPSEC_AF_V4]); 3532 ipss->ipsec_inbound_v6_policy_present = (NULL != 3533 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3534 ipr_nonhash[IPSEC_AF_V6]); 3535 } 3536 3537 boolean_t 3538 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3539 netstack_t *ns) 3540 { 3541 ipsec_sel_t *sp; 3542 ipsec_policy_t *ip, *nip, *head; 3543 int af; 3544 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3545 3546 sp = ipsec_find_sel(keys, ns); 3547 3548 if (sp == NULL) 3549 return (B_FALSE); 3550 3551 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6; 3552 3553 rw_enter(&php->iph_lock, RW_WRITER); 3554 3555 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) { 3556 head = pr->ipr_nonhash[af]; 3557 } else { 3558 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3559 } 3560 3561 for (ip = head; ip != NULL; ip = nip) { 3562 nip = ip->ipsp_hash.hash_next; 3563 if (ip->ipsp_sel != sp) { 3564 continue; 3565 } 3566 3567 IPPOL_UNCHAIN(php, ip); 3568 3569 php->iph_gen++; 3570 ipsec_update_present_flags(ns->netstack_ipsec); 3571 3572 rw_exit(&php->iph_lock); 3573 3574 ipsec_sel_rel(&sp, ns); 3575 3576 return (B_TRUE); 3577 } 3578 3579 rw_exit(&php->iph_lock); 3580 ipsec_sel_rel(&sp, ns); 3581 return (B_FALSE); 3582 } 3583 3584 int 3585 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index, 3586 netstack_t *ns) 3587 { 3588 boolean_t found = B_FALSE; 3589 ipsec_policy_t ipkey; 3590 ipsec_policy_t *ip; 3591 avl_index_t where; 3592 3593 bzero(&ipkey, sizeof (ipkey)); 3594 ipkey.ipsp_index = policy_index; 3595 3596 rw_enter(&php->iph_lock, RW_WRITER); 3597 3598 /* 3599 * We could be cleverer here about the walk. 3600 * but well, (k+1)*log(N) will do for now (k==number of matches, 3601 * N==number of table entries 3602 */ 3603 for (;;) { 3604 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid, 3605 (void *)&ipkey, &where); 3606 ASSERT(ip == NULL); 3607 3608 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER); 3609 3610 if (ip == NULL) 3611 break; 3612 3613 if (ip->ipsp_index != policy_index) { 3614 ASSERT(ip->ipsp_index > policy_index); 3615 break; 3616 } 3617 3618 IPPOL_UNCHAIN(php, ip); 3619 found = B_TRUE; 3620 } 3621 3622 if (found) { 3623 php->iph_gen++; 3624 ipsec_update_present_flags(ns->netstack_ipsec); 3625 } 3626 3627 rw_exit(&php->iph_lock); 3628 3629 return (found ? 0 : ENOENT); 3630 } 3631 3632 /* 3633 * Given a constructed ipsec_policy_t policy rule, see if it can be entered 3634 * into the correct policy ruleset. 
As a side-effect, it sets the hash 3635 * entries on "ipp"'s ipsp_pol_hval. 3636 * 3637 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a 3638 * duplicate policy exists with exactly the same selectors), or an icmp 3639 * rule exists with a different encryption/authentication action. 3640 */ 3641 boolean_t 3642 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3643 { 3644 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3645 int af = -1; 3646 ipsec_policy_t *p2, *head; 3647 uint8_t check_proto; 3648 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3649 uint32_t valid = selkey->ipsl_valid; 3650 3651 if (valid & IPSL_IPV6) { 3652 ASSERT(!(valid & IPSL_IPV4)); 3653 af = IPSEC_AF_V6; 3654 check_proto = IPPROTO_ICMPV6; 3655 } else { 3656 ASSERT(valid & IPSL_IPV4); 3657 af = IPSEC_AF_V4; 3658 check_proto = IPPROTO_ICMP; 3659 } 3660 3661 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3662 3663 /* 3664 * Double-check that we don't have any duplicate selectors here. 3665 * Because selectors are interned below, we need only compare pointers 3666 * for equality. 3667 */ 3668 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) { 3669 head = pr->ipr_nonhash[af]; 3670 } else { 3671 selkey->ipsl_pol_hval = 3672 (selkey->ipsl_valid & IPSL_IPV4) ? 3673 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3674 pr->ipr_nchains) : 3675 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3676 pr->ipr_nchains); 3677 3678 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3679 } 3680 3681 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3682 if (p2->ipsp_sel == ipp->ipsp_sel) 3683 return (B_FALSE); 3684 } 3685 3686 /* 3687 * If it's ICMP and not a drop or pass rule, run through the ICMP 3688 * rules and make sure the action is either new or the same as any 3689 * other actions. 
We don't have to check the full chain because 3690 * discard and bypass will override all other actions 3691 */ 3692 3693 if (valid & IPSL_PROTOCOL && 3694 selkey->ipsl_proto == check_proto && 3695 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3696 3697 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3698 3699 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3700 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3701 (p2->ipsp_act->ipa_act.ipa_type == 3702 IPSEC_ACT_APPLY)) { 3703 return (ipsec_compare_action(p2, ipp)); 3704 } 3705 } 3706 } 3707 3708 return (B_TRUE); 3709 } 3710 3711 /* 3712 * compare the action chains of two policies for equality 3713 * B_TRUE -> effective equality 3714 */ 3715 3716 static boolean_t 3717 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3718 { 3719 3720 ipsec_action_t *act1, *act2; 3721 3722 /* We have a valid rule. Let's compare the actions */ 3723 if (p1->ipsp_act == p2->ipsp_act) { 3724 /* same action. We are good */ 3725 return (B_TRUE); 3726 } 3727 3728 /* we have to walk the chain */ 3729 3730 act1 = p1->ipsp_act; 3731 act2 = p2->ipsp_act; 3732 3733 while (act1 != NULL && act2 != NULL) { 3734 3735 /* otherwise, Are we close enough? 
*/ 3736 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3737 act1->ipa_want_ah != act2->ipa_want_ah || 3738 act1->ipa_want_esp != act2->ipa_want_esp || 3739 act1->ipa_want_se != act2->ipa_want_se) { 3740 /* Nope, we aren't */ 3741 return (B_FALSE); 3742 } 3743 3744 if (act1->ipa_want_ah) { 3745 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3746 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3747 return (B_FALSE); 3748 } 3749 3750 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3751 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3752 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3753 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3754 return (B_FALSE); 3755 } 3756 } 3757 3758 if (act1->ipa_want_esp) { 3759 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3760 act2->ipa_act.ipa_apply.ipp_use_esp || 3761 act1->ipa_act.ipa_apply.ipp_use_espa != 3762 act2->ipa_act.ipa_apply.ipp_use_espa) { 3763 return (B_FALSE); 3764 } 3765 3766 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3767 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3768 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3769 return (B_FALSE); 3770 } 3771 3772 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3773 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3774 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3775 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3776 return (B_FALSE); 3777 } 3778 } 3779 3780 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3781 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3782 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3783 return (B_FALSE); 3784 } 3785 3786 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3787 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3788 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3789 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3790 return (B_FALSE); 3791 } 3792 } 3793 3794 } 3795 3796 act1 = act1->ipa_next; 3797 act2 = act2->ipa_next; 3798 } 3799 3800 if (act1 != NULL || act2 != NULL) { 3801 return (B_FALSE); 3802 } 3803 3804 return (B_TRUE); 3805 } 3806 3807 3808 /* 3809 * Given a constructed ipsec_policy_t 
policy rule, enter it into 3810 * the correct policy ruleset. 3811 * 3812 * ipsec_check_policy() is assumed to have succeeded first (to check for 3813 * duplicates). 3814 */ 3815 void 3816 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3817 netstack_t *ns) 3818 { 3819 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3820 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3821 uint32_t valid = selkey->ipsl_valid; 3822 uint32_t hval = selkey->ipsl_pol_hval; 3823 int af = -1; 3824 3825 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3826 3827 if (valid & IPSL_IPV6) { 3828 ASSERT(!(valid & IPSL_IPV4)); 3829 af = IPSEC_AF_V6; 3830 } else { 3831 ASSERT(valid & IPSL_IPV4); 3832 af = IPSEC_AF_V4; 3833 } 3834 3835 php->iph_gen++; 3836 3837 if (hval == IPSEC_SEL_NOHASH) { 3838 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3839 } else { 3840 HASH_LOCK(pr->ipr_hash, hval); 3841 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3842 HASH_UNLOCK(pr->ipr_hash, hval); 3843 } 3844 3845 ipsec_insert_always(&php->iph_rulebyid, ipp); 3846 3847 ipsec_update_present_flags(ns->netstack_ipsec); 3848 } 3849 3850 static void 3851 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr) 3852 { 3853 ipsec_policy_t *ip, *nip; 3854 int af, chain, nchain; 3855 3856 for (af = 0; af < IPSEC_NAF; af++) { 3857 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3858 nip = ip->ipsp_hash.hash_next; 3859 IPPOL_UNCHAIN(php, ip); 3860 } 3861 ipr->ipr_nonhash[af] = NULL; 3862 } 3863 nchain = ipr->ipr_nchains; 3864 3865 for (chain = 0; chain < nchain; chain++) { 3866 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3867 ip = nip) { 3868 nip = ip->ipsp_hash.hash_next; 3869 IPPOL_UNCHAIN(php, ip); 3870 } 3871 ipr->ipr_hash[chain].hash_head = NULL; 3872 } 3873 } 3874 3875 /* 3876 * Create and insert inbound or outbound policy associated with actp for the 3877 * address family fam into the policy head ph. 
Returns B_TRUE if policy was 3878 * inserted, and B_FALSE otherwise. 3879 */ 3880 boolean_t 3881 ipsec_polhead_insert(ipsec_policy_head_t *ph, ipsec_act_t *actp, uint_t nact, 3882 int fam, int ptype, netstack_t *ns) 3883 { 3884 ipsec_selkey_t sel; 3885 ipsec_policy_t *pol; 3886 ipsec_policy_root_t *pr; 3887 3888 bzero(&sel, sizeof (sel)); 3889 sel.ipsl_valid = (fam == IPSEC_AF_V4 ? IPSL_IPV4 : IPSL_IPV6); 3890 if ((pol = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, 3891 NULL, ns)) != NULL) { 3892 pr = &ph->iph_root[ptype]; 3893 HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[fam]); 3894 ipsec_insert_always(&ph->iph_rulebyid, pol); 3895 } 3896 return (pol != NULL); 3897 } 3898 3899 void 3900 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3901 { 3902 int dir; 3903 3904 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3905 3906 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3907 ipsec_ipr_flush(php, &php->iph_root[dir]); 3908 3909 php->iph_gen++; 3910 ipsec_update_present_flags(ns->netstack_ipsec); 3911 } 3912 3913 void 3914 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3915 { 3916 int dir; 3917 3918 ASSERT(php->iph_refs == 0); 3919 3920 rw_enter(&php->iph_lock, RW_WRITER); 3921 ipsec_polhead_flush(php, ns); 3922 rw_exit(&php->iph_lock); 3923 rw_destroy(&php->iph_lock); 3924 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3925 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3926 int chain; 3927 3928 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3929 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3930 3931 } 3932 ipsec_polhead_free_table(php); 3933 kmem_free(php, sizeof (*php)); 3934 } 3935 3936 static void 3937 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3938 { 3939 int af; 3940 3941 ipr->ipr_nchains = 0; 3942 ipr->ipr_hash = NULL; 3943 3944 for (af = 0; af < IPSEC_NAF; af++) { 3945 ipr->ipr_nonhash[af] = NULL; 3946 } 3947 } 3948 3949 ipsec_policy_head_t * 3950 ipsec_polhead_create(void) 3951 { 3952 ipsec_policy_head_t *php; 3953 3954 php = 
kmem_alloc(sizeof (*php), KM_NOSLEEP); 3955 if (php == NULL) 3956 return (php); 3957 3958 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3959 php->iph_refs = 1; 3960 php->iph_gen = 0; 3961 3962 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3963 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3964 3965 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3966 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3967 3968 return (php); 3969 } 3970 3971 /* 3972 * Clone the policy head into a new polhead; release one reference to the 3973 * old one and return the only reference to the new one. 3974 * If the old one had a refcount of 1, just return it. 3975 */ 3976 ipsec_policy_head_t * 3977 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns) 3978 { 3979 ipsec_policy_head_t *nphp; 3980 3981 if (php == NULL) 3982 return (ipsec_polhead_create()); 3983 else if (php->iph_refs == 1) 3984 return (php); 3985 3986 nphp = ipsec_polhead_create(); 3987 if (nphp == NULL) 3988 return (NULL); 3989 3990 if (ipsec_copy_polhead(php, nphp, ns) != 0) { 3991 ipsec_polhead_free(nphp, ns); 3992 return (NULL); 3993 } 3994 IPPH_REFRELE(php, ns); 3995 return (nphp); 3996 } 3997 3998 /* 3999 * When sending a response to a ICMP request or generating a RST 4000 * in the TCP case, the outbound packets need to go at the same level 4001 * of protection as the incoming ones i.e we associate our outbound 4002 * policy with how the packet came in. We call this after we have 4003 * accepted the incoming packet which may or may not have been in 4004 * clear and hence we are sending the reply back with the policy 4005 * matching the incoming datagram's policy. 4006 * 4007 * NOTE : This technology serves two purposes : 4008 * 4009 * 1) If we have multiple outbound policies, we send out a reply 4010 * matching with how it came in rather than matching the outbound 4011 * policy. 
4012 * 4013 * 2) For assymetric policies, we want to make sure that incoming 4014 * and outgoing has the same level of protection. Assymetric 4015 * policies exist only with global policy where we may not have 4016 * both outbound and inbound at the same time. 4017 * 4018 * NOTE2: This function is called by cleartext cases, so it needs to be 4019 * in IP proper. 4020 * 4021 * Note: the caller has moved other parts of ira into ixa already. 4022 */ 4023 boolean_t 4024 ipsec_in_to_out(ip_recv_attr_t *ira, ip_xmit_attr_t *ixa, mblk_t *data_mp, 4025 ipha_t *ipha, ip6_t *ip6h) 4026 { 4027 ipsec_selector_t sel; 4028 ipsec_action_t *reflect_action = NULL; 4029 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 4030 4031 bzero((void*)&sel, sizeof (sel)); 4032 4033 if (ira->ira_ipsec_action != NULL) { 4034 /* transfer reference.. */ 4035 reflect_action = ira->ira_ipsec_action; 4036 ira->ira_ipsec_action = NULL; 4037 } else if (!(ira->ira_flags & IRAF_LOOPBACK)) 4038 reflect_action = ipsec_in_to_out_action(ira); 4039 4040 /* 4041 * The caller is going to send the datagram out which might 4042 * go on the wire or delivered locally through ire_send_local. 4043 * 4044 * 1) If it goes out on the wire, new associations will be 4045 * obtained. 4046 * 2) If it is delivered locally, ire_send_local will convert 4047 * this ip_xmit_attr_t back to a ip_recv_attr_t looking at the 4048 * requests. 
4049 */ 4050 ixa->ixa_ipsec_action = reflect_action; 4051 4052 if (!ipsec_init_outbound_ports(&sel, data_mp, ipha, ip6h, 0, 4053 ns->netstack_ipsec)) { 4054 /* Note: data_mp already consumed and ip_drop_packet done */ 4055 return (B_FALSE); 4056 } 4057 ixa->ixa_ipsec_src_port = sel.ips_local_port; 4058 ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4059 ixa->ixa_ipsec_proto = sel.ips_protocol; 4060 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4061 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4062 4063 /* 4064 * Don't use global policy for this, as we want 4065 * to use the same protection that was applied to the inbound packet. 4066 * Thus we set IXAF_NO_IPSEC is it arrived in the clear to make 4067 * it be sent in the clear. 4068 */ 4069 if (ira->ira_flags & IRAF_IPSEC_SECURE) 4070 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4071 else 4072 ixa->ixa_flags |= IXAF_NO_IPSEC; 4073 4074 return (B_TRUE); 4075 } 4076 4077 void 4078 ipsec_out_release_refs(ip_xmit_attr_t *ixa) 4079 { 4080 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) 4081 return; 4082 4083 if (ixa->ixa_ipsec_ah_sa != NULL) { 4084 IPSA_REFRELE(ixa->ixa_ipsec_ah_sa); 4085 ixa->ixa_ipsec_ah_sa = NULL; 4086 } 4087 if (ixa->ixa_ipsec_esp_sa != NULL) { 4088 IPSA_REFRELE(ixa->ixa_ipsec_esp_sa); 4089 ixa->ixa_ipsec_esp_sa = NULL; 4090 } 4091 if (ixa->ixa_ipsec_policy != NULL) { 4092 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4093 ixa->ixa_ipsec_policy = NULL; 4094 } 4095 if (ixa->ixa_ipsec_action != NULL) { 4096 IPACT_REFRELE(ixa->ixa_ipsec_action); 4097 ixa->ixa_ipsec_action = NULL; 4098 } 4099 if (ixa->ixa_ipsec_latch) { 4100 IPLATCH_REFRELE(ixa->ixa_ipsec_latch); 4101 ixa->ixa_ipsec_latch = NULL; 4102 } 4103 /* Clear the soft references to the SAs */ 4104 ixa->ixa_ipsec_ref[0].ipsr_sa = NULL; 4105 ixa->ixa_ipsec_ref[0].ipsr_bucket = NULL; 4106 ixa->ixa_ipsec_ref[0].ipsr_gen = 0; 4107 ixa->ixa_ipsec_ref[1].ipsr_sa = NULL; 4108 ixa->ixa_ipsec_ref[1].ipsr_bucket = NULL; 4109 ixa->ixa_ipsec_ref[1].ipsr_gen = 0; 4110 
ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4111 } 4112 4113 void 4114 ipsec_in_release_refs(ip_recv_attr_t *ira) 4115 { 4116 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) 4117 return; 4118 4119 if (ira->ira_ipsec_ah_sa != NULL) { 4120 IPSA_REFRELE(ira->ira_ipsec_ah_sa); 4121 ira->ira_ipsec_ah_sa = NULL; 4122 } 4123 if (ira->ira_ipsec_esp_sa != NULL) { 4124 IPSA_REFRELE(ira->ira_ipsec_esp_sa); 4125 ira->ira_ipsec_esp_sa = NULL; 4126 } 4127 if (ira->ira_ipsec_action != NULL) { 4128 IPACT_REFRELE(ira->ira_ipsec_action); 4129 ira->ira_ipsec_action = NULL; 4130 } 4131 4132 ira->ira_flags &= ~IRAF_IPSEC_SECURE; 4133 } 4134 4135 /* 4136 * This is called from ire_send_local when a packet 4137 * is looped back. We setup the ip_recv_attr_t "borrowing" the references 4138 * held by the callers. 4139 * Note that we don't do any IPsec but we carry the actions and IPSEC flags 4140 * across so that the fanout policy checks see that IPsec was applied. 4141 * 4142 * The caller should do ipsec_in_release_refs() on the ira by calling 4143 * ira_cleanup(). 
4144 */ 4145 void 4146 ipsec_out_to_in(ip_xmit_attr_t *ixa, ill_t *ill, ip_recv_attr_t *ira) 4147 { 4148 ipsec_policy_t *pol; 4149 ipsec_action_t *act; 4150 4151 /* Non-IPsec operations */ 4152 ira->ira_free_flags = 0; 4153 ira->ira_zoneid = ixa->ixa_zoneid; 4154 ira->ira_cred = ixa->ixa_cred; 4155 ira->ira_cpid = ixa->ixa_cpid; 4156 ira->ira_tsl = ixa->ixa_tsl; 4157 ira->ira_ill = ira->ira_rill = ill; 4158 ira->ira_flags = ixa->ixa_flags & IAF_MASK; 4159 ira->ira_no_loop_zoneid = ixa->ixa_no_loop_zoneid; 4160 ira->ira_pktlen = ixa->ixa_pktlen; 4161 ira->ira_ip_hdr_length = ixa->ixa_ip_hdr_length; 4162 ira->ira_protocol = ixa->ixa_protocol; 4163 ira->ira_mhip = NULL; 4164 4165 ira->ira_flags |= IRAF_LOOPBACK | IRAF_L2SRC_LOOPBACK; 4166 4167 ira->ira_sqp = ixa->ixa_sqp; 4168 ira->ira_ring = NULL; 4169 4170 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 4171 ira->ira_rifindex = ira->ira_ruifindex; 4172 4173 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) 4174 return; 4175 4176 ira->ira_flags |= IRAF_IPSEC_SECURE; 4177 4178 ira->ira_ipsec_ah_sa = NULL; 4179 ira->ira_ipsec_esp_sa = NULL; 4180 4181 act = ixa->ixa_ipsec_action; 4182 if (act == NULL) { 4183 pol = ixa->ixa_ipsec_policy; 4184 if (pol != NULL) { 4185 act = pol->ipsp_act; 4186 IPACT_REFHOLD(act); 4187 } 4188 } 4189 ixa->ixa_ipsec_action = NULL; 4190 ira->ira_ipsec_action = act; 4191 } 4192 4193 /* 4194 * Consults global policy and per-socket policy to see whether this datagram 4195 * should go out secure. If so it updates the ip_xmit_attr_t 4196 * Should not be used when connecting, since then we want to latch the policy. 4197 * 4198 * If connp is NULL we just look at the global policy. 4199 * 4200 * Returns NULL if the packet was dropped, in which case the MIB has 4201 * been incremented and ip_drop_packet done. 
4202 */ 4203 mblk_t * 4204 ip_output_attach_policy(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, 4205 const conn_t *connp, ip_xmit_attr_t *ixa) 4206 { 4207 ipsec_selector_t sel; 4208 boolean_t policy_present; 4209 ip_stack_t *ipst = ixa->ixa_ipst; 4210 netstack_t *ns = ipst->ips_netstack; 4211 ipsec_stack_t *ipss = ns->netstack_ipsec; 4212 ipsec_policy_t *p; 4213 4214 ixa->ixa_ipsec_policy_gen = ipss->ipsec_system_policy.iph_gen; 4215 ASSERT((ipha != NULL && ip6h == NULL) || 4216 (ip6h != NULL && ipha == NULL)); 4217 4218 if (ipha != NULL) 4219 policy_present = ipss->ipsec_outbound_v4_policy_present; 4220 else 4221 policy_present = ipss->ipsec_outbound_v6_policy_present; 4222 4223 if (!policy_present && (connp == NULL || connp->conn_policy == NULL)) 4224 return (mp); 4225 4226 bzero((void*)&sel, sizeof (sel)); 4227 4228 if (ipha != NULL) { 4229 sel.ips_local_addr_v4 = ipha->ipha_src; 4230 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4231 sel.ips_isv4 = B_TRUE; 4232 } else { 4233 sel.ips_isv4 = B_FALSE; 4234 sel.ips_local_addr_v6 = ip6h->ip6_src; 4235 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, mp, NULL); 4236 } 4237 sel.ips_protocol = ixa->ixa_protocol; 4238 4239 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4240 if (ipha != NULL) { 4241 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4242 } else { 4243 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4244 } 4245 /* Note: mp already consumed and ip_drop_packet done */ 4246 return (NULL); 4247 } 4248 4249 ASSERT(ixa->ixa_ipsec_policy == NULL); 4250 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4251 ixa->ixa_ipsec_policy = p; 4252 if (p != NULL) { 4253 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4254 if (connp == NULL || connp->conn_policy == NULL) 4255 ixa->ixa_flags |= IXAF_IPSEC_GLOBAL_POLICY; 4256 } else { 4257 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4258 } 4259 4260 /* 4261 * Copy the right port information. 
4262 */ 4263 ixa->ixa_ipsec_src_port = sel.ips_local_port; 4264 ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4265 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4266 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4267 ixa->ixa_ipsec_proto = sel.ips_protocol; 4268 return (mp); 4269 } 4270 4271 /* 4272 * When appropriate, this function caches inbound and outbound policy 4273 * for this connection. The outbound policy is stored in conn_ixa. 4274 * Note that it can not be used for SCTP since conn_faddr isn't set for SCTP. 4275 * 4276 * XXX need to work out more details about per-interface policy and 4277 * caching here! 4278 * 4279 * XXX may want to split inbound and outbound caching for ill.. 4280 */ 4281 int 4282 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4283 { 4284 boolean_t global_policy_present; 4285 netstack_t *ns = connp->conn_netstack; 4286 ipsec_stack_t *ipss = ns->netstack_ipsec; 4287 4288 connp->conn_ixa->ixa_ipsec_policy_gen = 4289 ipss->ipsec_system_policy.iph_gen; 4290 /* 4291 * There is no policy latching for ICMP sockets because we can't 4292 * decide on which policy to use until we see the packet and get 4293 * type/code selectors. 
4294 */ 4295 if (connp->conn_proto == IPPROTO_ICMP || 4296 connp->conn_proto == IPPROTO_ICMPV6) { 4297 connp->conn_in_enforce_policy = 4298 connp->conn_out_enforce_policy = B_TRUE; 4299 if (connp->conn_latch != NULL) { 4300 IPLATCH_REFRELE(connp->conn_latch); 4301 connp->conn_latch = NULL; 4302 } 4303 if (connp->conn_latch_in_policy != NULL) { 4304 IPPOL_REFRELE(connp->conn_latch_in_policy); 4305 connp->conn_latch_in_policy = NULL; 4306 } 4307 if (connp->conn_latch_in_action != NULL) { 4308 IPACT_REFRELE(connp->conn_latch_in_action); 4309 connp->conn_latch_in_action = NULL; 4310 } 4311 if (connp->conn_ixa->ixa_ipsec_policy != NULL) { 4312 IPPOL_REFRELE(connp->conn_ixa->ixa_ipsec_policy); 4313 connp->conn_ixa->ixa_ipsec_policy = NULL; 4314 } 4315 if (connp->conn_ixa->ixa_ipsec_action != NULL) { 4316 IPACT_REFRELE(connp->conn_ixa->ixa_ipsec_action); 4317 connp->conn_ixa->ixa_ipsec_action = NULL; 4318 } 4319 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4320 return (0); 4321 } 4322 4323 global_policy_present = isv4 ? 
4324 (ipss->ipsec_outbound_v4_policy_present || 4325 ipss->ipsec_inbound_v4_policy_present) : 4326 (ipss->ipsec_outbound_v6_policy_present || 4327 ipss->ipsec_inbound_v6_policy_present); 4328 4329 if ((connp->conn_policy != NULL) || global_policy_present) { 4330 ipsec_selector_t sel; 4331 ipsec_policy_t *p; 4332 4333 if (connp->conn_latch == NULL && 4334 (connp->conn_latch = iplatch_create()) == NULL) { 4335 return (ENOMEM); 4336 } 4337 4338 bzero((void*)&sel, sizeof (sel)); 4339 4340 sel.ips_protocol = connp->conn_proto; 4341 sel.ips_local_port = connp->conn_lport; 4342 sel.ips_remote_port = connp->conn_fport; 4343 sel.ips_is_icmp_inv_acq = 0; 4344 sel.ips_isv4 = isv4; 4345 if (isv4) { 4346 sel.ips_local_addr_v4 = connp->conn_laddr_v4; 4347 sel.ips_remote_addr_v4 = connp->conn_faddr_v4; 4348 } else { 4349 sel.ips_local_addr_v6 = connp->conn_laddr_v6; 4350 sel.ips_remote_addr_v6 = connp->conn_faddr_v6; 4351 } 4352 4353 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, &sel, ns); 4354 if (connp->conn_latch_in_policy != NULL) 4355 IPPOL_REFRELE(connp->conn_latch_in_policy); 4356 connp->conn_latch_in_policy = p; 4357 connp->conn_in_enforce_policy = (p != NULL); 4358 4359 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4360 if (connp->conn_ixa->ixa_ipsec_policy != NULL) 4361 IPPOL_REFRELE(connp->conn_ixa->ixa_ipsec_policy); 4362 connp->conn_ixa->ixa_ipsec_policy = p; 4363 connp->conn_out_enforce_policy = (p != NULL); 4364 if (p != NULL) { 4365 connp->conn_ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4366 if (connp->conn_policy == NULL) { 4367 connp->conn_ixa->ixa_flags |= 4368 IXAF_IPSEC_GLOBAL_POLICY; 4369 } 4370 } else { 4371 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4372 } 4373 /* Clear the latched actions too, in case we're recaching. 
*/ 4374 if (connp->conn_ixa->ixa_ipsec_action != NULL) { 4375 IPACT_REFRELE(connp->conn_ixa->ixa_ipsec_action); 4376 connp->conn_ixa->ixa_ipsec_action = NULL; 4377 } 4378 if (connp->conn_latch_in_action != NULL) { 4379 IPACT_REFRELE(connp->conn_latch_in_action); 4380 connp->conn_latch_in_action = NULL; 4381 } 4382 connp->conn_ixa->ixa_ipsec_src_port = sel.ips_local_port; 4383 connp->conn_ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4384 connp->conn_ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4385 connp->conn_ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4386 connp->conn_ixa->ixa_ipsec_proto = sel.ips_protocol; 4387 } else { 4388 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4389 } 4390 4391 /* 4392 * We may or may not have policy for this endpoint. We still set 4393 * conn_policy_cached so that inbound datagrams don't have to look 4394 * at global policy as policy is considered latched for these 4395 * endpoints. We should not set conn_policy_cached until the conn 4396 * reflects the actual policy. If we *set* this before inheriting 4397 * the policy there is a window where the check 4398 * CONN_INBOUND_POLICY_PRESENT, will neither check with the policy 4399 * on the conn (because we have not yet copied the policy on to 4400 * conn and hence not set conn_in_enforce_policy) nor with the 4401 * global policy (because conn_policy_cached is already set). 4402 */ 4403 connp->conn_policy_cached = B_TRUE; 4404 return (0); 4405 } 4406 4407 /* 4408 * When appropriate, this function caches outbound policy for faddr/fport. 4409 * It is used when we are not connected i.e., when we can not latch the 4410 * policy. 
4411 */ 4412 void 4413 ipsec_cache_outbound_policy(const conn_t *connp, const in6_addr_t *v6src, 4414 const in6_addr_t *v6dst, in_port_t dstport, ip_xmit_attr_t *ixa) 4415 { 4416 boolean_t isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0; 4417 boolean_t global_policy_present; 4418 netstack_t *ns = connp->conn_netstack; 4419 ipsec_stack_t *ipss = ns->netstack_ipsec; 4420 4421 ixa->ixa_ipsec_policy_gen = ipss->ipsec_system_policy.iph_gen; 4422 4423 /* 4424 * There is no policy caching for ICMP sockets because we can't 4425 * decide on which policy to use until we see the packet and get 4426 * type/code selectors. 4427 */ 4428 if (connp->conn_proto == IPPROTO_ICMP || 4429 connp->conn_proto == IPPROTO_ICMPV6) { 4430 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4431 if (ixa->ixa_ipsec_policy != NULL) { 4432 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4433 ixa->ixa_ipsec_policy = NULL; 4434 } 4435 if (ixa->ixa_ipsec_action != NULL) { 4436 IPACT_REFRELE(ixa->ixa_ipsec_action); 4437 ixa->ixa_ipsec_action = NULL; 4438 } 4439 return; 4440 } 4441 4442 global_policy_present = isv4 ? 
4443 (ipss->ipsec_outbound_v4_policy_present || 4444 ipss->ipsec_inbound_v4_policy_present) : 4445 (ipss->ipsec_outbound_v6_policy_present || 4446 ipss->ipsec_inbound_v6_policy_present); 4447 4448 if ((connp->conn_policy != NULL) || global_policy_present) { 4449 ipsec_selector_t sel; 4450 ipsec_policy_t *p; 4451 4452 bzero((void*)&sel, sizeof (sel)); 4453 4454 sel.ips_protocol = connp->conn_proto; 4455 sel.ips_local_port = connp->conn_lport; 4456 sel.ips_remote_port = dstport; 4457 sel.ips_is_icmp_inv_acq = 0; 4458 sel.ips_isv4 = isv4; 4459 if (isv4) { 4460 IN6_V4MAPPED_TO_IPADDR(v6src, sel.ips_local_addr_v4); 4461 IN6_V4MAPPED_TO_IPADDR(v6dst, sel.ips_remote_addr_v4); 4462 } else { 4463 sel.ips_local_addr_v6 = *v6src; 4464 sel.ips_remote_addr_v6 = *v6dst; 4465 } 4466 4467 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4468 if (ixa->ixa_ipsec_policy != NULL) 4469 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4470 ixa->ixa_ipsec_policy = p; 4471 if (p != NULL) { 4472 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4473 if (connp->conn_policy == NULL) 4474 ixa->ixa_flags |= IXAF_IPSEC_GLOBAL_POLICY; 4475 } else { 4476 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4477 } 4478 /* Clear the latched actions too, in case we're recaching. 
*/ 4479 if (ixa->ixa_ipsec_action != NULL) { 4480 IPACT_REFRELE(ixa->ixa_ipsec_action); 4481 ixa->ixa_ipsec_action = NULL; 4482 } 4483 4484 ixa->ixa_ipsec_src_port = sel.ips_local_port; 4485 ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4486 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4487 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4488 ixa->ixa_ipsec_proto = sel.ips_protocol; 4489 } else { 4490 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4491 if (ixa->ixa_ipsec_policy != NULL) { 4492 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4493 ixa->ixa_ipsec_policy = NULL; 4494 } 4495 if (ixa->ixa_ipsec_action != NULL) { 4496 IPACT_REFRELE(ixa->ixa_ipsec_action); 4497 ixa->ixa_ipsec_action = NULL; 4498 } 4499 } 4500 } 4501 4502 /* 4503 * Returns B_FALSE if the policy has gone stale. 4504 */ 4505 boolean_t 4506 ipsec_outbound_policy_current(ip_xmit_attr_t *ixa) 4507 { 4508 ipsec_stack_t *ipss = ixa->ixa_ipst->ips_netstack->netstack_ipsec; 4509 4510 if (!(ixa->ixa_flags & IXAF_IPSEC_GLOBAL_POLICY)) 4511 return (B_TRUE); 4512 4513 return (ixa->ixa_ipsec_policy_gen == ipss->ipsec_system_policy.iph_gen); 4514 } 4515 4516 void 4517 iplatch_free(ipsec_latch_t *ipl) 4518 { 4519 if (ipl->ipl_local_cid != NULL) 4520 IPSID_REFRELE(ipl->ipl_local_cid); 4521 if (ipl->ipl_remote_cid != NULL) 4522 IPSID_REFRELE(ipl->ipl_remote_cid); 4523 mutex_destroy(&ipl->ipl_lock); 4524 kmem_free(ipl, sizeof (*ipl)); 4525 } 4526 4527 ipsec_latch_t * 4528 iplatch_create() 4529 { 4530 ipsec_latch_t *ipl = kmem_zalloc(sizeof (*ipl), KM_NOSLEEP); 4531 if (ipl == NULL) 4532 return (ipl); 4533 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4534 ipl->ipl_refcnt = 1; 4535 return (ipl); 4536 } 4537 4538 /* 4539 * Hash function for ID hash table. 
4540 */ 4541 static uint32_t 4542 ipsid_hash(int idtype, char *idstring) 4543 { 4544 uint32_t hval = idtype; 4545 unsigned char c; 4546 4547 while ((c = *idstring++) != 0) { 4548 hval = (hval << 4) | (hval >> 28); 4549 hval ^= c; 4550 } 4551 hval = hval ^ (hval >> 16); 4552 return (hval & (IPSID_HASHSIZE-1)); 4553 } 4554 4555 /* 4556 * Look up identity string in hash table. Return identity object 4557 * corresponding to the name -- either preexisting, or newly allocated. 4558 * 4559 * Return NULL if we need to allocate a new one and can't get memory. 4560 */ 4561 ipsid_t * 4562 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4563 { 4564 ipsid_t *retval; 4565 char *nstr; 4566 int idlen = strlen(idstring) + 1; 4567 ipsec_stack_t *ipss = ns->netstack_ipsec; 4568 ipsif_t *bucket; 4569 4570 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4571 4572 mutex_enter(&bucket->ipsif_lock); 4573 4574 for (retval = bucket->ipsif_head; retval != NULL; 4575 retval = retval->ipsid_next) { 4576 if (idtype != retval->ipsid_type) 4577 continue; 4578 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4579 continue; 4580 4581 IPSID_REFHOLD(retval); 4582 mutex_exit(&bucket->ipsif_lock); 4583 return (retval); 4584 } 4585 4586 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4587 if (!retval) { 4588 mutex_exit(&bucket->ipsif_lock); 4589 return (NULL); 4590 } 4591 4592 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4593 if (!nstr) { 4594 mutex_exit(&bucket->ipsif_lock); 4595 kmem_free(retval, sizeof (*retval)); 4596 return (NULL); 4597 } 4598 4599 retval->ipsid_refcnt = 1; 4600 retval->ipsid_next = bucket->ipsif_head; 4601 if (retval->ipsid_next != NULL) 4602 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4603 retval->ipsid_ptpn = &bucket->ipsif_head; 4604 retval->ipsid_type = idtype; 4605 retval->ipsid_cid = nstr; 4606 bucket->ipsif_head = retval; 4607 bcopy(idstring, nstr, idlen); 4608 mutex_exit(&bucket->ipsif_lock); 4609 4610 return (retval); 4611 } 4612 4613 
/* 4614 * Garbage collect the identity hash table. 4615 */ 4616 void 4617 ipsid_gc(netstack_t *ns) 4618 { 4619 int i, len; 4620 ipsid_t *id, *nid; 4621 ipsif_t *bucket; 4622 ipsec_stack_t *ipss = ns->netstack_ipsec; 4623 4624 for (i = 0; i < IPSID_HASHSIZE; i++) { 4625 bucket = &ipss->ipsec_ipsid_buckets[i]; 4626 mutex_enter(&bucket->ipsif_lock); 4627 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4628 nid = id->ipsid_next; 4629 if (id->ipsid_refcnt == 0) { 4630 *id->ipsid_ptpn = nid; 4631 if (nid != NULL) 4632 nid->ipsid_ptpn = id->ipsid_ptpn; 4633 len = strlen(id->ipsid_cid) + 1; 4634 kmem_free(id->ipsid_cid, len); 4635 kmem_free(id, sizeof (*id)); 4636 } 4637 } 4638 mutex_exit(&bucket->ipsif_lock); 4639 } 4640 } 4641 4642 /* 4643 * Return true if two identities are the same. 4644 */ 4645 boolean_t 4646 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4647 { 4648 if (id1 == id2) 4649 return (B_TRUE); 4650 #ifdef DEBUG 4651 if ((id1 == NULL) || (id2 == NULL)) 4652 return (B_FALSE); 4653 /* 4654 * test that we're interning id's correctly.. 4655 */ 4656 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4657 (id1->ipsid_type != id2->ipsid_type)); 4658 #endif 4659 return (B_FALSE); 4660 } 4661 4662 /* 4663 * Initialize identity table; called during module initialization. 
4664 */ 4665 static void 4666 ipsid_init(netstack_t *ns) 4667 { 4668 ipsif_t *bucket; 4669 int i; 4670 ipsec_stack_t *ipss = ns->netstack_ipsec; 4671 4672 for (i = 0; i < IPSID_HASHSIZE; i++) { 4673 bucket = &ipss->ipsec_ipsid_buckets[i]; 4674 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4675 } 4676 } 4677 4678 /* 4679 * Free identity table (preparatory to module unload) 4680 */ 4681 static void 4682 ipsid_fini(netstack_t *ns) 4683 { 4684 ipsif_t *bucket; 4685 int i; 4686 ipsec_stack_t *ipss = ns->netstack_ipsec; 4687 4688 for (i = 0; i < IPSID_HASHSIZE; i++) { 4689 bucket = &ipss->ipsec_ipsid_buckets[i]; 4690 ASSERT(bucket->ipsif_head == NULL); 4691 mutex_destroy(&bucket->ipsif_lock); 4692 } 4693 } 4694 4695 /* 4696 * Update the minimum and maximum supported key sizes for the specified 4697 * algorithm, which is either a member of a netstack alg array or about to be, 4698 * and therefore must be called holding ipsec_alg_lock for write. 4699 */ 4700 void 4701 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 4702 netstack_t *ns) 4703 { 4704 size_t crypto_min = (size_t)-1, crypto_max = 0; 4705 size_t cur_crypto_min, cur_crypto_max; 4706 boolean_t is_valid; 4707 crypto_mechanism_info_t *mech_infos; 4708 uint_t nmech_infos; 4709 int crypto_rc, i; 4710 crypto_mech_usage_t mask; 4711 ipsec_stack_t *ipss = ns->netstack_ipsec; 4712 4713 ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock)); 4714 4715 /* 4716 * Compute the min, max, and default key sizes (in number of 4717 * increments to the default key size in bits) as defined 4718 * by the algorithm mappings. This range of key sizes is used 4719 * for policy related operations. The effective key sizes 4720 * supported by the framework could be more limited than 4721 * those defined for an algorithm. 
4722 */ 4723 alg->alg_default_bits = alg->alg_key_sizes[0]; 4724 alg->alg_default = 0; 4725 if (alg->alg_increment != 0) { 4726 /* key sizes are defined by range & increment */ 4727 alg->alg_minbits = alg->alg_key_sizes[1]; 4728 alg->alg_maxbits = alg->alg_key_sizes[2]; 4729 } else if (alg->alg_nkey_sizes == 0) { 4730 /* no specified key size for algorithm */ 4731 alg->alg_minbits = alg->alg_maxbits = 0; 4732 } else { 4733 /* key sizes are defined by enumeration */ 4734 alg->alg_minbits = (uint16_t)-1; 4735 alg->alg_maxbits = 0; 4736 4737 for (i = 0; i < alg->alg_nkey_sizes; i++) { 4738 if (alg->alg_key_sizes[i] < alg->alg_minbits) 4739 alg->alg_minbits = alg->alg_key_sizes[i]; 4740 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 4741 alg->alg_maxbits = alg->alg_key_sizes[i]; 4742 } 4743 } 4744 4745 if (!(alg->alg_flags & ALG_FLAG_VALID)) 4746 return; 4747 4748 /* 4749 * Mechanisms do not apply to the NULL encryption 4750 * algorithm, so simply return for this case. 4751 */ 4752 if (alg->alg_id == SADB_EALG_NULL) 4753 return; 4754 4755 /* 4756 * Find the min and max key sizes supported by the cryptographic 4757 * framework providers. 4758 */ 4759 4760 /* get the key sizes supported by the framework */ 4761 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 4762 &mech_infos, &nmech_infos, KM_SLEEP); 4763 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 4764 alg->alg_flags &= ~ALG_FLAG_VALID; 4765 return; 4766 } 4767 4768 /* min and max key sizes supported by framework */ 4769 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 4770 int unit_bits; 4771 4772 /* 4773 * Ignore entries that do not support the operations 4774 * needed for the algorithm type. 
4775 */ 4776 if (alg_type == IPSEC_ALG_AUTH) { 4777 mask = CRYPTO_MECH_USAGE_MAC; 4778 } else { 4779 mask = CRYPTO_MECH_USAGE_ENCRYPT | 4780 CRYPTO_MECH_USAGE_DECRYPT; 4781 } 4782 if ((mech_infos[i].mi_usage & mask) != mask) 4783 continue; 4784 4785 unit_bits = (mech_infos[i].mi_keysize_unit == 4786 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 4787 /* adjust min/max supported by framework */ 4788 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 4789 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 4790 4791 if (cur_crypto_min < crypto_min) 4792 crypto_min = cur_crypto_min; 4793 4794 /* 4795 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 4796 * the crypto framework which means "no upper limit". 4797 */ 4798 if (mech_infos[i].mi_max_key_size == 4799 CRYPTO_EFFECTIVELY_INFINITE) { 4800 crypto_max = (size_t)-1; 4801 } else if (cur_crypto_max > crypto_max) { 4802 crypto_max = cur_crypto_max; 4803 } 4804 4805 is_valid = B_TRUE; 4806 } 4807 4808 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 4809 nmech_infos); 4810 4811 if (!is_valid) { 4812 /* no key sizes supported by framework */ 4813 alg->alg_flags &= ~ALG_FLAG_VALID; 4814 return; 4815 } 4816 4817 /* 4818 * Determine min and max key sizes from alg_key_sizes[]. 4819 * defined for the algorithm entry. Adjust key sizes based on 4820 * those supported by the framework. 4821 */ 4822 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 4823 4824 /* 4825 * For backwards compatability, assume that the IV length 4826 * is the same as the data length. 4827 */ 4828 alg->alg_ivlen = alg->alg_datalen; 4829 4830 /* 4831 * Copy any algorithm parameters (if provided) into dedicated 4832 * elements in the ipsec_alginfo_t structure. 4833 * There may be a better place to put this code. 
4834 */ 4835 for (i = 0; i < alg->alg_nparams; i++) { 4836 switch (i) { 4837 case 0: 4838 /* Initialisation Vector length (bytes) */ 4839 alg->alg_ivlen = alg->alg_params[0]; 4840 break; 4841 case 1: 4842 /* Integrity Check Vector length (bytes) */ 4843 alg->alg_icvlen = alg->alg_params[1]; 4844 break; 4845 case 2: 4846 /* Salt length (bytes) */ 4847 alg->alg_saltlen = (uint8_t)alg->alg_params[2]; 4848 break; 4849 default: 4850 break; 4851 } 4852 } 4853 4854 /* Default if the IV length is not specified. */ 4855 if (alg_type == IPSEC_ALG_ENCR && alg->alg_ivlen == 0) 4856 alg->alg_ivlen = alg->alg_datalen; 4857 4858 alg_flag_check(alg); 4859 4860 if (alg->alg_increment != 0) { 4861 /* supported key sizes are defined by range & increment */ 4862 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 4863 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 4864 4865 alg->alg_ef_minbits = MAX(alg->alg_minbits, 4866 (uint16_t)crypto_min); 4867 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 4868 (uint16_t)crypto_max); 4869 4870 /* 4871 * If the sizes supported by the framework are outside 4872 * the range of sizes defined by the algorithm mappings, 4873 * the algorithm cannot be used. Check for this 4874 * condition here. 
4875 */ 4876 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 4877 alg->alg_flags &= ~ALG_FLAG_VALID; 4878 return; 4879 } 4880 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 4881 alg->alg_ef_default_bits = alg->alg_ef_minbits; 4882 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 4883 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 4884 } else if (alg->alg_nkey_sizes == 0) { 4885 /* no specified key size for algorithm */ 4886 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 4887 } else { 4888 /* supported key sizes are defined by enumeration */ 4889 alg->alg_ef_minbits = (uint16_t)-1; 4890 alg->alg_ef_maxbits = 0; 4891 4892 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 4893 /* 4894 * Ignore the current key size if it is not in the 4895 * range of sizes supported by the framework. 4896 */ 4897 if (alg->alg_key_sizes[i] < crypto_min || 4898 alg->alg_key_sizes[i] > crypto_max) 4899 continue; 4900 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 4901 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 4902 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 4903 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 4904 is_valid = B_TRUE; 4905 } 4906 4907 if (!is_valid) { 4908 alg->alg_flags &= ~ALG_FLAG_VALID; 4909 return; 4910 } 4911 alg->alg_ef_default = 0; 4912 } 4913 } 4914 4915 /* 4916 * Sanity check parameters provided by ipsecalgs(8). Assume that 4917 * the algoritm is marked as valid, there is a check at the top 4918 * of this function. If any of the checks below fail, the algorithm 4919 * entry is invalid. 4920 */ 4921 void 4922 alg_flag_check(ipsec_alginfo_t *alg) 4923 { 4924 alg->alg_flags &= ~ALG_FLAG_VALID; 4925 4926 /* 4927 * Can't have the algorithm marked as CCM and GCM. 4928 * Check the ALG_FLAG_COMBINED and ALG_FLAG_COUNTERMODE 4929 * flags are set for CCM & GCM. 
4930 */ 4931 if ((alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) == 4932 (ALG_FLAG_CCM|ALG_FLAG_GCM)) 4933 return; 4934 if (alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) { 4935 if (!(alg->alg_flags & ALG_FLAG_COUNTERMODE)) 4936 return; 4937 if (!(alg->alg_flags & ALG_FLAG_COMBINED)) 4938 return; 4939 } 4940 4941 /* 4942 * For ALG_FLAG_COUNTERMODE, check the parameters 4943 * fit in the ipsec_nonce_t structure. 4944 */ 4945 if (alg->alg_flags & ALG_FLAG_COUNTERMODE) { 4946 if (alg->alg_ivlen != sizeof (((ipsec_nonce_t *)NULL)->iv)) 4947 return; 4948 if (alg->alg_saltlen > sizeof (((ipsec_nonce_t *)NULL)->salt)) 4949 return; 4950 } 4951 if ((alg->alg_flags & ALG_FLAG_COMBINED) && 4952 (alg->alg_icvlen == 0)) 4953 return; 4954 4955 /* all is well. */ 4956 alg->alg_flags |= ALG_FLAG_VALID; 4957 } 4958 4959 /* 4960 * Free the memory used by the specified algorithm. 4961 */ 4962 void 4963 ipsec_alg_free(ipsec_alginfo_t *alg) 4964 { 4965 if (alg == NULL) 4966 return; 4967 4968 if (alg->alg_key_sizes != NULL) { 4969 kmem_free(alg->alg_key_sizes, 4970 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 4971 alg->alg_key_sizes = NULL; 4972 } 4973 if (alg->alg_block_sizes != NULL) { 4974 kmem_free(alg->alg_block_sizes, 4975 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 4976 alg->alg_block_sizes = NULL; 4977 } 4978 if (alg->alg_params != NULL) { 4979 kmem_free(alg->alg_params, 4980 (alg->alg_nparams + 1) * sizeof (uint16_t)); 4981 alg->alg_params = NULL; 4982 } 4983 kmem_free(alg, sizeof (*alg)); 4984 } 4985 4986 /* 4987 * Check the validity of the specified key size for an algorithm. 4988 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 
4989 */ 4990 boolean_t 4991 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 4992 { 4993 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 4994 return (B_FALSE); 4995 4996 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 4997 /* 4998 * If the key sizes are defined by enumeration, the new 4999 * key size must be equal to one of the supported values. 5000 */ 5001 int i; 5002 5003 for (i = 0; i < alg->alg_nkey_sizes; i++) 5004 if (key_size == alg->alg_key_sizes[i]) 5005 break; 5006 if (i == alg->alg_nkey_sizes) 5007 return (B_FALSE); 5008 } 5009 5010 return (B_TRUE); 5011 } 5012 5013 /* 5014 * Callback function invoked by the crypto framework when a provider 5015 * registers or unregisters. This callback updates the algorithms 5016 * tables when a crypto algorithm is no longer available or becomes 5017 * available, and triggers the freeing/creation of context templates 5018 * associated with existing SAs, if needed. 5019 * 5020 * Need to walk all stack instances since the callback is global 5021 * for all instances 5022 */ 5023 void 5024 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5025 { 5026 netstack_handle_t nh; 5027 netstack_t *ns; 5028 5029 netstack_next_init(&nh); 5030 while ((ns = netstack_next(&nh)) != NULL) { 5031 ipsec_prov_update_callback_stack(event, event_arg, ns); 5032 netstack_rele(ns); 5033 } 5034 netstack_next_fini(&nh); 5035 } 5036 5037 static void 5038 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5039 netstack_t *ns) 5040 { 5041 crypto_notify_event_change_t *prov_change = 5042 (crypto_notify_event_change_t *)event_arg; 5043 uint_t algidx, algid, algtype, mech_count, mech_idx; 5044 ipsec_alginfo_t *alg; 5045 ipsec_alginfo_t oalg; 5046 crypto_mech_name_t *mechs; 5047 boolean_t alg_changed = B_FALSE; 5048 ipsec_stack_t *ipss = ns->netstack_ipsec; 5049 5050 /* ignore events for which we didn't register */ 5051 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5052 
ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5053 " received from crypto framework\n", event)); 5054 return; 5055 } 5056 5057 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5058 if (mechs == NULL) 5059 return; 5060 5061 /* 5062 * Walk the list of currently defined IPsec algorithm. Update 5063 * the algorithm valid flag and trigger an update of the 5064 * SAs that depend on that algorithm. 5065 */ 5066 rw_enter(&ipss->ipsec_alg_lock, RW_WRITER); 5067 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5068 for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; 5069 algidx++) { 5070 5071 algid = ipss->ipsec_sortlist[algtype][algidx]; 5072 alg = ipss->ipsec_alglists[algtype][algid]; 5073 ASSERT(alg != NULL); 5074 5075 /* 5076 * Skip the algorithms which do not map to the 5077 * crypto framework provider being added or removed. 5078 */ 5079 if (strncmp(alg->alg_mech_name, 5080 prov_change->ec_mech_name, 5081 CRYPTO_MAX_MECH_NAME) != 0) 5082 continue; 5083 5084 /* 5085 * Determine if the mechanism is valid. If it 5086 * is not, mark the algorithm as being invalid. If 5087 * it is, mark the algorithm as being valid. 5088 */ 5089 for (mech_idx = 0; mech_idx < mech_count; mech_idx++) 5090 if (strncmp(alg->alg_mech_name, 5091 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0) 5092 break; 5093 if (mech_idx == mech_count && 5094 alg->alg_flags & ALG_FLAG_VALID) { 5095 alg->alg_flags &= ~ALG_FLAG_VALID; 5096 alg_changed = B_TRUE; 5097 } else if (mech_idx < mech_count && 5098 !(alg->alg_flags & ALG_FLAG_VALID)) { 5099 alg->alg_flags |= ALG_FLAG_VALID; 5100 alg_changed = B_TRUE; 5101 } 5102 5103 /* 5104 * Update the supported key sizes, regardless 5105 * of whether a crypto provider was added or 5106 * removed. 
5107 */ 5108 oalg = *alg; 5109 ipsec_alg_fix_min_max(alg, algtype, ns); 5110 if (!alg_changed && 5111 alg->alg_ef_minbits != oalg.alg_ef_minbits || 5112 alg->alg_ef_maxbits != oalg.alg_ef_maxbits || 5113 alg->alg_ef_default != oalg.alg_ef_default || 5114 alg->alg_ef_default_bits != 5115 oalg.alg_ef_default_bits) 5116 alg_changed = B_TRUE; 5117 5118 /* 5119 * Update the affected SAs if a software provider is 5120 * being added or removed. 5121 */ 5122 if (prov_change->ec_provider_type == 5123 CRYPTO_SW_PROVIDER) 5124 sadb_alg_update(algtype, alg->alg_id, 5125 prov_change->ec_change == 5126 CRYPTO_MECH_ADDED, ns); 5127 } 5128 } 5129 rw_exit(&ipss->ipsec_alg_lock); 5130 crypto_free_mech_list(mechs, mech_count); 5131 5132 if (alg_changed) { 5133 /* 5134 * An algorithm has changed, i.e. it became valid or 5135 * invalid, or its support key sizes have changed. 5136 * Notify ipsecah and ipsecesp of this change so 5137 * that they can send a SADB_REGISTER to their consumers. 5138 */ 5139 ipsecah_algs_changed(ns); 5140 ipsecesp_algs_changed(ns); 5141 } 5142 } 5143 5144 /* 5145 * Registers with the crypto framework to be notified of crypto 5146 * providers changes. Used to update the algorithm tables and 5147 * to free or create context templates if needed. Invoked after IPsec 5148 * is loaded successfully. 5149 * 5150 * This is called separately for each IP instance, so we ensure we only 5151 * register once. 5152 */ 5153 void 5154 ipsec_register_prov_update(void) 5155 { 5156 if (prov_update_handle != NULL) 5157 return; 5158 5159 prov_update_handle = crypto_notify_events( 5160 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); 5161 } 5162 5163 /* 5164 * Unregisters from the framework to be notified of crypto providers 5165 * changes. Called from ipsec_policy_g_destroy(). 
5166 */ 5167 static void 5168 ipsec_unregister_prov_update(void) 5169 { 5170 if (prov_update_handle != NULL) 5171 crypto_unnotify_events(prov_update_handle); 5172 } 5173 5174 /* 5175 * Tunnel-mode support routines. 5176 */ 5177 5178 /* 5179 * Returns an mblk chain suitable for putnext() if policies match and IPsec 5180 * SAs are available. If there's no per-tunnel policy, or a match comes back 5181 * with no match, then still return the packet and have global policy take 5182 * a crack at it in IP. 5183 * This updates the ip_xmit_attr with the IPsec policy. 5184 * 5185 * Remember -> we can be forwarding packets. Keep that in mind w.r.t. 5186 * inner-packet contents. 5187 */ 5188 mblk_t * 5189 ipsec_tun_outbound(mblk_t *mp, iptun_t *iptun, ipha_t *inner_ipv4, 5190 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, 5191 ip_xmit_attr_t *ixa) 5192 { 5193 ipsec_policy_head_t *polhead; 5194 ipsec_selector_t sel; 5195 mblk_t *nmp; 5196 boolean_t is_fragment; 5197 ipsec_policy_t *pol; 5198 ipsec_tun_pol_t *itp = iptun->iptun_itp; 5199 netstack_t *ns = iptun->iptun_ns; 5200 ipsec_stack_t *ipss = ns->netstack_ipsec; 5201 5202 ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || 5203 outer_ipv4 != NULL && outer_ipv6 == NULL); 5204 /* We take care of inners in a bit. */ 5205 5206 /* Are the IPsec fields initialized at all? 
*/ 5207 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) { 5208 ASSERT(ixa->ixa_ipsec_policy == NULL); 5209 ASSERT(ixa->ixa_ipsec_latch == NULL); 5210 ASSERT(ixa->ixa_ipsec_action == NULL); 5211 ASSERT(ixa->ixa_ipsec_ah_sa == NULL); 5212 ASSERT(ixa->ixa_ipsec_esp_sa == NULL); 5213 } 5214 5215 ASSERT(itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)); 5216 polhead = itp->itp_policy; 5217 5218 bzero(&sel, sizeof (sel)); 5219 if (inner_ipv4 != NULL) { 5220 ASSERT(inner_ipv6 == NULL); 5221 sel.ips_isv4 = B_TRUE; 5222 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5223 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5224 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5225 } else { 5226 ASSERT(inner_ipv6 != NULL); 5227 sel.ips_isv4 = B_FALSE; 5228 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5229 /* 5230 * We don't care about routing-header dests in the 5231 * forwarding/tunnel path, so just grab ip6_dst. 5232 */ 5233 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5234 } 5235 5236 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5237 /* 5238 * Caller can prepend the outer header, which means 5239 * inner_ipv[46] may be stuck in the middle. Pullup the whole 5240 * mess now if need-be, for easier processing later. Don't 5241 * forget to rewire the outer header too. 
5242 */ 5243 if (mp->b_cont != NULL) { 5244 nmp = msgpullup(mp, -1); 5245 if (nmp == NULL) { 5246 ip_drop_packet(mp, B_FALSE, NULL, 5247 DROPPER(ipss, ipds_spd_nomem), 5248 &ipss->ipsec_spd_dropper); 5249 return (NULL); 5250 } 5251 freemsg(mp); 5252 mp = nmp; 5253 if (outer_ipv4 != NULL) 5254 outer_ipv4 = (ipha_t *)mp->b_rptr; 5255 else 5256 outer_ipv6 = (ip6_t *)mp->b_rptr; 5257 if (inner_ipv4 != NULL) { 5258 inner_ipv4 = 5259 (ipha_t *)(mp->b_rptr + outer_hdr_len); 5260 } else { 5261 inner_ipv6 = 5262 (ip6_t *)(mp->b_rptr + outer_hdr_len); 5263 } 5264 } 5265 if (inner_ipv4 != NULL) { 5266 is_fragment = IS_V4_FRAGMENT( 5267 inner_ipv4->ipha_fragment_offset_and_flags); 5268 } else { 5269 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, mp, 5270 &is_fragment); 5271 } 5272 5273 if (is_fragment) { 5274 ipha_t *oiph; 5275 ipha_t *iph = NULL; 5276 ip6_t *ip6h = NULL; 5277 int hdr_len; 5278 uint16_t ip6_hdr_length; 5279 uint8_t v6_proto; 5280 uint8_t *v6_proto_p; 5281 5282 /* 5283 * We have a fragment we need to track! 5284 */ 5285 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5286 outer_hdr_len, ipss); 5287 if (mp == NULL) 5288 return (NULL); 5289 ASSERT(mp->b_cont == NULL); 5290 5291 /* 5292 * If we get here, we have a full fragment chain 5293 */ 5294 5295 oiph = (ipha_t *)mp->b_rptr; 5296 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5297 hdr_len = ((outer_hdr_len != 0) ? 
5298 IPH_HDR_LENGTH(oiph) : 0); 5299 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5300 } else { 5301 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5302 ip6h = (ip6_t *)mp->b_rptr; 5303 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 5304 &ip6_hdr_length, &v6_proto_p)) { 5305 ip_drop_packet_chain(mp, B_FALSE, NULL, 5306 DROPPER(ipss, 5307 ipds_spd_malformed_packet), 5308 &ipss->ipsec_spd_dropper); 5309 return (NULL); 5310 } 5311 hdr_len = ip6_hdr_length; 5312 } 5313 outer_hdr_len = hdr_len; 5314 5315 if (sel.ips_isv4) { 5316 if (iph == NULL) { 5317 /* Was v6 outer */ 5318 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5319 } 5320 inner_ipv4 = iph; 5321 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5322 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5323 sel.ips_protocol = 5324 (uint8_t)inner_ipv4->ipha_protocol; 5325 } else { 5326 inner_ipv6 = (ip6_t *)(mp->b_rptr + 5327 hdr_len); 5328 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5329 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5330 if (!ip_hdr_length_nexthdr_v6(mp, 5331 inner_ipv6, &ip6_hdr_length, &v6_proto_p)) { 5332 ip_drop_packet_chain(mp, B_FALSE, NULL, 5333 DROPPER(ipss, 5334 ipds_spd_malformed_frag), 5335 &ipss->ipsec_spd_dropper); 5336 return (NULL); 5337 } 5338 v6_proto = *v6_proto_p; 5339 sel.ips_protocol = v6_proto; 5340 #ifdef FRAGCACHE_DEBUG 5341 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5342 sel.ips_protocol); 5343 #endif 5344 } 5345 /* Ports are extracted below */ 5346 } 5347 5348 /* Get ports... */ 5349 if (!ipsec_init_outbound_ports(&sel, mp, 5350 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5351 /* callee did ip_drop_packet_chain() on mp. 
*/ 5352 return (NULL); 5353 } 5354 #ifdef FRAGCACHE_DEBUG 5355 if (inner_ipv4 != NULL) 5356 cmn_err(CE_WARN, 5357 "(v4) sel.ips_protocol = %d, " 5358 "sel.ips_local_port = %d, " 5359 "sel.ips_remote_port = %d\n", 5360 sel.ips_protocol, ntohs(sel.ips_local_port), 5361 ntohs(sel.ips_remote_port)); 5362 if (inner_ipv6 != NULL) 5363 cmn_err(CE_WARN, 5364 "(v6) sel.ips_protocol = %d, " 5365 "sel.ips_local_port = %d, " 5366 "sel.ips_remote_port = %d\n", 5367 sel.ips_protocol, ntohs(sel.ips_local_port), 5368 ntohs(sel.ips_remote_port)); 5369 #endif 5370 /* Success so far! */ 5371 } 5372 rw_enter(&polhead->iph_lock, RW_READER); 5373 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel); 5374 rw_exit(&polhead->iph_lock); 5375 if (pol == NULL) { 5376 /* 5377 * No matching policy on this tunnel, drop the packet. 5378 * 5379 * NOTE: Tunnel-mode tunnels are different from the 5380 * IP global transport mode policy head. For a tunnel-mode 5381 * tunnel, we drop the packet in lieu of passing it 5382 * along accepted the way a global-policy miss would. 5383 * 5384 * NOTE2: "negotiate transport" tunnels should match ALL 5385 * inbound packets, but we do not uncomment the ASSERT() 5386 * below because if/when we open PF_POLICY, a user can 5387 * shoot themself in the foot with a 0 priority. 5388 */ 5389 5390 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5391 #ifdef FRAGCACHE_DEBUG 5392 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5393 "per-port policy\n"); 5394 #endif 5395 ip_drop_packet_chain(mp, B_FALSE, NULL, 5396 DROPPER(ipss, ipds_spd_explicit), 5397 &ipss->ipsec_spd_dropper); 5398 return (NULL); 5399 } 5400 5401 #ifdef FRAGCACHE_DEBUG 5402 cmn_err(CE_WARN, "Having matching tunnel per-port policy\n"); 5403 #endif 5404 5405 /* 5406 * NOTE: ixa_cleanup() function will release pol references. 
5407 */ 5408 ixa->ixa_ipsec_policy = pol; 5409 /* 5410 * NOTE: There is a subtle difference between iptun_zoneid and 5411 * iptun_connp->conn_zoneid explained in iptun_conn_create(). When 5412 * interacting with the ip module, we must use conn_zoneid. 5413 */ 5414 ixa->ixa_zoneid = iptun->iptun_connp->conn_zoneid; 5415 5416 ASSERT((outer_ipv4 != NULL) ? (ixa->ixa_flags & IXAF_IS_IPV4) : 5417 !(ixa->ixa_flags & IXAF_IS_IPV4)); 5418 ASSERT(ixa->ixa_ipsec_policy != NULL); 5419 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 5420 5421 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5422 /* Set up transport mode for tunnelled packets. */ 5423 ixa->ixa_ipsec_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5424 IPPROTO_IPV6; 5425 return (mp); 5426 } 5427 5428 /* Fill in tunnel-mode goodies here. */ 5429 ixa->ixa_flags |= IXAF_IPSEC_TUNNEL; 5430 /* XXX Do I need to fill in all of the goodies here? */ 5431 if (inner_ipv4) { 5432 ixa->ixa_ipsec_inaf = AF_INET; 5433 ixa->ixa_ipsec_insrc[0] = 5434 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5435 ixa->ixa_ipsec_indst[0] = 5436 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5437 } else { 5438 ixa->ixa_ipsec_inaf = AF_INET6; 5439 ixa->ixa_ipsec_insrc[0] = 5440 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5441 ixa->ixa_ipsec_insrc[1] = 5442 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5443 ixa->ixa_ipsec_insrc[2] = 5444 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5445 ixa->ixa_ipsec_insrc[3] = 5446 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5447 ixa->ixa_ipsec_indst[0] = 5448 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5449 ixa->ixa_ipsec_indst[1] = 5450 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5451 ixa->ixa_ipsec_indst[2] = 5452 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5453 ixa->ixa_ipsec_indst[3] = 5454 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5455 } 5456 ixa->ixa_ipsec_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 
5457 ixa->ixa_ipsec_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5458 /* NOTE: These are used for transport mode too. */ 5459 ixa->ixa_ipsec_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5460 ixa->ixa_ipsec_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5461 ixa->ixa_ipsec_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5462 5463 return (mp); 5464 } 5465 5466 /* 5467 * NOTE: The following releases pol's reference and 5468 * calls ip_drop_packet() for me on NULL returns. 5469 */ 5470 mblk_t * 5471 ipsec_check_ipsecin_policy_reasm(mblk_t *attr_mp, ipsec_policy_t *pol, 5472 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns) 5473 { 5474 /* Assume attr_mp is a chain of b_next-linked ip_recv_attr mblk. */ 5475 mblk_t *data_chain = NULL, *data_tail = NULL; 5476 mblk_t *next; 5477 mblk_t *data_mp; 5478 ip_recv_attr_t iras; 5479 5480 while (attr_mp != NULL) { 5481 ASSERT(ip_recv_attr_is_mblk(attr_mp)); 5482 next = attr_mp->b_next; 5483 attr_mp->b_next = NULL; /* No tripping asserts. */ 5484 5485 data_mp = attr_mp->b_cont; 5486 attr_mp->b_cont = NULL; 5487 if (!ip_recv_attr_from_mblk(attr_mp, &iras)) { 5488 /* The ill or ip_stack_t disappeared on us */ 5489 freemsg(data_mp); /* ip_drop_packet?? */ 5490 ira_cleanup(&iras, B_TRUE); 5491 goto fail; 5492 } 5493 5494 /* 5495 * Need IPPOL_REFHOLD(pol) for extras because 5496 * ipsecin_policy does the refrele. 5497 */ 5498 IPPOL_REFHOLD(pol); 5499 5500 data_mp = ipsec_check_ipsecin_policy(data_mp, pol, inner_ipv4, 5501 inner_ipv6, pkt_unique, &iras, ns); 5502 ira_cleanup(&iras, B_TRUE); 5503 5504 if (data_mp == NULL) 5505 goto fail; 5506 5507 if (data_tail == NULL) { 5508 /* First one */ 5509 data_chain = data_tail = data_mp; 5510 } else { 5511 data_tail->b_next = data_mp; 5512 data_tail = data_mp; 5513 } 5514 attr_mp = next; 5515 } 5516 /* 5517 * One last release because either the loop bumped it up, or we never 5518 * called ipsec_check_ipsecin_policy(). 
5519 */ 5520 IPPOL_REFRELE(pol); 5521 5522 /* data_chain is ready for return to tun module. */ 5523 return (data_chain); 5524 5525 fail: 5526 /* 5527 * Need to get rid of any extra pol 5528 * references, and any remaining bits as well. 5529 */ 5530 IPPOL_REFRELE(pol); 5531 ipsec_freemsg_chain(data_chain); 5532 ipsec_freemsg_chain(next); /* ipdrop stats? */ 5533 return (NULL); 5534 } 5535 5536 /* 5537 * Return a message if the inbound packet passed an IPsec policy check. Returns 5538 * NULL if it failed or if it is a fragment needing its friends before a 5539 * policy check can be performed. 5540 * 5541 * Expects a non-NULL data_mp, and a non-NULL polhead. 5542 * The returned mblk may be a b_next chain of packets if fragments 5543 * neeeded to be collected for a proper policy check. 5544 * 5545 * This function calls ip_drop_packet() on data_mp if need be. 5546 * 5547 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller 5548 * is inspecting an ICMP packet. 5549 */ 5550 mblk_t * 5551 ipsec_tun_inbound(ip_recv_attr_t *ira, mblk_t *data_mp, ipsec_tun_pol_t *itp, 5552 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, 5553 ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns) 5554 { 5555 ipsec_policy_head_t *polhead; 5556 ipsec_selector_t sel; 5557 ipsec_policy_t *pol; 5558 uint16_t tmpport; 5559 selret_t rc; 5560 boolean_t port_policy_present, is_icmp, global_present; 5561 in6_addr_t tmpaddr; 5562 ipaddr_t tmp4; 5563 uint8_t flags, *inner_hdr; 5564 ipsec_stack_t *ipss = ns->netstack_ipsec; 5565 5566 sel.ips_is_icmp_inv_acq = 0; 5567 5568 if (outer_ipv4 != NULL) { 5569 ASSERT(outer_ipv6 == NULL); 5570 global_present = ipss->ipsec_inbound_v4_policy_present; 5571 } else { 5572 ASSERT(outer_ipv6 != NULL); 5573 global_present = ipss->ipsec_inbound_v6_policy_present; 5574 } 5575 5576 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || 5577 inner_ipv4 == NULL && inner_ipv6 != NULL); 5578 5579 if (outer_hdr_len < 0) { 5580 outer_hdr_len = 
(-outer_hdr_len); 5581 is_icmp = B_TRUE; 5582 } else { 5583 is_icmp = B_FALSE; 5584 } 5585 5586 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5587 mblk_t *mp = data_mp; 5588 5589 polhead = itp->itp_policy; 5590 /* 5591 * We need to perform full Tunnel-Mode enforcement, 5592 * and we need to have inner-header data for such enforcement. 5593 * 5594 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5595 * and on return. 5596 */ 5597 5598 port_policy_present = ((itp->itp_flags & 5599 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5600 /* 5601 * NOTE: Even if our policy is transport mode, set the 5602 * SEL_TUNNEL_MODE flag so ipsec_init_inbound_sel() can 5603 * do the right thing w.r.t. outer headers. 5604 */ 5605 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5606 (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5607 5608 rc = ipsec_init_inbound_sel(&sel, data_mp, inner_ipv4, 5609 inner_ipv6, flags); 5610 5611 switch (rc) { 5612 case SELRET_NOMEM: 5613 ip_drop_packet(data_mp, B_TRUE, NULL, 5614 DROPPER(ipss, ipds_spd_nomem), 5615 &ipss->ipsec_spd_dropper); 5616 return (NULL); 5617 case SELRET_TUNFRAG: 5618 /* 5619 * At this point, if we're cleartext, we don't want 5620 * to go there. 5621 */ 5622 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 5623 ip_drop_packet(data_mp, B_TRUE, NULL, 5624 DROPPER(ipss, ipds_spd_got_clear), 5625 &ipss->ipsec_spd_dropper); 5626 return (NULL); 5627 } 5628 5629 /* 5630 * Inner and outer headers may not be contiguous. 
5631 * Pullup the data_mp now to satisfy assumptions of 5632 * ipsec_fragcache_add() 5633 */ 5634 if (data_mp->b_cont != NULL) { 5635 mblk_t *nmp; 5636 5637 nmp = msgpullup(data_mp, -1); 5638 if (nmp == NULL) { 5639 ip_drop_packet(data_mp, B_TRUE, NULL, 5640 DROPPER(ipss, ipds_spd_nomem), 5641 &ipss->ipsec_spd_dropper); 5642 return (NULL); 5643 } 5644 freemsg(data_mp); 5645 data_mp = nmp; 5646 if (outer_ipv4 != NULL) 5647 outer_ipv4 = 5648 (ipha_t *)data_mp->b_rptr; 5649 else 5650 outer_ipv6 = 5651 (ip6_t *)data_mp->b_rptr; 5652 if (inner_ipv4 != NULL) { 5653 inner_ipv4 = 5654 (ipha_t *)(data_mp->b_rptr + 5655 outer_hdr_len); 5656 } else { 5657 inner_ipv6 = 5658 (ip6_t *)(data_mp->b_rptr + 5659 outer_hdr_len); 5660 } 5661 } 5662 5663 /* 5664 * If we need to queue the packet. First we 5665 * get an mblk with the attributes. ipsec_fragcache_add 5666 * will prepend that to the queued data and return 5667 * a list of b_next messages each of which starts with 5668 * the attribute mblk. 5669 */ 5670 mp = ip_recv_attr_to_mblk(ira); 5671 if (mp == NULL) { 5672 ip_drop_packet(data_mp, B_TRUE, NULL, 5673 DROPPER(ipss, ipds_spd_nomem), 5674 &ipss->ipsec_spd_dropper); 5675 return (NULL); 5676 } 5677 5678 mp = ipsec_fragcache_add(&itp->itp_fragcache, 5679 mp, data_mp, outer_hdr_len, ipss); 5680 5681 if (mp == NULL) { 5682 /* 5683 * Data is cached, fragment chain is not 5684 * complete. 5685 */ 5686 return (NULL); 5687 } 5688 5689 /* 5690 * If we get here, we have a full fragment chain. 5691 * Reacquire headers and selectors from first fragment. 
5692 */ 5693 ASSERT(ip_recv_attr_is_mblk(mp)); 5694 data_mp = mp->b_cont; 5695 inner_hdr = data_mp->b_rptr; 5696 if (outer_ipv4 != NULL) { 5697 inner_hdr += IPH_HDR_LENGTH( 5698 (ipha_t *)data_mp->b_rptr); 5699 } else { 5700 inner_hdr += ip_hdr_length_v6(data_mp, 5701 (ip6_t *)data_mp->b_rptr); 5702 } 5703 ASSERT(inner_hdr <= data_mp->b_wptr); 5704 5705 if (inner_ipv4 != NULL) { 5706 inner_ipv4 = (ipha_t *)inner_hdr; 5707 inner_ipv6 = NULL; 5708 } else { 5709 inner_ipv6 = (ip6_t *)inner_hdr; 5710 inner_ipv4 = NULL; 5711 } 5712 5713 /* 5714 * Use SEL_TUNNEL_MODE to take into account the outer 5715 * header. Use SEL_POST_FRAG so we always get ports. 5716 */ 5717 rc = ipsec_init_inbound_sel(&sel, data_mp, 5718 inner_ipv4, inner_ipv6, 5719 SEL_TUNNEL_MODE | SEL_POST_FRAG); 5720 switch (rc) { 5721 case SELRET_SUCCESS: 5722 /* 5723 * Get to same place as first caller's 5724 * SELRET_SUCCESS case. 5725 */ 5726 break; 5727 case SELRET_NOMEM: 5728 ip_drop_packet_chain(mp, B_TRUE, NULL, 5729 DROPPER(ipss, ipds_spd_nomem), 5730 &ipss->ipsec_spd_dropper); 5731 return (NULL); 5732 case SELRET_BADPKT: 5733 ip_drop_packet_chain(mp, B_TRUE, NULL, 5734 DROPPER(ipss, ipds_spd_malformed_frag), 5735 &ipss->ipsec_spd_dropper); 5736 return (NULL); 5737 case SELRET_TUNFRAG: 5738 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5739 /* FALLTHRU */ 5740 default: 5741 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5742 " returns bizarro 0x%x", rc); 5743 /* Guaranteed panic! */ 5744 ASSERT(rc == SELRET_NOMEM); 5745 return (NULL); 5746 } 5747 /* FALLTHRU */ 5748 case SELRET_SUCCESS: 5749 /* 5750 * Common case: 5751 * No per-port policy or a non-fragment. Keep going. 5752 */ 5753 break; 5754 case SELRET_BADPKT: 5755 /* 5756 * We may receive ICMP (with IPv6 inner) packets that 5757 * trigger this return value. Send 'em in for 5758 * enforcement checking. 
5759 */ 5760 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5761 "sending 'bad packet' in for enforcement"); 5762 break; 5763 default: 5764 cmn_err(CE_WARN, 5765 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5766 rc); 5767 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5768 return (NULL); 5769 } 5770 5771 if (is_icmp) { 5772 /* 5773 * Swap local/remote because this is an ICMP packet. 5774 */ 5775 tmpaddr = sel.ips_local_addr_v6; 5776 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5777 sel.ips_remote_addr_v6 = tmpaddr; 5778 tmpport = sel.ips_local_port; 5779 sel.ips_local_port = sel.ips_remote_port; 5780 sel.ips_remote_port = tmpport; 5781 } 5782 5783 /* find_policy_head() */ 5784 rw_enter(&polhead->iph_lock, RW_READER); 5785 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5786 &sel); 5787 rw_exit(&polhead->iph_lock); 5788 if (pol != NULL) { 5789 uint64_t pkt_unique; 5790 5791 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 5792 if (!pol->ipsp_act->ipa_allow_clear) { 5793 /* 5794 * XXX should never get here with 5795 * tunnel reassembled fragments? 5796 */ 5797 ASSERT(mp == data_mp); 5798 ip_drop_packet(data_mp, B_TRUE, NULL, 5799 DROPPER(ipss, ipds_spd_got_clear), 5800 &ipss->ipsec_spd_dropper); 5801 IPPOL_REFRELE(pol); 5802 return (NULL); 5803 } else { 5804 IPPOL_REFRELE(pol); 5805 return (mp); 5806 } 5807 } 5808 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 5809 sel.ips_local_port, 5810 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 5811 IPPROTO_ENCAP, sel.ips_protocol); 5812 5813 /* 5814 * NOTE: The following releases pol's reference and 5815 * calls ip_drop_packet() for me on NULL returns. 5816 * 5817 * "sel" is still good here, so let's use it! 5818 */ 5819 if (data_mp == mp) { 5820 /* A single packet without attributes */ 5821 data_mp = ipsec_check_ipsecin_policy(data_mp, 5822 pol, inner_ipv4, inner_ipv6, pkt_unique, 5823 ira, ns); 5824 } else { 5825 /* 5826 * We pass in the b_next chain of attr_mp's 5827 * and get back a b_next chain of data_mp's. 
5828 */ 5829 data_mp = ipsec_check_ipsecin_policy_reasm(mp, 5830 pol, inner_ipv4, inner_ipv6, pkt_unique, 5831 ns); 5832 } 5833 return (data_mp); 5834 } 5835 5836 /* 5837 * Else fallthru and check the global policy on the outer 5838 * header(s) if this tunnel is an old-style transport-mode 5839 * one. Drop the packet explicitly (no policy entry) for 5840 * a new-style tunnel-mode tunnel. 5841 */ 5842 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 5843 ip_drop_packet_chain(data_mp, B_TRUE, NULL, 5844 DROPPER(ipss, ipds_spd_explicit), 5845 &ipss->ipsec_spd_dropper); 5846 return (NULL); 5847 } 5848 } 5849 5850 /* 5851 * NOTE: If we reach here, we will not have packet chains from 5852 * fragcache_add(), because the only way I get chains is on a 5853 * tunnel-mode tunnel, which either returns with a pass, or gets 5854 * hit by the ip_drop_packet_chain() call right above here. 5855 */ 5856 ASSERT(data_mp->b_next == NULL); 5857 5858 /* If no per-tunnel security, check global policy now. */ 5859 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && !global_present) { 5860 if (ira->ira_flags & IRAF_TRUSTED_ICMP) { 5861 /* 5862 * This is an ICMP message that was geenrated locally. 5863 * We should accept it. 5864 */ 5865 return (data_mp); 5866 } 5867 5868 ip_drop_packet(data_mp, B_TRUE, NULL, 5869 DROPPER(ipss, ipds_spd_got_secure), 5870 &ipss->ipsec_spd_dropper); 5871 return (NULL); 5872 } 5873 5874 if (is_icmp) { 5875 /* 5876 * For ICMP packets, "outer_ipvN" is set to the outer header 5877 * that is *INSIDE* the ICMP payload. For global policy 5878 * checking, we need to reverse src/dst on the payload in 5879 * order to construct selectors appropriately. See "ripha" 5880 * constructions in ip.c. To avoid a bug like 6478464 (see 5881 * earlier in this file), we will actually exchange src/dst 5882 * in the packet, and reverse if after the call to 5883 * ipsec_check_global_policy(). 
5884 */ 5885 if (outer_ipv4 != NULL) { 5886 tmp4 = outer_ipv4->ipha_src; 5887 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 5888 outer_ipv4->ipha_dst = tmp4; 5889 } else { 5890 ASSERT(outer_ipv6 != NULL); 5891 tmpaddr = outer_ipv6->ip6_src; 5892 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 5893 outer_ipv6->ip6_dst = tmpaddr; 5894 } 5895 } 5896 5897 data_mp = ipsec_check_global_policy(data_mp, NULL, outer_ipv4, 5898 outer_ipv6, ira, ns); 5899 if (data_mp == NULL) 5900 return (NULL); 5901 5902 if (is_icmp) { 5903 /* Set things back to normal. */ 5904 if (outer_ipv4 != NULL) { 5905 tmp4 = outer_ipv4->ipha_src; 5906 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 5907 outer_ipv4->ipha_dst = tmp4; 5908 } else { 5909 /* No need for ASSERT()s now. */ 5910 tmpaddr = outer_ipv6->ip6_src; 5911 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 5912 outer_ipv6->ip6_dst = tmpaddr; 5913 } 5914 } 5915 5916 /* 5917 * At this point, we pretend it's a cleartext accepted 5918 * packet. 5919 */ 5920 return (data_mp); 5921 } 5922 5923 /* 5924 * AVL comparison routine for our list of tunnel polheads. 5925 */ 5926 static int 5927 tunnel_compare(const void *arg1, const void *arg2) 5928 { 5929 ipsec_tun_pol_t *left, *right; 5930 int rc; 5931 5932 left = (ipsec_tun_pol_t *)arg1; 5933 right = (ipsec_tun_pol_t *)arg2; 5934 5935 rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ); 5936 return (rc == 0 ? rc : (rc > 0 ? 1 : -1)); 5937 } 5938 5939 /* 5940 * Free a tunnel policy node. 
5941 */ 5942 void 5943 itp_free(ipsec_tun_pol_t *node, netstack_t *ns) 5944 { 5945 if (node->itp_policy != NULL) { 5946 IPPH_REFRELE(node->itp_policy, ns); 5947 node->itp_policy = NULL; 5948 } 5949 if (node->itp_inactive != NULL) { 5950 IPPH_REFRELE(node->itp_inactive, ns); 5951 node->itp_inactive = NULL; 5952 } 5953 mutex_destroy(&node->itp_lock); 5954 kmem_free(node, sizeof (*node)); 5955 } 5956 5957 void 5958 itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns) 5959 { 5960 ipsec_stack_t *ipss = ns->netstack_ipsec; 5961 5962 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 5963 ipss->ipsec_tunnel_policy_gen++; 5964 ipsec_fragcache_uninit(&node->itp_fragcache, ipss); 5965 avl_remove(&ipss->ipsec_tunnel_policies, node); 5966 rw_exit(&ipss->ipsec_tunnel_policy_lock); 5967 ITP_REFRELE(node, ns); 5968 } 5969 5970 /* 5971 * Public interface to look up a tunnel security policy by name. Used by 5972 * spdsock mostly. Returns "node" with a bumped refcnt. 5973 */ 5974 ipsec_tun_pol_t * 5975 get_tunnel_policy(char *name, netstack_t *ns) 5976 { 5977 ipsec_tun_pol_t *node, lookup; 5978 ipsec_stack_t *ipss = ns->netstack_ipsec; 5979 5980 (void) strncpy(lookup.itp_name, name, LIFNAMSIZ); 5981 5982 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 5983 node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 5984 &lookup, NULL); 5985 if (node != NULL) { 5986 ITP_REFHOLD(node); 5987 } 5988 rw_exit(&ipss->ipsec_tunnel_policy_lock); 5989 5990 return (node); 5991 } 5992 5993 /* 5994 * Public interface to walk all tunnel security polcies. Useful for spdsock 5995 * DUMP operations. iterator() will not consume a reference. 
5996 */ 5997 void 5998 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *), 5999 void *arg, netstack_t *ns) 6000 { 6001 ipsec_tun_pol_t *node; 6002 ipsec_stack_t *ipss = ns->netstack_ipsec; 6003 6004 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6005 for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL; 6006 node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) { 6007 iterator(node, arg, ns); 6008 } 6009 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6010 } 6011 6012 /* 6013 * Initialize policy head. This can only fail if there's a memory problem. 6014 */ 6015 static boolean_t 6016 tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns) 6017 { 6018 ipsec_stack_t *ipss = ns->netstack_ipsec; 6019 6020 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 6021 iph->iph_refs = 1; 6022 iph->iph_gen = 0; 6023 if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize, 6024 KM_SLEEP, B_FALSE, ns) != 0) { 6025 ipsec_polhead_free_table(iph); 6026 return (B_FALSE); 6027 } 6028 ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize); 6029 return (B_TRUE); 6030 } 6031 6032 /* 6033 * Create a tunnel policy node with "name". Set errno with 6034 * ENOMEM if there's a memory problem, and EEXIST if there's an existing 6035 * node. 
6036 */ 6037 ipsec_tun_pol_t * 6038 create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns) 6039 { 6040 ipsec_tun_pol_t *newbie, *existing; 6041 avl_index_t where; 6042 ipsec_stack_t *ipss = ns->netstack_ipsec; 6043 6044 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP); 6045 if (newbie == NULL) { 6046 *errno = ENOMEM; 6047 return (NULL); 6048 } 6049 if (!ipsec_fragcache_init(&newbie->itp_fragcache)) { 6050 kmem_free(newbie, sizeof (*newbie)); 6051 *errno = ENOMEM; 6052 return (NULL); 6053 } 6054 6055 (void) strncpy(newbie->itp_name, name, LIFNAMSIZ); 6056 6057 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6058 existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6059 newbie, &where); 6060 if (existing != NULL) { 6061 itp_free(newbie, ns); 6062 *errno = EEXIST; 6063 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6064 return (NULL); 6065 } 6066 ipss->ipsec_tunnel_policy_gen++; 6067 *gen = ipss->ipsec_tunnel_policy_gen; 6068 newbie->itp_refcnt = 2; /* One for the caller, one for the tree. 
*/ 6069 newbie->itp_next_policy_index = 1; 6070 avl_insert(&ipss->ipsec_tunnel_policies, newbie, where); 6071 mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL); 6072 newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t), 6073 KM_NOSLEEP); 6074 if (newbie->itp_policy == NULL) 6075 goto nomem; 6076 newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t), 6077 KM_NOSLEEP); 6078 if (newbie->itp_inactive == NULL) { 6079 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6080 goto nomem; 6081 } 6082 6083 if (!tunnel_polhead_init(newbie->itp_policy, ns)) { 6084 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6085 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6086 goto nomem; 6087 } else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) { 6088 IPPH_REFRELE(newbie->itp_policy, ns); 6089 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6090 goto nomem; 6091 } 6092 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6093 6094 return (newbie); 6095 nomem: 6096 *errno = ENOMEM; 6097 kmem_free(newbie, sizeof (*newbie)); 6098 return (NULL); 6099 } 6100 6101 /* 6102 * Given two addresses, find a tunnel instance's IPsec policy heads. 6103 * Returns NULL on failure. 6104 */ 6105 ipsec_tun_pol_t * 6106 itp_get_byaddr(uint32_t *laddr, uint32_t *faddr, int af, ip_stack_t *ipst) 6107 { 6108 conn_t *connp; 6109 iptun_t *iptun; 6110 ipsec_tun_pol_t *itp = NULL; 6111 6112 /* Classifiers are used to "src" being foreign. 
*/ 6113 if (af == AF_INET) { 6114 connp = ipcl_iptun_classify_v4((ipaddr_t *)faddr, 6115 (ipaddr_t *)laddr, ipst); 6116 } else { 6117 ASSERT(af == AF_INET6); 6118 ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)laddr)); 6119 ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)faddr)); 6120 connp = ipcl_iptun_classify_v6((in6_addr_t *)faddr, 6121 (in6_addr_t *)laddr, ipst); 6122 } 6123 6124 if (connp == NULL) 6125 return (NULL); 6126 6127 if (IPCL_IS_IPTUN(connp)) { 6128 iptun = connp->conn_iptun; 6129 if (iptun != NULL) { 6130 itp = iptun->iptun_itp; 6131 if (itp != NULL) { 6132 /* Braces due to the macro's nature... */ 6133 ITP_REFHOLD(itp); 6134 } 6135 } /* Else itp is already NULL. */ 6136 } 6137 6138 CONN_DEC_REF(connp); 6139 return (itp); 6140 } 6141 6142 /* 6143 * Frag cache code, based on SunScreen 3.2 source 6144 * screen/kernel/common/screen_fragcache.c 6145 */ 6146 6147 #define IPSEC_FRAG_TTL_MAX 5 6148 /* 6149 * Note that the following parameters create 256 hash buckets 6150 * with 1024 free entries to be distributed. Things are cleaned 6151 * periodically and are attempted to be cleaned when there is no 6152 * free space, but this system errs on the side of dropping packets 6153 * over creating memory exhaustion. We may decide to make hash 6154 * factor a tunable if this proves to be a bad decision. 6155 */ 6156 #define IPSEC_FRAG_HASH_SLOTS (1<<8) 6157 #define IPSEC_FRAG_HASH_FACTOR 4 6158 #define IPSEC_FRAG_HASH_SIZE (IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR) 6159 6160 #define IPSEC_FRAG_HASH_MASK (IPSEC_FRAG_HASH_SLOTS - 1) 6161 #define IPSEC_FRAG_HASH_FUNC(id) (((id) & IPSEC_FRAG_HASH_MASK) ^ \ 6162 (((id) / \ 6163 (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \ 6164 IPSEC_FRAG_HASH_MASK)) 6165 6166 /* Maximum fragments per packet. 
48 bytes payload x 1366 packets > 64KB */ 6167 #define IPSEC_MAX_FRAGS 1366 6168 6169 #define V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6170 IPH_OFFSET) << 3) 6171 #define V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6172 IPH_MF) 6173 6174 /* 6175 * Initialize an ipsec fragcache instance. 6176 * Returns B_FALSE if memory allocation fails. 6177 */ 6178 boolean_t 6179 ipsec_fragcache_init(ipsec_fragcache_t *frag) 6180 { 6181 ipsec_fragcache_entry_t *ftemp; 6182 int i; 6183 6184 mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL); 6185 frag->itpf_ptr = (ipsec_fragcache_entry_t **) 6186 kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) * 6187 IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP); 6188 if (frag->itpf_ptr == NULL) 6189 return (B_FALSE); 6190 6191 ftemp = (ipsec_fragcache_entry_t *) 6192 kmem_zalloc(sizeof (ipsec_fragcache_entry_t) * 6193 IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP); 6194 if (ftemp == NULL) { 6195 kmem_free(frag->itpf_ptr, sizeof (ipsec_fragcache_entry_t *) * 6196 IPSEC_FRAG_HASH_SLOTS); 6197 return (B_FALSE); 6198 } 6199 6200 frag->itpf_freelist = NULL; 6201 6202 for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) { 6203 ftemp->itpfe_next = frag->itpf_freelist; 6204 frag->itpf_freelist = ftemp; 6205 ftemp++; 6206 } 6207 6208 frag->itpf_expire_hint = 0; 6209 6210 return (B_TRUE); 6211 } 6212 6213 void 6214 ipsec_fragcache_uninit(ipsec_fragcache_t *frag, ipsec_stack_t *ipss) 6215 { 6216 ipsec_fragcache_entry_t *fep; 6217 int i; 6218 6219 mutex_enter(&frag->itpf_lock); 6220 if (frag->itpf_ptr) { 6221 /* Delete any existing fragcache entry chains */ 6222 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6223 fep = (frag->itpf_ptr)[i]; 6224 while (fep != NULL) { 6225 /* Returned fep is next in chain or NULL */ 6226 fep = fragcache_delentry(i, fep, frag, ipss); 6227 } 6228 } 6229 /* 6230 * Chase the pointers back to the beginning 6231 * of the memory allocation and then 6232 * get rid of the allocated freelist 6233 */ 6234 while 
(frag->itpf_freelist->itpfe_next != NULL) 6235 frag->itpf_freelist = frag->itpf_freelist->itpfe_next; 6236 /* 6237 * XXX - If we ever dynamically grow the freelist 6238 * then we'll have to free entries individually 6239 * or determine how many entries or chunks we have 6240 * grown since the initial allocation. 6241 */ 6242 kmem_free(frag->itpf_freelist, 6243 sizeof (ipsec_fragcache_entry_t) * 6244 IPSEC_FRAG_HASH_SIZE); 6245 /* Free the fragcache structure */ 6246 kmem_free(frag->itpf_ptr, 6247 sizeof (ipsec_fragcache_entry_t *) * 6248 IPSEC_FRAG_HASH_SLOTS); 6249 } 6250 mutex_exit(&frag->itpf_lock); 6251 mutex_destroy(&frag->itpf_lock); 6252 } 6253 6254 /* 6255 * Add a fragment to the fragment cache. Consumes mp if NULL is returned. 6256 * Returns mp if a whole fragment has been assembled, NULL otherwise 6257 * The returned mp could be a b_next chain of fragments. 6258 * 6259 * The iramp argument is set on inbound; NULL if outbound. 6260 */ 6261 mblk_t * 6262 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *iramp, mblk_t *mp, 6263 int outer_hdr_len, ipsec_stack_t *ipss) 6264 { 6265 boolean_t is_v4; 6266 time_t itpf_time; 6267 ipha_t *iph; 6268 ipha_t *oiph; 6269 ip6_t *ip6h = NULL; 6270 uint8_t v6_proto; 6271 uint8_t *v6_proto_p; 6272 uint16_t ip6_hdr_length; 6273 ip_pkt_t ipp; 6274 ip6_frag_t *fraghdr; 6275 ipsec_fragcache_entry_t *fep; 6276 int i; 6277 mblk_t *nmp, *prevmp; 6278 int firstbyte, lastbyte; 6279 int offset; 6280 int last; 6281 boolean_t inbound = (iramp != NULL); 6282 6283 #ifdef FRAGCACHE_DEBUG 6284 cmn_err(CE_WARN, "Fragcache: %s\n", inbound ? "INBOUND" : "OUTBOUND"); 6285 #endif 6286 v6_proto = 0; 6287 fraghdr = NULL; 6288 6289 /* 6290 * You're on the slow path, so insure that every packet in the 6291 * cache is a single-mblk one. 
6292 */ 6293 if (mp->b_cont != NULL) { 6294 nmp = msgpullup(mp, -1); 6295 if (nmp == NULL) { 6296 ip_drop_packet(mp, inbound, NULL, 6297 DROPPER(ipss, ipds_spd_nomem), 6298 &ipss->ipsec_spd_dropper); 6299 if (inbound) 6300 (void) ip_recv_attr_free_mblk(iramp); 6301 return (NULL); 6302 } 6303 freemsg(mp); 6304 mp = nmp; 6305 } 6306 6307 mutex_enter(&frag->itpf_lock); 6308 6309 oiph = (ipha_t *)mp->b_rptr; 6310 iph = (ipha_t *)(mp->b_rptr + outer_hdr_len); 6311 6312 if (IPH_HDR_VERSION(iph) == IPV4_VERSION) { 6313 is_v4 = B_TRUE; 6314 } else { 6315 ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION); 6316 ip6h = (ip6_t *)(mp->b_rptr + outer_hdr_len); 6317 6318 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip6_hdr_length, 6319 &v6_proto_p)) { 6320 /* 6321 * Find upper layer protocol. 6322 * If it fails we have a malformed packet 6323 */ 6324 mutex_exit(&frag->itpf_lock); 6325 ip_drop_packet(mp, inbound, NULL, 6326 DROPPER(ipss, ipds_spd_malformed_packet), 6327 &ipss->ipsec_spd_dropper); 6328 if (inbound) 6329 (void) ip_recv_attr_free_mblk(iramp); 6330 return (NULL); 6331 } else { 6332 v6_proto = *v6_proto_p; 6333 } 6334 6335 6336 bzero(&ipp, sizeof (ipp)); 6337 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &ipp, NULL); 6338 if (!(ipp.ipp_fields & IPPF_FRAGHDR)) { 6339 /* 6340 * We think this is a fragment, but didn't find 6341 * a fragment header. Something is wrong. 6342 */ 6343 mutex_exit(&frag->itpf_lock); 6344 ip_drop_packet(mp, inbound, NULL, 6345 DROPPER(ipss, ipds_spd_malformed_frag), 6346 &ipss->ipsec_spd_dropper); 6347 if (inbound) 6348 (void) ip_recv_attr_free_mblk(iramp); 6349 return (NULL); 6350 } 6351 fraghdr = ipp.ipp_fraghdr; 6352 is_v4 = B_FALSE; 6353 } 6354 6355 /* Anything to cleanup? */ 6356 6357 /* 6358 * This cleanup call could be put in a timer loop 6359 * but it may actually be just as reasonable a decision to 6360 * leave it here. The disadvantage is this only gets called when 6361 * frags are added. 
The advantage is that it is not 6362 * susceptible to race conditions like a time-based cleanup 6363 * may be. 6364 */ 6365 itpf_time = gethrestime_sec(); 6366 if (itpf_time >= frag->itpf_expire_hint) 6367 ipsec_fragcache_clean(frag, ipss); 6368 6369 /* Lookup to see if there is an existing entry */ 6370 6371 if (is_v4) 6372 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6373 else 6374 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6375 6376 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6377 if (is_v4) { 6378 ASSERT(iph != NULL); 6379 if ((fep->itpfe_id == iph->ipha_ident) && 6380 (fep->itpfe_src == iph->ipha_src) && 6381 (fep->itpfe_dst == iph->ipha_dst) && 6382 (fep->itpfe_proto == iph->ipha_protocol)) 6383 break; 6384 } else { 6385 ASSERT(fraghdr != NULL); 6386 ASSERT(fep != NULL); 6387 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6388 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6389 &ip6h->ip6_src) && 6390 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6391 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6392 break; 6393 } 6394 } 6395 6396 if (is_v4) { 6397 firstbyte = V4_FRAG_OFFSET(iph); 6398 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6399 IPH_HDR_LENGTH(iph); 6400 last = (V4_MORE_FRAGS(iph) == 0); 6401 #ifdef FRAGCACHE_DEBUG 6402 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6403 "is_last_frag = %d, id = %d, mp = %p\n", firstbyte, 6404 lastbyte, last, iph->ipha_ident, mp); 6405 #endif 6406 } else { 6407 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6408 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6409 sizeof (ip6_t) - ip6_hdr_length; 6410 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6411 #ifdef FRAGCACHE_DEBUG 6412 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6413 "is_last_frag = %d, id = %d, fraghdr = %p, mp = %p\n", 6414 firstbyte, lastbyte, last, fraghdr->ip6f_ident, fraghdr, 6415 mp); 6416 #endif 6417 } 6418 6419 /* check for bogus fragments and delete the entry */ 6420 if (firstbyte > 0 && 
firstbyte <= 8) { 6421 if (fep != NULL) 6422 (void) fragcache_delentry(i, fep, frag, ipss); 6423 mutex_exit(&frag->itpf_lock); 6424 ip_drop_packet(mp, inbound, NULL, 6425 DROPPER(ipss, ipds_spd_malformed_frag), 6426 &ipss->ipsec_spd_dropper); 6427 if (inbound) 6428 (void) ip_recv_attr_free_mblk(iramp); 6429 return (NULL); 6430 } 6431 6432 /* Not found, allocate a new entry */ 6433 if (fep == NULL) { 6434 if (frag->itpf_freelist == NULL) { 6435 /* see if there is some space */ 6436 ipsec_fragcache_clean(frag, ipss); 6437 if (frag->itpf_freelist == NULL) { 6438 mutex_exit(&frag->itpf_lock); 6439 ip_drop_packet(mp, inbound, NULL, 6440 DROPPER(ipss, ipds_spd_nomem), 6441 &ipss->ipsec_spd_dropper); 6442 if (inbound) 6443 (void) ip_recv_attr_free_mblk(iramp); 6444 return (NULL); 6445 } 6446 } 6447 6448 fep = frag->itpf_freelist; 6449 frag->itpf_freelist = fep->itpfe_next; 6450 6451 if (is_v4) { 6452 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6453 sizeof (struct in_addr)); 6454 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6455 sizeof (struct in_addr)); 6456 fep->itpfe_id = iph->ipha_ident; 6457 fep->itpfe_proto = iph->ipha_protocol; 6458 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6459 } else { 6460 bcopy((in6_addr_t *)&ip6h->ip6_src, 6461 (in6_addr_t *)&fep->itpfe_src6, 6462 sizeof (struct in6_addr)); 6463 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6464 (in6_addr_t *)&fep->itpfe_dst6, 6465 sizeof (struct in6_addr)); 6466 fep->itpfe_id = fraghdr->ip6f_ident; 6467 fep->itpfe_proto = v6_proto; 6468 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6469 } 6470 itpf_time = gethrestime_sec(); 6471 fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1; 6472 fep->itpfe_last = 0; 6473 fep->itpfe_fraglist = NULL; 6474 fep->itpfe_depth = 0; 6475 fep->itpfe_next = (frag->itpf_ptr)[i]; 6476 (frag->itpf_ptr)[i] = fep; 6477 6478 if (frag->itpf_expire_hint > fep->itpfe_exp) 6479 frag->itpf_expire_hint = fep->itpfe_exp; 6480 6481 } 6482 6483 /* Insert it in the frag list */ 6484 /* 
List is in order by starting offset of fragments */ 6485 6486 prevmp = NULL; 6487 for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) { 6488 ipha_t *niph; 6489 ipha_t *oniph; 6490 ip6_t *nip6h; 6491 ip_pkt_t nipp; 6492 ip6_frag_t *nfraghdr; 6493 uint16_t nip6_hdr_length; 6494 uint8_t *nv6_proto_p; 6495 int nfirstbyte, nlastbyte; 6496 char *data, *ndata; 6497 mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp); 6498 int hdr_len; 6499 6500 oniph = (ipha_t *)mp->b_rptr; 6501 nip6h = NULL; 6502 niph = NULL; 6503 6504 /* 6505 * Determine outer header type and length and set 6506 * pointers appropriately 6507 */ 6508 6509 if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) { 6510 hdr_len = ((outer_hdr_len != 0) ? 6511 IPH_HDR_LENGTH(oiph) : 0); 6512 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6513 } else { 6514 ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION); 6515 ASSERT(ndata_mp->b_cont == NULL); 6516 nip6h = (ip6_t *)ndata_mp->b_rptr; 6517 (void) ip_hdr_length_nexthdr_v6(ndata_mp, nip6h, 6518 &nip6_hdr_length, &v6_proto_p); 6519 hdr_len = ((outer_hdr_len != 0) ? 
nip6_hdr_length : 0); 6520 } 6521 6522 /* 6523 * Determine inner header type and length and set 6524 * pointers appropriately 6525 */ 6526 6527 if (is_v4) { 6528 if (niph == NULL) { 6529 /* Was v6 outer */ 6530 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6531 } 6532 nfirstbyte = V4_FRAG_OFFSET(niph); 6533 nlastbyte = nfirstbyte + ntohs(niph->ipha_length) - 6534 IPH_HDR_LENGTH(niph); 6535 } else { 6536 ASSERT(ndata_mp->b_cont == NULL); 6537 nip6h = (ip6_t *)(ndata_mp->b_rptr + hdr_len); 6538 if (!ip_hdr_length_nexthdr_v6(ndata_mp, nip6h, 6539 &nip6_hdr_length, &nv6_proto_p)) { 6540 mutex_exit(&frag->itpf_lock); 6541 ip_drop_packet_chain(nmp, inbound, NULL, 6542 DROPPER(ipss, ipds_spd_malformed_frag), 6543 &ipss->ipsec_spd_dropper); 6544 ipsec_freemsg_chain(ndata_mp); 6545 if (inbound) 6546 (void) ip_recv_attr_free_mblk(iramp); 6547 return (NULL); 6548 } 6549 bzero(&nipp, sizeof (nipp)); 6550 (void) ip_find_hdr_v6(ndata_mp, nip6h, B_FALSE, &nipp, 6551 NULL); 6552 nfraghdr = nipp.ipp_fraghdr; 6553 nfirstbyte = ntohs(nfraghdr->ip6f_offlg & 6554 IP6F_OFF_MASK); 6555 nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) + 6556 sizeof (ip6_t) - nip6_hdr_length; 6557 } 6558 6559 /* Check for overlapping fragments */ 6560 if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) { 6561 /* 6562 * Overlap Check: 6563 * ~~~~--------- # Check if the newly 6564 * ~ ndata_mp| # received fragment 6565 * ~~~~--------- # overlaps with the 6566 * ---------~~~~~~ # current fragment. 
6567 * | mp ~ 6568 * ---------~~~~~~ 6569 */ 6570 if (is_v4) { 6571 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6572 firstbyte - nfirstbyte; 6573 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6574 } else { 6575 data = (char *)ip6h + 6576 nip6_hdr_length + firstbyte - 6577 nfirstbyte; 6578 ndata = (char *)nip6h + nip6_hdr_length; 6579 } 6580 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) - 6581 firstbyte)) { 6582 /* Overlapping data does not match */ 6583 (void) fragcache_delentry(i, fep, frag, ipss); 6584 mutex_exit(&frag->itpf_lock); 6585 ip_drop_packet(mp, inbound, NULL, 6586 DROPPER(ipss, ipds_spd_overlap_frag), 6587 &ipss->ipsec_spd_dropper); 6588 if (inbound) 6589 (void) ip_recv_attr_free_mblk(iramp); 6590 return (NULL); 6591 } 6592 /* Part of defense for jolt2.c fragmentation attack */ 6593 if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) { 6594 /* 6595 * Check for identical or subset fragments: 6596 * ---------- ~~~~--------~~~~~ 6597 * | nmp | or ~ nmp ~ 6598 * ---------- ~~~~--------~~~~~ 6599 * ---------- ------ 6600 * | mp | | mp | 6601 * ---------- ------ 6602 */ 6603 mutex_exit(&frag->itpf_lock); 6604 ip_drop_packet(mp, inbound, NULL, 6605 DROPPER(ipss, ipds_spd_evil_frag), 6606 &ipss->ipsec_spd_dropper); 6607 if (inbound) 6608 (void) ip_recv_attr_free_mblk(iramp); 6609 return (NULL); 6610 } 6611 6612 } 6613 6614 /* Correct location for this fragment? */ 6615 if (firstbyte <= nfirstbyte) { 6616 /* 6617 * Check if the tail end of the new fragment overlaps 6618 * with the head of the current fragment. 
6619 * --------~~~~~~~ 6620 * | nmp ~ 6621 * --------~~~~~~~ 6622 * ~~~~~-------- 6623 * ~ mp | 6624 * ~~~~~-------- 6625 */ 6626 if (lastbyte > nfirstbyte) { 6627 /* Fragments overlap */ 6628 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6629 firstbyte - nfirstbyte; 6630 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6631 if (is_v4) { 6632 data = (char *)iph + 6633 IPH_HDR_LENGTH(iph) + firstbyte - 6634 nfirstbyte; 6635 ndata = (char *)niph + 6636 IPH_HDR_LENGTH(niph); 6637 } else { 6638 data = (char *)ip6h + 6639 nip6_hdr_length + firstbyte - 6640 nfirstbyte; 6641 ndata = (char *)nip6h + nip6_hdr_length; 6642 } 6643 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) 6644 - nfirstbyte)) { 6645 /* Overlap mismatch */ 6646 (void) fragcache_delentry(i, fep, frag, 6647 ipss); 6648 mutex_exit(&frag->itpf_lock); 6649 ip_drop_packet(mp, inbound, NULL, 6650 DROPPER(ipss, 6651 ipds_spd_overlap_frag), 6652 &ipss->ipsec_spd_dropper); 6653 if (inbound) { 6654 (void) ip_recv_attr_free_mblk( 6655 iramp); 6656 } 6657 return (NULL); 6658 } 6659 } 6660 6661 /* 6662 * Fragment does not illegally overlap and can now 6663 * be inserted into the chain 6664 */ 6665 break; 6666 } 6667 6668 prevmp = nmp; 6669 } 6670 /* Prepend the attributes before we link it in */ 6671 if (iramp != NULL) { 6672 ASSERT(iramp->b_cont == NULL); 6673 iramp->b_cont = mp; 6674 mp = iramp; 6675 iramp = NULL; 6676 } 6677 mp->b_next = nmp; 6678 6679 if (prevmp == NULL) { 6680 fep->itpfe_fraglist = mp; 6681 } else { 6682 prevmp->b_next = mp; 6683 } 6684 if (last) 6685 fep->itpfe_last = 1; 6686 6687 /* Part of defense for jolt2.c fragmentation attack */ 6688 if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) { 6689 (void) fragcache_delentry(i, fep, frag, ipss); 6690 mutex_exit(&frag->itpf_lock); 6691 if (inbound) 6692 mp = ip_recv_attr_free_mblk(mp); 6693 6694 ip_drop_packet(mp, inbound, NULL, 6695 DROPPER(ipss, ipds_spd_max_frags), 6696 &ipss->ipsec_spd_dropper); 6697 return (NULL); 6698 } 6699 6700 /* Check for complete packet 
*/ 6701 6702 if (!fep->itpfe_last) { 6703 mutex_exit(&frag->itpf_lock); 6704 #ifdef FRAGCACHE_DEBUG 6705 cmn_err(CE_WARN, "Fragment cached, last not yet seen.\n"); 6706 #endif 6707 return (NULL); 6708 } 6709 6710 offset = 0; 6711 for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) { 6712 mblk_t *data_mp = (inbound ? mp->b_cont : mp); 6713 int hdr_len; 6714 6715 oiph = (ipha_t *)data_mp->b_rptr; 6716 ip6h = NULL; 6717 iph = NULL; 6718 6719 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 6720 hdr_len = ((outer_hdr_len != 0) ? 6721 IPH_HDR_LENGTH(oiph) : 0); 6722 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6723 } else { 6724 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 6725 ASSERT(data_mp->b_cont == NULL); 6726 ip6h = (ip6_t *)data_mp->b_rptr; 6727 (void) ip_hdr_length_nexthdr_v6(data_mp, ip6h, 6728 &ip6_hdr_length, &v6_proto_p); 6729 hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0); 6730 } 6731 6732 /* Calculate current fragment start/end */ 6733 if (is_v4) { 6734 if (iph == NULL) { 6735 /* Was v6 outer */ 6736 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6737 } 6738 firstbyte = V4_FRAG_OFFSET(iph); 6739 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6740 IPH_HDR_LENGTH(iph); 6741 } else { 6742 ASSERT(data_mp->b_cont == NULL); 6743 ip6h = (ip6_t *)(data_mp->b_rptr + hdr_len); 6744 if (!ip_hdr_length_nexthdr_v6(data_mp, ip6h, 6745 &ip6_hdr_length, &v6_proto_p)) { 6746 mutex_exit(&frag->itpf_lock); 6747 ip_drop_packet_chain(mp, inbound, NULL, 6748 DROPPER(ipss, ipds_spd_malformed_frag), 6749 &ipss->ipsec_spd_dropper); 6750 return (NULL); 6751 } 6752 v6_proto = *v6_proto_p; 6753 bzero(&ipp, sizeof (ipp)); 6754 (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, 6755 NULL); 6756 fraghdr = ipp.ipp_fraghdr; 6757 firstbyte = ntohs(fraghdr->ip6f_offlg & 6758 IP6F_OFF_MASK); 6759 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6760 sizeof (ip6_t) - ip6_hdr_length; 6761 } 6762 6763 /* 6764 * If this fragment is greater than current offset, 6765 * we have a missing 
fragment so return NULL 6766 */ 6767 if (firstbyte > offset) { 6768 mutex_exit(&frag->itpf_lock); 6769 #ifdef FRAGCACHE_DEBUG 6770 /* 6771 * Note, this can happen when the last frag 6772 * gets sent through because it is smaller 6773 * than the MTU. It is not necessarily an 6774 * error condition. 6775 */ 6776 cmn_err(CE_WARN, "Frag greater than offset! : " 6777 "missing fragment: firstbyte = %d, offset = %d, " 6778 "mp = %p\n", firstbyte, offset, mp); 6779 #endif 6780 return (NULL); 6781 } 6782 #ifdef FRAGCACHE_DEBUG 6783 cmn_err(CE_WARN, "Frag offsets : " 6784 "firstbyte = %d, offset = %d, mp = %p\n", 6785 firstbyte, offset, mp); 6786 #endif 6787 6788 /* 6789 * If we are at the last fragment, we have the complete 6790 * packet, so rechain things and return it to caller 6791 * for processing 6792 */ 6793 6794 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6795 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6796 mp = fep->itpfe_fraglist; 6797 fep->itpfe_fraglist = NULL; 6798 (void) fragcache_delentry(i, fep, frag, ipss); 6799 mutex_exit(&frag->itpf_lock); 6800 6801 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6802 65535)) || (!is_v4 && (firstbyte + 6803 ntohs(ip6h->ip6_plen) > 65535))) { 6804 /* It is an invalid "ping-o-death" packet */ 6805 /* Discard it */ 6806 ip_drop_packet_chain(mp, inbound, NULL, 6807 DROPPER(ipss, ipds_spd_evil_frag), 6808 &ipss->ipsec_spd_dropper); 6809 return (NULL); 6810 } 6811 #ifdef FRAGCACHE_DEBUG 6812 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6813 "mp->b_next = %p", mp, mp->b_next); 6814 #endif 6815 /* 6816 * For inbound case, mp has attrmp b_next'd chain 6817 * For outbound case, it is just data mp chain 6818 */ 6819 return (mp); 6820 } 6821 6822 /* 6823 * Update new ending offset if this 6824 * fragment extends the packet 6825 */ 6826 if (offset < lastbyte) 6827 offset = lastbyte; 6828 } 6829 6830 mutex_exit(&frag->itpf_lock); 6831 6832 /* Didn't find last fragment, so return NULL */ 6833 return (NULL); 6834 } 6835 
6836 static void 6837 ipsec_fragcache_clean(ipsec_fragcache_t *frag, ipsec_stack_t *ipss) 6838 { 6839 ipsec_fragcache_entry_t *fep; 6840 int i; 6841 ipsec_fragcache_entry_t *earlyfep = NULL; 6842 time_t itpf_time; 6843 int earlyexp; 6844 int earlyi = 0; 6845 6846 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6847 6848 itpf_time = gethrestime_sec(); 6849 earlyexp = itpf_time + 10000; 6850 6851 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6852 fep = (frag->itpf_ptr)[i]; 6853 while (fep) { 6854 if (fep->itpfe_exp < itpf_time) { 6855 /* found */ 6856 fep = fragcache_delentry(i, fep, frag, ipss); 6857 } else { 6858 if (fep->itpfe_exp < earlyexp) { 6859 earlyfep = fep; 6860 earlyexp = fep->itpfe_exp; 6861 earlyi = i; 6862 } 6863 fep = fep->itpfe_next; 6864 } 6865 } 6866 } 6867 6868 frag->itpf_expire_hint = earlyexp; 6869 6870 /* if (!found) */ 6871 if (frag->itpf_freelist == NULL) 6872 (void) fragcache_delentry(earlyi, earlyfep, frag, ipss); 6873 } 6874 6875 static ipsec_fragcache_entry_t * 6876 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 6877 ipsec_fragcache_t *frag, ipsec_stack_t *ipss) 6878 { 6879 ipsec_fragcache_entry_t *targp; 6880 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 6881 6882 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6883 6884 /* Free up any fragment list still in cache entry */ 6885 if (fep->itpfe_fraglist != NULL) { 6886 ip_drop_packet_chain(fep->itpfe_fraglist, 6887 ip_recv_attr_is_mblk(fep->itpfe_fraglist), NULL, 6888 DROPPER(ipss, ipds_spd_expired_frags), 6889 &ipss->ipsec_spd_dropper); 6890 } 6891 fep->itpfe_fraglist = NULL; 6892 6893 targp = (frag->itpf_ptr)[slot]; 6894 ASSERT(targp != 0); 6895 6896 if (targp == fep) { 6897 /* unlink from head of hash chain */ 6898 (frag->itpf_ptr)[slot] = nextp; 6899 /* link into free list */ 6900 fep->itpfe_next = frag->itpf_freelist; 6901 frag->itpf_freelist = fep; 6902 return (nextp); 6903 } 6904 6905 /* maybe should use double linked list to make update faster */ 6906 /* must be past front of chain */ 
6907 while (targp) { 6908 if (targp->itpfe_next == fep) { 6909 /* unlink from hash chain */ 6910 targp->itpfe_next = nextp; 6911 /* link into free list */ 6912 fep->itpfe_next = frag->itpf_freelist; 6913 frag->itpf_freelist = fep; 6914 return (nextp); 6915 } 6916 targp = targp->itpfe_next; 6917 ASSERT(targp != 0); 6918 } 6919 /* NOTREACHED */ 6920 return (NULL); 6921 } 6922