/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/iptun.h>
#include <inet/iptun/iptun_impl.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, ip_recv_attr_t *, netstack_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(ipsec_stack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
#define	SEL_POST_FRAG	0x0008

/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS,
    SELRET_TUNFRAG } selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(ip_recv_attr_t *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **, netstack_t *);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * The selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */
#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

uint32_t ipsec_spd_hashsize;
uint32_t tun_spd_hashsize;

#define	IPSEC_SEL_NOHASH	((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *, ipsec_stack_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *, ipsec_stack_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *, ipsec_stack_t *ipss);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *,
    int, ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

/* Round x down/up to a multiple of align; e.g. ALGBITS_ROUND_UP(13, 8) == 16. */
#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)

/*
 * Inbound traffic should have matching identities for both SA's.
 */
#define	SA_IDS_MATCH(sa1, sa2)						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))
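
/*
 * For example, ipsec_check_ipsecin_policy() below evaluates
 * SA_IDS_MATCH(ira->ira_ipsec_ah_sa, ira->ira_ipsec_esp_sa): the result
 * is B_TRUE when at most one of AH/ESP was applied, or when both SAs
 * carry the same source and destination certificate identities.
 */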

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)

/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries.
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);
		mp = mpnext;
	}
}

/*
 * ip_drop all messages in an mblk chain
 * Can handle a b_next chain of ip_recv_attr_t mblks, or just a b_next chain
 * of data.
 */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *ill,
    struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		if (ip_recv_attr_is_mblk(mp))
			mp = ip_recv_attr_free_mblk(mp);
		ip_drop_packet(mp, inbound, ill, counter, who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * The in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsp_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}
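
/*
 * For example, two rules installed with the same ipsp_index compare
 * equal through tie-breaker #1 and are then ordered by their virtual
 * addresses, while an { index, NULL-selector } template handed to
 * avl_find() sorts just before both of them, giving
 * ipsec_policy_delete() a starting point to walk forward from.
 */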

/*
 * Free what ipsec_alloc_table allocated.
 */
void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;
	int i;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		for (i = 0; i < ipr->ipr_nchains; i++) {
			ASSERT(ipr->ipr_hash[i].hash_head == NULL);
		}
		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
		ipr->ipr_hash = NULL;
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
	}
	ipsec_polhead_free_table(iph);
}

/*
 * Free the IPsec stack instance.
 */
/* ARGSUSED */
static void
ipsec_stack_fini(netstackid_t stackid, void *arg)
{
	ipsec_stack_t *ipss = (ipsec_stack_t *)arg;
	void *cookie;
	ipsec_tun_pol_t *node;
	netstack_t *ns = ipss->ipsec_netstack;
	int i;
	ipsec_algtype_t algtype;

	ipsec_loader_destroy(ipss);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&ipss->ipsec_tunnel_policies,
	    &cookie)) != NULL) {
		ITP_REFRELE(node, ns);
	}
	avl_destroy(&ipss->ipsec_tunnel_policies);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	rw_destroy(&ipss->ipsec_tunnel_policy_lock);

	ipsec_config_flush(ns);

	ipsec_kstat_destroy(ipss);

	ip_drop_unregister(&ipss->ipsec_dropper);

	ip_drop_unregister(&ipss->ipsec_spd_dropper);
	ip_drop_destroy(ipss);
	/*
	 * Globals start with ref == 1 to prevent IPPH_REFRELE() from
	 * attempting to free them, hence they should have 1 now.
	 */
	ipsec_polhead_destroy(&ipss->ipsec_system_policy);
	ASSERT(ipss->ipsec_system_policy.iph_refs == 1);
	ipsec_polhead_destroy(&ipss->ipsec_inactive_policy);
	ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
		ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head);
		ipss->ipsec_action_hash[i].hash_head = NULL;
		mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock));
	}

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
		ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock));
	}

	rw_enter(&ipss->ipsec_alg_lock, RW_WRITER);
	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		int nalgs = ipss->ipsec_nalgs[algtype];

		for (i = 0; i < nalgs; i++) {
			if (ipss->ipsec_alglists[algtype][i] != NULL)
				ipsec_alg_unreg(algtype, i, ns);
		}
	}
	rw_exit(&ipss->ipsec_alg_lock);
	rw_destroy(&ipss->ipsec_alg_lock);

	ipsid_gc(ns);
	ipsid_fini(ns);

	(void) ipsec_free_tables(ipss);
	kmem_free(ipss, sizeof (*ipss));
}
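
/*
 * Tear down the pieces of global (cross-stack) state: the kmem caches,
 * the crypto provider-update callback, and the NS_IPSEC netstack
 * registration set up in ipsec_policy_g_init().
 */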
void
ipsec_policy_g_destroy(void)
{
	kmem_cache_destroy(ipsec_action_cache);
	kmem_cache_destroy(ipsec_sel_cache);
	kmem_cache_destroy(ipsec_pol_cache);

	ipsec_unregister_prov_update();

	netstack_unregister(NS_IPSEC);
}

/*
 * Free what ipsec_alloc_tables allocated.
 * Called when table allocation fails to free the table.
 */
static int
ipsec_free_tables(ipsec_stack_t *ipss)
{
	int i;

	if (ipss->ipsec_sel_hash != NULL) {
		for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
			ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		}
		kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize *
		    sizeof (*ipss->ipsec_sel_hash));
		ipss->ipsec_sel_hash = NULL;
		ipss->ipsec_spd_hashsize = 0;
	}
	ipsec_polhead_free_table(&ipss->ipsec_system_policy);
	ipsec_polhead_free_table(&ipss->ipsec_inactive_policy);

	return (ENOMEM);
}

/*
 * Attempt to allocate the tables in a single policy head.
 * Return nonzero on failure after cleaning up any work in progress.
 */
int
ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
    boolean_t global_cleanup, netstack_t *ns)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;
		ipr->ipr_hash = kmem_zalloc(nchains *
		    sizeof (ipsec_policy_hash_t), kmflag);
		if (ipr->ipr_hash == NULL)
			return (global_cleanup ?
			    ipsec_free_tables(ns->netstack_ipsec) :
			    ENOMEM);
	}
	return (0);
}

/*
 * Attempt to allocate the various tables.  Return nonzero on failure
 * after cleaning up any work in progress.
 */
static int
ipsec_alloc_tables(int kmflag, netstack_t *ns)
{
	int error;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	error = ipsec_alloc_table(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize *
	    sizeof (*ipss->ipsec_sel_hash), kmflag);

	if (ipss->ipsec_sel_hash == NULL)
		return (ipsec_free_tables(ipss));

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}
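
/*
 * Create and install the per-netstack IPsec kstats.  Returns B_FALSE
 * if the kstat cannot be allocated.
 */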
static boolean_t
ipsec_kstat_init(ipsec_stack_t *ipss)
{
	ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net",
	    KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid);

	if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL)
		return (B_FALSE);

	ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data;

#define	KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64)
	KI(esp_stat_in_requests);
	KI(esp_stat_in_discards);
	KI(esp_stat_lookup_failure);
	KI(ah_stat_in_requests);
	KI(ah_stat_in_discards);
	KI(ah_stat_lookup_failure);
	KI(sadb_acquire_maxpackets);
	KI(sadb_acquire_qhiwater);
#undef KI

	kstat_install(ipss->ipsec_ksp);
	return (B_TRUE);
}

static void
ipsec_kstat_destroy(ipsec_stack_t *ipss)
{
	kstat_delete_netstack(ipss->ipsec_ksp,
	    ipss->ipsec_netstack->netstack_stackid);
	ipss->ipsec_kstats = NULL;
}

/*
 * Initialize the IPsec stack instance.
 */
/* ARGSUSED */
static void *
ipsec_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsec_stack_t *ipss;
	int i;

	ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP);
	ipss->ipsec_netstack = ns;

	/*
	 * FIXME: netstack_ipsec is used by some of the routines we call
	 * below, but it isn't set until this routine returns.
	 * Either we introduce optional xxx_stack_alloc() functions
	 * that will be called by the netstack framework before xxx_stack_init,
	 * or we switch spd.c and sadb.c to operate on ipsec_stack_t
	 * (latter has some include file order issues for sadb.h, but makes
	 * sense if we merge some of the ipsec related stack_t's together.)
	 */
	ns->netstack_ipsec = ipss;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ?
	    IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize;

	if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipss->ipsec_spd_hashsize);
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipss->ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP, ns);
	}

	/* Just set a default for tunnels. */
	ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ?
	    TUN_SPDHASH_DEFAULT : tun_spd_hashsize;

	ipsid_init(ns);
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	ipss->ipsec_system_policy.iph_refs = 1;
	ipss->ipsec_inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize);
	ipsec_polhead_init(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize);
	rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), 0);

	ipss->ipsec_next_policy_index = 1;

	rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipss->ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++)
		mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	rw_init(&ipss->ipsec_alg_lock, NULL, RW_DEFAULT, NULL);
	for (i = 0; i < IPSEC_NALGTYPES; i++) {
		ipss->ipsec_nalgs[i] = 0;
	}

	ip_drop_init(ipss);
	ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD");

	/* IP's IPsec code calls the packet dropper */
	ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing");

	(void) ipsec_kstat_init(ipss);

	ipsec_loader_init(ipss);
	ipsec_loader_start(ipss);

	return (ipss);
}

/* Global across all stack instances */
void
ipsec_policy_g_init(void)
{
	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsec_stack_t's.
	 */
	netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini);
}
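
/*
 * ipsec_policy_g_init() is expected to run once at module load, before
 * any netstack exists; ipsec_stack_init() and ipsec_stack_fini() then
 * run per stack instance via the NS_IPSEC registration above.
 */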

/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipss->ipsec_nalgs[at];
	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipss->ipsec_sortlist[at][i];
			ipss->ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		} /* Else just continue. */
	}

	/* Store holder in last slot. */
	ipss->ipsec_sortlist[at][i] = holder;
}
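
/*
 * For example, with a sortlist of { A, B } where A->alg_minbits == 256
 * and B->alg_minbits == 128, inserting C with alg_minbits == 192 rides
 * "holder" past A, displaces B at its slot, and finally stores B in the
 * last slot, yielding { A, C, B }: one insertion-sort pass that keeps
 * the list ordered by decreasing minimum key size.
 */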

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	boolean_t copyback = B_FALSE;
	int i;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	int newcount = ipss->ipsec_nalgs[at];

	ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback) {
			ipss->ipsec_sortlist[at][i-1] =
			    ipss->ipsec_sortlist[at][i];
		} else if (ipss->ipsec_sortlist[at][i] == algid) {
			copyback = B_TRUE;
		}
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype, ns);
	ipss->ipsec_alglists[algtype][alg->alg_id] = alg;

	ipss->ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id, ns);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]);
	ipss->ipsec_alglists[algtype][algid] = NULL;

	ipss->ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid, ns);
}

/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_system_policy;

	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy;

	IPPH_REFHOLD(h);
	return (h);
}

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive,
    netstack_t *ns)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}
		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags(ns->netstack_ipsec);
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}
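
/*
 * Note that the inactive-then-active lock order above matches
 * ipsec_copy_polhead() below, which also takes the destination
 * (inactive) head's lock before the source (active) head's, keeping
 * both paths in a single consistent lock order.
 */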

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipsec_swap_policy(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns);
}

/*
 * Clone one policy rule.
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_netstack = src->ipsp_netstack;
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

/*
 * Insert a node into an AVL tree that is known not to contain an
 * equal node; the address tie-breaker in ipsec_policy_cmpbyid()
 * guarantees avl_find() returns NULL and a unique insertion point.
 */
void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}

/*
 * Deep-copy a hash chain of policy rules into a destination policy
 * head, returning ENOMEM if any individual clone fails.
 */
static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);
		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}

/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph,
    netstack_t *ns)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph, ns);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];
		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph, ns);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	return (ipsec_copy_polhead(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns));
}

/*
 * Extract the string from ipsec_policy_failure_msgs[type] and
 * log it.
 */
void
ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h,
    boolean_t secure, netstack_t *ns)
{
	char sbuf[INET6_ADDRSTRLEN];
	char dbuf[INET6_ADDRSTRLEN];
	char *s;
	char *d;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL) {
		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
	} else {
		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));
	}

	/* Always bump the policy failure counter. */
	ipss->ipsec_policy_failure_count[type]++;

	ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    ipsec_policy_failure_msgs[type], func_name,
	    (secure ? "secure" : "not secure"), s, d);
}

/*
 * Rate-limiting front-end to strlog() for AH and ESP.  Uses the ndd variables
 * in /dev/ip and the same rate-limiting clock so that there's a single
 * knob to turn to throttle the rate of messages.
 */
void
ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl,
    char *fmt, ...)
{
	va_list adx;
	hrtime_t current = gethrtime();
	ip_stack_t *ipst = ns->netstack_ip;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	sl |= SL_CONSOLE;
	/*
	 * Throttle logging to stop syslog from being swamped.  If variable
	 * 'ipsec_policy_log_interval' is zero, don't log any messages at
	 * all, otherwise log only one message every 'ipsec_policy_log_interval'
	 * msec.  Convert interval (in msec) to hrtime (in nsec).
	 */
	if (ipst->ips_ipsec_policy_log_interval) {
		if (ipss->ipsec_policy_failure_last +
		    MSEC2NSEC(ipst->ips_ipsec_policy_log_interval) <= current) {
			va_start(adx, fmt);
			(void) vstrlog(mid, sid, level, sl, fmt, adx);
			va_end(adx);
			ipss->ipsec_policy_failure_last = current;
		}
	}
}

void
ipsec_config_flush(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER);
	ipsec_polhead_flush(&ipss->ipsec_system_policy, ns);
	ipss->ipsec_next_policy_index = 1;
	rw_exit(&ipss->ipsec_system_policy.iph_lock);
	ipsec_action_reclaim_stack(ipss);
}

/*
 * Clip a policy's min/max keybits vs. the capabilities of the
 * algorithm.
 */
static void
act_alg_adjust(uint_t algtype, uint_t algid,
    uint16_t *minbits, uint16_t *maxbits, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid];

	if (algp != NULL) {
		/*
		 * If passed-in minbits is zero, we assume the caller trusts
		 * us with setting the minimum key size.  We pick the
		 * algorithm's DEFAULT key size for the minimum in this case.
		 */
		if (*minbits == 0) {
			*minbits = algp->alg_default_bits;
			ASSERT(*minbits >= algp->alg_minbits);
		} else {
			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
			    algp->alg_minbits);
		}
		if (*maxbits == 0)
			*maxbits = algp->alg_maxbits;
		else
			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
			    algp->alg_maxbits);
		ASSERT(*minbits <= *maxbits);
	} else {
		*minbits = 0;
		*maxbits = 0;
	}
}

/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion.
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of non-wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t auth_alg, auth_idx, auth_min, auth_max;
	uint_t eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t encr_alg, encr_idx, encr_min, encr_max;
	uint_t action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef SET_EXP_MINMAX
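
	/*
	 * For example, if three authentication algorithms and two
	 * encryption algorithms are loaded (and the "none" entries are
	 * not), a fully wildcarded AH+ESP+ESP-auth action has
	 * action_count = 3 * 3 * 2 == 18 at this point.
	 */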

	/*
	 * ok, allocate the whole mess..
	 */
	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss) \
	((wild)?(ipss->ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}

/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(const ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg);
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	/*
	 * SADB_AALG_ANY is a placeholder to distinguish "any" from
	 * "none" above.  If auth is required, as determined above,
	 * SADB_AALG_ANY becomes 0, which is the representation
	 * of "any" and "none" in PF_KEY v2.
	 */
	ipp->ipp_auth_alg = (req->ipsr_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_auth_alg : 0;
	ipp->ipp_esp_auth_alg = (req->ipsr_esp_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_esp_auth_alg : 0;
}

/*
 * Extract a new-style action from a request.
 */
void
ipsec_actvec_from_req(const ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp,
    netstack_t *ns)
{
	struct ipsec_act act;

	bzero(&act, sizeof (act));
	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
		act.ipa_type = IPSEC_ACT_BYPASS;
	} else {
		act.ipa_type = IPSEC_ACT_APPLY;
		ipsec_prot_from_req(req, &act.ipa_apply);
	}
	*actp = ipsec_act_wildcard_expand(&act, nactp, ns);
}
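
/*
 * Note that *actp comes back from ipsec_act_wildcard_expand() as a
 * kmem_alloc'ed vector (or NULL on allocation failure); callers are
 * expected to release it with ipsec_actvec_free(*actp, *nactp) below.
 */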

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}

/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (connp->conn_latch_in_action != NULL) {
			rv = ipsec_req_from_act(connp->conn_latch_in_action,
			    req);
			goto done;
		}
		if (connp->conn_latch_in_policy != NULL) {
			rv = ipsec_req_from_act(
			    connp->conn_latch_in_policy->ipsp_act, req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	return (rv);
}

/*
 * Free an action vector obtained from ipsec_act_wildcard_expand().
 */
void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *data_mp, ip_recv_attr_t *ira,
    ipsec_policy_t *ipsp)
{
	if (!(ira->ira_flags & IRAF_IPSEC_SECURE))
		return (data_mp);

	ASSERT(ira->ira_flags & IRAF_LOOPBACK);

	IPPOL_REFRELE(ipsp);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */
	return (data_mp);
}

/*
 * Check that packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ip_recv_attr_t *ira, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique, netstack_t *ns)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
	ASSERT(!(ira->ira_flags & IRAF_LOOPBACK));

	ah_assoc = ira->ira_ipsec_ah_sa;
	esp_assoc = ira->ira_ipsec_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */
	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_ah_innermismatch);
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_esp_innermismatch);
		return (B_FALSE);
	}
	return (B_TRUE);
}
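
/*
 * pkt_unique values are built with SA_UNIQUE_ID(remote port, local
 * port, protocol, ...); see conn_to_unique() below.  Masking with
 * ipsa_unique_mask keeps only the fields the SA was actually latched
 * to, which is why a zero mask above matches any packet.
 */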

static boolean_t
ipsec_check_ipsecin_action(ip_recv_attr_t *ira, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
    netstack_t *ns)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ira->ira_flags & IRAF_LOOPBACK) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * IXAF_TRUSTED_ICMP is initialized in ip.c and ip6.c.
		 */
		if (ap == ira->ira_ipsec_action ||
		    (ira->ira_flags & IRAF_TRUSTED_ICMP))
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = DROPPER(ipss, ipds_spd_loopback_mismatch);
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!(ira->ira_flags & IRAF_TRUSTED_ICMP));
	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);

	ah_assoc = ira->ira_ipsec_ah_sa;
	esp_assoc = ira->ira_ipsec_esp_sa;

	decaps = (ira->ira_flags & IRAF_IPSEC_DECAPS);

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = DROPPER(ipss, ipds_spd_explicit);
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = DROPPER(ipss, ipds_spd_got_secure);
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_ahalg);
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_ah);
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay.  But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ira->ira_ipsec_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ira->ira_ipsec_action);
		}
		ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
		ASSERT(ira->ira_ipsec_action == NULL);
		IPACT_REFHOLD(ap);
		ira->ira_ipsec_action = ap;
		break;	/* from switch */
	}
	return (ret);
}

/*
 * Match an inbound SA's identities against those latched on the conn:
 * the SA's source must be the latched remote identity and the SA's
 * destination the latched local identity.
 */
static boolean_t
spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
{
	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	return (ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid));
}

/*
 * Takes a latched conn and an inbound packet and returns a unique_id suitable
 * for SA comparisons.  Most of the time we will copy from the conn_t, but
 * there are cases when the conn_t is latched but it has wildcard selectors,
 * and then we need to fallback to scooping them out of the packet.
 *
 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
 * can get away with this because we only have non-zero ports/proto for
 * latched conn_ts.
 *
 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
 * to not be a nice macro.
 */
static uint64_t
conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
{
	ipsec_selector_t sel;
	uint8_t ulp = connp->conn_proto;

	ASSERT(connp->conn_latch_in_policy != NULL);

	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
		/* Slow path - we gotta grab from the packet. */
		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
		    SEL_NONE) != SELRET_SUCCESS) {
			/* Failure -> have caller free packet with ENOMEM. */
			return (0);
		}
		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
		    sel.ips_protocol, 0));
	}

#ifdef DEBUG_NOT_UNTIL_6478464
	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
	    SELRET_SUCCESS) {
		ASSERT(sel.ips_local_port == connp->conn_lport);
		ASSERT(sel.ips_remote_port == connp->conn_fport);
		ASSERT(sel.ips_protocol == connp->conn_proto);
	}
	ASSERT(connp->conn_proto != 0);
#endif

	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
}

/*
 * Called to check policy on a latched connection.
 * Note that we don't dereference conn_latch or conn_ihere since the conn might
 * be closing.  The caller passes a held ipsec_latch_t instead.
 */
static boolean_t
ipsec_check_ipsecin_latch(ip_recv_attr_t *ira, mblk_t *mp, ipsec_latch_t *ipl,
    ipsec_action_t *ap, ipha_t *ipha, ip6_t *ip6h, const char **reason,
    kstat_named_t **counter, conn_t *connp, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ipl->ipl_ids_latched == B_TRUE);
	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);

	if (!(ira->ira_flags & IRAF_LOOPBACK)) {
		/*
		 * Over loopback, there aren't real security associations,
		 * so there are neither identities nor "unique" values
		 * for us to check the packet against.
		 */
		if (ira->ira_ipsec_ah_sa != NULL) {
			if (!spd_match_inbound_ids(ipl,
			    ira->ira_ipsec_ah_sa)) {
				*counter = DROPPER(ipss, ipds_spd_ah_badid);
				*reason = "AH identity mismatch";
				return (B_FALSE);
			}
		}

		if (ira->ira_ipsec_esp_sa != NULL) {
			if (!spd_match_inbound_ids(ipl,
			    ira->ira_ipsec_esp_sa)) {
				*counter = DROPPER(ipss, ipds_spd_esp_badid);
				*reason = "ESP identity mismatch";
				return (B_FALSE);
			}
		}

		/*
		 * Can fudge pkt_unique from connp because we're latched.
		 * In DEBUG kernels (see conn_to_unique()'s implementation),
		 * verify this even if it REALLY slows things down.
		 */
		if (!ipsec_check_ipsecin_unique(ira, reason, counter,
		    conn_to_unique(connp, mp, ipha, ip6h), ns)) {
			return (B_FALSE);
		}
	}
	return (ipsec_check_ipsecin_action(ira, mp, ap, ipha, ip6h, reason,
	    counter, ns));
}

/*
 * Check to see whether this secured datagram meets the policy
 * constraints specified in ipsp.
 *
 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy.
 *
 * Consumes a reference to ipsp.
 * Returns the mblk if ok.
 */
static mblk_t *
ipsec_check_ipsecin_policy(mblk_t *data_mp, ipsec_policy_t *ipsp,
    ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, ip_recv_attr_t *ira,
    netstack_t *ns)
{
	ipsec_action_t *ap;
	const char *reason = "no policy actions found";
	ip_stack_t *ipst = ns->netstack_ip;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	kstat_named_t *counter;

	counter = DROPPER(ipss, ipds_spd_got_secure);

	ASSERT(ipsp != NULL);

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ira->ira_flags & IRAF_LOOPBACK)
		return (ipsec_check_loopback_policy(data_mp, ira, ipsp));

	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);

	if (ira->ira_ipsec_action != NULL) {
		/*
		 * this can happen if we do a double policy-check on a packet
		 * Would be nice to be able to delete this test..
		 */
		IPACT_REFRELE(ira->ira_ipsec_action);
	}
	ASSERT(ira->ira_ipsec_action == NULL);

	if (!SA_IDS_MATCH(ira->ira_ipsec_ah_sa, ira->ira_ipsec_esp_sa)) {
		reason = "inbound AH and ESP identities differ";
		counter = DROPPER(ipss, ipds_spd_ahesp_diffid);
		goto drop;
	}

	if (!ipsec_check_ipsecin_unique(ira, &reason, &counter, pkt_unique,
	    ns))
		goto drop;

	/*
	 * Ok, now loop through the possible actions and see if any
	 * of them work for us.
	 */
	for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) {
		if (ipsec_check_ipsecin_action(ira, data_mp, ap,
		    ipha, ip6h, &reason, &counter, ns)) {
			BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
			IPPOL_REFRELE(ipsp);
			return (data_mp);
		}
	}
drop:
	ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    "ipsec inbound policy mismatch: %s, packet dropped\n",
	    reason);
	IPPOL_REFRELE(ipsp);
	ASSERT(ira->ira_ipsec_action == NULL);
	BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
	ip_drop_packet(data_mp, B_TRUE, NULL, counter,
	    &ipss->ipsec_spd_dropper);
	return (NULL);
}
1927 */ 1928 boolean_t 1929 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 1930 { 1931 int offset = pfxlen>>3; 1932 int bitsleft = pfxlen & 7; 1933 uint8_t *addr2 = (uint8_t *)addr2p; 1934 1935 /* 1936 * and there was much evil.. 1937 * XXX should inline-expand the bcmp here and do this 32 bits 1938 * or 64 bits at a time.. 1939 */ 1940 return ((bcmp(addr1, addr2, offset) == 0) && 1941 ((bitsleft == 0) || 1942 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0))); 1943 } 1944 1945 static ipsec_policy_t * 1946 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 1947 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 1948 { 1949 ipsec_selkey_t *isel; 1950 ipsec_policy_t *p; 1951 int bpri = best ? best->ipsp_prio : 0; 1952 1953 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 1954 uint32_t valid; 1955 1956 if (p->ipsp_prio <= bpri) 1957 continue; 1958 isel = &p->ipsp_sel->ipsl_key; 1959 valid = isel->ipsl_valid; 1960 1961 if ((valid & IPSL_PROTOCOL) && 1962 (isel->ipsl_proto != sel->ips_protocol)) 1963 continue; 1964 1965 if ((valid & IPSL_REMOTE_ADDR) && 1966 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 1967 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6)) 1968 continue; 1969 1970 if ((valid & IPSL_LOCAL_ADDR) && 1971 !ip_addr_match((uint8_t *)&isel->ipsl_local, 1972 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6)) 1973 continue; 1974 1975 if ((valid & IPSL_REMOTE_PORT) && 1976 isel->ipsl_rport != sel->ips_remote_port) 1977 continue; 1978 1979 if ((valid & IPSL_LOCAL_PORT) && 1980 isel->ipsl_lport != sel->ips_local_port) 1981 continue; 1982 1983 if (!is_icmp_inv_acq) { 1984 if ((valid & IPSL_ICMP_TYPE) && 1985 (isel->ipsl_icmp_type > sel->ips_icmp_type || 1986 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 1987 continue; 1988 } 1989 1990 if ((valid & IPSL_ICMP_CODE) && 1991 (isel->ipsl_icmp_code > sel->ips_icmp_code || 1992 isel->ipsl_icmp_code_end < 1993 sel->ips_icmp_code)) { 1994 continue; 1995 } 1996 } else { 1997 /* 1998 * special case for icmp inverse acquire 1999 * we only want policies that aren't drop/pass 2000 */ 2001 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2002 continue; 2003 } 2004 2005 /* we matched all the packet-port-field selectors! */ 2006 best = p; 2007 bpri = p->ipsp_prio; 2008 } 2009 2010 return (best); 2011 } 2012 2013 /* 2014 * Try to find and return the best policy entry under a given policy 2015 * root for a given set of selectors; the first parameter "best" is 2016 * the current best policy so far. If "best" is non-null, we have a 2017 * reference to it. We return a reference to a policy; if that policy 2018 * is not the original "best", we need to release that reference 2019 * before returning. 2020 */ 2021 ipsec_policy_t * 2022 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2023 int direction, ipsec_selector_t *sel) 2024 { 2025 ipsec_policy_t *curbest; 2026 ipsec_policy_root_t *root; 2027 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2028 int af = sel->ips_isv4 ? 
IPSEC_AF_V4 : IPSEC_AF_V6; 2029 2030 curbest = best; 2031 root = &head->iph_root[direction]; 2032 2033 #ifdef DEBUG 2034 if (is_icmp_inv_acq) { 2035 if (sel->ips_isv4) { 2036 if (sel->ips_protocol != IPPROTO_ICMP) { 2037 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2038 " expecting icmp, got %d", 2039 sel->ips_protocol); 2040 } 2041 } else { 2042 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2043 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2044 " expecting icmpv6, got %d", 2045 sel->ips_protocol); 2046 } 2047 } 2048 } 2049 #endif 2050 2051 rw_enter(&head->iph_lock, RW_READER); 2052 2053 if (root->ipr_nchains > 0) { 2054 curbest = ipsec_find_policy_chain(curbest, 2055 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2056 is_icmp_inv_acq); 2057 } 2058 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2059 is_icmp_inv_acq); 2060 2061 /* 2062 * Adjust reference counts if we found anything new. 2063 */ 2064 if (curbest != best) { 2065 ASSERT(curbest != NULL); 2066 IPPOL_REFHOLD(curbest); 2067 2068 if (best != NULL) { 2069 IPPOL_REFRELE(best); 2070 } 2071 } 2072 2073 rw_exit(&head->iph_lock); 2074 2075 return (curbest); 2076 } 2077 2078 /* 2079 * Find the best system policy (either global or per-interface) which 2080 * applies to the given selector; look in all the relevant policy roots 2081 * to figure out which policy wins. 2082 * 2083 * Returns a reference to a policy; caller must release this 2084 * reference when done. 2085 */ 2086 ipsec_policy_t * 2087 ipsec_find_policy(int direction, const conn_t *connp, ipsec_selector_t *sel, 2088 netstack_t *ns) 2089 { 2090 ipsec_policy_t *p; 2091 ipsec_stack_t *ipss = ns->netstack_ipsec; 2092 2093 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2094 direction, sel); 2095 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2096 p = ipsec_find_policy_head(p, connp->conn_policy, 2097 direction, sel); 2098 } 2099 2100 return (p); 2101 } 2102 2103 /* 2104 * Check with global policy and see whether this inbound 2105 * packet meets the policy constraints. 2106 * 2107 * Locate appropriate policy from global policy, supplemented by the 2108 * conn's configured and/or cached policy if the conn is supplied. 2109 * 2110 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2111 * encrypted packet to see if they match. 2112 * 2113 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2114 * floor. 2115 */ 2116 mblk_t * 2117 ipsec_check_global_policy(mblk_t *data_mp, conn_t *connp, 2118 ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, netstack_t *ns) 2119 { 2120 ipsec_policy_t *p; 2121 ipsec_selector_t sel; 2122 boolean_t policy_present; 2123 kstat_named_t *counter; 2124 uint64_t pkt_unique; 2125 ip_stack_t *ipst = ns->netstack_ip; 2126 ipsec_stack_t *ipss = ns->netstack_ipsec; 2127 2128 sel.ips_is_icmp_inv_acq = 0; 2129 2130 ASSERT((ipha == NULL && ip6h != NULL) || 2131 (ip6h == NULL && ipha != NULL)); 2132 2133 if (ipha != NULL) 2134 policy_present = ipss->ipsec_inbound_v4_policy_present; 2135 else 2136 policy_present = ipss->ipsec_inbound_v6_policy_present; 2137 2138 if (!policy_present && connp == NULL) { 2139 /* 2140 * No global policy and no per-socket policy; 2141 * just pass it back (but we shouldn't get here in that case) 2142 */ 2143 return (data_mp); 2144 } 2145 2146 /* 2147 * If we have cached policy, use it. 2148 * Otherwise consult system policy. 
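 *
 * In both arms below we also derive pkt_unique for later SA matching;
 * for illustration, the non-latched path packs the selector as
 * SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
 * sel.ips_protocol, 0), while the latched path obtains the equivalent
 * value from conn_to_unique().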
2149 */ 2150 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2151 p = connp->conn_latch_in_policy; 2152 if (p != NULL) { 2153 IPPOL_REFHOLD(p); 2154 } 2155 /* 2156 * Fudge sel for UNIQUE_ID setting below. 2157 */ 2158 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2159 } else { 2160 /* Initialize the ports in the selector */ 2161 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2162 SEL_NONE) == SELRET_NOMEM) { 2163 /* 2164 * Technically not a policy mismatch, but it is 2165 * an internal failure. 2166 */ 2167 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2168 "ipsec_init_inbound_sel", ipha, ip6h, B_TRUE, ns); 2169 counter = DROPPER(ipss, ipds_spd_nomem); 2170 goto fail; 2171 } 2172 2173 /* 2174 * Find the policy which best applies. 2175 * 2176 * If we find global policy, we should look at both 2177 * local policy and global policy and see which is 2178 * stronger and match accordingly. 2179 * 2180 * If we don't find a global policy, check with 2181 * local policy alone. 2182 */ 2183 2184 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, &sel, ns); 2185 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2186 sel.ips_local_port, sel.ips_protocol, 0); 2187 } 2188 2189 if (p == NULL) { 2190 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 2191 /* 2192 * We have no policy; default to succeeding. 2193 * XXX paranoid system design doesn't do this. 2194 */ 2195 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2196 return (data_mp); 2197 } else { 2198 counter = DROPPER(ipss, ipds_spd_got_secure); 2199 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2200 "ipsec_check_global_policy", ipha, ip6h, B_TRUE, 2201 ns); 2202 goto fail; 2203 } 2204 } 2205 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 2206 return (ipsec_check_ipsecin_policy(data_mp, p, ipha, ip6h, 2207 pkt_unique, ira, ns)); 2208 } 2209 if (p->ipsp_act->ipa_allow_clear) { 2210 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2211 IPPOL_REFRELE(p); 2212 return (data_mp); 2213 } 2214 IPPOL_REFRELE(p); 2215 /* 2216 * If we reach here, we will drop the packet because it failed the 2217 * global policy check because the packet was cleartext, and it 2218 * should not have been. 2219 */ 2220 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2221 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns); 2222 counter = DROPPER(ipss, ipds_spd_got_clear); 2223 2224 fail: 2225 ip_drop_packet(data_mp, B_TRUE, NULL, counter, 2226 &ipss->ipsec_spd_dropper); 2227 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2228 return (NULL); 2229 } 2230 2231 /* 2232 * We check whether an inbound datagram is a valid one 2233 * to accept in clear. If it is secure, it is the job 2234 * of IPSEC to log information appropriately if it 2235 * suspects that it may not be the real one. 2236 * 2237 * It is called only while fanning out to the ULP 2238 * where ULP accepts only secure data and the incoming 2239 * is clear. Usually we never accept clear datagrams in 2240 * such cases. ICMP is the only exception. 2241 * 2242 * NOTE : We don't call this function if the client (ULP) 2243 * is willing to accept things in clear. 
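 *
 * For illustration (sketch only, mirroring the latched-policy caller
 * later in this file), the function slots in roughly as:
 *
 *     if (p != NULL && !p->ipsp_act->ipa_allow_clear) {
 *         if (ipsec_inbound_accept_clear(mp, ipha, ip6h))
 *             deliver the cleartext datagram;
 *         else
 *             log the mismatch and ip_drop_packet() it;
 *     }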
 */
boolean_t
ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
{
    ushort_t iph_hdr_length;
    icmph_t *icmph;
    icmp6_t *icmp6;
    uint8_t *nexthdrp;

    ASSERT((ipha != NULL && ip6h == NULL) ||
        (ipha == NULL && ip6h != NULL));

    if (ip6h != NULL) {
        iph_hdr_length = ip_hdr_length_v6(mp, ip6h);
        if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length,
            &nexthdrp)) {
            return (B_FALSE);
        }
        if (*nexthdrp != IPPROTO_ICMPV6)
            return (B_FALSE);
        icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]);
        /* Match the IPv6 ICMP policy to the IPv4 one as closely as possible. */
        switch (icmp6->icmp6_type) {
        case ICMP6_PARAM_PROB:
            /* Corresponds to port/proto unreach in IPv4. */
        case ICMP6_ECHO_REQUEST:
            /* Just like IPv4. */
            return (B_FALSE);

        case MLD_LISTENER_QUERY:
        case MLD_LISTENER_REPORT:
        case MLD_LISTENER_REDUCTION:
            /*
             * XXX Separate NDD in IPv4; what about here?
             * Plus, mcast is important to ND.
             */
        case ICMP6_DST_UNREACH:
            /* Corresponds to HOST/NET unreachable in IPv4. */
        case ICMP6_PACKET_TOO_BIG:
        case ICMP6_ECHO_REPLY:
            /* These are trusted in IPv4. */
        case ND_ROUTER_SOLICIT:
        case ND_ROUTER_ADVERT:
        case ND_NEIGHBOR_SOLICIT:
        case ND_NEIGHBOR_ADVERT:
        case ND_REDIRECT:
            /* Trust ND messages for now. */
        case ICMP6_TIME_EXCEEDED:
        default:
            return (B_TRUE);
        }
    } else {
        /*
         * If it is not ICMP, fail this request.
         */
        if (ipha->ipha_protocol != IPPROTO_ICMP) {
#ifdef FRAGCACHE_DEBUG
            cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
                ipha->ipha_protocol);
#endif
            return (B_FALSE);
        }
        iph_hdr_length = IPH_HDR_LENGTH(ipha);
        icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
        /*
         * It is an insecure ICMP message. Check whether we are
         * willing to accept this one.
         */

        switch (icmph->icmph_type) {
        case ICMP_ECHO_REPLY:
        case ICMP_TIME_STAMP_REPLY:
        case ICMP_INFO_REPLY:
        case ICMP_ROUTER_ADVERTISEMENT:
            /*
             * We should not encourage clear replies if this
             * client expects secure traffic. If somebody replies
             * in clear, a malicious user watching both the
             * request and reply can mount chosen-plaintext
             * attacks. With global policy we might be expecting
             * secure traffic but sending out clear. We don't
             * know what the right thing is, and we can't control
             * the sender here. Until we are sure of what to do,
             * accept them.
             */
            return (B_TRUE);
        case ICMP_ECHO_REQUEST:
        case ICMP_TIME_STAMP_REQUEST:
        case ICMP_INFO_REQUEST:
        case ICMP_ADDRESS_MASK_REQUEST:
        case ICMP_ROUTER_SOLICITATION:
        case ICMP_ADDRESS_MASK_REPLY:
            /*
             * Don't accept these, as somebody could be sending
             * us plaintext to obtain encrypted data. If we
             * reply, it opens the door to a chosen-plaintext
             * attack.
             */
            return (B_FALSE);
        case ICMP_DEST_UNREACHABLE:
            switch (icmph->icmph_code) {
            case ICMP_FRAGMENTATION_NEEDED:
                /*
                 * Stay in sync with icmp_inbound, where we have
                 * already set dce_pmtu.
                 */
#ifdef FRAGCACHE_DEBUG
                cmn_err(CE_WARN, "ICMP frag needed\n");
#endif
                return (B_TRUE);
            case ICMP_HOST_UNREACHABLE:
            case ICMP_NET_UNREACHABLE:
                /*
                 * By accepting, we could reset a connection.
                 * How do we solve the problem of some
                 * intermediate router sending insecure ICMP
                 * messages?
                 */
                return (B_TRUE);
            case ICMP_PORT_UNREACHABLE:
            case ICMP_PROTOCOL_UNREACHABLE:
            default:
                return (B_FALSE);
            }
        case ICMP_SOURCE_QUENCH:
            /*
             * If this is an attack, TCP will slow start
             * because of this. Is it very harmful?
             */
            return (B_TRUE);
        case ICMP_PARAM_PROBLEM:
            return (B_FALSE);
        case ICMP_TIME_EXCEEDED:
            return (B_TRUE);
        case ICMP_REDIRECT:
            return (B_FALSE);
        default:
            return (B_FALSE);
        }
    }
}

void
ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote)
{
    mutex_enter(&ipl->ipl_lock);

    if (ipl->ipl_ids_latched) {
        /* I lost; someone else got here before me. */
        mutex_exit(&ipl->ipl_lock);
        return;
    }

    if (local != NULL)
        IPSID_REFHOLD(local);
    if (remote != NULL)
        IPSID_REFHOLD(remote);

    ipl->ipl_local_cid = local;
    ipl->ipl_remote_cid = remote;
    ipl->ipl_ids_latched = B_TRUE;
    mutex_exit(&ipl->ipl_lock);
}

void
ipsec_latch_inbound(conn_t *connp, ip_recv_attr_t *ira)
{
    ipsa_t *sa;
    ipsec_latch_t *ipl = connp->conn_latch;

    if (!ipl->ipl_ids_latched) {
        ipsid_t *local = NULL;
        ipsid_t *remote = NULL;

        if (!(ira->ira_flags & IRAF_LOOPBACK)) {
            ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
            if (ira->ira_ipsec_esp_sa != NULL)
                sa = ira->ira_ipsec_esp_sa;
            else
                sa = ira->ira_ipsec_ah_sa;
            ASSERT(sa != NULL);
            local = sa->ipsa_dst_cid;
            remote = sa->ipsa_src_cid;
        }
        ipsec_latch_ids(ipl, local, remote);
    }
    if (ira->ira_flags & IRAF_IPSEC_SECURE) {
        if (connp->conn_latch_in_action != NULL) {
            /*
             * Previously cached action. This is probably
             * harmless, but in DEBUG kernels, check for
             * action equality.
             *
             * Preserve the existing action to preserve latch
             * invariance.
             */
            ASSERT(connp->conn_latch_in_action ==
                ira->ira_ipsec_action);
            return;
        }
        connp->conn_latch_in_action = ira->ira_ipsec_action;
        IPACT_REFHOLD(connp->conn_latch_in_action);
    }
}

/*
 * Check whether the policy constraints are met for an inbound
 * datagram; called from IP in numerous places.
 *
 * Note that this is not a chokepoint for inbound policy checks;
 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy().
 */
mblk_t *
ipsec_check_inbound_policy(mblk_t *mp, conn_t *connp,
    ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira)
{
    boolean_t ret;
    ipsec_latch_t *ipl;
    ipsec_action_t *ap;
    uint64_t unique_id;
    ipsec_stack_t *ipss;
    ip_stack_t *ipst;
    netstack_t *ns;
    ipsec_policy_head_t *policy_head;
    ipsec_policy_t *p = NULL;

    ASSERT(connp != NULL);
    ns = connp->conn_netstack;
    ipss = ns->netstack_ipsec;
    ipst = ns->netstack_ip;

    if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
        /*
         * This is the case where the incoming datagram is
         * cleartext and we need to see whether this client
         * would like to receive such untrustworthy things from
         * the wire.
2480 */ 2481 ASSERT(mp != NULL); 2482 2483 mutex_enter(&connp->conn_lock); 2484 if (connp->conn_state_flags & CONN_CONDEMNED) { 2485 mutex_exit(&connp->conn_lock); 2486 ip_drop_packet(mp, B_TRUE, NULL, 2487 DROPPER(ipss, ipds_spd_got_clear), 2488 &ipss->ipsec_spd_dropper); 2489 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2490 return (NULL); 2491 } 2492 if (connp->conn_latch != NULL) { 2493 /* Hold a reference in case the conn is closing */ 2494 p = connp->conn_latch_in_policy; 2495 if (p != NULL) 2496 IPPOL_REFHOLD(p); 2497 mutex_exit(&connp->conn_lock); 2498 /* 2499 * Policy is cached in the conn. 2500 */ 2501 if (p != NULL && !p->ipsp_act->ipa_allow_clear) { 2502 ret = ipsec_inbound_accept_clear(mp, 2503 ipha, ip6h); 2504 if (ret) { 2505 BUMP_MIB(&ipst->ips_ip_mib, 2506 ipsecInSucceeded); 2507 IPPOL_REFRELE(p); 2508 return (mp); 2509 } else { 2510 ipsec_log_policy_failure( 2511 IPSEC_POLICY_MISMATCH, 2512 "ipsec_check_inbound_policy", ipha, 2513 ip6h, B_FALSE, ns); 2514 ip_drop_packet(mp, B_TRUE, NULL, 2515 DROPPER(ipss, ipds_spd_got_clear), 2516 &ipss->ipsec_spd_dropper); 2517 BUMP_MIB(&ipst->ips_ip_mib, 2518 ipsecInFailed); 2519 IPPOL_REFRELE(p); 2520 return (NULL); 2521 } 2522 } else { 2523 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2524 if (p != NULL) 2525 IPPOL_REFRELE(p); 2526 return (mp); 2527 } 2528 } else { 2529 policy_head = connp->conn_policy; 2530 2531 /* Hold a reference in case the conn is closing */ 2532 if (policy_head != NULL) 2533 IPPH_REFHOLD(policy_head); 2534 mutex_exit(&connp->conn_lock); 2535 /* 2536 * As this is a non-hardbound connection we need 2537 * to look at both per-socket policy and global 2538 * policy. 2539 */ 2540 mp = ipsec_check_global_policy(mp, connp, 2541 ipha, ip6h, ira, ns); 2542 if (policy_head != NULL) 2543 IPPH_REFRELE(policy_head, ns); 2544 return (mp); 2545 } 2546 } 2547 2548 mutex_enter(&connp->conn_lock); 2549 /* Connection is closing */ 2550 if (connp->conn_state_flags & CONN_CONDEMNED) { 2551 mutex_exit(&connp->conn_lock); 2552 ip_drop_packet(mp, B_TRUE, NULL, 2553 DROPPER(ipss, ipds_spd_got_clear), 2554 &ipss->ipsec_spd_dropper); 2555 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2556 return (NULL); 2557 } 2558 2559 /* 2560 * Once a connection is latched it remains so for life, the conn_latch 2561 * pointer on the conn has not changed, simply initializing ipl here 2562 * as the earlier initialization was done only in the cleartext case. 2563 */ 2564 if ((ipl = connp->conn_latch) == NULL) { 2565 mblk_t *retmp; 2566 policy_head = connp->conn_policy; 2567 2568 /* Hold a reference in case the conn is closing */ 2569 if (policy_head != NULL) 2570 IPPH_REFHOLD(policy_head); 2571 mutex_exit(&connp->conn_lock); 2572 /* 2573 * We don't have policies cached in the conn 2574 * for this stream. So, look at the global 2575 * policy. It will check against conn or global 2576 * depending on whichever is stronger. 
 */
        retmp = ipsec_check_global_policy(mp, connp,
            ipha, ip6h, ira, ns);
        if (policy_head != NULL)
            IPPH_REFRELE(policy_head, ns);
        return (retmp);
    }

    IPLATCH_REFHOLD(ipl);
    /* Hold a reference on conn_latch_in_action in case the conn is closing. */
    ap = connp->conn_latch_in_action;
    if (ap != NULL)
        IPACT_REFHOLD(ap);
    mutex_exit(&connp->conn_lock);

    if (ap != NULL) {
        /* Policy is cached & latched; fast(er) path. */
        const char *reason;
        kstat_named_t *counter;

        if (ipsec_check_ipsecin_latch(ira, mp, ipl, ap,
            ipha, ip6h, &reason, &counter, connp, ns)) {
            BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
            IPLATCH_REFRELE(ipl);
            IPACT_REFRELE(ap);
            return (mp);
        }
        ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0,
            SL_ERROR|SL_WARN|SL_CONSOLE,
            "ipsec inbound policy mismatch: %s, packet dropped\n",
            reason);
        ip_drop_packet(mp, B_TRUE, NULL, counter,
            &ipss->ipsec_spd_dropper);
        BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
        IPLATCH_REFRELE(ipl);
        IPACT_REFRELE(ap);
        return (NULL);
    }
    if ((p = connp->conn_latch_in_policy) == NULL) {
        ipsec_weird_null_inbound_policy++;
        IPLATCH_REFRELE(ipl);
        return (mp);
    }

    unique_id = conn_to_unique(connp, mp, ipha, ip6h);
    IPPOL_REFHOLD(p);
    mp = ipsec_check_ipsecin_policy(mp, p, ipha, ip6h, unique_id, ira, ns);
    /*
     * NOTE: ipsecIn{Failed,Succeeded} are bumped by
     * ipsec_check_ipsecin_policy().
     */
    if (mp != NULL)
        ipsec_latch_inbound(connp, ira);
    IPLATCH_REFRELE(ipl);
    return (mp);
}

/*
 * Return the number of bytes that precede the header at hptr in the mblk
 * chain mp. This handles all sorts of cases like tunnel-mode and ICMP
 * errors, where the header of interest is not at mp->b_rptr.
 */
static int
prepended_length(mblk_t *mp, uintptr_t hptr)
{
    int rc = 0;

    while (mp != NULL) {
        if (hptr >= (uintptr_t)mp->b_rptr && hptr <
            (uintptr_t)mp->b_wptr) {
            rc += (int)(hptr - (uintptr_t)mp->b_rptr);
            break;  /* out of while loop */
        }
        rc += (int)MBLKL(mp);
        mp = mp->b_cont;
    }

    if (mp == NULL) {
        /*
         * IF (big IF) we make it here by naturally exiting the loop,
         * then the header at hptr isn't in the mblk chain "mp" at all.
         *
         * The only case where this happens is with a reversed IP
         * header that gets passed up by inbound ICMP processing.
         * This unfortunately triggers longstanding bug 6478464. For
         * now, just pass up 0 for the answer.
         */
#ifdef DEBUG_NOT_UNTIL_6478464
        ASSERT(mp != NULL);
#endif
        rc = 0;
    }

    return (rc);
}

/*
 * Returns:
 *
 * SELRET_NOMEM --> the msgpullup() needed to gather things failed.
 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment
 *                   gathering, the initial fragment is too short for
 *                   useful data. Only returned if SEL_TUNNEL_MODE is
 *                   set.
 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data.
 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller
 *                    should put this packet in a fragment-gathering queue.
 *                    Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY
 *                    are set.
 *
 * Note that ipha/ip6h can be in a different mblk (mp->b_cont) in the case
 * of tunneled packets.
 * Also, mp->b_rptr can be an ICMP error where ipha/ip6h is the packet in
 * error past the ICMP error.
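 *
 * A hypothetical caller would dispatch on the return value along these
 * lines (sketch only; compare ipsec_check_global_policy() above):
 *
 *     switch (ipsec_init_inbound_sel(&sel, mp, ipha, ip6h, sel_flags)) {
 *     case SELRET_NOMEM:
 *         drop with DROPPER(ipss, ipds_spd_nomem);
 *     case SELRET_BADPKT:
 *         malformed packet; the caller frees it;
 *     case SELRET_TUNFRAG:
 *         queue the fragment for gathering and try again later;
 *     case SELRET_SUCCESS:
 *         "sel" is now usable for policy lookup;
 *     }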
2689 */ 2690 static selret_t 2691 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2692 ip6_t *ip6h, uint8_t sel_flags) 2693 { 2694 uint16_t *ports; 2695 int outer_hdr_len = 0; /* For ICMP or tunnel-mode cases... */ 2696 ushort_t hdr_len; 2697 mblk_t *spare_mp = NULL; 2698 uint8_t *nexthdrp, *transportp; 2699 uint8_t nexthdr; 2700 uint8_t icmp_proto; 2701 ip_pkt_t ipp; 2702 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2703 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2704 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2705 boolean_t post_frag = (sel_flags & SEL_POST_FRAG); 2706 2707 ASSERT((ipha == NULL && ip6h != NULL) || 2708 (ipha != NULL && ip6h == NULL)); 2709 2710 if (ip6h != NULL) { 2711 outer_hdr_len = prepended_length(mp, (uintptr_t)ip6h); 2712 nexthdr = ip6h->ip6_nxt; 2713 icmp_proto = IPPROTO_ICMPV6; 2714 sel->ips_isv4 = B_FALSE; 2715 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2716 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2717 2718 bzero(&ipp, sizeof (ipp)); 2719 2720 switch (nexthdr) { 2721 case IPPROTO_HOPOPTS: 2722 case IPPROTO_ROUTING: 2723 case IPPROTO_DSTOPTS: 2724 case IPPROTO_FRAGMENT: 2725 /* 2726 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2727 * mblk that's contiguous to feed it 2728 */ 2729 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2730 return (SELRET_NOMEM); 2731 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2732 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2733 &hdr_len, &nexthdrp)) { 2734 /* Malformed packet - caller frees. */ 2735 ipsec_freemsg_chain(spare_mp); 2736 return (SELRET_BADPKT); 2737 } 2738 /* Repopulate now that we have the whole packet */ 2739 ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len); 2740 (void) ip_find_hdr_v6(spare_mp, ip6h, B_FALSE, &ipp, 2741 NULL); 2742 nexthdr = *nexthdrp; 2743 /* We can just extract based on hdr_len now. */ 2744 break; 2745 default: 2746 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &ipp, NULL); 2747 hdr_len = IPV6_HDR_LEN; 2748 break; 2749 } 2750 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2751 /* IPv6 Fragment */ 2752 ipsec_freemsg_chain(spare_mp); 2753 return (SELRET_TUNFRAG); 2754 } 2755 transportp = (uint8_t *)ip6h + hdr_len; 2756 } else { 2757 outer_hdr_len = prepended_length(mp, (uintptr_t)ipha); 2758 icmp_proto = IPPROTO_ICMP; 2759 sel->ips_isv4 = B_TRUE; 2760 sel->ips_local_addr_v4 = ipha->ipha_dst; 2761 sel->ips_remote_addr_v4 = ipha->ipha_src; 2762 nexthdr = ipha->ipha_protocol; 2763 hdr_len = IPH_HDR_LENGTH(ipha); 2764 2765 if (port_policy_present && 2766 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2767 !is_icmp) { 2768 /* IPv4 Fragment */ 2769 ipsec_freemsg_chain(spare_mp); 2770 return (SELRET_TUNFRAG); 2771 } 2772 transportp = (uint8_t *)ipha + hdr_len; 2773 } 2774 sel->ips_protocol = nexthdr; 2775 2776 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2777 nexthdr != IPPROTO_SCTP && nexthdr != icmp_proto) || 2778 (!port_policy_present && !post_frag && tunnel_mode)) { 2779 sel->ips_remote_port = sel->ips_local_port = 0; 2780 ipsec_freemsg_chain(spare_mp); 2781 return (SELRET_SUCCESS); 2782 } 2783 2784 if (transportp + 4 > mp->b_wptr) { 2785 /* If we didn't pullup a copy already, do so now. */ 2786 /* 2787 * XXX performance, will upper-layers frequently split TCP/UDP 2788 * apart from IP or options? If so, perhaps we should revisit 2789 * the spare_mp strategy. 
2790 */ 2791 ipsec_hdr_pullup_needed++; 2792 if (spare_mp == NULL && 2793 (spare_mp = msgpullup(mp, -1)) == NULL) { 2794 return (SELRET_NOMEM); 2795 } 2796 transportp = &spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2797 } 2798 2799 if (nexthdr == icmp_proto) { 2800 sel->ips_icmp_type = *transportp++; 2801 sel->ips_icmp_code = *transportp; 2802 sel->ips_remote_port = sel->ips_local_port = 0; 2803 } else { 2804 ports = (uint16_t *)transportp; 2805 sel->ips_remote_port = *ports++; 2806 sel->ips_local_port = *ports; 2807 } 2808 ipsec_freemsg_chain(spare_mp); 2809 return (SELRET_SUCCESS); 2810 } 2811 2812 /* 2813 * This is called with a b_next chain of messages from the fragcache code, 2814 * hence it needs to discard a chain on error. 2815 */ 2816 static boolean_t 2817 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2818 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2819 { 2820 /* 2821 * XXX cut&paste shared with ipsec_init_inbound_sel 2822 */ 2823 uint16_t *ports; 2824 ushort_t hdr_len; 2825 mblk_t *spare_mp = NULL; 2826 uint8_t *nexthdrp; 2827 uint8_t nexthdr; 2828 uint8_t *typecode; 2829 uint8_t check_proto; 2830 2831 ASSERT((ipha == NULL && ip6h != NULL) || 2832 (ipha != NULL && ip6h == NULL)); 2833 2834 if (ip6h != NULL) { 2835 check_proto = IPPROTO_ICMPV6; 2836 nexthdr = ip6h->ip6_nxt; 2837 switch (nexthdr) { 2838 case IPPROTO_HOPOPTS: 2839 case IPPROTO_ROUTING: 2840 case IPPROTO_DSTOPTS: 2841 case IPPROTO_FRAGMENT: 2842 /* 2843 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2844 * mblk that's contiguous to feed it 2845 */ 2846 spare_mp = msgpullup(mp, -1); 2847 if (spare_mp == NULL || 2848 !ip_hdr_length_nexthdr_v6(spare_mp, 2849 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2850 &hdr_len, &nexthdrp)) { 2851 /* Always works, even if NULL. */ 2852 ipsec_freemsg_chain(spare_mp); 2853 ip_drop_packet_chain(mp, B_FALSE, NULL, 2854 DROPPER(ipss, ipds_spd_nomem), 2855 &ipss->ipsec_spd_dropper); 2856 return (B_FALSE); 2857 } else { 2858 nexthdr = *nexthdrp; 2859 /* We can just extract based on hdr_len now. */ 2860 } 2861 break; 2862 default: 2863 hdr_len = IPV6_HDR_LEN; 2864 break; 2865 } 2866 } else { 2867 check_proto = IPPROTO_ICMP; 2868 hdr_len = IPH_HDR_LENGTH(ipha); 2869 nexthdr = ipha->ipha_protocol; 2870 } 2871 2872 sel->ips_protocol = nexthdr; 2873 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2874 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2875 sel->ips_local_port = sel->ips_remote_port = 0; 2876 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2877 return (B_TRUE); 2878 } 2879 2880 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2881 /* If we didn't pullup a copy already, do so now. */ 2882 /* 2883 * XXX performance, will upper-layers frequently split TCP/UDP 2884 * apart from IP or options? If so, perhaps we should revisit 2885 * the spare_mp strategy. 2886 * 2887 * XXX should this be msgpullup(mp, hdr_len+4) ??? 
2888 */ 2889 if (spare_mp == NULL && 2890 (spare_mp = msgpullup(mp, -1)) == NULL) { 2891 ip_drop_packet_chain(mp, B_FALSE, NULL, 2892 DROPPER(ipss, ipds_spd_nomem), 2893 &ipss->ipsec_spd_dropper); 2894 return (B_FALSE); 2895 } 2896 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2897 } else { 2898 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2899 } 2900 2901 if (nexthdr == check_proto) { 2902 typecode = (uint8_t *)ports; 2903 sel->ips_icmp_type = *typecode++; 2904 sel->ips_icmp_code = *typecode; 2905 sel->ips_remote_port = sel->ips_local_port = 0; 2906 } else { 2907 sel->ips_local_port = *ports++; 2908 sel->ips_remote_port = *ports; 2909 } 2910 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2911 return (B_TRUE); 2912 } 2913 2914 /* 2915 * Prepend an mblk with a ipsec_crypto_t to the message chain. 2916 * Frees the argument and returns NULL should the allocation fail. 2917 * Returns the pointer to the crypto data part. 2918 */ 2919 mblk_t * 2920 ipsec_add_crypto_data(mblk_t *data_mp, ipsec_crypto_t **icp) 2921 { 2922 mblk_t *mp; 2923 2924 mp = allocb(sizeof (ipsec_crypto_t), BPRI_MED); 2925 if (mp == NULL) { 2926 freemsg(data_mp); 2927 return (NULL); 2928 } 2929 bzero(mp->b_rptr, sizeof (ipsec_crypto_t)); 2930 mp->b_wptr += sizeof (ipsec_crypto_t); 2931 mp->b_cont = data_mp; 2932 mp->b_datap->db_type = M_EVENT; /* For ASSERT */ 2933 *icp = (ipsec_crypto_t *)mp->b_rptr; 2934 return (mp); 2935 } 2936 2937 /* 2938 * Remove what was prepended above. Return b_cont and a pointer to the 2939 * crypto data. 2940 * The caller must call ipsec_free_crypto_data for mblk once it is done 2941 * with the crypto data. 2942 */ 2943 mblk_t * 2944 ipsec_remove_crypto_data(mblk_t *crypto_mp, ipsec_crypto_t **icp) 2945 { 2946 ASSERT(crypto_mp->b_datap->db_type == M_EVENT); 2947 ASSERT(MBLKL(crypto_mp) == sizeof (ipsec_crypto_t)); 2948 2949 *icp = (ipsec_crypto_t *)crypto_mp->b_rptr; 2950 return (crypto_mp->b_cont); 2951 } 2952 2953 /* 2954 * Free what was prepended above. Return b_cont. 2955 */ 2956 mblk_t * 2957 ipsec_free_crypto_data(mblk_t *crypto_mp) 2958 { 2959 mblk_t *mp; 2960 2961 ASSERT(crypto_mp->b_datap->db_type == M_EVENT); 2962 ASSERT(MBLKL(crypto_mp) == sizeof (ipsec_crypto_t)); 2963 2964 mp = crypto_mp->b_cont; 2965 freeb(crypto_mp); 2966 return (mp); 2967 } 2968 2969 /* 2970 * Create an ipsec_action_t based on the way an inbound packet was protected. 2971 * Used to reflect traffic back to a sender. 2972 * 2973 * We don't bother interning the action into the hash table. 2974 */ 2975 ipsec_action_t * 2976 ipsec_in_to_out_action(ip_recv_attr_t *ira) 2977 { 2978 ipsa_t *ah_assoc, *esp_assoc; 2979 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2980 ipsec_action_t *ap; 2981 boolean_t unique; 2982 2983 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2984 2985 if (ap == NULL) 2986 return (NULL); 2987 2988 bzero(ap, sizeof (*ap)); 2989 HASH_NULL(ap, ipa_hash); 2990 ap->ipa_next = NULL; 2991 ap->ipa_refs = 1; 2992 2993 /* 2994 * Get the algorithms that were used for this packet. 
2995 */ 2996 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2997 ap->ipa_act.ipa_log = 0; 2998 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2999 3000 ah_assoc = ira->ira_ipsec_ah_sa; 3001 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 3002 3003 esp_assoc = ira->ira_ipsec_esp_sa; 3004 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 3005 3006 if (esp_assoc != NULL) { 3007 encr_alg = esp_assoc->ipsa_encr_alg; 3008 espa_alg = esp_assoc->ipsa_auth_alg; 3009 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 3010 } 3011 if (ah_assoc != NULL) 3012 auth_alg = ah_assoc->ipsa_auth_alg; 3013 3014 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 3015 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 3016 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 3017 ap->ipa_act.ipa_apply.ipp_use_se = 3018 !!(ira->ira_flags & IRAF_IPSEC_DECAPS); 3019 unique = B_FALSE; 3020 3021 if (esp_assoc != NULL) { 3022 ap->ipa_act.ipa_apply.ipp_espa_minbits = 3023 esp_assoc->ipsa_authkeybits; 3024 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 3025 esp_assoc->ipsa_authkeybits; 3026 ap->ipa_act.ipa_apply.ipp_espe_minbits = 3027 esp_assoc->ipsa_encrkeybits; 3028 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 3029 esp_assoc->ipsa_encrkeybits; 3030 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 3031 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 3032 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 3033 unique = B_TRUE; 3034 } 3035 if (ah_assoc != NULL) { 3036 ap->ipa_act.ipa_apply.ipp_ah_minbits = 3037 ah_assoc->ipsa_authkeybits; 3038 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 3039 ah_assoc->ipsa_authkeybits; 3040 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 3041 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 3042 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 3043 unique = B_TRUE; 3044 } 3045 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3046 ap->ipa_want_unique = unique; 3047 ap->ipa_allow_clear = B_FALSE; 3048 ap->ipa_want_se = !!(ira->ira_flags & IRAF_IPSEC_DECAPS); 3049 ap->ipa_want_ah = (ah_assoc != NULL); 3050 ap->ipa_want_esp = (esp_assoc != NULL); 3051 3052 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 3053 3054 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3055 3056 return (ap); 3057 } 3058 3059 3060 /* 3061 * Compute the worst-case amount of extra space required by an action. 3062 * Note that, because of the ESP considerations listed below, this is 3063 * actually not the same as the best-case reduction in the MTU; in the 3064 * future, we should pass additional information to this function to 3065 * allow the actual MTU impact to be computed. 3066 * 3067 * AH: Revisit this if we implement algorithms with 3068 * a verifier size of more than 12 bytes. 3069 * 3070 * ESP: A more exact but more messy computation would take into 3071 * account the interaction between the cipher block size and the 3072 * effective MTU, yielding the inner payload size which reflects a 3073 * packet with *minimum* ESP padding.. 
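 *
 * Worked example (added for illustration; the terms follow directly
 * from the code below): an IPSEC_ACT_APPLY action with AH, ESP and
 * self-encapsulation all enabled is charged
 *
 *     IPSEC_MAX_AH_HDR_SIZE           (AH header)
 *     + IPSEC_MAX_ESP_HDR_SIZE        (ESP header)
 *     + sizeof (struct udphdr)        (room for UDP-encapsulated ESP)
 *     + IP_SIMPLE_HDR_LENGTH          (self-encapsulation IP header)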
3074 */ 3075 int32_t 3076 ipsec_act_ovhd(const ipsec_act_t *act) 3077 { 3078 int32_t overhead = 0; 3079 3080 if (act->ipa_type == IPSEC_ACT_APPLY) { 3081 const ipsec_prot_t *ipp = &act->ipa_apply; 3082 3083 if (ipp->ipp_use_ah) 3084 overhead += IPSEC_MAX_AH_HDR_SIZE; 3085 if (ipp->ipp_use_esp) { 3086 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3087 overhead += sizeof (struct udphdr); 3088 } 3089 if (ipp->ipp_use_se) 3090 overhead += IP_SIMPLE_HDR_LENGTH; 3091 } 3092 return (overhead); 3093 } 3094 3095 /* 3096 * This hash function is used only when creating policies and thus is not 3097 * performance-critical for packet flows. 3098 * 3099 * Future work: canonicalize the structures hashed with this (i.e., 3100 * zeroize padding) so the hash works correctly. 3101 */ 3102 /* ARGSUSED */ 3103 static uint32_t 3104 policy_hash(int size, const void *start, const void *end) 3105 { 3106 return (0); 3107 } 3108 3109 3110 /* 3111 * Hash function macros for each address type. 3112 * 3113 * The IPV6 hash function assumes that the low order 32-bits of the 3114 * address (typically containing the low order 24 bits of the mac 3115 * address) are reasonably well-distributed. Revisit this if we run 3116 * into trouble from lots of collisions on ::1 addresses and the like 3117 * (seems unlikely). 3118 */ 3119 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3120 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3121 3122 /* 3123 * These two hash functions should produce coordinated values 3124 * but have slightly different roles. 3125 */ 3126 static uint32_t 3127 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3128 { 3129 uint32_t valid = selkey->ipsl_valid; 3130 ipsec_stack_t *ipss = ns->netstack_ipsec; 3131 3132 if (!(valid & IPSL_REMOTE_ADDR)) 3133 return (IPSEC_SEL_NOHASH); 3134 3135 if (valid & IPSL_IPV4) { 3136 if (selkey->ipsl_remote_pfxlen == 32) { 3137 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3138 ipss->ipsec_spd_hashsize)); 3139 } 3140 } 3141 if (valid & IPSL_IPV6) { 3142 if (selkey->ipsl_remote_pfxlen == 128) { 3143 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3144 ipss->ipsec_spd_hashsize)); 3145 } 3146 } 3147 return (IPSEC_SEL_NOHASH); 3148 } 3149 3150 static uint32_t 3151 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3152 { 3153 if (sel->ips_isv4) { 3154 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3155 root->ipr_nchains)); 3156 } 3157 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3158 } 3159 3160 /* 3161 * Intern actions into the action hash table. 3162 */ 3163 ipsec_action_t * 3164 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3165 { 3166 int i; 3167 uint32_t hval; 3168 ipsec_action_t *ap; 3169 ipsec_action_t *prev = NULL; 3170 int32_t overhead, maxovhd = 0; 3171 boolean_t allow_clear = B_FALSE; 3172 boolean_t want_ah = B_FALSE; 3173 boolean_t want_esp = B_FALSE; 3174 boolean_t want_se = B_FALSE; 3175 boolean_t want_unique = B_FALSE; 3176 ipsec_stack_t *ipss = ns->netstack_ipsec; 3177 3178 /* 3179 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3180 * so we can use a non-trivial policy_hash function. 
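 *
 * To illustrate the loop below (example added for clarity): interning
 * the array { A, B, C } proceeds back-to-front. C is interned first
 * with ipa_next == NULL, then B with ipa_next == C, then A with
 * ipa_next == B; the caller gets back the head of the shared, interned
 * chain A -> B -> C.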
3181 */ 3182 for (i = n-1; i >= 0; i--) { 3183 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3184 3185 HASH_LOCK(ipss->ipsec_action_hash, hval); 3186 3187 for (HASH_ITERATE(ap, ipa_hash, 3188 ipss->ipsec_action_hash, hval)) { 3189 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3190 continue; 3191 if (ap->ipa_next != prev) 3192 continue; 3193 break; 3194 } 3195 if (ap != NULL) { 3196 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3197 prev = ap; 3198 continue; 3199 } 3200 /* 3201 * need to allocate a new one.. 3202 */ 3203 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3204 if (ap == NULL) { 3205 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3206 if (prev != NULL) 3207 ipsec_action_free(prev); 3208 return (NULL); 3209 } 3210 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3211 3212 ap->ipa_next = prev; 3213 ap->ipa_act = a[i]; 3214 3215 overhead = ipsec_act_ovhd(&a[i]); 3216 if (maxovhd < overhead) 3217 maxovhd = overhead; 3218 3219 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3220 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3221 allow_clear = B_TRUE; 3222 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3223 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3224 3225 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3226 want_ah |= ipp->ipp_use_ah; 3227 want_esp |= ipp->ipp_use_esp; 3228 want_se |= ipp->ipp_use_se; 3229 want_unique |= ipp->ipp_use_unique; 3230 } 3231 ap->ipa_allow_clear = allow_clear; 3232 ap->ipa_want_ah = want_ah; 3233 ap->ipa_want_esp = want_esp; 3234 ap->ipa_want_se = want_se; 3235 ap->ipa_want_unique = want_unique; 3236 ap->ipa_refs = 1; /* from the hash table */ 3237 ap->ipa_ovhd = maxovhd; 3238 if (prev) 3239 prev->ipa_refs++; 3240 prev = ap; 3241 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3242 } 3243 3244 ap->ipa_refs++; /* caller's reference */ 3245 3246 return (ap); 3247 } 3248 3249 /* 3250 * Called when refcount goes to 0, indicating that all references to this 3251 * node are gone. 3252 * 3253 * This does not unchain the action from the hash table. 3254 */ 3255 void 3256 ipsec_action_free(ipsec_action_t *ap) 3257 { 3258 for (;;) { 3259 ipsec_action_t *np = ap->ipa_next; 3260 ASSERT(ap->ipa_refs == 0); 3261 ASSERT(ap->ipa_hash.hash_pp == NULL); 3262 kmem_cache_free(ipsec_action_cache, ap); 3263 ap = np; 3264 /* Inlined IPACT_REFRELE -- avoid recursion */ 3265 if (ap == NULL) 3266 break; 3267 membar_exit(); 3268 if (atomic_dec_32_nv(&(ap)->ipa_refs) != 0) 3269 break; 3270 /* End inlined IPACT_REFRELE */ 3271 } 3272 } 3273 3274 /* 3275 * Called when the action hash table goes away. 3276 * 3277 * The actions can be queued on an mblk with ipsec_in or 3278 * ipsec_out, hence the actions might still be around. 3279 * But we decrement ipa_refs here since we no longer have 3280 * a reference to the action from the hash table. 3281 */ 3282 static void 3283 ipsec_action_free_table(ipsec_action_t *ap) 3284 { 3285 while (ap != NULL) { 3286 ipsec_action_t *np = ap->ipa_next; 3287 3288 /* FIXME: remove? 
*/ 3289 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3290 (void *)ap, ap->ipa_refs); 3291 ASSERT(ap->ipa_refs > 0); 3292 IPACT_REFRELE(ap); 3293 ap = np; 3294 } 3295 } 3296 3297 /* 3298 * Need to walk all stack instances since the reclaim function 3299 * is global for all instances 3300 */ 3301 /* ARGSUSED */ 3302 static void 3303 ipsec_action_reclaim(void *arg) 3304 { 3305 netstack_handle_t nh; 3306 netstack_t *ns; 3307 ipsec_stack_t *ipss; 3308 3309 netstack_next_init(&nh); 3310 while ((ns = netstack_next(&nh)) != NULL) { 3311 /* 3312 * netstack_next() can return a netstack_t with a NULL 3313 * netstack_ipsec at boot time. 3314 */ 3315 if ((ipss = ns->netstack_ipsec) == NULL) { 3316 netstack_rele(ns); 3317 continue; 3318 } 3319 ipsec_action_reclaim_stack(ipss); 3320 netstack_rele(ns); 3321 } 3322 netstack_next_fini(&nh); 3323 } 3324 3325 /* 3326 * Periodically sweep action hash table for actions with refcount==1, and 3327 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3328 * because we can't close the race between another thread finding the action 3329 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3330 * Instead, we run this function sporadically to clean up after ourselves; 3331 * we also set it as the "reclaim" function for the action kmem_cache. 3332 * 3333 * Note that it may take several passes of ipsec_action_gc() to free all 3334 * "stale" actions. 3335 */ 3336 static void 3337 ipsec_action_reclaim_stack(ipsec_stack_t *ipss) 3338 { 3339 int i; 3340 3341 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3342 ipsec_action_t *ap, *np; 3343 3344 /* skip the lock if nobody home */ 3345 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3346 continue; 3347 3348 HASH_LOCK(ipss->ipsec_action_hash, i); 3349 for (ap = ipss->ipsec_action_hash[i].hash_head; 3350 ap != NULL; ap = np) { 3351 ASSERT(ap->ipa_refs > 0); 3352 np = ap->ipa_hash.hash_next; 3353 if (ap->ipa_refs > 1) 3354 continue; 3355 HASH_UNCHAIN(ap, ipa_hash, 3356 ipss->ipsec_action_hash, i); 3357 IPACT_REFRELE(ap); 3358 } 3359 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3360 } 3361 } 3362 3363 /* 3364 * Intern a selector set into the selector set hash table. 3365 * This is simpler than the actions case.. 3366 */ 3367 static ipsec_sel_t * 3368 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3369 { 3370 ipsec_sel_t *sp; 3371 uint32_t hval, bucket; 3372 ipsec_stack_t *ipss = ns->netstack_ipsec; 3373 3374 /* 3375 * Exactly one AF bit should be set in selkey. 3376 */ 3377 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3378 !(selkey->ipsl_valid & IPSL_IPV6)); 3379 3380 hval = selkey_hash(selkey, ns); 3381 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3382 selkey->ipsl_sel_hval = hval; 3383 3384 bucket = (hval == IPSEC_SEL_NOHASH) ? 
        0 : hval;

    ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket));
    HASH_LOCK(ipss->ipsec_sel_hash, bucket);

    for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) {
        if (bcmp(&sp->ipsl_key, selkey,
            offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0)
            break;
    }
    if (sp != NULL) {
        sp->ipsl_refs++;

        HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);
        return (sp);
    }

    sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP);
    if (sp == NULL) {
        HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);
        return (NULL);
    }

    HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket);
    sp->ipsl_refs = 2;  /* one for hash table, one for caller */
    sp->ipsl_key = *selkey;
    /* Set to uninitialized and have insertion into the polhead fix things. */
    if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH)
        sp->ipsl_key.ipsl_pol_hval = 0;
    else
        sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH;

    HASH_UNLOCK(ipss->ipsec_sel_hash, bucket);

    return (sp);
}

static void
ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns)
{
    ipsec_sel_t *sp = *spp;
    int hval = sp->ipsl_key.ipsl_sel_hval;
    ipsec_stack_t *ipss = ns->netstack_ipsec;

    *spp = NULL;

    if (hval == IPSEC_SEL_NOHASH)
        hval = 0;

    ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval));
    HASH_LOCK(ipss->ipsec_sel_hash, hval);
    if (--sp->ipsl_refs == 1) {
        HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval);
        sp->ipsl_refs--;
        HASH_UNLOCK(ipss->ipsec_sel_hash, hval);
        ASSERT(sp->ipsl_refs == 0);
        kmem_cache_free(ipsec_sel_cache, sp);
        return;
    }

    HASH_UNLOCK(ipss->ipsec_sel_hash, hval);
}

/*
 * Free a policy rule which we know is no longer being referenced.
 */
void
ipsec_policy_free(ipsec_policy_t *ipp)
{
    ASSERT(ipp->ipsp_refs == 0);
    ASSERT(ipp->ipsp_sel != NULL);
    ASSERT(ipp->ipsp_act != NULL);
    ASSERT(ipp->ipsp_netstack != NULL);

    ipsec_sel_rel(&ipp->ipsp_sel, ipp->ipsp_netstack);
    IPACT_REFRELE(ipp->ipsp_act);
    kmem_cache_free(ipsec_pol_cache, ipp);
}

/*
 * Construction of new policy rules; construct a policy, and add it to
 * the appropriate tables.
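 *
 * A typical (hypothetical) caller builds the rule and, with the polhead
 * write-locked, validates and inserts it; sketch only:
 *
 *     ipp = ipsec_policy_create(&keys, acts, nacts, prio, NULL, ns);
 *     if (ipp == NULL)
 *         return (ENOMEM);
 *     rw_enter(&php->iph_lock, RW_WRITER);
 *     if (ipsec_check_policy(php, ipp, dir))
 *         ipsec_enter_policy(php, ipp, dir, ns);
 *     else
 *         IPPOL_REFRELE(ipp);
 *     rw_exit(&php->iph_lock);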
3467 */ 3468 ipsec_policy_t * 3469 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3470 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3471 { 3472 ipsec_action_t *ap; 3473 ipsec_sel_t *sp; 3474 ipsec_policy_t *ipp; 3475 ipsec_stack_t *ipss = ns->netstack_ipsec; 3476 3477 if (index_ptr == NULL) 3478 index_ptr = &ipss->ipsec_next_policy_index; 3479 3480 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3481 ap = ipsec_act_find(a, nacts, ns); 3482 sp = ipsec_find_sel(keys, ns); 3483 3484 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3485 if (ap != NULL) { 3486 IPACT_REFRELE(ap); 3487 } 3488 if (sp != NULL) 3489 ipsec_sel_rel(&sp, ns); 3490 if (ipp != NULL) 3491 kmem_cache_free(ipsec_pol_cache, ipp); 3492 return (NULL); 3493 } 3494 3495 HASH_NULL(ipp, ipsp_hash); 3496 3497 ipp->ipsp_netstack = ns; /* Needed for ipsec_policy_free */ 3498 ipp->ipsp_refs = 1; /* caller's reference */ 3499 ipp->ipsp_sel = sp; 3500 ipp->ipsp_act = ap; 3501 ipp->ipsp_prio = prio; /* rule priority */ 3502 ipp->ipsp_index = *index_ptr; 3503 (*index_ptr)++; 3504 3505 return (ipp); 3506 } 3507 3508 static void 3509 ipsec_update_present_flags(ipsec_stack_t *ipss) 3510 { 3511 boolean_t hashpol; 3512 3513 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3514 3515 if (hashpol) { 3516 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3517 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3518 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3519 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3520 return; 3521 } 3522 3523 ipss->ipsec_outbound_v4_policy_present = (NULL != 3524 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3525 ipr_nonhash[IPSEC_AF_V4]); 3526 ipss->ipsec_outbound_v6_policy_present = (NULL != 3527 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3528 ipr_nonhash[IPSEC_AF_V6]); 3529 ipss->ipsec_inbound_v4_policy_present = (NULL != 3530 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3531 ipr_nonhash[IPSEC_AF_V4]); 3532 ipss->ipsec_inbound_v6_policy_present = (NULL != 3533 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3534 ipr_nonhash[IPSEC_AF_V6]); 3535 } 3536 3537 boolean_t 3538 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3539 netstack_t *ns) 3540 { 3541 ipsec_sel_t *sp; 3542 ipsec_policy_t *ip, *nip, *head; 3543 int af; 3544 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3545 3546 sp = ipsec_find_sel(keys, ns); 3547 3548 if (sp == NULL) 3549 return (B_FALSE); 3550 3551 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
        IPSEC_AF_V4 : IPSEC_AF_V6;

    rw_enter(&php->iph_lock, RW_WRITER);

    if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) {
        head = pr->ipr_nonhash[af];
    } else {
        head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head;
    }

    for (ip = head; ip != NULL; ip = nip) {
        nip = ip->ipsp_hash.hash_next;
        if (ip->ipsp_sel != sp) {
            continue;
        }

        IPPOL_UNCHAIN(php, ip);

        php->iph_gen++;
        ipsec_update_present_flags(ns->netstack_ipsec);

        rw_exit(&php->iph_lock);

        ipsec_sel_rel(&sp, ns);

        return (B_TRUE);
    }

    rw_exit(&php->iph_lock);
    ipsec_sel_rel(&sp, ns);
    return (B_FALSE);
}

int
ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index,
    netstack_t *ns)
{
    boolean_t found = B_FALSE;
    ipsec_policy_t ipkey;
    ipsec_policy_t *ip;
    avl_index_t where;

    bzero(&ipkey, sizeof (ipkey));
    ipkey.ipsp_index = policy_index;

    rw_enter(&php->iph_lock, RW_WRITER);

    /*
     * We could be cleverer here about the walk, but well,
     * (k+1)*log(N) will do for now (k == number of matches,
     * N == number of table entries).
     */
    for (;;) {
        ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid,
            (void *)&ipkey, &where);
        ASSERT(ip == NULL);

        ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER);

        if (ip == NULL)
            break;

        if (ip->ipsp_index != policy_index) {
            ASSERT(ip->ipsp_index > policy_index);
            break;
        }

        IPPOL_UNCHAIN(php, ip);
        found = B_TRUE;
    }

    if (found) {
        php->iph_gen++;
        ipsec_update_present_flags(ns->netstack_ipsec);
    }

    rw_exit(&php->iph_lock);

    return (found ? 0 : ENOENT);
}

/*
 * Given a constructed ipsec_policy_t policy rule, see if it can be entered
 * into the correct policy ruleset. As a side-effect, it sets "ipp"'s
 * ipsp_pol_hval hash value.
 *
 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
 * duplicate policy exists with exactly the same selectors, or an ICMP
 * rule exists with a different encryption/authentication action).
 */
boolean_t
ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
{
    ipsec_policy_root_t *pr = &php->iph_root[direction];
    int af = -1;
    ipsec_policy_t *p2, *head;
    uint8_t check_proto;
    ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
    uint32_t valid = selkey->ipsl_valid;

    if (valid & IPSL_IPV6) {
        ASSERT(!(valid & IPSL_IPV4));
        af = IPSEC_AF_V6;
        check_proto = IPPROTO_ICMPV6;
    } else {
        ASSERT(valid & IPSL_IPV4);
        af = IPSEC_AF_V4;
        check_proto = IPPROTO_ICMP;
    }

    ASSERT(RW_WRITE_HELD(&php->iph_lock));

    /*
     * Double-check that we don't have any duplicate selectors here.
     * Because selectors are interned below, we need only compare pointers
     * for equality.
     */
    if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
        head = pr->ipr_nonhash[af];
    } else {
        selkey->ipsl_pol_hval =
            (selkey->ipsl_valid & IPSL_IPV4) ?
3673 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3674 pr->ipr_nchains) : 3675 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3676 pr->ipr_nchains); 3677 3678 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3679 } 3680 3681 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3682 if (p2->ipsp_sel == ipp->ipsp_sel) 3683 return (B_FALSE); 3684 } 3685 3686 /* 3687 * If it's ICMP and not a drop or pass rule, run through the ICMP 3688 * rules and make sure the action is either new or the same as any 3689 * other actions. We don't have to check the full chain because 3690 * discard and bypass will override all other actions 3691 */ 3692 3693 if (valid & IPSL_PROTOCOL && 3694 selkey->ipsl_proto == check_proto && 3695 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3696 3697 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3698 3699 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3700 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3701 (p2->ipsp_act->ipa_act.ipa_type == 3702 IPSEC_ACT_APPLY)) { 3703 return (ipsec_compare_action(p2, ipp)); 3704 } 3705 } 3706 } 3707 3708 return (B_TRUE); 3709 } 3710 3711 /* 3712 * compare the action chains of two policies for equality 3713 * B_TRUE -> effective equality 3714 */ 3715 3716 static boolean_t 3717 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3718 { 3719 3720 ipsec_action_t *act1, *act2; 3721 3722 /* We have a valid rule. Let's compare the actions */ 3723 if (p1->ipsp_act == p2->ipsp_act) { 3724 /* same action. We are good */ 3725 return (B_TRUE); 3726 } 3727 3728 /* we have to walk the chain */ 3729 3730 act1 = p1->ipsp_act; 3731 act2 = p2->ipsp_act; 3732 3733 while (act1 != NULL && act2 != NULL) { 3734 3735 /* otherwise, Are we close enough? */ 3736 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3737 act1->ipa_want_ah != act2->ipa_want_ah || 3738 act1->ipa_want_esp != act2->ipa_want_esp || 3739 act1->ipa_want_se != act2->ipa_want_se) { 3740 /* Nope, we aren't */ 3741 return (B_FALSE); 3742 } 3743 3744 if (act1->ipa_want_ah) { 3745 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3746 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3747 return (B_FALSE); 3748 } 3749 3750 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3751 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3752 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3753 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3754 return (B_FALSE); 3755 } 3756 } 3757 3758 if (act1->ipa_want_esp) { 3759 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3760 act2->ipa_act.ipa_apply.ipp_use_esp || 3761 act1->ipa_act.ipa_apply.ipp_use_espa != 3762 act2->ipa_act.ipa_apply.ipp_use_espa) { 3763 return (B_FALSE); 3764 } 3765 3766 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3767 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3768 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3769 return (B_FALSE); 3770 } 3771 3772 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3773 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3774 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3775 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3776 return (B_FALSE); 3777 } 3778 } 3779 3780 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3781 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3782 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3783 return (B_FALSE); 3784 } 3785 3786 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3787 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3788 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3789 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3790 return (B_FALSE); 3791 } 3792 } 3793 3794 } 3795 3796 act1 = 
act1->ipa_next; 3797 act2 = act2->ipa_next; 3798 } 3799 3800 if (act1 != NULL || act2 != NULL) { 3801 return (B_FALSE); 3802 } 3803 3804 return (B_TRUE); 3805 } 3806 3807 3808 /* 3809 * Given a constructed ipsec_policy_t policy rule, enter it into 3810 * the correct policy ruleset. 3811 * 3812 * ipsec_check_policy() is assumed to have succeeded first (to check for 3813 * duplicates). 3814 */ 3815 void 3816 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3817 netstack_t *ns) 3818 { 3819 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3820 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3821 uint32_t valid = selkey->ipsl_valid; 3822 uint32_t hval = selkey->ipsl_pol_hval; 3823 int af = -1; 3824 3825 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3826 3827 if (valid & IPSL_IPV6) { 3828 ASSERT(!(valid & IPSL_IPV4)); 3829 af = IPSEC_AF_V6; 3830 } else { 3831 ASSERT(valid & IPSL_IPV4); 3832 af = IPSEC_AF_V4; 3833 } 3834 3835 php->iph_gen++; 3836 3837 if (hval == IPSEC_SEL_NOHASH) { 3838 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3839 } else { 3840 HASH_LOCK(pr->ipr_hash, hval); 3841 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3842 HASH_UNLOCK(pr->ipr_hash, hval); 3843 } 3844 3845 ipsec_insert_always(&php->iph_rulebyid, ipp); 3846 3847 ipsec_update_present_flags(ns->netstack_ipsec); 3848 } 3849 3850 static void 3851 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr) 3852 { 3853 ipsec_policy_t *ip, *nip; 3854 int af, chain, nchain; 3855 3856 for (af = 0; af < IPSEC_NAF; af++) { 3857 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3858 nip = ip->ipsp_hash.hash_next; 3859 IPPOL_UNCHAIN(php, ip); 3860 } 3861 ipr->ipr_nonhash[af] = NULL; 3862 } 3863 nchain = ipr->ipr_nchains; 3864 3865 for (chain = 0; chain < nchain; chain++) { 3866 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3867 ip = nip) { 3868 nip = ip->ipsp_hash.hash_next; 3869 IPPOL_UNCHAIN(php, ip); 3870 } 3871 ipr->ipr_hash[chain].hash_head = NULL; 3872 } 3873 } 3874 3875 /* 3876 * Create and insert inbound or outbound policy associated with actp for the 3877 * address family fam into the policy head ph. Returns B_TRUE if policy was 3878 * inserted, and B_FALSE otherwise. 3879 */ 3880 boolean_t 3881 ipsec_polhead_insert(ipsec_policy_head_t *ph, ipsec_act_t *actp, uint_t nact, 3882 int fam, int ptype, netstack_t *ns) 3883 { 3884 ipsec_selkey_t sel; 3885 ipsec_policy_t *pol; 3886 ipsec_policy_root_t *pr; 3887 3888 bzero(&sel, sizeof (sel)); 3889 sel.ipsl_valid = (fam == IPSEC_AF_V4 ? 
IPSL_IPV4 : IPSL_IPV6); 3890 if ((pol = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, 3891 NULL, ns)) != NULL) { 3892 pr = &ph->iph_root[ptype]; 3893 HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[fam]); 3894 ipsec_insert_always(&ph->iph_rulebyid, pol); 3895 } 3896 return (pol != NULL); 3897 } 3898 3899 void 3900 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3901 { 3902 int dir; 3903 3904 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3905 3906 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3907 ipsec_ipr_flush(php, &php->iph_root[dir]); 3908 3909 php->iph_gen++; 3910 ipsec_update_present_flags(ns->netstack_ipsec); 3911 } 3912 3913 void 3914 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3915 { 3916 int dir; 3917 3918 ASSERT(php->iph_refs == 0); 3919 3920 rw_enter(&php->iph_lock, RW_WRITER); 3921 ipsec_polhead_flush(php, ns); 3922 rw_exit(&php->iph_lock); 3923 rw_destroy(&php->iph_lock); 3924 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3925 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3926 int chain; 3927 3928 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3929 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3930 3931 } 3932 ipsec_polhead_free_table(php); 3933 kmem_free(php, sizeof (*php)); 3934 } 3935 3936 static void 3937 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3938 { 3939 int af; 3940 3941 ipr->ipr_nchains = 0; 3942 ipr->ipr_hash = NULL; 3943 3944 for (af = 0; af < IPSEC_NAF; af++) { 3945 ipr->ipr_nonhash[af] = NULL; 3946 } 3947 } 3948 3949 ipsec_policy_head_t * 3950 ipsec_polhead_create(void) 3951 { 3952 ipsec_policy_head_t *php; 3953 3954 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3955 if (php == NULL) 3956 return (php); 3957 3958 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3959 php->iph_refs = 1; 3960 php->iph_gen = 0; 3961 3962 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3963 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3964 3965 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3966 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3967 3968 return (php); 3969 } 3970 3971 /* 3972 * Clone the policy head into a new polhead; release one reference to the 3973 * old one and return the only reference to the new one. 3974 * If the old one had a refcount of 1, just return it. 3975 */ 3976 ipsec_policy_head_t * 3977 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns) 3978 { 3979 ipsec_policy_head_t *nphp; 3980 3981 if (php == NULL) 3982 return (ipsec_polhead_create()); 3983 else if (php->iph_refs == 1) 3984 return (php); 3985 3986 nphp = ipsec_polhead_create(); 3987 if (nphp == NULL) 3988 return (NULL); 3989 3990 if (ipsec_copy_polhead(php, nphp, ns) != 0) { 3991 ipsec_polhead_free(nphp, ns); 3992 return (NULL); 3993 } 3994 IPPH_REFRELE(php, ns); 3995 return (nphp); 3996 } 3997 3998 /* 3999 * When sending a response to an ICMP request or generating an RST 4000 * in the TCP case, the outbound packets need to go at the same level 4001 * of protection as the incoming ones, i.e., we associate our outbound 4002 * policy with how the packet came in. We call this after we have 4003 * accepted the incoming packet which may or may not have been in 4004 * the clear and hence we are sending the reply back with the policy 4005 * matching the incoming datagram's policy. 4006 * 4007 * NOTE: This technique serves two purposes: 4008 * 4009 * 1) If we have multiple outbound policies, we send out a reply 4010 * matching with how it came in rather than matching the outbound 4011 * policy.
4012 * 4013 * 2) For asymmetric policies, we want to make sure that incoming 4014 * and outgoing have the same level of protection. Asymmetric 4015 * policies exist only with global policy where we may not have 4016 * both outbound and inbound at the same time. 4017 * 4018 * NOTE2: This function is called by cleartext cases, so it needs to be 4019 * in IP proper. 4020 * 4021 * Note: the caller has moved other parts of ira into ixa already. 4022 */ 4023 boolean_t 4024 ipsec_in_to_out(ip_recv_attr_t *ira, ip_xmit_attr_t *ixa, mblk_t *data_mp, 4025 ipha_t *ipha, ip6_t *ip6h) 4026 { 4027 ipsec_selector_t sel; 4028 ipsec_action_t *reflect_action = NULL; 4029 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 4030 4031 bzero((void*)&sel, sizeof (sel)); 4032 4033 if (ira->ira_ipsec_action != NULL) { 4034 /* transfer reference. */ 4035 reflect_action = ira->ira_ipsec_action; 4036 ira->ira_ipsec_action = NULL; 4037 } else if (!(ira->ira_flags & IRAF_LOOPBACK)) 4038 reflect_action = ipsec_in_to_out_action(ira); 4039 4040 /* 4041 * The caller is going to send the datagram out which might 4042 * go on the wire or be delivered locally through ire_send_local. 4043 * 4044 * 1) If it goes out on the wire, new associations will be 4045 * obtained. 4046 * 2) If it is delivered locally, ire_send_local will convert 4047 * this ip_xmit_attr_t back to an ip_recv_attr_t looking at the 4048 * requests. 4049 */ 4050 ixa->ixa_ipsec_action = reflect_action; 4051 4052 if (!ipsec_init_outbound_ports(&sel, data_mp, ipha, ip6h, 0, 4053 ns->netstack_ipsec)) { 4054 /* Note: data_mp already consumed and ip_drop_packet done */ 4055 return (B_FALSE); 4056 } 4057 ixa->ixa_ipsec_src_port = sel.ips_local_port; 4058 ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4059 ixa->ixa_ipsec_proto = sel.ips_protocol; 4060 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4061 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4062 4063 /* 4064 * Don't use global policy for this, as we want 4065 * to use the same protection that was applied to the inbound packet. 4066 * Thus we set IXAF_NO_IPSEC if it arrived in the clear to make 4067 * it be sent in the clear.
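 * For example (an illustrative reading of the flag selection below, not
 * an extra case): a TCP RST answering a cleartext segment goes out with
 * IXAF_NO_IPSEC even if outbound global policy exists, while an RST
 * answering an ESP-protected segment goes out IXAF_IPSEC_SECURE with the
 * reflected action set above.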
4068 */ 4069 if (ira->ira_flags & IRAF_IPSEC_SECURE) 4070 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4071 else 4072 ixa->ixa_flags |= IXAF_NO_IPSEC; 4073 4074 return (B_TRUE); 4075 } 4076 4077 void 4078 ipsec_out_release_refs(ip_xmit_attr_t *ixa) 4079 { 4080 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) 4081 return; 4082 4083 if (ixa->ixa_ipsec_ah_sa != NULL) { 4084 IPSA_REFRELE(ixa->ixa_ipsec_ah_sa); 4085 ixa->ixa_ipsec_ah_sa = NULL; 4086 } 4087 if (ixa->ixa_ipsec_esp_sa != NULL) { 4088 IPSA_REFRELE(ixa->ixa_ipsec_esp_sa); 4089 ixa->ixa_ipsec_esp_sa = NULL; 4090 } 4091 if (ixa->ixa_ipsec_policy != NULL) { 4092 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4093 ixa->ixa_ipsec_policy = NULL; 4094 } 4095 if (ixa->ixa_ipsec_action != NULL) { 4096 IPACT_REFRELE(ixa->ixa_ipsec_action); 4097 ixa->ixa_ipsec_action = NULL; 4098 } 4099 if (ixa->ixa_ipsec_latch) { 4100 IPLATCH_REFRELE(ixa->ixa_ipsec_latch); 4101 ixa->ixa_ipsec_latch = NULL; 4102 } 4103 /* Clear the soft references to the SAs */ 4104 ixa->ixa_ipsec_ref[0].ipsr_sa = NULL; 4105 ixa->ixa_ipsec_ref[0].ipsr_bucket = NULL; 4106 ixa->ixa_ipsec_ref[0].ipsr_gen = 0; 4107 ixa->ixa_ipsec_ref[1].ipsr_sa = NULL; 4108 ixa->ixa_ipsec_ref[1].ipsr_bucket = NULL; 4109 ixa->ixa_ipsec_ref[1].ipsr_gen = 0; 4110 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4111 } 4112 4113 void 4114 ipsec_in_release_refs(ip_recv_attr_t *ira) 4115 { 4116 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) 4117 return; 4118 4119 if (ira->ira_ipsec_ah_sa != NULL) { 4120 IPSA_REFRELE(ira->ira_ipsec_ah_sa); 4121 ira->ira_ipsec_ah_sa = NULL; 4122 } 4123 if (ira->ira_ipsec_esp_sa != NULL) { 4124 IPSA_REFRELE(ira->ira_ipsec_esp_sa); 4125 ira->ira_ipsec_esp_sa = NULL; 4126 } 4127 ira->ira_flags &= ~IRAF_IPSEC_SECURE; 4128 } 4129 4130 /* 4131 * This is called from ire_send_local when a packet 4132 * is looped back. We set up the ip_recv_attr_t "borrowing" the references 4133 * held by the callers. 4134 * Note that we don't do any IPsec but we carry the actions and IPsec flags 4135 * across so that the fanout policy checks see that IPsec was applied. 4136 * 4137 * The caller should do ipsec_in_release_refs() on the ira by calling 4138 * ira_cleanup().
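 *
 * Illustrative sketch of the loopback hand-off (hypothetical caller;
 * ire_send_local is the real one, and error handling is elided):
 *
 *	ip_recv_attr_t	iras;
 *
 *	ipsec_out_to_in(ixa, ill, &iras);	(borrows ixa's references)
 *	ip_fanout_v4(mp, ipha, &iras);		(fanout sees IRAF_IPSEC_SECURE)
 *	ira_cleanup(&iras, B_TRUE);		(releases the borrowed refs)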
4139 */ 4140 void 4141 ipsec_out_to_in(ip_xmit_attr_t *ixa, ill_t *ill, ip_recv_attr_t *ira) 4142 { 4143 ipsec_policy_t *pol; 4144 ipsec_action_t *act; 4145 4146 /* Non-IPsec operations */ 4147 ira->ira_free_flags = 0; 4148 ira->ira_zoneid = ixa->ixa_zoneid; 4149 ira->ira_cred = ixa->ixa_cred; 4150 ira->ira_cpid = ixa->ixa_cpid; 4151 ira->ira_tsl = ixa->ixa_tsl; 4152 ira->ira_ill = ira->ira_rill = ill; 4153 ira->ira_flags = ixa->ixa_flags & IAF_MASK; 4154 ira->ira_no_loop_zoneid = ixa->ixa_no_loop_zoneid; 4155 ira->ira_pktlen = ixa->ixa_pktlen; 4156 ira->ira_ip_hdr_length = ixa->ixa_ip_hdr_length; 4157 ira->ira_protocol = ixa->ixa_protocol; 4158 ira->ira_mhip = NULL; 4159 4160 ira->ira_flags |= IRAF_LOOPBACK | IRAF_L2SRC_LOOPBACK; 4161 4162 ira->ira_sqp = ixa->ixa_sqp; 4163 ira->ira_ring = NULL; 4164 4165 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 4166 ira->ira_rifindex = ira->ira_ruifindex; 4167 4168 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) 4169 return; 4170 4171 ira->ira_flags |= IRAF_IPSEC_SECURE; 4172 4173 ira->ira_ipsec_ah_sa = NULL; 4174 ira->ira_ipsec_esp_sa = NULL; 4175 4176 act = ixa->ixa_ipsec_action; 4177 if (act == NULL) { 4178 pol = ixa->ixa_ipsec_policy; 4179 if (pol != NULL) { 4180 act = pol->ipsp_act; 4181 IPACT_REFHOLD(act); 4182 } 4183 } 4184 ixa->ixa_ipsec_action = NULL; 4185 ira->ira_ipsec_action = act; 4186 } 4187 4188 /* 4189 * Consults global policy and per-socket policy to see whether this datagram 4190 * should go out secure. If so, it updates the ip_xmit_attr_t. 4191 * Should not be used when connecting, since then we want to latch the policy. 4192 * 4193 * If connp is NULL we just look at the global policy. 4194 * 4195 * Returns NULL if the packet was dropped, in which case the MIB has 4196 * been incremented and ip_drop_packet done.
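 *
 * A minimal caller sketch (illustrative only; assumes an IPv4 packet,
 * a fully constructed ixa, and a caller-specific error value):
 *
 *	mp = ip_output_attach_policy(mp, ipha, NULL, connp, ixa);
 *	if (mp == NULL)
 *		return (ENOBUFS);	(already dropped, MIB already bumped)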
4197 */ 4198 mblk_t * 4199 ip_output_attach_policy(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, 4200 const conn_t *connp, ip_xmit_attr_t *ixa) 4201 { 4202 ipsec_selector_t sel; 4203 boolean_t policy_present; 4204 ip_stack_t *ipst = ixa->ixa_ipst; 4205 netstack_t *ns = ipst->ips_netstack; 4206 ipsec_stack_t *ipss = ns->netstack_ipsec; 4207 ipsec_policy_t *p; 4208 4209 ixa->ixa_ipsec_policy_gen = ipss->ipsec_system_policy.iph_gen; 4210 ASSERT((ipha != NULL && ip6h == NULL) || 4211 (ip6h != NULL && ipha == NULL)); 4212 4213 if (ipha != NULL) 4214 policy_present = ipss->ipsec_outbound_v4_policy_present; 4215 else 4216 policy_present = ipss->ipsec_outbound_v6_policy_present; 4217 4218 if (!policy_present && (connp == NULL || connp->conn_policy == NULL)) 4219 return (mp); 4220 4221 bzero((void*)&sel, sizeof (sel)); 4222 4223 if (ipha != NULL) { 4224 sel.ips_local_addr_v4 = ipha->ipha_src; 4225 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4226 sel.ips_isv4 = B_TRUE; 4227 } else { 4228 sel.ips_isv4 = B_FALSE; 4229 sel.ips_local_addr_v6 = ip6h->ip6_src; 4230 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, mp, NULL); 4231 } 4232 sel.ips_protocol = ixa->ixa_protocol; 4233 4234 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4235 if (ipha != NULL) { 4236 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4237 } else { 4238 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4239 } 4240 /* Note: mp already consumed and ip_drop_packet done */ 4241 return (NULL); 4242 } 4243 4244 ASSERT(ixa->ixa_ipsec_policy == NULL); 4245 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4246 ixa->ixa_ipsec_policy = p; 4247 if (p != NULL) { 4248 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4249 if (connp == NULL || connp->conn_policy == NULL) 4250 ixa->ixa_flags |= IXAF_IPSEC_GLOBAL_POLICY; 4251 } else { 4252 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4253 } 4254 4255 /* 4256 * Copy the right port information. 4257 */ 4258 ixa->ixa_ipsec_src_port = sel.ips_local_port; 4259 ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4260 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4261 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4262 ixa->ixa_ipsec_proto = sel.ips_protocol; 4263 return (mp); 4264 } 4265 4266 /* 4267 * When appropriate, this function caches inbound and outbound policy 4268 * for this connection. The outbound policy is stored in conn_ixa. 4269 * Note that it cannot be used for SCTP since conn_faddr isn't set for SCTP. 4270 * 4271 * XXX need to work out more details about per-interface policy and 4272 * caching here! 4273 * 4274 * XXX may want to split inbound and outbound caching for ill. 4275 */ 4276 int 4277 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4278 { 4279 boolean_t global_policy_present; 4280 netstack_t *ns = connp->conn_netstack; 4281 ipsec_stack_t *ipss = ns->netstack_ipsec; 4282 4283 connp->conn_ixa->ixa_ipsec_policy_gen = 4284 ipss->ipsec_system_policy.iph_gen; 4285 /* 4286 * There is no policy latching for ICMP sockets because we can't 4287 * decide on which policy to use until we see the packet and get 4288 * type/code selectors.
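 * For example, an ICMP echo request and an ICMP destination-unreachable
 * sent on the same socket can match different type/code rules, so each
 * packet is matched on its own instead of latching a single policy.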
4289 */ 4290 if (connp->conn_proto == IPPROTO_ICMP || 4291 connp->conn_proto == IPPROTO_ICMPV6) { 4292 connp->conn_in_enforce_policy = 4293 connp->conn_out_enforce_policy = B_TRUE; 4294 if (connp->conn_latch != NULL) { 4295 IPLATCH_REFRELE(connp->conn_latch); 4296 connp->conn_latch = NULL; 4297 } 4298 if (connp->conn_latch_in_policy != NULL) { 4299 IPPOL_REFRELE(connp->conn_latch_in_policy); 4300 connp->conn_latch_in_policy = NULL; 4301 } 4302 if (connp->conn_latch_in_action != NULL) { 4303 IPACT_REFRELE(connp->conn_latch_in_action); 4304 connp->conn_latch_in_action = NULL; 4305 } 4306 if (connp->conn_ixa->ixa_ipsec_policy != NULL) { 4307 IPPOL_REFRELE(connp->conn_ixa->ixa_ipsec_policy); 4308 connp->conn_ixa->ixa_ipsec_policy = NULL; 4309 } 4310 if (connp->conn_ixa->ixa_ipsec_action != NULL) { 4311 IPACT_REFRELE(connp->conn_ixa->ixa_ipsec_action); 4312 connp->conn_ixa->ixa_ipsec_action = NULL; 4313 } 4314 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4315 return (0); 4316 } 4317 4318 global_policy_present = isv4 ? 4319 (ipss->ipsec_outbound_v4_policy_present || 4320 ipss->ipsec_inbound_v4_policy_present) : 4321 (ipss->ipsec_outbound_v6_policy_present || 4322 ipss->ipsec_inbound_v6_policy_present); 4323 4324 if ((connp->conn_policy != NULL) || global_policy_present) { 4325 ipsec_selector_t sel; 4326 ipsec_policy_t *p; 4327 4328 if (connp->conn_latch == NULL && 4329 (connp->conn_latch = iplatch_create()) == NULL) { 4330 return (ENOMEM); 4331 } 4332 4333 bzero((void*)&sel, sizeof (sel)); 4334 4335 sel.ips_protocol = connp->conn_proto; 4336 sel.ips_local_port = connp->conn_lport; 4337 sel.ips_remote_port = connp->conn_fport; 4338 sel.ips_is_icmp_inv_acq = 0; 4339 sel.ips_isv4 = isv4; 4340 if (isv4) { 4341 sel.ips_local_addr_v4 = connp->conn_laddr_v4; 4342 sel.ips_remote_addr_v4 = connp->conn_faddr_v4; 4343 } else { 4344 sel.ips_local_addr_v6 = connp->conn_laddr_v6; 4345 sel.ips_remote_addr_v6 = connp->conn_faddr_v6; 4346 } 4347 4348 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, &sel, ns); 4349 if (connp->conn_latch_in_policy != NULL) 4350 IPPOL_REFRELE(connp->conn_latch_in_policy); 4351 connp->conn_latch_in_policy = p; 4352 connp->conn_in_enforce_policy = (p != NULL); 4353 4354 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4355 if (connp->conn_ixa->ixa_ipsec_policy != NULL) 4356 IPPOL_REFRELE(connp->conn_ixa->ixa_ipsec_policy); 4357 connp->conn_ixa->ixa_ipsec_policy = p; 4358 connp->conn_out_enforce_policy = (p != NULL); 4359 if (p != NULL) { 4360 connp->conn_ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4361 if (connp->conn_policy == NULL) { 4362 connp->conn_ixa->ixa_flags |= 4363 IXAF_IPSEC_GLOBAL_POLICY; 4364 } 4365 } else { 4366 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4367 } 4368 /* Clear the latched actions too, in case we're recaching. 
*/ 4369 if (connp->conn_ixa->ixa_ipsec_action != NULL) { 4370 IPACT_REFRELE(connp->conn_ixa->ixa_ipsec_action); 4371 connp->conn_ixa->ixa_ipsec_action = NULL; 4372 } 4373 if (connp->conn_latch_in_action != NULL) { 4374 IPACT_REFRELE(connp->conn_latch_in_action); 4375 connp->conn_latch_in_action = NULL; 4376 } 4377 connp->conn_ixa->ixa_ipsec_src_port = sel.ips_local_port; 4378 connp->conn_ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4379 connp->conn_ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4380 connp->conn_ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4381 connp->conn_ixa->ixa_ipsec_proto = sel.ips_protocol; 4382 } else { 4383 connp->conn_ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4384 } 4385 4386 /* 4387 * We may or may not have policy for this endpoint. We still set 4388 * conn_policy_cached so that inbound datagrams don't have to look 4389 * at global policy as policy is considered latched for these 4390 * endpoints. We should not set conn_policy_cached until the conn 4391 * reflects the actual policy. If we *set* this before inheriting 4392 * the policy there is a window where the check 4393 * CONN_INBOUND_POLICY_PRESENT will neither check with the policy 4394 * on the conn (because we have not yet copied the policy onto 4395 * the conn and hence not set conn_in_enforce_policy) nor with the 4396 * global policy (because conn_policy_cached is already set). 4397 */ 4398 connp->conn_policy_cached = B_TRUE; 4399 return (0); 4400 } 4401 4402 /* 4403 * When appropriate, this function caches outbound policy for faddr/fport. 4404 * It is used when we are not connected, i.e., when we cannot latch the 4405 * policy. 4406 */ 4407 void 4408 ipsec_cache_outbound_policy(const conn_t *connp, const in6_addr_t *v6src, 4409 const in6_addr_t *v6dst, in_port_t dstport, ip_xmit_attr_t *ixa) 4410 { 4411 boolean_t isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0; 4412 boolean_t global_policy_present; 4413 netstack_t *ns = connp->conn_netstack; 4414 ipsec_stack_t *ipss = ns->netstack_ipsec; 4415 4416 ixa->ixa_ipsec_policy_gen = ipss->ipsec_system_policy.iph_gen; 4417 4418 /* 4419 * There is no policy caching for ICMP sockets because we can't 4420 * decide on which policy to use until we see the packet and get 4421 * type/code selectors. 4422 */ 4423 if (connp->conn_proto == IPPROTO_ICMP || 4424 connp->conn_proto == IPPROTO_ICMPV6) { 4425 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4426 if (ixa->ixa_ipsec_policy != NULL) { 4427 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4428 ixa->ixa_ipsec_policy = NULL; 4429 } 4430 if (ixa->ixa_ipsec_action != NULL) { 4431 IPACT_REFRELE(ixa->ixa_ipsec_action); 4432 ixa->ixa_ipsec_action = NULL; 4433 } 4434 return; 4435 } 4436 4437 global_policy_present = isv4 ?
4438 (ipss->ipsec_outbound_v4_policy_present || 4439 ipss->ipsec_inbound_v4_policy_present) : 4440 (ipss->ipsec_outbound_v6_policy_present || 4441 ipss->ipsec_inbound_v6_policy_present); 4442 4443 if ((connp->conn_policy != NULL) || global_policy_present) { 4444 ipsec_selector_t sel; 4445 ipsec_policy_t *p; 4446 4447 bzero((void*)&sel, sizeof (sel)); 4448 4449 sel.ips_protocol = connp->conn_proto; 4450 sel.ips_local_port = connp->conn_lport; 4451 sel.ips_remote_port = dstport; 4452 sel.ips_is_icmp_inv_acq = 0; 4453 sel.ips_isv4 = isv4; 4454 if (isv4) { 4455 IN6_V4MAPPED_TO_IPADDR(v6src, sel.ips_local_addr_v4); 4456 IN6_V4MAPPED_TO_IPADDR(v6dst, sel.ips_remote_addr_v4); 4457 } else { 4458 sel.ips_local_addr_v6 = *v6src; 4459 sel.ips_remote_addr_v6 = *v6dst; 4460 } 4461 4462 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, &sel, ns); 4463 if (ixa->ixa_ipsec_policy != NULL) 4464 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4465 ixa->ixa_ipsec_policy = p; 4466 if (p != NULL) { 4467 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 4468 if (connp->conn_policy == NULL) 4469 ixa->ixa_flags |= IXAF_IPSEC_GLOBAL_POLICY; 4470 } else { 4471 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4472 } 4473 /* Clear the latched actions too, in case we're recaching. */ 4474 if (ixa->ixa_ipsec_action != NULL) { 4475 IPACT_REFRELE(ixa->ixa_ipsec_action); 4476 ixa->ixa_ipsec_action = NULL; 4477 } 4478 4479 ixa->ixa_ipsec_src_port = sel.ips_local_port; 4480 ixa->ixa_ipsec_dst_port = sel.ips_remote_port; 4481 ixa->ixa_ipsec_icmp_type = sel.ips_icmp_type; 4482 ixa->ixa_ipsec_icmp_code = sel.ips_icmp_code; 4483 ixa->ixa_ipsec_proto = sel.ips_protocol; 4484 } else { 4485 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 4486 if (ixa->ixa_ipsec_policy != NULL) { 4487 IPPOL_REFRELE(ixa->ixa_ipsec_policy); 4488 ixa->ixa_ipsec_policy = NULL; 4489 } 4490 if (ixa->ixa_ipsec_action != NULL) { 4491 IPACT_REFRELE(ixa->ixa_ipsec_action); 4492 ixa->ixa_ipsec_action = NULL; 4493 } 4494 } 4495 } 4496 4497 /* 4498 * Returns B_FALSE if the policy has gone stale. 4499 */ 4500 boolean_t 4501 ipsec_outbound_policy_current(ip_xmit_attr_t *ixa) 4502 { 4503 ipsec_stack_t *ipss = ixa->ixa_ipst->ips_netstack->netstack_ipsec; 4504 4505 if (!(ixa->ixa_flags & IXAF_IPSEC_GLOBAL_POLICY)) 4506 return (B_TRUE); 4507 4508 return (ixa->ixa_ipsec_policy_gen == ipss->ipsec_system_policy.iph_gen); 4509 } 4510 4511 void 4512 iplatch_free(ipsec_latch_t *ipl) 4513 { 4514 if (ipl->ipl_local_cid != NULL) 4515 IPSID_REFRELE(ipl->ipl_local_cid); 4516 if (ipl->ipl_remote_cid != NULL) 4517 IPSID_REFRELE(ipl->ipl_remote_cid); 4518 mutex_destroy(&ipl->ipl_lock); 4519 kmem_free(ipl, sizeof (*ipl)); 4520 } 4521 4522 ipsec_latch_t * 4523 iplatch_create() 4524 { 4525 ipsec_latch_t *ipl = kmem_zalloc(sizeof (*ipl), KM_NOSLEEP); 4526 if (ipl == NULL) 4527 return (ipl); 4528 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4529 ipl->ipl_refcnt = 1; 4530 return (ipl); 4531 } 4532 4533 /* 4534 * Hash function for ID hash table. 4535 */ 4536 static uint32_t 4537 ipsid_hash(int idtype, char *idstring) 4538 { 4539 uint32_t hval = idtype; 4540 unsigned char c; 4541 4542 while ((c = *idstring++) != 0) { 4543 hval = (hval << 4) | (hval >> 28); 4544 hval ^= c; 4545 } 4546 hval = hval ^ (hval >> 16); 4547 return (hval & (IPSID_HASHSIZE-1)); 4548 } 4549 4550 /* 4551 * Look up identity string in hash table. Return identity object 4552 * corresponding to the name -- either preexisting, or newly allocated. 4553 * 4554 * Return NULL if we need to allocate a new one and can't get memory. 
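 *
 * Illustrative sketch of the interning discipline (hypothetical caller;
 * the identity type constant comes from <net/pfkeyv2.h>):
 *
 *	ipsid_t *id;
 *
 *	id = ipsid_lookup(SADB_IDENTTYPE_FQDN, "gw.example.com", ns);
 *	if (id == NULL)
 *		return (ENOMEM);
 *	... two lookups of the same (type, string) return the same object,
 *	so ipsid_equal() below can simply compare pointers ...
 *	IPSID_REFRELE(id);	(ipsid_gc() later frees zero-ref entries)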
4555 */ 4556 ipsid_t * 4557 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4558 { 4559 ipsid_t *retval; 4560 char *nstr; 4561 int idlen = strlen(idstring) + 1; 4562 ipsec_stack_t *ipss = ns->netstack_ipsec; 4563 ipsif_t *bucket; 4564 4565 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4566 4567 mutex_enter(&bucket->ipsif_lock); 4568 4569 for (retval = bucket->ipsif_head; retval != NULL; 4570 retval = retval->ipsid_next) { 4571 if (idtype != retval->ipsid_type) 4572 continue; 4573 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4574 continue; 4575 4576 IPSID_REFHOLD(retval); 4577 mutex_exit(&bucket->ipsif_lock); 4578 return (retval); 4579 } 4580 4581 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4582 if (!retval) { 4583 mutex_exit(&bucket->ipsif_lock); 4584 return (NULL); 4585 } 4586 4587 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4588 if (!nstr) { 4589 mutex_exit(&bucket->ipsif_lock); 4590 kmem_free(retval, sizeof (*retval)); 4591 return (NULL); 4592 } 4593 4594 retval->ipsid_refcnt = 1; 4595 retval->ipsid_next = bucket->ipsif_head; 4596 if (retval->ipsid_next != NULL) 4597 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4598 retval->ipsid_ptpn = &bucket->ipsif_head; 4599 retval->ipsid_type = idtype; 4600 retval->ipsid_cid = nstr; 4601 bucket->ipsif_head = retval; 4602 bcopy(idstring, nstr, idlen); 4603 mutex_exit(&bucket->ipsif_lock); 4604 4605 return (retval); 4606 } 4607 4608 /* 4609 * Garbage collect the identity hash table. 4610 */ 4611 void 4612 ipsid_gc(netstack_t *ns) 4613 { 4614 int i, len; 4615 ipsid_t *id, *nid; 4616 ipsif_t *bucket; 4617 ipsec_stack_t *ipss = ns->netstack_ipsec; 4618 4619 for (i = 0; i < IPSID_HASHSIZE; i++) { 4620 bucket = &ipss->ipsec_ipsid_buckets[i]; 4621 mutex_enter(&bucket->ipsif_lock); 4622 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4623 nid = id->ipsid_next; 4624 if (id->ipsid_refcnt == 0) { 4625 *id->ipsid_ptpn = nid; 4626 if (nid != NULL) 4627 nid->ipsid_ptpn = id->ipsid_ptpn; 4628 len = strlen(id->ipsid_cid) + 1; 4629 kmem_free(id->ipsid_cid, len); 4630 kmem_free(id, sizeof (*id)); 4631 } 4632 } 4633 mutex_exit(&bucket->ipsif_lock); 4634 } 4635 } 4636 4637 /* 4638 * Return true if two identities are the same. 4639 */ 4640 boolean_t 4641 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4642 { 4643 if (id1 == id2) 4644 return (B_TRUE); 4645 #ifdef DEBUG 4646 if ((id1 == NULL) || (id2 == NULL)) 4647 return (B_FALSE); 4648 /* 4649 * Test that we're interning IDs correctly. 4650 */ 4651 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 4652 (id1->ipsid_type != id2->ipsid_type)); 4653 #endif 4654 return (B_FALSE); 4655 } 4656 4657 /* 4658 * Initialize identity table; called during module initialization.
4659 */ 4660 static void 4661 ipsid_init(netstack_t *ns) 4662 { 4663 ipsif_t *bucket; 4664 int i; 4665 ipsec_stack_t *ipss = ns->netstack_ipsec; 4666 4667 for (i = 0; i < IPSID_HASHSIZE; i++) { 4668 bucket = &ipss->ipsec_ipsid_buckets[i]; 4669 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 4670 } 4671 } 4672 4673 /* 4674 * Free identity table (preparatory to module unload) 4675 */ 4676 static void 4677 ipsid_fini(netstack_t *ns) 4678 { 4679 ipsif_t *bucket; 4680 int i; 4681 ipsec_stack_t *ipss = ns->netstack_ipsec; 4682 4683 for (i = 0; i < IPSID_HASHSIZE; i++) { 4684 bucket = &ipss->ipsec_ipsid_buckets[i]; 4685 ASSERT(bucket->ipsif_head == NULL); 4686 mutex_destroy(&bucket->ipsif_lock); 4687 } 4688 } 4689 4690 /* 4691 * Update the minimum and maximum supported key sizes for the 4692 * specified algorithm. Must be called while holding the algorithms lock. 4693 */ 4694 void 4695 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 4696 netstack_t *ns) 4697 { 4698 size_t crypto_min = (size_t)-1, crypto_max = 0; 4699 size_t cur_crypto_min, cur_crypto_max; 4700 boolean_t is_valid; 4701 crypto_mechanism_info_t *mech_infos; 4702 uint_t nmech_infos; 4703 int crypto_rc, i; 4704 crypto_mech_usage_t mask; 4705 ipsec_stack_t *ipss = ns->netstack_ipsec; 4706 4707 ASSERT(RW_WRITE_HELD(&ipss->ipsec_alg_lock)); 4708 4709 /* 4710 * Compute the min, max, and default key sizes (in number of 4711 * increments to the default key size in bits) as defined 4712 * by the algorithm mappings. This range of key sizes is used 4713 * for policy related operations. The effective key sizes 4714 * supported by the framework could be more limited than 4715 * those defined for an algorithm. 4716 */ 4717 alg->alg_default_bits = alg->alg_key_sizes[0]; 4718 alg->alg_default = 0; 4719 if (alg->alg_increment != 0) { 4720 /* key sizes are defined by range & increment */ 4721 alg->alg_minbits = alg->alg_key_sizes[1]; 4722 alg->alg_maxbits = alg->alg_key_sizes[2]; 4723 } else if (alg->alg_nkey_sizes == 0) { 4724 /* no specified key size for algorithm */ 4725 alg->alg_minbits = alg->alg_maxbits = 0; 4726 } else { 4727 /* key sizes are defined by enumeration */ 4728 alg->alg_minbits = (uint16_t)-1; 4729 alg->alg_maxbits = 0; 4730 4731 for (i = 0; i < alg->alg_nkey_sizes; i++) { 4732 if (alg->alg_key_sizes[i] < alg->alg_minbits) 4733 alg->alg_minbits = alg->alg_key_sizes[i]; 4734 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 4735 alg->alg_maxbits = alg->alg_key_sizes[i]; 4736 } 4737 } 4738 4739 if (!(alg->alg_flags & ALG_FLAG_VALID)) 4740 return; 4741 4742 /* 4743 * Mechanisms do not apply to the NULL encryption 4744 * algorithm, so simply return for this case. 4745 */ 4746 if (alg->alg_id == SADB_EALG_NULL) 4747 return; 4748 4749 /* 4750 * Find the min and max key sizes supported by the cryptographic 4751 * framework providers. 4752 */ 4753 4754 /* get the key sizes supported by the framework */ 4755 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 4756 &mech_infos, &nmech_infos, KM_SLEEP); 4757 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 4758 alg->alg_flags &= ~ALG_FLAG_VALID; 4759 return; 4760 } 4761 4762 /* min and max key sizes supported by framework */ 4763 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 4764 int unit_bits; 4765 4766 /* 4767 * Ignore entries that do not support the operations 4768 * needed for the algorithm type. 
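 * For example, a provider entry advertising only CRYPTO_MECH_USAGE_MAC
 * is skipped when alg_type is IPSEC_ALG_ENCR, since the mask built below
 * requires both the ENCRYPT and DECRYPT usage bits.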
4769 */ 4770 if (alg_type == IPSEC_ALG_AUTH) { 4771 mask = CRYPTO_MECH_USAGE_MAC; 4772 } else { 4773 mask = CRYPTO_MECH_USAGE_ENCRYPT | 4774 CRYPTO_MECH_USAGE_DECRYPT; 4775 } 4776 if ((mech_infos[i].mi_usage & mask) != mask) 4777 continue; 4778 4779 unit_bits = (mech_infos[i].mi_keysize_unit == 4780 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 4781 /* adjust min/max supported by framework */ 4782 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 4783 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 4784 4785 if (cur_crypto_min < crypto_min) 4786 crypto_min = cur_crypto_min; 4787 4788 /* 4789 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 4790 * the crypto framework which means "no upper limit". 4791 */ 4792 if (mech_infos[i].mi_max_key_size == 4793 CRYPTO_EFFECTIVELY_INFINITE) { 4794 crypto_max = (size_t)-1; 4795 } else if (cur_crypto_max > crypto_max) { 4796 crypto_max = cur_crypto_max; 4797 } 4798 4799 is_valid = B_TRUE; 4800 } 4801 4802 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 4803 nmech_infos); 4804 4805 if (!is_valid) { 4806 /* no key sizes supported by framework */ 4807 alg->alg_flags &= ~ALG_FLAG_VALID; 4808 return; 4809 } 4810 4811 /* 4812 * Determine min and max key sizes from the alg_key_sizes[] 4813 * defined for the algorithm entry. Adjust key sizes based on 4814 * those supported by the framework. 4815 */ 4816 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 4817 4818 /* 4819 * For backwards compatibility, assume that the IV length 4820 * is the same as the data length. 4821 */ 4822 alg->alg_ivlen = alg->alg_datalen; 4823 4824 /* 4825 * Copy any algorithm parameters (if provided) into dedicated 4826 * elements in the ipsec_alginfo_t structure. 4827 * There may be a better place to put this code. 4828 */ 4829 for (i = 0; i < alg->alg_nparams; i++) { 4830 switch (i) { 4831 case 0: 4832 /* Initialisation Vector length (bytes) */ 4833 alg->alg_ivlen = alg->alg_params[0]; 4834 break; 4835 case 1: 4836 /* Integrity Check Vector length (bytes) */ 4837 alg->alg_icvlen = alg->alg_params[1]; 4838 break; 4839 case 2: 4840 /* Salt length (bytes) */ 4841 alg->alg_saltlen = (uint8_t)alg->alg_params[2]; 4842 break; 4843 default: 4844 break; 4845 } 4846 } 4847 4848 /* Default if the IV length is not specified. */ 4849 if (alg_type == IPSEC_ALG_ENCR && alg->alg_ivlen == 0) 4850 alg->alg_ivlen = alg->alg_datalen; 4851 4852 alg_flag_check(alg); 4853 4854 if (alg->alg_increment != 0) { 4855 /* supported key sizes are defined by range & increment */ 4856 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 4857 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 4858 4859 alg->alg_ef_minbits = MAX(alg->alg_minbits, 4860 (uint16_t)crypto_min); 4861 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 4862 (uint16_t)crypto_max); 4863 4864 /* 4865 * If the sizes supported by the framework are outside 4866 * the range of sizes defined by the algorithm mappings, 4867 * the algorithm cannot be used. Check for this 4868 * condition here.
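 *
 * As an illustrative example (numbers made up): with alg_increment == 64
 * and a framework range of 40..530 bits (rounded to 64..512 above), an
 * algorithm mapping of 128..256 clamps cleanly to ef_minbits/ef_maxbits
 * of 128..256, but a mapping of 576..768 would leave ef_minbits greater
 * than ef_maxbits and fail the check below.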
4869 */ 4870 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 4871 alg->alg_flags &= ~ALG_FLAG_VALID; 4872 return; 4873 } 4874 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 4875 alg->alg_ef_default_bits = alg->alg_ef_minbits; 4876 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 4877 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 4878 } else if (alg->alg_nkey_sizes == 0) { 4879 /* no specified key size for algorithm */ 4880 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 4881 } else { 4882 /* supported key sizes are defined by enumeration */ 4883 alg->alg_ef_minbits = (uint16_t)-1; 4884 alg->alg_ef_maxbits = 0; 4885 4886 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 4887 /* 4888 * Ignore the current key size if it is not in the 4889 * range of sizes supported by the framework. 4890 */ 4891 if (alg->alg_key_sizes[i] < crypto_min || 4892 alg->alg_key_sizes[i] > crypto_max) 4893 continue; 4894 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 4895 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 4896 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 4897 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 4898 is_valid = B_TRUE; 4899 } 4900 4901 if (!is_valid) { 4902 alg->alg_flags &= ~ALG_FLAG_VALID; 4903 return; 4904 } 4905 alg->alg_ef_default = 0; 4906 } 4907 } 4908 4909 /* 4910 * Sanity check parameters provided by ipsecalgs(1m). Assume that 4911 * the algorithm is marked as valid; there is a check at the top 4912 * of this function. If any of the checks below fail, the algorithm 4913 * entry is invalid. 4914 */ 4915 void 4916 alg_flag_check(ipsec_alginfo_t *alg) 4917 { 4918 alg->alg_flags &= ~ALG_FLAG_VALID; 4919 4920 /* 4921 * Can't have the algorithm marked as both CCM and GCM. 4922 * Check that the ALG_FLAG_COMBINED and ALG_FLAG_COUNTERMODE 4923 * flags are set for CCM & GCM. 4924 */ 4925 if ((alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) == 4926 (ALG_FLAG_CCM|ALG_FLAG_GCM)) 4927 return; 4928 if (alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) { 4929 if (!(alg->alg_flags & ALG_FLAG_COUNTERMODE)) 4930 return; 4931 if (!(alg->alg_flags & ALG_FLAG_COMBINED)) 4932 return; 4933 } 4934 4935 /* 4936 * For ALG_FLAG_COUNTERMODE, check that the parameters 4937 * fit in the ipsec_nonce_t structure. 4938 */ 4939 if (alg->alg_flags & ALG_FLAG_COUNTERMODE) { 4940 if (alg->alg_ivlen != sizeof (((ipsec_nonce_t *)NULL)->iv)) 4941 return; 4942 if (alg->alg_saltlen > sizeof (((ipsec_nonce_t *)NULL)->salt)) 4943 return; 4944 } 4945 if ((alg->alg_flags & ALG_FLAG_COMBINED) && 4946 (alg->alg_icvlen == 0)) 4947 return; 4948 4949 /* All is well. */ 4950 alg->alg_flags |= ALG_FLAG_VALID; 4951 } 4952 4953 /* 4954 * Free the memory used by the specified algorithm. 4955 */ 4956 void 4957 ipsec_alg_free(ipsec_alginfo_t *alg) 4958 { 4959 if (alg == NULL) 4960 return; 4961 4962 if (alg->alg_key_sizes != NULL) { 4963 kmem_free(alg->alg_key_sizes, 4964 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 4965 alg->alg_key_sizes = NULL; 4966 } 4967 if (alg->alg_block_sizes != NULL) { 4968 kmem_free(alg->alg_block_sizes, 4969 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 4970 alg->alg_block_sizes = NULL; 4971 } 4972 if (alg->alg_params != NULL) { 4973 kmem_free(alg->alg_params, 4974 (alg->alg_nparams + 1) * sizeof (uint16_t)); 4975 alg->alg_params = NULL; 4976 } 4977 kmem_free(alg, sizeof (*alg)); 4978 } 4979 4980 /* 4981 * Check the validity of the specified key size for an algorithm. 4982 * Returns B_TRUE if key size is valid, B_FALSE otherwise.
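 *
 * Illustratively: for an enumerated mapping of {128, 192, 256} bits with
 * ef_minbits = 128 and ef_maxbits = 256, a key_size of 192 returns B_TRUE,
 * while 160 passes the range check but fails the enumeration scan.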
4983 */ 4984 boolean_t 4985 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 4986 { 4987 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 4988 return (B_FALSE); 4989 4990 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 4991 /* 4992 * If the key sizes are defined by enumeration, the new 4993 * key size must be equal to one of the supported values. 4994 */ 4995 int i; 4996 4997 for (i = 0; i < alg->alg_nkey_sizes; i++) 4998 if (key_size == alg->alg_key_sizes[i]) 4999 break; 5000 if (i == alg->alg_nkey_sizes) 5001 return (B_FALSE); 5002 } 5003 5004 return (B_TRUE); 5005 } 5006 5007 /* 5008 * Callback function invoked by the crypto framework when a provider 5009 * registers or unregisters. This callback updates the algorithm 5010 * tables when a crypto algorithm is no longer available or becomes 5011 * available, and triggers the freeing/creation of context templates 5012 * associated with existing SAs, if needed. 5013 * 5014 * Need to walk all stack instances since the callback is global 5015 * for all instances. 5016 */ 5017 void 5018 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5019 { 5020 netstack_handle_t nh; 5021 netstack_t *ns; 5022 5023 netstack_next_init(&nh); 5024 while ((ns = netstack_next(&nh)) != NULL) { 5025 ipsec_prov_update_callback_stack(event, event_arg, ns); 5026 netstack_rele(ns); 5027 } 5028 netstack_next_fini(&nh); 5029 } 5030 5031 static void 5032 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5033 netstack_t *ns) 5034 { 5035 crypto_notify_event_change_t *prov_change = 5036 (crypto_notify_event_change_t *)event_arg; 5037 uint_t algidx, algid, algtype, mech_count, mech_idx; 5038 ipsec_alginfo_t *alg; 5039 ipsec_alginfo_t oalg; 5040 crypto_mech_name_t *mechs; 5041 boolean_t alg_changed = B_FALSE; 5042 ipsec_stack_t *ipss = ns->netstack_ipsec; 5043 5044 /* ignore events for which we didn't register */ 5045 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5046 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5047 "received from crypto framework\n", event)); 5048 return; 5049 } 5050 5051 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5052 if (mechs == NULL) 5053 return; 5054 5055 /* 5056 * Walk the list of currently defined IPsec algorithms. Update 5057 * the algorithm valid flag and trigger an update of the 5058 * SAs that depend on that algorithm. 5059 */ 5060 rw_enter(&ipss->ipsec_alg_lock, RW_WRITER); 5061 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5062 for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; 5063 algidx++) { 5064 5065 algid = ipss->ipsec_sortlist[algtype][algidx]; 5066 alg = ipss->ipsec_alglists[algtype][algid]; 5067 ASSERT(alg != NULL); 5068 5069 /* 5070 * Skip the algorithms which do not map to the 5071 * crypto framework provider being added or removed. 5072 */ 5073 if (strncmp(alg->alg_mech_name, 5074 prov_change->ec_mech_name, 5075 CRYPTO_MAX_MECH_NAME) != 0) 5076 continue; 5077 5078 /* 5079 * Determine if the mechanism is valid. If it 5080 * is not, mark the algorithm as being invalid. If 5081 * it is, mark the algorithm as being valid.
5082 */ 5083 for (mech_idx = 0; mech_idx < mech_count; mech_idx++) 5084 if (strncmp(alg->alg_mech_name, 5085 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0) 5086 break; 5087 if (mech_idx == mech_count && 5088 alg->alg_flags & ALG_FLAG_VALID) { 5089 alg->alg_flags &= ~ALG_FLAG_VALID; 5090 alg_changed = B_TRUE; 5091 } else if (mech_idx < mech_count && 5092 !(alg->alg_flags & ALG_FLAG_VALID)) { 5093 alg->alg_flags |= ALG_FLAG_VALID; 5094 alg_changed = B_TRUE; 5095 } 5096 5097 /* 5098 * Update the supported key sizes, regardless 5099 * of whether a crypto provider was added or 5100 * removed. 5101 */ 5102 oalg = *alg; 5103 ipsec_alg_fix_min_max(alg, algtype, ns); 5104 if (!alg_changed && 5105 (alg->alg_ef_minbits != oalg.alg_ef_minbits || 5106 alg->alg_ef_maxbits != oalg.alg_ef_maxbits || 5107 alg->alg_ef_default != oalg.alg_ef_default || 5108 alg->alg_ef_default_bits != 5109 oalg.alg_ef_default_bits)) 5110 alg_changed = B_TRUE; 5111 5112 /* 5113 * Update the affected SAs if a software provider is 5114 * being added or removed. 5115 */ 5116 if (prov_change->ec_provider_type == 5117 CRYPTO_SW_PROVIDER) 5118 sadb_alg_update(algtype, alg->alg_id, 5119 prov_change->ec_change == 5120 CRYPTO_MECH_ADDED, ns); 5121 } 5122 } 5123 rw_exit(&ipss->ipsec_alg_lock); 5124 crypto_free_mech_list(mechs, mech_count); 5125 5126 if (alg_changed) { 5127 /* 5128 * An algorithm has changed, i.e., it became valid or 5129 * invalid, or its supported key sizes have changed. 5130 * Notify ipsecah and ipsecesp of this change so 5131 * that they can send a SADB_REGISTER to their consumers. 5132 */ 5133 ipsecah_algs_changed(ns); 5134 ipsecesp_algs_changed(ns); 5135 } 5136 } 5137 5138 /* 5139 * Registers with the crypto framework to be notified of crypto 5140 * provider changes. Used to update the algorithm tables and 5141 * to free or create context templates if needed. Invoked after IPsec 5142 * is loaded successfully. 5143 * 5144 * This is called separately for each IP instance, so we ensure we only 5145 * register once. 5146 */ 5147 void 5148 ipsec_register_prov_update(void) 5149 { 5150 if (prov_update_handle != NULL) 5151 return; 5152 5153 prov_update_handle = crypto_notify_events( 5154 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); 5155 } 5156 5157 /* 5158 * Unregisters from the framework to be notified of crypto provider 5159 * changes. Called from ipsec_policy_g_destroy(). 5160 */ 5161 static void 5162 ipsec_unregister_prov_update(void) 5163 { 5164 if (prov_update_handle != NULL) 5165 crypto_unnotify_events(prov_update_handle); 5166 } 5167 5168 /* 5169 * Tunnel-mode support routines. 5170 */ 5171 5172 /* 5173 * Returns an mblk chain suitable for putnext() if policies match and IPsec 5174 * SAs are available. If there's no per-tunnel policy, or the lookup comes back 5175 * with no match, then still return the packet and have global policy take 5176 * a crack at it in IP. 5177 * This updates the ip_xmit_attr with the IPsec policy. 5178 * 5179 * Remember -> we can be forwarding packets. Keep that in mind w.r.t. 5180 * inner-packet contents.
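 *
 * Illustrative caller sketch (hypothetical; the iptun module is the real
 * caller, and the IPv4-in-IPv4 case is shown):
 *
 *	mp = ipsec_tun_outbound(mp, iptun, inner4, NULL, outer4, NULL,
 *	    IPH_HDR_LENGTH(outer4), ixa);
 *	if (mp == NULL)
 *		return;		(dropped and accounted for already)
 *	... on success, ixa carries the matched policy for transmit ...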
5181 */ 5182 mblk_t * 5183 ipsec_tun_outbound(mblk_t *mp, iptun_t *iptun, ipha_t *inner_ipv4, 5184 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, 5185 ip_xmit_attr_t *ixa) 5186 { 5187 ipsec_policy_head_t *polhead; 5188 ipsec_selector_t sel; 5189 mblk_t *nmp; 5190 boolean_t is_fragment; 5191 ipsec_policy_t *pol; 5192 ipsec_tun_pol_t *itp = iptun->iptun_itp; 5193 netstack_t *ns = iptun->iptun_ns; 5194 ipsec_stack_t *ipss = ns->netstack_ipsec; 5195 5196 ASSERT((outer_ipv6 != NULL && outer_ipv4 == NULL) || 5197 (outer_ipv4 != NULL && outer_ipv6 == NULL)); 5198 /* We take care of inners in a bit. */ 5199 5200 /* Are the IPsec fields initialized at all? */ 5201 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE)) { 5202 ASSERT(ixa->ixa_ipsec_policy == NULL); 5203 ASSERT(ixa->ixa_ipsec_latch == NULL); 5204 ASSERT(ixa->ixa_ipsec_action == NULL); 5205 ASSERT(ixa->ixa_ipsec_ah_sa == NULL); 5206 ASSERT(ixa->ixa_ipsec_esp_sa == NULL); 5207 } 5208 5209 ASSERT(itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)); 5210 polhead = itp->itp_policy; 5211 5212 bzero(&sel, sizeof (sel)); 5213 if (inner_ipv4 != NULL) { 5214 ASSERT(inner_ipv6 == NULL); 5215 sel.ips_isv4 = B_TRUE; 5216 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5217 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5218 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5219 } else { 5220 ASSERT(inner_ipv6 != NULL); 5221 sel.ips_isv4 = B_FALSE; 5222 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5223 /* 5224 * We don't care about routing-header dests in the 5225 * forwarding/tunnel path, so just grab ip6_dst. 5226 */ 5227 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5228 } 5229 5230 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5231 /* 5232 * Caller can prepend the outer header, which means 5233 * inner_ipv[46] may be stuck in the middle. Pullup the whole 5234 * mess now if need be, for easier processing later. Don't 5235 * forget to rewire the outer header too. 5236 */ 5237 if (mp->b_cont != NULL) { 5238 nmp = msgpullup(mp, -1); 5239 if (nmp == NULL) { 5240 ip_drop_packet(mp, B_FALSE, NULL, 5241 DROPPER(ipss, ipds_spd_nomem), 5242 &ipss->ipsec_spd_dropper); 5243 return (NULL); 5244 } 5245 freemsg(mp); 5246 mp = nmp; 5247 if (outer_ipv4 != NULL) 5248 outer_ipv4 = (ipha_t *)mp->b_rptr; 5249 else 5250 outer_ipv6 = (ip6_t *)mp->b_rptr; 5251 if (inner_ipv4 != NULL) { 5252 inner_ipv4 = 5253 (ipha_t *)(mp->b_rptr + outer_hdr_len); 5254 } else { 5255 inner_ipv6 = 5256 (ip6_t *)(mp->b_rptr + outer_hdr_len); 5257 } 5258 } 5259 if (inner_ipv4 != NULL) { 5260 is_fragment = IS_V4_FRAGMENT( 5261 inner_ipv4->ipha_fragment_offset_and_flags); 5262 } else { 5263 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, mp, 5264 &is_fragment); 5265 } 5266 5267 if (is_fragment) { 5268 ipha_t *oiph; 5269 ipha_t *iph = NULL; 5270 ip6_t *ip6h = NULL; 5271 int hdr_len; 5272 uint16_t ip6_hdr_length; 5273 uint8_t v6_proto; 5274 uint8_t *v6_proto_p; 5275 5276 /* 5277 * We have a fragment we need to track! 5278 */ 5279 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5280 outer_hdr_len, ipss); 5281 if (mp == NULL) 5282 return (NULL); 5283 ASSERT(mp->b_cont == NULL); 5284 5285 /* 5286 * If we get here, we have a full fragment chain. 5287 */ 5288 5289 oiph = (ipha_t *)mp->b_rptr; 5290 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5291 hdr_len = ((outer_hdr_len != 0) ?
5292 IPH_HDR_LENGTH(oiph) : 0); 5293 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5294 } else { 5295 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5296 ip6h = (ip6_t *)mp->b_rptr; 5297 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 5298 &ip6_hdr_length, &v6_proto_p)) { 5299 ip_drop_packet_chain(mp, B_FALSE, NULL, 5300 DROPPER(ipss, 5301 ipds_spd_malformed_packet), 5302 &ipss->ipsec_spd_dropper); 5303 return (NULL); 5304 } 5305 hdr_len = ip6_hdr_length; 5306 } 5307 outer_hdr_len = hdr_len; 5308 5309 if (sel.ips_isv4) { 5310 if (iph == NULL) { 5311 /* Was v6 outer */ 5312 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5313 } 5314 inner_ipv4 = iph; 5315 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5316 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5317 sel.ips_protocol = 5318 (uint8_t)inner_ipv4->ipha_protocol; 5319 } else { 5320 inner_ipv6 = (ip6_t *)(mp->b_rptr + 5321 hdr_len); 5322 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5323 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5324 if (!ip_hdr_length_nexthdr_v6(mp, 5325 inner_ipv6, &ip6_hdr_length, &v6_proto_p)) { 5326 ip_drop_packet_chain(mp, B_FALSE, NULL, 5327 DROPPER(ipss, 5328 ipds_spd_malformed_frag), 5329 &ipss->ipsec_spd_dropper); 5330 return (NULL); 5331 } 5332 v6_proto = *v6_proto_p; 5333 sel.ips_protocol = v6_proto; 5334 #ifdef FRAGCACHE_DEBUG 5335 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5336 sel.ips_protocol); 5337 #endif 5338 } 5339 /* Ports are extracted below */ 5340 } 5341 5342 /* Get ports... */ 5343 if (!ipsec_init_outbound_ports(&sel, mp, 5344 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5345 /* callee did ip_drop_packet_chain() on mp. */ 5346 return (NULL); 5347 } 5348 #ifdef FRAGCACHE_DEBUG 5349 if (inner_ipv4 != NULL) 5350 cmn_err(CE_WARN, 5351 "(v4) sel.ips_protocol = %d, " 5352 "sel.ips_local_port = %d, " 5353 "sel.ips_remote_port = %d\n", 5354 sel.ips_protocol, ntohs(sel.ips_local_port), 5355 ntohs(sel.ips_remote_port)); 5356 if (inner_ipv6 != NULL) 5357 cmn_err(CE_WARN, 5358 "(v6) sel.ips_protocol = %d, " 5359 "sel.ips_local_port = %d, " 5360 "sel.ips_remote_port = %d\n", 5361 sel.ips_protocol, ntohs(sel.ips_local_port), 5362 ntohs(sel.ips_remote_port)); 5363 #endif 5364 /* Success so far! */ 5365 } 5366 rw_enter(&polhead->iph_lock, RW_READER); 5367 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel); 5368 rw_exit(&polhead->iph_lock); 5369 if (pol == NULL) { 5370 /* 5371 * No matching policy on this tunnel, drop the packet. 5372 * 5373 * NOTE: Tunnel-mode tunnels are different from the 5374 * IP global transport mode policy head. For a tunnel-mode 5375 * tunnel, we drop the packet in lieu of passing it 5376 * along as accepted, the way a global-policy miss would. 5377 * 5378 * NOTE2: "negotiate transport" tunnels should match ALL 5379 * inbound packets, but we do not uncomment the ASSERT() 5380 * below because if/when we open PF_POLICY, a user can 5381 * shoot themselves in the foot with a 0 priority. 5382 */ 5383 5384 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5385 #ifdef FRAGCACHE_DEBUG 5386 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5387 "per-port policy\n"); 5388 #endif 5389 ip_drop_packet_chain(mp, B_FALSE, NULL, 5390 DROPPER(ipss, ipds_spd_explicit), 5391 &ipss->ipsec_spd_dropper); 5392 return (NULL); 5393 } 5394 5395 #ifdef FRAGCACHE_DEBUG 5396 cmn_err(CE_WARN, "Found matching tunnel per-port policy\n"); 5397 #endif 5398 5399 /* 5400 * NOTE: ixa_cleanup() will release pol's reference.
5401 */ 5402 ixa->ixa_ipsec_policy = pol; 5403 /* 5404 * NOTE: There is a subtle difference between iptun_zoneid and 5405 * iptun_connp->conn_zoneid explained in iptun_conn_create(). When 5406 * interacting with the ip module, we must use conn_zoneid. 5407 */ 5408 ixa->ixa_zoneid = iptun->iptun_connp->conn_zoneid; 5409 5410 ASSERT((outer_ipv4 != NULL) ? (ixa->ixa_flags & IXAF_IS_IPV4) : 5411 !(ixa->ixa_flags & IXAF_IS_IPV4)); 5412 ASSERT(ixa->ixa_ipsec_policy != NULL); 5413 ixa->ixa_flags |= IXAF_IPSEC_SECURE; 5414 5415 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5416 /* Set up transport mode for tunnelled packets. */ 5417 ixa->ixa_ipsec_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5418 IPPROTO_IPV6; 5419 return (mp); 5420 } 5421 5422 /* Fill in tunnel-mode goodies here. */ 5423 ixa->ixa_flags |= IXAF_IPSEC_TUNNEL; 5424 /* XXX Do I need to fill in all of the goodies here? */ 5425 if (inner_ipv4) { 5426 ixa->ixa_ipsec_inaf = AF_INET; 5427 ixa->ixa_ipsec_insrc[0] = 5428 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5429 ixa->ixa_ipsec_indst[0] = 5430 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5431 } else { 5432 ixa->ixa_ipsec_inaf = AF_INET6; 5433 ixa->ixa_ipsec_insrc[0] = 5434 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5435 ixa->ixa_ipsec_insrc[1] = 5436 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5437 ixa->ixa_ipsec_insrc[2] = 5438 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5439 ixa->ixa_ipsec_insrc[3] = 5440 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5441 ixa->ixa_ipsec_indst[0] = 5442 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5443 ixa->ixa_ipsec_indst[1] = 5444 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5445 ixa->ixa_ipsec_indst[2] = 5446 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5447 ixa->ixa_ipsec_indst[3] = 5448 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5449 } 5450 ixa->ixa_ipsec_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5451 ixa->ixa_ipsec_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5452 /* NOTE: These are used for transport mode too. */ 5453 ixa->ixa_ipsec_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5454 ixa->ixa_ipsec_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5455 ixa->ixa_ipsec_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5456 5457 return (mp); 5458 } 5459 5460 /* 5461 * NOTE: The following releases pol's reference and 5462 * calls ip_drop_packet() for me on NULL returns. 5463 */ 5464 mblk_t * 5465 ipsec_check_ipsecin_policy_reasm(mblk_t *attr_mp, ipsec_policy_t *pol, 5466 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns) 5467 { 5468 /* Assume attr_mp is a chain of b_next-linked ip_recv_attr mblk. */ 5469 mblk_t *data_chain = NULL, *data_tail = NULL; 5470 mblk_t *next; 5471 mblk_t *data_mp; 5472 ip_recv_attr_t iras; 5473 5474 while (attr_mp != NULL) { 5475 ASSERT(ip_recv_attr_is_mblk(attr_mp)); 5476 next = attr_mp->b_next; 5477 attr_mp->b_next = NULL; /* No tripping asserts. */ 5478 5479 data_mp = attr_mp->b_cont; 5480 attr_mp->b_cont = NULL; 5481 if (!ip_recv_attr_from_mblk(attr_mp, &iras)) { 5482 /* The ill or ip_stack_t disappeared on us */ 5483 freemsg(data_mp); /* ip_drop_packet?? */ 5484 ira_cleanup(&iras, B_TRUE); 5485 goto fail; 5486 } 5487 5488 /* 5489 * Need IPPOL_REFHOLD(pol) for extras because 5490 * ipsecin_policy does the refrele. 
5491 */ 5492 IPPOL_REFHOLD(pol); 5493 5494 data_mp = ipsec_check_ipsecin_policy(data_mp, pol, inner_ipv4, 5495 inner_ipv6, pkt_unique, &iras, ns); 5496 ira_cleanup(&iras, B_TRUE); 5497 5498 if (data_mp == NULL) 5499 goto fail; 5500 5501 if (data_tail == NULL) { 5502 /* First one */ 5503 data_chain = data_tail = data_mp; 5504 } else { 5505 data_tail->b_next = data_mp; 5506 data_tail = data_mp; 5507 } 5508 attr_mp = next; 5509 } 5510 /* 5511 * One last release because either the loop bumped it up, or we never 5512 * called ipsec_check_ipsecin_policy(). 5513 */ 5514 IPPOL_REFRELE(pol); 5515 5516 /* data_chain is ready for return to tun module. */ 5517 return (data_chain); 5518 5519 fail: 5520 /* 5521 * Need to get rid of any extra pol 5522 * references, and any remaining bits as well. 5523 */ 5524 IPPOL_REFRELE(pol); 5525 ipsec_freemsg_chain(data_chain); 5526 ipsec_freemsg_chain(next); /* ipdrop stats? */ 5527 return (NULL); 5528 } 5529 5530 /* 5531 * Returns a message if the inbound packet passed an IPsec policy check. Returns 5532 * NULL if it failed or if it is a fragment needing its friends before a 5533 * policy check can be performed. 5534 * 5535 * Expects a non-NULL data_mp, and a non-NULL polhead. 5536 * The returned mblk may be a b_next chain of packets if fragments 5537 * needed to be collected for a proper policy check. 5538 * 5539 * This function calls ip_drop_packet() on data_mp if need be. 5540 * 5541 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller 5542 * is inspecting an ICMP packet. 5543 */ 5544 mblk_t * 5545 ipsec_tun_inbound(ip_recv_attr_t *ira, mblk_t *data_mp, ipsec_tun_pol_t *itp, 5546 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, 5547 ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns) 5548 { 5549 ipsec_policy_head_t *polhead; 5550 ipsec_selector_t sel; 5551 ipsec_policy_t *pol; 5552 uint16_t tmpport; 5553 selret_t rc; 5554 boolean_t port_policy_present, is_icmp, global_present; 5555 in6_addr_t tmpaddr; 5556 ipaddr_t tmp4; 5557 uint8_t flags, *inner_hdr; 5558 ipsec_stack_t *ipss = ns->netstack_ipsec; 5559 5560 sel.ips_is_icmp_inv_acq = 0; 5561 5562 if (outer_ipv4 != NULL) { 5563 ASSERT(outer_ipv6 == NULL); 5564 global_present = ipss->ipsec_inbound_v4_policy_present; 5565 } else { 5566 ASSERT(outer_ipv6 != NULL); 5567 global_present = ipss->ipsec_inbound_v6_policy_present; 5568 } 5569 5570 ASSERT((inner_ipv4 != NULL && inner_ipv6 == NULL) || 5571 (inner_ipv4 == NULL && inner_ipv6 != NULL)); 5572 5573 if (outer_hdr_len < 0) { 5574 outer_hdr_len = (-outer_hdr_len); 5575 is_icmp = B_TRUE; 5576 } else { 5577 is_icmp = B_FALSE; 5578 } 5579 5580 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5581 mblk_t *mp = data_mp; 5582 5583 polhead = itp->itp_policy; 5584 /* 5585 * We need to perform full Tunnel-Mode enforcement, 5586 * and we need to have inner-header data for such enforcement. 5587 * 5588 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5589 * and on return. 5590 */ 5591 5592 port_policy_present = ((itp->itp_flags & 5593 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5594 /* 5595 * NOTE: Even if our policy is transport mode, set the 5596 * SEL_TUNNEL_MODE flag so ipsec_init_inbound_sel() can 5597 * do the right thing w.r.t. outer headers. 5598 */ 5599 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5600 (is_icmp ?
SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5601 5602 rc = ipsec_init_inbound_sel(&sel, data_mp, inner_ipv4, 5603 inner_ipv6, flags); 5604 5605 switch (rc) { 5606 case SELRET_NOMEM: 5607 ip_drop_packet(data_mp, B_TRUE, NULL, 5608 DROPPER(ipss, ipds_spd_nomem), 5609 &ipss->ipsec_spd_dropper); 5610 return (NULL); 5611 case SELRET_TUNFRAG: 5612 /* 5613 * At this point, a cleartext fragment is not 5614 * acceptable; drop it. 5615 */ 5616 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 5617 ip_drop_packet(data_mp, B_TRUE, NULL, 5618 DROPPER(ipss, ipds_spd_got_clear), 5619 &ipss->ipsec_spd_dropper); 5620 return (NULL); 5621 } 5622 5623 /* 5624 * Inner and outer headers may not be contiguous. 5625 * Pullup the data_mp now to satisfy assumptions of 5626 * ipsec_fragcache_add(). 5627 */ 5628 if (data_mp->b_cont != NULL) { 5629 mblk_t *nmp; 5630 5631 nmp = msgpullup(data_mp, -1); 5632 if (nmp == NULL) { 5633 ip_drop_packet(data_mp, B_TRUE, NULL, 5634 DROPPER(ipss, ipds_spd_nomem), 5635 &ipss->ipsec_spd_dropper); 5636 return (NULL); 5637 } 5638 freemsg(data_mp); 5639 data_mp = nmp; 5640 if (outer_ipv4 != NULL) 5641 outer_ipv4 = 5642 (ipha_t *)data_mp->b_rptr; 5643 else 5644 outer_ipv6 = 5645 (ip6_t *)data_mp->b_rptr; 5646 if (inner_ipv4 != NULL) { 5647 inner_ipv4 = 5648 (ipha_t *)(data_mp->b_rptr + 5649 outer_hdr_len); 5650 } else { 5651 inner_ipv6 = 5652 (ip6_t *)(data_mp->b_rptr + 5653 outer_hdr_len); 5654 } 5655 } 5656 5657 /* 5658 * If we need to queue the packet, we first get an 5659 * mblk with the attributes. ipsec_fragcache_add 5660 * will prepend that to the queued data and return 5661 * a list of b_next messages, each of which starts with 5662 * the attribute mblk. 5663 */ 5664 mp = ip_recv_attr_to_mblk(ira); 5665 if (mp == NULL) { 5666 ip_drop_packet(data_mp, B_TRUE, NULL, 5667 DROPPER(ipss, ipds_spd_nomem), 5668 &ipss->ipsec_spd_dropper); 5669 return (NULL); 5670 } 5671 5672 mp = ipsec_fragcache_add(&itp->itp_fragcache, 5673 mp, data_mp, outer_hdr_len, ipss); 5674 5675 if (mp == NULL) { 5676 /* 5677 * Data is cached, fragment chain is not 5678 * complete. 5679 */ 5680 return (NULL); 5681 } 5682 5683 /* 5684 * If we get here, we have a full fragment chain. 5685 * Reacquire headers and selectors from the first fragment. 5686 */ 5687 ASSERT(ip_recv_attr_is_mblk(mp)); 5688 data_mp = mp->b_cont; 5689 inner_hdr = data_mp->b_rptr; 5690 if (outer_ipv4 != NULL) { 5691 inner_hdr += IPH_HDR_LENGTH( 5692 (ipha_t *)data_mp->b_rptr); 5693 } else { 5694 inner_hdr += ip_hdr_length_v6(data_mp, 5695 (ip6_t *)data_mp->b_rptr); 5696 } 5697 ASSERT(inner_hdr <= data_mp->b_wptr); 5698 5699 if (inner_ipv4 != NULL) { 5700 inner_ipv4 = (ipha_t *)inner_hdr; 5701 inner_ipv6 = NULL; 5702 } else { 5703 inner_ipv6 = (ip6_t *)inner_hdr; 5704 inner_ipv4 = NULL; 5705 } 5706 5707 /* 5708 * Use SEL_TUNNEL_MODE to take into account the outer 5709 * header. Use SEL_POST_FRAG so we always get ports. 5710 */ 5711 rc = ipsec_init_inbound_sel(&sel, data_mp, 5712 inner_ipv4, inner_ipv6, 5713 SEL_TUNNEL_MODE | SEL_POST_FRAG); 5714 switch (rc) { 5715 case SELRET_SUCCESS: 5716 /* 5717 * Get to the same place as the first caller's 5718 * SELRET_SUCCESS case.
5719 */ 5720 break; 5721 case SELRET_NOMEM: 5722 ip_drop_packet_chain(mp, B_TRUE, NULL, 5723 DROPPER(ipss, ipds_spd_nomem), 5724 &ipss->ipsec_spd_dropper); 5725 return (NULL); 5726 case SELRET_BADPKT: 5727 ip_drop_packet_chain(mp, B_TRUE, NULL, 5728 DROPPER(ipss, ipds_spd_malformed_frag), 5729 &ipss->ipsec_spd_dropper); 5730 return (NULL); 5731 case SELRET_TUNFRAG: 5732 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 5733 /* FALLTHRU */ 5734 default: 5735 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 5736 " returns bizarro 0x%x", rc); 5737 /* Guaranteed panic! */ 5738 ASSERT(rc == SELRET_NOMEM); 5739 return (NULL); 5740 } 5741 /* FALLTHRU */ 5742 case SELRET_SUCCESS: 5743 /* 5744 * Common case: 5745 * No per-port policy or a non-fragment. Keep going. 5746 */ 5747 break; 5748 case SELRET_BADPKT: 5749 /* 5750 * We may receive ICMP (with IPv6 inner) packets that 5751 * trigger this return value. Send 'em in for 5752 * enforcement checking. 5753 */ 5754 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 5755 "sending 'bad packet' in for enforcement"); 5756 break; 5757 default: 5758 cmn_err(CE_WARN, 5759 "ipsec_init_inbound_sel() returns bizarro 0x%x", 5760 rc); 5761 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 5762 return (NULL); 5763 } 5764 5765 if (is_icmp) { 5766 /* 5767 * Swap local/remote because this is an ICMP packet. 5768 */ 5769 tmpaddr = sel.ips_local_addr_v6; 5770 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 5771 sel.ips_remote_addr_v6 = tmpaddr; 5772 tmpport = sel.ips_local_port; 5773 sel.ips_local_port = sel.ips_remote_port; 5774 sel.ips_remote_port = tmpport; 5775 } 5776 5777 /* find_policy_head() */ 5778 rw_enter(&polhead->iph_lock, RW_READER); 5779 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 5780 &sel); 5781 rw_exit(&polhead->iph_lock); 5782 if (pol != NULL) { 5783 uint64_t pkt_unique; 5784 5785 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 5786 if (!pol->ipsp_act->ipa_allow_clear) { 5787 /* 5788 * XXX should never get here with 5789 * tunnel reassembled fragments? 5790 */ 5791 ASSERT(mp == data_mp); 5792 ip_drop_packet(data_mp, B_TRUE, NULL, 5793 DROPPER(ipss, ipds_spd_got_clear), 5794 &ipss->ipsec_spd_dropper); 5795 IPPOL_REFRELE(pol); 5796 return (NULL); 5797 } else { 5798 IPPOL_REFRELE(pol); 5799 return (mp); 5800 } 5801 } 5802 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 5803 sel.ips_local_port, 5804 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 5805 IPPROTO_ENCAP, sel.ips_protocol); 5806 5807 /* 5808 * NOTE: The following releases pol's reference and 5809 * calls ip_drop_packet() for me on NULL returns. 5810 * 5811 * "sel" is still good here, so let's use it! 5812 */ 5813 if (data_mp == mp) { 5814 /* A single packet without attributes */ 5815 data_mp = ipsec_check_ipsecin_policy(data_mp, 5816 pol, inner_ipv4, inner_ipv6, pkt_unique, 5817 ira, ns); 5818 } else { 5819 /* 5820 * We pass in the b_next chain of attr_mp's 5821 * and get back a b_next chain of data_mp's. 5822 */ 5823 data_mp = ipsec_check_ipsecin_policy_reasm(mp, 5824 pol, inner_ipv4, inner_ipv6, pkt_unique, 5825 ns); 5826 } 5827 return (data_mp); 5828 } 5829 5830 /* 5831 * Else fallthru and check the global policy on the outer 5832 * header(s) if this tunnel is an old-style transport-mode 5833 * one. Drop the packet explicitly (no policy entry) for 5834 * a new-style tunnel-mode tunnel. 
5835 */ 5836 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 5837 ip_drop_packet_chain(data_mp, B_TRUE, NULL, 5838 DROPPER(ipss, ipds_spd_explicit), 5839 &ipss->ipsec_spd_dropper); 5840 return (NULL); 5841 } 5842 } 5843 5844 /* 5845 * NOTE: If we reach here, we will not have packet chains from 5846 * fragcache_add(), because the only way I get chains is on a 5847 * tunnel-mode tunnel, which either returns with a pass, or gets 5848 * hit by the ip_drop_packet_chain() call right above here. 5849 */ 5850 ASSERT(data_mp->b_next == NULL); 5851 5852 /* If no per-tunnel security, check global policy now. */ 5853 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && !global_present) { 5854 if (ira->ira_flags & IRAF_TRUSTED_ICMP) { 5855 /* 5856 * This is an ICMP message that was geenrated locally. 5857 * We should accept it. 5858 */ 5859 return (data_mp); 5860 } 5861 5862 ip_drop_packet(data_mp, B_TRUE, NULL, 5863 DROPPER(ipss, ipds_spd_got_secure), 5864 &ipss->ipsec_spd_dropper); 5865 return (NULL); 5866 } 5867 5868 if (is_icmp) { 5869 /* 5870 * For ICMP packets, "outer_ipvN" is set to the outer header 5871 * that is *INSIDE* the ICMP payload. For global policy 5872 * checking, we need to reverse src/dst on the payload in 5873 * order to construct selectors appropriately. See "ripha" 5874 * constructions in ip.c. To avoid a bug like 6478464 (see 5875 * earlier in this file), we will actually exchange src/dst 5876 * in the packet, and reverse if after the call to 5877 * ipsec_check_global_policy(). 5878 */ 5879 if (outer_ipv4 != NULL) { 5880 tmp4 = outer_ipv4->ipha_src; 5881 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 5882 outer_ipv4->ipha_dst = tmp4; 5883 } else { 5884 ASSERT(outer_ipv6 != NULL); 5885 tmpaddr = outer_ipv6->ip6_src; 5886 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 5887 outer_ipv6->ip6_dst = tmpaddr; 5888 } 5889 } 5890 5891 data_mp = ipsec_check_global_policy(data_mp, NULL, outer_ipv4, 5892 outer_ipv6, ira, ns); 5893 if (data_mp == NULL) 5894 return (NULL); 5895 5896 if (is_icmp) { 5897 /* Set things back to normal. */ 5898 if (outer_ipv4 != NULL) { 5899 tmp4 = outer_ipv4->ipha_src; 5900 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 5901 outer_ipv4->ipha_dst = tmp4; 5902 } else { 5903 /* No need for ASSERT()s now. */ 5904 tmpaddr = outer_ipv6->ip6_src; 5905 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 5906 outer_ipv6->ip6_dst = tmpaddr; 5907 } 5908 } 5909 5910 /* 5911 * At this point, we pretend it's a cleartext accepted 5912 * packet. 5913 */ 5914 return (data_mp); 5915 } 5916 5917 /* 5918 * AVL comparison routine for our list of tunnel polheads. 5919 */ 5920 static int 5921 tunnel_compare(const void *arg1, const void *arg2) 5922 { 5923 ipsec_tun_pol_t *left, *right; 5924 int rc; 5925 5926 left = (ipsec_tun_pol_t *)arg1; 5927 right = (ipsec_tun_pol_t *)arg2; 5928 5929 rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ); 5930 return (rc == 0 ? rc : (rc > 0 ? 1 : -1)); 5931 } 5932 5933 /* 5934 * Free a tunnel policy node. 
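
/*
 * Illustrative sketch (not part of the original source): tunnel_compare()
 * follows the avl_create() comparator contract of returning strictly -1,
 * 0, or 1, which is why the raw strncmp() result is normalized above.
 * The tunnel policy tree is created elsewhere in this stack roughly like:
 *
 *	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
 *	    sizeof (ipsec_tun_pol_t), 0);
 *
 * The zero linkage offset here is an assumption for illustration; the
 * actual offset used at stack-init time depends on where the AVL node
 * lives inside ipsec_tun_pol_t.
 */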
/*
 * Free a tunnel policy node.
 */
void
itp_free(ipsec_tun_pol_t *node, netstack_t *ns)
{
	if (node->itp_policy != NULL) {
		IPPH_REFRELE(node->itp_policy, ns);
		node->itp_policy = NULL;
	}
	if (node->itp_inactive != NULL) {
		IPPH_REFRELE(node->itp_inactive, ns);
		node->itp_inactive = NULL;
	}
	mutex_destroy(&node->itp_lock);
	kmem_free(node, sizeof (*node));
}

void
itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	ipss->ipsec_tunnel_policy_gen++;
	ipsec_fragcache_uninit(&node->itp_fragcache, ipss);
	avl_remove(&ipss->ipsec_tunnel_policies, node);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	ITP_REFRELE(node, ns);
}

/*
 * Public interface to look up a tunnel security policy by name.  Used by
 * spdsock mostly.  Returns "node" with a bumped refcnt.
 */
ipsec_tun_pol_t *
get_tunnel_policy(char *name, netstack_t *ns)
{
	ipsec_tun_pol_t *node, lookup;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	(void) strncpy(lookup.itp_name, name, LIFNAMSIZ);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
	node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
	    &lookup, NULL);
	if (node != NULL) {
		ITP_REFHOLD(node);
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);

	return (node);
}

/*
 * Public interface to walk all tunnel security policies.  Useful for spdsock
 * DUMP operations.  iterator() will not consume a reference.
 */
void
itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *),
    void *arg, netstack_t *ns)
{
	ipsec_tun_pol_t *node;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
	for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL;
	    node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) {
		iterator(node, arg, ns);
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
}
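
/*
 * Illustrative sketch (not from the original source): a caller of the two
 * public interfaces above.  get_tunnel_policy() returns a held reference
 * that must be released with ITP_REFRELE().  itp_walk() holds the tree
 * lock as reader for the duration, so the hypothetical count_itp()
 * callback below must not block or take ipsec_tunnel_policy_lock itself.
 *
 *	static void
 *	count_itp(ipsec_tun_pol_t *itp, void *arg, netstack_t *ns)
 *	{
 *		(*(uint_t *)arg)++;
 *	}
 *
 *	uint_t count = 0;
 *	ipsec_tun_pol_t *itp = get_tunnel_policy("ip.tun0", ns);
 *
 *	if (itp != NULL) {
 *		... inspect itp->itp_policy under itp->itp_lock ...
 *		ITP_REFRELE(itp, ns);
 *	}
 *	itp_walk(count_itp, &count, ns);
 */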
6030 */ 6031 ipsec_tun_pol_t * 6032 create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns) 6033 { 6034 ipsec_tun_pol_t *newbie, *existing; 6035 avl_index_t where; 6036 ipsec_stack_t *ipss = ns->netstack_ipsec; 6037 6038 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP); 6039 if (newbie == NULL) { 6040 *errno = ENOMEM; 6041 return (NULL); 6042 } 6043 if (!ipsec_fragcache_init(&newbie->itp_fragcache)) { 6044 kmem_free(newbie, sizeof (*newbie)); 6045 *errno = ENOMEM; 6046 return (NULL); 6047 } 6048 6049 (void) strncpy(newbie->itp_name, name, LIFNAMSIZ); 6050 6051 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6052 existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6053 newbie, &where); 6054 if (existing != NULL) { 6055 itp_free(newbie, ns); 6056 *errno = EEXIST; 6057 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6058 return (NULL); 6059 } 6060 ipss->ipsec_tunnel_policy_gen++; 6061 *gen = ipss->ipsec_tunnel_policy_gen; 6062 newbie->itp_refcnt = 2; /* One for the caller, one for the tree. */ 6063 newbie->itp_next_policy_index = 1; 6064 avl_insert(&ipss->ipsec_tunnel_policies, newbie, where); 6065 mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL); 6066 newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t), 6067 KM_NOSLEEP); 6068 if (newbie->itp_policy == NULL) 6069 goto nomem; 6070 newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t), 6071 KM_NOSLEEP); 6072 if (newbie->itp_inactive == NULL) { 6073 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6074 goto nomem; 6075 } 6076 6077 if (!tunnel_polhead_init(newbie->itp_policy, ns)) { 6078 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6079 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6080 goto nomem; 6081 } else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) { 6082 IPPH_REFRELE(newbie->itp_policy, ns); 6083 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6084 goto nomem; 6085 } 6086 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6087 6088 return (newbie); 6089 nomem: 6090 *errno = ENOMEM; 6091 kmem_free(newbie, sizeof (*newbie)); 6092 return (NULL); 6093 } 6094 6095 /* 6096 * Given two addresses, find a tunnel instance's IPsec policy heads. 6097 * Returns NULL on failure. 6098 */ 6099 ipsec_tun_pol_t * 6100 itp_get_byaddr(uint32_t *laddr, uint32_t *faddr, int af, ip_stack_t *ipst) 6101 { 6102 conn_t *connp; 6103 iptun_t *iptun; 6104 ipsec_tun_pol_t *itp = NULL; 6105 6106 /* Classifiers are used to "src" being foreign. */ 6107 if (af == AF_INET) { 6108 connp = ipcl_iptun_classify_v4((ipaddr_t *)faddr, 6109 (ipaddr_t *)laddr, ipst); 6110 } else { 6111 ASSERT(af == AF_INET6); 6112 ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)laddr)); 6113 ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)faddr)); 6114 connp = ipcl_iptun_classify_v6((in6_addr_t *)faddr, 6115 (in6_addr_t *)laddr, ipst); 6116 } 6117 6118 if (connp == NULL) 6119 return (NULL); 6120 6121 if (IPCL_IS_IPTUN(connp)) { 6122 iptun = connp->conn_iptun; 6123 if (iptun != NULL) { 6124 itp = iptun->iptun_itp; 6125 if (itp != NULL) { 6126 /* Braces due to the macro's nature... */ 6127 ITP_REFHOLD(itp); 6128 } 6129 } /* Else itp is already NULL. 
/*
 * Given two addresses, find a tunnel instance's IPsec policy heads.
 * Returns NULL on failure.
 */
ipsec_tun_pol_t *
itp_get_byaddr(uint32_t *laddr, uint32_t *faddr, int af, ip_stack_t *ipst)
{
	conn_t *connp;
	iptun_t *iptun;
	ipsec_tun_pol_t *itp = NULL;

	/* The classifiers expect the "src" address to be the foreign one. */
	if (af == AF_INET) {
		connp = ipcl_iptun_classify_v4((ipaddr_t *)faddr,
		    (ipaddr_t *)laddr, ipst);
	} else {
		ASSERT(af == AF_INET6);
		ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)laddr));
		ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)faddr));
		connp = ipcl_iptun_classify_v6((in6_addr_t *)faddr,
		    (in6_addr_t *)laddr, ipst);
	}

	if (connp == NULL)
		return (NULL);

	if (IPCL_IS_IPTUN(connp)) {
		iptun = connp->conn_iptun;
		if (iptun != NULL) {
			itp = iptun->iptun_itp;
			if (itp != NULL) {
				/* Braces due to the macro's nature... */
				ITP_REFHOLD(itp);
			}
		}	/* Else itp is already NULL. */
	}

	CONN_DEC_REF(connp);
	return (itp);
}

/*
 * Frag cache code, based on SunScreen 3.2 source
 * screen/kernel/common/screen_fragcache.c
 */

#define	IPSEC_FRAG_TTL_MAX	5
/*
 * Note that the following parameters create 256 hash buckets
 * with 1024 free entries to be distributed.  Things are cleaned
 * periodically, and a cleanup is attempted when there is no free
 * space, but this system errs on the side of dropping packets over
 * creating memory exhaustion.  We may decide to make the hash
 * factor a tunable if this proves to be a bad decision.
 */
#define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
#define	IPSEC_FRAG_HASH_FACTOR	4
#define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)

#define	IPSEC_FRAG_HASH_MASK		(IPSEC_FRAG_HASH_SLOTS - 1)
#define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
					    (((id) / \
					    (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
					    IPSEC_FRAG_HASH_MASK))

/* Maximum fragments per packet.  48 bytes payload x 1366 packets > 64KB */
#define	IPSEC_MAX_FRAGS		1366

#define	V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_OFFSET) << 3)
#define	V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_MF)
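
/*
 * Worked example (illustrative, not from the original source): for an
 * IP ident of 0x1234, IPSEC_FRAG_HASH_FUNC() folds the high byte of the
 * 16-bit ident into the low byte:
 *
 *	(0x1234 & 0xff) ^ ((0x1234 / 256) & 0xff)
 *	    = 0x34 ^ 0x12
 *	    = 0x26
 *
 * so that ID lands in bucket 0x26 of the 256 slots.  XORing the two
 * halves spreads sequentially-allocated idents across buckets better
 * than masking the low byte alone would.
 */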
6235 */ 6236 kmem_free(frag->itpf_freelist, 6237 sizeof (ipsec_fragcache_entry_t) * 6238 IPSEC_FRAG_HASH_SIZE); 6239 /* Free the fragcache structure */ 6240 kmem_free(frag->itpf_ptr, 6241 sizeof (ipsec_fragcache_entry_t *) * 6242 IPSEC_FRAG_HASH_SLOTS); 6243 } 6244 mutex_exit(&frag->itpf_lock); 6245 mutex_destroy(&frag->itpf_lock); 6246 } 6247 6248 /* 6249 * Add a fragment to the fragment cache. Consumes mp if NULL is returned. 6250 * Returns mp if a whole fragment has been assembled, NULL otherwise 6251 * The returned mp could be a b_next chain of fragments. 6252 * 6253 * The iramp argument is set on inbound; NULL if outbound. 6254 */ 6255 mblk_t * 6256 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *iramp, mblk_t *mp, 6257 int outer_hdr_len, ipsec_stack_t *ipss) 6258 { 6259 boolean_t is_v4; 6260 time_t itpf_time; 6261 ipha_t *iph; 6262 ipha_t *oiph; 6263 ip6_t *ip6h = NULL; 6264 uint8_t v6_proto; 6265 uint8_t *v6_proto_p; 6266 uint16_t ip6_hdr_length; 6267 ip_pkt_t ipp; 6268 ip6_frag_t *fraghdr; 6269 ipsec_fragcache_entry_t *fep; 6270 int i; 6271 mblk_t *nmp, *prevmp; 6272 int firstbyte, lastbyte; 6273 int offset; 6274 int last; 6275 boolean_t inbound = (iramp != NULL); 6276 6277 #ifdef FRAGCACHE_DEBUG 6278 cmn_err(CE_WARN, "Fragcache: %s\n", inbound ? "INBOUND" : "OUTBOUND"); 6279 #endif 6280 /* 6281 * You're on the slow path, so insure that every packet in the 6282 * cache is a single-mblk one. 6283 */ 6284 if (mp->b_cont != NULL) { 6285 nmp = msgpullup(mp, -1); 6286 if (nmp == NULL) { 6287 ip_drop_packet(mp, inbound, NULL, 6288 DROPPER(ipss, ipds_spd_nomem), 6289 &ipss->ipsec_spd_dropper); 6290 if (inbound) 6291 (void) ip_recv_attr_free_mblk(iramp); 6292 return (NULL); 6293 } 6294 freemsg(mp); 6295 mp = nmp; 6296 } 6297 6298 mutex_enter(&frag->itpf_lock); 6299 6300 oiph = (ipha_t *)mp->b_rptr; 6301 iph = (ipha_t *)(mp->b_rptr + outer_hdr_len); 6302 6303 if (IPH_HDR_VERSION(iph) == IPV4_VERSION) { 6304 is_v4 = B_TRUE; 6305 } else { 6306 ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION); 6307 ip6h = (ip6_t *)(mp->b_rptr + outer_hdr_len); 6308 6309 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip6_hdr_length, 6310 &v6_proto_p)) { 6311 /* 6312 * Find upper layer protocol. 6313 * If it fails we have a malformed packet 6314 */ 6315 mutex_exit(&frag->itpf_lock); 6316 ip_drop_packet(mp, inbound, NULL, 6317 DROPPER(ipss, ipds_spd_malformed_packet), 6318 &ipss->ipsec_spd_dropper); 6319 if (inbound) 6320 (void) ip_recv_attr_free_mblk(iramp); 6321 return (NULL); 6322 } else { 6323 v6_proto = *v6_proto_p; 6324 } 6325 6326 6327 bzero(&ipp, sizeof (ipp)); 6328 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &ipp, NULL); 6329 if (!(ipp.ipp_fields & IPPF_FRAGHDR)) { 6330 /* 6331 * We think this is a fragment, but didn't find 6332 * a fragment header. Something is wrong. 6333 */ 6334 mutex_exit(&frag->itpf_lock); 6335 ip_drop_packet(mp, inbound, NULL, 6336 DROPPER(ipss, ipds_spd_malformed_frag), 6337 &ipss->ipsec_spd_dropper); 6338 if (inbound) 6339 (void) ip_recv_attr_free_mblk(iramp); 6340 return (NULL); 6341 } 6342 fraghdr = ipp.ipp_fraghdr; 6343 is_v4 = B_FALSE; 6344 } 6345 6346 /* Anything to cleanup? */ 6347 6348 /* 6349 * This cleanup call could be put in a timer loop 6350 * but it may actually be just as reasonable a decision to 6351 * leave it here. The disadvantage is this only gets called when 6352 * frags are added. The advantage is that it is not 6353 * susceptible to race conditions like a time-based cleanup 6354 * may be. 
6355 */ 6356 itpf_time = gethrestime_sec(); 6357 if (itpf_time >= frag->itpf_expire_hint) 6358 ipsec_fragcache_clean(frag, ipss); 6359 6360 /* Lookup to see if there is an existing entry */ 6361 6362 if (is_v4) 6363 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6364 else 6365 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6366 6367 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6368 if (is_v4) { 6369 ASSERT(iph != NULL); 6370 if ((fep->itpfe_id == iph->ipha_ident) && 6371 (fep->itpfe_src == iph->ipha_src) && 6372 (fep->itpfe_dst == iph->ipha_dst) && 6373 (fep->itpfe_proto == iph->ipha_protocol)) 6374 break; 6375 } else { 6376 ASSERT(fraghdr != NULL); 6377 ASSERT(fep != NULL); 6378 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6379 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6380 &ip6h->ip6_src) && 6381 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6382 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6383 break; 6384 } 6385 } 6386 6387 if (is_v4) { 6388 firstbyte = V4_FRAG_OFFSET(iph); 6389 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6390 IPH_HDR_LENGTH(iph); 6391 last = (V4_MORE_FRAGS(iph) == 0); 6392 #ifdef FRAGCACHE_DEBUG 6393 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6394 "is_last_frag = %d, id = %d, mp = %p\n", firstbyte, 6395 lastbyte, last, iph->ipha_ident, mp); 6396 #endif 6397 } else { 6398 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6399 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6400 sizeof (ip6_t) - ip6_hdr_length; 6401 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6402 #ifdef FRAGCACHE_DEBUG 6403 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6404 "is_last_frag = %d, id = %d, fraghdr = %p, mp = %p\n", 6405 firstbyte, lastbyte, last, fraghdr->ip6f_ident, fraghdr, 6406 mp); 6407 #endif 6408 } 6409 6410 /* check for bogus fragments and delete the entry */ 6411 if (firstbyte > 0 && firstbyte <= 8) { 6412 if (fep != NULL) 6413 (void) fragcache_delentry(i, fep, frag, ipss); 6414 mutex_exit(&frag->itpf_lock); 6415 ip_drop_packet(mp, inbound, NULL, 6416 DROPPER(ipss, ipds_spd_malformed_frag), 6417 &ipss->ipsec_spd_dropper); 6418 if (inbound) 6419 (void) ip_recv_attr_free_mblk(iramp); 6420 return (NULL); 6421 } 6422 6423 /* Not found, allocate a new entry */ 6424 if (fep == NULL) { 6425 if (frag->itpf_freelist == NULL) { 6426 /* see if there is some space */ 6427 ipsec_fragcache_clean(frag, ipss); 6428 if (frag->itpf_freelist == NULL) { 6429 mutex_exit(&frag->itpf_lock); 6430 ip_drop_packet(mp, inbound, NULL, 6431 DROPPER(ipss, ipds_spd_nomem), 6432 &ipss->ipsec_spd_dropper); 6433 if (inbound) 6434 (void) ip_recv_attr_free_mblk(iramp); 6435 return (NULL); 6436 } 6437 } 6438 6439 fep = frag->itpf_freelist; 6440 frag->itpf_freelist = fep->itpfe_next; 6441 6442 if (is_v4) { 6443 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6444 sizeof (struct in_addr)); 6445 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6446 sizeof (struct in_addr)); 6447 fep->itpfe_id = iph->ipha_ident; 6448 fep->itpfe_proto = iph->ipha_protocol; 6449 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6450 } else { 6451 bcopy((in6_addr_t *)&ip6h->ip6_src, 6452 (in6_addr_t *)&fep->itpfe_src6, 6453 sizeof (struct in6_addr)); 6454 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6455 (in6_addr_t *)&fep->itpfe_dst6, 6456 sizeof (struct in6_addr)); 6457 fep->itpfe_id = fraghdr->ip6f_ident; 6458 fep->itpfe_proto = v6_proto; 6459 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6460 } 6461 itpf_time = gethrestime_sec(); 6462 fep->itpfe_exp = itpf_time + 
IPSEC_FRAG_TTL_MAX + 1; 6463 fep->itpfe_last = 0; 6464 fep->itpfe_fraglist = NULL; 6465 fep->itpfe_depth = 0; 6466 fep->itpfe_next = (frag->itpf_ptr)[i]; 6467 (frag->itpf_ptr)[i] = fep; 6468 6469 if (frag->itpf_expire_hint > fep->itpfe_exp) 6470 frag->itpf_expire_hint = fep->itpfe_exp; 6471 6472 } 6473 6474 /* Insert it in the frag list */ 6475 /* List is in order by starting offset of fragments */ 6476 6477 prevmp = NULL; 6478 for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) { 6479 ipha_t *niph; 6480 ipha_t *oniph; 6481 ip6_t *nip6h; 6482 ip_pkt_t nipp; 6483 ip6_frag_t *nfraghdr; 6484 uint16_t nip6_hdr_length; 6485 uint8_t *nv6_proto_p; 6486 int nfirstbyte, nlastbyte; 6487 char *data, *ndata; 6488 mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp); 6489 int hdr_len; 6490 6491 oniph = (ipha_t *)mp->b_rptr; 6492 nip6h = NULL; 6493 niph = NULL; 6494 6495 /* 6496 * Determine outer header type and length and set 6497 * pointers appropriately 6498 */ 6499 6500 if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) { 6501 hdr_len = ((outer_hdr_len != 0) ? 6502 IPH_HDR_LENGTH(oiph) : 0); 6503 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6504 } else { 6505 ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION); 6506 ASSERT(ndata_mp->b_cont == NULL); 6507 nip6h = (ip6_t *)ndata_mp->b_rptr; 6508 (void) ip_hdr_length_nexthdr_v6(ndata_mp, nip6h, 6509 &nip6_hdr_length, &v6_proto_p); 6510 hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0); 6511 } 6512 6513 /* 6514 * Determine inner header type and length and set 6515 * pointers appropriately 6516 */ 6517 6518 if (is_v4) { 6519 if (niph == NULL) { 6520 /* Was v6 outer */ 6521 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6522 } 6523 nfirstbyte = V4_FRAG_OFFSET(niph); 6524 nlastbyte = nfirstbyte + ntohs(niph->ipha_length) - 6525 IPH_HDR_LENGTH(niph); 6526 } else { 6527 ASSERT(ndata_mp->b_cont == NULL); 6528 nip6h = (ip6_t *)(ndata_mp->b_rptr + hdr_len); 6529 if (!ip_hdr_length_nexthdr_v6(ndata_mp, nip6h, 6530 &nip6_hdr_length, &nv6_proto_p)) { 6531 mutex_exit(&frag->itpf_lock); 6532 ip_drop_packet_chain(nmp, inbound, NULL, 6533 DROPPER(ipss, ipds_spd_malformed_frag), 6534 &ipss->ipsec_spd_dropper); 6535 ipsec_freemsg_chain(ndata_mp); 6536 if (inbound) 6537 (void) ip_recv_attr_free_mblk(iramp); 6538 return (NULL); 6539 } 6540 bzero(&nipp, sizeof (nipp)); 6541 (void) ip_find_hdr_v6(ndata_mp, nip6h, B_FALSE, &nipp, 6542 NULL); 6543 nfraghdr = nipp.ipp_fraghdr; 6544 nfirstbyte = ntohs(nfraghdr->ip6f_offlg & 6545 IP6F_OFF_MASK); 6546 nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) + 6547 sizeof (ip6_t) - nip6_hdr_length; 6548 } 6549 6550 /* Check for overlapping fragments */ 6551 if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) { 6552 /* 6553 * Overlap Check: 6554 * ~~~~--------- # Check if the newly 6555 * ~ ndata_mp| # received fragment 6556 * ~~~~--------- # overlaps with the 6557 * ---------~~~~~~ # current fragment. 
6558 * | mp ~ 6559 * ---------~~~~~~ 6560 */ 6561 if (is_v4) { 6562 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6563 firstbyte - nfirstbyte; 6564 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6565 } else { 6566 data = (char *)ip6h + 6567 nip6_hdr_length + firstbyte - 6568 nfirstbyte; 6569 ndata = (char *)nip6h + nip6_hdr_length; 6570 } 6571 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) - 6572 firstbyte)) { 6573 /* Overlapping data does not match */ 6574 (void) fragcache_delentry(i, fep, frag, ipss); 6575 mutex_exit(&frag->itpf_lock); 6576 ip_drop_packet(mp, inbound, NULL, 6577 DROPPER(ipss, ipds_spd_overlap_frag), 6578 &ipss->ipsec_spd_dropper); 6579 if (inbound) 6580 (void) ip_recv_attr_free_mblk(iramp); 6581 return (NULL); 6582 } 6583 /* Part of defense for jolt2.c fragmentation attack */ 6584 if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) { 6585 /* 6586 * Check for identical or subset fragments: 6587 * ---------- ~~~~--------~~~~~ 6588 * | nmp | or ~ nmp ~ 6589 * ---------- ~~~~--------~~~~~ 6590 * ---------- ------ 6591 * | mp | | mp | 6592 * ---------- ------ 6593 */ 6594 mutex_exit(&frag->itpf_lock); 6595 ip_drop_packet(mp, inbound, NULL, 6596 DROPPER(ipss, ipds_spd_evil_frag), 6597 &ipss->ipsec_spd_dropper); 6598 if (inbound) 6599 (void) ip_recv_attr_free_mblk(iramp); 6600 return (NULL); 6601 } 6602 6603 } 6604 6605 /* Correct location for this fragment? */ 6606 if (firstbyte <= nfirstbyte) { 6607 /* 6608 * Check if the tail end of the new fragment overlaps 6609 * with the head of the current fragment. 6610 * --------~~~~~~~ 6611 * | nmp ~ 6612 * --------~~~~~~~ 6613 * ~~~~~-------- 6614 * ~ mp | 6615 * ~~~~~-------- 6616 */ 6617 if (lastbyte > nfirstbyte) { 6618 /* Fragments overlap */ 6619 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6620 firstbyte - nfirstbyte; 6621 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6622 if (is_v4) { 6623 data = (char *)iph + 6624 IPH_HDR_LENGTH(iph) + firstbyte - 6625 nfirstbyte; 6626 ndata = (char *)niph + 6627 IPH_HDR_LENGTH(niph); 6628 } else { 6629 data = (char *)ip6h + 6630 nip6_hdr_length + firstbyte - 6631 nfirstbyte; 6632 ndata = (char *)nip6h + nip6_hdr_length; 6633 } 6634 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) 6635 - nfirstbyte)) { 6636 /* Overlap mismatch */ 6637 (void) fragcache_delentry(i, fep, frag, 6638 ipss); 6639 mutex_exit(&frag->itpf_lock); 6640 ip_drop_packet(mp, inbound, NULL, 6641 DROPPER(ipss, 6642 ipds_spd_overlap_frag), 6643 &ipss->ipsec_spd_dropper); 6644 if (inbound) { 6645 (void) ip_recv_attr_free_mblk( 6646 iramp); 6647 } 6648 return (NULL); 6649 } 6650 } 6651 6652 /* 6653 * Fragment does not illegally overlap and can now 6654 * be inserted into the chain 6655 */ 6656 break; 6657 } 6658 6659 prevmp = nmp; 6660 } 6661 /* Prepend the attributes before we link it in */ 6662 if (iramp != NULL) { 6663 ASSERT(iramp->b_cont == NULL); 6664 iramp->b_cont = mp; 6665 mp = iramp; 6666 iramp = NULL; 6667 } 6668 mp->b_next = nmp; 6669 6670 if (prevmp == NULL) { 6671 fep->itpfe_fraglist = mp; 6672 } else { 6673 prevmp->b_next = mp; 6674 } 6675 if (last) 6676 fep->itpfe_last = 1; 6677 6678 /* Part of defense for jolt2.c fragmentation attack */ 6679 if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) { 6680 (void) fragcache_delentry(i, fep, frag, ipss); 6681 mutex_exit(&frag->itpf_lock); 6682 if (inbound) 6683 mp = ip_recv_attr_free_mblk(mp); 6684 6685 ip_drop_packet(mp, inbound, NULL, 6686 DROPPER(ipss, ipds_spd_max_frags), 6687 &ipss->ipsec_spd_dropper); 6688 return (NULL); 6689 } 6690 6691 /* Check for complete packet */ 6692 6693 
if (!fep->itpfe_last) { 6694 mutex_exit(&frag->itpf_lock); 6695 #ifdef FRAGCACHE_DEBUG 6696 cmn_err(CE_WARN, "Fragment cached, last not yet seen.\n"); 6697 #endif 6698 return (NULL); 6699 } 6700 6701 offset = 0; 6702 for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) { 6703 mblk_t *data_mp = (inbound ? mp->b_cont : mp); 6704 int hdr_len; 6705 6706 oiph = (ipha_t *)data_mp->b_rptr; 6707 ip6h = NULL; 6708 iph = NULL; 6709 6710 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 6711 hdr_len = ((outer_hdr_len != 0) ? 6712 IPH_HDR_LENGTH(oiph) : 0); 6713 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6714 } else { 6715 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 6716 ASSERT(data_mp->b_cont == NULL); 6717 ip6h = (ip6_t *)data_mp->b_rptr; 6718 (void) ip_hdr_length_nexthdr_v6(data_mp, ip6h, 6719 &ip6_hdr_length, &v6_proto_p); 6720 hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0); 6721 } 6722 6723 /* Calculate current fragment start/end */ 6724 if (is_v4) { 6725 if (iph == NULL) { 6726 /* Was v6 outer */ 6727 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 6728 } 6729 firstbyte = V4_FRAG_OFFSET(iph); 6730 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6731 IPH_HDR_LENGTH(iph); 6732 } else { 6733 ASSERT(data_mp->b_cont == NULL); 6734 ip6h = (ip6_t *)(data_mp->b_rptr + hdr_len); 6735 if (!ip_hdr_length_nexthdr_v6(data_mp, ip6h, 6736 &ip6_hdr_length, &v6_proto_p)) { 6737 mutex_exit(&frag->itpf_lock); 6738 ip_drop_packet_chain(mp, inbound, NULL, 6739 DROPPER(ipss, ipds_spd_malformed_frag), 6740 &ipss->ipsec_spd_dropper); 6741 return (NULL); 6742 } 6743 v6_proto = *v6_proto_p; 6744 bzero(&ipp, sizeof (ipp)); 6745 (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, 6746 NULL); 6747 fraghdr = ipp.ipp_fraghdr; 6748 firstbyte = ntohs(fraghdr->ip6f_offlg & 6749 IP6F_OFF_MASK); 6750 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6751 sizeof (ip6_t) - ip6_hdr_length; 6752 } 6753 6754 /* 6755 * If this fragment is greater than current offset, 6756 * we have a missing fragment so return NULL 6757 */ 6758 if (firstbyte > offset) { 6759 mutex_exit(&frag->itpf_lock); 6760 #ifdef FRAGCACHE_DEBUG 6761 /* 6762 * Note, this can happen when the last frag 6763 * gets sent through because it is smaller 6764 * than the MTU. It is not necessarily an 6765 * error condition. 6766 */ 6767 cmn_err(CE_WARN, "Frag greater than offset! 
: " 6768 "missing fragment: firstbyte = %d, offset = %d, " 6769 "mp = %p\n", firstbyte, offset, mp); 6770 #endif 6771 return (NULL); 6772 } 6773 #ifdef FRAGCACHE_DEBUG 6774 cmn_err(CE_WARN, "Frag offsets : " 6775 "firstbyte = %d, offset = %d, mp = %p\n", 6776 firstbyte, offset, mp); 6777 #endif 6778 6779 /* 6780 * If we are at the last fragment, we have the complete 6781 * packet, so rechain things and return it to caller 6782 * for processing 6783 */ 6784 6785 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 6786 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 6787 mp = fep->itpfe_fraglist; 6788 fep->itpfe_fraglist = NULL; 6789 (void) fragcache_delentry(i, fep, frag, ipss); 6790 mutex_exit(&frag->itpf_lock); 6791 6792 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 6793 65535)) || (!is_v4 && (firstbyte + 6794 ntohs(ip6h->ip6_plen) > 65535))) { 6795 /* It is an invalid "ping-o-death" packet */ 6796 /* Discard it */ 6797 ip_drop_packet_chain(mp, inbound, NULL, 6798 DROPPER(ipss, ipds_spd_evil_frag), 6799 &ipss->ipsec_spd_dropper); 6800 return (NULL); 6801 } 6802 #ifdef FRAGCACHE_DEBUG 6803 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 6804 "mp->b_next = %p", mp, mp->b_next); 6805 #endif 6806 /* 6807 * For inbound case, mp has attrmp b_next'd chain 6808 * For outbound case, it is just data mp chain 6809 */ 6810 return (mp); 6811 } 6812 6813 /* 6814 * Update new ending offset if this 6815 * fragment extends the packet 6816 */ 6817 if (offset < lastbyte) 6818 offset = lastbyte; 6819 } 6820 6821 mutex_exit(&frag->itpf_lock); 6822 6823 /* Didn't find last fragment, so return NULL */ 6824 return (NULL); 6825 } 6826 6827 static void 6828 ipsec_fragcache_clean(ipsec_fragcache_t *frag, ipsec_stack_t *ipss) 6829 { 6830 ipsec_fragcache_entry_t *fep; 6831 int i; 6832 ipsec_fragcache_entry_t *earlyfep = NULL; 6833 time_t itpf_time; 6834 int earlyexp; 6835 int earlyi = 0; 6836 6837 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6838 6839 itpf_time = gethrestime_sec(); 6840 earlyexp = itpf_time + 10000; 6841 6842 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6843 fep = (frag->itpf_ptr)[i]; 6844 while (fep) { 6845 if (fep->itpfe_exp < itpf_time) { 6846 /* found */ 6847 fep = fragcache_delentry(i, fep, frag, ipss); 6848 } else { 6849 if (fep->itpfe_exp < earlyexp) { 6850 earlyfep = fep; 6851 earlyexp = fep->itpfe_exp; 6852 earlyi = i; 6853 } 6854 fep = fep->itpfe_next; 6855 } 6856 } 6857 } 6858 6859 frag->itpf_expire_hint = earlyexp; 6860 6861 /* if (!found) */ 6862 if (frag->itpf_freelist == NULL) 6863 (void) fragcache_delentry(earlyi, earlyfep, frag, ipss); 6864 } 6865 6866 static ipsec_fragcache_entry_t * 6867 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 6868 ipsec_fragcache_t *frag, ipsec_stack_t *ipss) 6869 { 6870 ipsec_fragcache_entry_t *targp; 6871 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 6872 6873 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 6874 6875 /* Free up any fragment list still in cache entry */ 6876 if (fep->itpfe_fraglist != NULL) { 6877 ip_drop_packet_chain(fep->itpfe_fraglist, 6878 ip_recv_attr_is_mblk(fep->itpfe_fraglist), NULL, 6879 DROPPER(ipss, ipds_spd_expired_frags), 6880 &ipss->ipsec_spd_dropper); 6881 } 6882 fep->itpfe_fraglist = NULL; 6883 6884 targp = (frag->itpf_ptr)[slot]; 6885 ASSERT(targp != 0); 6886 6887 if (targp == fep) { 6888 /* unlink from head of hash chain */ 6889 (frag->itpf_ptr)[slot] = nextp; 6890 /* link into free list */ 6891 fep->itpfe_next = frag->itpf_freelist; 6892 frag->itpf_freelist = fep; 6893 return (nextp); 6894 } 6895 6896 /* maybe should 

static ipsec_fragcache_entry_t *
fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep,
    ipsec_fragcache_t *frag, ipsec_stack_t *ipss)
{
	ipsec_fragcache_entry_t *targp;
	ipsec_fragcache_entry_t *nextp = fep->itpfe_next;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	/* Free up any fragment list still in cache entry */
	if (fep->itpfe_fraglist != NULL) {
		ip_drop_packet_chain(fep->itpfe_fraglist,
		    ip_recv_attr_is_mblk(fep->itpfe_fraglist), NULL,
		    DROPPER(ipss, ipds_spd_expired_frags),
		    &ipss->ipsec_spd_dropper);
	}
	fep->itpfe_fraglist = NULL;

	targp = (frag->itpf_ptr)[slot];
	ASSERT(targp != NULL);

	if (targp == fep) {
		/* unlink from head of hash chain */
		(frag->itpf_ptr)[slot] = nextp;
		/* link into free list */
		fep->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = fep;
		return (nextp);
	}

	/* maybe should use double linked list to make update faster */
	/* must be past front of chain */
	while (targp) {
		if (targp->itpfe_next == fep) {
			/* unlink from hash chain */
			targp->itpfe_next = nextp;
			/* link into free list */
			fep->itpfe_next = frag->itpf_freelist;
			frag->itpf_freelist = fep;
			return (nextp);
		}
		targp = targp->itpfe_next;
		ASSERT(targp != NULL);
	}
	/* NOTREACHED */
	return (NULL);
}
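
/*
 * Illustrative sketch (not from the original source): the "double linked
 * list" idea from the comment above.  If ipsec_fragcache_entry_t carried a
 * hypothetical itpfe_prevp back-pointer (the address of whichever pointer
 * currently points at this entry), the linear rescan of the hash chain
 * would become a constant-time unlink:
 *
 *	*fep->itpfe_prevp = fep->itpfe_next;
 *	if (fep->itpfe_next != NULL)
 *		fep->itpfe_next->itpfe_prevp = fep->itpfe_prevp;
 *	fep->itpfe_next = frag->itpf_freelist;
 *	frag->itpf_freelist = fep;
 *
 * The cost is one extra pointer per entry, plus maintaining itpfe_prevp
 * at insert time in ipsec_fragcache_add().
 */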