/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * IPsec Security Policy Database.
 *
 * This module maintains the SPD and provides routines used by ip and ip6
 * to apply IPsec policy to inbound and outbound datagrams.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/ddi.h>

#include <sys/crypto/api.h>

#include <inet/common.h>
#include <inet/mi.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>

#include <inet/ip_impl.h>	/* For IP_MOD_ID */

#include <inet/ipsecah.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/iptun.h>
#include <inet/iptun/iptun_impl.h>

static void ipsec_update_present_flags(ipsec_stack_t *);
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *,
    netstack_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
static mblk_t *ipsec_attach_global_policy(mblk_t **, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
    ipsec_selector_t *, netstack_t *);
static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *,
    ipha_t *, ip6_t *, uint64_t, netstack_t *);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_free_table(ipsec_action_t *);
static void ipsec_action_reclaim(void *);
static void ipsec_action_reclaim_stack(netstack_t *);
static void ipsid_init(netstack_t *);
static void ipsid_fini(netstack_t *);

/* sel_flags values for ipsec_init_inbound_sel(). */
#define	SEL_NONE	0x0000
#define	SEL_PORT_POLICY	0x0001
#define	SEL_IS_ICMP	0x0002
#define	SEL_TUNNEL_MODE	0x0004
#define	SEL_POST_FRAG	0x0008
/* Return values for ipsec_init_inbound_sel(). */
typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG }
    selret_t;

static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
    ipha_t *, ip6_t *, uint8_t);

static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
    struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
    kstat_named_t **);
static void ipsec_unregister_prov_update(void);
static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
static boolean_t ipsec_kstat_init(ipsec_stack_t *);
static void ipsec_kstat_destroy(ipsec_stack_t *);
static int ipsec_free_tables(ipsec_stack_t *);
static int tunnel_compare(const void *, const void *);
static void ipsec_freemsg_chain(mblk_t *);
static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
    struct kstat_named *, ipdropper_t *);

/*
 * Selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number under 255.
 */

#define	IPSEC_SPDHASH_DEFAULT	251

/* SPD hash-size tunable per tunnel. */
#define	TUN_SPDHASH_DEFAULT	5

uint32_t ipsec_spd_hashsize;
uint32_t tun_spd_hashsize;

#define	IPSEC_SEL_NOHASH	((uint32_t)(~0))

/*
 * Handle global across all stack instances
 */
static crypto_notify_handle_t prov_update_handle = NULL;

static kmem_cache_t *ipsec_action_cache;
static kmem_cache_t *ipsec_sel_cache;
static kmem_cache_t *ipsec_pol_cache;
static kmem_cache_t *ipsec_info_cache;

/* Frag cache prototypes */
static void ipsec_fragcache_clean(ipsec_fragcache_t *);
static ipsec_fragcache_entry_t *fragcache_delentry(int,
    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
void ipsec_fragcache_uninit(ipsec_fragcache_t *);
mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int,
    ipsec_stack_t *);

int ipsec_hdr_pullup_needed = 0;
int ipsec_weird_null_inbound_policy = 0;

#define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
#define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)
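
/*
 * Worked example (added commentary, not from the original source): these
 * macros round a key-bits value to a multiple of `align'.  With align == 8:
 *
 *	ALGBITS_ROUND_DOWN(13, 8) == 8
 *	ALGBITS_ROUND_UP(13, 8)   == 16
 *	ALGBITS_ROUND_UP(16, 8)   == 16		(already aligned; unchanged)
 *
 * ROUND_UP simply biases the input by (align - 1) before rounding down,
 * so exact multiples are not bumped to the next step.
 */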

/*
 * Inbound traffic should have matching identities for both SA's.
 */

#define	SA_IDS_MATCH(sa1, sa2) 						\
	(((sa1) == NULL) || ((sa2) == NULL) ||				\
	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
	    (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))

/*
 * IPv6 Fragments
 */
#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)

/*
 * Policy failure messages.
 */
static char *ipsec_policy_failure_msgs[] = {

	/* IPSEC_POLICY_NOT_NEEDED */
	"%s: Dropping the datagram because the incoming packet "
	"is %s, but the recipient expects clear; Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_MISMATCH */
	"%s: Policy Failure for the incoming packet (%s); Source %s, "
	"Destination %s.\n",

	/* IPSEC_POLICY_AUTH_NOT_NEEDED */
	"%s: Authentication present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
	"%s: Encryption present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",

	/* IPSEC_POLICY_SE_NOT_NEEDED */
	"%s: Self-Encapsulation present while not expected in the "
	"incoming %s packet; Source %s, Destination %s.\n",
};

/*
 * General overviews:
 *
 * Locking:
 *
 *	All of the system policy structures are protected by a single
 *	rwlock.  These structures are threaded in a
 *	fairly complex fashion and are not expected to change on a
 *	regular basis, so this should not cause scaling/contention
 *	problems.  As a result, policy checks should (hopefully) be MT-hot.
 *
 * Allocation policy:
 *
 *	We use custom kmem cache types for the various
 *	bits & pieces of the policy data structures.  All allocations
 *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
 *	policy table is of potentially unbounded size, so we don't
 *	want to provide a way to hog all system memory with policy
 *	entries.
 */

/* Convenient functions for freeing or dropping a b_next linked mblk chain */

/* Free all messages in an mblk chain */
static void
ipsec_freemsg_chain(mblk_t *mp)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		freemsg(mp);	/* Always works, even if NULL */
		mp = mpnext;
	}
}

/* ip_drop all messages in an mblk chain */
static void
ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
{
	mblk_t *mpnext;
	while (mp != NULL) {
		ASSERT(mp->b_prev == NULL);
		mpnext = mp->b_next;
		mp->b_next = NULL;
		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
		    who_called);
		mp = mpnext;
	}
}

/*
 * AVL tree comparison function.
 * The in-kernel avl assumes unique keys for all objects.
 * Since sometimes policy will duplicate rules, we may insert
 * multiple rules with the same rule id, so we need a tie-breaker.
 */
static int
ipsec_policy_cmpbyid(const void *a, const void *b)
{
	const ipsec_policy_t *ipa, *ipb;
	uint64_t idxa, idxb;

	ipa = (const ipsec_policy_t *)a;
	ipb = (const ipsec_policy_t *)b;
	idxa = ipa->ipsp_index;
	idxb = ipb->ipsp_index;

	if (idxa < idxb)
		return (-1);
	if (idxa > idxb)
		return (1);
	/*
	 * Tie-breaker #1: All installed policy rules have a non-NULL
	 * ipsp_sel (selector set), so an entry with a NULL ipsp_sel is not
	 * actually in-tree but rather a template node being used in
	 * an avl_find query; see ipsec_policy_delete().  This gives us
	 * a placeholder in the ordering just before the first entry with
	 * a key >= the one we're looking for, so we can walk forward from
	 * that point to get the remaining entries with the same id.
	 */
	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
		return (-1);
	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
		return (1);
	/*
	 * At most one of the arguments to the comparison should have a
	 * NULL selector pointer; if not, the tree is broken.
	 */
	ASSERT(ipa->ipsp_sel != NULL);
	ASSERT(ipb->ipsp_sel != NULL);
	/*
	 * Tie-breaker #2: use the virtual address of the policy node
	 * to arbitrarily break ties.  Since we use the new tree node in
	 * the avl_find() in ipsec_insert_always, the new node will be
	 * inserted into the tree in the right place in the sequence.
	 */
	if (ipa < ipb)
		return (-1);
	if (ipa > ipb)
		return (1);
	return (0);
}
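
/*
 * Illustrative sketch (assumed usage, not from the original source): to
 * visit every rule sharing an index, build a template node with a NULL
 * ipsp_sel.  Tie-breaker #1 sorts the template ahead of all real entries
 * with that index, so avl_find() yields an insertion point from which
 * AVL_NEXT() walks the duplicates in order:
 *
 *	ipsec_policy_t tmpl, *p;
 *	avl_index_t where;
 *
 *	tmpl.ipsp_index = target_index;
 *	tmpl.ipsp_sel = NULL;		-- marks this as a query template
 *	(void) avl_find(&iph->iph_rulebyid, &tmpl, &where);
 *	for (p = avl_nearest(&iph->iph_rulebyid, where, AVL_AFTER);
 *	    p != NULL && p->ipsp_index == target_index;
 *	    p = AVL_NEXT(&iph->iph_rulebyid, p))
 *		... examine p ...
 */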

/*
 * Free what ipsec_alloc_table allocated.
 */
void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
	int dir;
	int i;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		if (ipr->ipr_hash == NULL)
			continue;

		for (i = 0; i < ipr->ipr_nchains; i++) {
			ASSERT(ipr->ipr_hash[i].hash_head == NULL);
		}
		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
		    sizeof (ipsec_policy_hash_t));
		ipr->ipr_hash = NULL;
	}
}

void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
	int dir;

	avl_destroy(&iph->iph_rulebyid);
	rw_destroy(&iph->iph_lock);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
	}
	ipsec_polhead_free_table(iph);
}

/*
 * Free the IPsec stack instance.
 */
/* ARGSUSED */
static void
ipsec_stack_fini(netstackid_t stackid, void *arg)
{
	ipsec_stack_t *ipss = (ipsec_stack_t *)arg;
	void *cookie;
	ipsec_tun_pol_t *node;
	netstack_t *ns = ipss->ipsec_netstack;
	int i;
	ipsec_algtype_t algtype;

	ipsec_loader_destroy(ipss);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	/*
	 * It's possible we can just ASSERT() the tree is empty.  After all,
	 * we aren't called until IP is ready to unload (and presumably all
	 * tunnels have been unplumbed).  But we'll play it safe for now, the
	 * loop will just exit immediately if it's empty.
	 */
	cookie = NULL;
	while ((node = (ipsec_tun_pol_t *)
	    avl_destroy_nodes(&ipss->ipsec_tunnel_policies,
	    &cookie)) != NULL) {
		ITP_REFRELE(node, ns);
	}
	avl_destroy(&ipss->ipsec_tunnel_policies);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	rw_destroy(&ipss->ipsec_tunnel_policy_lock);

	ipsec_config_flush(ns);

	ipsec_kstat_destroy(ipss);

	ip_drop_unregister(&ipss->ipsec_dropper);

	ip_drop_unregister(&ipss->ipsec_spd_dropper);
	ip_drop_destroy(ipss);
	/*
	 * Globals start with ref == 1 to prevent IPPH_REFRELE() from
	 * attempting to free them, hence they should have 1 now.
	 */
	ipsec_polhead_destroy(&ipss->ipsec_system_policy);
	ASSERT(ipss->ipsec_system_policy.iph_refs == 1);
	ipsec_polhead_destroy(&ipss->ipsec_inactive_policy);
	ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
		ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head);
		ipss->ipsec_action_hash[i].hash_head = NULL;
		mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock));
	}

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
		ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock));
	}

	mutex_enter(&ipss->ipsec_alg_lock);
	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		int nalgs = ipss->ipsec_nalgs[algtype];

		for (i = 0; i < nalgs; i++) {
			if (ipss->ipsec_alglists[algtype][i] != NULL)
				ipsec_alg_unreg(algtype, i, ns);
		}
	}
	mutex_exit(&ipss->ipsec_alg_lock);
	mutex_destroy(&ipss->ipsec_alg_lock);

	ipsid_gc(ns);
	ipsid_fini(ns);

	(void) ipsec_free_tables(ipss);
	kmem_free(ipss, sizeof (*ipss));
}

void
ipsec_policy_g_destroy(void)
{
	kmem_cache_destroy(ipsec_action_cache);
	kmem_cache_destroy(ipsec_sel_cache);
	kmem_cache_destroy(ipsec_pol_cache);
	kmem_cache_destroy(ipsec_info_cache);

	ipsec_unregister_prov_update();

	netstack_unregister(NS_IPSEC);
}

/*
 * Free what ipsec_alloc_tables allocated.
 * Also called when table allocation fails, to free any partially-built
 * tables.
 */
static int
ipsec_free_tables(ipsec_stack_t *ipss)
{
	int i;

	if (ipss->ipsec_sel_hash != NULL) {
		for (i = 0; i < ipss->ipsec_spd_hashsize; i++) {
			ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL);
		}
		kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize *
		    sizeof (*ipss->ipsec_sel_hash));
		ipss->ipsec_sel_hash = NULL;
		ipss->ipsec_spd_hashsize = 0;
	}
	ipsec_polhead_free_table(&ipss->ipsec_system_policy);
	ipsec_polhead_free_table(&ipss->ipsec_inactive_policy);

	return (ENOMEM);
}

/*
 * Attempt to allocate the tables in a single policy head.
 * Return nonzero on failure after cleaning up any work in progress.
 */
int
ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
    boolean_t global_cleanup, netstack_t *ns)
{
	int dir;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];

		ipr->ipr_nchains = nchains;
		ipr->ipr_hash = kmem_zalloc(nchains *
		    sizeof (ipsec_policy_hash_t), kmflag);
		if (ipr->ipr_hash == NULL)
			return (global_cleanup ?
			    ipsec_free_tables(ns->netstack_ipsec) :
			    ENOMEM);
	}
	return (0);
}

/*
 * Attempt to allocate the various tables.  Return nonzero on failure
 * after cleaning up any work in progress.
 */
static int
ipsec_alloc_tables(int kmflag, netstack_t *ns)
{
	int error;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	error = ipsec_alloc_table(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	error = ipsec_alloc_table(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns);
	if (error != 0)
		return (error);

	ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize *
	    sizeof (*ipss->ipsec_sel_hash), kmflag);

	if (ipss->ipsec_sel_hash == NULL)
		return (ipsec_free_tables(ipss));

	return (0);
}

/*
 * After table allocation, initialize a policy head.
 */
void
ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
	int dir, chain;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
		ipr->ipr_nchains = nchains;

		for (chain = 0; chain < nchains; chain++) {
			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
			    NULL, MUTEX_DEFAULT, NULL);
		}
	}
}

static boolean_t
ipsec_kstat_init(ipsec_stack_t *ipss)
{
	ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net",
	    KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid);

	if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL)
		return (B_FALSE);

	ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data;

#define	KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64)
	KI(esp_stat_in_requests);
	KI(esp_stat_in_discards);
	KI(esp_stat_lookup_failure);
	KI(ah_stat_in_requests);
	KI(ah_stat_in_discards);
	KI(ah_stat_lookup_failure);
	KI(sadb_acquire_maxpackets);
	KI(sadb_acquire_qhiwater);
#undef KI

	kstat_install(ipss->ipsec_ksp);
	return (B_TRUE);
}

static void
ipsec_kstat_destroy(ipsec_stack_t *ipss)
{
	kstat_delete_netstack(ipss->ipsec_ksp,
	    ipss->ipsec_netstack->netstack_stackid);
	ipss->ipsec_kstats = NULL;
}

/*
 * Initialize the IPsec stack instance.
 */
/* ARGSUSED */
static void *
ipsec_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsec_stack_t *ipss;
	int i;

	ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP);
	ipss->ipsec_netstack = ns;

	/*
	 * FIXME: netstack_ipsec is used by some of the routines we call
	 * below, but it isn't set until this routine returns.
	 * Either we introduce optional xxx_stack_alloc() functions
	 * that will be called by the netstack framework before xxx_stack_init,
	 * or we switch spd.c and sadb.c to operate on ipsec_stack_t
	 * (latter has some include file order issues for sadb.h, but makes
	 * sense if we merge some of the ipsec related stack_t's together.)
	 */
	ns->netstack_ipsec = ipss;

	/*
	 * Make two attempts to allocate policy hash tables; try it at
	 * the "preferred" size (may be set in /etc/system) first,
	 * then fall back to the default size.
	 */
	ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ?
	    IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize;

	if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) {
		cmn_err(CE_WARN,
		    "Unable to allocate %d entry IPsec policy hash table",
		    ipss->ipsec_spd_hashsize);
		ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
		cmn_err(CE_WARN, "Falling back to %d entries",
		    ipss->ipsec_spd_hashsize);
		(void) ipsec_alloc_tables(KM_SLEEP, ns);
	}

	/* Just set a default for tunnels. */
	ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ?
	    TUN_SPDHASH_DEFAULT : tun_spd_hashsize;

	ipsid_init(ns);
	/*
	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
	 * to free them.
	 */
	ipss->ipsec_system_policy.iph_refs = 1;
	ipss->ipsec_inactive_policy.iph_refs = 1;
	ipsec_polhead_init(&ipss->ipsec_system_policy,
	    ipss->ipsec_spd_hashsize);
	ipsec_polhead_init(&ipss->ipsec_inactive_policy,
	    ipss->ipsec_spd_hashsize);
	rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare,
	    sizeof (ipsec_tun_pol_t), 0);

	ipss->ipsec_next_policy_index = 1;

	rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
		mutex_init(&(ipss->ipsec_action_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < ipss->ipsec_spd_hashsize; i++)
		mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock),
		    NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL);
	for (i = 0; i < IPSEC_NALGTYPES; i++) {
		ipss->ipsec_nalgs[i] = 0;
	}

	ip_drop_init(ipss);
	ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD");

	/* IP's IPsec code calls the packet dropper */
	ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing");

	(void) ipsec_kstat_init(ipss);

	ipsec_loader_init(ipss);
	ipsec_loader_start(ipss);

	return (ipss);
}
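
/*
 * Example (assumed usage, not from the original source): the "preferred"
 * hash size above comes from the ipsec_spd_hashsize tunable declared in
 * this file, which an administrator could seed from /etc/system before
 * the stack instance is created, e.g.:
 *
 *	set ip:ipsec_spd_hashsize = 509
 *
 * A prime bucket count keeps the selector hash well spread.  If the
 * KM_NOSLEEP allocation at that size fails, the code above falls back to
 * IPSEC_SPDHASH_DEFAULT with KM_SLEEP, which cannot fail.
 */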

/* Global across all stack instances */
void
ipsec_policy_g_init(void)
{
	ipsec_action_cache = kmem_cache_create("ipsec_actions",
	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
	    ipsec_action_reclaim, NULL, NULL, 0);
	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);
	ipsec_info_cache = kmem_cache_create("ipsec_info",
	    sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL,
	    NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsec_stack_t's.
	 */
	netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini);
}

/*
 * Sort algorithm lists.
 *
 * I may need to split this based on
 * authentication/encryption, and I may wish to have an administrator
 * configure this list.  Hold on to some NDD variables...
 *
 * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
 * not the ideal metric, it's the only quantifiable measure available.
 * We need a better metric for sorting algorithms by preference.
 */
static void
alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid];
	uint8_t holder, swap;
	uint_t i;
	uint_t count = ipss->ipsec_nalgs[at];
	ASSERT(ai != NULL);
	ASSERT(algid == ai->alg_id);

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	holder = algid;

	for (i = 0; i < count - 1; i++) {
		ipsec_alginfo_t *alt;

		alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]];
		/*
		 * If you want to give precedence to newly added algs,
		 * add the = in the > comparison.
		 */
		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
			/* Swap sortlist[i] and holder. */
			swap = ipss->ipsec_sortlist[at][i];
			ipss->ipsec_sortlist[at][i] = holder;
			holder = swap;
			ai = alt;
		}	/* Else just continue. */
	}

	/* Store holder in last slot. */
	ipss->ipsec_sortlist[at][i] = holder;
}
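
/*
 * Worked example (added commentary, not from the original source): suppose
 * the sortlist for an algorithm type currently reads {A(192), B(128)},
 * keyed by alg_minbits, and C(256) is registered.  With count == 3 the
 * loop above walks the first two slots carrying C as `holder':
 *
 *	i == 0: C(256) > A(192), so the list becomes {C, B} with A held.
 *	i == 1: holder != algid, so the swap is unconditional: {C, A}, B held.
 *
 * B then lands in the last slot, giving {C, A, B} -- descending minbits.
 * Once the holder has been displaced, (holder != algid) short-circuits
 * the key comparison, so the remaining entries simply shift right.
 */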

/*
 * Remove an algorithm from a sorted algorithm list.
 * This should be considerably easier, even with complex sorting.
 */
static void
alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns)
{
	boolean_t copyback = B_FALSE;
	int i;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	int newcount = ipss->ipsec_nalgs[at];

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	for (i = 0; i <= newcount; i++) {
		if (copyback) {
			ipss->ipsec_sortlist[at][i-1] =
			    ipss->ipsec_sortlist[at][i];
		} else if (ipss->ipsec_sortlist[at][i] == algid) {
			copyback = B_TRUE;
		}
	}
}

/*
 * Add the specified algorithm to the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL);
	ipsec_alg_fix_min_max(alg, algtype, ns);
	ipss->ipsec_alglists[algtype][alg->alg_id] = alg;

	ipss->ipsec_nalgs[algtype]++;
	alg_insert_sortlist(algtype, alg->alg_id, ns);
}

/*
 * Remove the specified algorithm from the algorithm tables.
 * Must be called while holding the algorithm table writer lock.
 */
void
ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL);
	ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]);
	ipss->ipsec_alglists[algtype][algid] = NULL;

	ipss->ipsec_nalgs[algtype]--;
	alg_remove_sortlist(algtype, algid, ns);
}

/*
 * Hooks for spdsock to get a grip on system policy.
 */

ipsec_policy_head_t *
ipsec_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_system_policy;

	IPPH_REFHOLD(h);
	return (h);
}

ipsec_policy_head_t *
ipsec_inactive_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy;

	IPPH_REFHOLD(h);
	return (h);
}

/*
 * Lock inactive policy, then active policy, then exchange policy root
 * pointers.
 */
void
ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive,
    netstack_t *ns)
{
	int af, dir;
	avl_tree_t r1, r2;

	rw_enter(&inactive->iph_lock, RW_WRITER);
	rw_enter(&active->iph_lock, RW_WRITER);

	r1 = active->iph_rulebyid;
	r2 = inactive->iph_rulebyid;
	active->iph_rulebyid = r2;
	inactive->iph_rulebyid = r1;

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_hash_t *h1, *h2;

		h1 = active->iph_root[dir].ipr_hash;
		h2 = inactive->iph_root[dir].ipr_hash;
		active->iph_root[dir].ipr_hash = h2;
		inactive->iph_root[dir].ipr_hash = h1;

		for (af = 0; af < IPSEC_NAF; af++) {
			ipsec_policy_t *t1, *t2;

			t1 = active->iph_root[dir].ipr_nonhash[af];
			t2 = inactive->iph_root[dir].ipr_nonhash[af];
			active->iph_root[dir].ipr_nonhash[af] = t2;
			inactive->iph_root[dir].ipr_nonhash[af] = t1;
			if (t1 != NULL) {
				t1->ipsp_hash.hash_pp =
				    &(inactive->iph_root[dir].ipr_nonhash[af]);
			}
			if (t2 != NULL) {
				t2->ipsp_hash.hash_pp =
				    &(active->iph_root[dir].ipr_nonhash[af]);
			}
		}
	}
	active->iph_gen++;
	inactive->iph_gen++;
	ipsec_update_present_flags(ns->netstack_ipsec);
	rw_exit(&active->iph_lock);
	rw_exit(&inactive->iph_lock);
}
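
/*
 * Note (added commentary, not from the original source): the swap takes
 * the inactive head's lock before the active head's.  ipsec_copy_polhead()
 * below likewise locks the destination (the inactive head, in the clone
 * case) before the source, so the writers that ever hold both iph_locks
 * acquire them in a consistent order and cannot deadlock each other.
 */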

/*
 * Swap global policy primary/secondary.
 */
void
ipsec_swap_global_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipsec_swap_policy(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns);
}

/*
 * Clone one policy rule..
 */
static ipsec_policy_t *
ipsec_copy_policy(const ipsec_policy_t *src)
{
	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);

	if (dst == NULL)
		return (NULL);

	/*
	 * Adjust refcounts of cloned state.
	 */
	IPACT_REFHOLD(src->ipsp_act);
	src->ipsp_sel->ipsl_refs++;

	HASH_NULL(dst, ipsp_hash);
	dst->ipsp_refs = 1;
	dst->ipsp_sel = src->ipsp_sel;
	dst->ipsp_act = src->ipsp_act;
	dst->ipsp_prio = src->ipsp_prio;
	dst->ipsp_index = src->ipsp_index;

	return (dst);
}

void
ipsec_insert_always(avl_tree_t *tree, void *new_node)
{
	void *node;
	avl_index_t where;

	node = avl_find(tree, new_node, &where);
	ASSERT(node == NULL);
	avl_insert(tree, new_node, where);
}

static int
ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
    ipsec_policy_t **dstp)
{
	for (; src != NULL; src = src->ipsp_hash.hash_next) {
		ipsec_policy_t *dst = ipsec_copy_policy(src);
		if (dst == NULL)
			return (ENOMEM);

		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
		ipsec_insert_always(&dph->iph_rulebyid, dst);
	}
	return (0);
}

/*
 * Make one policy head look exactly like another.
 *
 * As with ipsec_swap_policy, we lock the destination policy head first, then
 * the source policy head.  Note that we only need to read-lock the source
 * policy head as we are not changing it.
 */
int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph,
    netstack_t *ns)
{
	int af, dir, chain, nchains;

	rw_enter(&dph->iph_lock, RW_WRITER);

	ipsec_polhead_flush(dph, ns);

	rw_enter(&sph->iph_lock, RW_READER);

	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
		ipsec_policy_root_t *spr = &sph->iph_root[dir];
		nchains = dpr->ipr_nchains;

		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);

		for (af = 0; af < IPSEC_NAF; af++) {
			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
			    &dpr->ipr_nonhash[af]))
				goto abort_copy;
		}

		for (chain = 0; chain < nchains; chain++) {
			if (ipsec_copy_chain(dph,
			    spr->ipr_hash[chain].hash_head,
			    &dpr->ipr_hash[chain].hash_head))
				goto abort_copy;
		}
	}

	dph->iph_gen++;

	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (0);

abort_copy:
	ipsec_polhead_flush(dph, ns);
	rw_exit(&sph->iph_lock);
	rw_exit(&dph->iph_lock);
	return (ENOMEM);
}

/*
 * Clone currently active policy to the inactive policy list.
 */
int
ipsec_clone_system_policy(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	return (ipsec_copy_polhead(&ipss->ipsec_system_policy,
	    &ipss->ipsec_inactive_policy, ns));
}

/*
 * Extract the string from ipsec_policy_failure_msgs[type] and
 * log it.
 */
void
ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h,
    boolean_t secure, netstack_t *ns)
{
	char sbuf[INET6_ADDRSTRLEN];
	char dbuf[INET6_ADDRSTRLEN];
	char *s;
	char *d;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ipha != NULL) {
		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
	} else {
		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));
	}

	/* Always bump the policy failure counter. */
	ipss->ipsec_policy_failure_count[type]++;

	ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
	    ipsec_policy_failure_msgs[type], func_name,
	    (secure ? "secure" : "not secure"), s, d);
}

/*
 * Rate-limiting front-end to strlog() for AH and ESP.  Uses the ndd variables
 * in /dev/ip and the same rate-limiting clock so that there's a single
 * knob to turn to throttle the rate of messages.
 */
void
ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl,
    char *fmt, ...)
{
	va_list adx;
	hrtime_t current = gethrtime();
	ip_stack_t *ipst = ns->netstack_ip;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	sl |= SL_CONSOLE;
	/*
	 * Throttle logging to stop syslog from being swamped.  If variable
	 * 'ipsec_policy_log_interval' is zero, don't log any messages at
	 * all, otherwise log only one message every 'ipsec_policy_log_interval'
	 * msec.  Convert interval (in msec) to hrtime (in nsec).
	 */

	if (ipst->ips_ipsec_policy_log_interval) {
		if (ipss->ipsec_policy_failure_last +
		    ((hrtime_t)ipst->ips_ipsec_policy_log_interval *
		    (hrtime_t)1000000) <= current) {
			va_start(adx, fmt);
			(void) vstrlog(mid, sid, level, sl, fmt, adx);
			va_end(adx);
			ipss->ipsec_policy_failure_last = current;
		}
	}
}
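
/*
 * Worked example (added commentary, not from the original source): with
 * ips_ipsec_policy_log_interval set to 1000 (msec), the guard above only
 * fires once at least 1000 * 1000000 nsec have elapsed since
 * ipsec_policy_failure_last -- at most one logged message per second no
 * matter how many datagrams fail policy.  The per-type failure counters
 * in ipsec_log_policy_failure() still count every drop.
 */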

void
ipsec_config_flush(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER);
	ipsec_polhead_flush(&ipss->ipsec_system_policy, ns);
	ipss->ipsec_next_policy_index = 1;
	rw_exit(&ipss->ipsec_system_policy.iph_lock);
	ipsec_action_reclaim_stack(ns);
}

/*
 * Clip a policy's min/max keybits vs. the capabilities of the
 * algorithm.
 */
static void
act_alg_adjust(uint_t algtype, uint_t algid,
    uint16_t *minbits, uint16_t *maxbits, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid];

	if (algp != NULL) {
		/*
		 * If passed-in minbits is zero, we assume the caller trusts
		 * us with setting the minimum key size.  We pick the
		 * algorithm's DEFAULT key size for the minimum in this case.
		 */
		if (*minbits == 0) {
			*minbits = algp->alg_default_bits;
			ASSERT(*minbits >= algp->alg_minbits);
		} else {
			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
			    algp->alg_minbits);
		}
		if (*maxbits == 0)
			*maxbits = algp->alg_maxbits;
		else
			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
			    algp->alg_maxbits);
		ASSERT(*minbits <= *maxbits);
	} else {
		*minbits = 0;
		*maxbits = 0;
	}
}
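
/*
 * Worked example (added commentary, not from the original source): for an
 * algorithm with alg_minbits == 128, alg_maxbits == 256, and
 * alg_default_bits == 128:
 *
 *	*minbits == 0,   *maxbits == 0   ->  128 .. 256  (defaults)
 *	*minbits == 192, *maxbits == 0   ->  192 .. 256
 *	*minbits == 64,  *maxbits == 512 ->  128 .. 256  (clamped both ways)
 *
 * The nested MAX(MIN(...)) keeps each bound inside [alg_minbits,
 * alg_maxbits] while preserving any stricter value the policy asked for.
 */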

/*
 * Check an action's requested algorithms against the algorithms currently
 * loaded in the system.
 */
boolean_t
ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns)
{
	ipsec_prot_t *ipp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ipp = &act->ipa_apply;

	if (ipp->ipp_use_ah &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_espa &&
	    ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] ==
	    NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
		return (B_FALSE);
	}
	if (ipp->ipp_use_esp &&
	    ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
		return (B_FALSE);
	}

	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);

	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
		return (B_FALSE);
	}
	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
		return (B_FALSE);
	}
	/* TODO: sanity check lifetimes */
	return (B_TRUE);
}

/*
 * Set up a single action during wildcard expansion..
 */
static void
ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
    uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns)
{
	ipsec_prot_t *ipp;

	*outact = *act;
	ipp = &outact->ipa_apply;
	ipp->ipp_auth_alg = (uint8_t)auth_alg;
	ipp->ipp_encr_alg = (uint8_t)encr_alg;
	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;

	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns);
	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns);
}

/*
 * combinatoric expansion time: expand a wildcarded action into an
 * array of wildcarded actions; we return the exploded action list,
 * and return a count in *nact (output only).
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t	encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max, ipss)		\
	if (wild) {						\
		int nalgs = ipss->ipsec_nalgs[type];		\
		if (ipss->ipsec_alglists[type][alg] != NULL)	\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipss->ipsec_nalgs[type] - 1;		\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max, ipss);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max, ipss);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

#define	WHICH_ALG(type, wild, idx, ipss) \
	((wild)?(ipss->ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss);
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx, ipss);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx, ipss);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg, ns);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}
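
/*
 * Worked example (added commentary, not from the original source): suppose
 * ESP encryption is wildcarded and three encryption algorithms are
 * registered alongside SADB_EALG_NONE (four entries total), while AH is
 * pinned to one algorithm and ESP authentication is not in use.
 * SET_EXP_MINMAX computes nalgs = 4 - 1 = 3, so action_count == 3 * 1 * 1
 * and the nested loops above emit exactly three fully-specified actions,
 * skipping the EALG_NONE slot when the sortlist walk reaches it.
 */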

/*
 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
 */
static void
ipsec_prot_from_req(const ipsec_req_t *req, ipsec_prot_t *ipp)
{
	bzero(ipp, sizeof (*ipp));
	/*
	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
	 * "boolean canonicalization" operator.
	 */
	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg);
	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
	    IPSEC_PREF_UNIQUE);
	ipp->ipp_encr_alg = req->ipsr_esp_alg;
	/*
	 * SADB_AALG_ANY is a placeholder to distinguish "any" from
	 * "none" above.  If auth is required, as determined above,
	 * SADB_AALG_ANY becomes 0, which is the representation
	 * of "any" and "none" in PF_KEY v2.
	 */
	ipp->ipp_auth_alg = (req->ipsr_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_auth_alg : 0;
	ipp->ipp_esp_auth_alg = (req->ipsr_esp_auth_alg != SADB_AALG_ANY) ?
	    req->ipsr_esp_auth_alg : 0;
}
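
/*
 * Note (added commentary, not from the original source): "!!" collapses
 * any nonzero mask result to exactly 1, e.g. !!(0x04) == 1, so the value
 * fits a one-bit bitfield.  A plain assignment of 0x04 would keep only
 * bit 0 and silently truncate to 0.
 */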

/*
 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
{
	req->ipsr_esp_alg = ipp->ipp_encr_alg;
	req->ipsr_auth_alg = ipp->ipp_auth_alg;
	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;

	if (ipp->ipp_use_unique) {
		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
	}
	if (ipp->ipp_use_se)
		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_ah)
		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
	if (ipp->ipp_use_esp)
		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
static int
ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
{
	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_BYPASS:
		req->ipsr_ah_req = IPSEC_PREF_NEVER;
		req->ipsr_esp_req = IPSEC_PREF_NEVER;
		return (sizeof (*req));
	case IPSEC_ACT_APPLY:
		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
	}
	return (sizeof (*req));
}

/*
 * Convert a new-style action back to an ipsec_req_t (more backwards compat).
 * We assume caller has already zero'ed *req for us.
 */
int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
	ipsec_policy_t *p;

	/*
	 * FULL-PERSOCK: consult hash table, too?
	 */
	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
	    p != NULL;
	    p = p->ipsp_hash.hash_next) {
		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
			return (ipsec_req_from_act(p->ipsp_act, req));
	}
	return (sizeof (*req));
}

/*
 * Based on per-socket or latched policy, convert to an appropriate
 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
 * be tail-called from ip.
 */
int
ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
{
	ipsec_latch_t *ipl;
	int rv = sizeof (ipsec_req_t);

	bzero(req, sizeof (*req));

	mutex_enter(&connp->conn_lock);
	ipl = connp->conn_latch;

	/*
	 * Find appropriate policy.  First choice is latched action;
	 * failing that, see latched policy; failing that,
	 * look at configured policy.
	 */
	if (ipl != NULL) {
		if (ipl->ipl_in_action != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
			goto done;
		}
		if (ipl->ipl_in_policy != NULL) {
			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
			    req);
			goto done;
		}
	}
	if (connp->conn_policy != NULL)
		rv = ipsec_req_from_head(connp->conn_policy, req, af);
done:
	mutex_exit(&connp->conn_lock);
	return (rv);
}

void
ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
{
	kmem_free(act, nact * sizeof (*act));
}

/*
 * When outbound policy is not cached, look it up the hard way and attach
 * an ipsec_out_t to the packet..
 */
static mblk_t *
ipsec_attach_global_policy(mblk_t **mp, conn_t *connp, ipsec_selector_t *sel,
    netstack_t *ns)
{
	ipsec_policy_t *p;

	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns);

	if (p == NULL)
		return (NULL);
	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns));
}

/*
 * We have an ipsec_out already, but don't have cached policy; fill it in
 * with the right actions.
 */
static mblk_t *
ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
    ipsec_selector_t *sel, netstack_t *ns)
{
	ipsec_out_t *io;
	ipsec_policy_t *p;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);

	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	if (io->ipsec_out_policy == NULL) {
		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns);
		io->ipsec_out_policy = p;
	}
	return (ipsec_mp);
}

/*
 * Consumes a reference to ipsp.
 */
static mblk_t *
ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present,
    ipsec_policy_t *ipsp)
{
	mblk_t *ipsec_mp;
	ipsec_in_t *ii;
	netstack_t *ns;

	if (!mctl_present)
		return (first_mp);

	ipsec_mp = first_mp;

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ns = ii->ipsec_in_ns;
	ASSERT(ii->ipsec_in_loopback);
	IPPOL_REFRELE(ipsp, ns);

	/*
	 * We should do an actual policy check here.  Revisit this
	 * when we revisit the IPsec API.  (And pass a conn_t in when we
	 * get there.)
	 */

	return (first_mp);
}

/*
 * Check that packet's inbound ports & proto match the selectors
 * expected by the SAs it traversed on the way in.
 */
static boolean_t
ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
    kstat_named_t **counter, uint64_t pkt_unique)
{
	uint64_t ah_mask, esp_mask;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ii->ipsec_in_secure);
	ASSERT(!ii->ipsec_in_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;
	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));

	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;

	if ((ah_mask == 0) && (esp_mask == 0))
		return (B_TRUE);

	/*
	 * The pkt_unique check will also check for tunnel mode on the SA
	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
	 * should not apply in this case.  ;)
	 */

	if (ah_mask != 0 &&
	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
		*reason = "AH inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_ah_innermismatch);
		return (B_FALSE);
	}
	if (esp_mask != 0 &&
	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
		*reason = "ESP inner header mismatch";
		*counter = DROPPER(ipss, ipds_spd_esp_innermismatch);
		return (B_FALSE);
	}
	return (B_TRUE);
}
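
/*
 * Illustrative note (added commentary; the exact bit layout is defined by
 * SA_UNIQUE_ID in sadb.h, not here): pkt_unique packs the packet's ports
 * and protocol into a 64-bit value the same way the SA's ipsa_unique_id
 * was packed when the SA was created.  An SA bound to a single flow
 * carries a nonzero ipsa_unique_mask selecting exactly the fields it was
 * bound to, so the comparisons above reduce to "do the masked fields of
 * this packet equal the flow the SA was reserved for?"  A zero mask means
 * the SA is shared and any flow may use it.
 */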

static boolean_t
ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ii->ipsec_in_loopback) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * icmp_loopback is initialized in ip.c and ip6.c.  (Look for
		 * ipsec_out_icmp_loopback.)
		 */
		if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback)
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = DROPPER(ipss, ipds_spd_loopback_mismatch);
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!ii->ipsec_in_icmp_loopback);

	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;

	decaps = ii->ipsec_in_decaps;

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = DROPPER(ipss, ipds_spd_explicit);
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		*counter = DROPPER(ipss, ipds_spd_got_secure);
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not.  We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_ahalg);
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_ah);
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = DROPPER(ipss, ipds_spd_got_clear);
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = DROPPER(ipss, ipds_spd_bad_espealg);
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = DROPPER(ipss,
					    ipds_spd_bad_espaalg);
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this.  Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = DROPPER(ipss, ipds_spd_got_esp);
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = DROPPER(ipss,
					    ipds_spd_bad_selfencap);
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay.  But we drop to be consistent with the
			 * other cases.
			 */
			*counter = DROPPER(ipss, ipds_spd_got_selfencap);
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet.
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		ASSERT(ii->ipsec_in_action == NULL);
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}
1906 */ 1907 static mblk_t * 1908 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1909 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) 1910 { 1911 ipsec_in_t *ii; 1912 ipsec_action_t *ap; 1913 const char *reason = "no policy actions found"; 1914 mblk_t *data_mp, *ipsec_mp; 1915 ipsec_stack_t *ipss = ns->netstack_ipsec; 1916 ip_stack_t *ipst = ns->netstack_ip; 1917 kstat_named_t *counter; 1918 1919 counter = DROPPER(ipss, ipds_spd_got_secure); 1920 1921 data_mp = first_mp->b_cont; 1922 ipsec_mp = first_mp; 1923 1924 ASSERT(ipsp != NULL); 1925 1926 ASSERT((ipha == NULL && ip6h != NULL) || 1927 (ip6h == NULL && ipha != NULL)); 1928 1929 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1930 1931 if (ii->ipsec_in_loopback) 1932 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1933 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1934 ASSERT(ii->ipsec_in_secure); 1935 1936 if (ii->ipsec_in_action != NULL) { 1937 /* 1938 * this can happen if we do a double policy-check on a packet 1939 * Would be nice to be able to delete this test.. 1940 */ 1941 IPACT_REFRELE(ii->ipsec_in_action); 1942 } 1943 ASSERT(ii->ipsec_in_action == NULL); 1944 1945 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1946 reason = "inbound AH and ESP identities differ"; 1947 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1948 goto drop; 1949 } 1950 1951 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1952 goto drop; 1953 1954 /* 1955 * Ok, now loop through the possible actions and see if any 1956 * of them work for us. 1957 */ 1958 1959 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 1960 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 1961 ipha, ip6h, &reason, &counter)) { 1962 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 1963 IPPOL_REFRELE(ipsp, ns); 1964 return (first_mp); 1965 } 1966 } 1967 drop: 1968 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 1969 "ipsec inbound policy mismatch: %s, packet dropped\n", 1970 reason); 1971 IPPOL_REFRELE(ipsp, ns); 1972 ASSERT(ii->ipsec_in_action == NULL); 1973 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 1974 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 1975 &ipss->ipsec_spd_dropper); 1976 return (NULL); 1977 } 1978 1979 /* 1980 * sleazy prefix-length-based compare. 1981 * another inlining candidate.. 1982 */ 1983 boolean_t 1984 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 1985 { 1986 int offset = pfxlen>>3; 1987 int bitsleft = pfxlen & 7; 1988 uint8_t *addr2 = (uint8_t *)addr2p; 1989 1990 /* 1991 * and there was much evil.. 1992 * XXX should inline-expand the bcmp here and do this 32 bits 1993 * or 64 bits at a time.. 1994 */ 1995 return ((bcmp(addr1, addr2, offset) == 0) && 1996 ((bitsleft == 0) || 1997 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0))); 1998 } 1999 2000 static ipsec_policy_t * 2001 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 2002 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 2003 { 2004 ipsec_selkey_t *isel; 2005 ipsec_policy_t *p; 2006 int bpri = best ? 
best->ipsp_prio : 0; 2007 2008 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 2009 uint32_t valid; 2010 2011 if (p->ipsp_prio <= bpri) 2012 continue; 2013 isel = &p->ipsp_sel->ipsl_key; 2014 valid = isel->ipsl_valid; 2015 2016 if ((valid & IPSL_PROTOCOL) && 2017 (isel->ipsl_proto != sel->ips_protocol)) 2018 continue; 2019 2020 if ((valid & IPSL_REMOTE_ADDR) && 2021 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 2022 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6)) 2023 continue; 2024 2025 if ((valid & IPSL_LOCAL_ADDR) && 2026 !ip_addr_match((uint8_t *)&isel->ipsl_local, 2027 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6)) 2028 continue; 2029 2030 if ((valid & IPSL_REMOTE_PORT) && 2031 isel->ipsl_rport != sel->ips_remote_port) 2032 continue; 2033 2034 if ((valid & IPSL_LOCAL_PORT) && 2035 isel->ipsl_lport != sel->ips_local_port) 2036 continue; 2037 2038 if (!is_icmp_inv_acq) { 2039 if ((valid & IPSL_ICMP_TYPE) && 2040 (isel->ipsl_icmp_type > sel->ips_icmp_type || 2041 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 2042 continue; 2043 } 2044 2045 if ((valid & IPSL_ICMP_CODE) && 2046 (isel->ipsl_icmp_code > sel->ips_icmp_code || 2047 isel->ipsl_icmp_code_end < 2048 sel->ips_icmp_code)) { 2049 continue; 2050 } 2051 } else { 2052 /* 2053 * special case for icmp inverse acquire 2054 * we only want policies that aren't drop/pass 2055 */ 2056 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2057 continue; 2058 } 2059 2060 /* we matched all the packet-port-field selectors! */ 2061 best = p; 2062 bpri = p->ipsp_prio; 2063 } 2064 2065 return (best); 2066 } 2067 2068 /* 2069 * Try to find and return the best policy entry under a given policy 2070 * root for a given set of selectors; the first parameter "best" is 2071 * the current best policy so far. If "best" is non-null, we have a 2072 * reference to it. We return a reference to a policy; if that policy 2073 * is not the original "best", we need to release that reference 2074 * before returning. 2075 */ 2076 ipsec_policy_t * 2077 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2078 int direction, ipsec_selector_t *sel, netstack_t *ns) 2079 { 2080 ipsec_policy_t *curbest; 2081 ipsec_policy_root_t *root; 2082 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2083 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2084 2085 curbest = best; 2086 root = &head->iph_root[direction]; 2087 2088 #ifdef DEBUG 2089 if (is_icmp_inv_acq) { 2090 if (sel->ips_isv4) { 2091 if (sel->ips_protocol != IPPROTO_ICMP) { 2092 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2093 " expecting icmp, got %d", 2094 sel->ips_protocol); 2095 } 2096 } else { 2097 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2098 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2099 " expecting icmpv6, got %d", 2100 sel->ips_protocol); 2101 } 2102 } 2103 } 2104 #endif 2105 2106 rw_enter(&head->iph_lock, RW_READER); 2107 2108 if (root->ipr_nchains > 0) { 2109 curbest = ipsec_find_policy_chain(curbest, 2110 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2111 is_icmp_inv_acq); 2112 } 2113 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2114 is_icmp_inv_acq); 2115 2116 /* 2117 * Adjust reference counts if we found anything new. 
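	 * (Editorial note: both the per-bucket hash chain and the AF's
	 * unhashed chain were searched above; curbest carries the
	 * highest-priority match across the two, so the fixup below
	 * fires only when this head produced a new winner.)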
2118 */ 2119 if (curbest != best) { 2120 ASSERT(curbest != NULL); 2121 IPPOL_REFHOLD(curbest); 2122 2123 if (best != NULL) { 2124 IPPOL_REFRELE(best, ns); 2125 } 2126 } 2127 2128 rw_exit(&head->iph_lock); 2129 2130 return (curbest); 2131 } 2132 2133 /* 2134 * Find the best system policy (either global or per-interface) which 2135 * applies to the given selector; look in all the relevant policy roots 2136 * to figure out which policy wins. 2137 * 2138 * Returns a reference to a policy; caller must release this 2139 * reference when done. 2140 */ 2141 ipsec_policy_t * 2142 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2143 ipsec_selector_t *sel, netstack_t *ns) 2144 { 2145 ipsec_policy_t *p; 2146 ipsec_stack_t *ipss = ns->netstack_ipsec; 2147 2148 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2149 direction, sel, ns); 2150 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2151 p = ipsec_find_policy_head(p, connp->conn_policy, 2152 direction, sel, ns); 2153 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2154 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2155 direction, sel, ns); 2156 } 2157 2158 return (p); 2159 } 2160 2161 /* 2162 * Check with global policy and see whether this inbound 2163 * packet meets the policy constraints. 2164 * 2165 * Locate appropriate policy from global policy, supplemented by the 2166 * conn's configured and/or cached policy if the conn is supplied. 2167 * 2168 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2169 * encrypted packet to see if they match. 2170 * 2171 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2172 * floor. 2173 */ 2174 mblk_t * 2175 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2176 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) 2177 { 2178 ipsec_policy_t *p; 2179 ipsec_selector_t sel; 2180 mblk_t *data_mp, *ipsec_mp; 2181 boolean_t policy_present; 2182 kstat_named_t *counter; 2183 ipsec_in_t *ii = NULL; 2184 uint64_t pkt_unique; 2185 ipsec_stack_t *ipss = ns->netstack_ipsec; 2186 ip_stack_t *ipst = ns->netstack_ip; 2187 2188 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2189 ipsec_mp = mctl_present ? first_mp : NULL; 2190 2191 sel.ips_is_icmp_inv_acq = 0; 2192 2193 ASSERT((ipha == NULL && ip6h != NULL) || 2194 (ip6h == NULL && ipha != NULL)); 2195 2196 if (ipha != NULL) 2197 policy_present = ipss->ipsec_inbound_v4_policy_present; 2198 else 2199 policy_present = ipss->ipsec_inbound_v6_policy_present; 2200 2201 if (!policy_present && connp == NULL) { 2202 /* 2203 * No global policy and no per-socket policy; 2204 * just pass it back (but we shouldn't get here in that case) 2205 */ 2206 return (first_mp); 2207 } 2208 2209 if (ipsec_mp != NULL) { 2210 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2211 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2212 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2213 } 2214 2215 /* 2216 * If we have cached policy, use it. 2217 * Otherwise consult system policy. 2218 */ 2219 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2220 p = connp->conn_latch->ipl_in_policy; 2221 if (p != NULL) { 2222 IPPOL_REFHOLD(p); 2223 } 2224 /* 2225 * The caller may have mistakenly assigned an ip6i_t as the 2226 * ip6h for this packet, so take that corner-case into 2227 * account. 2228 */ 2229 if (ip6h != NULL && ip6h->ip6_nxt == IPPROTO_RAW) { 2230 ip6h++; 2231 /* First check for bizarro split-mblk headers. 
*/ 2232 if ((uintptr_t)ip6h > (uintptr_t)data_mp->b_wptr || 2233 ((uintptr_t)ip6h) + sizeof (ip6_t) > 2234 (uintptr_t)data_mp->b_wptr) { 2235 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2236 "ipsec_check_global_policy", ipha, ip6h, 2237 B_TRUE, ns); 2238 counter = DROPPER(ipss, ipds_spd_nomem); 2239 goto fail; 2240 } 2241 /* Next, see if ip6i is at the end of an mblk. */ 2242 if (ip6h == (ip6_t *)data_mp->b_wptr) 2243 ip6h = (ip6_t *)data_mp->b_cont->b_rptr; 2244 } 2245 /* 2246 * Fudge sel for UNIQUE_ID setting below. 2247 */ 2248 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2249 } else { 2250 /* Initialize the ports in the selector */ 2251 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2252 SEL_NONE) == SELRET_NOMEM) { 2253 /* 2254 * Technically not a policy mismatch, but it is 2255 * an internal failure. 2256 */ 2257 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2258 "ipsec_init_inbound_sel", ipha, ip6h, B_TRUE, ns); 2259 counter = DROPPER(ipss, ipds_spd_nomem); 2260 goto fail; 2261 } 2262 2263 /* 2264 * Find the policy which best applies. 2265 * 2266 * If we find global policy, we should look at both 2267 * local policy and global policy and see which is 2268 * stronger and match accordingly. 2269 * 2270 * If we don't find a global policy, check with 2271 * local policy alone. 2272 */ 2273 2274 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 2275 ns); 2276 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2277 sel.ips_local_port, sel.ips_protocol, 0); 2278 } 2279 2280 if (p == NULL) { 2281 if (ipsec_mp == NULL) { 2282 /* 2283 * We have no policy; default to succeeding. 2284 * XXX paranoid system design doesn't do this. 2285 */ 2286 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2287 return (first_mp); 2288 } else { 2289 counter = DROPPER(ipss, ipds_spd_got_secure); 2290 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2291 "ipsec_check_global_policy", ipha, ip6h, B_TRUE, 2292 ns); 2293 goto fail; 2294 } 2295 } 2296 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2297 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2298 pkt_unique, ns)); 2299 } 2300 if (p->ipsp_act->ipa_allow_clear) { 2301 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2302 IPPOL_REFRELE(p, ns); 2303 return (first_mp); 2304 } 2305 IPPOL_REFRELE(p, ns); 2306 /* 2307 * If we reach here, we will drop the packet because it failed the 2308 * global policy check because the packet was cleartext, and it 2309 * should not have been. 2310 */ 2311 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2312 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns); 2313 counter = DROPPER(ipss, ipds_spd_got_clear); 2314 2315 fail: 2316 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2317 &ipss->ipsec_spd_dropper); 2318 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2319 return (NULL); 2320 } 2321 2322 /* 2323 * We check whether an inbound datagram is a valid one 2324 * to accept in clear. If it is secure, it is the job 2325 * of IPSEC to log information appropriately if it 2326 * suspects that it may not be the real one. 2327 * 2328 * It is called only while fanning out to the ULP 2329 * where ULP accepts only secure data and the incoming 2330 * is clear. Usually we never accept clear datagrams in 2331 * such cases. ICMP is the only exception. 2332 * 2333 * NOTE : We don't call this function if the client (ULP) 2334 * is willing to accept things in clear. 
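 *
 * Editorial usage sketch (mirroring the ESP check earlier in this file):
 * a typical call site treats this as the last word on an unprotected
 * packet, e.g.
 *
 *	if (esp_assoc == NULL) {
 *		ret = ipsec_inbound_accept_clear(mp, ipha, ip6h);
 *		if (!ret)
 *			... count the drop and bail ...
 *	}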
2335  */
2336 boolean_t
2337 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
2338 {
2339 	ushort_t iph_hdr_length;
2340 	icmph_t *icmph;
2341 	icmp6_t *icmp6;
2342 	uint8_t *nexthdrp;
2343
2344 	ASSERT((ipha != NULL && ip6h == NULL) ||
2345 	    (ipha == NULL && ip6h != NULL));
2346
2347 	if (ip6h != NULL) {
2348 		iph_hdr_length = ip_hdr_length_v6(mp, ip6h);
2349 		if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length,
2350 		    &nexthdrp)) {
2351 			return (B_FALSE);
2352 		}
2353 		if (*nexthdrp != IPPROTO_ICMPV6)
2354 			return (B_FALSE);
2355 		icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]);
2356 		/* Match IPv6 ICMP policy to IPv4 as closely as possible. */
2357 		switch (icmp6->icmp6_type) {
2358 		case ICMP6_PARAM_PROB:
2359 			/* Corresponds to port/proto unreach in IPv4. */
2360 		case ICMP6_ECHO_REQUEST:
2361 			/* Just like IPv4. */
2362 			return (B_FALSE);
2363
2364 		case MLD_LISTENER_QUERY:
2365 		case MLD_LISTENER_REPORT:
2366 		case MLD_LISTENER_REDUCTION:
2367 			/*
2368 			 * XXX Separate NDD in IPv4 -- what about here?
2369 			 * Plus, mcast is important to ND.
2370 			 */
2371 		case ICMP6_DST_UNREACH:
2372 			/* Corresponds to HOST/NET unreachable in IPv4. */
2373 		case ICMP6_PACKET_TOO_BIG:
2374 		case ICMP6_ECHO_REPLY:
2375 			/* These are trusted in IPv4. */
2376 		case ND_ROUTER_SOLICIT:
2377 		case ND_ROUTER_ADVERT:
2378 		case ND_NEIGHBOR_SOLICIT:
2379 		case ND_NEIGHBOR_ADVERT:
2380 		case ND_REDIRECT:
2381 			/* Trust ND messages for now. */
2382 		case ICMP6_TIME_EXCEEDED:
2383 		default:
2384 			return (B_TRUE);
2385 		}
2386 	} else {
2387 		/*
2388 		 * If it is not ICMP, fail this request.
2389 		 */
2390 		if (ipha->ipha_protocol != IPPROTO_ICMP) {
2391 #ifdef FRAGCACHE_DEBUG
2392 			cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
2393 			    ipha->ipha_protocol);
2394 #endif
2395 			return (B_FALSE);
2396 		}
2397 		iph_hdr_length = IPH_HDR_LENGTH(ipha);
2398 		icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2399 		/*
2400 		 * It is an insecure ICMP message. Check to see whether we are
2401 		 * willing to accept this one.
2402 		 */
2403
2404 		switch (icmph->icmph_type) {
2405 		case ICMP_ECHO_REPLY:
2406 		case ICMP_TIME_STAMP_REPLY:
2407 		case ICMP_INFO_REPLY:
2408 		case ICMP_ROUTER_ADVERTISEMENT:
2409 			/*
2410 			 * We should not encourage clear replies if this
2411 			 * client expects secure. If somebody is replying
2412 			 * in clear, a malicious user watching both the
2413 			 * request and reply can do chosen-plaintext attacks.
2414 			 * With global policy we might be just expecting secure
2415 			 * but sending out clear. We don't know what the right
2416 			 * thing is. We can't do much here as we can't control
2417 			 * the sender here. Till we are sure of what to do,
2418 			 * accept them.
2419 			 */
2420 			return (B_TRUE);
2421 		case ICMP_ECHO_REQUEST:
2422 		case ICMP_TIME_STAMP_REQUEST:
2423 		case ICMP_INFO_REQUEST:
2424 		case ICMP_ADDRESS_MASK_REQUEST:
2425 		case ICMP_ROUTER_SOLICITATION:
2426 		case ICMP_ADDRESS_MASK_REPLY:
2427 			/*
2428 			 * Don't accept this as somebody could be sending
2429 			 * us plain text to get encrypted data. If we reply,
2430 			 * it will lead to a chosen-plaintext attack.
2431 			 */
2432 			return (B_FALSE);
2433 		case ICMP_DEST_UNREACHABLE:
2434 			switch (icmph->icmph_code) {
2435 			case ICMP_FRAGMENTATION_NEEDED:
2436 				/*
2437 				 * Be in sync with icmp_inbound, where we have
2438 				 * already set ire_max_frag.
2439 				 */
2440 #ifdef FRAGCACHE_DEBUG
2441 				cmn_err(CE_WARN, "ICMP frag needed\n");
2442 #endif
2443 				return (B_TRUE);
2444 			case ICMP_HOST_UNREACHABLE:
2445 			case ICMP_NET_UNREACHABLE:
2446 				/*
2447 				 * By accepting, we could reset a connection.
2448 				 * How do we solve the problem of some
2449 				 * intermediate router sending insecure ICMP
2450 				 * messages?
2451 				 */
2452 				return (B_TRUE);
2453 			case ICMP_PORT_UNREACHABLE:
2454 			case ICMP_PROTOCOL_UNREACHABLE:
2455 			default:
2456 				return (B_FALSE);
2457 			}
2458 		case ICMP_SOURCE_QUENCH:
2459 			/*
2460 			 * If this is an attack, TCP will slow start
2461 			 * because of this. Is it very harmful?
2462 			 */
2463 			return (B_TRUE);
2464 		case ICMP_PARAM_PROBLEM:
2465 			return (B_FALSE);
2466 		case ICMP_TIME_EXCEEDED:
2467 			return (B_TRUE);
2468 		case ICMP_REDIRECT:
2469 			return (B_FALSE);
2470 		default:
2471 			return (B_FALSE);
2472 		}
2473 	}
2474 }
2475
2476 void
2477 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote)
2478 {
2479 	mutex_enter(&ipl->ipl_lock);
2480
2481 	if (ipl->ipl_ids_latched) {
2482 		/* I lost, someone else got here before me */
2483 		mutex_exit(&ipl->ipl_lock);
2484 		return;
2485 	}
2486
2487 	if (local != NULL)
2488 		IPSID_REFHOLD(local);
2489 	if (remote != NULL)
2490 		IPSID_REFHOLD(remote);
2491
2492 	ipl->ipl_local_cid = local;
2493 	ipl->ipl_remote_cid = remote;
2494 	ipl->ipl_ids_latched = B_TRUE;
2495 	mutex_exit(&ipl->ipl_lock);
2496 }
2497
2498 void
2499 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii)
2500 {
2501 	ipsa_t *sa;
2502
2503 	if (!ipl->ipl_ids_latched) {
2504 		ipsid_t *local = NULL;
2505 		ipsid_t *remote = NULL;
2506
2507 		if (!ii->ipsec_in_loopback) {
2508 			if (ii->ipsec_in_esp_sa != NULL)
2509 				sa = ii->ipsec_in_esp_sa;
2510 			else
2511 				sa = ii->ipsec_in_ah_sa;
2512 			ASSERT(sa != NULL);
2513 			local = sa->ipsa_dst_cid;
2514 			remote = sa->ipsa_src_cid;
2515 		}
2516 		ipsec_latch_ids(ipl, local, remote);
2517 	}
2518 	ipl->ipl_in_action = ii->ipsec_in_action;
2519 	IPACT_REFHOLD(ipl->ipl_in_action);
2520 }
2521
2522 /*
2523  * Check whether the policy constraints are met for an inbound
2524  * datagram; called from IP in numerous places.
2525  *
2526  * Note that this is not a chokepoint for inbound policy checks;
2527  * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy().
2528  */
2529 mblk_t *
2530 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp,
2531     ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present)
2532 {
2533 	ipsec_in_t *ii;
2534 	boolean_t ret;
2535 	mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp;
2536 	mblk_t *ipsec_mp = mctl_present ? first_mp : NULL;
2537 	ipsec_latch_t *ipl;
2538 	uint64_t unique_id;
2539 	ipsec_stack_t *ipss;
2540 	ip_stack_t *ipst;
2541 	netstack_t *ns;
2542 	ipsec_policy_head_t *policy_head;
2543
2544 	ASSERT(connp != NULL);
2545 	ns = connp->conn_netstack;
2546 	ipss = ns->netstack_ipsec;
2547 	ipst = ns->netstack_ip;
2548
2549 	if (ipsec_mp == NULL) {
2550 clear:
2551 		/*
2552 		 * This is the case where the incoming datagram is
2553 		 * cleartext and we need to see whether this client
2554 		 * would like to receive such untrustworthy things from
2555 		 * the wire.
2556 		 */
2557 		ASSERT(mp != NULL);
2558
2559 		mutex_enter(&connp->conn_lock);
2560 		if (connp->conn_state_flags & CONN_CONDEMNED) {
2561 			mutex_exit(&connp->conn_lock);
2562 			ip_drop_packet(first_mp, B_TRUE, NULL,
2563 			    NULL, DROPPER(ipss, ipds_spd_got_clear),
2564 			    &ipss->ipsec_spd_dropper);
2565 			BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
2566 			return (NULL);
2567 		}
2568 		if ((ipl = connp->conn_latch) != NULL) {
2569 			/* Hold a reference in case the conn is closing */
2570 			IPLATCH_REFHOLD(ipl);
2571 			mutex_exit(&connp->conn_lock);
2572 			/*
2573 			 * Policy is cached in the conn.
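			 * Conceptually (editorial note, with "allow" as a
			 * hypothetical name) the test below reduces to:
			 *
			 *	allow = (ipl->ipl_in_policy == NULL) ||
			 *	    ipl->ipl_in_policy->ipsp_act->
			 *	    ipa_allow_clear;
			 *
			 * with ipsec_inbound_accept_clear() consulted only
			 * when cleartext is not allowed outright.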
2574 */ 2575 if ((ipl->ipl_in_policy != NULL) && 2576 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) { 2577 ret = ipsec_inbound_accept_clear(mp, 2578 ipha, ip6h); 2579 if (ret) { 2580 BUMP_MIB(&ipst->ips_ip_mib, 2581 ipsecInSucceeded); 2582 IPLATCH_REFRELE(ipl, ns); 2583 return (first_mp); 2584 } else { 2585 ipsec_log_policy_failure( 2586 IPSEC_POLICY_MISMATCH, 2587 "ipsec_check_inbound_policy", ipha, 2588 ip6h, B_FALSE, ns); 2589 ip_drop_packet(first_mp, B_TRUE, NULL, 2590 NULL, 2591 DROPPER(ipss, ipds_spd_got_clear), 2592 &ipss->ipsec_spd_dropper); 2593 BUMP_MIB(&ipst->ips_ip_mib, 2594 ipsecInFailed); 2595 IPLATCH_REFRELE(ipl, ns); 2596 return (NULL); 2597 } 2598 } else { 2599 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2600 IPLATCH_REFRELE(ipl, ns); 2601 return (first_mp); 2602 } 2603 } else { 2604 uchar_t db_type; 2605 policy_head = connp->conn_policy; 2606 2607 /* Hold a reference in case the conn is closing */ 2608 if (policy_head != NULL) 2609 IPPH_REFHOLD(policy_head); 2610 mutex_exit(&connp->conn_lock); 2611 /* 2612 * As this is a non-hardbound connection we need 2613 * to look at both per-socket policy and global 2614 * policy. As this is cleartext, mark the mp as 2615 * M_DATA in case if it is an ICMP error being 2616 * reported before calling ipsec_check_global_policy 2617 * so that it does not mistake it for IPSEC_IN. 2618 */ 2619 db_type = mp->b_datap->db_type; 2620 mp->b_datap->db_type = M_DATA; 2621 first_mp = ipsec_check_global_policy(first_mp, connp, 2622 ipha, ip6h, mctl_present, ns); 2623 if (policy_head != NULL) 2624 IPPH_REFRELE(policy_head, ns); 2625 if (first_mp != NULL) 2626 mp->b_datap->db_type = db_type; 2627 return (first_mp); 2628 } 2629 } 2630 /* 2631 * If it is inbound check whether the attached message 2632 * is secure or not. We have a special case for ICMP, 2633 * where we have a IPSEC_IN message and the attached 2634 * message is not secure. See icmp_inbound_error_fanout 2635 * for details. 2636 */ 2637 ASSERT(ipsec_mp != NULL); 2638 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2639 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2640 2641 if (!ii->ipsec_in_secure) 2642 goto clear; 2643 2644 /* 2645 * mp->b_cont could be either a M_CTL message 2646 * for icmp errors being sent up or a M_DATA message. 2647 */ 2648 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA); 2649 2650 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2651 2652 mutex_enter(&connp->conn_lock); 2653 /* Connection is closing */ 2654 if (connp->conn_state_flags & CONN_CONDEMNED) { 2655 mutex_exit(&connp->conn_lock); 2656 ip_drop_packet(first_mp, B_TRUE, NULL, 2657 NULL, DROPPER(ipss, ipds_spd_got_clear), 2658 &ipss->ipsec_spd_dropper); 2659 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2660 return (NULL); 2661 } 2662 2663 /* 2664 * Once a connection is latched it remains so for life, the conn_latch 2665 * pointer on the conn has not changed, simply initializing ipl here 2666 * as the earlier initialization was done only in the cleartext case. 2667 */ 2668 if ((ipl = connp->conn_latch) == NULL) { 2669 mblk_t *retmp; 2670 policy_head = connp->conn_policy; 2671 2672 /* Hold a reference in case the conn is closing */ 2673 if (policy_head != NULL) 2674 IPPH_REFHOLD(policy_head); 2675 mutex_exit(&connp->conn_lock); 2676 /* 2677 * We don't have policies cached in the conn 2678 * for this stream. So, look at the global 2679 * policy. It will check against conn or global 2680 * depending on whichever is stronger. 
2681 */ 2682 retmp = ipsec_check_global_policy(first_mp, connp, 2683 ipha, ip6h, mctl_present, ns); 2684 if (policy_head != NULL) 2685 IPPH_REFRELE(policy_head, ns); 2686 return (retmp); 2687 } 2688 2689 IPLATCH_REFHOLD(ipl); 2690 mutex_exit(&connp->conn_lock); 2691 2692 if (ipl->ipl_in_action != NULL) { 2693 /* Policy is cached & latched; fast(er) path */ 2694 const char *reason; 2695 kstat_named_t *counter; 2696 2697 if (ipsec_check_ipsecin_latch(ii, mp, ipl, 2698 ipha, ip6h, &reason, &counter, connp)) { 2699 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2700 IPLATCH_REFRELE(ipl, ns); 2701 return (first_mp); 2702 } 2703 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, 2704 SL_ERROR|SL_WARN|SL_CONSOLE, 2705 "ipsec inbound policy mismatch: %s, packet dropped\n", 2706 reason); 2707 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2708 &ipss->ipsec_spd_dropper); 2709 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2710 IPLATCH_REFRELE(ipl, ns); 2711 return (NULL); 2712 } else if (ipl->ipl_in_policy == NULL) { 2713 ipsec_weird_null_inbound_policy++; 2714 IPLATCH_REFRELE(ipl, ns); 2715 return (first_mp); 2716 } 2717 2718 unique_id = conn_to_unique(connp, mp, ipha, ip6h); 2719 IPPOL_REFHOLD(ipl->ipl_in_policy); 2720 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy, 2721 ipha, ip6h, unique_id, ns); 2722 /* 2723 * NOTE: ipsecIn{Failed,Succeeeded} bumped by 2724 * ipsec_check_ipsecin_policy(). 2725 */ 2726 if (first_mp != NULL) 2727 ipsec_latch_inbound(ipl, ii); 2728 IPLATCH_REFRELE(ipl, ns); 2729 return (first_mp); 2730 } 2731 2732 /* 2733 * Handle all sorts of cases like tunnel-mode, ICMP, and ip6i prepending. 2734 */ 2735 static int 2736 prepended_length(mblk_t *mp, uintptr_t hptr) 2737 { 2738 int rc = 0; 2739 2740 while (mp != NULL) { 2741 if (hptr >= (uintptr_t)mp->b_rptr && hptr < 2742 (uintptr_t)mp->b_wptr) { 2743 rc += (int)(hptr - (uintptr_t)mp->b_rptr); 2744 break; /* out of while loop */ 2745 } 2746 rc += (int)MBLKL(mp); 2747 mp = mp->b_cont; 2748 } 2749 2750 if (mp == NULL) { 2751 /* 2752 * IF (big IF) we make it here by naturally exiting the loop, 2753 * then ip6h isn't in the mblk chain "mp" at all. 2754 * 2755 * The only case where this happens is with a reversed IP 2756 * header that gets passed up by inbound ICMP processing. 2757 * This unfortunately triggers longstanding bug 6478464. For 2758 * now, just pass up 0 for the answer. 2759 */ 2760 #ifdef DEBUG_NOT_UNTIL_6478464 2761 ASSERT(mp != NULL); 2762 #endif 2763 rc = 0; 2764 } 2765 2766 return (rc); 2767 } 2768 2769 /* 2770 * Returns: 2771 * 2772 * SELRET_NOMEM --> msgpullup() needed to gather things failed. 2773 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment 2774 * gathering, the initial fragment is too short for 2775 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is 2776 * set. 2777 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data. 2778 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller 2779 * should put this packet in a fragment-gathering queue. 2780 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY 2781 * is set. 2782 */ 2783 static selret_t 2784 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2785 ip6_t *ip6h, uint8_t sel_flags) 2786 { 2787 uint16_t *ports; 2788 int outer_hdr_len = 0; /* For ICMP, tunnel-mode, or ip6i cases... 
*/ 2789 ushort_t hdr_len; 2790 mblk_t *spare_mp = NULL; 2791 uint8_t *nexthdrp, *transportp; 2792 uint8_t nexthdr; 2793 uint8_t icmp_proto; 2794 ip6_pkt_t ipp; 2795 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); 2796 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); 2797 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); 2798 boolean_t post_frag = (sel_flags & SEL_POST_FRAG); 2799 2800 ASSERT((ipha == NULL && ip6h != NULL) || 2801 (ipha != NULL && ip6h == NULL)); 2802 2803 if (ip6h != NULL) { 2804 outer_hdr_len = prepended_length(mp, (uintptr_t)ip6h); 2805 2806 nexthdr = ip6h->ip6_nxt; 2807 2808 /* 2809 * The caller may have mistakenly assigned an ip6i_t as the 2810 * ip6h for this packet, so take that corner-case into 2811 * account. 2812 */ 2813 if (nexthdr == IPPROTO_RAW) { 2814 ip6h++; 2815 /* First check for bizarro split-mblk headers. */ 2816 if ((uintptr_t)ip6h > (uintptr_t)mp->b_wptr || 2817 ((uintptr_t)ip6h) + sizeof (ip6_t) > 2818 (uintptr_t)mp->b_wptr) { 2819 return (SELRET_BADPKT); 2820 } 2821 /* Next, see if ip6i is at the end of an mblk. */ 2822 if (ip6h == (ip6_t *)mp->b_wptr) 2823 ip6h = (ip6_t *)mp->b_cont->b_rptr; 2824 2825 nexthdr = ip6h->ip6_nxt; 2826 2827 /* 2828 * Finally, if we haven't adjusted for ip6i, do so 2829 * now. ip6i_t structs are prepended, so an ICMP 2830 * or tunnel packet would just be overwritten. 2831 */ 2832 if (outer_hdr_len == 0) 2833 outer_hdr_len = sizeof (ip6i_t); 2834 } 2835 2836 icmp_proto = IPPROTO_ICMPV6; 2837 sel->ips_isv4 = B_FALSE; 2838 sel->ips_local_addr_v6 = ip6h->ip6_dst; 2839 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2840 2841 bzero(&ipp, sizeof (ipp)); 2842 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2843 2844 switch (nexthdr) { 2845 case IPPROTO_HOPOPTS: 2846 case IPPROTO_ROUTING: 2847 case IPPROTO_DSTOPTS: 2848 case IPPROTO_FRAGMENT: 2849 /* 2850 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2851 * mblk that's contiguous to feed it 2852 */ 2853 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2854 return (SELRET_NOMEM); 2855 2856 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2857 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2858 &hdr_len, &nexthdrp)) { 2859 /* Malformed packet - caller frees. */ 2860 ipsec_freemsg_chain(spare_mp); 2861 return (SELRET_BADPKT); 2862 } 2863 nexthdr = *nexthdrp; 2864 /* We can just extract based on hdr_len now. 
*/ 2865 break; 2866 default: 2867 hdr_len = IPV6_HDR_LEN; 2868 break; 2869 } 2870 2871 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2872 /* IPv6 Fragment */ 2873 ipsec_freemsg_chain(spare_mp); 2874 return (SELRET_TUNFRAG); 2875 } 2876 transportp = (uint8_t *)ip6h + hdr_len; 2877 } else { 2878 outer_hdr_len = prepended_length(mp, (uintptr_t)ipha); 2879 icmp_proto = IPPROTO_ICMP; 2880 sel->ips_isv4 = B_TRUE; 2881 sel->ips_local_addr_v4 = ipha->ipha_dst; 2882 sel->ips_remote_addr_v4 = ipha->ipha_src; 2883 nexthdr = ipha->ipha_protocol; 2884 hdr_len = IPH_HDR_LENGTH(ipha); 2885 2886 if (port_policy_present && 2887 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2888 !is_icmp) { 2889 /* IPv4 Fragment */ 2890 ipsec_freemsg_chain(spare_mp); 2891 return (SELRET_TUNFRAG); 2892 } 2893 transportp = (uint8_t *)ipha + hdr_len; 2894 } 2895 sel->ips_protocol = nexthdr; 2896 2897 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2898 nexthdr != IPPROTO_SCTP && nexthdr != icmp_proto) || 2899 (!port_policy_present && !post_frag && tunnel_mode)) { 2900 sel->ips_remote_port = sel->ips_local_port = 0; 2901 ipsec_freemsg_chain(spare_mp); 2902 return (SELRET_SUCCESS); 2903 } 2904 2905 if (transportp + 4 > mp->b_wptr) { 2906 /* If we didn't pullup a copy already, do so now. */ 2907 /* 2908 * XXX performance, will upper-layers frequently split TCP/UDP 2909 * apart from IP or options? If so, perhaps we should revisit 2910 * the spare_mp strategy. 2911 */ 2912 ipsec_hdr_pullup_needed++; 2913 if (spare_mp == NULL && 2914 (spare_mp = msgpullup(mp, -1)) == NULL) { 2915 return (SELRET_NOMEM); 2916 } 2917 transportp = &spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2918 } 2919 2920 if (nexthdr == icmp_proto) { 2921 sel->ips_icmp_type = *transportp++; 2922 sel->ips_icmp_code = *transportp; 2923 sel->ips_remote_port = sel->ips_local_port = 0; 2924 } else { 2925 ports = (uint16_t *)transportp; 2926 sel->ips_remote_port = *ports++; 2927 sel->ips_local_port = *ports; 2928 } 2929 ipsec_freemsg_chain(spare_mp); 2930 return (SELRET_SUCCESS); 2931 } 2932 2933 static boolean_t 2934 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2935 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2936 { 2937 /* 2938 * XXX cut&paste shared with ipsec_init_inbound_sel 2939 */ 2940 uint16_t *ports; 2941 ushort_t hdr_len; 2942 mblk_t *spare_mp = NULL; 2943 uint8_t *nexthdrp; 2944 uint8_t nexthdr; 2945 uint8_t *typecode; 2946 uint8_t check_proto; 2947 2948 ASSERT((ipha == NULL && ip6h != NULL) || 2949 (ipha != NULL && ip6h == NULL)); 2950 2951 if (ip6h != NULL) { 2952 check_proto = IPPROTO_ICMPV6; 2953 nexthdr = ip6h->ip6_nxt; 2954 switch (nexthdr) { 2955 case IPPROTO_HOPOPTS: 2956 case IPPROTO_ROUTING: 2957 case IPPROTO_DSTOPTS: 2958 case IPPROTO_FRAGMENT: 2959 /* 2960 * Use ip_hdr_length_nexthdr_v6(). And have a spare 2961 * mblk that's contiguous to feed it 2962 */ 2963 spare_mp = msgpullup(mp, -1); 2964 if (spare_mp == NULL || 2965 !ip_hdr_length_nexthdr_v6(spare_mp, 2966 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2967 &hdr_len, &nexthdrp)) { 2968 /* Always works, even if NULL. */ 2969 ipsec_freemsg_chain(spare_mp); 2970 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2971 DROPPER(ipss, ipds_spd_nomem), 2972 &ipss->ipsec_spd_dropper); 2973 return (B_FALSE); 2974 } else { 2975 nexthdr = *nexthdrp; 2976 /* We can just extract based on hdr_len now. 
*/ 2977 } 2978 break; 2979 default: 2980 hdr_len = IPV6_HDR_LEN; 2981 break; 2982 } 2983 } else { 2984 check_proto = IPPROTO_ICMP; 2985 hdr_len = IPH_HDR_LENGTH(ipha); 2986 nexthdr = ipha->ipha_protocol; 2987 } 2988 2989 sel->ips_protocol = nexthdr; 2990 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2991 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2992 sel->ips_local_port = sel->ips_remote_port = 0; 2993 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2994 return (B_TRUE); 2995 } 2996 2997 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2998 /* If we didn't pullup a copy already, do so now. */ 2999 /* 3000 * XXX performance, will upper-layers frequently split TCP/UDP 3001 * apart from IP or options? If so, perhaps we should revisit 3002 * the spare_mp strategy. 3003 * 3004 * XXX should this be msgpullup(mp, hdr_len+4) ??? 3005 */ 3006 if (spare_mp == NULL && 3007 (spare_mp = msgpullup(mp, -1)) == NULL) { 3008 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 3009 DROPPER(ipss, ipds_spd_nomem), 3010 &ipss->ipsec_spd_dropper); 3011 return (B_FALSE); 3012 } 3013 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 3014 } else { 3015 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 3016 } 3017 3018 if (nexthdr == check_proto) { 3019 typecode = (uint8_t *)ports; 3020 sel->ips_icmp_type = *typecode++; 3021 sel->ips_icmp_code = *typecode; 3022 sel->ips_remote_port = sel->ips_local_port = 0; 3023 } else { 3024 sel->ips_local_port = *ports++; 3025 sel->ips_remote_port = *ports; 3026 } 3027 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 3028 return (B_TRUE); 3029 } 3030 3031 /* 3032 * Create an ipsec_action_t based on the way an inbound packet was protected. 3033 * Used to reflect traffic back to a sender. 3034 * 3035 * We don't bother interning the action into the hash table. 3036 */ 3037 ipsec_action_t * 3038 ipsec_in_to_out_action(ipsec_in_t *ii) 3039 { 3040 ipsa_t *ah_assoc, *esp_assoc; 3041 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 3042 ipsec_action_t *ap; 3043 boolean_t unique; 3044 3045 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3046 3047 if (ap == NULL) 3048 return (NULL); 3049 3050 bzero(ap, sizeof (*ap)); 3051 HASH_NULL(ap, ipa_hash); 3052 ap->ipa_next = NULL; 3053 ap->ipa_refs = 1; 3054 3055 /* 3056 * Get the algorithms that were used for this packet. 
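	 * (Editorial summary: encr_alg and espa_alg come from the ESP SA,
	 * auth_alg from the AH SA; a value of zero means that service was
	 * not used on the inbound packet.)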
3057 */ 3058 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 3059 ap->ipa_act.ipa_log = 0; 3060 ah_assoc = ii->ipsec_in_ah_sa; 3061 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 3062 3063 esp_assoc = ii->ipsec_in_esp_sa; 3064 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 3065 3066 if (esp_assoc != NULL) { 3067 encr_alg = esp_assoc->ipsa_encr_alg; 3068 espa_alg = esp_assoc->ipsa_auth_alg; 3069 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 3070 } 3071 if (ah_assoc != NULL) 3072 auth_alg = ah_assoc->ipsa_auth_alg; 3073 3074 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 3075 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 3076 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 3077 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 3078 unique = B_FALSE; 3079 3080 if (esp_assoc != NULL) { 3081 ap->ipa_act.ipa_apply.ipp_espa_minbits = 3082 esp_assoc->ipsa_authkeybits; 3083 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 3084 esp_assoc->ipsa_authkeybits; 3085 ap->ipa_act.ipa_apply.ipp_espe_minbits = 3086 esp_assoc->ipsa_encrkeybits; 3087 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 3088 esp_assoc->ipsa_encrkeybits; 3089 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 3090 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 3091 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 3092 unique = B_TRUE; 3093 } 3094 if (ah_assoc != NULL) { 3095 ap->ipa_act.ipa_apply.ipp_ah_minbits = 3096 ah_assoc->ipsa_authkeybits; 3097 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 3098 ah_assoc->ipsa_authkeybits; 3099 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 3100 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 3101 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 3102 unique = B_TRUE; 3103 } 3104 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3105 ap->ipa_want_unique = unique; 3106 ap->ipa_allow_clear = B_FALSE; 3107 ap->ipa_want_se = ii->ipsec_in_decaps; 3108 ap->ipa_want_ah = (ah_assoc != NULL); 3109 ap->ipa_want_esp = (esp_assoc != NULL); 3110 3111 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 3112 3113 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3114 3115 return (ap); 3116 } 3117 3118 3119 /* 3120 * Compute the worst-case amount of extra space required by an action. 3121 * Note that, because of the ESP considerations listed below, this is 3122 * actually not the same as the best-case reduction in the MTU; in the 3123 * future, we should pass additional information to this function to 3124 * allow the actual MTU impact to be computed. 3125 * 3126 * AH: Revisit this if we implement algorithms with 3127 * a verifier size of more than 12 bytes. 3128 * 3129 * ESP: A more exact but more messy computation would take into 3130 * account the interaction between the cipher block size and the 3131 * effective MTU, yielding the inner payload size which reflects a 3132 * packet with *minimum* ESP padding.. 3133 */ 3134 int32_t 3135 ipsec_act_ovhd(const ipsec_act_t *act) 3136 { 3137 int32_t overhead = 0; 3138 3139 if (act->ipa_type == IPSEC_ACT_APPLY) { 3140 const ipsec_prot_t *ipp = &act->ipa_apply; 3141 3142 if (ipp->ipp_use_ah) 3143 overhead += IPSEC_MAX_AH_HDR_SIZE; 3144 if (ipp->ipp_use_esp) { 3145 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3146 overhead += sizeof (struct udphdr); 3147 } 3148 if (ipp->ipp_use_se) 3149 overhead += IP_SIMPLE_HDR_LENGTH; 3150 } 3151 return (overhead); 3152 } 3153 3154 /* 3155 * This hash function is used only when creating policies and thus is not 3156 * performance-critical for packet flows. 
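 *
 * (Editorial note: as defined below it is the constant function 0, so
 * every action currently lands in a single hash bucket; that is correct,
 * merely slower, until the canonicalization described next is done.)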
3157 * 3158 * Future work: canonicalize the structures hashed with this (i.e., 3159 * zeroize padding) so the hash works correctly. 3160 */ 3161 /* ARGSUSED */ 3162 static uint32_t 3163 policy_hash(int size, const void *start, const void *end) 3164 { 3165 return (0); 3166 } 3167 3168 3169 /* 3170 * Hash function macros for each address type. 3171 * 3172 * The IPV6 hash function assumes that the low order 32-bits of the 3173 * address (typically containing the low order 24 bits of the mac 3174 * address) are reasonably well-distributed. Revisit this if we run 3175 * into trouble from lots of collisions on ::1 addresses and the like 3176 * (seems unlikely). 3177 */ 3178 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3179 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3180 3181 /* 3182 * These two hash functions should produce coordinated values 3183 * but have slightly different roles. 3184 */ 3185 static uint32_t 3186 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3187 { 3188 uint32_t valid = selkey->ipsl_valid; 3189 ipsec_stack_t *ipss = ns->netstack_ipsec; 3190 3191 if (!(valid & IPSL_REMOTE_ADDR)) 3192 return (IPSEC_SEL_NOHASH); 3193 3194 if (valid & IPSL_IPV4) { 3195 if (selkey->ipsl_remote_pfxlen == 32) { 3196 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3197 ipss->ipsec_spd_hashsize)); 3198 } 3199 } 3200 if (valid & IPSL_IPV6) { 3201 if (selkey->ipsl_remote_pfxlen == 128) { 3202 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3203 ipss->ipsec_spd_hashsize)); 3204 } 3205 } 3206 return (IPSEC_SEL_NOHASH); 3207 } 3208 3209 static uint32_t 3210 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3211 { 3212 if (sel->ips_isv4) { 3213 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3214 root->ipr_nchains)); 3215 } 3216 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3217 } 3218 3219 /* 3220 * Intern actions into the action hash table. 3221 */ 3222 ipsec_action_t * 3223 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3224 { 3225 int i; 3226 uint32_t hval; 3227 ipsec_action_t *ap; 3228 ipsec_action_t *prev = NULL; 3229 int32_t overhead, maxovhd = 0; 3230 boolean_t allow_clear = B_FALSE; 3231 boolean_t want_ah = B_FALSE; 3232 boolean_t want_esp = B_FALSE; 3233 boolean_t want_se = B_FALSE; 3234 boolean_t want_unique = B_FALSE; 3235 ipsec_stack_t *ipss = ns->netstack_ipsec; 3236 3237 /* 3238 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3239 * so we can use a non-trivial policy_hash function. 3240 */ 3241 for (i = n-1; i >= 0; i--) { 3242 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3243 3244 HASH_LOCK(ipss->ipsec_action_hash, hval); 3245 3246 for (HASH_ITERATE(ap, ipa_hash, 3247 ipss->ipsec_action_hash, hval)) { 3248 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3249 continue; 3250 if (ap->ipa_next != prev) 3251 continue; 3252 break; 3253 } 3254 if (ap != NULL) { 3255 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3256 prev = ap; 3257 continue; 3258 } 3259 /* 3260 * need to allocate a new one.. 
3261 */ 3262 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3263 if (ap == NULL) { 3264 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3265 if (prev != NULL) 3266 ipsec_action_free(prev); 3267 return (NULL); 3268 } 3269 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3270 3271 ap->ipa_next = prev; 3272 ap->ipa_act = a[i]; 3273 3274 overhead = ipsec_act_ovhd(&a[i]); 3275 if (maxovhd < overhead) 3276 maxovhd = overhead; 3277 3278 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3279 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3280 allow_clear = B_TRUE; 3281 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3282 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3283 3284 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3285 want_ah |= ipp->ipp_use_ah; 3286 want_esp |= ipp->ipp_use_esp; 3287 want_se |= ipp->ipp_use_se; 3288 want_unique |= ipp->ipp_use_unique; 3289 } 3290 ap->ipa_allow_clear = allow_clear; 3291 ap->ipa_want_ah = want_ah; 3292 ap->ipa_want_esp = want_esp; 3293 ap->ipa_want_se = want_se; 3294 ap->ipa_want_unique = want_unique; 3295 ap->ipa_refs = 1; /* from the hash table */ 3296 ap->ipa_ovhd = maxovhd; 3297 if (prev) 3298 prev->ipa_refs++; 3299 prev = ap; 3300 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3301 } 3302 3303 ap->ipa_refs++; /* caller's reference */ 3304 3305 return (ap); 3306 } 3307 3308 /* 3309 * Called when refcount goes to 0, indicating that all references to this 3310 * node are gone. 3311 * 3312 * This does not unchain the action from the hash table. 3313 */ 3314 void 3315 ipsec_action_free(ipsec_action_t *ap) 3316 { 3317 for (;;) { 3318 ipsec_action_t *np = ap->ipa_next; 3319 ASSERT(ap->ipa_refs == 0); 3320 ASSERT(ap->ipa_hash.hash_pp == NULL); 3321 kmem_cache_free(ipsec_action_cache, ap); 3322 ap = np; 3323 /* Inlined IPACT_REFRELE -- avoid recursion */ 3324 if (ap == NULL) 3325 break; 3326 membar_exit(); 3327 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3328 break; 3329 /* End inlined IPACT_REFRELE */ 3330 } 3331 } 3332 3333 /* 3334 * Called when the action hash table goes away. 3335 * 3336 * The actions can be queued on an mblk with ipsec_in or 3337 * ipsec_out, hence the actions might still be around. 3338 * But we decrement ipa_refs here since we no longer have 3339 * a reference to the action from the hash table. 3340 */ 3341 static void 3342 ipsec_action_free_table(ipsec_action_t *ap) 3343 { 3344 while (ap != NULL) { 3345 ipsec_action_t *np = ap->ipa_next; 3346 3347 /* FIXME: remove? */ 3348 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3349 (void *)ap, ap->ipa_refs); 3350 ASSERT(ap->ipa_refs > 0); 3351 IPACT_REFRELE(ap); 3352 ap = np; 3353 } 3354 } 3355 3356 /* 3357 * Need to walk all stack instances since the reclaim function 3358 * is global for all instances 3359 */ 3360 /* ARGSUSED */ 3361 static void 3362 ipsec_action_reclaim(void *arg) 3363 { 3364 netstack_handle_t nh; 3365 netstack_t *ns; 3366 3367 netstack_next_init(&nh); 3368 while ((ns = netstack_next(&nh)) != NULL) { 3369 ipsec_action_reclaim_stack(ns); 3370 netstack_rele(ns); 3371 } 3372 netstack_next_fini(&nh); 3373 } 3374 3375 /* 3376 * Periodically sweep action hash table for actions with refcount==1, and 3377 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3378 * because we can't close the race between another thread finding the action 3379 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3380 * Instead, we run this function sporadically to clean up after ourselves; 3381 * we also set it as the "reclaim" function for the action kmem_cache. 
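 *
 * Editorial sketch (assumption: the cache is created in this module's
 * init path, which is outside this excerpt) of how that reclaim callback
 * would be wired up:
 *
 *	ipsec_action_cache = kmem_cache_create("ipsec_actions",
 *	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
 *	    ipsec_action_reclaim, NULL, NULL, 0);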
3382 * 3383 * Note that it may take several passes of ipsec_action_gc() to free all 3384 * "stale" actions. 3385 */ 3386 static void 3387 ipsec_action_reclaim_stack(netstack_t *ns) 3388 { 3389 int i; 3390 ipsec_stack_t *ipss = ns->netstack_ipsec; 3391 3392 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3393 ipsec_action_t *ap, *np; 3394 3395 /* skip the lock if nobody home */ 3396 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3397 continue; 3398 3399 HASH_LOCK(ipss->ipsec_action_hash, i); 3400 for (ap = ipss->ipsec_action_hash[i].hash_head; 3401 ap != NULL; ap = np) { 3402 ASSERT(ap->ipa_refs > 0); 3403 np = ap->ipa_hash.hash_next; 3404 if (ap->ipa_refs > 1) 3405 continue; 3406 HASH_UNCHAIN(ap, ipa_hash, 3407 ipss->ipsec_action_hash, i); 3408 IPACT_REFRELE(ap); 3409 } 3410 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3411 } 3412 } 3413 3414 /* 3415 * Intern a selector set into the selector set hash table. 3416 * This is simpler than the actions case.. 3417 */ 3418 static ipsec_sel_t * 3419 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3420 { 3421 ipsec_sel_t *sp; 3422 uint32_t hval, bucket; 3423 ipsec_stack_t *ipss = ns->netstack_ipsec; 3424 3425 /* 3426 * Exactly one AF bit should be set in selkey. 3427 */ 3428 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3429 !(selkey->ipsl_valid & IPSL_IPV6)); 3430 3431 hval = selkey_hash(selkey, ns); 3432 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3433 selkey->ipsl_sel_hval = hval; 3434 3435 bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval; 3436 3437 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); 3438 HASH_LOCK(ipss->ipsec_sel_hash, bucket); 3439 3440 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { 3441 if (bcmp(&sp->ipsl_key, selkey, 3442 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3443 break; 3444 } 3445 if (sp != NULL) { 3446 sp->ipsl_refs++; 3447 3448 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3449 return (sp); 3450 } 3451 3452 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3453 if (sp == NULL) { 3454 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3455 return (NULL); 3456 } 3457 3458 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); 3459 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3460 sp->ipsl_key = *selkey; 3461 /* Set to uninitalized and have insertion into polhead fix things. */ 3462 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3463 sp->ipsl_key.ipsl_pol_hval = 0; 3464 else 3465 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3466 3467 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3468 3469 return (sp); 3470 } 3471 3472 static void 3473 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) 3474 { 3475 ipsec_sel_t *sp = *spp; 3476 int hval = sp->ipsl_key.ipsl_sel_hval; 3477 ipsec_stack_t *ipss = ns->netstack_ipsec; 3478 3479 *spp = NULL; 3480 3481 if (hval == IPSEC_SEL_NOHASH) 3482 hval = 0; 3483 3484 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); 3485 HASH_LOCK(ipss->ipsec_sel_hash, hval); 3486 if (--sp->ipsl_refs == 1) { 3487 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); 3488 sp->ipsl_refs--; 3489 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3490 ASSERT(sp->ipsl_refs == 0); 3491 kmem_cache_free(ipsec_sel_cache, sp); 3492 /* Caller unlocks */ 3493 return; 3494 } 3495 3496 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3497 } 3498 3499 /* 3500 * Free a policy rule which we know is no longer being referenced. 
3501 */ 3502 void 3503 ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns) 3504 { 3505 ASSERT(ipp->ipsp_refs == 0); 3506 ASSERT(ipp->ipsp_sel != NULL); 3507 ASSERT(ipp->ipsp_act != NULL); 3508 3509 ipsec_sel_rel(&ipp->ipsp_sel, ns); 3510 IPACT_REFRELE(ipp->ipsp_act); 3511 kmem_cache_free(ipsec_pol_cache, ipp); 3512 } 3513 3514 /* 3515 * Construction of new policy rules; construct a policy, and add it to 3516 * the appropriate tables. 3517 */ 3518 ipsec_policy_t * 3519 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3520 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3521 { 3522 ipsec_action_t *ap; 3523 ipsec_sel_t *sp; 3524 ipsec_policy_t *ipp; 3525 ipsec_stack_t *ipss = ns->netstack_ipsec; 3526 3527 if (index_ptr == NULL) 3528 index_ptr = &ipss->ipsec_next_policy_index; 3529 3530 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3531 ap = ipsec_act_find(a, nacts, ns); 3532 sp = ipsec_find_sel(keys, ns); 3533 3534 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3535 if (ap != NULL) { 3536 IPACT_REFRELE(ap); 3537 } 3538 if (sp != NULL) 3539 ipsec_sel_rel(&sp, ns); 3540 if (ipp != NULL) 3541 kmem_cache_free(ipsec_pol_cache, ipp); 3542 return (NULL); 3543 } 3544 3545 HASH_NULL(ipp, ipsp_hash); 3546 3547 ipp->ipsp_refs = 1; /* caller's reference */ 3548 ipp->ipsp_sel = sp; 3549 ipp->ipsp_act = ap; 3550 ipp->ipsp_prio = prio; /* rule priority */ 3551 ipp->ipsp_index = *index_ptr; 3552 (*index_ptr)++; 3553 3554 return (ipp); 3555 } 3556 3557 static void 3558 ipsec_update_present_flags(ipsec_stack_t *ipss) 3559 { 3560 boolean_t hashpol; 3561 3562 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3563 3564 if (hashpol) { 3565 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3566 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3567 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3568 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3569 return; 3570 } 3571 3572 ipss->ipsec_outbound_v4_policy_present = (NULL != 3573 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3574 ipr_nonhash[IPSEC_AF_V4]); 3575 ipss->ipsec_outbound_v6_policy_present = (NULL != 3576 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3577 ipr_nonhash[IPSEC_AF_V6]); 3578 ipss->ipsec_inbound_v4_policy_present = (NULL != 3579 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3580 ipr_nonhash[IPSEC_AF_V4]); 3581 ipss->ipsec_inbound_v6_policy_present = (NULL != 3582 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3583 ipr_nonhash[IPSEC_AF_V6]); 3584 } 3585 3586 boolean_t 3587 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3588 netstack_t *ns) 3589 { 3590 ipsec_sel_t *sp; 3591 ipsec_policy_t *ip, *nip, *head; 3592 int af; 3593 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3594 3595 sp = ipsec_find_sel(keys, ns); 3596 3597 if (sp == NULL) 3598 return (B_FALSE); 3599 3600 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6;
3601
3602 	rw_enter(&php->iph_lock, RW_WRITER);
3603
3604 	if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) {
3605 		head = pr->ipr_nonhash[af];
3606 	} else {
3607 		head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head;
3608 	}
3609
3610 	for (ip = head; ip != NULL; ip = nip) {
3611 		nip = ip->ipsp_hash.hash_next;
3612 		if (ip->ipsp_sel != sp) {
3613 			continue;
3614 		}
3615
3616 		IPPOL_UNCHAIN(php, ip, ns);
3617
3618 		php->iph_gen++;
3619 		ipsec_update_present_flags(ns->netstack_ipsec);
3620
3621 		rw_exit(&php->iph_lock);
3622
3623 		ipsec_sel_rel(&sp, ns);
3624
3625 		return (B_TRUE);
3626 	}
3627
3628 	rw_exit(&php->iph_lock);
3629 	ipsec_sel_rel(&sp, ns);
3630 	return (B_FALSE);
3631 }
3632
3633 int
3634 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index,
3635     netstack_t *ns)
3636 {
3637 	boolean_t found = B_FALSE;
3638 	ipsec_policy_t ipkey;
3639 	ipsec_policy_t *ip;
3640 	avl_index_t where;
3641
3642 	(void) memset(&ipkey, 0, sizeof (ipkey));
3643 	ipkey.ipsp_index = policy_index;
3644
3645 	rw_enter(&php->iph_lock, RW_WRITER);
3646
3647 	/*
3648 	 * We could be cleverer here about the walk, but
3649 	 * (k+1)*log(N) will do for now (k == number of matches,
3650 	 * N == number of table entries).
3651 	 */
3652 	for (;;) {
3653 		ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid,
3654 		    (void *)&ipkey, &where);
3655 		ASSERT(ip == NULL);
3656
3657 		ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER);
3658
3659 		if (ip == NULL)
3660 			break;
3661
3662 		if (ip->ipsp_index != policy_index) {
3663 			ASSERT(ip->ipsp_index > policy_index);
3664 			break;
3665 		}
3666
3667 		IPPOL_UNCHAIN(php, ip, ns);
3668 		found = B_TRUE;
3669 	}
3670
3671 	if (found) {
3672 		php->iph_gen++;
3673 		ipsec_update_present_flags(ns->netstack_ipsec);
3674 	}
3675
3676 	rw_exit(&php->iph_lock);
3677
3678 	return (found ? 0 : ENOENT);
3679 }
3680
3681 /*
3682  * Given a constructed ipsec_policy_t policy rule, see if it can be entered
3683  * into the correct policy ruleset.  As a side-effect, it sets the hash
3684  * entries on "ipp"'s ipsp_pol_hval.
3685  *
3686  * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
3687  * duplicate policy exists with exactly the same selectors, or an ICMP
3688  * rule exists with a different encryption/authentication action).
3689  */
3690 boolean_t
3691 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
3692 {
3693 	ipsec_policy_root_t *pr = &php->iph_root[direction];
3694 	int af = -1;
3695 	ipsec_policy_t *p2, *head;
3696 	uint8_t check_proto;
3697 	ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3698 	uint32_t valid = selkey->ipsl_valid;
3699
3700 	if (valid & IPSL_IPV6) {
3701 		ASSERT(!(valid & IPSL_IPV4));
3702 		af = IPSEC_AF_V6;
3703 		check_proto = IPPROTO_ICMPV6;
3704 	} else {
3705 		ASSERT(valid & IPSL_IPV4);
3706 		af = IPSEC_AF_V4;
3707 		check_proto = IPPROTO_ICMP;
3708 	}
3709
3710 	ASSERT(RW_WRITE_HELD(&php->iph_lock));
3711
3712 	/*
3713 	 * Double-check that we don't have any duplicate selectors here.
3714 	 * Because selectors are interned below, we need only compare pointers
3715 	 * for equality.
3716 	 */
3717 	if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
3718 		head = pr->ipr_nonhash[af];
3719 	} else {
3720 		selkey->ipsl_pol_hval =
3721 		    (selkey->ipsl_valid & IPSL_IPV4) ?
3722 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3723 pr->ipr_nchains) : 3724 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3725 pr->ipr_nchains); 3726 3727 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3728 } 3729 3730 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3731 if (p2->ipsp_sel == ipp->ipsp_sel) 3732 return (B_FALSE); 3733 } 3734 3735 /* 3736 * If it's ICMP and not a drop or pass rule, run through the ICMP 3737 * rules and make sure the action is either new or the same as any 3738 * other actions. We don't have to check the full chain because 3739 * discard and bypass will override all other actions 3740 */ 3741 3742 if (valid & IPSL_PROTOCOL && 3743 selkey->ipsl_proto == check_proto && 3744 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3745 3746 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3747 3748 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL && 3749 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto && 3750 (p2->ipsp_act->ipa_act.ipa_type == 3751 IPSEC_ACT_APPLY)) { 3752 return (ipsec_compare_action(p2, ipp)); 3753 } 3754 } 3755 } 3756 3757 return (B_TRUE); 3758 } 3759 3760 /* 3761 * compare the action chains of two policies for equality 3762 * B_TRUE -> effective equality 3763 */ 3764 3765 static boolean_t 3766 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3767 { 3768 3769 ipsec_action_t *act1, *act2; 3770 3771 /* We have a valid rule. Let's compare the actions */ 3772 if (p1->ipsp_act == p2->ipsp_act) { 3773 /* same action. We are good */ 3774 return (B_TRUE); 3775 } 3776 3777 /* we have to walk the chain */ 3778 3779 act1 = p1->ipsp_act; 3780 act2 = p2->ipsp_act; 3781 3782 while (act1 != NULL && act2 != NULL) { 3783 3784 /* otherwise, Are we close enough? */ 3785 if (act1->ipa_allow_clear != act2->ipa_allow_clear || 3786 act1->ipa_want_ah != act2->ipa_want_ah || 3787 act1->ipa_want_esp != act2->ipa_want_esp || 3788 act1->ipa_want_se != act2->ipa_want_se) { 3789 /* Nope, we aren't */ 3790 return (B_FALSE); 3791 } 3792 3793 if (act1->ipa_want_ah) { 3794 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3795 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3796 return (B_FALSE); 3797 } 3798 3799 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3800 act2->ipa_act.ipa_apply.ipp_ah_minbits || 3801 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3802 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3803 return (B_FALSE); 3804 } 3805 } 3806 3807 if (act1->ipa_want_esp) { 3808 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3809 act2->ipa_act.ipa_apply.ipp_use_esp || 3810 act1->ipa_act.ipa_apply.ipp_use_espa != 3811 act2->ipa_act.ipa_apply.ipp_use_espa) { 3812 return (B_FALSE); 3813 } 3814 3815 if (act1->ipa_act.ipa_apply.ipp_use_esp) { 3816 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3817 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3818 return (B_FALSE); 3819 } 3820 3821 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3822 act2->ipa_act.ipa_apply.ipp_espe_minbits || 3823 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3824 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3825 return (B_FALSE); 3826 } 3827 } 3828 3829 if (act1->ipa_act.ipa_apply.ipp_use_espa) { 3830 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3831 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3832 return (B_FALSE); 3833 } 3834 3835 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3836 act2->ipa_act.ipa_apply.ipp_espa_minbits || 3837 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3838 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3839 return (B_FALSE); 3840 } 3841 } 3842 3843 } 3844 3845 act1 = 
act1->ipa_next; 3846 act2 = act2->ipa_next; 3847 } 3848 3849 if (act1 != NULL || act2 != NULL) { 3850 return (B_FALSE); 3851 } 3852 3853 return (B_TRUE); 3854 } 3855 3856 3857 /* 3858 * Given a constructed ipsec_policy_t policy rule, enter it into 3859 * the correct policy ruleset. 3860 * 3861 * ipsec_check_policy() is assumed to have succeeded first (to check for 3862 * duplicates). 3863 */ 3864 void 3865 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3866 netstack_t *ns) 3867 { 3868 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3869 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3870 uint32_t valid = selkey->ipsl_valid; 3871 uint32_t hval = selkey->ipsl_pol_hval; 3872 int af = -1; 3873 3874 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3875 3876 if (valid & IPSL_IPV6) { 3877 ASSERT(!(valid & IPSL_IPV4)); 3878 af = IPSEC_AF_V6; 3879 } else { 3880 ASSERT(valid & IPSL_IPV4); 3881 af = IPSEC_AF_V4; 3882 } 3883 3884 php->iph_gen++; 3885 3886 if (hval == IPSEC_SEL_NOHASH) { 3887 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3888 } else { 3889 HASH_LOCK(pr->ipr_hash, hval); 3890 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3891 HASH_UNLOCK(pr->ipr_hash, hval); 3892 } 3893 3894 ipsec_insert_always(&php->iph_rulebyid, ipp); 3895 3896 ipsec_update_present_flags(ns->netstack_ipsec); 3897 } 3898 3899 static void 3900 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, 3901 netstack_t *ns) 3902 { 3903 ipsec_policy_t *ip, *nip; 3904 int af, chain, nchain; 3905 3906 for (af = 0; af < IPSEC_NAF; af++) { 3907 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3908 nip = ip->ipsp_hash.hash_next; 3909 IPPOL_UNCHAIN(php, ip, ns); 3910 } 3911 ipr->ipr_nonhash[af] = NULL; 3912 } 3913 nchain = ipr->ipr_nchains; 3914 3915 for (chain = 0; chain < nchain; chain++) { 3916 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3917 ip = nip) { 3918 nip = ip->ipsp_hash.hash_next; 3919 IPPOL_UNCHAIN(php, ip, ns); 3920 } 3921 ipr->ipr_hash[chain].hash_head = NULL; 3922 } 3923 } 3924 3925 /* 3926 * Create and insert inbound or outbound policy associated with actp for the 3927 * address family fam into the policy head ph. Returns B_TRUE if policy was 3928 * inserted, and B_FALSE otherwise. 3929 */ 3930 boolean_t 3931 ipsec_polhead_insert(ipsec_policy_head_t *ph, ipsec_act_t *actp, uint_t nact, 3932 int fam, int ptype, netstack_t *ns) 3933 { 3934 ipsec_selkey_t sel; 3935 ipsec_policy_t *pol; 3936 ipsec_policy_root_t *pr; 3937 3938 bzero(&sel, sizeof (sel)); 3939 sel.ipsl_valid = (fam == IPSEC_AF_V4 ? 
	    IPSL_IPV4 : IPSL_IPV6);
	if ((pol = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET,
	    NULL, ns)) != NULL) {
		pr = &ph->iph_root[ptype];
		HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[fam]);
		ipsec_insert_always(&ph->iph_rulebyid, pol);
	}
	return (pol != NULL);
}

void
ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns)
{
	int dir;

	ASSERT(RW_WRITE_HELD(&php->iph_lock));

	for (dir = 0; dir < IPSEC_NTYPES; dir++)
		ipsec_ipr_flush(php, &php->iph_root[dir], ns);

	ipsec_update_present_flags(ns->netstack_ipsec);
}

void
ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns)
{
	int dir;

	ASSERT(php->iph_refs == 0);

	rw_enter(&php->iph_lock, RW_WRITER);
	ipsec_polhead_flush(php, ns);
	rw_exit(&php->iph_lock);
	rw_destroy(&php->iph_lock);
	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		ipsec_policy_root_t *ipr = &php->iph_root[dir];
		int chain;

		for (chain = 0; chain < ipr->ipr_nchains; chain++)
			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));

	}
	ipsec_polhead_free_table(php);
	kmem_free(php, sizeof (*php));
}

static void
ipsec_ipr_init(ipsec_policy_root_t *ipr)
{
	int af;

	ipr->ipr_nchains = 0;
	ipr->ipr_hash = NULL;

	for (af = 0; af < IPSEC_NAF; af++) {
		ipr->ipr_nonhash[af] = NULL;
	}
}

ipsec_policy_head_t *
ipsec_polhead_create(void)
{
	ipsec_policy_head_t *php;

	php = kmem_alloc(sizeof (*php), KM_NOSLEEP);
	if (php == NULL)
		return (php);

	rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL);
	php->iph_refs = 1;
	php->iph_gen = 0;

	ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]);
	ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]);

	avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid,
	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));

	return (php);
}

/*
 * Clone the policy head into a new polhead; release one reference to the
 * old one and return the only reference to the new one.
 * If the old one had a refcount of 1, just return it.
 */
ipsec_policy_head_t *
ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns)
{
	ipsec_policy_head_t *nphp;

	if (php == NULL)
		return (ipsec_polhead_create());
	else if (php->iph_refs == 1)
		return (php);

	nphp = ipsec_polhead_create();
	if (nphp == NULL)
		return (NULL);

	if (ipsec_copy_polhead(php, nphp, ns) != 0) {
		ipsec_polhead_free(nphp, ns);
		return (NULL);
	}
	IPPH_REFRELE(php, ns);
	return (nphp);
}

/*
 * When sending a response to an ICMP request or generating a TCP RST,
 * the outbound packets need to go out at the same level of protection
 * as the incoming ones, i.e., we associate our outbound policy with how
 * the packet came in.  We call this after we have accepted the incoming
 * packet, which may or may not have been in the clear, so we send the
 * reply back with a policy matching the incoming datagram's policy.
 *
 * NOTE: This technique serves two purposes:
 *
 * 1) If we have multiple outbound policies, we send out a reply
 *    matching how it came in rather than matching the outbound
 *    policy.
 *
 * 2) For asymmetric policies, we want to make sure that incoming
 *    and outgoing have the same level of protection.  Asymmetric
 *    policies exist only with global policy, where we may not have
 *    both outbound and inbound at the same time.
 *
 * NOTE2: This function is called by cleartext cases, so it needs to be
 * in IP proper.
 */
boolean_t
ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, zoneid_t zoneid)
{
	ipsec_in_t *ii;
	ipsec_out_t *io;
	boolean_t v4;
	mblk_t *mp;
	boolean_t secure;
	uint_t ifindex;
	ipsec_selector_t sel;
	ipsec_action_t *reflect_action = NULL;
	netstack_t *ns;

	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);

	bzero((void*)&sel, sizeof (sel));

	ii = (ipsec_in_t *)ipsec_mp->b_rptr;

	mp = ipsec_mp->b_cont;
	ASSERT(mp != NULL);

	if (ii->ipsec_in_action != NULL) {
		/* Transfer reference. */
		reflect_action = ii->ipsec_in_action;
		ii->ipsec_in_action = NULL;
	} else if (!ii->ipsec_in_loopback)
		reflect_action = ipsec_in_to_out_action(ii);
	secure = ii->ipsec_in_secure;
	ifindex = ii->ipsec_in_ill_index;
	ns = ii->ipsec_in_ns;
	v4 = ii->ipsec_in_v4;

	ipsec_in_release_refs(ii);	/* No netstack_rele/hold needed */

	/*
	 * Use the global zone's id if we don't have a specific zone
	 * identified.  This is likely to happen when the received packet's
	 * destination is a Trusted Extensions all-zones address.  We did
	 * not copy the zoneid from ii->ipsec_in_zoneid because that
	 * information represents the zoneid we started input processing
	 * with.  The caller should have a better idea of which zone the
	 * received packet was destined for.
	 */

	if (zoneid == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;

	/*
	 * The caller is going to send the datagram out, and it might
	 * either go on the wire or be delivered locally through
	 * ip_wput_local.
	 *
	 * 1) If it goes out on the wire, new associations will be
	 *    obtained.
	 * 2) If it is delivered locally, ip_wput_local will convert
	 *    this IPSEC_OUT to an IPSEC_IN by looking at the requests.
	 */

	io = (ipsec_out_t *)ipsec_mp->b_rptr;
	bzero(io, sizeof (ipsec_out_t));
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_frtn.free_func = ipsec_out_free;
	io->ipsec_out_frtn.free_arg = (char *)io;
	io->ipsec_out_act = reflect_action;

	if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0,
	    ns->netstack_ipsec))
		return (B_FALSE);

	io->ipsec_out_src_port = sel.ips_local_port;
	io->ipsec_out_dst_port = sel.ips_remote_port;
	io->ipsec_out_proto = sel.ips_protocol;
	io->ipsec_out_icmp_type = sel.ips_icmp_type;
	io->ipsec_out_icmp_code = sel.ips_icmp_code;

	/*
	 * Don't use global policy for this, as we want
	 * to use the same protection that was applied to the inbound packet.
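	 * Consulting global policy here could pick an asymmetric outbound
	 * rule and reply with a different level of protection than the
	 * inbound datagram arrived with.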
4149 */ 4150 io->ipsec_out_use_global_policy = B_FALSE; 4151 io->ipsec_out_proc_begin = B_FALSE; 4152 io->ipsec_out_secure = secure; 4153 io->ipsec_out_v4 = v4; 4154 io->ipsec_out_ill_index = ifindex; 4155 io->ipsec_out_zoneid = zoneid; 4156 io->ipsec_out_ns = ns; /* No netstack_hold */ 4157 4158 return (B_TRUE); 4159 } 4160 4161 mblk_t * 4162 ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4163 { 4164 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 4165 ipsec_in_t *nii; 4166 mblk_t *nmp; 4167 frtn_t nfrtn; 4168 ipsec_stack_t *ipss = ns->netstack_ipsec; 4169 4170 ASSERT(ii->ipsec_in_type == IPSEC_IN); 4171 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 4172 4173 nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); 4174 if (nmp == NULL) { 4175 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4176 DROPPER(ipss, ipds_spd_nomem), 4177 &ipss->ipsec_spd_dropper); 4178 return (NULL); 4179 } 4180 4181 ASSERT(nmp->b_datap->db_type == M_CTL); 4182 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4183 4184 /* 4185 * Bump refcounts. 4186 */ 4187 if (ii->ipsec_in_ah_sa != NULL) 4188 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 4189 if (ii->ipsec_in_esp_sa != NULL) 4190 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 4191 if (ii->ipsec_in_policy != NULL) 4192 IPPH_REFHOLD(ii->ipsec_in_policy); 4193 4194 /* 4195 * Copy everything, but preserve the free routine provided by 4196 * ipsec_in_alloc(). 4197 */ 4198 nii = (ipsec_in_t *)nmp->b_rptr; 4199 nfrtn = nii->ipsec_in_frtn; 4200 bcopy(ii, nii, sizeof (*ii)); 4201 nii->ipsec_in_frtn = nfrtn; 4202 4203 nmp->b_cont = cont; 4204 4205 return (nmp); 4206 } 4207 4208 mblk_t * 4209 ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4210 { 4211 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 4212 ipsec_out_t *nio; 4213 mblk_t *nmp; 4214 frtn_t nfrtn; 4215 ipsec_stack_t *ipss = ns->netstack_ipsec; 4216 4217 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4218 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4219 4220 nmp = ipsec_alloc_ipsec_out(ns); 4221 if (nmp == NULL) { 4222 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4223 DROPPER(ipss, ipds_spd_nomem), 4224 &ipss->ipsec_spd_dropper); 4225 return (NULL); 4226 } 4227 ASSERT(nmp->b_datap->db_type == M_CTL); 4228 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4229 4230 /* 4231 * Bump refcounts. 4232 */ 4233 if (io->ipsec_out_ah_sa != NULL) 4234 IPSA_REFHOLD(io->ipsec_out_ah_sa); 4235 if (io->ipsec_out_esp_sa != NULL) 4236 IPSA_REFHOLD(io->ipsec_out_esp_sa); 4237 if (io->ipsec_out_polhead != NULL) 4238 IPPH_REFHOLD(io->ipsec_out_polhead); 4239 if (io->ipsec_out_policy != NULL) 4240 IPPOL_REFHOLD(io->ipsec_out_policy); 4241 if (io->ipsec_out_act != NULL) 4242 IPACT_REFHOLD(io->ipsec_out_act); 4243 if (io->ipsec_out_latch != NULL) 4244 IPLATCH_REFHOLD(io->ipsec_out_latch); 4245 if (io->ipsec_out_cred != NULL) 4246 crhold(io->ipsec_out_cred); 4247 4248 /* 4249 * Copy everything, but preserve the free routine provided by 4250 * ipsec_alloc_ipsec_out(). 
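	 * The frtn_t embedded in the freshly allocated block points at the
	 * new buffer; blindly bcopy()ing the old one over it would make
	 * freeb() run the old free routine on the old argument, so it is
	 * saved and restored around the bcopy() below.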
4251 */ 4252 nio = (ipsec_out_t *)nmp->b_rptr; 4253 nfrtn = nio->ipsec_out_frtn; 4254 bcopy(io, nio, sizeof (*io)); 4255 nio->ipsec_out_frtn = nfrtn; 4256 4257 nmp->b_cont = cont; 4258 4259 return (nmp); 4260 } 4261 4262 static void 4263 ipsec_out_release_refs(ipsec_out_t *io) 4264 { 4265 netstack_t *ns = io->ipsec_out_ns; 4266 4267 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4268 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4269 ASSERT(io->ipsec_out_ns != NULL); 4270 4271 /* Note: IPSA_REFRELE is multi-line macro */ 4272 if (io->ipsec_out_ah_sa != NULL) 4273 IPSA_REFRELE(io->ipsec_out_ah_sa); 4274 if (io->ipsec_out_esp_sa != NULL) 4275 IPSA_REFRELE(io->ipsec_out_esp_sa); 4276 if (io->ipsec_out_polhead != NULL) 4277 IPPH_REFRELE(io->ipsec_out_polhead, ns); 4278 if (io->ipsec_out_policy != NULL) 4279 IPPOL_REFRELE(io->ipsec_out_policy, ns); 4280 if (io->ipsec_out_act != NULL) 4281 IPACT_REFRELE(io->ipsec_out_act); 4282 if (io->ipsec_out_cred != NULL) { 4283 crfree(io->ipsec_out_cred); 4284 io->ipsec_out_cred = NULL; 4285 } 4286 if (io->ipsec_out_latch) { 4287 IPLATCH_REFRELE(io->ipsec_out_latch, ns); 4288 io->ipsec_out_latch = NULL; 4289 } 4290 } 4291 4292 static void 4293 ipsec_out_free(void *arg) 4294 { 4295 ipsec_out_t *io = (ipsec_out_t *)arg; 4296 ipsec_out_release_refs(io); 4297 kmem_cache_free(ipsec_info_cache, arg); 4298 } 4299 4300 static void 4301 ipsec_in_release_refs(ipsec_in_t *ii) 4302 { 4303 netstack_t *ns = ii->ipsec_in_ns; 4304 4305 ASSERT(ii->ipsec_in_ns != NULL); 4306 4307 /* Note: IPSA_REFRELE is multi-line macro */ 4308 if (ii->ipsec_in_ah_sa != NULL) 4309 IPSA_REFRELE(ii->ipsec_in_ah_sa); 4310 if (ii->ipsec_in_esp_sa != NULL) 4311 IPSA_REFRELE(ii->ipsec_in_esp_sa); 4312 if (ii->ipsec_in_policy != NULL) 4313 IPPH_REFRELE(ii->ipsec_in_policy, ns); 4314 if (ii->ipsec_in_da != NULL) { 4315 freeb(ii->ipsec_in_da); 4316 ii->ipsec_in_da = NULL; 4317 } 4318 } 4319 4320 static void 4321 ipsec_in_free(void *arg) 4322 { 4323 ipsec_in_t *ii = (ipsec_in_t *)arg; 4324 ipsec_in_release_refs(ii); 4325 kmem_cache_free(ipsec_info_cache, arg); 4326 } 4327 4328 /* 4329 * This is called only for outbound datagrams if the datagram needs to 4330 * go out secure. A NULL mp can be passed to get an ipsec_out. This 4331 * facility is used by ip_unbind. 4332 * 4333 * NOTE : o As the data part could be modified by ipsec_out_process etc. 4334 * we can't make it fast by calling a dup. 4335 */ 4336 mblk_t * 4337 ipsec_alloc_ipsec_out(netstack_t *ns) 4338 { 4339 mblk_t *ipsec_mp; 4340 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4341 4342 if (io == NULL) 4343 return (NULL); 4344 4345 bzero(io, sizeof (ipsec_out_t)); 4346 4347 io->ipsec_out_type = IPSEC_OUT; 4348 io->ipsec_out_len = sizeof (ipsec_out_t); 4349 io->ipsec_out_frtn.free_func = ipsec_out_free; 4350 io->ipsec_out_frtn.free_arg = (char *)io; 4351 4352 /* 4353 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code 4354 * using ipsec_out_zoneid should assert that the zoneid has been set to 4355 * a sane value. 
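	 * For example, a consumer could check (illustrative only):
	 *
	 *	ASSERT(io->ipsec_out_zoneid != ALL_ZONES);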
4356 */ 4357 io->ipsec_out_zoneid = ALL_ZONES; 4358 io->ipsec_out_ns = ns; /* No netstack_hold */ 4359 4360 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 4361 &io->ipsec_out_frtn); 4362 if (ipsec_mp == NULL) { 4363 ipsec_out_free(io); 4364 4365 return (NULL); 4366 } 4367 ipsec_mp->b_datap->db_type = M_CTL; 4368 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 4369 4370 return (ipsec_mp); 4371 } 4372 4373 /* 4374 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4375 * Otherwise initialize using conn. 4376 * 4377 * If pol is non-null, we consume a reference to it. 4378 */ 4379 mblk_t * 4380 ipsec_attach_ipsec_out(mblk_t **mp, conn_t *connp, ipsec_policy_t *pol, 4381 uint8_t proto, netstack_t *ns) 4382 { 4383 mblk_t *ipsec_mp; 4384 ipsec_stack_t *ipss = ns->netstack_ipsec; 4385 4386 ASSERT((pol != NULL) || (connp != NULL)); 4387 4388 ipsec_mp = ipsec_alloc_ipsec_out(ns); 4389 if (ipsec_mp == NULL) { 4390 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4391 "ipsec_attach_ipsec_out: Allocation failure\n"); 4392 ip_drop_packet(*mp, B_FALSE, NULL, NULL, 4393 DROPPER(ipss, ipds_spd_nomem), 4394 &ipss->ipsec_spd_dropper); 4395 *mp = NULL; 4396 return (NULL); 4397 } 4398 ipsec_mp->b_cont = *mp; 4399 /* 4400 * If *mp is NULL, ipsec_init_ipsec_out() won't/should not be using it. 4401 */ 4402 return (ipsec_init_ipsec_out(ipsec_mp, mp, connp, pol, proto, ns)); 4403 } 4404 4405 /* 4406 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4407 * Otherwise initialize using conn. 4408 * 4409 * If pol is non-null, we consume a reference to it. 4410 */ 4411 mblk_t * 4412 ipsec_init_ipsec_out(mblk_t *ipsec_mp, mblk_t **mp, conn_t *connp, 4413 ipsec_policy_t *pol, uint8_t proto, netstack_t *ns) 4414 { 4415 ipsec_out_t *io; 4416 ipsec_policy_t *p; 4417 ipha_t *ipha; 4418 ip6_t *ip6h; 4419 ipsec_stack_t *ipss = ns->netstack_ipsec; 4420 4421 ASSERT(ipsec_mp->b_cont == *mp); 4422 4423 ASSERT((pol != NULL) || (connp != NULL)); 4424 4425 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4426 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4427 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4428 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4429 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4430 io->ipsec_out_latch = NULL; 4431 /* 4432 * Set the zoneid when we have the connp. 4433 * Otherwise, we're called from ip_wput_attach_policy() who will take 4434 * care of setting the zoneid. 4435 */ 4436 if (connp != NULL) 4437 io->ipsec_out_zoneid = connp->conn_zoneid; 4438 4439 io->ipsec_out_ns = ns; /* No netstack_hold */ 4440 4441 if (*mp != NULL) { 4442 ipha = (ipha_t *)(*mp)->b_rptr; 4443 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4444 io->ipsec_out_v4 = B_TRUE; 4445 ip6h = NULL; 4446 } else { 4447 io->ipsec_out_v4 = B_FALSE; 4448 ip6h = (ip6_t *)ipha; 4449 ipha = NULL; 4450 } 4451 } else { 4452 ASSERT(connp != NULL && connp->conn_policy_cached); 4453 ip6h = NULL; 4454 ipha = NULL; 4455 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4456 } 4457 4458 p = NULL; 4459 4460 /* 4461 * Take latched policies over global policy. Check here again for 4462 * this, in case we had conn_latch set while the packet was flying 4463 * around in IP. 
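	 * A latch is fixed per-connection state, so preferring it keeps a
	 * connection's level of protection stable even if global policy
	 * changes underneath it.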
4464 */ 4465 if (connp != NULL && connp->conn_latch != NULL) { 4466 ASSERT(ns == connp->conn_netstack); 4467 p = connp->conn_latch->ipl_out_policy; 4468 io->ipsec_out_latch = connp->conn_latch; 4469 IPLATCH_REFHOLD(connp->conn_latch); 4470 if (p != NULL) { 4471 IPPOL_REFHOLD(p); 4472 } 4473 io->ipsec_out_src_port = connp->conn_lport; 4474 io->ipsec_out_dst_port = connp->conn_fport; 4475 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4476 if (pol != NULL) 4477 IPPOL_REFRELE(pol, ns); 4478 } else if (pol != NULL) { 4479 ipsec_selector_t sel; 4480 4481 bzero((void*)&sel, sizeof (sel)); 4482 4483 p = pol; 4484 /* 4485 * conn does not have the port information. Get 4486 * it from the packet. 4487 */ 4488 4489 if (!ipsec_init_outbound_ports(&sel, *mp, ipha, ip6h, 0, 4490 ns->netstack_ipsec)) { 4491 /* Callee did ip_drop_packet() on *mp. */ 4492 *mp = NULL; 4493 freeb(ipsec_mp); 4494 return (NULL); 4495 } 4496 io->ipsec_out_src_port = sel.ips_local_port; 4497 io->ipsec_out_dst_port = sel.ips_remote_port; 4498 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4499 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4500 } 4501 4502 io->ipsec_out_proto = proto; 4503 io->ipsec_out_use_global_policy = B_TRUE; 4504 io->ipsec_out_secure = (p != NULL); 4505 io->ipsec_out_policy = p; 4506 4507 if (p == NULL) { 4508 if (connp->conn_policy != NULL) { 4509 io->ipsec_out_secure = B_TRUE; 4510 ASSERT(io->ipsec_out_latch == NULL); 4511 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4512 io->ipsec_out_need_policy = B_TRUE; 4513 ASSERT(io->ipsec_out_polhead == NULL); 4514 IPPH_REFHOLD(connp->conn_policy); 4515 io->ipsec_out_polhead = connp->conn_policy; 4516 } 4517 } else { 4518 /* Handle explicit drop action. */ 4519 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4520 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4521 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4522 DROPPER(ipss, ipds_spd_explicit), 4523 &ipss->ipsec_spd_dropper); 4524 *mp = NULL; 4525 ipsec_mp = NULL; 4526 } 4527 } 4528 4529 return (ipsec_mp); 4530 } 4531 4532 /* 4533 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4534 * and keep track of what-if-any IPsec processing will be applied to the 4535 * datagram. 4536 */ 4537 mblk_t * 4538 ipsec_in_alloc(boolean_t isv4, netstack_t *ns) 4539 { 4540 mblk_t *ipsec_in; 4541 ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4542 4543 if (ii == NULL) 4544 return (NULL); 4545 4546 bzero(ii, sizeof (ipsec_info_t)); 4547 ii->ipsec_in_type = IPSEC_IN; 4548 ii->ipsec_in_len = sizeof (ipsec_in_t); 4549 4550 ii->ipsec_in_v4 = isv4; 4551 ii->ipsec_in_secure = B_TRUE; 4552 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4553 ii->ipsec_in_stackid = ns->netstack_stackid; 4554 4555 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4556 ii->ipsec_in_frtn.free_arg = (char *)ii; 4557 4558 ii->ipsec_in_zoneid = ALL_ZONES; /* default for received packets */ 4559 4560 ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI, 4561 &ii->ipsec_in_frtn); 4562 if (ipsec_in == NULL) { 4563 ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n")); 4564 ipsec_in_free(ii); 4565 return (NULL); 4566 } 4567 4568 ipsec_in->b_datap->db_type = M_CTL; 4569 ipsec_in->b_wptr += sizeof (ipsec_info_t); 4570 4571 return (ipsec_in); 4572 } 4573 4574 /* 4575 * This is called from ip_wput_local when a packet which needs 4576 * security is looped back, to convert the IPSEC_OUT to a IPSEC_IN 4577 * before fanout, where the policy check happens. 
In most of the 4578 * cases, IPSEC processing has *never* been done. There is one case 4579 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where 4580 * the packet is destined for localhost, IPSEC processing has already 4581 * been done. 4582 * 4583 * Future: This could happen after SA selection has occurred for 4584 * outbound.. which will tell us who the src and dst identities are.. 4585 * Then it's just a matter of splicing the ah/esp SA pointers from the 4586 * ipsec_out_t to the ipsec_in_t. 4587 */ 4588 void 4589 ipsec_out_to_in(mblk_t *ipsec_mp) 4590 { 4591 ipsec_in_t *ii; 4592 ipsec_out_t *io; 4593 ipsec_policy_t *pol; 4594 ipsec_action_t *act; 4595 boolean_t v4, icmp_loopback; 4596 zoneid_t zoneid; 4597 netstack_t *ns; 4598 4599 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4600 4601 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4602 4603 v4 = io->ipsec_out_v4; 4604 zoneid = io->ipsec_out_zoneid; 4605 icmp_loopback = io->ipsec_out_icmp_loopback; 4606 ns = io->ipsec_out_ns; 4607 4608 act = io->ipsec_out_act; 4609 if (act == NULL) { 4610 pol = io->ipsec_out_policy; 4611 if (pol != NULL) { 4612 act = pol->ipsp_act; 4613 IPACT_REFHOLD(act); 4614 } 4615 } 4616 io->ipsec_out_act = NULL; 4617 4618 ipsec_out_release_refs(io); /* No netstack_rele/hold needed */ 4619 4620 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4621 bzero(ii, sizeof (ipsec_in_t)); 4622 ii->ipsec_in_type = IPSEC_IN; 4623 ii->ipsec_in_len = sizeof (ipsec_in_t); 4624 ii->ipsec_in_loopback = B_TRUE; 4625 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4626 4627 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4628 ii->ipsec_in_frtn.free_arg = (char *)ii; 4629 ii->ipsec_in_action = act; 4630 ii->ipsec_in_zoneid = zoneid; 4631 4632 /* 4633 * In most of the cases, we can't look at the ipsec_out_XXX_sa 4634 * because this never went through IPSEC processing. So, look at 4635 * the requests and infer whether it would have gone through 4636 * IPSEC processing or not. Initialize the "done" fields with 4637 * the requests. The possible values for "done" fields are : 4638 * 4639 * 1) zero, indicates that a particular preference was never 4640 * requested. 4641 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/ 4642 * IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that 4643 * IPSEC processing has been completed. 4644 */ 4645 ii->ipsec_in_secure = B_TRUE; 4646 ii->ipsec_in_v4 = v4; 4647 ii->ipsec_in_icmp_loopback = icmp_loopback; 4648 } 4649 4650 /* 4651 * Consults global policy to see whether this datagram should 4652 * go out secure. If so it attaches a ipsec_mp in front and 4653 * returns. 4654 */ 4655 mblk_t * 4656 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, 4657 conn_t *connp, boolean_t unspec_src, zoneid_t zoneid) 4658 { 4659 mblk_t *mp; 4660 ipsec_out_t *io = NULL; 4661 ipsec_selector_t sel; 4662 uint_t ill_index; 4663 boolean_t conn_dontroutex; 4664 boolean_t conn_multicast_loopx; 4665 boolean_t policy_present; 4666 ip_stack_t *ipst = ire->ire_ipst; 4667 netstack_t *ns = ipst->ips_netstack; 4668 ipsec_stack_t *ipss = ns->netstack_ipsec; 4669 4670 ASSERT((ipha != NULL && ip6h == NULL) || 4671 (ip6h != NULL && ipha == NULL)); 4672 4673 bzero((void*)&sel, sizeof (sel)); 4674 4675 if (ipha != NULL) 4676 policy_present = ipss->ipsec_outbound_v4_policy_present; 4677 else 4678 policy_present = ipss->ipsec_outbound_v6_policy_present; 4679 /* 4680 * Fast Path to see if there is any policy. 
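	 * The ipsec_outbound_v[46]_policy_present flags are maintained by
	 * ipsec_update_present_flags(), so when they are clear we can skip
	 * selector extraction and policy lookup entirely.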
4681 */ 4682 if (!policy_present) { 4683 if (ipsec_mp->b_datap->db_type == M_CTL) { 4684 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4685 if (!io->ipsec_out_secure) { 4686 /* 4687 * If there is no global policy and ip_wput 4688 * or ip_wput_multicast has attached this mp 4689 * for multicast case, free the ipsec_mp and 4690 * return the original mp. 4691 */ 4692 mp = ipsec_mp->b_cont; 4693 freeb(ipsec_mp); 4694 ipsec_mp = mp; 4695 io = NULL; 4696 } 4697 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4698 } 4699 if (((io == NULL) || (io->ipsec_out_polhead == NULL)) && 4700 ((connp == NULL) || (connp->conn_policy == NULL))) 4701 return (ipsec_mp); 4702 } 4703 4704 ill_index = 0; 4705 conn_multicast_loopx = conn_dontroutex = B_FALSE; 4706 mp = ipsec_mp; 4707 if (ipsec_mp->b_datap->db_type == M_CTL) { 4708 mp = ipsec_mp->b_cont; 4709 /* 4710 * This is a connection where we have some per-socket 4711 * policy or ip_wput has attached an ipsec_mp for 4712 * the multicast datagram. 4713 */ 4714 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4715 if (!io->ipsec_out_secure) { 4716 /* 4717 * This ipsec_mp was allocated in ip_wput or 4718 * ip_wput_multicast so that we will know the 4719 * value of ill_index, conn_dontroute, 4720 * conn_multicast_loop in the multicast case if 4721 * we inherit global policy here. 4722 */ 4723 ill_index = io->ipsec_out_ill_index; 4724 conn_dontroutex = io->ipsec_out_dontroute; 4725 conn_multicast_loopx = io->ipsec_out_multicast_loop; 4726 freeb(ipsec_mp); 4727 ipsec_mp = mp; 4728 io = NULL; 4729 } 4730 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4731 } 4732 4733 if (ipha != NULL) { 4734 sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ? 4735 ipha->ipha_src : ire->ire_src_addr); 4736 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4737 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4738 sel.ips_isv4 = B_TRUE; 4739 } else { 4740 ushort_t hdr_len; 4741 uint8_t *nexthdrp; 4742 boolean_t is_fragment; 4743 4744 sel.ips_isv4 = B_FALSE; 4745 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4746 if (!unspec_src) 4747 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4748 } else { 4749 sel.ips_local_addr_v6 = ip6h->ip6_src; 4750 } 4751 4752 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, mp, &is_fragment); 4753 if (is_fragment) { 4754 /* 4755 * It's a packet fragment for a packet that 4756 * we have already processed (since IPsec processing 4757 * is done before fragmentation), so we don't 4758 * have to do policy checks again. Fragments can 4759 * come back to us for processing if they have 4760 * been queued up due to flow control. 4761 */ 4762 if (ipsec_mp->b_datap->db_type == M_CTL) { 4763 mp = ipsec_mp->b_cont; 4764 freeb(ipsec_mp); 4765 ipsec_mp = mp; 4766 } 4767 return (ipsec_mp); 4768 } 4769 4770 /* IPv6 common-case. */ 4771 sel.ips_protocol = ip6h->ip6_nxt; 4772 switch (ip6h->ip6_nxt) { 4773 case IPPROTO_TCP: 4774 case IPPROTO_UDP: 4775 case IPPROTO_SCTP: 4776 case IPPROTO_ICMPV6: 4777 break; 4778 default: 4779 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4780 &hdr_len, &nexthdrp)) { 4781 BUMP_MIB(&ipst->ips_ip6_mib, 4782 ipIfStatsOutDiscards); 4783 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4784 return (NULL); 4785 } 4786 sel.ips_protocol = *nexthdrp; 4787 break; 4788 } 4789 } 4790 4791 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4792 if (ipha != NULL) { 4793 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4794 } else { 4795 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4796 } 4797 4798 /* Callee dropped the packet. 
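		 * (ipsec_init_outbound_ports() disposed of the packet via
		 * ip_drop_packet(), so only NULL is returned here.)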
*/ 4799 return (NULL); 4800 } 4801 4802 if (io != NULL) { 4803 /* 4804 * We seem to have some local policy (we already have 4805 * an ipsec_out). Look at global policy and see 4806 * whether we have to inherit or not. 4807 */ 4808 io->ipsec_out_need_policy = B_FALSE; 4809 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, 4810 &sel, ns); 4811 ASSERT((io->ipsec_out_policy != NULL) || 4812 (io->ipsec_out_act != NULL)); 4813 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4814 return (ipsec_mp); 4815 } 4816 /* 4817 * We pass in a pointer to a pointer because mp can become 4818 * NULL due to allocation failures or explicit drops. Callers 4819 * of this function should assume a NULL mp means the packet 4820 * was dropped. 4821 */ 4822 ipsec_mp = ipsec_attach_global_policy(&mp, connp, &sel, ns); 4823 if (ipsec_mp == NULL) 4824 return (mp); 4825 4826 /* 4827 * Copy the right port information. 4828 */ 4829 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4830 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4831 4832 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4833 ASSERT((io->ipsec_out_policy != NULL) || 4834 (io->ipsec_out_act != NULL)); 4835 io->ipsec_out_src_port = sel.ips_local_port; 4836 io->ipsec_out_dst_port = sel.ips_remote_port; 4837 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4838 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4839 /* 4840 * Set ill_index, conn_dontroute and conn_multicast_loop 4841 * for multicast datagrams. 4842 */ 4843 io->ipsec_out_ill_index = ill_index; 4844 io->ipsec_out_dontroute = conn_dontroutex; 4845 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4846 4847 if (zoneid == ALL_ZONES) 4848 zoneid = GLOBAL_ZONEID; 4849 io->ipsec_out_zoneid = zoneid; 4850 return (ipsec_mp); 4851 } 4852 4853 /* 4854 * When appropriate, this function caches inbound and outbound policy 4855 * for this connection. 4856 * 4857 * XXX need to work out more details about per-interface policy and 4858 * caching here! 4859 * 4860 * XXX may want to split inbound and outbound caching for ill.. 4861 */ 4862 int 4863 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4864 { 4865 boolean_t global_policy_present; 4866 netstack_t *ns = connp->conn_netstack; 4867 ipsec_stack_t *ipss = ns->netstack_ipsec; 4868 4869 /* 4870 * There is no policy latching for ICMP sockets because we can't 4871 * decide on which policy to use until we see the packet and get 4872 * type/code selectors. 4873 */ 4874 if (connp->conn_ulp == IPPROTO_ICMP || 4875 connp->conn_ulp == IPPROTO_ICMPV6) { 4876 connp->conn_in_enforce_policy = 4877 connp->conn_out_enforce_policy = B_TRUE; 4878 if (connp->conn_latch != NULL) { 4879 IPLATCH_REFRELE(connp->conn_latch, ns); 4880 connp->conn_latch = NULL; 4881 } 4882 connp->conn_flags |= IPCL_CHECK_POLICY; 4883 return (0); 4884 } 4885 4886 global_policy_present = isv4 ? 
	    (ipss->ipsec_outbound_v4_policy_present ||
	    ipss->ipsec_inbound_v4_policy_present) :
	    (ipss->ipsec_outbound_v6_policy_present ||
	    ipss->ipsec_inbound_v6_policy_present);

	if ((connp->conn_policy != NULL) || global_policy_present) {
		ipsec_selector_t sel;
		ipsec_policy_t *p;

		if (connp->conn_latch == NULL &&
		    (connp->conn_latch = iplatch_create()) == NULL) {
			return (ENOMEM);
		}

		sel.ips_protocol = connp->conn_ulp;
		sel.ips_local_port = connp->conn_lport;
		sel.ips_remote_port = connp->conn_fport;
		sel.ips_is_icmp_inv_acq = 0;
		sel.ips_isv4 = isv4;
		if (isv4) {
			sel.ips_local_addr_v4 = connp->conn_src;
			sel.ips_remote_addr_v4 = connp->conn_rem;
		} else {
			sel.ips_local_addr_v6 = connp->conn_srcv6;
			sel.ips_remote_addr_v6 = connp->conn_remv6;
		}

		p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel,
		    ns);
		if (connp->conn_latch->ipl_in_policy != NULL)
			IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns);
		connp->conn_latch->ipl_in_policy = p;
		connp->conn_in_enforce_policy = (p != NULL);

		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel,
		    ns);
		if (connp->conn_latch->ipl_out_policy != NULL)
			IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns);
		connp->conn_latch->ipl_out_policy = p;
		connp->conn_out_enforce_policy = (p != NULL);

		/* Clear the latched actions too, in case we're recaching. */
		if (connp->conn_latch->ipl_out_action != NULL)
			IPACT_REFRELE(connp->conn_latch->ipl_out_action);
		if (connp->conn_latch->ipl_in_action != NULL)
			IPACT_REFRELE(connp->conn_latch->ipl_in_action);
	}

	/*
	 * We may or may not have policy for this endpoint.  We still set
	 * conn_policy_cached so that inbound datagrams don't have to look
	 * at global policy, as policy is considered latched for these
	 * endpoints.  We should not set conn_policy_cached until the conn
	 * reflects the actual policy.  If we *set* this before inheriting
	 * the policy, there is a window where the CONN_INBOUND_POLICY_PRESENT
	 * check will consult neither the policy on the conn (because we have
	 * not yet copied the policy onto the conn, and hence have not set
	 * conn_in_enforce_policy) nor the global policy (because
	 * conn_policy_cached is already set).
4946 */ 4947 connp->conn_policy_cached = B_TRUE; 4948 if (connp->conn_in_enforce_policy) 4949 connp->conn_flags |= IPCL_CHECK_POLICY; 4950 return (0); 4951 } 4952 4953 void 4954 iplatch_free(ipsec_latch_t *ipl, netstack_t *ns) 4955 { 4956 if (ipl->ipl_out_policy != NULL) 4957 IPPOL_REFRELE(ipl->ipl_out_policy, ns); 4958 if (ipl->ipl_in_policy != NULL) 4959 IPPOL_REFRELE(ipl->ipl_in_policy, ns); 4960 if (ipl->ipl_in_action != NULL) 4961 IPACT_REFRELE(ipl->ipl_in_action); 4962 if (ipl->ipl_out_action != NULL) 4963 IPACT_REFRELE(ipl->ipl_out_action); 4964 if (ipl->ipl_local_cid != NULL) 4965 IPSID_REFRELE(ipl->ipl_local_cid); 4966 if (ipl->ipl_remote_cid != NULL) 4967 IPSID_REFRELE(ipl->ipl_remote_cid); 4968 if (ipl->ipl_local_id != NULL) 4969 crfree(ipl->ipl_local_id); 4970 mutex_destroy(&ipl->ipl_lock); 4971 kmem_free(ipl, sizeof (*ipl)); 4972 } 4973 4974 ipsec_latch_t * 4975 iplatch_create() 4976 { 4977 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4978 if (ipl == NULL) 4979 return (ipl); 4980 bzero(ipl, sizeof (*ipl)); 4981 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4982 ipl->ipl_refcnt = 1; 4983 return (ipl); 4984 } 4985 4986 /* 4987 * Hash function for ID hash table. 4988 */ 4989 static uint32_t 4990 ipsid_hash(int idtype, char *idstring) 4991 { 4992 uint32_t hval = idtype; 4993 unsigned char c; 4994 4995 while ((c = *idstring++) != 0) { 4996 hval = (hval << 4) | (hval >> 28); 4997 hval ^= c; 4998 } 4999 hval = hval ^ (hval >> 16); 5000 return (hval & (IPSID_HASHSIZE-1)); 5001 } 5002 5003 /* 5004 * Look up identity string in hash table. Return identity object 5005 * corresponding to the name -- either preexisting, or newly allocated. 5006 * 5007 * Return NULL if we need to allocate a new one and can't get memory. 5008 */ 5009 ipsid_t * 5010 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 5011 { 5012 ipsid_t *retval; 5013 char *nstr; 5014 int idlen = strlen(idstring) + 1; 5015 ipsec_stack_t *ipss = ns->netstack_ipsec; 5016 ipsif_t *bucket; 5017 5018 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 5019 5020 mutex_enter(&bucket->ipsif_lock); 5021 5022 for (retval = bucket->ipsif_head; retval != NULL; 5023 retval = retval->ipsid_next) { 5024 if (idtype != retval->ipsid_type) 5025 continue; 5026 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 5027 continue; 5028 5029 IPSID_REFHOLD(retval); 5030 mutex_exit(&bucket->ipsif_lock); 5031 return (retval); 5032 } 5033 5034 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 5035 if (!retval) { 5036 mutex_exit(&bucket->ipsif_lock); 5037 return (NULL); 5038 } 5039 5040 nstr = kmem_alloc(idlen, KM_NOSLEEP); 5041 if (!nstr) { 5042 mutex_exit(&bucket->ipsif_lock); 5043 kmem_free(retval, sizeof (*retval)); 5044 return (NULL); 5045 } 5046 5047 retval->ipsid_refcnt = 1; 5048 retval->ipsid_next = bucket->ipsif_head; 5049 if (retval->ipsid_next != NULL) 5050 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 5051 retval->ipsid_ptpn = &bucket->ipsif_head; 5052 retval->ipsid_type = idtype; 5053 retval->ipsid_cid = nstr; 5054 bucket->ipsif_head = retval; 5055 bcopy(idstring, nstr, idlen); 5056 mutex_exit(&bucket->ipsif_lock); 5057 5058 return (retval); 5059 } 5060 5061 /* 5062 * Garbage collect the identity hash table. 
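 * Only entries whose reference count has dropped to zero are unlinked
 * and freed; held identities are left in place.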
5063 */ 5064 void 5065 ipsid_gc(netstack_t *ns) 5066 { 5067 int i, len; 5068 ipsid_t *id, *nid; 5069 ipsif_t *bucket; 5070 ipsec_stack_t *ipss = ns->netstack_ipsec; 5071 5072 for (i = 0; i < IPSID_HASHSIZE; i++) { 5073 bucket = &ipss->ipsec_ipsid_buckets[i]; 5074 mutex_enter(&bucket->ipsif_lock); 5075 for (id = bucket->ipsif_head; id != NULL; id = nid) { 5076 nid = id->ipsid_next; 5077 if (id->ipsid_refcnt == 0) { 5078 *id->ipsid_ptpn = nid; 5079 if (nid != NULL) 5080 nid->ipsid_ptpn = id->ipsid_ptpn; 5081 len = strlen(id->ipsid_cid) + 1; 5082 kmem_free(id->ipsid_cid, len); 5083 kmem_free(id, sizeof (*id)); 5084 } 5085 } 5086 mutex_exit(&bucket->ipsif_lock); 5087 } 5088 } 5089 5090 /* 5091 * Return true if two identities are the same. 5092 */ 5093 boolean_t 5094 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 5095 { 5096 if (id1 == id2) 5097 return (B_TRUE); 5098 #ifdef DEBUG 5099 if ((id1 == NULL) || (id2 == NULL)) 5100 return (B_FALSE); 5101 /* 5102 * test that we're interning id's correctly.. 5103 */ 5104 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 5105 (id1->ipsid_type != id2->ipsid_type)); 5106 #endif 5107 return (B_FALSE); 5108 } 5109 5110 /* 5111 * Initialize identity table; called during module initialization. 5112 */ 5113 static void 5114 ipsid_init(netstack_t *ns) 5115 { 5116 ipsif_t *bucket; 5117 int i; 5118 ipsec_stack_t *ipss = ns->netstack_ipsec; 5119 5120 for (i = 0; i < IPSID_HASHSIZE; i++) { 5121 bucket = &ipss->ipsec_ipsid_buckets[i]; 5122 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 5123 } 5124 } 5125 5126 /* 5127 * Free identity table (preparatory to module unload) 5128 */ 5129 static void 5130 ipsid_fini(netstack_t *ns) 5131 { 5132 ipsif_t *bucket; 5133 int i; 5134 ipsec_stack_t *ipss = ns->netstack_ipsec; 5135 5136 for (i = 0; i < IPSID_HASHSIZE; i++) { 5137 bucket = &ipss->ipsec_ipsid_buckets[i]; 5138 ASSERT(bucket->ipsif_head == NULL); 5139 mutex_destroy(&bucket->ipsif_lock); 5140 } 5141 } 5142 5143 /* 5144 * Update the minimum and maximum supported key sizes for the 5145 * specified algorithm. Must be called while holding the algorithms lock. 5146 */ 5147 void 5148 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 5149 netstack_t *ns) 5150 { 5151 size_t crypto_min = (size_t)-1, crypto_max = 0; 5152 size_t cur_crypto_min, cur_crypto_max; 5153 boolean_t is_valid; 5154 crypto_mechanism_info_t *mech_infos; 5155 uint_t nmech_infos; 5156 int crypto_rc, i; 5157 crypto_mech_usage_t mask; 5158 ipsec_stack_t *ipss = ns->netstack_ipsec; 5159 5160 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 5161 5162 /* 5163 * Compute the min, max, and default key sizes (in number of 5164 * increments to the default key size in bits) as defined 5165 * by the algorithm mappings. This range of key sizes is used 5166 * for policy related operations. The effective key sizes 5167 * supported by the framework could be more limited than 5168 * those defined for an algorithm. 
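	 * For example (illustrative values): an algorithm registered with
	 * alg_key_sizes = { 128, 64, 256 } and a nonzero alg_increment of
	 * 64 gets a default of 128 bits and a policy range of 64..256 bits
	 * in 64-bit steps.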
	 */
	alg->alg_default_bits = alg->alg_key_sizes[0];
	alg->alg_default = 0;
	if (alg->alg_increment != 0) {
		/* Key sizes are defined by range & increment. */
		alg->alg_minbits = alg->alg_key_sizes[1];
		alg->alg_maxbits = alg->alg_key_sizes[2];
	} else if (alg->alg_nkey_sizes == 0) {
		/* No specified key size for algorithm. */
		alg->alg_minbits = alg->alg_maxbits = 0;
	} else {
		/* Key sizes are defined by enumeration. */
		alg->alg_minbits = (uint16_t)-1;
		alg->alg_maxbits = 0;

		for (i = 0; i < alg->alg_nkey_sizes; i++) {
			if (alg->alg_key_sizes[i] < alg->alg_minbits)
				alg->alg_minbits = alg->alg_key_sizes[i];
			if (alg->alg_key_sizes[i] > alg->alg_maxbits)
				alg->alg_maxbits = alg->alg_key_sizes[i];
		}
	}

	if (!(alg->alg_flags & ALG_FLAG_VALID))
		return;

	/*
	 * Mechanisms do not apply to the NULL encryption
	 * algorithm, so simply return for this case.
	 */
	if (alg->alg_id == SADB_EALG_NULL)
		return;

	/*
	 * Find the min and max key sizes supported by the cryptographic
	 * framework providers.
	 */

	/* Get the key sizes supported by the framework. */
	crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type,
	    &mech_infos, &nmech_infos, KM_SLEEP);
	if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) {
		alg->alg_flags &= ~ALG_FLAG_VALID;
		return;
	}

	/* Min and max key sizes supported by framework. */
	for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) {
		int unit_bits;

		/*
		 * Ignore entries that do not support the operations
		 * needed for the algorithm type.
		 */
		if (alg_type == IPSEC_ALG_AUTH) {
			mask = CRYPTO_MECH_USAGE_MAC;
		} else {
			mask = CRYPTO_MECH_USAGE_ENCRYPT |
			    CRYPTO_MECH_USAGE_DECRYPT;
		}
		if ((mech_infos[i].mi_usage & mask) != mask)
			continue;

		unit_bits = (mech_infos[i].mi_keysize_unit ==
		    CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1;
		/* Adjust min/max supported by framework. */
		cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits;
		cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits;

		if (cur_crypto_min < crypto_min)
			crypto_min = cur_crypto_min;

		/*
		 * CRYPTO_EFFECTIVELY_INFINITE is a special value of
		 * the crypto framework which means "no upper limit".
		 */
		if (mech_infos[i].mi_max_key_size ==
		    CRYPTO_EFFECTIVELY_INFINITE) {
			crypto_max = (size_t)-1;
		} else if (cur_crypto_max > crypto_max) {
			crypto_max = cur_crypto_max;
		}

		is_valid = B_TRUE;
	}

	kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) *
	    nmech_infos);

	if (!is_valid) {
		/* No key sizes supported by framework. */
		alg->alg_flags &= ~ALG_FLAG_VALID;
		return;
	}

	/*
	 * Determine min and max key sizes from the alg_key_sizes[]
	 * defined for the algorithm entry.  Adjust key sizes based on
	 * those supported by the framework.
	 */
	alg->alg_ef_default_bits = alg->alg_key_sizes[0];

	/*
	 * For backwards compatibility, assume that the IV length
	 * is the same as the data length.
	 */
	alg->alg_ivlen = alg->alg_datalen;

	/*
	 * Copy any algorithm parameters (if provided) into dedicated
	 * elements in the ipsec_alginfo_t structure.
	 * There may be a better place to put this code.
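	 * The layout consumed by the switch below is: alg_params[0] = IV
	 * length, alg_params[1] = ICV length, alg_params[2] = salt length
	 * (all in bytes).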
	 */
	for (i = 0; i < alg->alg_nparams; i++) {
		switch (i) {
		case 0:
			/* Initialisation Vector length (bytes) */
			alg->alg_ivlen = alg->alg_params[0];
			break;
		case 1:
			/* Integrity Check Vector length (bytes) */
			alg->alg_icvlen = alg->alg_params[1];
			break;
		case 2:
			/* Salt length (bytes) */
			alg->alg_saltlen = (uint8_t)alg->alg_params[2];
			break;
		default:
			break;
		}
	}

	/* Default if the IV length is not specified. */
	if (alg_type == IPSEC_ALG_ENCR && alg->alg_ivlen == 0)
		alg->alg_ivlen = alg->alg_datalen;

	alg_flag_check(alg);

	if (alg->alg_increment != 0) {
		/* Supported key sizes are defined by range & increment. */
		crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment);
		crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment);

		alg->alg_ef_minbits = MAX(alg->alg_minbits,
		    (uint16_t)crypto_min);
		alg->alg_ef_maxbits = MIN(alg->alg_maxbits,
		    (uint16_t)crypto_max);

		/*
		 * If the sizes supported by the framework are outside
		 * the range of sizes defined by the algorithm mappings,
		 * the algorithm cannot be used.  Check for this
		 * condition here.
		 */
		if (alg->alg_ef_minbits > alg->alg_ef_maxbits) {
			alg->alg_flags &= ~ALG_FLAG_VALID;
			return;
		}
		if (alg->alg_ef_default_bits < alg->alg_ef_minbits)
			alg->alg_ef_default_bits = alg->alg_ef_minbits;
		if (alg->alg_ef_default_bits > alg->alg_ef_maxbits)
			alg->alg_ef_default_bits = alg->alg_ef_maxbits;
	} else if (alg->alg_nkey_sizes == 0) {
		/* No specified key size for algorithm. */
		alg->alg_ef_minbits = alg->alg_ef_maxbits = 0;
	} else {
		/* Supported key sizes are defined by enumeration. */
		alg->alg_ef_minbits = (uint16_t)-1;
		alg->alg_ef_maxbits = 0;

		for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) {
			/*
			 * Ignore the current key size if it is not in the
			 * range of sizes supported by the framework.
			 */
			if (alg->alg_key_sizes[i] < crypto_min ||
			    alg->alg_key_sizes[i] > crypto_max)
				continue;
			if (alg->alg_key_sizes[i] < alg->alg_ef_minbits)
				alg->alg_ef_minbits = alg->alg_key_sizes[i];
			if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits)
				alg->alg_ef_maxbits = alg->alg_key_sizes[i];
			is_valid = B_TRUE;
		}

		if (!is_valid) {
			alg->alg_flags &= ~ALG_FLAG_VALID;
			return;
		}
		alg->alg_ef_default = 0;
	}
}

/*
 * Sanity check parameters provided by ipsecalgs(1m).  The algorithm is
 * assumed to have been marked valid by the caller; the flag is cleared
 * at the top of this function and set again only if all of the checks
 * below pass.
 */
void
alg_flag_check(ipsec_alginfo_t *alg)
{
	alg->alg_flags &= ~ALG_FLAG_VALID;

	/*
	 * Can't have the algorithm marked as both CCM and GCM.
	 * Check that the ALG_FLAG_COMBINED and ALG_FLAG_COUNTERMODE
	 * flags are set for CCM & GCM.
	 */
	if ((alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) ==
	    (ALG_FLAG_CCM|ALG_FLAG_GCM))
		return;
	if (alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) {
		if (!(alg->alg_flags & ALG_FLAG_COUNTERMODE))
			return;
		if (!(alg->alg_flags & ALG_FLAG_COMBINED))
			return;
	}

	/*
	 * For ALG_FLAG_COUNTERMODE, check that the parameters
	 * fit in the ipsec_nonce_t structure.
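	 * (ipsec_nonce_t provides fixed-size iv and salt fields; the
	 * sizeof checks below ensure the advertised lengths fit them.)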
5391 */ 5392 if (alg->alg_flags & ALG_FLAG_COUNTERMODE) { 5393 if (alg->alg_ivlen != sizeof (((ipsec_nonce_t *)NULL)->iv)) 5394 return; 5395 if (alg->alg_saltlen > sizeof (((ipsec_nonce_t *)NULL)->salt)) 5396 return; 5397 } 5398 if ((alg->alg_flags & ALG_FLAG_COMBINED) && 5399 (alg->alg_icvlen == 0)) 5400 return; 5401 5402 /* all is well. */ 5403 alg->alg_flags |= ALG_FLAG_VALID; 5404 } 5405 5406 /* 5407 * Free the memory used by the specified algorithm. 5408 */ 5409 void 5410 ipsec_alg_free(ipsec_alginfo_t *alg) 5411 { 5412 if (alg == NULL) 5413 return; 5414 5415 if (alg->alg_key_sizes != NULL) { 5416 kmem_free(alg->alg_key_sizes, 5417 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 5418 alg->alg_key_sizes = NULL; 5419 } 5420 if (alg->alg_block_sizes != NULL) { 5421 kmem_free(alg->alg_block_sizes, 5422 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 5423 alg->alg_block_sizes = NULL; 5424 } 5425 kmem_free(alg, sizeof (*alg)); 5426 } 5427 5428 /* 5429 * Check the validity of the specified key size for an algorithm. 5430 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 5431 */ 5432 boolean_t 5433 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 5434 { 5435 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 5436 return (B_FALSE); 5437 5438 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 5439 /* 5440 * If the key sizes are defined by enumeration, the new 5441 * key size must be equal to one of the supported values. 5442 */ 5443 int i; 5444 5445 for (i = 0; i < alg->alg_nkey_sizes; i++) 5446 if (key_size == alg->alg_key_sizes[i]) 5447 break; 5448 if (i == alg->alg_nkey_sizes) 5449 return (B_FALSE); 5450 } 5451 5452 return (B_TRUE); 5453 } 5454 5455 /* 5456 * Callback function invoked by the crypto framework when a provider 5457 * registers or unregisters. This callback updates the algorithms 5458 * tables when a crypto algorithm is no longer available or becomes 5459 * available, and triggers the freeing/creation of context templates 5460 * associated with existing SAs, if needed. 5461 * 5462 * Need to walk all stack instances since the callback is global 5463 * for all instances 5464 */ 5465 void 5466 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5467 { 5468 netstack_handle_t nh; 5469 netstack_t *ns; 5470 5471 netstack_next_init(&nh); 5472 while ((ns = netstack_next(&nh)) != NULL) { 5473 ipsec_prov_update_callback_stack(event, event_arg, ns); 5474 netstack_rele(ns); 5475 } 5476 netstack_next_fini(&nh); 5477 } 5478 5479 static void 5480 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5481 netstack_t *ns) 5482 { 5483 crypto_notify_event_change_t *prov_change = 5484 (crypto_notify_event_change_t *)event_arg; 5485 uint_t algidx, algid, algtype, mech_count, mech_idx; 5486 ipsec_alginfo_t *alg; 5487 ipsec_alginfo_t oalg; 5488 crypto_mech_name_t *mechs; 5489 boolean_t alg_changed = B_FALSE; 5490 ipsec_stack_t *ipss = ns->netstack_ipsec; 5491 5492 /* ignore events for which we didn't register */ 5493 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5494 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5495 " received from crypto framework\n", event)); 5496 return; 5497 } 5498 5499 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5500 if (mechs == NULL) 5501 return; 5502 5503 /* 5504 * Walk the list of currently defined IPsec algorithm. Update 5505 * the algorithm valid flag and trigger an update of the 5506 * SAs that depend on that algorithm. 
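	 * All table updates below are made while holding ipsec_alg_lock.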
	 */
	mutex_enter(&ipss->ipsec_alg_lock);
	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype];
		    algidx++) {

			algid = ipss->ipsec_sortlist[algtype][algidx];
			alg = ipss->ipsec_alglists[algtype][algid];
			ASSERT(alg != NULL);

			/*
			 * Skip the algorithms which do not map to the
			 * crypto framework provider being added or removed.
			 */
			if (strncmp(alg->alg_mech_name,
			    prov_change->ec_mech_name,
			    CRYPTO_MAX_MECH_NAME) != 0)
				continue;

			/*
			 * Determine if the mechanism is valid.  If it
			 * is not, mark the algorithm as being invalid.  If
			 * it is, mark the algorithm as being valid.
			 */
			for (mech_idx = 0; mech_idx < mech_count; mech_idx++)
				if (strncmp(alg->alg_mech_name,
				    mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0)
					break;
			if (mech_idx == mech_count &&
			    alg->alg_flags & ALG_FLAG_VALID) {
				alg->alg_flags &= ~ALG_FLAG_VALID;
				alg_changed = B_TRUE;
			} else if (mech_idx < mech_count &&
			    !(alg->alg_flags & ALG_FLAG_VALID)) {
				alg->alg_flags |= ALG_FLAG_VALID;
				alg_changed = B_TRUE;
			}

			/*
			 * Update the supported key sizes, regardless
			 * of whether a crypto provider was added or
			 * removed.
			 */
			oalg = *alg;
			ipsec_alg_fix_min_max(alg, algtype, ns);
			if (!alg_changed &&
			    (alg->alg_ef_minbits != oalg.alg_ef_minbits ||
			    alg->alg_ef_maxbits != oalg.alg_ef_maxbits ||
			    alg->alg_ef_default != oalg.alg_ef_default ||
			    alg->alg_ef_default_bits !=
			    oalg.alg_ef_default_bits))
				alg_changed = B_TRUE;

			/*
			 * Update the affected SAs if a software provider is
			 * being added or removed.
			 */
			if (prov_change->ec_provider_type ==
			    CRYPTO_SW_PROVIDER)
				sadb_alg_update(algtype, alg->alg_id,
				    prov_change->ec_change ==
				    CRYPTO_MECH_ADDED, ns);
		}
	}
	mutex_exit(&ipss->ipsec_alg_lock);
	crypto_free_mech_list(mechs, mech_count);

	if (alg_changed) {
		/*
		 * An algorithm has changed, i.e. it became valid or
		 * invalid, or its supported key sizes have changed.
		 * Notify ipsecah and ipsecesp of this change so
		 * that they can send a SADB_REGISTER to their consumers.
		 */
		ipsecah_algs_changed(ns);
		ipsecesp_algs_changed(ns);
	}
}

/*
 * Registers with the crypto framework to be notified of crypto
 * provider changes.  Used to update the algorithm tables and
 * to free or create context templates if needed.  Invoked after IPsec
 * is loaded successfully.
 *
 * This is called separately for each IP instance, so we ensure we only
 * register once.
 */
void
ipsec_register_prov_update(void)
{
	if (prov_update_handle != NULL)
		return;

	prov_update_handle = crypto_notify_events(
	    ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED);
}

/*
 * Unregisters from the framework to be notified of crypto provider
 * changes.  Called from ipsec_policy_g_destroy().
 */
static void
ipsec_unregister_prov_update(void)
{
	if (prov_update_handle != NULL)
		crypto_unnotify_events(prov_update_handle);
}

/*
 * Tunnel-mode support routines.
 */

/*
 * Returns an mblk chain suitable for putnext() if policies match and IPsec
 * SAs are available.
/*
 * Returns an mblk chain suitable for putnext() if policies match and IPsec
 * SAs are available.  If there is no per-tunnel policy, or the lookup finds
 * no match, still return the packet and let global policy take a crack at
 * it in IP.
 *
 * Remember -> we can be forwarding packets.  Keep that in mind w.r.t.
 * inner-packet contents.
 */
mblk_t *
ipsec_tun_outbound(mblk_t *mp, iptun_t *iptun, ipha_t *inner_ipv4,
    ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len)
{
	ipsec_policy_head_t *polhead;
	ipsec_selector_t sel;
	mblk_t *ipsec_mp, *ipsec_mp_head, *nmp;
	ipsec_out_t *io;
	boolean_t is_fragment;
	ipsec_policy_t *pol;
	ipsec_tun_pol_t *itp = iptun->iptun_itp;
	netstack_t *ns = iptun->iptun_ns;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT((outer_ipv6 != NULL && outer_ipv4 == NULL) ||
	    (outer_ipv4 != NULL && outer_ipv6 == NULL));
	/* We take care of inners in a bit. */

	ASSERT(itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE));
	polhead = itp->itp_policy;

	bzero(&sel, sizeof (sel));
	if (inner_ipv4 != NULL) {
		ASSERT(inner_ipv6 == NULL);
		sel.ips_isv4 = B_TRUE;
		sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
		sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
		sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol;
	} else {
		ASSERT(inner_ipv6 != NULL);
		sel.ips_isv4 = B_FALSE;
		sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
		/*
		 * We don't care about routing-header dests in the
		 * forwarding/tunnel path, so just grab ip6_dst.
		 */
		sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
	}

	if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) {
		/*
		 * Caller can prepend the outer header, which means
		 * inner_ipv[46] may be stuck in the middle.  Pullup the whole
		 * mess now if need-be, for easier processing later.  Don't
		 * forget to rewire the outer header too.
		 */
		if (mp->b_cont != NULL) {
			nmp = msgpullup(mp, -1);
			if (nmp == NULL) {
				ip_drop_packet(mp, B_FALSE, NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			freemsg(mp);
			mp = nmp;
			if (outer_ipv4 != NULL)
				outer_ipv4 = (ipha_t *)mp->b_rptr;
			else
				outer_ipv6 = (ip6_t *)mp->b_rptr;
			if (inner_ipv4 != NULL) {
				inner_ipv4 =
				    (ipha_t *)(mp->b_rptr + outer_hdr_len);
			} else {
				inner_ipv6 =
				    (ip6_t *)(mp->b_rptr + outer_hdr_len);
			}
		}
		if (inner_ipv4 != NULL) {
			is_fragment = IS_V4_FRAGMENT(
			    inner_ipv4->ipha_fragment_offset_and_flags);
		} else {
			sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, mp,
			    &is_fragment);
		}

		if (is_fragment) {
			ipha_t *oiph;
			ipha_t *iph = NULL;
			ip6_t *ip6h = NULL;
			int hdr_len;
			uint16_t ip6_hdr_length;
			uint8_t v6_proto;
			uint8_t *v6_proto_p;

			/*
			 * We have a fragment we need to track!
			 */
			mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp,
			    outer_hdr_len, ipss);
			if (mp == NULL)
				return (NULL);
			ASSERT(mp->b_cont == NULL);

			/*
			 * If we get here, we have a full fragment chain
			 */

			oiph = (ipha_t *)mp->b_rptr;
			if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
				hdr_len = ((outer_hdr_len != 0) ?
				    IPH_HDR_LENGTH(oiph) : 0);
				iph = (ipha_t *)(mp->b_rptr + hdr_len);
			} else {
				ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
				ip6h = (ip6_t *)mp->b_rptr;
				if (!ip_hdr_length_nexthdr_v6(mp, ip6h,
				    &ip6_hdr_length, &v6_proto_p)) {
					ip_drop_packet_chain(mp, B_FALSE,
					    NULL, NULL, DROPPER(ipss,
					    ipds_spd_malformed_packet),
					    &ipss->ipsec_spd_dropper);
					return (NULL);
				}
				hdr_len = ip6_hdr_length;
			}
			outer_hdr_len = hdr_len;

			if (sel.ips_isv4) {
				if (iph == NULL) {
					/* Was v6 outer */
					iph = (ipha_t *)(mp->b_rptr + hdr_len);
				}
				inner_ipv4 = iph;
				sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
				sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
				sel.ips_protocol =
				    (uint8_t)inner_ipv4->ipha_protocol;
			} else {
				inner_ipv6 = (ip6_t *)(mp->b_rptr +
				    hdr_len);
				sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
				sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
				if (!ip_hdr_length_nexthdr_v6(mp,
				    inner_ipv6, &ip6_hdr_length, &v6_proto_p)) {
					ip_drop_packet_chain(mp, B_FALSE,
					    NULL, NULL, DROPPER(ipss,
					    ipds_spd_malformed_frag),
					    &ipss->ipsec_spd_dropper);
					return (NULL);
				}
				v6_proto = *v6_proto_p;
				sel.ips_protocol = v6_proto;
#ifdef FRAGCACHE_DEBUG
				cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n",
				    sel.ips_protocol);
#endif
			}
			/* Ports are extracted below */
		}

		/* Get ports... */
		if (!ipsec_init_outbound_ports(&sel, mp,
		    inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) {
			/* callee did ip_drop_packet_chain() on mp. */
			return (NULL);
		}
#ifdef FRAGCACHE_DEBUG
		if (inner_ipv4 != NULL)
			cmn_err(CE_WARN,
			    "(v4) sel.ips_protocol = %d, "
			    "sel.ips_local_port = %d, "
			    "sel.ips_remote_port = %d\n",
			    sel.ips_protocol, ntohs(sel.ips_local_port),
			    ntohs(sel.ips_remote_port));
		if (inner_ipv6 != NULL)
			cmn_err(CE_WARN,
			    "(v6) sel.ips_protocol = %d, "
			    "sel.ips_local_port = %d, "
			    "sel.ips_remote_port = %d\n",
			    sel.ips_protocol, ntohs(sel.ips_local_port),
			    ntohs(sel.ips_remote_port));
#endif
		/* Success so far! */
	}
	rw_enter(&polhead->iph_lock, RW_READER);
	pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND,
	    &sel, ns);
	rw_exit(&polhead->iph_lock);
	if (pol == NULL) {
		/*
		 * No matching policy on this tunnel, drop the packet.
		 *
		 * NOTE: Tunnel-mode tunnels are different from the
		 * IP global transport mode policy head.  For a tunnel-mode
		 * tunnel, we drop the packet rather than passing it along
		 * as accepted, the way a global-policy miss would be.
		 *
		 * NOTE2: "negotiate transport" tunnels should match ALL
		 * inbound packets, but we do not uncomment the ASSERT()
		 * below because if/when we open PF_POLICY, a user can
		 * shoot him/her-self in the foot with a 0 priority.
		 */

		/* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel "
		    "per-port policy\n");
#endif
		ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
		    DROPPER(ipss, ipds_spd_explicit),
		    &ipss->ipsec_spd_dropper);
		return (NULL);
	}

#ifdef FRAGCACHE_DEBUG
	cmn_err(CE_WARN, "Found matching tunnel per-port policy\n");
#endif

	/* Construct an IPSEC_OUT message. */
	ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns);
	if (ipsec_mp == NULL) {
		IPPOL_REFRELE(pol, ns);
		ip_drop_packet(mp, B_FALSE, NULL, NULL,
		    DROPPER(ipss, ipds_spd_nomem),
		    &ipss->ipsec_spd_dropper);
		return (NULL);
	}
	ipsec_mp->b_cont = mp;
	io = (ipsec_out_t *)ipsec_mp->b_rptr;
	IPPH_REFHOLD(polhead);
	/*
	 * NOTE: free() function of ipsec_out mblk will release polhead and
	 * pol references.
	 */
	io->ipsec_out_polhead = polhead;
	io->ipsec_out_policy = pol;
	/*
	 * NOTE: There is a subtle difference between iptun_zoneid and
	 * iptun_connp->conn_zoneid explained in iptun_conn_create().  When
	 * interacting with the ip module, we must use conn_zoneid.
	 */
	io->ipsec_out_zoneid = iptun->iptun_connp->conn_zoneid;
	io->ipsec_out_v4 = (outer_ipv4 != NULL);
	io->ipsec_out_secure = B_TRUE;

	if (!(itp->itp_flags & ITPF_P_TUNNEL)) {
		/* Set up transport mode for tunnelled packets. */
		io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP :
		    IPPROTO_IPV6;
		return (ipsec_mp);
	}

	/* Fill in tunnel-mode goodies here. */
	io->ipsec_out_tunnel = B_TRUE;
	/* XXX Do I need to fill in all of the goodies here? */
	if (inner_ipv4) {
		io->ipsec_out_inaf = AF_INET;
		io->ipsec_out_insrc[0] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4;
		io->ipsec_out_indst[0] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4;
	} else {
		io->ipsec_out_inaf = AF_INET6;
		io->ipsec_out_insrc[0] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0];
		io->ipsec_out_insrc[1] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1];
		io->ipsec_out_insrc[2] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2];
		io->ipsec_out_insrc[3] =
		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3];
		io->ipsec_out_indst[0] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0];
		io->ipsec_out_indst[1] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1];
		io->ipsec_out_indst[2] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2];
		io->ipsec_out_indst[3] =
		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3];
	}
	io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen;
	io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen;
	/* NOTE: These are used for transport mode too. */
	io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport;
	io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport;
	io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto;

	/*
	 * The mp pointer is still valid.  Add an ipsec_out to each fragment;
	 * the fragment head already has one.
	 */
	nmp = mp->b_next;
	mp->b_next = NULL;
	mp = nmp;
	ASSERT(ipsec_mp != NULL);
	while (mp != NULL) {
		nmp = mp->b_next;
		ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns);
		if (ipsec_mp->b_next == NULL) {
			ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL,
			    DROPPER(ipss, ipds_spd_nomem),
			    &ipss->ipsec_spd_dropper);
			ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
			    DROPPER(ipss, ipds_spd_nomem),
			    &ipss->ipsec_spd_dropper);
			return (NULL);
		}
		ipsec_mp = ipsec_mp->b_next;
		mp->b_next = NULL;
		mp = nmp;
	}
	return (ipsec_mp_head);
}
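/*
 * For reference, a sketch of the chain ipsec_tun_outbound() hands back for
 * a fragmented tunnel-mode packet: one IPSEC_OUT M_CTL per fragment, linked
 * by b_next, each carrying its data in b_cont:
 *
 *	IPSEC_OUT --b_next--> IPSEC_OUT --b_next--> ... --> NULL
 *	    |                     |
 *	 b_cont                b_cont
 *	    |                     |
 *	  frag 0                frag 1
 */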
/*
 * NOTE: The following releases pol's reference and
 * calls ip_drop_packet() for me on NULL returns.
 */
mblk_t *
ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol,
    ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns)
{
	/* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */
	mblk_t *data_chain = NULL, *data_tail = NULL;
	mblk_t *ii_next;

	while (ipsec_mp != NULL) {
		ii_next = ipsec_mp->b_next;
		ipsec_mp->b_next = NULL;	/* No tripping asserts. */

		/*
		 * Need IPPOL_REFHOLD(pol) for extras because
		 * ipsec_check_ipsecin_policy() does the refrele.
		 */
		IPPOL_REFHOLD(pol);

		if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4,
		    inner_ipv6, pkt_unique, ns) != NULL) {
			if (data_tail == NULL) {
				/* First one */
				data_chain = data_tail = ipsec_mp->b_cont;
			} else {
				data_tail->b_next = ipsec_mp->b_cont;
				data_tail = data_tail->b_next;
			}
			freeb(ipsec_mp);
		} else {
			/*
			 * ipsec_check_ipsecin_policy() freed ipsec_mp
			 * already.  Need to get rid of any extra pol
			 * references, and any remaining bits as well.
			 */
			IPPOL_REFRELE(pol, ns);
			ipsec_freemsg_chain(data_chain);
			ipsec_freemsg_chain(ii_next);	/* ipdrop stats? */
			return (NULL);
		}
		ipsec_mp = ii_next;
	}
	/*
	 * One last release because either the loop bumped it up, or we never
	 * called ipsec_check_ipsecin_policy().
	 */
	IPPOL_REFRELE(pol, ns);

	/* data_chain is ready for return to tun module. */
	return (data_chain);
}
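/*
 * Refcount sketch for ipsec_check_ipsecin_policy_reasm() above: the caller
 * donates one hold on "pol"; each ipsec_check_ipsecin_policy() call
 * consumes one hold, so the loop takes an extra IPPOL_REFHOLD() per
 * fragment and the final IPPOL_REFRELE() retires the caller's original
 * hold, leaving the count balanced on both success and failure paths.
 */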
/*
 * Returns B_TRUE if the inbound packet passed an IPsec policy check.  Returns
 * B_FALSE if it failed or if it is a fragment needing its friends before a
 * policy check can be performed.
 *
 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead.
 * data_mp may be reassigned with a b_next chain of packets if fragments
 * needed to be collected for a proper policy check.
 *
 * Always frees ipsec_mp, but only frees data_mp if returns B_FALSE.  This
 * function calls ip_drop_packet() on data_mp if need be.
 *
 * NOTE: outer_hdr_len is signed.  If it's a negative value, the caller
 * is inspecting an ICMP packet.
 */
boolean_t
ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp,
    ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
    ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns)
{
	ipsec_policy_head_t *polhead;
	ipsec_selector_t sel;
	mblk_t *message = (ipsec_mp == NULL) ? *data_mp : ipsec_mp;
	ipsec_policy_t *pol;
	uint16_t tmpport;
	selret_t rc;
	boolean_t retval, port_policy_present, is_icmp, global_present;
	in6_addr_t tmpaddr;
	ipaddr_t tmp4;
	uint8_t flags, *inner_hdr;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	sel.ips_is_icmp_inv_acq = 0;

	if (outer_ipv4 != NULL) {
		ASSERT(outer_ipv6 == NULL);
		global_present = ipss->ipsec_inbound_v4_policy_present;
	} else {
		ASSERT(outer_ipv6 != NULL);
		global_present = ipss->ipsec_inbound_v6_policy_present;
	}

	ASSERT((inner_ipv4 != NULL && inner_ipv6 == NULL) ||
	    (inner_ipv4 == NULL && inner_ipv6 != NULL));
	ASSERT(message == *data_mp || message->b_cont == *data_mp);

	if (outer_hdr_len < 0) {
		outer_hdr_len = (-outer_hdr_len);
		is_icmp = B_TRUE;
	} else {
		is_icmp = B_FALSE;
	}

	if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
		polhead = itp->itp_policy;
		/*
		 * We need to perform full Tunnel-Mode enforcement,
		 * and we need to have inner-header data for such enforcement.
		 *
		 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound
		 * and on return.
		 */

		port_policy_present = ((itp->itp_flags &
		    ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE);
		/*
		 * NOTE: Even if our policy is transport mode, set the
		 * SEL_TUNNEL_MODE flag so ipsec_init_inbound_sel() can
		 * do the right thing w.r.t. outer headers.
		 */
		flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) |
		    (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE);

		rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4,
		    inner_ipv6, flags);

		switch (rc) {
		case SELRET_NOMEM:
			ip_drop_packet(message, B_TRUE, NULL, NULL,
			    DROPPER(ipss, ipds_spd_nomem),
			    &ipss->ipsec_spd_dropper);
			return (B_FALSE);
		case SELRET_TUNFRAG:
			/*
			 * At this point, if we're cleartext, we don't want
			 * to go there.
			 */
			if (ipsec_mp == NULL) {
				ip_drop_packet(*data_mp, B_TRUE, NULL, NULL,
				    DROPPER(ipss, ipds_spd_got_clear),
				    &ipss->ipsec_spd_dropper);
				*data_mp = NULL;
				return (B_FALSE);
			}
			ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)->
			    ipsec_in_secure);
			message = ipsec_fragcache_add(&itp->itp_fragcache,
			    ipsec_mp, *data_mp, outer_hdr_len, ipss);

			if (message == NULL) {
				/*
				 * Data is cached, fragment chain is not
				 * complete.  I consume ipsec_mp and data_mp
				 */
				return (B_FALSE);
			}

			/*
			 * If we get here, we have a full fragment chain.
			 * Reacquire headers and selectors from first fragment.
			 */
			inner_hdr = message->b_cont->b_rptr;
			if (outer_ipv4 != NULL) {
				inner_hdr += IPH_HDR_LENGTH(
				    (ipha_t *)message->b_cont->b_rptr);
			} else {
				inner_hdr += ip_hdr_length_v6(message->b_cont,
				    (ip6_t *)message->b_cont->b_rptr);
			}
			ASSERT(inner_hdr <= message->b_cont->b_wptr);

			if (inner_ipv4 != NULL) {
				inner_ipv4 = (ipha_t *)inner_hdr;
				inner_ipv6 = NULL;
			} else {
				inner_ipv6 = (ip6_t *)inner_hdr;
				inner_ipv4 = NULL;
			}

			/*
			 * Use SEL_TUNNEL_MODE to take into account the outer
			 * header.  Use SEL_POST_FRAG so we always get ports.
			 */
			rc = ipsec_init_inbound_sel(&sel, message->b_cont,
			    inner_ipv4, inner_ipv6,
			    SEL_TUNNEL_MODE | SEL_POST_FRAG);
			switch (rc) {
			case SELRET_SUCCESS:
				/*
				 * Get to same place as first caller's
				 * SELRET_SUCCESS case.
				 */
				break;
			case SELRET_NOMEM:
				ip_drop_packet_chain(message, B_TRUE,
				    NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (B_FALSE);
			case SELRET_BADPKT:
				ip_drop_packet_chain(message, B_TRUE,
				    NULL, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				return (B_FALSE);
			case SELRET_TUNFRAG:
				cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)");
				/* FALLTHRU */
			default:
				cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)"
				    " returns bizarro 0x%x", rc);
				/* Guaranteed panic! */
				ASSERT(rc == SELRET_NOMEM);
				return (B_FALSE);
			}
			/* FALLTHRU */
		case SELRET_SUCCESS:
			/*
			 * Common case:
			 * No per-port policy or a non-fragment.  Keep going.
			 */
			break;
		case SELRET_BADPKT:
			/*
			 * We may receive ICMP (with IPv6 inner) packets that
			 * trigger this return value.  Send 'em in for
			 * enforcement checking.
			 */
			cmn_err(CE_NOTE, "ipsec_tun_inbound(): "
			    "sending 'bad packet' in for enforcement");
			break;
		default:
			cmn_err(CE_WARN,
			    "ipsec_init_inbound_sel() returns bizarro 0x%x",
			    rc);
			ASSERT(rc == SELRET_NOMEM);	/* Guaranteed panic! */
			return (B_FALSE);
		}

		if (is_icmp) {
			/*
			 * Swap local/remote because this is an ICMP packet.
			 */
			tmpaddr = sel.ips_local_addr_v6;
			sel.ips_local_addr_v6 = sel.ips_remote_addr_v6;
			sel.ips_remote_addr_v6 = tmpaddr;
			tmpport = sel.ips_local_port;
			sel.ips_local_port = sel.ips_remote_port;
			sel.ips_remote_port = tmpport;
		}

		/* find_policy_head() */
		rw_enter(&polhead->iph_lock, RW_READER);
		pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND,
		    &sel, ns);
		rw_exit(&polhead->iph_lock);
		if (pol != NULL) {
			if (ipsec_mp == NULL ||
			    !((ipsec_in_t *)ipsec_mp->b_rptr)->
			    ipsec_in_secure) {
				retval = pol->ipsp_act->ipa_allow_clear;
				if (!retval) {
					/*
					 * XXX should never get here with
					 * tunnel reassembled fragments?
					 */
					ASSERT(message->b_next == NULL);
					ip_drop_packet(message, B_TRUE, NULL,
					    NULL,
					    DROPPER(ipss, ipds_spd_got_clear),
					    &ipss->ipsec_spd_dropper);
				} else if (ipsec_mp != NULL) {
					freeb(ipsec_mp);
				}

				IPPOL_REFRELE(pol, ns);
				return (retval);
			}
			/*
			 * NOTE: The following releases pol's reference and
			 * calls ip_drop_packet() for me on NULL returns.
			 *
			 * "sel" is still good here, so let's use it!
			 */
			*data_mp = ipsec_check_ipsecin_policy_reasm(message,
			    pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID(
			    sel.ips_remote_port, sel.ips_local_port,
			    (inner_ipv4 == NULL) ? IPPROTO_IPV6 :
			    IPPROTO_ENCAP, sel.ips_protocol), ns);
			return (*data_mp != NULL);
		}

		/*
		 * Else fall through and check the global policy on the outer
		 * header(s) if this tunnel is an old-style transport-mode
		 * one.  Drop the packet explicitly (no policy entry) for
		 * a new-style tunnel-mode tunnel.
		 */
		if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) {
			ip_drop_packet_chain(message, B_TRUE, NULL,
			    NULL,
			    DROPPER(ipss, ipds_spd_explicit),
			    &ipss->ipsec_spd_dropper);
			return (B_FALSE);
		}
	}

	/*
	 * NOTE: If we reach here, we will not have packet chains from
	 * fragcache_add(), because the only way I get chains is on a
	 * tunnel-mode tunnel, which either returns with a pass, or gets
	 * hit by the ip_drop_packet_chain() call right above here.
	 */

	/* If no per-tunnel security, check global policy now. */
	if (ipsec_mp != NULL && !global_present) {
		if (((ipsec_in_t *)(ipsec_mp->b_rptr))->
		    ipsec_in_icmp_loopback) {
			/*
			 * This is an ICMP message with an ipsec_mp
			 * attached.  We should accept it.
			 */
			freeb(ipsec_mp);
			return (B_TRUE);
		}

		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
		    DROPPER(ipss, ipds_spd_got_secure),
		    &ipss->ipsec_spd_dropper);
		return (B_FALSE);
	}

	if (is_icmp) {
		/*
		 * For ICMP packets, "outer_ipvN" is set to the outer header
		 * that is *INSIDE* the ICMP payload.  For global policy
		 * checking, we need to reverse src/dst on the payload in
		 * order to construct selectors appropriately.  See "ripha"
		 * constructions in ip.c.  To avoid a bug like 6478464 (see
		 * earlier in this file), we will actually exchange src/dst
		 * in the packet, and reverse it after the call to
		 * ipsec_check_global_policy().
		 */
		if (outer_ipv4 != NULL) {
			tmp4 = outer_ipv4->ipha_src;
			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
			outer_ipv4->ipha_dst = tmp4;
		} else {
			ASSERT(outer_ipv6 != NULL);
			tmpaddr = outer_ipv6->ip6_src;
			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
			outer_ipv6->ip6_dst = tmpaddr;
		}
	}

	/* NOTE: Frees message if it returns NULL. */
	if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6,
	    (ipsec_mp != NULL), ns) == NULL) {
		return (B_FALSE);
	}

	if (is_icmp) {
		/* Set things back to normal. */
		if (outer_ipv4 != NULL) {
			tmp4 = outer_ipv4->ipha_src;
			outer_ipv4->ipha_src = outer_ipv4->ipha_dst;
			outer_ipv4->ipha_dst = tmp4;
		} else {
			/* No need for ASSERT()s now. */
			tmpaddr = outer_ipv6->ip6_src;
			outer_ipv6->ip6_src = outer_ipv6->ip6_dst;
			outer_ipv6->ip6_dst = tmpaddr;
		}
	}

	if (ipsec_mp != NULL)
		freeb(ipsec_mp);

	/*
	 * At this point, we pretend it's a cleartext accepted
	 * packet.
	 */
	return (B_TRUE);
}

/*
 * AVL comparison routine for our list of tunnel polheads.
 */
static int
tunnel_compare(const void *arg1, const void *arg2)
{
	ipsec_tun_pol_t *left, *right;
	int rc;

	left = (ipsec_tun_pol_t *)arg1;
	right = (ipsec_tun_pol_t *)arg2;

	rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ);
	return (rc == 0 ? rc : (rc > 0 ? 1 : -1));
}
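/*
 * Sketch of the comparator contract assumed above: avl_find() and
 * avl_insert() require a total order returning exactly -1, 0, or 1, which
 * is why the strncmp() result is clamped.  Lookup by name then works off a
 * stack-allocated key, as get_tunnel_policy() below does:
 *
 *	ipsec_tun_pol_t lookup;
 *
 *	(void) strncpy(lookup.itp_name, name, LIFNAMSIZ);
 *	node = avl_find(&ipss->ipsec_tunnel_policies, &lookup, NULL);
 */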
/*
 * Free a tunnel policy node.
 */
void
itp_free(ipsec_tun_pol_t *node, netstack_t *ns)
{
	if (node->itp_policy != NULL) {
		IPPH_REFRELE(node->itp_policy, ns);
		node->itp_policy = NULL;
	}
	if (node->itp_inactive != NULL) {
		IPPH_REFRELE(node->itp_inactive, ns);
		node->itp_inactive = NULL;
	}
	mutex_destroy(&node->itp_lock);
	kmem_free(node, sizeof (*node));
}

void
itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	ipss->ipsec_tunnel_policy_gen++;
	ipsec_fragcache_uninit(&node->itp_fragcache);
	avl_remove(&ipss->ipsec_tunnel_policies, node);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	ITP_REFRELE(node, ns);
}

/*
 * Public interface to look up a tunnel security policy by name.  Used by
 * spdsock mostly.  Returns "node" with a bumped refcnt.
 */
ipsec_tun_pol_t *
get_tunnel_policy(char *name, netstack_t *ns)
{
	ipsec_tun_pol_t *node, lookup;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	(void) strncpy(lookup.itp_name, name, LIFNAMSIZ);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
	node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
	    &lookup, NULL);
	if (node != NULL) {
		ITP_REFHOLD(node);
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);

	return (node);
}

/*
 * Public interface to walk all tunnel security policies.  Useful for spdsock
 * DUMP operations.  iterator() will not consume a reference.
 */
void
itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *),
    void *arg, netstack_t *ns)
{
	ipsec_tun_pol_t *node;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
	for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL;
	    node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) {
		iterator(node, arg, ns);
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
}

/*
 * Initialize policy head.  This can only fail if there's a memory problem.
 */
static boolean_t
tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
	iph->iph_refs = 1;
	iph->iph_gen = 0;
	if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize,
	    KM_SLEEP, B_FALSE, ns) != 0) {
		ipsec_polhead_free_table(iph);
		return (B_FALSE);
	}
	ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize);
	return (B_TRUE);
}
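/*
 * Usage sketch for the by-name lookup above; callers own one reference and
 * must release it when done:
 *
 *	itp = get_tunnel_policy(name, ns);
 *	if (itp != NULL) {
 *		... inspect itp->itp_policy (under itp->itp_lock as needed) ...
 *		ITP_REFRELE(itp, ns);
 *	}
 */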
/*
 * Create a tunnel policy node with "name".  Set *errno to ENOMEM if there's
 * a memory problem, and to EEXIST if there's an existing node.
 */
ipsec_tun_pol_t *
create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns)
{
	ipsec_tun_pol_t *newbie, *existing;
	avl_index_t where;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
	if (newbie == NULL) {
		*errno = ENOMEM;
		return (NULL);
	}
	if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
		kmem_free(newbie, sizeof (*newbie));
		*errno = ENOMEM;
		return (NULL);
	}

	(void) strncpy(newbie->itp_name, name, LIFNAMSIZ);

	rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER);
	existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies,
	    newbie, &where);
	if (existing != NULL) {
		itp_free(newbie, ns);
		*errno = EEXIST;
		rw_exit(&ipss->ipsec_tunnel_policy_lock);
		return (NULL);
	}
	ipss->ipsec_tunnel_policy_gen++;
	*gen = ipss->ipsec_tunnel_policy_gen;
	newbie->itp_refcnt = 2;	/* One for the caller, one for the tree. */
	newbie->itp_next_policy_index = 1;
	avl_insert(&ipss->ipsec_tunnel_policies, newbie, where);
	mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
	newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_policy == NULL)
		goto nomem;
	newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
	    KM_NOSLEEP);
	if (newbie->itp_inactive == NULL) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		goto nomem;
	}

	if (!tunnel_polhead_init(newbie->itp_policy, ns)) {
		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	} else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) {
		IPPH_REFRELE(newbie->itp_policy, ns);
		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
		goto nomem;
	}
	rw_exit(&ipss->ipsec_tunnel_policy_lock);

	return (newbie);
nomem:
	/*
	 * Unwind the AVL insertion and drop the policy lock from above
	 * before freeing, lest we free a node that is still in the tree
	 * while leaving the lock held.
	 */
	*errno = ENOMEM;
	avl_remove(&ipss->ipsec_tunnel_policies, newbie);
	rw_exit(&ipss->ipsec_tunnel_policy_lock);
	mutex_destroy(&newbie->itp_lock);
	ipsec_fragcache_uninit(&newbie->itp_fragcache);
	kmem_free(newbie, sizeof (*newbie));
	return (NULL);
}
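/*
 * Caller-side sketch for create_tunnel_policy() above.  The tunnel name
 * and locals are illustrative, not taken from a real caller:
 *
 *	int err;
 *	uint64_t gen;
 *	ipsec_tun_pol_t *itp;
 *
 *	itp = create_tunnel_policy("ip.tun0", &err, &gen, ns);
 *	if (itp == NULL && err == EEXIST)
 *		itp = get_tunnel_policy("ip.tun0", ns);
 */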
/*
 * Given two addresses, find a tunnel instance's IPsec policy heads.
 * Returns NULL on failure.
 */
ipsec_tun_pol_t *
itp_get_byaddr(uint32_t *laddr, uint32_t *faddr, int af, ip_stack_t *ipst)
{
	conn_t *connp;
	iptun_t *iptun;
	ipsec_tun_pol_t *itp = NULL;

	/* The classifier expects "src" to be the foreign address. */
	if (af == AF_INET) {
		connp = ipcl_iptun_classify_v4((ipaddr_t *)faddr,
		    (ipaddr_t *)laddr, ipst);
	} else {
		ASSERT(af == AF_INET6);
		ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)laddr));
		ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)faddr));
		connp = ipcl_iptun_classify_v6((in6_addr_t *)faddr,
		    (in6_addr_t *)laddr, ipst);
	}

	if (connp == NULL)
		return (NULL);

	if (IPCL_IS_IPTUN(connp)) {
		iptun = connp->conn_iptun;
		if (iptun != NULL) {
			itp = iptun->iptun_itp;
			if (itp != NULL) {
				/* Braces due to the macro's nature... */
				ITP_REFHOLD(itp);
			}
		}	/* Else itp is already NULL. */
	}

	CONN_DEC_REF(connp);
	return (itp);
}

/*
 * Frag cache code, based on SunScreen 3.2 source
 * screen/kernel/common/screen_fragcache.c
 */

#define	IPSEC_FRAG_TTL_MAX	5
/*
 * Note that the following parameters create 256 hash buckets
 * with 1024 free entries to be distributed.  Entries are cleaned
 * periodically, and cleaning is also attempted when there is no
 * free space, but this system errs on the side of dropping packets
 * over creating memory exhaustion.  We may decide to make the hash
 * factor a tunable if this proves to be a bad decision.
 */
#define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
#define	IPSEC_FRAG_HASH_FACTOR	4
#define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)

#define	IPSEC_FRAG_HASH_MASK		(IPSEC_FRAG_HASH_SLOTS - 1)
#define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
					    (((id) / \
					    (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
					    IPSEC_FRAG_HASH_MASK))

/* Maximum fragments per packet.  48-byte payloads x 1366 fragments > 64KB */
#define	IPSEC_MAX_FRAGS		1366

#define	V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_OFFSET) << 3)
#define	V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
	IPH_MF)

/*
 * Initialize an ipsec fragcache instance.
 * Returns B_FALSE if memory allocation fails.
 */
boolean_t
ipsec_fragcache_init(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *ftemp;
	int i;

	mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL);
	frag->itpf_ptr = (ipsec_fragcache_entry_t **)
	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) *
	    IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP);
	if (frag->itpf_ptr == NULL)
		return (B_FALSE);

	ftemp = (ipsec_fragcache_entry_t *)
	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t) *
	    IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP);
	if (ftemp == NULL) {
		kmem_free(frag->itpf_ptr, sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
		return (B_FALSE);
	}

	frag->itpf_freelist = NULL;

	for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) {
		ftemp->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = ftemp;
		ftemp++;
	}

	frag->itpf_expire_hint = 0;

	return (B_TRUE);
}
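/*
 * Layout sketch for ipsec_fragcache_init() above: one 256-slot pointer
 * array plus one contiguous block of 1024 entries, threaded onto a single
 * freelist at init time:
 *
 *	itpf_ptr[0 .. IPSEC_FRAG_HASH_SLOTS - 1] --> per-slot chains (empty)
 *	itpf_freelist --> e1023 --> e1022 --> ... --> e0 --> NULL
 */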
void
ipsec_fragcache_uninit(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *fep;
	int i;

	mutex_enter(&frag->itpf_lock);
	if (frag->itpf_ptr) {
		/* Delete any existing fragcache entry chains */
		for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
			fep = (frag->itpf_ptr)[i];
			while (fep != NULL) {
				/* Returned fep is next in chain or NULL */
				fep = fragcache_delentry(i, fep, frag);
			}
		}
		/*
		 * Chase the pointers back to the beginning
		 * of the memory allocation and then
		 * get rid of the allocated freelist
		 */
		while (frag->itpf_freelist->itpfe_next != NULL)
			frag->itpf_freelist = frag->itpf_freelist->itpfe_next;
		/*
		 * XXX - If we ever dynamically grow the freelist
		 * then we'll have to free entries individually
		 * or determine how many entries or chunks we have
		 * grown since the initial allocation.
		 */
		kmem_free(frag->itpf_freelist,
		    sizeof (ipsec_fragcache_entry_t) *
		    IPSEC_FRAG_HASH_SIZE);
		/* Free the hash-slot pointer array */
		kmem_free(frag->itpf_ptr,
		    sizeof (ipsec_fragcache_entry_t *) *
		    IPSEC_FRAG_HASH_SLOTS);
	}
	mutex_exit(&frag->itpf_lock);
	mutex_destroy(&frag->itpf_lock);
}
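/*
 * Worked example of IPSEC_FRAG_HASH_FUNC (defined above) for an IP ident
 * of 0x1234:
 *
 *	(0x1234 & 0xff) ^ ((0x1234 / 256) & 0xff) = 0x34 ^ 0x12 = 0x26
 *
 * so both bytes of the ident contribute to the chosen slot.
 */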
/*
 * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
 * Returns mp if a whole packet has been assembled, NULL otherwise
 */
mblk_t *
ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
    int outer_hdr_len, ipsec_stack_t *ipss)
{
	boolean_t is_v4;
	time_t itpf_time;
	ipha_t *iph;
	ipha_t *oiph;
	ip6_t *ip6h = NULL;
	uint8_t v6_proto;
	uint8_t *v6_proto_p;
	uint16_t ip6_hdr_length;
	ip6_pkt_t ipp;
	ip6_frag_t *fraghdr;
	ipsec_fragcache_entry_t *fep;
	int i;
	mblk_t *nmp, *prevmp;
	int firstbyte, lastbyte;
	int offset;
	int last;
	boolean_t inbound = (ipsec_mp != NULL);
	mblk_t *first_mp = inbound ? ipsec_mp : mp;

	ASSERT(first_mp == mp || first_mp->b_cont == mp);

	/*
	 * You're on the slow path, so ensure that every packet in the
	 * cache is a single-mblk one.
	 */
	if (mp->b_cont != NULL) {
		nmp = msgpullup(mp, -1);
		if (nmp == NULL) {
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    DROPPER(ipss, ipds_spd_nomem),
			    &ipss->ipsec_spd_dropper);
			return (NULL);
		}
		freemsg(mp);
		if (ipsec_mp != NULL)
			ipsec_mp->b_cont = nmp;
		mp = nmp;
	}

	mutex_enter(&frag->itpf_lock);

	oiph = (ipha_t *)mp->b_rptr;
	iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);

	if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
		is_v4 = B_TRUE;
	} else {
		ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
		ip6h = (ip6_t *)(mp->b_rptr + outer_hdr_len);

		/*
		 * Find the upper-layer protocol.
		 * If this fails, we have a malformed packet.
		 */
		if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip6_hdr_length,
		    &v6_proto_p)) {
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    DROPPER(ipss, ipds_spd_malformed_packet),
			    &ipss->ipsec_spd_dropper);
			return (NULL);
		} else {
			v6_proto = *v6_proto_p;
		}

		bzero(&ipp, sizeof (ipp));
		(void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL);
		if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
			/*
			 * We think this is a fragment, but didn't find
			 * a fragment header.  Something is wrong.
			 */
			mutex_exit(&frag->itpf_lock);
			ip_drop_packet(first_mp, inbound, NULL, NULL,
			    DROPPER(ipss, ipds_spd_malformed_frag),
			    &ipss->ipsec_spd_dropper);
			return (NULL);
		}
		fraghdr = ipp.ipp_fraghdr;
		is_v4 = B_FALSE;
	}

	/* Anything to cleanup? */

	/*
	 * This cleanup call could be put in a timer loop, but it may
	 * actually be just as reasonable a decision to leave it here.  The
	 * disadvantage is that this only gets called when frags are added.
	 * The advantage is that it is not susceptible to race conditions
	 * the way a time-based cleanup may be.
	 */
	itpf_time = gethrestime_sec();
	if (itpf_time >= frag->itpf_expire_hint)
		ipsec_fragcache_clean(frag);

	/* Lookup to see if there is an existing entry */

	if (is_v4)
		i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident);
	else
		i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident);

	for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) {
		if (is_v4) {
			ASSERT(iph != NULL);
			if ((fep->itpfe_id == iph->ipha_ident) &&
			    (fep->itpfe_src == iph->ipha_src) &&
			    (fep->itpfe_dst == iph->ipha_dst) &&
			    (fep->itpfe_proto == iph->ipha_protocol))
				break;
		} else {
			ASSERT(fraghdr != NULL);
			ASSERT(fep != NULL);
			if ((fep->itpfe_id == fraghdr->ip6f_ident) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6,
			    &ip6h->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6,
			    &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto))
				break;
		}
	}

	if (is_v4) {
		firstbyte = V4_FRAG_OFFSET(iph);
		lastbyte = firstbyte + ntohs(iph->ipha_length) -
		    IPH_HDR_LENGTH(iph);
		last = (V4_MORE_FRAGS(iph) == 0);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, "
		    "last = %d, id = %d\n", firstbyte, lastbyte, last,
		    iph->ipha_ident);
#endif
	} else {
		firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
		lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
		    sizeof (ip6_t) - ip6_hdr_length;
		last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0;
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, "
		    "last = %d, id = %d, fraghdr = %p, mp = %p\n",
		    firstbyte, lastbyte, last, fraghdr->ip6f_ident,
		    fraghdr, mp);
#endif
	}

	/* check for bogus fragments and delete the entry */
	if (firstbyte > 0 && firstbyte <= 8) {
		if (fep != NULL)
			(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    DROPPER(ipss, ipds_spd_malformed_frag),
		    &ipss->ipsec_spd_dropper);
		return (NULL);
	}

	/* Not found, allocate a new entry */
	if (fep == NULL) {
		if (frag->itpf_freelist == NULL) {
			/* see if there is some space */
			ipsec_fragcache_clean(frag);
			if (frag->itpf_freelist == NULL) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_nomem),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
		}

		fep = frag->itpf_freelist;
		frag->itpf_freelist = fep->itpfe_next;

		if (is_v4) {
			bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src,
			    sizeof (struct in_addr));
			bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst,
			    sizeof (struct in_addr));
			fep->itpfe_id = iph->ipha_ident;
			fep->itpfe_proto = iph->ipha_protocol;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		} else {
			bcopy((in6_addr_t *)&ip6h->ip6_src,
			    (in6_addr_t *)&fep->itpfe_src6,
			    sizeof (struct in6_addr));
			bcopy((in6_addr_t *)&ip6h->ip6_dst,
			    (in6_addr_t *)&fep->itpfe_dst6,
			    sizeof (struct in6_addr));
			fep->itpfe_id = fraghdr->ip6f_ident;
			fep->itpfe_proto = v6_proto;
			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
		}
		itpf_time = gethrestime_sec();
		fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1;
		fep->itpfe_last = 0;
		fep->itpfe_fraglist = NULL;
		fep->itpfe_depth = 0;
		fep->itpfe_next = (frag->itpf_ptr)[i];
		(frag->itpf_ptr)[i] = fep;

		if (frag->itpf_expire_hint > fep->itpfe_exp)
			frag->itpf_expire_hint = fep->itpfe_exp;

	}

	/* Insert it in the frag list */
	/* List is in order by starting offset of fragments */

	prevmp = NULL;
	for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) {
		ipha_t *niph;
		ipha_t *oniph;
		ip6_t *nip6h;
		ip6_pkt_t nipp;
		ip6_frag_t *nfraghdr;
		uint16_t nip6_hdr_length;
		uint8_t *nv6_proto_p;
		int nfirstbyte, nlastbyte;
		char *data, *ndata;
		mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp);
		int hdr_len;

		oniph = (ipha_t *)mp->b_rptr;
		nip6h = NULL;
		niph = NULL;

		/*
		 * Determine outer header type and length and set
		 * pointers appropriately
		 */

		if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION);
			ASSERT(ndata_mp->b_cont == NULL);
			nip6h = (ip6_t *)ndata_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(ndata_mp, nip6h,
			    &nip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0);
		}

		/*
		 * Determine inner header type and length and set
		 * pointers appropriately
		 */

		if (is_v4) {
			if (niph == NULL) {
				/* Was v6 outer */
				niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
			}
			nfirstbyte = V4_FRAG_OFFSET(niph);
			nlastbyte = nfirstbyte + ntohs(niph->ipha_length) -
			    IPH_HDR_LENGTH(niph);
		} else {
			ASSERT(ndata_mp->b_cont == NULL);
			nip6h = (ip6_t *)(ndata_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(ndata_mp, nip6h,
			    &nip6_hdr_length, &nv6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				ipsec_freemsg_chain(ndata_mp);
				return (NULL);
			}
			bzero(&nipp, sizeof (nipp));
			(void) ip_find_hdr_v6(ndata_mp, nip6h, &nipp, NULL);
			nfraghdr = nipp.ipp_fraghdr;
			nfirstbyte = ntohs(nfraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) +
			    sizeof (ip6_t) - nip6_hdr_length;
		}

		/* Check for overlapping fragments */
		if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) {
			/*
			 * Overlap Check:
			 *  ~~~~---------		# Check if the newly
			 * ~    ndata_mp|		# received fragment
			 *  ~~~~---------		# overlaps with the
			 *	 ---------~~~~~~	# current fragment.
			 *	|    mp		~
			 *	 ---------~~~~~~
			 */
			if (is_v4) {
				data = (char *)iph + IPH_HDR_LENGTH(iph) +
				    firstbyte - nfirstbyte;
				ndata = (char *)niph + IPH_HDR_LENGTH(niph);
			} else {
				data = (char *)ip6h +
				    nip6_hdr_length + firstbyte -
				    nfirstbyte;
				ndata = (char *)nip6h + nip6_hdr_length;
			}
			if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) -
			    firstbyte)) {
				/* Overlapping data does not match */
				(void) fragcache_delentry(i, fep, frag);
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_overlap_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			/* Part of defense for jolt2.c fragmentation attack */
			if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
				/*
				 * Check for identical or subset fragments:
				 *  ----------	    ~~~~--------~~~~~
				 * |	nmp   | or  ~	 nmp	    ~
				 *  ----------	    ~~~~--------~~~~~
				 *  ----------		  ------
				 * |	mp    |		 |  mp  |
				 *  ----------		  ------
				 */
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet(first_mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_evil_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}

		}

		/* Correct location for this fragment? */
		if (firstbyte <= nfirstbyte) {
			/*
			 * Check if the tail end of the new fragment overlaps
			 * with the head of the current fragment.
			 *	--------~~~~~~~
			 *	|    nmp	~
			 *	--------~~~~~~~
			 *	 ~~~~~--------
			 *	~	mp   |
			 *	 ~~~~~--------
			 */
			if (lastbyte > nfirstbyte) {
				/* Fragments overlap */
				if (is_v4) {
					data = (char *)iph +
					    IPH_HDR_LENGTH(iph) + firstbyte -
					    nfirstbyte;
					ndata = (char *)niph +
					    IPH_HDR_LENGTH(niph);
				} else {
					data = (char *)ip6h +
					    nip6_hdr_length + firstbyte -
					    nfirstbyte;
					ndata = (char *)nip6h + nip6_hdr_length;
				}
				if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
				    - nfirstbyte)) {
					/* Overlap mismatch */
					(void) fragcache_delentry(i, fep, frag);
					mutex_exit(&frag->itpf_lock);
					ip_drop_packet(first_mp, inbound, NULL,
					    NULL, DROPPER(ipss,
					    ipds_spd_overlap_frag),
					    &ipss->ipsec_spd_dropper);
					return (NULL);
				}
			}

			/*
			 * Fragment does not illegally overlap and can now
			 * be inserted into the chain
			 */
			break;
		}

		prevmp = nmp;
	}
	first_mp->b_next = nmp;

	if (prevmp == NULL) {
		fep->itpfe_fraglist = first_mp;
	} else {
		prevmp->b_next = first_mp;
	}
	if (last)
		fep->itpfe_last = 1;

	/* Part of defense for jolt2.c fragmentation attack */
	if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
		(void) fragcache_delentry(i, fep, frag);
		mutex_exit(&frag->itpf_lock);
		ip_drop_packet(first_mp, inbound, NULL, NULL,
		    DROPPER(ipss, ipds_spd_max_frags),
		    &ipss->ipsec_spd_dropper);
		return (NULL);
	}

	/* Check for complete packet */

	if (!fep->itpfe_last) {
		mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
		cmn_err(CE_WARN, "Fragment cached, not last.\n");
#endif
		return (NULL);
	}

#ifdef FRAGCACHE_DEBUG
	cmn_err(CE_WARN, "Last fragment cached.\n");
	cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp);
#endif

	offset = 0;
	for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
		mblk_t *data_mp = (inbound ? mp->b_cont : mp);
		int hdr_len;

		oiph = (ipha_t *)data_mp->b_rptr;
		ip6h = NULL;
		iph = NULL;

		if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
			hdr_len = ((outer_hdr_len != 0) ?
			    IPH_HDR_LENGTH(oiph) : 0);
			iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
		} else {
			ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
			ASSERT(data_mp->b_cont == NULL);
			ip6h = (ip6_t *)data_mp->b_rptr;
			(void) ip_hdr_length_nexthdr_v6(data_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p);
			hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
		}

		/* Calculate current fragment start/end */
		if (is_v4) {
			if (iph == NULL) {
				/* Was v6 outer */
				iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
			}
			firstbyte = V4_FRAG_OFFSET(iph);
			lastbyte = firstbyte + ntohs(iph->ipha_length) -
			    IPH_HDR_LENGTH(iph);
		} else {
			ASSERT(data_mp->b_cont == NULL);
			ip6h = (ip6_t *)(data_mp->b_rptr + hdr_len);
			if (!ip_hdr_length_nexthdr_v6(data_mp, ip6h,
			    &ip6_hdr_length, &v6_proto_p)) {
				mutex_exit(&frag->itpf_lock);
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_malformed_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
			v6_proto = *v6_proto_p;
			bzero(&ipp, sizeof (ipp));
			(void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL);
			fraghdr = ipp.ipp_fraghdr;
			firstbyte = ntohs(fraghdr->ip6f_offlg &
			    IP6F_OFF_MASK);
			lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
			    sizeof (ip6_t) - ip6_hdr_length;
		}

		/*
		 * If this fragment starts beyond the current offset,
		 * a fragment is missing, so return NULL.
		 */
		if (firstbyte > offset) {
			mutex_exit(&frag->itpf_lock);
#ifdef FRAGCACHE_DEBUG
			/*
			 * Note, this can happen when the last frag
			 * gets sent through because it is smaller
			 * than the MTU.  It is not necessarily an
			 * error condition.
			 */
			cmn_err(CE_WARN, "Frag greater than offset! : "
			    "missing fragment: firstbyte = %d, offset = %d, "
			    "mp = %p\n", firstbyte, offset, mp);
#endif
			return (NULL);
		}

		/*
		 * If we are at the last fragment, we have the complete
		 * packet, so rechain things and return it to caller
		 * for processing
		 */

		if ((is_v4 && !V4_MORE_FRAGS(iph)) ||
		    (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) {
			mp = fep->itpfe_fraglist;
			fep->itpfe_fraglist = NULL;
			(void) fragcache_delentry(i, fep, frag);
			mutex_exit(&frag->itpf_lock);

			if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) >
			    65535)) || (!is_v4 && (firstbyte +
			    ntohs(ip6h->ip6_plen) > 65535))) {
				/* It is an invalid "ping-o-death" packet */
				/* Discard it */
				ip_drop_packet_chain(mp, inbound, NULL, NULL,
				    DROPPER(ipss, ipds_spd_evil_frag),
				    &ipss->ipsec_spd_dropper);
				return (NULL);
			}
#ifdef FRAGCACHE_DEBUG
			cmn_err(CE_WARN, "Fragcache returning mp = %p, "
			    "mp->b_next = %p", mp, mp->b_next);
#endif
			/*
			 * For inbound case, mp has ipsec_in b_next'd chain
			 * For outbound case, it is just data mp chain
			 */
			return (mp);
		}

		/*
		 * Update new ending offset if this
		 * fragment extends the packet
		 */
		if (offset < lastbyte)
			offset = lastbyte;
	}

	mutex_exit(&frag->itpf_lock);

	/* Didn't find last fragment, so return NULL */
	return (NULL);
}

static void
ipsec_fragcache_clean(ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *fep;
	int i;
	ipsec_fragcache_entry_t *earlyfep = NULL;
	time_t itpf_time;
	int earlyexp;
	int earlyi = 0;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	itpf_time = gethrestime_sec();
	earlyexp = itpf_time + 10000;

	for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
		fep = (frag->itpf_ptr)[i];
		while (fep) {
			if (fep->itpfe_exp < itpf_time) {
				/* found */
				fep = fragcache_delentry(i, fep, frag);
			} else {
				if (fep->itpfe_exp < earlyexp) {
					earlyfep = fep;
					earlyexp = fep->itpfe_exp;
					earlyi = i;
				}
				fep = fep->itpfe_next;
			}
		}
	}

	frag->itpf_expire_hint = earlyexp;

	/* If no free entries are left, evict the entry expiring soonest. */
	if (frag->itpf_freelist == NULL)
		(void) fragcache_delentry(earlyi, earlyfep, frag);
}
static ipsec_fragcache_entry_t *
fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep,
    ipsec_fragcache_t *frag)
{
	ipsec_fragcache_entry_t *targp;
	ipsec_fragcache_entry_t *nextp = fep->itpfe_next;

	ASSERT(MUTEX_HELD(&frag->itpf_lock));

	/* Free up any fragment list still in cache entry */
	ipsec_freemsg_chain(fep->itpfe_fraglist);

	targp = (frag->itpf_ptr)[slot];
	ASSERT(targp != 0);

	if (targp == fep) {
		/* unlink from head of hash chain */
		(frag->itpf_ptr)[slot] = nextp;
		/* link into free list */
		fep->itpfe_next = frag->itpf_freelist;
		frag->itpf_freelist = fep;
		return (nextp);
	}

	/* maybe should use double linked list to make update faster */
	/* must be past front of chain */
	while (targp) {
		if (targp->itpfe_next == fep) {
			/* unlink from hash chain */
			targp->itpfe_next = nextp;
			/* link into free list */
			fep->itpfe_next = frag->itpf_freelist;
			frag->itpf_freelist = fep;
			return (nextp);
		}
		targp = targp->itpfe_next;
		ASSERT(targp != 0);
	}
	/* NOTREACHED */
	return (NULL);
}
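/*
 * End-to-end usage sketch of the fragment-cache API above.  This block is
 * illustrative only and excluded from compilation; the wrapper function
 * and its parameters are hypothetical, while the ipsec_fragcache_*
 * routines are the ones defined in this file.
 */
#if 0
static int
fragcache_usage_sketch(mblk_t *ipsec_mp, mblk_t *data_mp, int outer_hdr_len,
    ipsec_stack_t *ipss)
{
	ipsec_fragcache_t fc;
	mblk_t *whole;

	if (!ipsec_fragcache_init(&fc))
		return (ENOMEM);

	/* For each arriving, possibly fragmented, tunnel packet: */
	whole = ipsec_fragcache_add(&fc, ipsec_mp, data_mp, outer_hdr_len,
	    ipss);
	if (whole != NULL) {
		/* Complete b_next chain; run policy checks and pass it up. */
	}
	/* Else the fragment was consumed: cached, or dropped with stats. */

	ipsec_fragcache_uninit(&fc);
	return (0);
}
#endif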