1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IPsec Security Policy Database. 28 * 29 * This module maintains the SPD and provides routines used by ip and ip6 30 * to apply IPsec policy to inbound and outbound datagrams. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/stream.h> 35 #include <sys/stropts.h> 36 #include <sys/sysmacros.h> 37 #include <sys/strsubr.h> 38 #include <sys/strlog.h> 39 #include <sys/cmn_err.h> 40 #include <sys/zone.h> 41 42 #include <sys/systm.h> 43 #include <sys/param.h> 44 #include <sys/kmem.h> 45 #include <sys/ddi.h> 46 47 #include <sys/crypto/api.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 52 #include <netinet/ip6.h> 53 #include <netinet/icmp6.h> 54 #include <netinet/udp.h> 55 56 #include <inet/ip.h> 57 #include <inet/ip6.h> 58 59 #include <net/pfkeyv2.h> 60 #include <net/pfpolicy.h> 61 #include <inet/ipsec_info.h> 62 #include <inet/sadb.h> 63 #include <inet/ipsec_impl.h> 64 65 #include <inet/ip_impl.h> /* For IP_MOD_ID */ 66 67 #include <inet/ipsecah.h> 68 #include <inet/ipsecesp.h> 69 #include <inet/ipdrop.h> 70 #include <inet/ipclassifier.h> 71 #include <inet/iptun.h> 72 #include <inet/iptun/iptun_impl.h> 73 74 static void ipsec_update_present_flags(ipsec_stack_t *); 75 static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *, 76 netstack_t *); 77 static void ipsec_out_free(void *); 78 static void ipsec_in_free(void *); 79 static mblk_t *ipsec_attach_global_policy(mblk_t **, conn_t *, 80 ipsec_selector_t *, netstack_t *); 81 static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *, 82 ipsec_selector_t *, netstack_t *); 83 static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *, 84 ipha_t *, ip6_t *, uint64_t, netstack_t *); 85 static void ipsec_in_release_refs(ipsec_in_t *); 86 static void ipsec_out_release_refs(ipsec_out_t *); 87 static void ipsec_action_free_table(ipsec_action_t *); 88 static void ipsec_action_reclaim(void *); 89 static void ipsec_action_reclaim_stack(netstack_t *); 90 static void ipsid_init(netstack_t *); 91 static void ipsid_fini(netstack_t *); 92 93 /* sel_flags values for ipsec_init_inbound_sel(). */ 94 #define SEL_NONE 0x0000 95 #define SEL_PORT_POLICY 0x0001 96 #define SEL_IS_ICMP 0x0002 97 #define SEL_TUNNEL_MODE 0x0004 98 #define SEL_POST_FRAG 0x0008 99 100 /* Return values for ipsec_init_inbound_sel(). 
*/ 101 typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG} 102 selret_t; 103 104 static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *, 105 ipha_t *, ip6_t *, uint8_t); 106 107 static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *, 108 struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **, 109 kstat_named_t **); 110 static void ipsec_unregister_prov_update(void); 111 static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *); 112 static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *); 113 static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *); 114 static boolean_t ipsec_kstat_init(ipsec_stack_t *); 115 static void ipsec_kstat_destroy(ipsec_stack_t *); 116 static int ipsec_free_tables(ipsec_stack_t *); 117 static int tunnel_compare(const void *, const void *); 118 static void ipsec_freemsg_chain(mblk_t *); 119 static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *, 120 struct kstat_named *, ipdropper_t *); 121 static boolean_t ipsec_kstat_init(ipsec_stack_t *); 122 static void ipsec_kstat_destroy(ipsec_stack_t *); 123 static int ipsec_free_tables(ipsec_stack_t *); 124 static int tunnel_compare(const void *, const void *); 125 static void ipsec_freemsg_chain(mblk_t *); 126 static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *, 127 struct kstat_named *, ipdropper_t *); 128 129 /* 130 * Selector hash table is statically sized at module load time. 131 * we default to 251 buckets, which is the largest prime number under 255 132 */ 133 134 #define IPSEC_SPDHASH_DEFAULT 251 135 136 /* SPD hash-size tunable per tunnel. */ 137 #define TUN_SPDHASH_DEFAULT 5 138 139 uint32_t ipsec_spd_hashsize; 140 uint32_t tun_spd_hashsize; 141 142 #define IPSEC_SEL_NOHASH ((uint32_t)(~0)) 143 144 /* 145 * Handle global across all stack instances 146 */ 147 static crypto_notify_handle_t prov_update_handle = NULL; 148 149 static kmem_cache_t *ipsec_action_cache; 150 static kmem_cache_t *ipsec_sel_cache; 151 static kmem_cache_t *ipsec_pol_cache; 152 static kmem_cache_t *ipsec_info_cache; 153 154 /* Frag cache prototypes */ 155 static void ipsec_fragcache_clean(ipsec_fragcache_t *); 156 static ipsec_fragcache_entry_t *fragcache_delentry(int, 157 ipsec_fragcache_entry_t *, ipsec_fragcache_t *); 158 boolean_t ipsec_fragcache_init(ipsec_fragcache_t *); 159 void ipsec_fragcache_uninit(ipsec_fragcache_t *); 160 mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int, 161 ipsec_stack_t *); 162 163 int ipsec_hdr_pullup_needed = 0; 164 int ipsec_weird_null_inbound_policy = 0; 165 166 #define ALGBITS_ROUND_DOWN(x, align) (((x)/(align))*(align)) 167 #define ALGBITS_ROUND_UP(x, align) ALGBITS_ROUND_DOWN((x)+(align)-1, align) 168 169 /* 170 * Inbound traffic should have matching identities for both SA's. 171 */ 172 173 #define SA_IDS_MATCH(sa1, sa2) \ 174 (((sa1) == NULL) || ((sa2) == NULL) || \ 175 (((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) && \ 176 (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid)))) 177 178 /* 179 * IPv6 Fragments 180 */ 181 #define IS_V6_FRAGMENT(ipp) (ipp.ipp_fields & IPPF_FRAGHDR) 182 183 /* 184 * Policy failure messages. 
185 */ 186 static char *ipsec_policy_failure_msgs[] = { 187 188 /* IPSEC_POLICY_NOT_NEEDED */ 189 "%s: Dropping the datagram because the incoming packet " 190 "is %s, but the recipient expects clear; Source %s, " 191 "Destination %s.\n", 192 193 /* IPSEC_POLICY_MISMATCH */ 194 "%s: Policy Failure for the incoming packet (%s); Source %s, " 195 "Destination %s.\n", 196 197 /* IPSEC_POLICY_AUTH_NOT_NEEDED */ 198 "%s: Authentication present while not expected in the " 199 "incoming %s packet; Source %s, Destination %s.\n", 200 201 /* IPSEC_POLICY_ENCR_NOT_NEEDED */ 202 "%s: Encryption present while not expected in the " 203 "incoming %s packet; Source %s, Destination %s.\n", 204 205 /* IPSEC_POLICY_SE_NOT_NEEDED */ 206 "%s: Self-Encapsulation present while not expected in the " 207 "incoming %s packet; Source %s, Destination %s.\n", 208 }; 209 210 /* 211 * General overviews: 212 * 213 * Locking: 214 * 215 * All of the system policy structures are protected by a single 216 * rwlock. These structures are threaded in a 217 * fairly complex fashion and are not expected to change on a 218 * regular basis, so this should not cause scaling/contention 219 * problems. As a result, policy checks should (hopefully) be MT-hot. 220 * 221 * Allocation policy: 222 * 223 * We use custom kmem cache types for the various 224 * bits & pieces of the policy data structures. All allocations 225 * use KM_NOSLEEP instead of KM_SLEEP for policy allocation. The 226 * policy table is of potentially unbounded size, so we don't 227 * want to provide a way to hog all system memory with policy 228 * entries.. 229 */ 230 231 /* Convenient functions for freeing or dropping a b_next linked mblk chain */ 232 233 /* Free all messages in an mblk chain */ 234 static void 235 ipsec_freemsg_chain(mblk_t *mp) 236 { 237 mblk_t *mpnext; 238 while (mp != NULL) { 239 ASSERT(mp->b_prev == NULL); 240 mpnext = mp->b_next; 241 mp->b_next = NULL; 242 freemsg(mp); /* Always works, even if NULL */ 243 mp = mpnext; 244 } 245 } 246 247 /* ip_drop all messages in an mblk chain */ 248 static void 249 ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving, 250 ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called) 251 { 252 mblk_t *mpnext; 253 while (mp != NULL) { 254 ASSERT(mp->b_prev == NULL); 255 mpnext = mp->b_next; 256 mp->b_next = NULL; 257 ip_drop_packet(mp, inbound, arriving, outbound_ire, counter, 258 who_called); 259 mp = mpnext; 260 } 261 } 262 263 /* 264 * AVL tree comparison function. 265 * the in-kernel avl assumes unique keys for all objects. 266 * Since sometimes policy will duplicate rules, we may insert 267 * multiple rules with the same rule id, so we need a tie-breaker. 268 */ 269 static int 270 ipsec_policy_cmpbyid(const void *a, const void *b) 271 { 272 const ipsec_policy_t *ipa, *ipb; 273 uint64_t idxa, idxb; 274 275 ipa = (const ipsec_policy_t *)a; 276 ipb = (const ipsec_policy_t *)b; 277 idxa = ipa->ipsp_index; 278 idxb = ipb->ipsp_index; 279 280 if (idxa < idxb) 281 return (-1); 282 if (idxa > idxb) 283 return (1); 284 /* 285 * Tie-breaker #1: All installed policy rules have a non-NULL 286 * ipsl_sel (selector set), so an entry with a NULL ipsp_sel is not 287 * actually in-tree but rather a template node being used in 288 * an avl_find query; see ipsec_policy_delete(). This gives us 289 * a placeholder in the ordering just before the the first entry with 290 * a key >= the one we're looking for, so we can walk forward from 291 * that point to get the remaining entries with the same id. 
292 */ 293 if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL)) 294 return (-1); 295 if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL)) 296 return (1); 297 /* 298 * At most one of the arguments to the comparison should have a 299 * NULL selector pointer; if not, the tree is broken. 300 */ 301 ASSERT(ipa->ipsp_sel != NULL); 302 ASSERT(ipb->ipsp_sel != NULL); 303 /* 304 * Tie-breaker #2: use the virtual address of the policy node 305 * to arbitrarily break ties. Since we use the new tree node in 306 * the avl_find() in ipsec_insert_always, the new node will be 307 * inserted into the tree in the right place in the sequence. 308 */ 309 if (ipa < ipb) 310 return (-1); 311 if (ipa > ipb) 312 return (1); 313 return (0); 314 } 315 316 /* 317 * Free what ipsec_alloc_table allocated. 318 */ 319 void 320 ipsec_polhead_free_table(ipsec_policy_head_t *iph) 321 { 322 int dir; 323 int i; 324 325 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 326 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 327 328 if (ipr->ipr_hash == NULL) 329 continue; 330 331 for (i = 0; i < ipr->ipr_nchains; i++) { 332 ASSERT(ipr->ipr_hash[i].hash_head == NULL); 333 } 334 kmem_free(ipr->ipr_hash, ipr->ipr_nchains * 335 sizeof (ipsec_policy_hash_t)); 336 ipr->ipr_hash = NULL; 337 } 338 } 339 340 void 341 ipsec_polhead_destroy(ipsec_policy_head_t *iph) 342 { 343 int dir; 344 345 avl_destroy(&iph->iph_rulebyid); 346 rw_destroy(&iph->iph_lock); 347 348 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 349 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 350 int chain; 351 352 for (chain = 0; chain < ipr->ipr_nchains; chain++) 353 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 354 355 } 356 ipsec_polhead_free_table(iph); 357 } 358 359 /* 360 * Free the IPsec stack instance. 361 */ 362 /* ARGSUSED */ 363 static void 364 ipsec_stack_fini(netstackid_t stackid, void *arg) 365 { 366 ipsec_stack_t *ipss = (ipsec_stack_t *)arg; 367 void *cookie; 368 ipsec_tun_pol_t *node; 369 netstack_t *ns = ipss->ipsec_netstack; 370 int i; 371 ipsec_algtype_t algtype; 372 373 ipsec_loader_destroy(ipss); 374 375 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 376 /* 377 * It's possible we can just ASSERT() the tree is empty. After all, 378 * we aren't called until IP is ready to unload (and presumably all 379 * tunnels have been unplumbed). But we'll play it safe for now, the 380 * loop will just exit immediately if it's empty. 381 */ 382 cookie = NULL; 383 while ((node = (ipsec_tun_pol_t *) 384 avl_destroy_nodes(&ipss->ipsec_tunnel_policies, 385 &cookie)) != NULL) { 386 ITP_REFRELE(node, ns); 387 } 388 avl_destroy(&ipss->ipsec_tunnel_policies); 389 rw_exit(&ipss->ipsec_tunnel_policy_lock); 390 rw_destroy(&ipss->ipsec_tunnel_policy_lock); 391 392 ipsec_config_flush(ns); 393 394 ipsec_kstat_destroy(ipss); 395 396 ip_drop_unregister(&ipss->ipsec_dropper); 397 398 ip_drop_unregister(&ipss->ipsec_spd_dropper); 399 ip_drop_destroy(ipss); 400 /* 401 * Globals start with ref == 1 to prevent IPPH_REFRELE() from 402 * attempting to free them, hence they should have 1 now. 
403 */ 404 ipsec_polhead_destroy(&ipss->ipsec_system_policy); 405 ASSERT(ipss->ipsec_system_policy.iph_refs == 1); 406 ipsec_polhead_destroy(&ipss->ipsec_inactive_policy); 407 ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1); 408 409 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 410 ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head); 411 ipss->ipsec_action_hash[i].hash_head = NULL; 412 mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock)); 413 } 414 415 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 416 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 417 mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock)); 418 } 419 420 mutex_enter(&ipss->ipsec_alg_lock); 421 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype ++) { 422 int nalgs = ipss->ipsec_nalgs[algtype]; 423 424 for (i = 0; i < nalgs; i++) { 425 if (ipss->ipsec_alglists[algtype][i] != NULL) 426 ipsec_alg_unreg(algtype, i, ns); 427 } 428 } 429 mutex_exit(&ipss->ipsec_alg_lock); 430 mutex_destroy(&ipss->ipsec_alg_lock); 431 432 ipsid_gc(ns); 433 ipsid_fini(ns); 434 435 (void) ipsec_free_tables(ipss); 436 kmem_free(ipss, sizeof (*ipss)); 437 } 438 439 void 440 ipsec_policy_g_destroy(void) 441 { 442 kmem_cache_destroy(ipsec_action_cache); 443 kmem_cache_destroy(ipsec_sel_cache); 444 kmem_cache_destroy(ipsec_pol_cache); 445 kmem_cache_destroy(ipsec_info_cache); 446 447 ipsec_unregister_prov_update(); 448 449 netstack_unregister(NS_IPSEC); 450 } 451 452 453 /* 454 * Free what ipsec_alloc_tables allocated. 455 * Called when table allocation fails to free the table. 456 */ 457 static int 458 ipsec_free_tables(ipsec_stack_t *ipss) 459 { 460 int i; 461 462 if (ipss->ipsec_sel_hash != NULL) { 463 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { 464 ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); 465 } 466 kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize * 467 sizeof (*ipss->ipsec_sel_hash)); 468 ipss->ipsec_sel_hash = NULL; 469 ipss->ipsec_spd_hashsize = 0; 470 } 471 ipsec_polhead_free_table(&ipss->ipsec_system_policy); 472 ipsec_polhead_free_table(&ipss->ipsec_inactive_policy); 473 474 return (ENOMEM); 475 } 476 477 /* 478 * Attempt to allocate the tables in a single policy head. 479 * Return nonzero on failure after cleaning up any work in progress. 480 */ 481 int 482 ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, 483 boolean_t global_cleanup, netstack_t *ns) 484 { 485 int dir; 486 487 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 488 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 489 490 ipr->ipr_nchains = nchains; 491 ipr->ipr_hash = kmem_zalloc(nchains * 492 sizeof (ipsec_policy_hash_t), kmflag); 493 if (ipr->ipr_hash == NULL) 494 return (global_cleanup ? 495 ipsec_free_tables(ns->netstack_ipsec) : 496 ENOMEM); 497 } 498 return (0); 499 } 500 501 /* 502 * Attempt to allocate the various tables. Return nonzero on failure 503 * after cleaning up any work in progress. 
504 */ 505 static int 506 ipsec_alloc_tables(int kmflag, netstack_t *ns) 507 { 508 int error; 509 ipsec_stack_t *ipss = ns->netstack_ipsec; 510 511 error = ipsec_alloc_table(&ipss->ipsec_system_policy, 512 ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); 513 if (error != 0) 514 return (error); 515 516 error = ipsec_alloc_table(&ipss->ipsec_inactive_policy, 517 ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); 518 if (error != 0) 519 return (error); 520 521 ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize * 522 sizeof (*ipss->ipsec_sel_hash), kmflag); 523 524 if (ipss->ipsec_sel_hash == NULL) 525 return (ipsec_free_tables(ipss)); 526 527 return (0); 528 } 529 530 /* 531 * After table allocation, initialize a policy head. 532 */ 533 void 534 ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains) 535 { 536 int dir, chain; 537 538 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 539 avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid, 540 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 541 542 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 543 ipsec_policy_root_t *ipr = &iph->iph_root[dir]; 544 ipr->ipr_nchains = nchains; 545 546 for (chain = 0; chain < nchains; chain++) { 547 mutex_init(&(ipr->ipr_hash[chain].hash_lock), 548 NULL, MUTEX_DEFAULT, NULL); 549 } 550 } 551 } 552 553 static boolean_t 554 ipsec_kstat_init(ipsec_stack_t *ipss) 555 { 556 ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net", 557 KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t), 558 KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid); 559 560 if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL) 561 return (B_FALSE); 562 563 ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data; 564 565 #define KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64) 566 KI(esp_stat_in_requests); 567 KI(esp_stat_in_discards); 568 KI(esp_stat_lookup_failure); 569 KI(ah_stat_in_requests); 570 KI(ah_stat_in_discards); 571 KI(ah_stat_lookup_failure); 572 KI(sadb_acquire_maxpackets); 573 KI(sadb_acquire_qhiwater); 574 #undef KI 575 576 kstat_install(ipss->ipsec_ksp); 577 return (B_TRUE); 578 } 579 580 static void 581 ipsec_kstat_destroy(ipsec_stack_t *ipss) 582 { 583 kstat_delete_netstack(ipss->ipsec_ksp, 584 ipss->ipsec_netstack->netstack_stackid); 585 ipss->ipsec_kstats = NULL; 586 587 } 588 589 /* 590 * Initialize the IPsec stack instance. 591 */ 592 /* ARGSUSED */ 593 static void * 594 ipsec_stack_init(netstackid_t stackid, netstack_t *ns) 595 { 596 ipsec_stack_t *ipss; 597 int i; 598 599 ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP); 600 ipss->ipsec_netstack = ns; 601 602 /* 603 * FIXME: netstack_ipsec is used by some of the routines we call 604 * below, but it isn't set until this routine returns. 605 * Either we introduce optional xxx_stack_alloc() functions 606 * that will be called by the netstack framework before xxx_stack_init, 607 * or we switch spd.c and sadb.c to operate on ipsec_stack_t 608 * (latter has some include file order issues for sadb.h, but makes 609 * sense if we merge some of the ipsec related stack_t's together. 610 */ 611 ns->netstack_ipsec = ipss; 612 613 /* 614 * Make two attempts to allocate policy hash tables; try it at 615 * the "preferred" size (may be set in /etc/system) first, 616 * then fall back to the default size. 617 */ 618 ipss->ipsec_spd_hashsize = (ipsec_spd_hashsize == 0) ? 
619 IPSEC_SPDHASH_DEFAULT : ipsec_spd_hashsize; 620 621 if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) { 622 cmn_err(CE_WARN, 623 "Unable to allocate %d entry IPsec policy hash table", 624 ipss->ipsec_spd_hashsize); 625 ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; 626 cmn_err(CE_WARN, "Falling back to %d entries", 627 ipss->ipsec_spd_hashsize); 628 (void) ipsec_alloc_tables(KM_SLEEP, ns); 629 } 630 631 /* Just set a default for tunnels. */ 632 ipss->ipsec_tun_spd_hashsize = (tun_spd_hashsize == 0) ? 633 TUN_SPDHASH_DEFAULT : tun_spd_hashsize; 634 635 ipsid_init(ns); 636 /* 637 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting 638 * to free them. 639 */ 640 ipss->ipsec_system_policy.iph_refs = 1; 641 ipss->ipsec_inactive_policy.iph_refs = 1; 642 ipsec_polhead_init(&ipss->ipsec_system_policy, 643 ipss->ipsec_spd_hashsize); 644 ipsec_polhead_init(&ipss->ipsec_inactive_policy, 645 ipss->ipsec_spd_hashsize); 646 rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL); 647 avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare, 648 sizeof (ipsec_tun_pol_t), 0); 649 650 ipss->ipsec_next_policy_index = 1; 651 652 rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL); 653 rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL); 654 655 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) 656 mutex_init(&(ipss->ipsec_action_hash[i].hash_lock), 657 NULL, MUTEX_DEFAULT, NULL); 658 659 for (i = 0; i < ipss->ipsec_spd_hashsize; i++) 660 mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock), 661 NULL, MUTEX_DEFAULT, NULL); 662 663 mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL); 664 for (i = 0; i < IPSEC_NALGTYPES; i++) { 665 ipss->ipsec_nalgs[i] = 0; 666 } 667 668 ip_drop_init(ipss); 669 ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD"); 670 671 /* IP's IPsec code calls the packet dropper */ 672 ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing"); 673 674 (void) ipsec_kstat_init(ipss); 675 676 ipsec_loader_init(ipss); 677 ipsec_loader_start(ipss); 678 679 return (ipss); 680 } 681 682 /* Global across all stack instances */ 683 void 684 ipsec_policy_g_init(void) 685 { 686 ipsec_action_cache = kmem_cache_create("ipsec_actions", 687 sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL, 688 ipsec_action_reclaim, NULL, NULL, 0); 689 ipsec_sel_cache = kmem_cache_create("ipsec_selectors", 690 sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL, 691 NULL, NULL, NULL, 0); 692 ipsec_pol_cache = kmem_cache_create("ipsec_policy", 693 sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL, 694 NULL, NULL, NULL, 0); 695 ipsec_info_cache = kmem_cache_create("ipsec_info", 696 sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL, 697 NULL, NULL, NULL, 0); 698 699 /* 700 * We want to be informed each time a stack is created or 701 * destroyed in the kernel, so we can maintain the 702 * set of ipsec_stack_t's. 703 */ 704 netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini); 705 } 706 707 /* 708 * Sort algorithm lists. 709 * 710 * I may need to split this based on 711 * authentication/encryption, and I may wish to have an administrator 712 * configure this list. Hold on to some NDD variables... 713 * 714 * XXX For now, sort on minimum key size (GAG!). While minimum key size is 715 * not the ideal metric, it's the only quantifiable measure available. 716 * We need a better metric for sorting algorithms by preference. 
717 */ 718 static void 719 alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns) 720 { 721 ipsec_stack_t *ipss = ns->netstack_ipsec; 722 ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid]; 723 uint8_t holder, swap; 724 uint_t i; 725 uint_t count = ipss->ipsec_nalgs[at]; 726 ASSERT(ai != NULL); 727 ASSERT(algid == ai->alg_id); 728 729 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 730 731 holder = algid; 732 733 for (i = 0; i < count - 1; i++) { 734 ipsec_alginfo_t *alt; 735 736 alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]]; 737 /* 738 * If you want to give precedence to newly added algs, 739 * add the = in the > comparison. 740 */ 741 if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) { 742 /* Swap sortlist[i] and holder. */ 743 swap = ipss->ipsec_sortlist[at][i]; 744 ipss->ipsec_sortlist[at][i] = holder; 745 holder = swap; 746 ai = alt; 747 } /* Else just continue. */ 748 } 749 750 /* Store holder in last slot. */ 751 ipss->ipsec_sortlist[at][i] = holder; 752 } 753 754 /* 755 * Remove an algorithm from a sorted algorithm list. 756 * This should be considerably easier, even with complex sorting. 757 */ 758 static void 759 alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns) 760 { 761 boolean_t copyback = B_FALSE; 762 int i; 763 ipsec_stack_t *ipss = ns->netstack_ipsec; 764 int newcount = ipss->ipsec_nalgs[at]; 765 766 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 767 768 for (i = 0; i <= newcount; i++) { 769 if (copyback) { 770 ipss->ipsec_sortlist[at][i-1] = 771 ipss->ipsec_sortlist[at][i]; 772 } else if (ipss->ipsec_sortlist[at][i] == algid) { 773 copyback = B_TRUE; 774 } 775 } 776 } 777 778 /* 779 * Add the specified algorithm to the algorithm tables. 780 * Must be called while holding the algorithm table writer lock. 781 */ 782 void 783 ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns) 784 { 785 ipsec_stack_t *ipss = ns->netstack_ipsec; 786 787 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 788 789 ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL); 790 ipsec_alg_fix_min_max(alg, algtype, ns); 791 ipss->ipsec_alglists[algtype][alg->alg_id] = alg; 792 793 ipss->ipsec_nalgs[algtype]++; 794 alg_insert_sortlist(algtype, alg->alg_id, ns); 795 } 796 797 /* 798 * Remove the specified algorithm from the algorithm tables. 799 * Must be called while holding the algorithm table writer lock. 800 */ 801 void 802 ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns) 803 { 804 ipsec_stack_t *ipss = ns->netstack_ipsec; 805 806 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 807 808 ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL); 809 ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]); 810 ipss->ipsec_alglists[algtype][algid] = NULL; 811 812 ipss->ipsec_nalgs[algtype]--; 813 alg_remove_sortlist(algtype, algid, ns); 814 } 815 816 /* 817 * Hooks for spdsock to get a grip on system policy. 818 */ 819 820 ipsec_policy_head_t * 821 ipsec_system_policy(netstack_t *ns) 822 { 823 ipsec_stack_t *ipss = ns->netstack_ipsec; 824 ipsec_policy_head_t *h = &ipss->ipsec_system_policy; 825 826 IPPH_REFHOLD(h); 827 return (h); 828 } 829 830 ipsec_policy_head_t * 831 ipsec_inactive_policy(netstack_t *ns) 832 { 833 ipsec_stack_t *ipss = ns->netstack_ipsec; 834 ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy; 835 836 IPPH_REFHOLD(h); 837 return (h); 838 } 839 840 /* 841 * Lock inactive policy, then active policy, then exchange policy root 842 * pointers. 
843 */ 844 void 845 ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive, 846 netstack_t *ns) 847 { 848 int af, dir; 849 avl_tree_t r1, r2; 850 851 rw_enter(&inactive->iph_lock, RW_WRITER); 852 rw_enter(&active->iph_lock, RW_WRITER); 853 854 r1 = active->iph_rulebyid; 855 r2 = inactive->iph_rulebyid; 856 active->iph_rulebyid = r2; 857 inactive->iph_rulebyid = r1; 858 859 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 860 ipsec_policy_hash_t *h1, *h2; 861 862 h1 = active->iph_root[dir].ipr_hash; 863 h2 = inactive->iph_root[dir].ipr_hash; 864 active->iph_root[dir].ipr_hash = h2; 865 inactive->iph_root[dir].ipr_hash = h1; 866 867 for (af = 0; af < IPSEC_NAF; af++) { 868 ipsec_policy_t *t1, *t2; 869 870 t1 = active->iph_root[dir].ipr_nonhash[af]; 871 t2 = inactive->iph_root[dir].ipr_nonhash[af]; 872 active->iph_root[dir].ipr_nonhash[af] = t2; 873 inactive->iph_root[dir].ipr_nonhash[af] = t1; 874 if (t1 != NULL) { 875 t1->ipsp_hash.hash_pp = 876 &(inactive->iph_root[dir].ipr_nonhash[af]); 877 } 878 if (t2 != NULL) { 879 t2->ipsp_hash.hash_pp = 880 &(active->iph_root[dir].ipr_nonhash[af]); 881 } 882 883 } 884 } 885 active->iph_gen++; 886 inactive->iph_gen++; 887 ipsec_update_present_flags(ns->netstack_ipsec); 888 rw_exit(&active->iph_lock); 889 rw_exit(&inactive->iph_lock); 890 } 891 892 /* 893 * Swap global policy primary/secondary. 894 */ 895 void 896 ipsec_swap_global_policy(netstack_t *ns) 897 { 898 ipsec_stack_t *ipss = ns->netstack_ipsec; 899 900 ipsec_swap_policy(&ipss->ipsec_system_policy, 901 &ipss->ipsec_inactive_policy, ns); 902 } 903 904 /* 905 * Clone one policy rule.. 906 */ 907 static ipsec_policy_t * 908 ipsec_copy_policy(const ipsec_policy_t *src) 909 { 910 ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 911 912 if (dst == NULL) 913 return (NULL); 914 915 /* 916 * Adjust refcounts of cloned state. 917 */ 918 IPACT_REFHOLD(src->ipsp_act); 919 src->ipsp_sel->ipsl_refs++; 920 921 HASH_NULL(dst, ipsp_hash); 922 dst->ipsp_refs = 1; 923 dst->ipsp_sel = src->ipsp_sel; 924 dst->ipsp_act = src->ipsp_act; 925 dst->ipsp_prio = src->ipsp_prio; 926 dst->ipsp_index = src->ipsp_index; 927 928 return (dst); 929 } 930 931 void 932 ipsec_insert_always(avl_tree_t *tree, void *new_node) 933 { 934 void *node; 935 avl_index_t where; 936 937 node = avl_find(tree, new_node, &where); 938 ASSERT(node == NULL); 939 avl_insert(tree, new_node, where); 940 } 941 942 943 static int 944 ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src, 945 ipsec_policy_t **dstp) 946 { 947 for (; src != NULL; src = src->ipsp_hash.hash_next) { 948 ipsec_policy_t *dst = ipsec_copy_policy(src); 949 if (dst == NULL) 950 return (ENOMEM); 951 952 HASHLIST_INSERT(dst, ipsp_hash, *dstp); 953 ipsec_insert_always(&dph->iph_rulebyid, dst); 954 } 955 return (0); 956 } 957 958 959 960 /* 961 * Make one policy head look exactly like another. 962 * 963 * As with ipsec_swap_policy, we lock the destination policy head first, then 964 * the source policy head. Note that we only need to read-lock the source 965 * policy head as we are not changing it. 
966 */ 967 int 968 ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph, 969 netstack_t *ns) 970 { 971 int af, dir, chain, nchains; 972 973 rw_enter(&dph->iph_lock, RW_WRITER); 974 975 ipsec_polhead_flush(dph, ns); 976 977 rw_enter(&sph->iph_lock, RW_READER); 978 979 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 980 ipsec_policy_root_t *dpr = &dph->iph_root[dir]; 981 ipsec_policy_root_t *spr = &sph->iph_root[dir]; 982 nchains = dpr->ipr_nchains; 983 984 ASSERT(dpr->ipr_nchains == spr->ipr_nchains); 985 986 for (af = 0; af < IPSEC_NAF; af++) { 987 if (ipsec_copy_chain(dph, spr->ipr_nonhash[af], 988 &dpr->ipr_nonhash[af])) 989 goto abort_copy; 990 } 991 992 for (chain = 0; chain < nchains; chain++) { 993 if (ipsec_copy_chain(dph, 994 spr->ipr_hash[chain].hash_head, 995 &dpr->ipr_hash[chain].hash_head)) 996 goto abort_copy; 997 } 998 } 999 1000 dph->iph_gen++; 1001 1002 rw_exit(&sph->iph_lock); 1003 rw_exit(&dph->iph_lock); 1004 return (0); 1005 1006 abort_copy: 1007 ipsec_polhead_flush(dph, ns); 1008 rw_exit(&sph->iph_lock); 1009 rw_exit(&dph->iph_lock); 1010 return (ENOMEM); 1011 } 1012 1013 /* 1014 * Clone currently active policy to the inactive policy list. 1015 */ 1016 int 1017 ipsec_clone_system_policy(netstack_t *ns) 1018 { 1019 ipsec_stack_t *ipss = ns->netstack_ipsec; 1020 1021 return (ipsec_copy_polhead(&ipss->ipsec_system_policy, 1022 &ipss->ipsec_inactive_policy, ns)); 1023 } 1024 1025 /* 1026 * Extract the string from ipsec_policy_failure_msgs[type] and 1027 * log it. 1028 * 1029 */ 1030 void 1031 ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h, 1032 boolean_t secure, netstack_t *ns) 1033 { 1034 char sbuf[INET6_ADDRSTRLEN]; 1035 char dbuf[INET6_ADDRSTRLEN]; 1036 char *s; 1037 char *d; 1038 ipsec_stack_t *ipss = ns->netstack_ipsec; 1039 1040 ASSERT((ipha == NULL && ip6h != NULL) || 1041 (ip6h == NULL && ipha != NULL)); 1042 1043 if (ipha != NULL) { 1044 s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf)); 1045 d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf)); 1046 } else { 1047 s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf)); 1048 d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf)); 1049 1050 } 1051 1052 /* Always bump the policy failure counter. */ 1053 ipss->ipsec_policy_failure_count[type]++; 1054 1055 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 1056 ipsec_policy_failure_msgs[type], func_name, 1057 (secure ? "secure" : "not secure"), s, d); 1058 } 1059 1060 /* 1061 * Rate-limiting front-end to strlog() for AH and ESP. Uses the ndd variables 1062 * in /dev/ip and the same rate-limiting clock so that there's a single 1063 * knob to turn to throttle the rate of messages. 1064 */ 1065 void 1066 ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl, 1067 char *fmt, ...) 1068 { 1069 va_list adx; 1070 hrtime_t current = gethrtime(); 1071 ip_stack_t *ipst = ns->netstack_ip; 1072 ipsec_stack_t *ipss = ns->netstack_ipsec; 1073 1074 sl |= SL_CONSOLE; 1075 /* 1076 * Throttle logging to stop syslog from being swamped. If variable 1077 * 'ipsec_policy_log_interval' is zero, don't log any messages at 1078 * all, otherwise log only one message every 'ipsec_policy_log_interval' 1079 * msec. Convert interval (in msec) to hrtime (in nsec). 
1080 */ 1081 1082 if (ipst->ips_ipsec_policy_log_interval) { 1083 if (ipss->ipsec_policy_failure_last + 1084 ((hrtime_t)ipst->ips_ipsec_policy_log_interval * 1085 (hrtime_t)1000000) <= current) { 1086 va_start(adx, fmt); 1087 (void) vstrlog(mid, sid, level, sl, fmt, adx); 1088 va_end(adx); 1089 ipss->ipsec_policy_failure_last = current; 1090 } 1091 } 1092 } 1093 1094 void 1095 ipsec_config_flush(netstack_t *ns) 1096 { 1097 ipsec_stack_t *ipss = ns->netstack_ipsec; 1098 1099 rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER); 1100 ipsec_polhead_flush(&ipss->ipsec_system_policy, ns); 1101 ipss->ipsec_next_policy_index = 1; 1102 rw_exit(&ipss->ipsec_system_policy.iph_lock); 1103 ipsec_action_reclaim_stack(ns); 1104 } 1105 1106 /* 1107 * Clip a policy's min/max keybits vs. the capabilities of the 1108 * algorithm. 1109 */ 1110 static void 1111 act_alg_adjust(uint_t algtype, uint_t algid, 1112 uint16_t *minbits, uint16_t *maxbits, netstack_t *ns) 1113 { 1114 ipsec_stack_t *ipss = ns->netstack_ipsec; 1115 ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid]; 1116 1117 if (algp != NULL) { 1118 /* 1119 * If passed-in minbits is zero, we assume the caller trusts 1120 * us with setting the minimum key size. We pick the 1121 * algorithms DEFAULT key size for the minimum in this case. 1122 */ 1123 if (*minbits == 0) { 1124 *minbits = algp->alg_default_bits; 1125 ASSERT(*minbits >= algp->alg_minbits); 1126 } else { 1127 *minbits = MAX(MIN(*minbits, algp->alg_maxbits), 1128 algp->alg_minbits); 1129 } 1130 if (*maxbits == 0) 1131 *maxbits = algp->alg_maxbits; 1132 else 1133 *maxbits = MIN(MAX(*maxbits, algp->alg_minbits), 1134 algp->alg_maxbits); 1135 ASSERT(*minbits <= *maxbits); 1136 } else { 1137 *minbits = 0; 1138 *maxbits = 0; 1139 } 1140 } 1141 1142 /* 1143 * Check an action's requested algorithms against the algorithms currently 1144 * loaded in the system. 1145 */ 1146 boolean_t 1147 ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns) 1148 { 1149 ipsec_prot_t *ipp; 1150 ipsec_stack_t *ipss = ns->netstack_ipsec; 1151 1152 ipp = &act->ipa_apply; 1153 1154 if (ipp->ipp_use_ah && 1155 ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) { 1156 *diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG; 1157 return (B_FALSE); 1158 } 1159 if (ipp->ipp_use_espa && 1160 ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] == 1161 NULL) { 1162 *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG; 1163 return (B_FALSE); 1164 } 1165 if (ipp->ipp_use_esp && 1166 ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) { 1167 *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG; 1168 return (B_FALSE); 1169 } 1170 1171 act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg, 1172 &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns); 1173 act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg, 1174 &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns); 1175 act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg, 1176 &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns); 1177 1178 if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) { 1179 *diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE; 1180 return (B_FALSE); 1181 } 1182 if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) { 1183 *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE; 1184 return (B_FALSE); 1185 } 1186 if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) { 1187 *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE; 1188 return (B_FALSE); 1189 } 1190 /* TODO: sanity check lifetimes */ 1191 return (B_TRUE); 1192 } 1193 1194 /* 1195 * Set up a single action during wildcard expansion.. 
1196 */ 1197 static void 1198 ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act, 1199 uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns) 1200 { 1201 ipsec_prot_t *ipp; 1202 1203 *outact = *act; 1204 ipp = &outact->ipa_apply; 1205 ipp->ipp_auth_alg = (uint8_t)auth_alg; 1206 ipp->ipp_encr_alg = (uint8_t)encr_alg; 1207 ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg; 1208 1209 act_alg_adjust(IPSEC_ALG_AUTH, auth_alg, 1210 &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns); 1211 act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg, 1212 &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns); 1213 act_alg_adjust(IPSEC_ALG_ENCR, encr_alg, 1214 &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns); 1215 } 1216 1217 /* 1218 * combinatoric expansion time: expand a wildcarded action into an 1219 * array of wildcarded actions; we return the exploded action list, 1220 * and return a count in *nact (output only). 1221 */ 1222 static ipsec_act_t * 1223 ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns) 1224 { 1225 boolean_t use_ah, use_esp, use_espa; 1226 boolean_t wild_auth, wild_encr, wild_eauth; 1227 uint_t auth_alg, auth_idx, auth_min, auth_max; 1228 uint_t eauth_alg, eauth_idx, eauth_min, eauth_max; 1229 uint_t encr_alg, encr_idx, encr_min, encr_max; 1230 uint_t action_count, ai; 1231 ipsec_act_t *outact; 1232 ipsec_stack_t *ipss = ns->netstack_ipsec; 1233 1234 if (act->ipa_type != IPSEC_ACT_APPLY) { 1235 outact = kmem_alloc(sizeof (*act), KM_NOSLEEP); 1236 *nact = 1; 1237 if (outact != NULL) 1238 bcopy(act, outact, sizeof (*act)); 1239 return (outact); 1240 } 1241 /* 1242 * compute the combinatoric explosion.. 1243 * 1244 * we assume a request for encr if esp_req is PREF_REQUIRED 1245 * we assume a request for ah auth if ah_req is PREF_REQUIRED. 1246 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED 1247 */ 1248 1249 use_ah = act->ipa_apply.ipp_use_ah; 1250 use_esp = act->ipa_apply.ipp_use_esp; 1251 use_espa = act->ipa_apply.ipp_use_espa; 1252 auth_alg = act->ipa_apply.ipp_auth_alg; 1253 eauth_alg = act->ipa_apply.ipp_esp_auth_alg; 1254 encr_alg = act->ipa_apply.ipp_encr_alg; 1255 1256 wild_auth = use_ah && (auth_alg == 0); 1257 wild_eauth = use_espa && (eauth_alg == 0); 1258 wild_encr = use_esp && (encr_alg == 0); 1259 1260 action_count = 1; 1261 auth_min = auth_max = auth_alg; 1262 eauth_min = eauth_max = eauth_alg; 1263 encr_min = encr_max = encr_alg; 1264 1265 /* 1266 * set up for explosion.. for each dimension, expand output 1267 * size by the explosion factor. 1268 * 1269 * Don't include the "any" algorithms, if defined, as no 1270 * kernel policies should be set for these algorithms. 1271 */ 1272 1273 #define SET_EXP_MINMAX(type, wild, alg, min, max, ipss) \ 1274 if (wild) { \ 1275 int nalgs = ipss->ipsec_nalgs[type]; \ 1276 if (ipss->ipsec_alglists[type][alg] != NULL) \ 1277 nalgs--; \ 1278 action_count *= nalgs; \ 1279 min = 0; \ 1280 max = ipss->ipsec_nalgs[type] - 1; \ 1281 } 1282 1283 SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE, 1284 auth_min, auth_max, ipss); 1285 SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE, 1286 eauth_min, eauth_max, ipss); 1287 SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE, 1288 encr_min, encr_max, ipss); 1289 1290 #undef SET_EXP_MINMAX 1291 1292 /* 1293 * ok, allocate the whole mess.. 1294 */ 1295 1296 outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP); 1297 if (outact == NULL) 1298 return (NULL); 1299 1300 /* 1301 * Now compute all combinations. 
Note that non-wildcarded 1302 * dimensions just get a single value from auth_min, while 1303 * wildcarded dimensions indirect through the sortlist. 1304 * 1305 * We do encryption outermost since, at this time, there's 1306 * greater difference in security and performance between 1307 * encryption algorithms vs. authentication algorithms. 1308 */ 1309 1310 ai = 0; 1311 1312 #define WHICH_ALG(type, wild, idx, ipss) \ 1313 ((wild)?(ipss->ipsec_sortlist[type][idx]):(idx)) 1314 1315 for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) { 1316 encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss); 1317 if (wild_encr && encr_alg == SADB_EALG_NONE) 1318 continue; 1319 for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) { 1320 auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth, 1321 auth_idx, ipss); 1322 if (wild_auth && auth_alg == SADB_AALG_NONE) 1323 continue; 1324 for (eauth_idx = eauth_min; eauth_idx <= eauth_max; 1325 eauth_idx++) { 1326 eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH, 1327 wild_eauth, eauth_idx, ipss); 1328 if (wild_eauth && eauth_alg == SADB_AALG_NONE) 1329 continue; 1330 1331 ipsec_setup_act(&outact[ai], act, 1332 auth_alg, encr_alg, eauth_alg, ns); 1333 ai++; 1334 } 1335 } 1336 } 1337 1338 #undef WHICH_ALG 1339 1340 ASSERT(ai == action_count); 1341 *nact = action_count; 1342 return (outact); 1343 } 1344 1345 /* 1346 * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t. 1347 */ 1348 static void 1349 ipsec_prot_from_req(const ipsec_req_t *req, ipsec_prot_t *ipp) 1350 { 1351 bzero(ipp, sizeof (*ipp)); 1352 /* 1353 * ipp_use_* are bitfields. Look at "!!" in the following as a 1354 * "boolean canonicalization" operator. 1355 */ 1356 ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED); 1357 ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED); 1358 ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg); 1359 ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED); 1360 ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) & 1361 IPSEC_PREF_UNIQUE); 1362 ipp->ipp_encr_alg = req->ipsr_esp_alg; 1363 /* 1364 * SADB_AALG_ANY is a placeholder to distinguish "any" from 1365 * "none" above. If auth is required, as determined above, 1366 * SADB_AALG_ANY becomes 0, which is the representation 1367 * of "any" and "none" in PF_KEY v2. 1368 */ 1369 ipp->ipp_auth_alg = (req->ipsr_auth_alg != SADB_AALG_ANY) ? 1370 req->ipsr_auth_alg : 0; 1371 ipp->ipp_esp_auth_alg = (req->ipsr_esp_auth_alg != SADB_AALG_ANY) ? 1372 req->ipsr_esp_auth_alg : 0; 1373 } 1374 1375 /* 1376 * Extract a new-style action from a request. 1377 */ 1378 void 1379 ipsec_actvec_from_req(const ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp, 1380 netstack_t *ns) 1381 { 1382 struct ipsec_act act; 1383 1384 bzero(&act, sizeof (act)); 1385 if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) && 1386 (req->ipsr_esp_req & IPSEC_PREF_NEVER)) { 1387 act.ipa_type = IPSEC_ACT_BYPASS; 1388 } else { 1389 act.ipa_type = IPSEC_ACT_APPLY; 1390 ipsec_prot_from_req(req, &act.ipa_apply); 1391 } 1392 *actp = ipsec_act_wildcard_expand(&act, nactp, ns); 1393 } 1394 1395 /* 1396 * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat). 1397 * We assume caller has already zero'ed *req for us. 
1398 */ 1399 static int 1400 ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req) 1401 { 1402 req->ipsr_esp_alg = ipp->ipp_encr_alg; 1403 req->ipsr_auth_alg = ipp->ipp_auth_alg; 1404 req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg; 1405 1406 if (ipp->ipp_use_unique) { 1407 req->ipsr_ah_req |= IPSEC_PREF_UNIQUE; 1408 req->ipsr_esp_req |= IPSEC_PREF_UNIQUE; 1409 } 1410 if (ipp->ipp_use_se) 1411 req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED; 1412 if (ipp->ipp_use_ah) 1413 req->ipsr_ah_req |= IPSEC_PREF_REQUIRED; 1414 if (ipp->ipp_use_esp) 1415 req->ipsr_esp_req |= IPSEC_PREF_REQUIRED; 1416 return (sizeof (*req)); 1417 } 1418 1419 /* 1420 * Convert a new-style action back to an ipsec_req_t (more backwards compat). 1421 * We assume caller has already zero'ed *req for us. 1422 */ 1423 static int 1424 ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req) 1425 { 1426 switch (ap->ipa_act.ipa_type) { 1427 case IPSEC_ACT_BYPASS: 1428 req->ipsr_ah_req = IPSEC_PREF_NEVER; 1429 req->ipsr_esp_req = IPSEC_PREF_NEVER; 1430 return (sizeof (*req)); 1431 case IPSEC_ACT_APPLY: 1432 return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req)); 1433 } 1434 return (sizeof (*req)); 1435 } 1436 1437 /* 1438 * Convert a new-style action back to an ipsec_req_t (more backwards compat). 1439 * We assume caller has already zero'ed *req for us. 1440 */ 1441 int 1442 ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af) 1443 { 1444 ipsec_policy_t *p; 1445 1446 /* 1447 * FULL-PERSOCK: consult hash table, too? 1448 */ 1449 for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af]; 1450 p != NULL; 1451 p = p->ipsp_hash.hash_next) { 1452 if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0) 1453 return (ipsec_req_from_act(p->ipsp_act, req)); 1454 } 1455 return (sizeof (*req)); 1456 } 1457 1458 /* 1459 * Based on per-socket or latched policy, convert to an appropriate 1460 * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can 1461 * be tail-called from ip. 1462 */ 1463 int 1464 ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af) 1465 { 1466 ipsec_latch_t *ipl; 1467 int rv = sizeof (ipsec_req_t); 1468 1469 bzero(req, sizeof (*req)); 1470 1471 mutex_enter(&connp->conn_lock); 1472 ipl = connp->conn_latch; 1473 1474 /* 1475 * Find appropriate policy. First choice is latched action; 1476 * failing that, see latched policy; failing that, 1477 * look at configured policy. 1478 */ 1479 if (ipl != NULL) { 1480 if (ipl->ipl_in_action != NULL) { 1481 rv = ipsec_req_from_act(ipl->ipl_in_action, req); 1482 goto done; 1483 } 1484 if (ipl->ipl_in_policy != NULL) { 1485 rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act, 1486 req); 1487 goto done; 1488 } 1489 } 1490 if (connp->conn_policy != NULL) 1491 rv = ipsec_req_from_head(connp->conn_policy, req, af); 1492 done: 1493 mutex_exit(&connp->conn_lock); 1494 return (rv); 1495 } 1496 1497 void 1498 ipsec_actvec_free(ipsec_act_t *act, uint_t nact) 1499 { 1500 kmem_free(act, nact * sizeof (*act)); 1501 } 1502 1503 /* 1504 * When outbound policy is not cached, look it up the hard way and attach 1505 * an ipsec_out_t to the packet.. 
1506 */ 1507 static mblk_t * 1508 ipsec_attach_global_policy(mblk_t **mp, conn_t *connp, ipsec_selector_t *sel, 1509 netstack_t *ns) 1510 { 1511 ipsec_policy_t *p; 1512 1513 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns); 1514 1515 if (p == NULL) 1516 return (NULL); 1517 return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns)); 1518 } 1519 1520 /* 1521 * We have an ipsec_out already, but don't have cached policy; fill it in 1522 * with the right actions. 1523 */ 1524 static mblk_t * 1525 ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp, 1526 ipsec_selector_t *sel, netstack_t *ns) 1527 { 1528 ipsec_out_t *io; 1529 ipsec_policy_t *p; 1530 1531 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 1532 ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA); 1533 1534 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1535 1536 if (io->ipsec_out_policy == NULL) { 1537 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns); 1538 io->ipsec_out_policy = p; 1539 } 1540 return (ipsec_mp); 1541 } 1542 1543 1544 /* 1545 * Consumes a reference to ipsp. 1546 */ 1547 static mblk_t * 1548 ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present, 1549 ipsec_policy_t *ipsp) 1550 { 1551 mblk_t *ipsec_mp; 1552 ipsec_in_t *ii; 1553 netstack_t *ns; 1554 1555 if (!mctl_present) 1556 return (first_mp); 1557 1558 ipsec_mp = first_mp; 1559 1560 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1561 ns = ii->ipsec_in_ns; 1562 ASSERT(ii->ipsec_in_loopback); 1563 IPPOL_REFRELE(ipsp, ns); 1564 1565 /* 1566 * We should do an actual policy check here. Revisit this 1567 * when we revisit the IPsec API. (And pass a conn_t in when we 1568 * get there.) 1569 */ 1570 1571 return (first_mp); 1572 } 1573 1574 /* 1575 * Check that packet's inbound ports & proto match the selectors 1576 * expected by the SAs it traversed on the way in. 1577 */ 1578 static boolean_t 1579 ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason, 1580 kstat_named_t **counter, uint64_t pkt_unique) 1581 { 1582 uint64_t ah_mask, esp_mask; 1583 ipsa_t *ah_assoc; 1584 ipsa_t *esp_assoc; 1585 netstack_t *ns = ii->ipsec_in_ns; 1586 ipsec_stack_t *ipss = ns->netstack_ipsec; 1587 1588 ASSERT(ii->ipsec_in_secure); 1589 ASSERT(!ii->ipsec_in_loopback); 1590 1591 ah_assoc = ii->ipsec_in_ah_sa; 1592 esp_assoc = ii->ipsec_in_esp_sa; 1593 ASSERT((ah_assoc != NULL) || (esp_assoc != NULL)); 1594 1595 ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0; 1596 esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0; 1597 1598 if ((ah_mask == 0) && (esp_mask == 0)) 1599 return (B_TRUE); 1600 1601 /* 1602 * The pkt_unique check will also check for tunnel mode on the SA 1603 * vs. the tunneled_packet boolean. "Be liberal in what you receive" 1604 * should not apply in this case. 
;) 1605 */ 1606 1607 if (ah_mask != 0 && 1608 ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) { 1609 *reason = "AH inner header mismatch"; 1610 *counter = DROPPER(ipss, ipds_spd_ah_innermismatch); 1611 return (B_FALSE); 1612 } 1613 if (esp_mask != 0 && 1614 esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) { 1615 *reason = "ESP inner header mismatch"; 1616 *counter = DROPPER(ipss, ipds_spd_esp_innermismatch); 1617 return (B_FALSE); 1618 } 1619 return (B_TRUE); 1620 } 1621 1622 static boolean_t 1623 ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, 1624 ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter) 1625 { 1626 boolean_t ret = B_TRUE; 1627 ipsec_prot_t *ipp; 1628 ipsa_t *ah_assoc; 1629 ipsa_t *esp_assoc; 1630 boolean_t decaps; 1631 netstack_t *ns = ii->ipsec_in_ns; 1632 ipsec_stack_t *ipss = ns->netstack_ipsec; 1633 1634 ASSERT((ipha == NULL && ip6h != NULL) || 1635 (ip6h == NULL && ipha != NULL)); 1636 1637 if (ii->ipsec_in_loopback) { 1638 /* 1639 * Besides accepting pointer-equivalent actions, we also 1640 * accept any ICMP errors we generated for ourselves, 1641 * regardless of policy. If we do not wish to make this 1642 * assumption in the future, check here, and where 1643 * icmp_loopback is initialized in ip.c and ip6.c. (Look for 1644 * ipsec_out_icmp_loopback.) 1645 */ 1646 if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback) 1647 return (B_TRUE); 1648 1649 /* Deep compare necessary here?? */ 1650 *counter = DROPPER(ipss, ipds_spd_loopback_mismatch); 1651 *reason = "loopback policy mismatch"; 1652 return (B_FALSE); 1653 } 1654 ASSERT(!ii->ipsec_in_icmp_loopback); 1655 1656 ah_assoc = ii->ipsec_in_ah_sa; 1657 esp_assoc = ii->ipsec_in_esp_sa; 1658 1659 decaps = ii->ipsec_in_decaps; 1660 1661 switch (ap->ipa_act.ipa_type) { 1662 case IPSEC_ACT_DISCARD: 1663 case IPSEC_ACT_REJECT: 1664 /* Should "fail hard" */ 1665 *counter = DROPPER(ipss, ipds_spd_explicit); 1666 *reason = "blocked by policy"; 1667 return (B_FALSE); 1668 1669 case IPSEC_ACT_BYPASS: 1670 case IPSEC_ACT_CLEAR: 1671 *counter = DROPPER(ipss, ipds_spd_got_secure); 1672 *reason = "expected clear, got protected"; 1673 return (B_FALSE); 1674 1675 case IPSEC_ACT_APPLY: 1676 ipp = &ap->ipa_act.ipa_apply; 1677 /* 1678 * As of now we do the simple checks of whether 1679 * the datagram has gone through the required IPSEC 1680 * protocol constraints or not. We might have more 1681 * in the future like sensitive levels, key bits, etc. 1682 * If it fails the constraints, check whether we would 1683 * have accepted this if it had come in clear. 1684 */ 1685 if (ipp->ipp_use_ah) { 1686 if (ah_assoc == NULL) { 1687 ret = ipsec_inbound_accept_clear(mp, ipha, 1688 ip6h); 1689 *counter = DROPPER(ipss, ipds_spd_got_clear); 1690 *reason = "unprotected not accepted"; 1691 break; 1692 } 1693 ASSERT(ah_assoc != NULL); 1694 ASSERT(ipp->ipp_auth_alg != 0); 1695 1696 if (ah_assoc->ipsa_auth_alg != 1697 ipp->ipp_auth_alg) { 1698 *counter = DROPPER(ipss, ipds_spd_bad_ahalg); 1699 *reason = "unacceptable ah alg"; 1700 ret = B_FALSE; 1701 break; 1702 } 1703 } else if (ah_assoc != NULL) { 1704 /* 1705 * Don't allow this. Check IPSEC NOTE above 1706 * ip_fanout_proto(). 
1707 */ 1708 *counter = DROPPER(ipss, ipds_spd_got_ah); 1709 *reason = "unexpected AH"; 1710 ret = B_FALSE; 1711 break; 1712 } 1713 if (ipp->ipp_use_esp) { 1714 if (esp_assoc == NULL) { 1715 ret = ipsec_inbound_accept_clear(mp, ipha, 1716 ip6h); 1717 *counter = DROPPER(ipss, ipds_spd_got_clear); 1718 *reason = "unprotected not accepted"; 1719 break; 1720 } 1721 ASSERT(esp_assoc != NULL); 1722 ASSERT(ipp->ipp_encr_alg != 0); 1723 1724 if (esp_assoc->ipsa_encr_alg != 1725 ipp->ipp_encr_alg) { 1726 *counter = DROPPER(ipss, ipds_spd_bad_espealg); 1727 *reason = "unacceptable esp alg"; 1728 ret = B_FALSE; 1729 break; 1730 } 1731 /* 1732 * If the client does not need authentication, 1733 * we don't verify the alogrithm. 1734 */ 1735 if (ipp->ipp_use_espa) { 1736 if (esp_assoc->ipsa_auth_alg != 1737 ipp->ipp_esp_auth_alg) { 1738 *counter = DROPPER(ipss, 1739 ipds_spd_bad_espaalg); 1740 *reason = "unacceptable esp auth alg"; 1741 ret = B_FALSE; 1742 break; 1743 } 1744 } 1745 } else if (esp_assoc != NULL) { 1746 /* 1747 * Don't allow this. Check IPSEC NOTE above 1748 * ip_fanout_proto(). 1749 */ 1750 *counter = DROPPER(ipss, ipds_spd_got_esp); 1751 *reason = "unexpected ESP"; 1752 ret = B_FALSE; 1753 break; 1754 } 1755 if (ipp->ipp_use_se) { 1756 if (!decaps) { 1757 ret = ipsec_inbound_accept_clear(mp, ipha, 1758 ip6h); 1759 if (!ret) { 1760 /* XXX mutant? */ 1761 *counter = DROPPER(ipss, 1762 ipds_spd_bad_selfencap); 1763 *reason = "self encap not found"; 1764 break; 1765 } 1766 } 1767 } else if (decaps) { 1768 /* 1769 * XXX If the packet comes in tunneled and the 1770 * recipient does not expect it to be tunneled, it 1771 * is okay. But we drop to be consistent with the 1772 * other cases. 1773 */ 1774 *counter = DROPPER(ipss, ipds_spd_got_selfencap); 1775 *reason = "unexpected self encap"; 1776 ret = B_FALSE; 1777 break; 1778 } 1779 if (ii->ipsec_in_action != NULL) { 1780 /* 1781 * This can happen if we do a double policy-check on 1782 * a packet 1783 * XXX XXX should fix this case! 1784 */ 1785 IPACT_REFRELE(ii->ipsec_in_action); 1786 } 1787 ASSERT(ii->ipsec_in_action == NULL); 1788 IPACT_REFHOLD(ap); 1789 ii->ipsec_in_action = ap; 1790 break; /* from switch */ 1791 } 1792 return (ret); 1793 } 1794 1795 static boolean_t 1796 spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa) 1797 { 1798 ASSERT(ipl->ipl_ids_latched == B_TRUE); 1799 return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) && 1800 ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid); 1801 } 1802 1803 /* 1804 * Takes a latched conn and an inbound packet and returns a unique_id suitable 1805 * for SA comparisons. Most of the time we will copy from the conn_t, but 1806 * there are cases when the conn_t is latched but it has wildcard selectors, 1807 * and then we need to fallback to scooping them out of the packet. 1808 * 1809 * Assume we'll never have 0 with a conn_t present, so use 0 as a failure. We 1810 * can get away with this because we only have non-zero ports/proto for 1811 * latched conn_ts. 1812 * 1813 * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough 1814 * to not be a nice macro. 
1815 */ 1816 static uint64_t 1817 conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h) 1818 { 1819 ipsec_selector_t sel; 1820 uint8_t ulp = connp->conn_ulp; 1821 1822 ASSERT(connp->conn_latch->ipl_in_policy != NULL); 1823 1824 if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) && 1825 (connp->conn_fport == 0 || connp->conn_lport == 0)) { 1826 /* Slow path - we gotta grab from the packet. */ 1827 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 1828 SEL_NONE) != SELRET_SUCCESS) { 1829 /* Failure -> have caller free packet with ENOMEM. */ 1830 return (0); 1831 } 1832 return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port, 1833 sel.ips_protocol, 0)); 1834 } 1835 1836 #ifdef DEBUG_NOT_UNTIL_6478464 1837 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) == 1838 SELRET_SUCCESS) { 1839 ASSERT(sel.ips_local_port == connp->conn_lport); 1840 ASSERT(sel.ips_remote_port == connp->conn_fport); 1841 ASSERT(sel.ips_protocol == connp->conn_ulp); 1842 } 1843 ASSERT(connp->conn_ulp != 0); 1844 #endif 1845 1846 return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0)); 1847 } 1848 1849 /* 1850 * Called to check policy on a latched connection, both from this file 1851 * and from tcp.c 1852 */ 1853 boolean_t 1854 ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl, 1855 ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter, 1856 conn_t *connp) 1857 { 1858 netstack_t *ns = ii->ipsec_in_ns; 1859 ipsec_stack_t *ipss = ns->netstack_ipsec; 1860 1861 ASSERT(ipl->ipl_ids_latched == B_TRUE); 1862 1863 if (!ii->ipsec_in_loopback) { 1864 /* 1865 * Over loopback, there aren't real security associations, 1866 * so there are neither identities nor "unique" values 1867 * for us to check the packet against. 1868 */ 1869 if ((ii->ipsec_in_ah_sa != NULL) && 1870 (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) { 1871 *counter = DROPPER(ipss, ipds_spd_ah_badid); 1872 *reason = "AH identity mismatch"; 1873 return (B_FALSE); 1874 } 1875 1876 if ((ii->ipsec_in_esp_sa != NULL) && 1877 (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) { 1878 *counter = DROPPER(ipss, ipds_spd_esp_badid); 1879 *reason = "ESP identity mismatch"; 1880 return (B_FALSE); 1881 } 1882 1883 /* 1884 * Can fudge pkt_unique from connp because we're latched. 1885 * In DEBUG kernels (see conn_to_unique()'s implementation), 1886 * verify this even if it REALLY slows things down. 1887 */ 1888 if (!ipsec_check_ipsecin_unique(ii, reason, counter, 1889 conn_to_unique(connp, mp, ipha, ip6h))) { 1890 return (B_FALSE); 1891 } 1892 } 1893 1894 return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action, 1895 ipha, ip6h, reason, counter)); 1896 } 1897 1898 /* 1899 * Check to see whether this secured datagram meets the policy 1900 * constraints specified in ipsp. 1901 * 1902 * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy. 1903 * 1904 * Consumes a reference to ipsp. 
1905 */ 1906 static mblk_t * 1907 ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, 1908 ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) 1909 { 1910 ipsec_in_t *ii; 1911 ipsec_action_t *ap; 1912 const char *reason = "no policy actions found"; 1913 mblk_t *data_mp, *ipsec_mp; 1914 ipsec_stack_t *ipss = ns->netstack_ipsec; 1915 ip_stack_t *ipst = ns->netstack_ip; 1916 kstat_named_t *counter; 1917 1918 counter = DROPPER(ipss, ipds_spd_got_secure); 1919 1920 data_mp = first_mp->b_cont; 1921 ipsec_mp = first_mp; 1922 1923 ASSERT(ipsp != NULL); 1924 1925 ASSERT((ipha == NULL && ip6h != NULL) || 1926 (ip6h == NULL && ipha != NULL)); 1927 1928 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1929 1930 if (ii->ipsec_in_loopback) 1931 return (ipsec_check_loopback_policy(first_mp, B_TRUE, ipsp)); 1932 ASSERT(ii->ipsec_in_type == IPSEC_IN); 1933 ASSERT(ii->ipsec_in_secure); 1934 1935 if (ii->ipsec_in_action != NULL) { 1936 /* 1937 * this can happen if we do a double policy-check on a packet 1938 * Would be nice to be able to delete this test.. 1939 */ 1940 IPACT_REFRELE(ii->ipsec_in_action); 1941 } 1942 ASSERT(ii->ipsec_in_action == NULL); 1943 1944 if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { 1945 reason = "inbound AH and ESP identities differ"; 1946 counter = DROPPER(ipss, ipds_spd_ahesp_diffid); 1947 goto drop; 1948 } 1949 1950 if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique)) 1951 goto drop; 1952 1953 /* 1954 * Ok, now loop through the possible actions and see if any 1955 * of them work for us. 1956 */ 1957 1958 for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { 1959 if (ipsec_check_ipsecin_action(ii, data_mp, ap, 1960 ipha, ip6h, &reason, &counter)) { 1961 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 1962 IPPOL_REFRELE(ipsp, ns); 1963 return (first_mp); 1964 } 1965 } 1966 drop: 1967 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, 1968 "ipsec inbound policy mismatch: %s, packet dropped\n", 1969 reason); 1970 IPPOL_REFRELE(ipsp, ns); 1971 ASSERT(ii->ipsec_in_action == NULL); 1972 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 1973 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 1974 &ipss->ipsec_spd_dropper); 1975 return (NULL); 1976 } 1977 1978 /* 1979 * sleazy prefix-length-based compare. 1980 * another inlining candidate.. 1981 */ 1982 boolean_t 1983 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p) 1984 { 1985 int offset = pfxlen>>3; 1986 int bitsleft = pfxlen & 7; 1987 uint8_t *addr2 = (uint8_t *)addr2p; 1988 1989 /* 1990 * and there was much evil.. 1991 * XXX should inline-expand the bcmp here and do this 32 bits 1992 * or 64 bits at a time.. 1993 */ 1994 return ((bcmp(addr1, addr2, offset) == 0) && 1995 ((bitsleft == 0) || 1996 (((addr1[offset] ^ addr2[offset]) & (0xff<<(8-bitsleft))) == 0))); 1997 } 1998 1999 static ipsec_policy_t * 2000 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, 2001 ipsec_selector_t *sel, boolean_t is_icmp_inv_acq) 2002 { 2003 ipsec_selkey_t *isel; 2004 ipsec_policy_t *p; 2005 int bpri = best ? 
best->ipsp_prio : 0; 2006 2007 for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) { 2008 uint32_t valid; 2009 2010 if (p->ipsp_prio <= bpri) 2011 continue; 2012 isel = &p->ipsp_sel->ipsl_key; 2013 valid = isel->ipsl_valid; 2014 2015 if ((valid & IPSL_PROTOCOL) && 2016 (isel->ipsl_proto != sel->ips_protocol)) 2017 continue; 2018 2019 if ((valid & IPSL_REMOTE_ADDR) && 2020 !ip_addr_match((uint8_t *)&isel->ipsl_remote, 2021 isel->ipsl_remote_pfxlen, &sel->ips_remote_addr_v6)) 2022 continue; 2023 2024 if ((valid & IPSL_LOCAL_ADDR) && 2025 !ip_addr_match((uint8_t *)&isel->ipsl_local, 2026 isel->ipsl_local_pfxlen, &sel->ips_local_addr_v6)) 2027 continue; 2028 2029 if ((valid & IPSL_REMOTE_PORT) && 2030 isel->ipsl_rport != sel->ips_remote_port) 2031 continue; 2032 2033 if ((valid & IPSL_LOCAL_PORT) && 2034 isel->ipsl_lport != sel->ips_local_port) 2035 continue; 2036 2037 if (!is_icmp_inv_acq) { 2038 if ((valid & IPSL_ICMP_TYPE) && 2039 (isel->ipsl_icmp_type > sel->ips_icmp_type || 2040 isel->ipsl_icmp_type_end < sel->ips_icmp_type)) { 2041 continue; 2042 } 2043 2044 if ((valid & IPSL_ICMP_CODE) && 2045 (isel->ipsl_icmp_code > sel->ips_icmp_code || 2046 isel->ipsl_icmp_code_end < 2047 sel->ips_icmp_code)) { 2048 continue; 2049 } 2050 } else { 2051 /* 2052 * special case for icmp inverse acquire 2053 * we only want policies that aren't drop/pass 2054 */ 2055 if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY) 2056 continue; 2057 } 2058 2059 /* we matched all the packet-port-field selectors! */ 2060 best = p; 2061 bpri = p->ipsp_prio; 2062 } 2063 2064 return (best); 2065 } 2066 2067 /* 2068 * Try to find and return the best policy entry under a given policy 2069 * root for a given set of selectors; the first parameter "best" is 2070 * the current best policy so far. If "best" is non-null, we have a 2071 * reference to it. We return a reference to a policy; if that policy 2072 * is not the original "best", we need to release that reference 2073 * before returning. 2074 */ 2075 ipsec_policy_t * 2076 ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, 2077 int direction, ipsec_selector_t *sel, netstack_t *ns) 2078 { 2079 ipsec_policy_t *curbest; 2080 ipsec_policy_root_t *root; 2081 uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq; 2082 int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6; 2083 2084 curbest = best; 2085 root = &head->iph_root[direction]; 2086 2087 #ifdef DEBUG 2088 if (is_icmp_inv_acq) { 2089 if (sel->ips_isv4) { 2090 if (sel->ips_protocol != IPPROTO_ICMP) { 2091 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2092 " expecting icmp, got %d", 2093 sel->ips_protocol); 2094 } 2095 } else { 2096 if (sel->ips_protocol != IPPROTO_ICMPV6) { 2097 cmn_err(CE_WARN, "ipsec_find_policy_head:" 2098 " expecting icmpv6, got %d", 2099 sel->ips_protocol); 2100 } 2101 } 2102 } 2103 #endif 2104 2105 rw_enter(&head->iph_lock, RW_READER); 2106 2107 if (root->ipr_nchains > 0) { 2108 curbest = ipsec_find_policy_chain(curbest, 2109 root->ipr_hash[selector_hash(sel, root)].hash_head, sel, 2110 is_icmp_inv_acq); 2111 } 2112 curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel, 2113 is_icmp_inv_acq); 2114 2115 /* 2116 * Adjust reference counts if we found anything new. 
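 *
 * Callers may chain lookups over several policy heads by threading the
 * running best match through successive calls; a sketch of the pattern
 * (ipsec_find_policy() below does exactly this):
 *
 *	p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy,
 *	    direction, sel, ns);
 *	if (connp != NULL && connp->conn_policy != NULL)
 *		p = ipsec_find_policy_head(p, connp->conn_policy,
 *		    direction, sel, ns);
 *	... use p, then IPPOL_REFRELE(p, ns) when done ...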
2117 */ 2118 if (curbest != best) { 2119 ASSERT(curbest != NULL); 2120 IPPOL_REFHOLD(curbest); 2121 2122 if (best != NULL) { 2123 IPPOL_REFRELE(best, ns); 2124 } 2125 } 2126 2127 rw_exit(&head->iph_lock); 2128 2129 return (curbest); 2130 } 2131 2132 /* 2133 * Find the best system policy (either global or per-interface) which 2134 * applies to the given selector; look in all the relevant policy roots 2135 * to figure out which policy wins. 2136 * 2137 * Returns a reference to a policy; caller must release this 2138 * reference when done. 2139 */ 2140 ipsec_policy_t * 2141 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, 2142 ipsec_selector_t *sel, netstack_t *ns) 2143 { 2144 ipsec_policy_t *p; 2145 ipsec_stack_t *ipss = ns->netstack_ipsec; 2146 2147 p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, 2148 direction, sel, ns); 2149 if ((connp != NULL) && (connp->conn_policy != NULL)) { 2150 p = ipsec_find_policy_head(p, connp->conn_policy, 2151 direction, sel, ns); 2152 } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { 2153 p = ipsec_find_policy_head(p, io->ipsec_out_polhead, 2154 direction, sel, ns); 2155 } 2156 2157 return (p); 2158 } 2159 2160 /* 2161 * Check with global policy and see whether this inbound 2162 * packet meets the policy constraints. 2163 * 2164 * Locate appropriate policy from global policy, supplemented by the 2165 * conn's configured and/or cached policy if the conn is supplied. 2166 * 2167 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an 2168 * encrypted packet to see if they match. 2169 * 2170 * Otherwise, see if the policy allows cleartext; if not, drop it on the 2171 * floor. 2172 */ 2173 mblk_t * 2174 ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, 2175 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) 2176 { 2177 ipsec_policy_t *p; 2178 ipsec_selector_t sel; 2179 mblk_t *data_mp, *ipsec_mp; 2180 boolean_t policy_present; 2181 kstat_named_t *counter; 2182 ipsec_in_t *ii = NULL; 2183 uint64_t pkt_unique; 2184 ipsec_stack_t *ipss = ns->netstack_ipsec; 2185 ip_stack_t *ipst = ns->netstack_ip; 2186 2187 data_mp = mctl_present ? first_mp->b_cont : first_mp; 2188 ipsec_mp = mctl_present ? first_mp : NULL; 2189 2190 sel.ips_is_icmp_inv_acq = 0; 2191 2192 ASSERT((ipha == NULL && ip6h != NULL) || 2193 (ip6h == NULL && ipha != NULL)); 2194 2195 if (ipha != NULL) 2196 policy_present = ipss->ipsec_inbound_v4_policy_present; 2197 else 2198 policy_present = ipss->ipsec_inbound_v6_policy_present; 2199 2200 if (!policy_present && connp == NULL) { 2201 /* 2202 * No global policy and no per-socket policy; 2203 * just pass it back (but we shouldn't get here in that case) 2204 */ 2205 return (first_mp); 2206 } 2207 2208 if (ipsec_mp != NULL) { 2209 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 2210 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 2211 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2212 } 2213 2214 /* 2215 * If we have cached policy, use it. 2216 * Otherwise consult system policy. 2217 */ 2218 if ((connp != NULL) && (connp->conn_latch != NULL)) { 2219 p = connp->conn_latch->ipl_in_policy; 2220 if (p != NULL) { 2221 IPPOL_REFHOLD(p); 2222 } 2223 /* 2224 * Fudge sel for UNIQUE_ID setting below. 2225 */ 2226 pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h); 2227 } else { 2228 /* Initialize the ports in the selector */ 2229 if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, 2230 SEL_NONE) == SELRET_NOMEM) { 2231 /* 2232 * Technically not a policy mismatch, but it is 2233 * an internal failure. 
2234 */ 2235 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2236 "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE, ns);
2237 counter = DROPPER(ipss, ipds_spd_nomem); 2238 goto fail; 2239 } 2240
2241 /* 2242 * Find the policy which best applies. 2243 *
2244 * If we find global policy, we should look at both 2245 * local policy and global policy and see which is 2246 * stronger and match accordingly. 2247 *
2248 * If we don't find a global policy, check with 2249 * local policy alone. 2250 */ 2251
2252 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 2253 ns);
2254 pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, 2255 sel.ips_local_port, sel.ips_protocol, 0); 2256 } 2257
2258 if (p == NULL) { 2259 if (ipsec_mp == NULL) {
2260 /* 2261 * We have no policy; default to succeeding. 2262 * XXX paranoid system design doesn't do this. 2263 */
2264 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2265 return (first_mp);
2266 } else { 2267 counter = DROPPER(ipss, ipds_spd_got_secure);
2268 ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, 2269 "ipsec_check_global_policy", ipha, ip6h, B_TRUE, 2270 ns);
2271 goto fail; 2272 } 2273 }
2274 if ((ii != NULL) && (ii->ipsec_in_secure)) { 2275 return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, 2276 pkt_unique, ns)); 2277 }
2278 if (p->ipsp_act->ipa_allow_clear) { 2279 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); 2280 IPPOL_REFRELE(p, ns); 2281 return (first_mp); 2282 }
2283 IPPOL_REFRELE(p, ns);
2284 /* 2285 * If we reach here, we will drop the packet because it failed the
2286 * global policy check: the packet was cleartext, and it 2287 * should not have been. 2288 */
2289 ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, 2290 "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns);
2291 counter = DROPPER(ipss, ipds_spd_got_clear); 2292
2293 fail: 2294 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2295 &ipss->ipsec_spd_dropper);
2296 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2297 return (NULL); 2298 } 2299
2300 /* 2301 * Check whether an inbound datagram is a valid one to accept in
2302 * the clear. If it is secure, it is the job of IPsec to log
2303 * information appropriately if it suspects that it may not be the
2304 * real one. 2305 *
2306 * It is called only while fanning out to a ULP that accepts only
2307 * secure data, when the incoming datagram is clear. Usually we
2308 * never accept clear datagrams in such cases; ICMP is the only
2309 * exception. 2310 *
2311 * NOTE: We don't call this function if the client (ULP) is willing
2312 * to accept things in the clear. 2313 */
2314 boolean_t 2315 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2316 {
2317 ushort_t iph_hdr_length; 2318 icmph_t *icmph; 2319 icmp6_t *icmp6; 2320 uint8_t *nexthdrp; 2321
2322 ASSERT((ipha != NULL && ip6h == NULL) || 2323 (ipha == NULL && ip6h != NULL)); 2324
2325 if (ip6h != NULL) { 2326 iph_hdr_length = ip_hdr_length_v6(mp, ip6h);
2327 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, 2328 &nexthdrp)) { 2329 return (B_FALSE); 2330 }
2331 if (*nexthdrp != IPPROTO_ICMPV6) 2332 return (B_FALSE);
2333 icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]);
2334 /* Match IPv6 ICMP policy to IPv4's as closely as possible. */
2335 switch (icmp6->icmp6_type) {
2336 case ICMP6_PARAM_PROB: 2337 /* Corresponds to port/proto unreach in IPv4. */
2338 case ICMP6_ECHO_REQUEST: 2339 /* Just like IPv4.
*/ 2340 return (B_FALSE); 2341
2342 case MLD_LISTENER_QUERY: 2343 case MLD_LISTENER_REPORT: 2344 case MLD_LISTENER_REDUCTION:
2345 /* 2346 * XXX Separate NDD in IPv4; what about here? 2347 * Plus, mcast is important to ND. 2348 */
2349 case ICMP6_DST_UNREACH: 2350 /* Corresponds to HOST/NET unreachable in IPv4. */
2351 case ICMP6_PACKET_TOO_BIG: 2352 case ICMP6_ECHO_REPLY: 2353 /* These are trusted in IPv4. */
2354 case ND_ROUTER_SOLICIT: 2355 case ND_ROUTER_ADVERT: 2356 case ND_NEIGHBOR_SOLICIT: 2357 case ND_NEIGHBOR_ADVERT: 2358 case ND_REDIRECT:
2359 /* Trust ND messages for now. */
2360 case ICMP6_TIME_EXCEEDED: 2361 default: 2362 return (B_TRUE); 2363 }
2364 } else { 2365 /* 2366 * If it is not ICMP, fail this request. 2367 */
2368 if (ipha->ipha_protocol != IPPROTO_ICMP) {
2369 #ifdef FRAGCACHE_DEBUG 2370 cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n", 2371 ipha->ipha_protocol); 2372 #endif
2373 return (B_FALSE); 2374 }
2375 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2376 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2377 /* 2378 * It is an insecure ICMP message. Check to see whether we are 2379 * willing to accept this one. 2380 */ 2381
2382 switch (icmph->icmph_type) {
2383 case ICMP_ECHO_REPLY: 2384 case ICMP_TIME_STAMP_REPLY: 2385 case ICMP_INFO_REPLY: 2386 case ICMP_ROUTER_ADVERTISEMENT:
2387 /* 2388 * We should not encourage clear replies if this
2389 * client expects secure. If somebody is replying
2390 * in clear, some malicious user watching both the
2391 * request and reply can do chosen-plaintext attacks.
2392 * With global policy we might just be expecting secure
2393 * but sending out clear. We don't know what the right
2394 * thing is. We can't do much here, as we can't control
2395 * the sender. Until we are sure of what to do,
2396 * accept them. 2397 */
2398 return (B_TRUE);
2399 case ICMP_ECHO_REQUEST: 2400 case ICMP_TIME_STAMP_REQUEST: 2401 case ICMP_INFO_REQUEST: 2402 case ICMP_ADDRESS_MASK_REQUEST: 2403 case ICMP_ROUTER_SOLICITATION: 2404 case ICMP_ADDRESS_MASK_REPLY:
2405 /* 2406 * Don't accept this, as somebody could be sending
2407 * us plaintext to get encrypted data. If we reply,
2408 * it will lead to a chosen-plaintext attack. 2409 */
2410 return (B_FALSE);
2411 case ICMP_DEST_UNREACHABLE: 2412 switch (icmph->icmph_code) {
2413 case ICMP_FRAGMENTATION_NEEDED:
2414 /* 2415 * Be in sync with icmp_inbound, where we have 2416 * already set ire_max_frag. 2417 */
2418 #ifdef FRAGCACHE_DEBUG 2419 cmn_err(CE_WARN, "ICMP frag needed\n"); 2420 #endif
2421 return (B_TRUE);
2422 case ICMP_HOST_UNREACHABLE: 2423 case ICMP_NET_UNREACHABLE:
2424 /* 2425 * By accepting, we could reset a connection.
2426 * How do we solve the problem of some
2427 * intermediate router sending insecure ICMP
2428 * messages? 2429 */
2430 return (B_TRUE);
2431 case ICMP_PORT_UNREACHABLE: 2432 case ICMP_PROTOCOL_UNREACHABLE: 2433 default: 2434 return (B_FALSE); 2435 }
2436 case ICMP_SOURCE_QUENCH:
2437 /* 2438 * If this is an attack, TCP will slow start
2439 * because of this. Is it very harmful? 2440 */
2441 return (B_TRUE);
2442 case ICMP_PARAM_PROBLEM: 2443 return (B_FALSE);
2444 case ICMP_TIME_EXCEEDED: 2445 return (B_TRUE);
2446 case ICMP_REDIRECT: 2447 return (B_FALSE);
2448 default: 2449 return (B_FALSE); 2450 } 2451 } 2452 } 2453
2454 void 2455 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote) 2456 {
2457 mutex_enter(&ipl->ipl_lock); 2458
2459 if (ipl->ipl_ids_latched) { 2460 /* I lost, someone else got here before me */
2461 mutex_exit(&ipl->ipl_lock); 2462 return; 2463 } 2464
2465 if (local != NULL) 2466 IPSID_REFHOLD(local);
2467 if (remote != NULL) 2468 IPSID_REFHOLD(remote); 2469
2470 ipl->ipl_local_cid = local; 2471 ipl->ipl_remote_cid = remote;
2472 ipl->ipl_ids_latched = B_TRUE; 2473 mutex_exit(&ipl->ipl_lock); 2474 } 2475
2476 void 2477 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii) 2478 { 2479 ipsa_t *sa; 2480
2481 if (!ipl->ipl_ids_latched) { 2482 ipsid_t *local = NULL; 2483 ipsid_t *remote = NULL; 2484
2485 if (!ii->ipsec_in_loopback) {
2486 if (ii->ipsec_in_esp_sa != NULL) 2487 sa = ii->ipsec_in_esp_sa;
2488 else 2489 sa = ii->ipsec_in_ah_sa;
2490 ASSERT(sa != NULL);
2491 local = sa->ipsa_dst_cid; 2492 remote = sa->ipsa_src_cid; 2493 }
2494 ipsec_latch_ids(ipl, local, remote); 2495 }
2496 ipl->ipl_in_action = ii->ipsec_in_action; 2497 IPACT_REFHOLD(ipl->ipl_in_action); 2498 } 2499
2500 /* 2501 * Check whether the policy constraints are met for an
2502 * inbound datagram; called from IP in numerous places. 2503 *
2504 * Note that this is not a chokepoint for inbound policy checks;
2505 * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy(). 2506 */
2507 mblk_t * 2508 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, 2509 ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) 2510 {
2511 ipsec_in_t *ii; 2512 boolean_t ret;
2513 mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp;
2514 mblk_t *ipsec_mp = mctl_present ? first_mp : NULL;
2515 ipsec_latch_t *ipl; 2516 uint64_t unique_id; 2517 ipsec_stack_t *ipss; 2518 ip_stack_t *ipst; 2519 netstack_t *ns; 2520 ipsec_policy_head_t *policy_head; 2521
2522 ASSERT(connp != NULL);
2523 ns = connp->conn_netstack; 2524 ipss = ns->netstack_ipsec; 2525 ipst = ns->netstack_ip; 2526
2527 if (ipsec_mp == NULL) { 2528 clear:
2529 /* 2530 * This is the case where the incoming datagram is
2531 * cleartext and we need to see whether this client
2532 * would like to receive such untrustworthy things from
2533 * the wire. 2534 */
2535 ASSERT(mp != NULL); 2536
2537 mutex_enter(&connp->conn_lock);
2538 if (connp->conn_state_flags & CONN_CONDEMNED) {
2539 mutex_exit(&connp->conn_lock);
2540 ip_drop_packet(first_mp, B_TRUE, NULL, 2541 NULL, DROPPER(ipss, ipds_spd_got_clear), 2542 &ipss->ipsec_spd_dropper);
2543 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2544 return (NULL); 2545 }
2546 if ((ipl = connp->conn_latch) != NULL) {
2547 /* Hold a reference in case the conn is closing */
2548 IPLATCH_REFHOLD(ipl); 2549 mutex_exit(&connp->conn_lock);
2550 /* 2551 * Policy is cached in the conn.
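 * A condensed sketch of the decision made on this cleartext path,
 * using the names from the code below (no new behavior implied):
 *
 *	if (ipl->ipl_in_policy != NULL &&
 *	    !ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)
 *		accept iff ipsec_inbound_accept_clear(mp, ipha, ip6h);
 *	else
 *		accept: the latched policy tolerates cleartext.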
2552 */ 2553 if ((ipl->ipl_in_policy != NULL) && 2554 (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) {
2555 ret = ipsec_inbound_accept_clear(mp, 2556 ipha, ip6h);
2557 if (ret) { 2558 BUMP_MIB(&ipst->ips_ip_mib, 2559 ipsecInSucceeded);
2560 IPLATCH_REFRELE(ipl, ns); 2561 return (first_mp);
2562 } else { 2563 ipsec_log_policy_failure( 2564 IPSEC_POLICY_MISMATCH, 2565 "ipsec_check_inbound_policy", ipha, 2566 ip6h, B_FALSE, ns);
2567 ip_drop_packet(first_mp, B_TRUE, NULL, 2568 NULL, 2569 DROPPER(ipss, ipds_spd_got_clear), 2570 &ipss->ipsec_spd_dropper);
2571 BUMP_MIB(&ipst->ips_ip_mib, 2572 ipsecInFailed);
2573 IPLATCH_REFRELE(ipl, ns); 2574 return (NULL); 2575 }
2576 } else { 2577 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2578 IPLATCH_REFRELE(ipl, ns); 2579 return (first_mp); 2580 }
2581 } else { 2582 uchar_t db_type; 2583 policy_head = connp->conn_policy; 2584
2585 /* Hold a reference in case the conn is closing */
2586 if (policy_head != NULL) 2587 IPPH_REFHOLD(policy_head);
2588 mutex_exit(&connp->conn_lock);
2589 /* 2590 * As this is a non-hardbound connection, we need
2591 * to look at both per-socket policy and global
2592 * policy. As this is cleartext, mark the mp as
2593 * M_DATA in case it is an ICMP error being
2594 * reported, before calling ipsec_check_global_policy,
2595 * so that it does not mistake it for an IPSEC_IN. 2596 */
2597 db_type = mp->b_datap->db_type;
2598 mp->b_datap->db_type = M_DATA;
2599 first_mp = ipsec_check_global_policy(first_mp, connp, 2600 ipha, ip6h, mctl_present, ns);
2601 if (policy_head != NULL) 2602 IPPH_REFRELE(policy_head, ns);
2603 if (first_mp != NULL) 2604 mp->b_datap->db_type = db_type;
2605 return (first_mp); 2606 } 2607 }
2608 /* 2609 * If it is inbound, check whether the attached message
2610 * is secure or not. We have a special case for ICMP,
2611 * where we have an IPSEC_IN message and the attached
2612 * message is not secure. See icmp_inbound_error_fanout
2613 * for details. 2614 */
2615 ASSERT(ipsec_mp != NULL); 2616 ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
2617 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2618
2619 if (!ii->ipsec_in_secure) 2620 goto clear; 2621
2622 /* 2623 * mp->b_cont could be either a M_CTL message
2624 * for icmp errors being sent up or a M_DATA message. 2625 */
2626 ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA); 2627
2628 ASSERT(ii->ipsec_in_type == IPSEC_IN); 2629
2630 mutex_enter(&connp->conn_lock);
2631 /* Connection is closing */
2632 if (connp->conn_state_flags & CONN_CONDEMNED) {
2633 mutex_exit(&connp->conn_lock);
2634 ip_drop_packet(first_mp, B_TRUE, NULL, 2635 NULL, DROPPER(ipss, ipds_spd_got_clear), 2636 &ipss->ipsec_spd_dropper);
2637 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); 2638 return (NULL); 2639 } 2640
2641 /* 2642 * Once a connection is latched it remains so for life; the conn_latch
2643 * pointer on the conn will not have changed. Simply initialize ipl
2644 * here, as the earlier initialization was done only in the cleartext case. 2645 */
2646 if ((ipl = connp->conn_latch) == NULL) {
2647 mblk_t *retmp; 2648 policy_head = connp->conn_policy; 2649
2650 /* Hold a reference in case the conn is closing */
2651 if (policy_head != NULL) 2652 IPPH_REFHOLD(policy_head);
2653 mutex_exit(&connp->conn_lock);
2654 /* 2655 * We don't have policies cached in the conn
2656 * for this stream. So, look at the global
2657 * policy; it will check against conn or global policy,
2658 * whichever is stronger.
2659 */ 2660 retmp = ipsec_check_global_policy(first_mp, connp, 2661 ipha, ip6h, mctl_present, ns);
2662 if (policy_head != NULL) 2663 IPPH_REFRELE(policy_head, ns);
2664 return (retmp); 2665 } 2666
2667 IPLATCH_REFHOLD(ipl); 2668 mutex_exit(&connp->conn_lock); 2669
2670 if (ipl->ipl_in_action != NULL) {
2671 /* Policy is cached & latched; fast(er) path */
2672 const char *reason; 2673 kstat_named_t *counter; 2674
2675 if (ipsec_check_ipsecin_latch(ii, mp, ipl, 2676 ipha, ip6h, &reason, &counter, connp)) {
2677 BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded);
2678 IPLATCH_REFRELE(ipl, ns); 2679 return (first_mp); 2680 }
2681 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, 2682 SL_ERROR|SL_WARN|SL_CONSOLE, 2683 "ipsec inbound policy mismatch: %s, packet dropped\n", 2684 reason);
2685 ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, 2686 &ipss->ipsec_spd_dropper);
2687 BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed);
2688 IPLATCH_REFRELE(ipl, ns); 2689 return (NULL);
2690 } else if (ipl->ipl_in_policy == NULL) {
2691 ipsec_weird_null_inbound_policy++;
2692 IPLATCH_REFRELE(ipl, ns); 2693 return (first_mp); 2694 } 2695
2696 unique_id = conn_to_unique(connp, mp, ipha, ip6h);
2697 IPPOL_REFHOLD(ipl->ipl_in_policy);
2698 first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy, 2699 ipha, ip6h, unique_id, ns);
2700 /* 2701 * NOTE: ipsecIn{Failed,Succeeded} bumped by
2702 * ipsec_check_ipsecin_policy(). 2703 */
2704 if (first_mp != NULL) 2705 ipsec_latch_inbound(ipl, ii);
2706 IPLATCH_REFRELE(ipl, ns); 2707 return (first_mp); 2708 } 2709
2710 /* 2711 * Returns: 2712 *
2713 * SELRET_NOMEM --> A msgpullup() needed to gather things failed.
2714 * SELRET_BADPKT --> If we're being called after tunnel-mode fragment
2715 * gathering, the initial fragment is too short for
2716 * useful data. Only returned if SEL_TUNNEL_FIRSTFRAG is
2717 * set.
2718 * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data.
2719 * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller
2720 * should put this packet in a fragment-gathering queue.
2721 * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY
2722 * are set. 2723 */
2724 static selret_t 2725 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2726 ip6_t *ip6h, uint8_t sel_flags) 2727 {
2728 uint16_t *ports; 2729 ushort_t hdr_len;
2730 int outer_hdr_len = 0; /* For ICMP tunnel-mode cases... */
2731 mblk_t *spare_mp = NULL; 2732 uint8_t *nexthdrp; 2733 uint8_t nexthdr; 2734 uint8_t *typecode; 2735 uint8_t check_proto; 2736 ip6_pkt_t ipp;
2737 boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY);
2738 boolean_t is_icmp = (sel_flags & SEL_IS_ICMP);
2739 boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE);
2740 boolean_t post_frag = (sel_flags & SEL_POST_FRAG); 2741
2742 ASSERT((ipha == NULL && ip6h != NULL) || 2743 (ipha != NULL && ip6h == NULL)); 2744
2745 if (ip6h != NULL) {
2746 if (is_icmp || tunnel_mode) 2747 outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; 2748
2749 check_proto = IPPROTO_ICMPV6;
2750 sel->ips_isv4 = B_FALSE;
2751 sel->ips_local_addr_v6 = ip6h->ip6_dst;
2752 sel->ips_remote_addr_v6 = ip6h->ip6_src; 2753
2754 bzero(&ipp, sizeof (ipp));
2755 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 2756
2757 nexthdr = ip6h->ip6_nxt;
2758 switch (nexthdr) {
2759 case IPPROTO_HOPOPTS: 2760 case IPPROTO_ROUTING: 2761 case IPPROTO_DSTOPTS: 2762 case IPPROTO_FRAGMENT:
2763 /* 2764 * Use ip_hdr_length_nexthdr_v6(). 
And have a spare 2765 * mblk that's contiguous to feed it 2766 */ 2767 if ((spare_mp = msgpullup(mp, -1)) == NULL) 2768 return (SELRET_NOMEM); 2769 if (!ip_hdr_length_nexthdr_v6(spare_mp, 2770 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2771 &hdr_len, &nexthdrp)) { 2772 /* Malformed packet - caller frees. */ 2773 ipsec_freemsg_chain(spare_mp); 2774 return (SELRET_BADPKT); 2775 } 2776 nexthdr = *nexthdrp; 2777 /* We can just extract based on hdr_len now. */ 2778 break; 2779 default: 2780 hdr_len = IPV6_HDR_LEN; 2781 break; 2782 } 2783 2784 if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) { 2785 /* IPv6 Fragment */ 2786 ipsec_freemsg_chain(spare_mp); 2787 return (SELRET_TUNFRAG); 2788 } 2789 } else { 2790 if (is_icmp || tunnel_mode) 2791 outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; 2792 check_proto = IPPROTO_ICMP; 2793 sel->ips_isv4 = B_TRUE; 2794 sel->ips_local_addr_v4 = ipha->ipha_dst; 2795 sel->ips_remote_addr_v4 = ipha->ipha_src; 2796 nexthdr = ipha->ipha_protocol; 2797 hdr_len = IPH_HDR_LENGTH(ipha); 2798 2799 if (port_policy_present && 2800 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) && 2801 !is_icmp) { 2802 /* IPv4 Fragment */ 2803 ipsec_freemsg_chain(spare_mp); 2804 return (SELRET_TUNFRAG); 2805 } 2806 2807 } 2808 sel->ips_protocol = nexthdr; 2809 2810 if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2811 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || 2812 (!port_policy_present && !post_frag && tunnel_mode)) { 2813 sel->ips_remote_port = sel->ips_local_port = 0; 2814 ipsec_freemsg_chain(spare_mp); 2815 return (SELRET_SUCCESS); 2816 } 2817 2818 if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) { 2819 /* If we didn't pullup a copy already, do so now. */ 2820 /* 2821 * XXX performance, will upper-layers frequently split TCP/UDP 2822 * apart from IP or options? If so, perhaps we should revisit 2823 * the spare_mp strategy. 2824 */ 2825 ipsec_hdr_pullup_needed++; 2826 if (spare_mp == NULL && 2827 (spare_mp = msgpullup(mp, -1)) == NULL) { 2828 return (SELRET_NOMEM); 2829 } 2830 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2831 } else { 2832 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2833 } 2834 2835 if (nexthdr == check_proto) { 2836 typecode = (uint8_t *)ports; 2837 sel->ips_icmp_type = *typecode++; 2838 sel->ips_icmp_code = *typecode; 2839 sel->ips_remote_port = sel->ips_local_port = 0; 2840 } else { 2841 sel->ips_remote_port = *ports++; 2842 sel->ips_local_port = *ports; 2843 } 2844 ipsec_freemsg_chain(spare_mp); 2845 return (SELRET_SUCCESS); 2846 } 2847 2848 static boolean_t 2849 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, 2850 ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) 2851 { 2852 /* 2853 * XXX cut&paste shared with ipsec_init_inbound_sel 2854 */ 2855 uint16_t *ports; 2856 ushort_t hdr_len; 2857 mblk_t *spare_mp = NULL; 2858 uint8_t *nexthdrp; 2859 uint8_t nexthdr; 2860 uint8_t *typecode; 2861 uint8_t check_proto; 2862 2863 ASSERT((ipha == NULL && ip6h != NULL) || 2864 (ipha != NULL && ip6h == NULL)); 2865 2866 if (ip6h != NULL) { 2867 check_proto = IPPROTO_ICMPV6; 2868 nexthdr = ip6h->ip6_nxt; 2869 switch (nexthdr) { 2870 case IPPROTO_HOPOPTS: 2871 case IPPROTO_ROUTING: 2872 case IPPROTO_DSTOPTS: 2873 case IPPROTO_FRAGMENT: 2874 /* 2875 * Use ip_hdr_length_nexthdr_v6(). 
And have a spare 2876 * mblk that's contiguous to feed it 2877 */ 2878 spare_mp = msgpullup(mp, -1); 2879 if (spare_mp == NULL || 2880 !ip_hdr_length_nexthdr_v6(spare_mp, 2881 (ip6_t *)(spare_mp->b_rptr + outer_hdr_len), 2882 &hdr_len, &nexthdrp)) { 2883 /* Always works, even if NULL. */ 2884 ipsec_freemsg_chain(spare_mp); 2885 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2886 DROPPER(ipss, ipds_spd_nomem), 2887 &ipss->ipsec_spd_dropper); 2888 return (B_FALSE); 2889 } else { 2890 nexthdr = *nexthdrp; 2891 /* We can just extract based on hdr_len now. */ 2892 } 2893 break; 2894 default: 2895 hdr_len = IPV6_HDR_LEN; 2896 break; 2897 } 2898 } else { 2899 check_proto = IPPROTO_ICMP; 2900 hdr_len = IPH_HDR_LENGTH(ipha); 2901 nexthdr = ipha->ipha_protocol; 2902 } 2903 2904 sel->ips_protocol = nexthdr; 2905 if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && 2906 nexthdr != IPPROTO_SCTP && nexthdr != check_proto) { 2907 sel->ips_local_port = sel->ips_remote_port = 0; 2908 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2909 return (B_TRUE); 2910 } 2911 2912 if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) { 2913 /* If we didn't pullup a copy already, do so now. */ 2914 /* 2915 * XXX performance, will upper-layers frequently split TCP/UDP 2916 * apart from IP or options? If so, perhaps we should revisit 2917 * the spare_mp strategy. 2918 * 2919 * XXX should this be msgpullup(mp, hdr_len+4) ??? 2920 */ 2921 if (spare_mp == NULL && 2922 (spare_mp = msgpullup(mp, -1)) == NULL) { 2923 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 2924 DROPPER(ipss, ipds_spd_nomem), 2925 &ipss->ipsec_spd_dropper); 2926 return (B_FALSE); 2927 } 2928 ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; 2929 } else { 2930 ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len]; 2931 } 2932 2933 if (nexthdr == check_proto) { 2934 typecode = (uint8_t *)ports; 2935 sel->ips_icmp_type = *typecode++; 2936 sel->ips_icmp_code = *typecode; 2937 sel->ips_remote_port = sel->ips_local_port = 0; 2938 } else { 2939 sel->ips_local_port = *ports++; 2940 sel->ips_remote_port = *ports; 2941 } 2942 ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */ 2943 return (B_TRUE); 2944 } 2945 2946 /* 2947 * Create an ipsec_action_t based on the way an inbound packet was protected. 2948 * Used to reflect traffic back to a sender. 2949 * 2950 * We don't bother interning the action into the hash table. 2951 */ 2952 ipsec_action_t * 2953 ipsec_in_to_out_action(ipsec_in_t *ii) 2954 { 2955 ipsa_t *ah_assoc, *esp_assoc; 2956 uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0; 2957 ipsec_action_t *ap; 2958 boolean_t unique; 2959 2960 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 2961 2962 if (ap == NULL) 2963 return (NULL); 2964 2965 bzero(ap, sizeof (*ap)); 2966 HASH_NULL(ap, ipa_hash); 2967 ap->ipa_next = NULL; 2968 ap->ipa_refs = 1; 2969 2970 /* 2971 * Get the algorithms that were used for this packet. 
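 * For example (illustrative only): if the packet arrived under ESP
 * with an authenticated cipher, the assignments below copy
 * ipsa_encr_alg/ipsa_auth_alg out of the SA and pin the key-size
 * bounds (ipp_espe_minbits == ipp_espe_maxbits, and so on) to the
 * SA's actual key sizes, so a reflected reply is protected no more
 * weakly than the packet that prompted it.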
2972 */ 2973 ap->ipa_act.ipa_type = IPSEC_ACT_APPLY; 2974 ap->ipa_act.ipa_log = 0; 2975 ah_assoc = ii->ipsec_in_ah_sa; 2976 ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL); 2977 2978 esp_assoc = ii->ipsec_in_esp_sa; 2979 ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL); 2980 2981 if (esp_assoc != NULL) { 2982 encr_alg = esp_assoc->ipsa_encr_alg; 2983 espa_alg = esp_assoc->ipsa_auth_alg; 2984 ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0); 2985 } 2986 if (ah_assoc != NULL) 2987 auth_alg = ah_assoc->ipsa_auth_alg; 2988 2989 ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg; 2990 ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg; 2991 ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg; 2992 ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps; 2993 unique = B_FALSE; 2994 2995 if (esp_assoc != NULL) { 2996 ap->ipa_act.ipa_apply.ipp_espa_minbits = 2997 esp_assoc->ipsa_authkeybits; 2998 ap->ipa_act.ipa_apply.ipp_espa_maxbits = 2999 esp_assoc->ipsa_authkeybits; 3000 ap->ipa_act.ipa_apply.ipp_espe_minbits = 3001 esp_assoc->ipsa_encrkeybits; 3002 ap->ipa_act.ipa_apply.ipp_espe_maxbits = 3003 esp_assoc->ipsa_encrkeybits; 3004 ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp; 3005 ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc; 3006 if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE) 3007 unique = B_TRUE; 3008 } 3009 if (ah_assoc != NULL) { 3010 ap->ipa_act.ipa_apply.ipp_ah_minbits = 3011 ah_assoc->ipsa_authkeybits; 3012 ap->ipa_act.ipa_apply.ipp_ah_maxbits = 3013 ah_assoc->ipsa_authkeybits; 3014 ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp; 3015 ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc; 3016 if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE) 3017 unique = B_TRUE; 3018 } 3019 ap->ipa_act.ipa_apply.ipp_use_unique = unique; 3020 ap->ipa_want_unique = unique; 3021 ap->ipa_allow_clear = B_FALSE; 3022 ap->ipa_want_se = ii->ipsec_in_decaps; 3023 ap->ipa_want_ah = (ah_assoc != NULL); 3024 ap->ipa_want_esp = (esp_assoc != NULL); 3025 3026 ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act); 3027 3028 ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */ 3029 3030 return (ap); 3031 } 3032 3033 3034 /* 3035 * Compute the worst-case amount of extra space required by an action. 3036 * Note that, because of the ESP considerations listed below, this is 3037 * actually not the same as the best-case reduction in the MTU; in the 3038 * future, we should pass additional information to this function to 3039 * allow the actual MTU impact to be computed. 3040 * 3041 * AH: Revisit this if we implement algorithms with 3042 * a verifier size of more than 12 bytes. 3043 * 3044 * ESP: A more exact but more messy computation would take into 3045 * account the interaction between the cipher block size and the 3046 * effective MTU, yielding the inner payload size which reflects a 3047 * packet with *minimum* ESP padding.. 3048 */ 3049 int32_t 3050 ipsec_act_ovhd(const ipsec_act_t *act) 3051 { 3052 int32_t overhead = 0; 3053 3054 if (act->ipa_type == IPSEC_ACT_APPLY) { 3055 const ipsec_prot_t *ipp = &act->ipa_apply; 3056 3057 if (ipp->ipp_use_ah) 3058 overhead += IPSEC_MAX_AH_HDR_SIZE; 3059 if (ipp->ipp_use_esp) { 3060 overhead += IPSEC_MAX_ESP_HDR_SIZE; 3061 overhead += sizeof (struct udphdr); 3062 } 3063 if (ipp->ipp_use_se) 3064 overhead += IP_SIMPLE_HDR_LENGTH; 3065 } 3066 return (overhead); 3067 } 3068 3069 /* 3070 * This hash function is used only when creating policies and thus is not 3071 * performance-critical for packet flows. 
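 *
 * The padding concern, sketched with hypothetical values (not code in
 * this module): two ipsec_act_t variables that are logically identical
 * can still bcmp() unequal if one was built on a stack frame with
 * stale padding bytes while the other was bzero()ed before its fields
 * were filled in. A padding-sensitive hash would then scatter equal
 * actions across buckets; canonicalizing (bzero() first) makes
 * bcmp()-based comparison and hashing reliable.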
3072 * 3073 * Future work: canonicalize the structures hashed with this (i.e., 3074 * zeroize padding) so the hash works correctly. 3075 */ 3076 /* ARGSUSED */ 3077 static uint32_t 3078 policy_hash(int size, const void *start, const void *end) 3079 { 3080 return (0); 3081 } 3082 3083 3084 /* 3085 * Hash function macros for each address type. 3086 * 3087 * The IPV6 hash function assumes that the low order 32-bits of the 3088 * address (typically containing the low order 24 bits of the mac 3089 * address) are reasonably well-distributed. Revisit this if we run 3090 * into trouble from lots of collisions on ::1 addresses and the like 3091 * (seems unlikely). 3092 */ 3093 #define IPSEC_IPV4_HASH(a, n) ((a) % (n)) 3094 #define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n)) 3095 3096 /* 3097 * These two hash functions should produce coordinated values 3098 * but have slightly different roles. 3099 */ 3100 static uint32_t 3101 selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) 3102 { 3103 uint32_t valid = selkey->ipsl_valid; 3104 ipsec_stack_t *ipss = ns->netstack_ipsec; 3105 3106 if (!(valid & IPSL_REMOTE_ADDR)) 3107 return (IPSEC_SEL_NOHASH); 3108 3109 if (valid & IPSL_IPV4) { 3110 if (selkey->ipsl_remote_pfxlen == 32) { 3111 return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3112 ipss->ipsec_spd_hashsize)); 3113 } 3114 } 3115 if (valid & IPSL_IPV6) { 3116 if (selkey->ipsl_remote_pfxlen == 128) { 3117 return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3118 ipss->ipsec_spd_hashsize)); 3119 } 3120 } 3121 return (IPSEC_SEL_NOHASH); 3122 } 3123 3124 static uint32_t 3125 selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) 3126 { 3127 if (sel->ips_isv4) { 3128 return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4, 3129 root->ipr_nchains)); 3130 } 3131 return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains)); 3132 } 3133 3134 /* 3135 * Intern actions into the action hash table. 3136 */ 3137 ipsec_action_t * 3138 ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) 3139 { 3140 int i; 3141 uint32_t hval; 3142 ipsec_action_t *ap; 3143 ipsec_action_t *prev = NULL; 3144 int32_t overhead, maxovhd = 0; 3145 boolean_t allow_clear = B_FALSE; 3146 boolean_t want_ah = B_FALSE; 3147 boolean_t want_esp = B_FALSE; 3148 boolean_t want_se = B_FALSE; 3149 boolean_t want_unique = B_FALSE; 3150 ipsec_stack_t *ipss = ns->netstack_ipsec; 3151 3152 /* 3153 * TODO: should canonicalize a[] (i.e., zeroize any padding) 3154 * so we can use a non-trivial policy_hash function. 3155 */ 3156 for (i = n-1; i >= 0; i--) { 3157 hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); 3158 3159 HASH_LOCK(ipss->ipsec_action_hash, hval); 3160 3161 for (HASH_ITERATE(ap, ipa_hash, 3162 ipss->ipsec_action_hash, hval)) { 3163 if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) 3164 continue; 3165 if (ap->ipa_next != prev) 3166 continue; 3167 break; 3168 } 3169 if (ap != NULL) { 3170 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3171 prev = ap; 3172 continue; 3173 } 3174 /* 3175 * need to allocate a new one.. 
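 * The chain is interned back-to-front: each new node points at the
 * previously interned suffix via ipa_next and takes a reference on it
 * (see the prev->ipa_refs++ below), so policies that share a suffix
 * of actions share those nodes.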
3176 */ 3177 ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); 3178 if (ap == NULL) { 3179 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3180 if (prev != NULL) 3181 ipsec_action_free(prev); 3182 return (NULL); 3183 } 3184 HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); 3185 3186 ap->ipa_next = prev; 3187 ap->ipa_act = a[i]; 3188 3189 overhead = ipsec_act_ovhd(&a[i]); 3190 if (maxovhd < overhead) 3191 maxovhd = overhead; 3192 3193 if ((a[i].ipa_type == IPSEC_ACT_BYPASS) || 3194 (a[i].ipa_type == IPSEC_ACT_CLEAR)) 3195 allow_clear = B_TRUE; 3196 if (a[i].ipa_type == IPSEC_ACT_APPLY) { 3197 const ipsec_prot_t *ipp = &a[i].ipa_apply; 3198 3199 ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp); 3200 want_ah |= ipp->ipp_use_ah; 3201 want_esp |= ipp->ipp_use_esp; 3202 want_se |= ipp->ipp_use_se; 3203 want_unique |= ipp->ipp_use_unique; 3204 } 3205 ap->ipa_allow_clear = allow_clear; 3206 ap->ipa_want_ah = want_ah; 3207 ap->ipa_want_esp = want_esp; 3208 ap->ipa_want_se = want_se; 3209 ap->ipa_want_unique = want_unique; 3210 ap->ipa_refs = 1; /* from the hash table */ 3211 ap->ipa_ovhd = maxovhd; 3212 if (prev) 3213 prev->ipa_refs++; 3214 prev = ap; 3215 HASH_UNLOCK(ipss->ipsec_action_hash, hval); 3216 } 3217 3218 ap->ipa_refs++; /* caller's reference */ 3219 3220 return (ap); 3221 } 3222 3223 /* 3224 * Called when refcount goes to 0, indicating that all references to this 3225 * node are gone. 3226 * 3227 * This does not unchain the action from the hash table. 3228 */ 3229 void 3230 ipsec_action_free(ipsec_action_t *ap) 3231 { 3232 for (;;) { 3233 ipsec_action_t *np = ap->ipa_next; 3234 ASSERT(ap->ipa_refs == 0); 3235 ASSERT(ap->ipa_hash.hash_pp == NULL); 3236 kmem_cache_free(ipsec_action_cache, ap); 3237 ap = np; 3238 /* Inlined IPACT_REFRELE -- avoid recursion */ 3239 if (ap == NULL) 3240 break; 3241 membar_exit(); 3242 if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0) 3243 break; 3244 /* End inlined IPACT_REFRELE */ 3245 } 3246 } 3247 3248 /* 3249 * Called when the action hash table goes away. 3250 * 3251 * The actions can be queued on an mblk with ipsec_in or 3252 * ipsec_out, hence the actions might still be around. 3253 * But we decrement ipa_refs here since we no longer have 3254 * a reference to the action from the hash table. 3255 */ 3256 static void 3257 ipsec_action_free_table(ipsec_action_t *ap) 3258 { 3259 while (ap != NULL) { 3260 ipsec_action_t *np = ap->ipa_next; 3261 3262 /* FIXME: remove? */ 3263 (void) printf("ipsec_action_free_table(%p) ref %d\n", 3264 (void *)ap, ap->ipa_refs); 3265 ASSERT(ap->ipa_refs > 0); 3266 IPACT_REFRELE(ap); 3267 ap = np; 3268 } 3269 } 3270 3271 /* 3272 * Need to walk all stack instances since the reclaim function 3273 * is global for all instances 3274 */ 3275 /* ARGSUSED */ 3276 static void 3277 ipsec_action_reclaim(void *arg) 3278 { 3279 netstack_handle_t nh; 3280 netstack_t *ns; 3281 3282 netstack_next_init(&nh); 3283 while ((ns = netstack_next(&nh)) != NULL) { 3284 ipsec_action_reclaim_stack(ns); 3285 netstack_rele(ns); 3286 } 3287 netstack_next_fini(&nh); 3288 } 3289 3290 /* 3291 * Periodically sweep action hash table for actions with refcount==1, and 3292 * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) 3293 * because we can't close the race between another thread finding the action 3294 * in the hash table without holding the bucket lock during IPACT_REFRELE. 3295 * Instead, we run this function sporadically to clean up after ourselves; 3296 * we also set it as the "reclaim" function for the action kmem_cache. 
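 *
 * For reference, a sketch of how the cache is wired to this reclaim
 * callback at module-init time (the actual call lives elsewhere in
 * this module; argument order per kmem_cache_create(9F)):
 *
 *	ipsec_action_cache = kmem_cache_create("ipsec_actions",
 *	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
 *	    ipsec_action_reclaim, NULL, NULL, 0);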
3297 * 3298 * Note that it may take several passes of ipsec_action_gc() to free all 3299 * "stale" actions. 3300 */ 3301 static void 3302 ipsec_action_reclaim_stack(netstack_t *ns) 3303 { 3304 int i; 3305 ipsec_stack_t *ipss = ns->netstack_ipsec; 3306 3307 for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { 3308 ipsec_action_t *ap, *np; 3309 3310 /* skip the lock if nobody home */ 3311 if (ipss->ipsec_action_hash[i].hash_head == NULL) 3312 continue; 3313 3314 HASH_LOCK(ipss->ipsec_action_hash, i); 3315 for (ap = ipss->ipsec_action_hash[i].hash_head; 3316 ap != NULL; ap = np) { 3317 ASSERT(ap->ipa_refs > 0); 3318 np = ap->ipa_hash.hash_next; 3319 if (ap->ipa_refs > 1) 3320 continue; 3321 HASH_UNCHAIN(ap, ipa_hash, 3322 ipss->ipsec_action_hash, i); 3323 IPACT_REFRELE(ap); 3324 } 3325 HASH_UNLOCK(ipss->ipsec_action_hash, i); 3326 } 3327 } 3328 3329 /* 3330 * Intern a selector set into the selector set hash table. 3331 * This is simpler than the actions case.. 3332 */ 3333 static ipsec_sel_t * 3334 ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) 3335 { 3336 ipsec_sel_t *sp; 3337 uint32_t hval, bucket; 3338 ipsec_stack_t *ipss = ns->netstack_ipsec; 3339 3340 /* 3341 * Exactly one AF bit should be set in selkey. 3342 */ 3343 ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ 3344 !(selkey->ipsl_valid & IPSL_IPV6)); 3345 3346 hval = selkey_hash(selkey, ns); 3347 /* Set pol_hval to uninitialized until we put it in a polhead. */ 3348 selkey->ipsl_sel_hval = hval; 3349 3350 bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval; 3351 3352 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); 3353 HASH_LOCK(ipss->ipsec_sel_hash, bucket); 3354 3355 for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { 3356 if (bcmp(&sp->ipsl_key, selkey, 3357 offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) 3358 break; 3359 } 3360 if (sp != NULL) { 3361 sp->ipsl_refs++; 3362 3363 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3364 return (sp); 3365 } 3366 3367 sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); 3368 if (sp == NULL) { 3369 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3370 return (NULL); 3371 } 3372 3373 HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); 3374 sp->ipsl_refs = 2; /* one for hash table, one for caller */ 3375 sp->ipsl_key = *selkey; 3376 /* Set to uninitalized and have insertion into polhead fix things. */ 3377 if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH) 3378 sp->ipsl_key.ipsl_pol_hval = 0; 3379 else 3380 sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; 3381 3382 HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); 3383 3384 return (sp); 3385 } 3386 3387 static void 3388 ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) 3389 { 3390 ipsec_sel_t *sp = *spp; 3391 int hval = sp->ipsl_key.ipsl_sel_hval; 3392 ipsec_stack_t *ipss = ns->netstack_ipsec; 3393 3394 *spp = NULL; 3395 3396 if (hval == IPSEC_SEL_NOHASH) 3397 hval = 0; 3398 3399 ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); 3400 HASH_LOCK(ipss->ipsec_sel_hash, hval); 3401 if (--sp->ipsl_refs == 1) { 3402 HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); 3403 sp->ipsl_refs--; 3404 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3405 ASSERT(sp->ipsl_refs == 0); 3406 kmem_cache_free(ipsec_sel_cache, sp); 3407 /* Caller unlocks */ 3408 return; 3409 } 3410 3411 HASH_UNLOCK(ipss->ipsec_sel_hash, hval); 3412 } 3413 3414 /* 3415 * Free a policy rule which we know is no longer being referenced. 
3416 */ 3417 void 3418 ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns) 3419 { 3420 ASSERT(ipp->ipsp_refs == 0); 3421 ASSERT(ipp->ipsp_sel != NULL); 3422 ASSERT(ipp->ipsp_act != NULL); 3423 3424 ipsec_sel_rel(&ipp->ipsp_sel, ns); 3425 IPACT_REFRELE(ipp->ipsp_act); 3426 kmem_cache_free(ipsec_pol_cache, ipp); 3427 } 3428 3429 /* 3430 * Construction of new policy rules; construct a policy, and add it to 3431 * the appropriate tables. 3432 */ 3433 ipsec_policy_t * 3434 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, 3435 int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) 3436 { 3437 ipsec_action_t *ap; 3438 ipsec_sel_t *sp; 3439 ipsec_policy_t *ipp; 3440 ipsec_stack_t *ipss = ns->netstack_ipsec; 3441 3442 if (index_ptr == NULL) 3443 index_ptr = &ipss->ipsec_next_policy_index; 3444 3445 ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); 3446 ap = ipsec_act_find(a, nacts, ns); 3447 sp = ipsec_find_sel(keys, ns); 3448 3449 if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { 3450 if (ap != NULL) { 3451 IPACT_REFRELE(ap); 3452 } 3453 if (sp != NULL) 3454 ipsec_sel_rel(&sp, ns); 3455 if (ipp != NULL) 3456 kmem_cache_free(ipsec_pol_cache, ipp); 3457 return (NULL); 3458 } 3459 3460 HASH_NULL(ipp, ipsp_hash); 3461 3462 ipp->ipsp_refs = 1; /* caller's reference */ 3463 ipp->ipsp_sel = sp; 3464 ipp->ipsp_act = ap; 3465 ipp->ipsp_prio = prio; /* rule priority */ 3466 ipp->ipsp_index = *index_ptr; 3467 (*index_ptr)++; 3468 3469 return (ipp); 3470 } 3471 3472 static void 3473 ipsec_update_present_flags(ipsec_stack_t *ipss) 3474 { 3475 boolean_t hashpol; 3476 3477 hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); 3478 3479 if (hashpol) { 3480 ipss->ipsec_outbound_v4_policy_present = B_TRUE; 3481 ipss->ipsec_outbound_v6_policy_present = B_TRUE; 3482 ipss->ipsec_inbound_v4_policy_present = B_TRUE; 3483 ipss->ipsec_inbound_v6_policy_present = B_TRUE; 3484 return; 3485 } 3486 3487 ipss->ipsec_outbound_v4_policy_present = (NULL != 3488 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3489 ipr_nonhash[IPSEC_AF_V4]); 3490 ipss->ipsec_outbound_v6_policy_present = (NULL != 3491 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 3492 ipr_nonhash[IPSEC_AF_V6]); 3493 ipss->ipsec_inbound_v4_policy_present = (NULL != 3494 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3495 ipr_nonhash[IPSEC_AF_V4]); 3496 ipss->ipsec_inbound_v6_policy_present = (NULL != 3497 ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. 3498 ipr_nonhash[IPSEC_AF_V6]); 3499 } 3500 3501 boolean_t 3502 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, 3503 netstack_t *ns) 3504 { 3505 ipsec_sel_t *sp; 3506 ipsec_policy_t *ip, *nip, *head; 3507 int af; 3508 ipsec_policy_root_t *pr = &php->iph_root[dir]; 3509 3510 sp = ipsec_find_sel(keys, ns); 3511 3512 if (sp == NULL) 3513 return (B_FALSE); 3514 3515 af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6; 3516
3517 rw_enter(&php->iph_lock, RW_WRITER); 3518
3519 if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) {
3520 head = pr->ipr_nonhash[af];
3521 } else { 3522 head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head; 3523 } 3524
3525 for (ip = head; ip != NULL; ip = nip) {
3526 nip = ip->ipsp_hash.hash_next;
3527 if (ip->ipsp_sel != sp) { 3528 continue; 3529 } 3530
3531 IPPOL_UNCHAIN(php, ip, ns); 3532
3533 php->iph_gen++;
3534 ipsec_update_present_flags(ns->netstack_ipsec); 3535
3536 rw_exit(&php->iph_lock); 3537
3538 ipsec_sel_rel(&sp, ns); 3539
3540 return (B_TRUE); 3541 } 3542
3543 rw_exit(&php->iph_lock);
3544 ipsec_sel_rel(&sp, ns);
3545 return (B_FALSE); 3546 } 3547
3548 int 3549 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index, 3550 netstack_t *ns) 3551 {
3552 boolean_t found = B_FALSE;
3553 ipsec_policy_t ipkey; 3554 ipsec_policy_t *ip; 3555 avl_index_t where; 3556
3557 (void) memset(&ipkey, 0, sizeof (ipkey));
3558 ipkey.ipsp_index = policy_index; 3559
3560 rw_enter(&php->iph_lock, RW_WRITER); 3561
3562 /* 3563 * We could be cleverer here about the walk;
3564 * but well, (k+1)*log(N) will do for now (k == number of matches,
3565 * N == number of table entries). 3566 */
3567 for (;;) {
3568 ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid, 3569 (void *)&ipkey, &where);
3570 ASSERT(ip == NULL); 3571
3572 ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER); 3573
3574 if (ip == NULL) 3575 break; 3576
3577 if (ip->ipsp_index != policy_index) {
3578 ASSERT(ip->ipsp_index > policy_index); 3579 break; 3580 } 3581
3582 IPPOL_UNCHAIN(php, ip, ns);
3583 found = B_TRUE; 3584 } 3585
3586 if (found) { 3587 php->iph_gen++;
3588 ipsec_update_present_flags(ns->netstack_ipsec); 3589 } 3590
3591 rw_exit(&php->iph_lock); 3592
3593 return (found ? 0 : ENOENT); 3594 } 3595
3596 /* 3597 * Given a constructed ipsec_policy_t policy rule, see if it can be entered
3598 * into the correct policy ruleset. As a side-effect, it sets the hash
3599 * entries on "ipp"'s ipsp_pol_hval. 3600 *
3601 * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
3602 * duplicate policy exists with exactly the same selectors, or an ICMP
3603 * rule exists with a different encryption/authentication action). 3604 */
3605 boolean_t 3606 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) 3607 {
3608 ipsec_policy_root_t *pr = &php->iph_root[direction];
3609 int af = -1; 3610 ipsec_policy_t *p2, *head; 3611 uint8_t check_proto;
3612 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3613 uint32_t valid = selkey->ipsl_valid; 3614
3615 if (valid & IPSL_IPV6) {
3616 ASSERT(!(valid & IPSL_IPV4));
3617 af = IPSEC_AF_V6; 3618 check_proto = IPPROTO_ICMPV6;
3619 } else { 3620 ASSERT(valid & IPSL_IPV4);
3621 af = IPSEC_AF_V4; 3622 check_proto = IPPROTO_ICMP; 3623 } 3624
3625 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3626
3627 /* 3628 * Double-check that we don't have any duplicate selectors here.
3629 * Because selectors are interned below, we need only compare pointers
3630 * for equality. 3631 */
3632 if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
3633 head = pr->ipr_nonhash[af];
3634 } else { 3635 selkey->ipsl_pol_hval = 3636 (selkey->ipsl_valid & IPSL_IPV4) ? 
3637 IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, 3638 pr->ipr_nchains) :
3639 IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, 3640 pr->ipr_nchains); 3641
3642 head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head; 3643 } 3644
3645 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3646 if (p2->ipsp_sel == ipp->ipsp_sel) 3647 return (B_FALSE); 3648 } 3649
3650 /* 3651 * If it's ICMP and not a drop or pass rule, run through the ICMP
3652 * rules and make sure the action is either new or the same as any
3653 * other action. We don't have to check the full chain because
3654 * discard and bypass will override all other actions. 3655 */ 3656
3657 if (valid & IPSL_PROTOCOL && 3658 selkey->ipsl_proto == check_proto && 3659 (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) { 3660
3661 for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) { 3662
3663 if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL &&
3664 p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto &&
3665 (p2->ipsp_act->ipa_act.ipa_type == 3666 IPSEC_ACT_APPLY)) {
3667 return (ipsec_compare_action(p2, ipp)); 3668 } 3669 } 3670 } 3671
3672 return (B_TRUE); 3673 } 3674
3675 /* 3676 * Compare the action chains of two policies for equality.
3677 * B_TRUE -> effectively equal. 3678 */ 3679
3680 static boolean_t 3681 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) 3682 { 3683
3684 ipsec_action_t *act1, *act2; 3685
3686 /* We have a valid rule. Let's compare the actions */
3687 if (p1->ipsp_act == p2->ipsp_act) {
3688 /* Same action; we are good. */ 3689 return (B_TRUE); 3690 } 3691
3692 /* we have to walk the chain */ 3693
3694 act1 = p1->ipsp_act; 3695 act2 = p2->ipsp_act; 3696
3697 while (act1 != NULL && act2 != NULL) { 3698
3699 /* Otherwise, are we close enough? */
3700 if (act1->ipa_allow_clear != act2->ipa_allow_clear ||
3701 act1->ipa_want_ah != act2->ipa_want_ah ||
3702 act1->ipa_want_esp != act2->ipa_want_esp ||
3703 act1->ipa_want_se != act2->ipa_want_se) {
3704 /* Nope, we aren't */ 3705 return (B_FALSE); 3706 } 3707
3708 if (act1->ipa_want_ah) {
3709 if (act1->ipa_act.ipa_apply.ipp_auth_alg != 3710 act2->ipa_act.ipa_apply.ipp_auth_alg) { 3711 return (B_FALSE); 3712 } 3713
3714 if (act1->ipa_act.ipa_apply.ipp_ah_minbits != 3715 act2->ipa_act.ipa_apply.ipp_ah_minbits ||
3716 act1->ipa_act.ipa_apply.ipp_ah_maxbits != 3717 act2->ipa_act.ipa_apply.ipp_ah_maxbits) { 3718 return (B_FALSE); 3719 } 3720 } 3721
3722 if (act1->ipa_want_esp) {
3723 if (act1->ipa_act.ipa_apply.ipp_use_esp != 3724 act2->ipa_act.ipa_apply.ipp_use_esp ||
3725 act1->ipa_act.ipa_apply.ipp_use_espa != 3726 act2->ipa_act.ipa_apply.ipp_use_espa) { 3727 return (B_FALSE); 3728 } 3729
3730 if (act1->ipa_act.ipa_apply.ipp_use_esp) {
3731 if (act1->ipa_act.ipa_apply.ipp_encr_alg != 3732 act2->ipa_act.ipa_apply.ipp_encr_alg) { 3733 return (B_FALSE); 3734 } 3735
3736 if (act1->ipa_act.ipa_apply.ipp_espe_minbits != 3737 act2->ipa_act.ipa_apply.ipp_espe_minbits ||
3738 act1->ipa_act.ipa_apply.ipp_espe_maxbits != 3739 act2->ipa_act.ipa_apply.ipp_espe_maxbits) { 3740 return (B_FALSE); 3741 } 3742 } 3743
3744 if (act1->ipa_act.ipa_apply.ipp_use_espa) {
3745 if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg != 3746 act2->ipa_act.ipa_apply.ipp_esp_auth_alg) { 3747 return (B_FALSE); 3748 } 3749
3750 if (act1->ipa_act.ipa_apply.ipp_espa_minbits != 3751 act2->ipa_act.ipa_apply.ipp_espa_minbits ||
3752 act1->ipa_act.ipa_apply.ipp_espa_maxbits != 3753 act2->ipa_act.ipa_apply.ipp_espa_maxbits) { 3754 return (B_FALSE); 3755 } 3756 } 3757
3758 } 3759
3760 act1 = 
act1->ipa_next; 3761 act2 = act2->ipa_next; 3762 } 3763 3764 if (act1 != NULL || act2 != NULL) { 3765 return (B_FALSE); 3766 } 3767 3768 return (B_TRUE); 3769 } 3770 3771 3772 /* 3773 * Given a constructed ipsec_policy_t policy rule, enter it into 3774 * the correct policy ruleset. 3775 * 3776 * ipsec_check_policy() is assumed to have succeeded first (to check for 3777 * duplicates). 3778 */ 3779 void 3780 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, 3781 netstack_t *ns) 3782 { 3783 ipsec_policy_root_t *pr = &php->iph_root[direction]; 3784 ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; 3785 uint32_t valid = selkey->ipsl_valid; 3786 uint32_t hval = selkey->ipsl_pol_hval; 3787 int af = -1; 3788 3789 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3790 3791 if (valid & IPSL_IPV6) { 3792 ASSERT(!(valid & IPSL_IPV4)); 3793 af = IPSEC_AF_V6; 3794 } else { 3795 ASSERT(valid & IPSL_IPV4); 3796 af = IPSEC_AF_V4; 3797 } 3798 3799 php->iph_gen++; 3800 3801 if (hval == IPSEC_SEL_NOHASH) { 3802 HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]); 3803 } else { 3804 HASH_LOCK(pr->ipr_hash, hval); 3805 HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval); 3806 HASH_UNLOCK(pr->ipr_hash, hval); 3807 } 3808 3809 ipsec_insert_always(&php->iph_rulebyid, ipp); 3810 3811 ipsec_update_present_flags(ns->netstack_ipsec); 3812 } 3813 3814 static void 3815 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, 3816 netstack_t *ns) 3817 { 3818 ipsec_policy_t *ip, *nip; 3819 int af, chain, nchain; 3820 3821 for (af = 0; af < IPSEC_NAF; af++) { 3822 for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { 3823 nip = ip->ipsp_hash.hash_next; 3824 IPPOL_UNCHAIN(php, ip, ns); 3825 } 3826 ipr->ipr_nonhash[af] = NULL; 3827 } 3828 nchain = ipr->ipr_nchains; 3829 3830 for (chain = 0; chain < nchain; chain++) { 3831 for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; 3832 ip = nip) { 3833 nip = ip->ipsp_hash.hash_next; 3834 IPPOL_UNCHAIN(php, ip, ns); 3835 } 3836 ipr->ipr_hash[chain].hash_head = NULL; 3837 } 3838 } 3839 3840 /* 3841 * Create and insert inbound or outbound policy associated with actp for the 3842 * address family fam into the policy head ph. Returns B_TRUE if policy was 3843 * inserted, and B_FALSE otherwise. 3844 */ 3845 boolean_t 3846 ipsec_polhead_insert(ipsec_policy_head_t *ph, ipsec_act_t *actp, uint_t nact, 3847 int fam, int ptype, netstack_t *ns) 3848 { 3849 ipsec_selkey_t sel; 3850 ipsec_policy_t *pol; 3851 ipsec_policy_root_t *pr; 3852 3853 bzero(&sel, sizeof (sel)); 3854 sel.ipsl_valid = (fam == IPSEC_AF_V4 ? 
IPSL_IPV4 : IPSL_IPV6); 3855 if ((pol = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, 3856 NULL, ns)) != NULL) { 3857 pr = &ph->iph_root[ptype]; 3858 HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[fam]); 3859 ipsec_insert_always(&ph->iph_rulebyid, pol); 3860 } 3861 return (pol != NULL); 3862 } 3863 3864 void 3865 ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) 3866 { 3867 int dir; 3868 3869 ASSERT(RW_WRITE_HELD(&php->iph_lock)); 3870 3871 for (dir = 0; dir < IPSEC_NTYPES; dir++) 3872 ipsec_ipr_flush(php, &php->iph_root[dir], ns); 3873 3874 ipsec_update_present_flags(ns->netstack_ipsec); 3875 } 3876 3877 void 3878 ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) 3879 { 3880 int dir; 3881 3882 ASSERT(php->iph_refs == 0); 3883 3884 rw_enter(&php->iph_lock, RW_WRITER); 3885 ipsec_polhead_flush(php, ns); 3886 rw_exit(&php->iph_lock); 3887 rw_destroy(&php->iph_lock); 3888 for (dir = 0; dir < IPSEC_NTYPES; dir++) { 3889 ipsec_policy_root_t *ipr = &php->iph_root[dir]; 3890 int chain; 3891 3892 for (chain = 0; chain < ipr->ipr_nchains; chain++) 3893 mutex_destroy(&(ipr->ipr_hash[chain].hash_lock)); 3894 3895 } 3896 ipsec_polhead_free_table(php); 3897 kmem_free(php, sizeof (*php)); 3898 } 3899 3900 static void 3901 ipsec_ipr_init(ipsec_policy_root_t *ipr) 3902 { 3903 int af; 3904 3905 ipr->ipr_nchains = 0; 3906 ipr->ipr_hash = NULL; 3907 3908 for (af = 0; af < IPSEC_NAF; af++) { 3909 ipr->ipr_nonhash[af] = NULL; 3910 } 3911 } 3912 3913 ipsec_policy_head_t * 3914 ipsec_polhead_create(void) 3915 { 3916 ipsec_policy_head_t *php; 3917 3918 php = kmem_alloc(sizeof (*php), KM_NOSLEEP); 3919 if (php == NULL) 3920 return (php); 3921 3922 rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL); 3923 php->iph_refs = 1; 3924 php->iph_gen = 0; 3925 3926 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]); 3927 ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]); 3928 3929 avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid, 3930 sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid)); 3931 3932 return (php); 3933 } 3934 3935 /* 3936 * Clone the policy head into a new polhead; release one reference to the 3937 * old one and return the only reference to the new one. 3938 * If the old one had a refcount of 1, just return it. 3939 */ 3940 ipsec_policy_head_t * 3941 ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns) 3942 { 3943 ipsec_policy_head_t *nphp; 3944 3945 if (php == NULL) 3946 return (ipsec_polhead_create()); 3947 else if (php->iph_refs == 1) 3948 return (php); 3949 3950 nphp = ipsec_polhead_create(); 3951 if (nphp == NULL) 3952 return (NULL); 3953 3954 if (ipsec_copy_polhead(php, nphp, ns) != 0) { 3955 ipsec_polhead_free(nphp, ns); 3956 return (NULL); 3957 } 3958 IPPH_REFRELE(php, ns); 3959 return (nphp); 3960 } 3961 3962 /* 3963 * When sending a response to an ICMP request or generating a RST 3964 * in the TCP case, the outbound packets need to go out at the same 3965 * level of protection as the incoming ones, i.e., we associate our 3966 * outbound policy with how the packet came in. We call this after we 3967 * have accepted the incoming packet, which may or may not have been 3968 * in the clear, and hence we send the reply back with the policy 3969 * matching the incoming datagram's policy. 3970 * 3971 * NOTE: This technique serves two purposes: 3972 * 3973 * 1) If we have multiple outbound policies, we send out a reply 3974 * matching how it came in rather than matching the outbound 3975 * policy.
3976 * 3977 * 2) For asymmetric policies, we want to make sure that incoming 3978 * and outgoing have the same level of protection. Asymmetric 3979 * policies exist only with global policy where we may not have 3980 * both outbound and inbound at the same time. 3981 * 3982 * NOTE2: This function is called by cleartext cases, so it needs to be 3983 * in IP proper. 3984 */ 3985 boolean_t 3986 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) 3987 { 3988 ipsec_in_t *ii; 3989 ipsec_out_t *io; 3990 boolean_t v4; 3991 mblk_t *mp; 3992 boolean_t secure; 3993 uint_t ifindex; 3994 ipsec_selector_t sel; 3995 ipsec_action_t *reflect_action = NULL; 3996 zoneid_t zoneid; 3997 netstack_t *ns; 3998 3999 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4000 4001 bzero((void*)&sel, sizeof (sel)); 4002 4003 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4004 4005 mp = ipsec_mp->b_cont; 4006 ASSERT(mp != NULL); 4007 4008 if (ii->ipsec_in_action != NULL) { 4009 /* Transfer the reference. */ 4010 reflect_action = ii->ipsec_in_action; 4011 ii->ipsec_in_action = NULL; 4012 } else if (!ii->ipsec_in_loopback) 4013 reflect_action = ipsec_in_to_out_action(ii); 4014 secure = ii->ipsec_in_secure; 4015 ifindex = ii->ipsec_in_ill_index; 4016 zoneid = ii->ipsec_in_zoneid; 4017 ASSERT(zoneid != ALL_ZONES); 4018 ns = ii->ipsec_in_ns; 4019 v4 = ii->ipsec_in_v4; 4020 4021 ipsec_in_release_refs(ii); /* No netstack_rele/hold needed */ 4022 4023 /* 4024 * The caller is going to send the datagram out, which might 4025 * go out on the wire or be delivered locally through ip_wput_local. 4026 * 4027 * 1) If it goes out on the wire, new associations will be 4028 * obtained. 4029 * 2) If it is delivered locally, ip_wput_local will convert 4030 * this IPSEC_OUT to an IPSEC_IN looking at the requests. 4031 */ 4032 4033 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4034 bzero(io, sizeof (ipsec_out_t)); 4035 io->ipsec_out_type = IPSEC_OUT; 4036 io->ipsec_out_len = sizeof (ipsec_out_t); 4037 io->ipsec_out_frtn.free_func = ipsec_out_free; 4038 io->ipsec_out_frtn.free_arg = (char *)io; 4039 io->ipsec_out_act = reflect_action; 4040 4041 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, 4042 ns->netstack_ipsec)) 4043 return (B_FALSE); 4044 4045 io->ipsec_out_src_port = sel.ips_local_port; 4046 io->ipsec_out_dst_port = sel.ips_remote_port; 4047 io->ipsec_out_proto = sel.ips_protocol; 4048 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4049 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4050 4051 /* 4052 * Don't use global policy for this, as we want 4053 * to use the same protection that was applied to the inbound packet.
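 * (Illustrative example: if an ICMP echo request arrived protected
 * by ESP, the reply or RST generated here goes out under that same
 * reflected action, not under whatever rule outbound global policy
 * would have selected.)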
4054 */ 4055 io->ipsec_out_use_global_policy = B_FALSE; 4056 io->ipsec_out_proc_begin = B_FALSE; 4057 io->ipsec_out_secure = secure; 4058 io->ipsec_out_v4 = v4; 4059 io->ipsec_out_ill_index = ifindex; 4060 io->ipsec_out_zoneid = zoneid; 4061 io->ipsec_out_ns = ns; /* No netstack_hold */ 4062 4063 return (B_TRUE); 4064 } 4065 4066 mblk_t * 4067 ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4068 { 4069 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 4070 ipsec_in_t *nii; 4071 mblk_t *nmp; 4072 frtn_t nfrtn; 4073 ipsec_stack_t *ipss = ns->netstack_ipsec; 4074 4075 ASSERT(ii->ipsec_in_type == IPSEC_IN); 4076 ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); 4077 4078 nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); 4079 if (nmp == NULL) { 4080 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4081 DROPPER(ipss, ipds_spd_nomem), 4082 &ipss->ipsec_spd_dropper); 4083 return (NULL); 4084 } 4085 4086 ASSERT(nmp->b_datap->db_type == M_CTL); 4087 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4088 4089 /* 4090 * Bump refcounts. 4091 */ 4092 if (ii->ipsec_in_ah_sa != NULL) 4093 IPSA_REFHOLD(ii->ipsec_in_ah_sa); 4094 if (ii->ipsec_in_esp_sa != NULL) 4095 IPSA_REFHOLD(ii->ipsec_in_esp_sa); 4096 if (ii->ipsec_in_policy != NULL) 4097 IPPH_REFHOLD(ii->ipsec_in_policy); 4098 4099 /* 4100 * Copy everything, but preserve the free routine provided by 4101 * ipsec_in_alloc(). 4102 */ 4103 nii = (ipsec_in_t *)nmp->b_rptr; 4104 nfrtn = nii->ipsec_in_frtn; 4105 bcopy(ii, nii, sizeof (*ii)); 4106 nii->ipsec_in_frtn = nfrtn; 4107 4108 nmp->b_cont = cont; 4109 4110 return (nmp); 4111 } 4112 4113 mblk_t * 4114 ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) 4115 { 4116 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 4117 ipsec_out_t *nio; 4118 mblk_t *nmp; 4119 frtn_t nfrtn; 4120 ipsec_stack_t *ipss = ns->netstack_ipsec; 4121 4122 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4123 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4124 4125 nmp = ipsec_alloc_ipsec_out(ns); 4126 if (nmp == NULL) { 4127 ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, 4128 DROPPER(ipss, ipds_spd_nomem), 4129 &ipss->ipsec_spd_dropper); 4130 return (NULL); 4131 } 4132 ASSERT(nmp->b_datap->db_type == M_CTL); 4133 ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); 4134 4135 /* 4136 * Bump refcounts. 4137 */ 4138 if (io->ipsec_out_ah_sa != NULL) 4139 IPSA_REFHOLD(io->ipsec_out_ah_sa); 4140 if (io->ipsec_out_esp_sa != NULL) 4141 IPSA_REFHOLD(io->ipsec_out_esp_sa); 4142 if (io->ipsec_out_polhead != NULL) 4143 IPPH_REFHOLD(io->ipsec_out_polhead); 4144 if (io->ipsec_out_policy != NULL) 4145 IPPOL_REFHOLD(io->ipsec_out_policy); 4146 if (io->ipsec_out_act != NULL) 4147 IPACT_REFHOLD(io->ipsec_out_act); 4148 if (io->ipsec_out_latch != NULL) 4149 IPLATCH_REFHOLD(io->ipsec_out_latch); 4150 if (io->ipsec_out_cred != NULL) 4151 crhold(io->ipsec_out_cred); 4152 4153 /* 4154 * Copy everything, but preserve the free routine provided by 4155 * ipsec_alloc_ipsec_out(). 
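 * If the bcopy() below also copied ipsec_out_frtn, freeing the copy
 * would run the free routine with the original ipsec_out as its
 * free_arg, eventually freeing the original twice and leaking this
 * copy; hence the save and restore of nfrtn around the copy.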
4156 */ 4157 nio = (ipsec_out_t *)nmp->b_rptr; 4158 nfrtn = nio->ipsec_out_frtn; 4159 bcopy(io, nio, sizeof (*io)); 4160 nio->ipsec_out_frtn = nfrtn; 4161 4162 nmp->b_cont = cont; 4163 4164 return (nmp); 4165 } 4166 4167 static void 4168 ipsec_out_release_refs(ipsec_out_t *io) 4169 { 4170 netstack_t *ns = io->ipsec_out_ns; 4171 4172 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4173 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4174 ASSERT(io->ipsec_out_ns != NULL); 4175 4176 /* Note: IPSA_REFRELE is a multi-line macro. */ 4177 if (io->ipsec_out_ah_sa != NULL) 4178 IPSA_REFRELE(io->ipsec_out_ah_sa); 4179 if (io->ipsec_out_esp_sa != NULL) 4180 IPSA_REFRELE(io->ipsec_out_esp_sa); 4181 if (io->ipsec_out_polhead != NULL) 4182 IPPH_REFRELE(io->ipsec_out_polhead, ns); 4183 if (io->ipsec_out_policy != NULL) 4184 IPPOL_REFRELE(io->ipsec_out_policy, ns); 4185 if (io->ipsec_out_act != NULL) 4186 IPACT_REFRELE(io->ipsec_out_act); 4187 if (io->ipsec_out_cred != NULL) { 4188 crfree(io->ipsec_out_cred); 4189 io->ipsec_out_cred = NULL; 4190 } 4191 if (io->ipsec_out_latch) { 4192 IPLATCH_REFRELE(io->ipsec_out_latch, ns); 4193 io->ipsec_out_latch = NULL; 4194 } 4195 } 4196 4197 static void 4198 ipsec_out_free(void *arg) 4199 { 4200 ipsec_out_t *io = (ipsec_out_t *)arg; 4201 ipsec_out_release_refs(io); 4202 kmem_cache_free(ipsec_info_cache, arg); 4203 } 4204 4205 static void 4206 ipsec_in_release_refs(ipsec_in_t *ii) 4207 { 4208 netstack_t *ns = ii->ipsec_in_ns; 4209 4210 ASSERT(ii->ipsec_in_ns != NULL); 4211 4212 /* Note: IPSA_REFRELE is a multi-line macro. */ 4213 if (ii->ipsec_in_ah_sa != NULL) 4214 IPSA_REFRELE(ii->ipsec_in_ah_sa); 4215 if (ii->ipsec_in_esp_sa != NULL) 4216 IPSA_REFRELE(ii->ipsec_in_esp_sa); 4217 if (ii->ipsec_in_policy != NULL) 4218 IPPH_REFRELE(ii->ipsec_in_policy, ns); 4219 if (ii->ipsec_in_da != NULL) { 4220 freeb(ii->ipsec_in_da); 4221 ii->ipsec_in_da = NULL; 4222 } 4223 } 4224 4225 static void 4226 ipsec_in_free(void *arg) 4227 { 4228 ipsec_in_t *ii = (ipsec_in_t *)arg; 4229 ipsec_in_release_refs(ii); 4230 kmem_cache_free(ipsec_info_cache, arg); 4231 } 4232 4233 /* 4234 * This is called only for outbound datagrams if the datagram needs to 4235 * go out secure. A NULL mp can be passed to get an ipsec_out. This 4236 * facility is used by ip_unbind. 4237 * 4238 * NOTE: As the data part could be modified by ipsec_out_process() etc., 4239 * we can't make this fast by doing a dup. 4240 */ 4241 mblk_t * 4242 ipsec_alloc_ipsec_out(netstack_t *ns) 4243 { 4244 mblk_t *ipsec_mp; 4245 ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4246 4247 if (io == NULL) 4248 return (NULL); 4249 4250 bzero(io, sizeof (ipsec_out_t)); 4251 4252 io->ipsec_out_type = IPSEC_OUT; 4253 io->ipsec_out_len = sizeof (ipsec_out_t); 4254 io->ipsec_out_frtn.free_func = ipsec_out_free; 4255 io->ipsec_out_frtn.free_arg = (char *)io; 4256 4257 /* 4258 * Set the zoneid to ALL_ZONES, which is used as an invalid value. Code 4259 * using ipsec_out_zoneid should assert that the zoneid has been set to 4260 * a sane value.
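 * A consumer would typically verify this with something like the
 * following (illustrative only):
 *
 *	ASSERT(io->ipsec_out_zoneid != ALL_ZONES);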
4261 */ 4262 io->ipsec_out_zoneid = ALL_ZONES; 4263 io->ipsec_out_ns = ns; /* No netstack_hold */ 4264 4265 ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, 4266 &io->ipsec_out_frtn); 4267 if (ipsec_mp == NULL) { 4268 ipsec_out_free(io); 4269 4270 return (NULL); 4271 } 4272 ipsec_mp->b_datap->db_type = M_CTL; 4273 ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t); 4274 4275 return (ipsec_mp); 4276 } 4277 4278 /* 4279 * Attach an IPSEC_OUT; use pol for policy if it is non-null. 4280 * Otherwise initialize using conn. 4281 * 4282 * If pol is non-null, we consume a reference to it. 4283 */ 4284 mblk_t * 4285 ipsec_attach_ipsec_out(mblk_t **mp, conn_t *connp, ipsec_policy_t *pol, 4286 uint8_t proto, netstack_t *ns) 4287 { 4288 mblk_t *ipsec_mp; 4289 ipsec_stack_t *ipss = ns->netstack_ipsec; 4290 4291 ASSERT((pol != NULL) || (connp != NULL)); 4292 4293 ipsec_mp = ipsec_alloc_ipsec_out(ns); 4294 if (ipsec_mp == NULL) { 4295 ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, 4296 "ipsec_attach_ipsec_out: Allocation failure\n"); 4297 ip_drop_packet(*mp, B_FALSE, NULL, NULL, 4298 DROPPER(ipss, ipds_spd_nomem), 4299 &ipss->ipsec_spd_dropper); 4300 *mp = NULL; 4301 return (NULL); 4302 } 4303 ipsec_mp->b_cont = *mp; 4304 /* 4305 * If *mp is NULL, ipsec_init_ipsec_out() won't (and must not) use it. 4306 */ 4307 return (ipsec_init_ipsec_out(ipsec_mp, mp, connp, pol, proto, ns)); 4308 } 4309 4310 /* 4311 * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null. 4312 * Otherwise initialize using conn. 4313 * 4314 * If pol is non-null, we consume a reference to it. 4315 */ 4316 mblk_t * 4317 ipsec_init_ipsec_out(mblk_t *ipsec_mp, mblk_t **mp, conn_t *connp, 4318 ipsec_policy_t *pol, uint8_t proto, netstack_t *ns) 4319 { 4320 ipsec_out_t *io; 4321 ipsec_policy_t *p; 4322 ipha_t *ipha; 4323 ip6_t *ip6h; 4324 ipsec_stack_t *ipss = ns->netstack_ipsec; 4325 4326 ASSERT(ipsec_mp->b_cont == *mp); 4327 4328 ASSERT((pol != NULL) || (connp != NULL)); 4329 4330 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4331 ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t))); 4332 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4333 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4334 ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); 4335 io->ipsec_out_latch = NULL; 4336 /* 4337 * Set the zoneid when we have the connp. 4338 * Otherwise, we're called from ip_wput_attach_policy(), which will 4339 * take care of setting the zoneid. 4340 */ 4341 if (connp != NULL) 4342 io->ipsec_out_zoneid = connp->conn_zoneid; 4343 4344 io->ipsec_out_ns = ns; /* No netstack_hold */ 4345 4346 if (*mp != NULL) { 4347 ipha = (ipha_t *)(*mp)->b_rptr; 4348 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 4349 io->ipsec_out_v4 = B_TRUE; 4350 ip6h = NULL; 4351 } else { 4352 io->ipsec_out_v4 = B_FALSE; 4353 ip6h = (ip6_t *)ipha; 4354 ipha = NULL; 4355 } 4356 } else { 4357 ASSERT(connp != NULL && connp->conn_policy_cached); 4358 ip6h = NULL; 4359 ipha = NULL; 4360 io->ipsec_out_v4 = !connp->conn_pkt_isv6; 4361 } 4362 4363 p = NULL; 4364 4365 /* 4366 * Take latched policies over global policy. Check here again for 4367 * this, in case we had conn_latch set while the packet was flying 4368 * around in IP.
4369 */ 4370 if (connp != NULL && connp->conn_latch != NULL) { 4371 ASSERT(ns == connp->conn_netstack); 4372 p = connp->conn_latch->ipl_out_policy; 4373 io->ipsec_out_latch = connp->conn_latch; 4374 IPLATCH_REFHOLD(connp->conn_latch); 4375 if (p != NULL) { 4376 IPPOL_REFHOLD(p); 4377 } 4378 io->ipsec_out_src_port = connp->conn_lport; 4379 io->ipsec_out_dst_port = connp->conn_fport; 4380 io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; 4381 if (pol != NULL) 4382 IPPOL_REFRELE(pol, ns); 4383 } else if (pol != NULL) { 4384 ipsec_selector_t sel; 4385 4386 bzero((void*)&sel, sizeof (sel)); 4387 4388 p = pol; 4389 /* 4390 * conn does not have the port information. Get 4391 * it from the packet. 4392 */ 4393 4394 if (!ipsec_init_outbound_ports(&sel, *mp, ipha, ip6h, 0, 4395 ns->netstack_ipsec)) { 4396 /* Callee did ip_drop_packet() on *mp. */ 4397 *mp = NULL; 4398 freeb(ipsec_mp); 4399 return (NULL); 4400 } 4401 io->ipsec_out_src_port = sel.ips_local_port; 4402 io->ipsec_out_dst_port = sel.ips_remote_port; 4403 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4404 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4405 } 4406 4407 io->ipsec_out_proto = proto; 4408 io->ipsec_out_use_global_policy = B_TRUE; 4409 io->ipsec_out_secure = (p != NULL); 4410 io->ipsec_out_policy = p; 4411 4412 if (p == NULL) { 4413 if (connp->conn_policy != NULL) { 4414 io->ipsec_out_secure = B_TRUE; 4415 ASSERT(io->ipsec_out_latch == NULL); 4416 ASSERT(io->ipsec_out_use_global_policy == B_TRUE); 4417 io->ipsec_out_need_policy = B_TRUE; 4418 ASSERT(io->ipsec_out_polhead == NULL); 4419 IPPH_REFHOLD(connp->conn_policy); 4420 io->ipsec_out_polhead = connp->conn_policy; 4421 } 4422 } else { 4423 /* Handle explicit drop action. */ 4424 if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || 4425 p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { 4426 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 4427 DROPPER(ipss, ipds_spd_explicit), 4428 &ipss->ipsec_spd_dropper); 4429 *mp = NULL; 4430 ipsec_mp = NULL; 4431 } 4432 } 4433 4434 return (ipsec_mp); 4435 } 4436 4437 /* 4438 * Allocate an IPSEC_IN mblk. This will be prepended to an inbound datagram 4439 * and keeps track of what, if any, IPsec processing will be applied to the 4440 * datagram. 4441 */ 4442 mblk_t * 4443 ipsec_in_alloc(boolean_t isv4, netstack_t *ns) 4444 { 4445 mblk_t *ipsec_in; 4446 ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); 4447 4448 if (ii == NULL) 4449 return (NULL); 4450 4451 bzero(ii, sizeof (ipsec_info_t)); 4452 ii->ipsec_in_type = IPSEC_IN; 4453 ii->ipsec_in_len = sizeof (ipsec_in_t); 4454 4455 ii->ipsec_in_v4 = isv4; 4456 ii->ipsec_in_secure = B_TRUE; 4457 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4458 ii->ipsec_in_stackid = ns->netstack_stackid; 4459 4460 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4461 ii->ipsec_in_frtn.free_arg = (char *)ii; 4462 4463 ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI, 4464 &ii->ipsec_in_frtn); 4465 if (ipsec_in == NULL) { 4466 ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n")); 4467 ipsec_in_free(ii); 4468 return (NULL); 4469 } 4470 4471 ipsec_in->b_datap->db_type = M_CTL; 4472 ipsec_in->b_wptr += sizeof (ipsec_info_t); 4473 4474 return (ipsec_in); 4475 } 4476 4477 /* 4478 * This is called from ip_wput_local when a packet which needs 4479 * security is looped back, to convert the IPSEC_OUT to an IPSEC_IN 4480 * before fanout, where the policy check happens. In most 4481 * cases, IPSEC processing has *never* been done.
There is one case 4482 * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where, 4483 * because the packet is destined for localhost, IPSEC processing has 4484 * already been done. 4485 * 4486 * Future: This could happen after SA selection has occurred for 4487 * outbound, which will tell us who the src and dst identities are. 4488 * Then it's just a matter of splicing the ah/esp SA pointers from the 4489 * ipsec_out_t to the ipsec_in_t. 4490 */ 4491 void 4492 ipsec_out_to_in(mblk_t *ipsec_mp) 4493 { 4494 ipsec_in_t *ii; 4495 ipsec_out_t *io; 4496 ipsec_policy_t *pol; 4497 ipsec_action_t *act; 4498 boolean_t v4, icmp_loopback; 4499 zoneid_t zoneid; 4500 netstack_t *ns; 4501 4502 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4503 4504 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4505 4506 v4 = io->ipsec_out_v4; 4507 zoneid = io->ipsec_out_zoneid; 4508 icmp_loopback = io->ipsec_out_icmp_loopback; 4509 ns = io->ipsec_out_ns; 4510 4511 act = io->ipsec_out_act; 4512 if (act == NULL) { 4513 pol = io->ipsec_out_policy; 4514 if (pol != NULL) { 4515 act = pol->ipsp_act; 4516 IPACT_REFHOLD(act); 4517 } 4518 } 4519 io->ipsec_out_act = NULL; 4520 4521 ipsec_out_release_refs(io); /* No netstack_rele/hold needed */ 4522 4523 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 4524 bzero(ii, sizeof (ipsec_in_t)); 4525 ii->ipsec_in_type = IPSEC_IN; 4526 ii->ipsec_in_len = sizeof (ipsec_in_t); 4527 ii->ipsec_in_loopback = B_TRUE; 4528 ii->ipsec_in_ns = ns; /* No netstack_hold */ 4529 4530 ii->ipsec_in_frtn.free_func = ipsec_in_free; 4531 ii->ipsec_in_frtn.free_arg = (char *)ii; 4532 ii->ipsec_in_action = act; 4533 ii->ipsec_in_zoneid = zoneid; 4534 4535 /* 4536 * In most cases, we can't look at the ipsec_out_XXX_sa 4537 * because this never went through IPSEC processing. So, look at 4538 * the requests and infer whether it would have gone through 4539 * IPSEC processing or not. Initialize the "done" fields with 4540 * the requests. The possible values for "done" fields are: 4541 * 4542 * 1) zero, indicates that a particular preference was never 4543 * requested. 4544 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/ 4545 * IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that 4546 * IPSEC processing has been completed. 4547 */ 4548 ii->ipsec_in_secure = B_TRUE; 4549 ii->ipsec_in_v4 = v4; 4550 ii->ipsec_in_icmp_loopback = icmp_loopback; 4551 } 4552 4553 /* 4554 * Consults global policy to see whether this datagram should 4555 * go out secure. If so, it attaches an ipsec_mp in front and 4556 * returns. 4557 */ 4558 mblk_t * 4559 ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, 4560 conn_t *connp, boolean_t unspec_src, zoneid_t zoneid) 4561 { 4562 mblk_t *mp; 4563 ipsec_out_t *io = NULL; 4564 ipsec_selector_t sel; 4565 uint_t ill_index; 4566 boolean_t conn_dontroutex; 4567 boolean_t conn_multicast_loopx; 4568 boolean_t policy_present; 4569 ip_stack_t *ipst = ire->ire_ipst; 4570 netstack_t *ns = ipst->ips_netstack; 4571 ipsec_stack_t *ipss = ns->netstack_ipsec; 4572 4573 ASSERT((ipha != NULL && ip6h == NULL) || 4574 (ip6h != NULL && ipha == NULL)); 4575 4576 bzero((void*)&sel, sizeof (sel)); 4577 4578 if (ipha != NULL) 4579 policy_present = ipss->ipsec_outbound_v4_policy_present; 4580 else 4581 policy_present = ipss->ipsec_outbound_v6_policy_present; 4582 /* 4583 * Fast path to see if there is any policy.
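 * When no outbound global policy exists for this address family (the
 * common case), we can return the message untouched and skip the
 * selector extraction below, unless conn or ipsec_out state forces a
 * closer look.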
4584 */ 4585 if (!policy_present) { 4586 if (ipsec_mp->b_datap->db_type == M_CTL) { 4587 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4588 if (!io->ipsec_out_secure) { 4589 /* 4590 * If there is no global policy and ip_wput 4591 * or ip_wput_multicast has attached this mp 4592 * for multicast case, free the ipsec_mp and 4593 * return the original mp. 4594 */ 4595 mp = ipsec_mp->b_cont; 4596 freeb(ipsec_mp); 4597 ipsec_mp = mp; 4598 io = NULL; 4599 } 4600 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4601 } 4602 if (((io == NULL) || (io->ipsec_out_polhead == NULL)) && 4603 ((connp == NULL) || (connp->conn_policy == NULL))) 4604 return (ipsec_mp); 4605 } 4606 4607 ill_index = 0; 4608 conn_multicast_loopx = conn_dontroutex = B_FALSE; 4609 mp = ipsec_mp; 4610 if (ipsec_mp->b_datap->db_type == M_CTL) { 4611 mp = ipsec_mp->b_cont; 4612 /* 4613 * This is a connection where we have some per-socket 4614 * policy or ip_wput has attached an ipsec_mp for 4615 * the multicast datagram. 4616 */ 4617 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4618 if (!io->ipsec_out_secure) { 4619 /* 4620 * This ipsec_mp was allocated in ip_wput or 4621 * ip_wput_multicast so that we will know the 4622 * value of ill_index, conn_dontroute, 4623 * conn_multicast_loop in the multicast case if 4624 * we inherit global policy here. 4625 */ 4626 ill_index = io->ipsec_out_ill_index; 4627 conn_dontroutex = io->ipsec_out_dontroute; 4628 conn_multicast_loopx = io->ipsec_out_multicast_loop; 4629 freeb(ipsec_mp); 4630 ipsec_mp = mp; 4631 io = NULL; 4632 } 4633 ASSERT(io == NULL || !io->ipsec_out_tunnel); 4634 } 4635 4636 if (ipha != NULL) { 4637 sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ? 4638 ipha->ipha_src : ire->ire_src_addr); 4639 sel.ips_remote_addr_v4 = ip_get_dst(ipha); 4640 sel.ips_protocol = (uint8_t)ipha->ipha_protocol; 4641 sel.ips_isv4 = B_TRUE; 4642 } else { 4643 ushort_t hdr_len; 4644 uint8_t *nexthdrp; 4645 boolean_t is_fragment; 4646 4647 sel.ips_isv4 = B_FALSE; 4648 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4649 if (!unspec_src) 4650 sel.ips_local_addr_v6 = ire->ire_src_addr_v6; 4651 } else { 4652 sel.ips_local_addr_v6 = ip6h->ip6_src; 4653 } 4654 4655 sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, mp, &is_fragment); 4656 if (is_fragment) { 4657 /* 4658 * It's a packet fragment for a packet that 4659 * we have already processed (since IPsec processing 4660 * is done before fragmentation), so we don't 4661 * have to do policy checks again. Fragments can 4662 * come back to us for processing if they have 4663 * been queued up due to flow control. 4664 */ 4665 if (ipsec_mp->b_datap->db_type == M_CTL) { 4666 mp = ipsec_mp->b_cont; 4667 freeb(ipsec_mp); 4668 ipsec_mp = mp; 4669 } 4670 return (ipsec_mp); 4671 } 4672 4673 /* IPv6 common-case. */ 4674 sel.ips_protocol = ip6h->ip6_nxt; 4675 switch (ip6h->ip6_nxt) { 4676 case IPPROTO_TCP: 4677 case IPPROTO_UDP: 4678 case IPPROTO_SCTP: 4679 case IPPROTO_ICMPV6: 4680 break; 4681 default: 4682 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 4683 &hdr_len, &nexthdrp)) { 4684 BUMP_MIB(&ipst->ips_ip6_mib, 4685 ipIfStatsOutDiscards); 4686 freemsg(ipsec_mp); /* Not IPsec-related drop. */ 4687 return (NULL); 4688 } 4689 sel.ips_protocol = *nexthdrp; 4690 break; 4691 } 4692 } 4693 4694 if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { 4695 if (ipha != NULL) { 4696 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 4697 } else { 4698 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 4699 } 4700 4701 /* Callee dropped the packet. 
*/ 4702 return (NULL); 4703 } 4704 4705 if (io != NULL) { 4706 /* 4707 * We seem to have some local policy (we already have 4708 * an ipsec_out). Look at global policy and see 4709 * whether we have to inherit or not. 4710 */ 4711 io->ipsec_out_need_policy = B_FALSE; 4712 ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, 4713 &sel, ns); 4714 ASSERT((io->ipsec_out_policy != NULL) || 4715 (io->ipsec_out_act != NULL)); 4716 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4717 return (ipsec_mp); 4718 } 4719 /* 4720 * We pass in a pointer to a pointer because mp can become 4721 * NULL due to allocation failures or explicit drops. Callers 4722 * of this function should assume a NULL mp means the packet 4723 * was dropped. 4724 */ 4725 ipsec_mp = ipsec_attach_global_policy(&mp, connp, &sel, ns); 4726 if (ipsec_mp == NULL) 4727 return (mp); 4728 4729 /* 4730 * Copy the right port information. 4731 */ 4732 ASSERT(ipsec_mp->b_datap->db_type == M_CTL); 4733 io = (ipsec_out_t *)ipsec_mp->b_rptr; 4734 4735 ASSERT(io->ipsec_out_need_policy == B_FALSE); 4736 ASSERT((io->ipsec_out_policy != NULL) || 4737 (io->ipsec_out_act != NULL)); 4738 io->ipsec_out_src_port = sel.ips_local_port; 4739 io->ipsec_out_dst_port = sel.ips_remote_port; 4740 io->ipsec_out_icmp_type = sel.ips_icmp_type; 4741 io->ipsec_out_icmp_code = sel.ips_icmp_code; 4742 /* 4743 * Set ill_index, conn_dontroute and conn_multicast_loop 4744 * for multicast datagrams. 4745 */ 4746 io->ipsec_out_ill_index = ill_index; 4747 io->ipsec_out_dontroute = conn_dontroutex; 4748 io->ipsec_out_multicast_loop = conn_multicast_loopx; 4749 4750 if (zoneid == ALL_ZONES) 4751 zoneid = GLOBAL_ZONEID; 4752 io->ipsec_out_zoneid = zoneid; 4753 return (ipsec_mp); 4754 } 4755 4756 /* 4757 * When appropriate, this function caches inbound and outbound policy 4758 * for this connection. 4759 * 4760 * XXX need to work out more details about per-interface policy and 4761 * caching here! 4762 * 4763 * XXX may want to split inbound and outbound caching for ill.. 4764 */ 4765 int 4766 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) 4767 { 4768 boolean_t global_policy_present; 4769 netstack_t *ns = connp->conn_netstack; 4770 ipsec_stack_t *ipss = ns->netstack_ipsec; 4771 4772 /* 4773 * There is no policy latching for ICMP sockets because we can't 4774 * decide on which policy to use until we see the packet and get 4775 * type/code selectors. 4776 */ 4777 if (connp->conn_ulp == IPPROTO_ICMP || 4778 connp->conn_ulp == IPPROTO_ICMPV6) { 4779 connp->conn_in_enforce_policy = 4780 connp->conn_out_enforce_policy = B_TRUE; 4781 if (connp->conn_latch != NULL) { 4782 IPLATCH_REFRELE(connp->conn_latch, ns); 4783 connp->conn_latch = NULL; 4784 } 4785 connp->conn_flags |= IPCL_CHECK_POLICY; 4786 return (0); 4787 } 4788 4789 global_policy_present = isv4 ? 
4790 (ipss->ipsec_outbound_v4_policy_present || 4791 ipss->ipsec_inbound_v4_policy_present) : 4792 (ipss->ipsec_outbound_v6_policy_present || 4793 ipss->ipsec_inbound_v6_policy_present); 4794 4795 if ((connp->conn_policy != NULL) || global_policy_present) { 4796 ipsec_selector_t sel; 4797 ipsec_policy_t *p; 4798 4799 if (connp->conn_latch == NULL && 4800 (connp->conn_latch = iplatch_create()) == NULL) { 4801 return (ENOMEM); 4802 } 4803 4804 sel.ips_protocol = connp->conn_ulp; 4805 sel.ips_local_port = connp->conn_lport; 4806 sel.ips_remote_port = connp->conn_fport; 4807 sel.ips_is_icmp_inv_acq = 0; 4808 sel.ips_isv4 = isv4; 4809 if (isv4) { 4810 sel.ips_local_addr_v4 = connp->conn_src; 4811 sel.ips_remote_addr_v4 = connp->conn_rem; 4812 } else { 4813 sel.ips_local_addr_v6 = connp->conn_srcv6; 4814 sel.ips_remote_addr_v6 = connp->conn_remv6; 4815 } 4816 4817 p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, 4818 ns); 4819 if (connp->conn_latch->ipl_in_policy != NULL) 4820 IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns); 4821 connp->conn_latch->ipl_in_policy = p; 4822 connp->conn_in_enforce_policy = (p != NULL); 4823 4824 p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel, 4825 ns); 4826 if (connp->conn_latch->ipl_out_policy != NULL) 4827 IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns); 4828 connp->conn_latch->ipl_out_policy = p; 4829 connp->conn_out_enforce_policy = (p != NULL); 4830 4831 /* Clear the latched actions too, in case we're recaching. */ 4832 if (connp->conn_latch->ipl_out_action != NULL) 4833 IPACT_REFRELE(connp->conn_latch->ipl_out_action); 4834 if (connp->conn_latch->ipl_in_action != NULL) 4835 IPACT_REFRELE(connp->conn_latch->ipl_in_action); 4836 } 4837 4838 /* 4839 * We may or may not have policy for this endpoint. We still set 4840 * conn_policy_cached so that inbound datagrams don't have to look 4841 * at global policy as policy is considered latched for these 4842 * endpoints. We should not set conn_policy_cached until the conn 4843 * reflects the actual policy. If we *set* this before inheriting 4844 * the policy, there is a window where the 4845 * CONN_INBOUND_POLICY_PRESENT check will consult neither the policy 4846 * on the conn (because we have not yet copied the policy on to the 4847 * conn and hence not set conn_in_enforce_policy) nor the 4848 * global policy (because conn_policy_cached is already set).
4849 */ 4850 connp->conn_policy_cached = B_TRUE; 4851 if (connp->conn_in_enforce_policy) 4852 connp->conn_flags |= IPCL_CHECK_POLICY; 4853 return (0); 4854 } 4855 4856 void 4857 iplatch_free(ipsec_latch_t *ipl, netstack_t *ns) 4858 { 4859 if (ipl->ipl_out_policy != NULL) 4860 IPPOL_REFRELE(ipl->ipl_out_policy, ns); 4861 if (ipl->ipl_in_policy != NULL) 4862 IPPOL_REFRELE(ipl->ipl_in_policy, ns); 4863 if (ipl->ipl_in_action != NULL) 4864 IPACT_REFRELE(ipl->ipl_in_action); 4865 if (ipl->ipl_out_action != NULL) 4866 IPACT_REFRELE(ipl->ipl_out_action); 4867 if (ipl->ipl_local_cid != NULL) 4868 IPSID_REFRELE(ipl->ipl_local_cid); 4869 if (ipl->ipl_remote_cid != NULL) 4870 IPSID_REFRELE(ipl->ipl_remote_cid); 4871 if (ipl->ipl_local_id != NULL) 4872 crfree(ipl->ipl_local_id); 4873 mutex_destroy(&ipl->ipl_lock); 4874 kmem_free(ipl, sizeof (*ipl)); 4875 } 4876 4877 ipsec_latch_t * 4878 iplatch_create() 4879 { 4880 ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP); 4881 if (ipl == NULL) 4882 return (ipl); 4883 bzero(ipl, sizeof (*ipl)); 4884 mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL); 4885 ipl->ipl_refcnt = 1; 4886 return (ipl); 4887 } 4888 4889 /* 4890 * Hash function for ID hash table. 4891 */ 4892 static uint32_t 4893 ipsid_hash(int idtype, char *idstring) 4894 { 4895 uint32_t hval = idtype; 4896 unsigned char c; 4897 4898 while ((c = *idstring++) != 0) { 4899 hval = (hval << 4) | (hval >> 28); 4900 hval ^= c; 4901 } 4902 hval = hval ^ (hval >> 16); 4903 return (hval & (IPSID_HASHSIZE-1)); 4904 } 4905 4906 /* 4907 * Look up identity string in hash table. Return identity object 4908 * corresponding to the name -- either preexisting, or newly allocated. 4909 * 4910 * Return NULL if we need to allocate a new one and can't get memory. 4911 */ 4912 ipsid_t * 4913 ipsid_lookup(int idtype, char *idstring, netstack_t *ns) 4914 { 4915 ipsid_t *retval; 4916 char *nstr; 4917 int idlen = strlen(idstring) + 1; 4918 ipsec_stack_t *ipss = ns->netstack_ipsec; 4919 ipsif_t *bucket; 4920 4921 bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; 4922 4923 mutex_enter(&bucket->ipsif_lock); 4924 4925 for (retval = bucket->ipsif_head; retval != NULL; 4926 retval = retval->ipsid_next) { 4927 if (idtype != retval->ipsid_type) 4928 continue; 4929 if (bcmp(idstring, retval->ipsid_cid, idlen) != 0) 4930 continue; 4931 4932 IPSID_REFHOLD(retval); 4933 mutex_exit(&bucket->ipsif_lock); 4934 return (retval); 4935 } 4936 4937 retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP); 4938 if (!retval) { 4939 mutex_exit(&bucket->ipsif_lock); 4940 return (NULL); 4941 } 4942 4943 nstr = kmem_alloc(idlen, KM_NOSLEEP); 4944 if (!nstr) { 4945 mutex_exit(&bucket->ipsif_lock); 4946 kmem_free(retval, sizeof (*retval)); 4947 return (NULL); 4948 } 4949 4950 retval->ipsid_refcnt = 1; 4951 retval->ipsid_next = bucket->ipsif_head; 4952 if (retval->ipsid_next != NULL) 4953 retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next; 4954 retval->ipsid_ptpn = &bucket->ipsif_head; 4955 retval->ipsid_type = idtype; 4956 retval->ipsid_cid = nstr; 4957 bucket->ipsif_head = retval; 4958 bcopy(idstring, nstr, idlen); 4959 mutex_exit(&bucket->ipsif_lock); 4960 4961 return (retval); 4962 } 4963 4964 /* 4965 * Garbage collect the identity hash table. 
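 * A typical identity lifecycle, as an illustrative sketch:
 *
 *	ipsid_t *cid;
 *
 *	cid = ipsid_lookup(idtype, idstring, ns);	(refcnt >= 1)
 *	... interned IDs compare with simple pointer equality ...
 *	IPSID_REFRELE(cid);				(refcnt may hit 0)
 *
 * Entries whose refcnt has dropped to zero remain in their bucket
 * until this function sweeps the table and frees them.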
4966 */ 4967 void 4968 ipsid_gc(netstack_t *ns) 4969 { 4970 int i, len; 4971 ipsid_t *id, *nid; 4972 ipsif_t *bucket; 4973 ipsec_stack_t *ipss = ns->netstack_ipsec; 4974 4975 for (i = 0; i < IPSID_HASHSIZE; i++) { 4976 bucket = &ipss->ipsec_ipsid_buckets[i]; 4977 mutex_enter(&bucket->ipsif_lock); 4978 for (id = bucket->ipsif_head; id != NULL; id = nid) { 4979 nid = id->ipsid_next; 4980 if (id->ipsid_refcnt == 0) { 4981 *id->ipsid_ptpn = nid; 4982 if (nid != NULL) 4983 nid->ipsid_ptpn = id->ipsid_ptpn; 4984 len = strlen(id->ipsid_cid) + 1; 4985 kmem_free(id->ipsid_cid, len); 4986 kmem_free(id, sizeof (*id)); 4987 } 4988 } 4989 mutex_exit(&bucket->ipsif_lock); 4990 } 4991 } 4992 4993 /* 4994 * Return true if two identities are the same. 4995 */ 4996 boolean_t 4997 ipsid_equal(ipsid_t *id1, ipsid_t *id2) 4998 { 4999 if (id1 == id2) 5000 return (B_TRUE); 5001 #ifdef DEBUG 5002 if ((id1 == NULL) || (id2 == NULL)) 5003 return (B_FALSE); 5004 /* 5005 * Test that we're interning IDs correctly. 5006 */ 5007 ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) || 5008 (id1->ipsid_type != id2->ipsid_type)); 5009 #endif 5010 return (B_FALSE); 5011 } 5012 5013 /* 5014 * Initialize identity table; called during module initialization. 5015 */ 5016 static void 5017 ipsid_init(netstack_t *ns) 5018 { 5019 ipsif_t *bucket; 5020 int i; 5021 ipsec_stack_t *ipss = ns->netstack_ipsec; 5022 5023 for (i = 0; i < IPSID_HASHSIZE; i++) { 5024 bucket = &ipss->ipsec_ipsid_buckets[i]; 5025 mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); 5026 } 5027 } 5028 5029 /* 5030 * Free the identity table (preparatory to module unload). 5031 */ 5032 static void 5033 ipsid_fini(netstack_t *ns) 5034 { 5035 ipsif_t *bucket; 5036 int i; 5037 ipsec_stack_t *ipss = ns->netstack_ipsec; 5038 5039 for (i = 0; i < IPSID_HASHSIZE; i++) { 5040 bucket = &ipss->ipsec_ipsid_buckets[i]; 5041 ASSERT(bucket->ipsif_head == NULL); 5042 mutex_destroy(&bucket->ipsif_lock); 5043 } 5044 } 5045 5046 /* 5047 * Update the minimum and maximum supported key sizes for the 5048 * specified algorithm. Must be called while holding the algorithms lock. 5049 */ 5050 void 5051 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, 5052 netstack_t *ns) 5053 { 5054 size_t crypto_min = (size_t)-1, crypto_max = 0; 5055 size_t cur_crypto_min, cur_crypto_max; 5056 boolean_t is_valid; 5057 crypto_mechanism_info_t *mech_infos; 5058 uint_t nmech_infos; 5059 int crypto_rc, i; 5060 crypto_mech_usage_t mask; 5061 ipsec_stack_t *ipss = ns->netstack_ipsec; 5062 5063 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 5064 5065 /* 5066 * Compute the min, max, and default key sizes (in number of 5067 * increments to the default key size in bits) as defined 5068 * by the algorithm mappings. This range of key sizes is used 5069 * for policy related operations. The effective key sizes 5070 * supported by the framework could be more limited than 5071 * those defined for an algorithm.
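 * Worked example (illustrative numbers only): for an algorithm
 * defined by range and increment with alg_key_sizes[] = { 128, 32,
 * 448 } and alg_increment = 8, the policy range is 32..448 bits with
 * a default of 128.  If the providers only support 64..256 bits, the
 * effective range computed below becomes MAX(32, 64)..MIN(448, 256),
 * i.e. 64..256, after rounding the provider limits to the 8-bit
 * increment.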
5072 */ 5073 alg->alg_default_bits = alg->alg_key_sizes[0]; 5074 alg->alg_default = 0; 5075 if (alg->alg_increment != 0) { 5076 /* key sizes are defined by range & increment */ 5077 alg->alg_minbits = alg->alg_key_sizes[1]; 5078 alg->alg_maxbits = alg->alg_key_sizes[2]; 5079 } else if (alg->alg_nkey_sizes == 0) { 5080 /* no specified key size for algorithm */ 5081 alg->alg_minbits = alg->alg_maxbits = 0; 5082 } else { 5083 /* key sizes are defined by enumeration */ 5084 alg->alg_minbits = (uint16_t)-1; 5085 alg->alg_maxbits = 0; 5086 5087 for (i = 0; i < alg->alg_nkey_sizes; i++) { 5088 if (alg->alg_key_sizes[i] < alg->alg_minbits) 5089 alg->alg_minbits = alg->alg_key_sizes[i]; 5090 if (alg->alg_key_sizes[i] > alg->alg_maxbits) 5091 alg->alg_maxbits = alg->alg_key_sizes[i]; 5092 } 5093 } 5094 5095 if (!(alg->alg_flags & ALG_FLAG_VALID)) 5096 return; 5097 5098 /* 5099 * Mechanisms do not apply to the NULL encryption 5100 * algorithm, so simply return for this case. 5101 */ 5102 if (alg->alg_id == SADB_EALG_NULL) 5103 return; 5104 5105 /* 5106 * Find the min and max key sizes supported by the cryptographic 5107 * framework providers. 5108 */ 5109 5110 /* get the key sizes supported by the framework */ 5111 crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type, 5112 &mech_infos, &nmech_infos, KM_SLEEP); 5113 if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) { 5114 alg->alg_flags &= ~ALG_FLAG_VALID; 5115 return; 5116 } 5117 5118 /* min and max key sizes supported by framework */ 5119 for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) { 5120 int unit_bits; 5121 5122 /* 5123 * Ignore entries that do not support the operations 5124 * needed for the algorithm type. 5125 */ 5126 if (alg_type == IPSEC_ALG_AUTH) { 5127 mask = CRYPTO_MECH_USAGE_MAC; 5128 } else { 5129 mask = CRYPTO_MECH_USAGE_ENCRYPT | 5130 CRYPTO_MECH_USAGE_DECRYPT; 5131 } 5132 if ((mech_infos[i].mi_usage & mask) != mask) 5133 continue; 5134 5135 unit_bits = (mech_infos[i].mi_keysize_unit == 5136 CRYPTO_KEYSIZE_UNIT_IN_BYTES) ? 8 : 1; 5137 /* adjust min/max supported by framework */ 5138 cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits; 5139 cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits; 5140 5141 if (cur_crypto_min < crypto_min) 5142 crypto_min = cur_crypto_min; 5143 5144 /* 5145 * CRYPTO_EFFECTIVELY_INFINITE is a special value of 5146 * the crypto framework which means "no upper limit". 5147 */ 5148 if (mech_infos[i].mi_max_key_size == 5149 CRYPTO_EFFECTIVELY_INFINITE) { 5150 crypto_max = (size_t)-1; 5151 } else if (cur_crypto_max > crypto_max) { 5152 crypto_max = cur_crypto_max; 5153 } 5154 5155 is_valid = B_TRUE; 5156 } 5157 5158 kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) * 5159 nmech_infos); 5160 5161 if (!is_valid) { 5162 /* no key sizes supported by framework */ 5163 alg->alg_flags &= ~ALG_FLAG_VALID; 5164 return; 5165 } 5166 5167 /* 5168 * Determine min and max key sizes from the alg_key_sizes[] 5169 * defined for the algorithm entry. Adjust key sizes based on 5170 * those supported by the framework. 5171 */ 5172 alg->alg_ef_default_bits = alg->alg_key_sizes[0]; 5173 5174 /* 5175 * For backwards compatibility, assume that the IV length 5176 * is the same as the data length. 5177 */ 5178 alg->alg_ivlen = alg->alg_datalen; 5179 5180 /* 5181 * Copy any algorithm parameters (if provided) into dedicated 5182 * elements in the ipsec_alginfo_t structure. 5183 * There may be a better place to put this code.
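 * The mapping implemented by the switch below is positional:
 * alg_params[0] is the IV length, alg_params[1] the ICV length, and
 * alg_params[2] the salt length, all in bytes.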
5184 */ 5185 for (i = 0; i < alg->alg_nparams; i++) { 5186 switch (i) { 5187 case 0: 5188 /* Initialisation Vector length (bytes) */ 5189 alg->alg_ivlen = alg->alg_params[0]; 5190 break; 5191 case 1: 5192 /* Integrity Check Vector length (bytes) */ 5193 alg->alg_icvlen = alg->alg_params[1]; 5194 break; 5195 case 2: 5196 /* Salt length (bytes) */ 5197 alg->alg_saltlen = (uint8_t)alg->alg_params[2]; 5198 break; 5199 default: 5200 break; 5201 } 5202 } 5203 5204 /* Default if the IV length is not specified. */ 5205 if (alg_type == IPSEC_ALG_ENCR && alg->alg_ivlen == 0) 5206 alg->alg_ivlen = alg->alg_datalen; 5207 5208 alg_flag_check(alg); 5209 5210 if (alg->alg_increment != 0) { 5211 /* supported key sizes are defined by range & increment */ 5212 crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment); 5213 crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment); 5214 5215 alg->alg_ef_minbits = MAX(alg->alg_minbits, 5216 (uint16_t)crypto_min); 5217 alg->alg_ef_maxbits = MIN(alg->alg_maxbits, 5218 (uint16_t)crypto_max); 5219 5220 /* 5221 * If the sizes supported by the framework are outside 5222 * the range of sizes defined by the algorithm mappings, 5223 * the algorithm cannot be used. Check for this 5224 * condition here. 5225 */ 5226 if (alg->alg_ef_minbits > alg->alg_ef_maxbits) { 5227 alg->alg_flags &= ~ALG_FLAG_VALID; 5228 return; 5229 } 5230 if (alg->alg_ef_default_bits < alg->alg_ef_minbits) 5231 alg->alg_ef_default_bits = alg->alg_ef_minbits; 5232 if (alg->alg_ef_default_bits > alg->alg_ef_maxbits) 5233 alg->alg_ef_default_bits = alg->alg_ef_maxbits; 5234 } else if (alg->alg_nkey_sizes == 0) { 5235 /* no specified key size for algorithm */ 5236 alg->alg_ef_minbits = alg->alg_ef_maxbits = 0; 5237 } else { 5238 /* supported key sizes are defined by enumeration */ 5239 alg->alg_ef_minbits = (uint16_t)-1; 5240 alg->alg_ef_maxbits = 0; 5241 5242 for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) { 5243 /* 5244 * Ignore the current key size if it is not in the 5245 * range of sizes supported by the framework. 5246 */ 5247 if (alg->alg_key_sizes[i] < crypto_min || 5248 alg->alg_key_sizes[i] > crypto_max) 5249 continue; 5250 if (alg->alg_key_sizes[i] < alg->alg_ef_minbits) 5251 alg->alg_ef_minbits = alg->alg_key_sizes[i]; 5252 if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits) 5253 alg->alg_ef_maxbits = alg->alg_key_sizes[i]; 5254 is_valid = B_TRUE; 5255 } 5256 5257 if (!is_valid) { 5258 alg->alg_flags &= ~ALG_FLAG_VALID; 5259 return; 5260 } 5261 alg->alg_ef_default = 0; 5262 } 5263 } 5264 5265 /* 5266 * Sanity check parameters provided by ipsecalgs(1m). ALG_FLAG_VALID 5267 * is cleared at the top of this function and set again only if all 5268 * of the checks below pass; if any of them fails, the algorithm 5269 * entry remains invalid. 5270 */ 5271 void 5272 alg_flag_check(ipsec_alginfo_t *alg) 5273 { 5274 alg->alg_flags &= ~ALG_FLAG_VALID; 5275 5276 /* 5277 * Can't have the algorithm marked as both CCM and GCM. 5278 * Check that the ALG_FLAG_COMBINED and ALG_FLAG_COUNTERMODE 5279 * flags are set for CCM & GCM. 5280 */ 5281 if ((alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) == 5282 (ALG_FLAG_CCM|ALG_FLAG_GCM)) 5283 return; 5284 if (alg->alg_flags & (ALG_FLAG_CCM|ALG_FLAG_GCM)) { 5285 if (!(alg->alg_flags & ALG_FLAG_COUNTERMODE)) 5286 return; 5287 if (!(alg->alg_flags & ALG_FLAG_COMBINED)) 5288 return; 5289 } 5290 5291 /* 5292 * For ALG_FLAG_COUNTERMODE, check the parameters 5293 * fit in the ipsec_nonce_t structure.
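 * Counter-mode ciphers build their nonce from the salt plus the
 * on-the-wire IV, so the configured IV length must match the iv
 * member exactly and the salt must fit within the salt member.
 * (Illustrative example: AES-GCM commonly pairs an 8-byte IV with a
 * 4-byte salt to form a 12-byte nonce.)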
5294 */ 5295 if (alg->alg_flags & ALG_FLAG_COUNTERMODE) { 5296 if (alg->alg_ivlen != sizeof (((ipsec_nonce_t *)NULL)->iv)) 5297 return; 5298 if (alg->alg_saltlen > sizeof (((ipsec_nonce_t *)NULL)->salt)) 5299 return; 5300 } 5301 if ((alg->alg_flags & ALG_FLAG_COMBINED) && 5302 (alg->alg_icvlen == 0)) 5303 return; 5304 5305 /* all is well. */ 5306 alg->alg_flags |= ALG_FLAG_VALID; 5307 } 5308 5309 /* 5310 * Free the memory used by the specified algorithm. 5311 */ 5312 void 5313 ipsec_alg_free(ipsec_alginfo_t *alg) 5314 { 5315 if (alg == NULL) 5316 return; 5317 5318 if (alg->alg_key_sizes != NULL) { 5319 kmem_free(alg->alg_key_sizes, 5320 (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); 5321 alg->alg_key_sizes = NULL; 5322 } 5323 if (alg->alg_block_sizes != NULL) { 5324 kmem_free(alg->alg_block_sizes, 5325 (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); 5326 alg->alg_block_sizes = NULL; 5327 } 5328 kmem_free(alg, sizeof (*alg)); 5329 } 5330 5331 /* 5332 * Check the validity of the specified key size for an algorithm. 5333 * Returns B_TRUE if key size is valid, B_FALSE otherwise. 5334 */ 5335 boolean_t 5336 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) 5337 { 5338 if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits) 5339 return (B_FALSE); 5340 5341 if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) { 5342 /* 5343 * If the key sizes are defined by enumeration, the new 5344 * key size must be equal to one of the supported values. 5345 */ 5346 int i; 5347 5348 for (i = 0; i < alg->alg_nkey_sizes; i++) 5349 if (key_size == alg->alg_key_sizes[i]) 5350 break; 5351 if (i == alg->alg_nkey_sizes) 5352 return (B_FALSE); 5353 } 5354 5355 return (B_TRUE); 5356 } 5357 5358 /* 5359 * Callback function invoked by the crypto framework when a provider 5360 * registers or unregisters. This callback updates the algorithms 5361 * tables when a crypto algorithm is no longer available or becomes 5362 * available, and triggers the freeing/creation of context templates 5363 * associated with existing SAs, if needed. 5364 * 5365 * Need to walk all stack instances since the callback is global 5366 * for all instances. 5367 */ 5368 void 5369 ipsec_prov_update_callback(uint32_t event, void *event_arg) 5370 { 5371 netstack_handle_t nh; 5372 netstack_t *ns; 5373 5374 netstack_next_init(&nh); 5375 while ((ns = netstack_next(&nh)) != NULL) { 5376 ipsec_prov_update_callback_stack(event, event_arg, ns); 5377 netstack_rele(ns); 5378 } 5379 netstack_next_fini(&nh); 5380 } 5381 5382 static void 5383 ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, 5384 netstack_t *ns) 5385 { 5386 crypto_notify_event_change_t *prov_change = 5387 (crypto_notify_event_change_t *)event_arg; 5388 uint_t algidx, algid, algtype, mech_count, mech_idx; 5389 ipsec_alginfo_t *alg; 5390 ipsec_alginfo_t oalg; 5391 crypto_mech_name_t *mechs; 5392 boolean_t alg_changed = B_FALSE; 5393 ipsec_stack_t *ipss = ns->netstack_ipsec; 5394 5395 /* ignore events for which we didn't register */ 5396 if (event != CRYPTO_EVENT_MECHS_CHANGED) { 5397 ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x " 5398 "received from crypto framework\n", event)); 5399 return; 5400 } 5401 5402 mechs = crypto_get_mech_list(&mech_count, KM_SLEEP); 5403 if (mechs == NULL) 5404 return; 5405 5406 /* 5407 * Walk the list of currently defined IPsec algorithms. Update 5408 * the algorithm valid flag and trigger an update of the 5409 * SAs that depend on that algorithm.
5410 */ 5411 mutex_enter(&ipss->ipsec_alg_lock); 5412 for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { 5413 for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; 5414 algidx++) { 5415 5416 algid = ipss->ipsec_sortlist[algtype][algidx]; 5417 alg = ipss->ipsec_alglists[algtype][algid]; 5418 ASSERT(alg != NULL); 5419 5420 /* 5421 * Skip the algorithms which do not map to the 5422 * crypto framework provider being added or removed. 5423 */ 5424 if (strncmp(alg->alg_mech_name, 5425 prov_change->ec_mech_name, 5426 CRYPTO_MAX_MECH_NAME) != 0) 5427 continue; 5428 5429 /* 5430 * Determine if the mechanism is valid. If it 5431 * is not, mark the algorithm as being invalid. If 5432 * it is, mark the algorithm as being valid. 5433 */ 5434 for (mech_idx = 0; mech_idx < mech_count; mech_idx++) 5435 if (strncmp(alg->alg_mech_name, 5436 mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0) 5437 break; 5438 if (mech_idx == mech_count && 5439 alg->alg_flags & ALG_FLAG_VALID) { 5440 alg->alg_flags &= ~ALG_FLAG_VALID; 5441 alg_changed = B_TRUE; 5442 } else if (mech_idx < mech_count && 5443 !(alg->alg_flags & ALG_FLAG_VALID)) { 5444 alg->alg_flags |= ALG_FLAG_VALID; 5445 alg_changed = B_TRUE; 5446 } 5447 5448 /* 5449 * Update the supported key sizes, regardless 5450 * of whether a crypto provider was added or 5451 * removed. 5452 */ 5453 oalg = *alg; 5454 ipsec_alg_fix_min_max(alg, algtype, ns); 5455 if (!alg_changed && 5456 (alg->alg_ef_minbits != oalg.alg_ef_minbits || 5457 alg->alg_ef_maxbits != oalg.alg_ef_maxbits || 5458 alg->alg_ef_default != oalg.alg_ef_default || 5459 alg->alg_ef_default_bits != 5460 oalg.alg_ef_default_bits)) 5461 alg_changed = B_TRUE; 5462 5463 /* 5464 * Update the affected SAs if a software provider is 5465 * being added or removed. 5466 */ 5467 if (prov_change->ec_provider_type == 5468 CRYPTO_SW_PROVIDER) 5469 sadb_alg_update(algtype, alg->alg_id, 5470 prov_change->ec_change == 5471 CRYPTO_MECH_ADDED, ns); 5472 } 5473 } 5474 mutex_exit(&ipss->ipsec_alg_lock); 5475 crypto_free_mech_list(mechs, mech_count); 5476 5477 if (alg_changed) { 5478 /* 5479 * An algorithm has changed, i.e. it became valid or 5480 * invalid, or its supported key sizes have changed. 5481 * Notify ipsecah and ipsecesp of this change so 5482 * that they can send a SADB_REGISTER to their consumers. 5483 */ 5484 ipsecah_algs_changed(ns); 5485 ipsecesp_algs_changed(ns); 5486 } 5487 } 5488 5489 /* 5490 * Registers with the crypto framework to be notified of crypto 5491 * provider changes. Used to update the algorithm tables and 5492 * to free or create context templates if needed. Invoked after IPsec 5493 * is loaded successfully. 5494 * 5495 * This is called separately for each IP instance, so we ensure we only 5496 * register once. 5497 */ 5498 void 5499 ipsec_register_prov_update(void) 5500 { 5501 if (prov_update_handle != NULL) 5502 return; 5503 5504 prov_update_handle = crypto_notify_events( 5505 ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); 5506 } 5507 5508 /* 5509 * Unregisters from the framework to be notified of crypto provider 5510 * changes. Called from ipsec_policy_g_destroy(). 5511 */ 5512 static void 5513 ipsec_unregister_prov_update(void) 5514 { 5515 if (prov_update_handle != NULL) 5516 crypto_unnotify_events(prov_update_handle); 5517 } 5518 5519 /* 5520 * Tunnel-mode support routines. 5521 */ 5522 5523 /* 5524 * Returns an mblk chain suitable for putnext() if policies match and IPsec 5525 * SAs are available.
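 * (The returned chain is b_next-linked; when inner fragments had to
 * be gathered here first, each fragment is tagged with its own
 * IPSEC_OUT before return.)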
* If there's no per-tunnel policy, or a lookup comes back 5526 * with no match, then still return the packet and have global policy take 5527 * a crack at it in IP. 5528 * 5529 * Remember -> we can be forwarding packets. Keep that in mind w.r.t. 5530 * inner-packet contents. 5531 */ 5532 mblk_t * 5533 ipsec_tun_outbound(mblk_t *mp, iptun_t *iptun, ipha_t *inner_ipv4, 5534 ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len) 5535 { 5536 ipsec_policy_head_t *polhead; 5537 ipsec_selector_t sel; 5538 mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; 5539 ipsec_out_t *io; 5540 boolean_t is_fragment; 5541 ipsec_policy_t *pol; 5542 ipsec_tun_pol_t *itp = iptun->iptun_itp; 5543 netstack_t *ns = iptun->iptun_ns; 5544 ipsec_stack_t *ipss = ns->netstack_ipsec; 5545 5546 ASSERT((outer_ipv6 != NULL && outer_ipv4 == NULL) || 5547 (outer_ipv4 != NULL && outer_ipv6 == NULL)); 5548 /* We take care of inners in a bit. */ 5549 5550 ASSERT(itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)); 5551 polhead = itp->itp_policy; 5552 5553 bzero(&sel, sizeof (sel)); 5554 if (inner_ipv4 != NULL) { 5555 ASSERT(inner_ipv6 == NULL); 5556 sel.ips_isv4 = B_TRUE; 5557 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5558 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5559 sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol; 5560 } else { 5561 ASSERT(inner_ipv6 != NULL); 5562 sel.ips_isv4 = B_FALSE; 5563 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5564 /* 5565 * We don't care about routing-header dests in the 5566 * forwarding/tunnel path, so just grab ip6_dst. 5567 */ 5568 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5569 } 5570 5571 if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) { 5572 /* 5573 * Caller can prepend the outer header, which means 5574 * inner_ipv[46] may be stuck in the middle. Pullup the whole 5575 * mess now if need be, for easier processing later. Don't 5576 * forget to rewire the outer header too. 5577 */ 5578 if (mp->b_cont != NULL) { 5579 nmp = msgpullup(mp, -1); 5580 if (nmp == NULL) { 5581 ip_drop_packet(mp, B_FALSE, NULL, NULL, 5582 DROPPER(ipss, ipds_spd_nomem), 5583 &ipss->ipsec_spd_dropper); 5584 return (NULL); 5585 } 5586 freemsg(mp); 5587 mp = nmp; 5588 if (outer_ipv4 != NULL) 5589 outer_ipv4 = (ipha_t *)mp->b_rptr; 5590 else 5591 outer_ipv6 = (ip6_t *)mp->b_rptr; 5592 if (inner_ipv4 != NULL) { 5593 inner_ipv4 = 5594 (ipha_t *)(mp->b_rptr + outer_hdr_len); 5595 } else { 5596 inner_ipv6 = 5597 (ip6_t *)(mp->b_rptr + outer_hdr_len); 5598 } 5599 } 5600 if (inner_ipv4 != NULL) { 5601 is_fragment = IS_V4_FRAGMENT( 5602 inner_ipv4->ipha_fragment_offset_and_flags); 5603 } else { 5604 sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6, mp, 5605 &is_fragment); 5606 } 5607 5608 if (is_fragment) { 5609 ipha_t *oiph; 5610 ipha_t *iph = NULL; 5611 ip6_t *ip6h = NULL; 5612 int hdr_len; 5613 uint16_t ip6_hdr_length; 5614 uint8_t v6_proto; 5615 uint8_t *v6_proto_p; 5616 5617 /* 5618 * We have a fragment we need to track! 5619 */ 5620 mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, 5621 outer_hdr_len, ipss); 5622 if (mp == NULL) 5623 return (NULL); 5624 ASSERT(mp->b_cont == NULL); 5625 5626 /* 5627 * If we get here, we have a full fragment chain 5628 */ 5629 5630 oiph = (ipha_t *)mp->b_rptr; 5631 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 5632 hdr_len = ((outer_hdr_len != 0) ?
5633 IPH_HDR_LENGTH(oiph) : 0); 5634 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5635 } else { 5636 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 5637 ip6h = (ip6_t *)mp->b_rptr; 5638 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 5639 &ip6_hdr_length, &v6_proto_p)) { 5640 ip_drop_packet_chain(mp, B_FALSE, 5641 NULL, NULL, DROPPER(ipss, 5642 ipds_spd_malformed_packet), 5643 &ipss->ipsec_spd_dropper); 5644 return (NULL); 5645 } 5646 hdr_len = ip6_hdr_length; 5647 } 5648 outer_hdr_len = hdr_len; 5649 5650 if (sel.ips_isv4) { 5651 if (iph == NULL) { 5652 /* Was v6 outer */ 5653 iph = (ipha_t *)(mp->b_rptr + hdr_len); 5654 } 5655 inner_ipv4 = iph; 5656 sel.ips_local_addr_v4 = inner_ipv4->ipha_src; 5657 sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst; 5658 sel.ips_protocol = 5659 (uint8_t)inner_ipv4->ipha_protocol; 5660 } else { 5661 inner_ipv6 = (ip6_t *)(mp->b_rptr + 5662 hdr_len); 5663 sel.ips_local_addr_v6 = inner_ipv6->ip6_src; 5664 sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst; 5665 if (!ip_hdr_length_nexthdr_v6(mp, 5666 inner_ipv6, &ip6_hdr_length, &v6_proto_p)) { 5667 ip_drop_packet_chain(mp, B_FALSE, 5668 NULL, NULL, DROPPER(ipss, 5669 ipds_spd_malformed_frag), 5670 &ipss->ipsec_spd_dropper); 5671 return (NULL); 5672 } 5673 v6_proto = *v6_proto_p; 5674 sel.ips_protocol = v6_proto; 5675 #ifdef FRAGCACHE_DEBUG 5676 cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n", 5677 sel.ips_protocol); 5678 #endif 5679 } 5680 /* Ports are extracted below */ 5681 } 5682 5683 /* Get ports... */ 5684 if (!ipsec_init_outbound_ports(&sel, mp, 5685 inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { 5686 /* callee did ip_drop_packet_chain() on mp. */ 5687 return (NULL); 5688 } 5689 #ifdef FRAGCACHE_DEBUG 5690 if (inner_ipv4 != NULL) 5691 cmn_err(CE_WARN, 5692 "(v4) sel.ips_protocol = %d, " 5693 "sel.ips_local_port = %d, " 5694 "sel.ips_remote_port = %d\n", 5695 sel.ips_protocol, ntohs(sel.ips_local_port), 5696 ntohs(sel.ips_remote_port)); 5697 if (inner_ipv6 != NULL) 5698 cmn_err(CE_WARN, 5699 "(v6) sel.ips_protocol = %d, " 5700 "sel.ips_local_port = %d, " 5701 "sel.ips_remote_port = %d\n", 5702 sel.ips_protocol, ntohs(sel.ips_local_port), 5703 ntohs(sel.ips_remote_port)); 5704 #endif 5705 /* Success so far! */ 5706 } 5707 rw_enter(&polhead->iph_lock, RW_READER); 5708 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, 5709 &sel, ns); 5710 rw_exit(&polhead->iph_lock); 5711 if (pol == NULL) { 5712 /* 5713 * No matching policy on this tunnel, drop the packet. 5714 * 5715 * NOTE: Tunnel-mode tunnels are different from the 5716 * IP global transport mode policy head. For a tunnel-mode 5717 * tunnel, we drop the packet instead of letting it pass 5718 * in the clear the way a global-policy miss would. 5719 * 5720 * NOTE2: "negotiate transport" tunnels should match ALL 5721 * inbound packets, but we do not uncomment the ASSERT() 5722 * below because if/when we open PF_POLICY, a user can 5723 * shoot themselves in the foot with a 0 priority. 5724 */ 5725 5726 /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */ 5727 #ifdef FRAGCACHE_DEBUG 5728 cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel " 5729 "per-port policy\n"); 5730 #endif 5731 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5732 DROPPER(ipss, ipds_spd_explicit), 5733 &ipss->ipsec_spd_dropper); 5734 return (NULL); 5735 } 5736 5737 #ifdef FRAGCACHE_DEBUG 5738 cmn_err(CE_WARN, "Found matching tunnel per-port policy\n"); 5739 #endif 5740 5741 /* Construct an IPSEC_OUT message.
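 * The resulting message layout, as a sketch:
 *
 *	ipsec_mp: [M_CTL ipsec_out_t] --b_cont--> mp: [data ...]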
*/ 5742 ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns); 5743 if (ipsec_mp == NULL) { 5744 IPPOL_REFRELE(pol, ns); 5745 ip_drop_packet(mp, B_FALSE, NULL, NULL, 5746 DROPPER(ipss, ipds_spd_nomem), 5747 &ipss->ipsec_spd_dropper); 5748 return (NULL); 5749 } 5750 ipsec_mp->b_cont = mp; 5751 io = (ipsec_out_t *)ipsec_mp->b_rptr; 5752 IPPH_REFHOLD(polhead); 5753 /* 5754 * NOTE: free() function of ipsec_out mblk will release polhead and 5755 * pol references. 5756 */ 5757 io->ipsec_out_polhead = polhead; 5758 io->ipsec_out_policy = pol; 5759 /* 5760 * NOTE: There is a subtle difference between iptun_zoneid and 5761 * iptun_connp->conn_zoneid explained in iptun_conn_create(). When 5762 * interacting with the ip module, we must use conn_zoneid. 5763 */ 5764 io->ipsec_out_zoneid = iptun->iptun_connp->conn_zoneid; 5765 io->ipsec_out_v4 = (outer_ipv4 != NULL); 5766 io->ipsec_out_secure = B_TRUE; 5767 5768 if (!(itp->itp_flags & ITPF_P_TUNNEL)) { 5769 /* Set up transport mode for tunnelled packets. */ 5770 io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP : 5771 IPPROTO_IPV6; 5772 return (ipsec_mp); 5773 } 5774 5775 /* Fill in tunnel-mode goodies here. */ 5776 io->ipsec_out_tunnel = B_TRUE; 5777 /* XXX Do I need to fill in all of the goodies here? */ 5778 if (inner_ipv4) { 5779 io->ipsec_out_inaf = AF_INET; 5780 io->ipsec_out_insrc[0] = 5781 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4; 5782 io->ipsec_out_indst[0] = 5783 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4; 5784 } else { 5785 io->ipsec_out_inaf = AF_INET6; 5786 io->ipsec_out_insrc[0] = 5787 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0]; 5788 io->ipsec_out_insrc[1] = 5789 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1]; 5790 io->ipsec_out_insrc[2] = 5791 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2]; 5792 io->ipsec_out_insrc[3] = 5793 pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3]; 5794 io->ipsec_out_indst[0] = 5795 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0]; 5796 io->ipsec_out_indst[1] = 5797 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1]; 5798 io->ipsec_out_indst[2] = 5799 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2]; 5800 io->ipsec_out_indst[3] = 5801 pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3]; 5802 } 5803 io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen; 5804 io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen; 5805 /* NOTE: These are used for transport mode too. */ 5806 io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport; 5807 io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport; 5808 io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto; 5809 5810 /* 5811 * The mp pointer still valid 5812 * Add ipsec_out to each fragment. 
5813 * The fragment head already has one 5814 */ 5815 nmp = mp->b_next; 5816 mp->b_next = NULL; 5817 mp = nmp; 5818 ASSERT(ipsec_mp != NULL); 5819 while (mp != NULL) { 5820 nmp = mp->b_next; 5821 ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns); 5822 if (ipsec_mp->b_next == NULL) { 5823 ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL, 5824 DROPPER(ipss, ipds_spd_nomem), 5825 &ipss->ipsec_spd_dropper); 5826 ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, 5827 DROPPER(ipss, ipds_spd_nomem), 5828 &ipss->ipsec_spd_dropper); 5829 return (NULL); 5830 } 5831 ipsec_mp = ipsec_mp->b_next; 5832 mp->b_next = NULL; 5833 mp = nmp; 5834 } 5835 return (ipsec_mp_head); 5836 } 5837 5838 /* 5839 * NOTE: The following releases pol's reference and 5840 * calls ip_drop_packet() for me on NULL returns. 5841 */ 5842 mblk_t * 5843 ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, 5844 ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns) 5845 { 5846 /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */ 5847 mblk_t *data_chain = NULL, *data_tail = NULL; 5848 mblk_t *ii_next; 5849 5850 while (ipsec_mp != NULL) { 5851 ii_next = ipsec_mp->b_next; 5852 ipsec_mp->b_next = NULL; /* No tripping asserts. */ 5853 5854 /* 5855 * Need IPPOL_REFHOLD(pol) for extras because 5856 * ipsecin_policy does the refrele. 5857 */ 5858 IPPOL_REFHOLD(pol); 5859 5860 if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4, 5861 inner_ipv6, pkt_unique, ns) != NULL) { 5862 if (data_tail == NULL) { 5863 /* First one */ 5864 data_chain = data_tail = ipsec_mp->b_cont; 5865 } else { 5866 data_tail->b_next = ipsec_mp->b_cont; 5867 data_tail = data_tail->b_next; 5868 } 5869 freeb(ipsec_mp); 5870 } else { 5871 /* 5872 * ipsec_check_ipsecin_policy() freed ipsec_mp 5873 * already. Need to get rid of any extra pol 5874 * references, and any remaining bits as well. 5875 */ 5876 IPPOL_REFRELE(pol, ns); 5877 ipsec_freemsg_chain(data_chain); 5878 ipsec_freemsg_chain(ii_next); /* ipdrop stats? */ 5879 return (NULL); 5880 } 5881 ipsec_mp = ii_next; 5882 } 5883 /* 5884 * One last release because either the loop bumped it up, or we never 5885 * called ipsec_check_ipsecin_policy(). 5886 */ 5887 IPPOL_REFRELE(pol, ns); 5888 5889 /* data_chain is ready for return to tun module. */ 5890 return (data_chain); 5891 } 5892 5893 5894 /* 5895 * Returns B_TRUE if the inbound packet passed an IPsec policy check. Returns 5896 * B_FALSE if it failed or if it is a fragment needing its friends before a 5897 * policy check can be performed. 5898 * 5899 * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead. 5900 * data_mp may be reassigned with a b_next chain of packets if fragments 5901 * neeeded to be collected for a proper policy check. 5902 * 5903 * Always frees ipsec_mp, but only frees data_mp if returns B_FALSE. This 5904 * function calls ip_drop_packet() on data_mp if need be. 5905 * 5906 * NOTE: outer_hdr_len is signed. If it's a negative value, the caller 5907 * is inspecting an ICMP packet. 5908 */ 5909 boolean_t 5910 ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, 5911 ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, 5912 ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns) 5913 { 5914 ipsec_policy_head_t *polhead; 5915 ipsec_selector_t sel; 5916 mblk_t *message = (ipsec_mp == NULL) ? 
*data_mp : ipsec_mp; 5917 ipsec_policy_t *pol; 5918 uint16_t tmpport; 5919 selret_t rc; 5920 boolean_t retval, port_policy_present, is_icmp, global_present; 5921 in6_addr_t tmpaddr; 5922 ipaddr_t tmp4; 5923 uint8_t flags, *inner_hdr; 5924 ipsec_stack_t *ipss = ns->netstack_ipsec; 5925 5926 sel.ips_is_icmp_inv_acq = 0; 5927 5928 if (outer_ipv4 != NULL) { 5929 ASSERT(outer_ipv6 == NULL); 5930 global_present = ipss->ipsec_inbound_v4_policy_present; 5931 } else { 5932 ASSERT(outer_ipv6 != NULL); 5933 global_present = ipss->ipsec_inbound_v6_policy_present; 5934 } 5935 5936 ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || 5937 inner_ipv4 == NULL && inner_ipv6 != NULL); 5938 ASSERT(message == *data_mp || message->b_cont == *data_mp); 5939 5940 if (outer_hdr_len < 0) { 5941 outer_hdr_len = (-outer_hdr_len); 5942 is_icmp = B_TRUE; 5943 } else { 5944 is_icmp = B_FALSE; 5945 } 5946 5947 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 5948 polhead = itp->itp_policy; 5949 /* 5950 * We need to perform full Tunnel-Mode enforcement, 5951 * and we need to have inner-header data for such enforcement. 5952 * 5953 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound 5954 * and on return. 5955 */ 5956 5957 port_policy_present = ((itp->itp_flags & 5958 ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE); 5959 /* 5960 * NOTE: Even if our policy is transport mode, set the 5961 * SEL_TUNNEL_MODE flag so ipsec_init_inbound_sel() can 5962 * do the right thing w.r.t. outer headers. 5963 */ 5964 flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | 5965 (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); 5966 5967 rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4, 5968 inner_ipv6, flags); 5969 5970 switch (rc) { 5971 case SELRET_NOMEM: 5972 ip_drop_packet(message, B_TRUE, NULL, NULL, 5973 DROPPER(ipss, ipds_spd_nomem), 5974 &ipss->ipsec_spd_dropper); 5975 return (B_FALSE); 5976 case SELRET_TUNFRAG: 5977 /* 5978 * At this point, if we're cleartext, we don't want 5979 * to go there. 5980 */ 5981 if (ipsec_mp == NULL) { 5982 ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, 5983 DROPPER(ipss, ipds_spd_got_clear), 5984 &ipss->ipsec_spd_dropper); 5985 *data_mp = NULL; 5986 return (B_FALSE); 5987 } 5988 ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> 5989 ipsec_in_secure); 5990 message = ipsec_fragcache_add(&itp->itp_fragcache, 5991 ipsec_mp, *data_mp, outer_hdr_len, ipss); 5992 5993 if (message == NULL) { 5994 /* 5995 * Data is cached, fragment chain is not 5996 * complete. I consume ipsec_mp and data_mp 5997 */ 5998 return (B_FALSE); 5999 } 6000 6001 /* 6002 * If we get here, we have a full fragment chain. 6003 * Reacquire headers and selectors from first fragment. 6004 */ 6005 inner_hdr = message->b_cont->b_rptr; 6006 if (outer_ipv4 != NULL) { 6007 inner_hdr += IPH_HDR_LENGTH( 6008 (ipha_t *)message->b_cont->b_rptr); 6009 } else { 6010 inner_hdr += ip_hdr_length_v6(message->b_cont, 6011 (ip6_t *)message->b_cont->b_rptr); 6012 } 6013 ASSERT(inner_hdr <= message->b_cont->b_wptr); 6014 6015 if (inner_ipv4 != NULL) { 6016 inner_ipv4 = (ipha_t *)inner_hdr; 6017 inner_ipv6 = NULL; 6018 } else { 6019 inner_ipv6 = (ip6_t *)inner_hdr; 6020 inner_ipv4 = NULL; 6021 } 6022 6023 /* 6024 * Use SEL_TUNNEL_MODE to take into account the outer 6025 * header. Use SEL_POST_FRAG so we always get ports. 
6026 */ 6027 rc = ipsec_init_inbound_sel(&sel, message->b_cont, 6028 inner_ipv4, inner_ipv6, 6029 SEL_TUNNEL_MODE | SEL_POST_FRAG); 6030 switch (rc) { 6031 case SELRET_SUCCESS: 6032 /* 6033 * Get to same place as first caller's 6034 * SELRET_SUCCESS case. 6035 */ 6036 break; 6037 case SELRET_NOMEM: 6038 ip_drop_packet_chain(message, B_TRUE, 6039 NULL, NULL, 6040 DROPPER(ipss, ipds_spd_nomem), 6041 &ipss->ipsec_spd_dropper); 6042 return (B_FALSE); 6043 case SELRET_BADPKT: 6044 ip_drop_packet_chain(message, B_TRUE, 6045 NULL, NULL, 6046 DROPPER(ipss, ipds_spd_malformed_frag), 6047 &ipss->ipsec_spd_dropper); 6048 return (B_FALSE); 6049 case SELRET_TUNFRAG: 6050 cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); 6051 /* FALLTHRU */ 6052 default: 6053 cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)" 6054 " returns bizarro 0x%x", rc); 6055 /* Guaranteed panic! */ 6056 ASSERT(rc == SELRET_NOMEM); 6057 return (B_FALSE); 6058 } 6059 /* FALLTHRU */ 6060 case SELRET_SUCCESS: 6061 /* 6062 * Common case: 6063 * No per-port policy or a non-fragment. Keep going. 6064 */ 6065 break; 6066 case SELRET_BADPKT: 6067 /* 6068 * We may receive ICMP (with IPv6 inner) packets that 6069 * trigger this return value. Send 'em in for 6070 * enforcement checking. 6071 */ 6072 cmn_err(CE_NOTE, "ipsec_tun_inbound(): " 6073 "sending 'bad packet' in for enforcement"); 6074 break; 6075 default: 6076 cmn_err(CE_WARN, 6077 "ipsec_init_inbound_sel() returns bizarro 0x%x", 6078 rc); 6079 ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */ 6080 return (B_FALSE); 6081 } 6082 6083 if (is_icmp) { 6084 /* 6085 * Swap local/remote because this is an ICMP packet. 6086 */ 6087 tmpaddr = sel.ips_local_addr_v6; 6088 sel.ips_local_addr_v6 = sel.ips_remote_addr_v6; 6089 sel.ips_remote_addr_v6 = tmpaddr; 6090 tmpport = sel.ips_local_port; 6091 sel.ips_local_port = sel.ips_remote_port; 6092 sel.ips_remote_port = tmpport; 6093 } 6094 6095 /* find_policy_head() */ 6096 rw_enter(&polhead->iph_lock, RW_READER); 6097 pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, 6098 &sel, ns); 6099 rw_exit(&polhead->iph_lock); 6100 if (pol != NULL) { 6101 if (ipsec_mp == NULL || 6102 !((ipsec_in_t *)ipsec_mp->b_rptr)-> 6103 ipsec_in_secure) { 6104 retval = pol->ipsp_act->ipa_allow_clear; 6105 if (!retval) { 6106 /* 6107 * XXX should never get here with 6108 * tunnel reassembled fragments? 6109 */ 6110 ASSERT(message->b_next == NULL); 6111 ip_drop_packet(message, B_TRUE, NULL, 6112 NULL, 6113 DROPPER(ipss, ipds_spd_got_clear), 6114 &ipss->ipsec_spd_dropper); 6115 } else if (ipsec_mp != NULL) { 6116 freeb(ipsec_mp); 6117 } 6118 6119 IPPOL_REFRELE(pol, ns); 6120 return (retval); 6121 } 6122 /* 6123 * NOTE: The following releases pol's reference and 6124 * calls ip_drop_packet() for me on NULL returns. 6125 * 6126 * "sel" is still good here, so let's use it! 6127 */ 6128 *data_mp = ipsec_check_ipsecin_policy_reasm(message, 6129 pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( 6130 sel.ips_remote_port, sel.ips_local_port, 6131 (inner_ipv4 == NULL) ? IPPROTO_IPV6 : 6132 IPPROTO_ENCAP, sel.ips_protocol), ns); 6133 return (*data_mp != NULL); 6134 } 6135 6136 /* 6137 * Else fallthru and check the global policy on the outer 6138 * header(s) if this tunnel is an old-style transport-mode 6139 * one. Drop the packet explicitly (no policy entry) for 6140 * a new-style tunnel-mode tunnel. 
6141 */ 6142 if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { 6143 ip_drop_packet_chain(message, B_TRUE, NULL, 6144 NULL, 6145 DROPPER(ipss, ipds_spd_explicit), 6146 &ipss->ipsec_spd_dropper); 6147 return (B_FALSE); 6148 } 6149 } 6150 6151 /* 6152 * NOTE: If we reach here, we will not have packet chains from 6153 * fragcache_add(), because the only way I get chains is on a 6154 * tunnel-mode tunnel, which either returns with a pass, or gets 6155 * hit by the ip_drop_packet_chain() call right above here. 6156 */ 6157 6158 /* If no per-tunnel security, check global policy now. */ 6159 if (ipsec_mp != NULL && !global_present) { 6160 if (((ipsec_in_t *)(ipsec_mp->b_rptr))-> 6161 ipsec_in_icmp_loopback) { 6162 /* 6163 * This is an ICMP message with an ipsec_mp 6164 * attached. We should accept it. 6165 */ 6166 if (ipsec_mp != NULL) 6167 freeb(ipsec_mp); 6168 return (B_TRUE); 6169 } 6170 6171 ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, 6172 DROPPER(ipss, ipds_spd_got_secure), 6173 &ipss->ipsec_spd_dropper); 6174 return (B_FALSE); 6175 } 6176 6177 if (is_icmp) { 6178 /* 6179 * For ICMP packets, "outer_ipvN" is set to the outer header 6180 * that is *INSIDE* the ICMP payload. For global policy 6181 * checking, we need to reverse src/dst on the payload in 6182 * order to construct selectors appropriately. See "ripha" 6183 * constructions in ip.c. To avoid a bug like 6478464 (see 6184 * earlier in this file), we will actually exchange src/dst 6185 * in the packet, and reverse if after the call to 6186 * ipsec_check_global_policy(). 6187 */ 6188 if (outer_ipv4 != NULL) { 6189 tmp4 = outer_ipv4->ipha_src; 6190 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6191 outer_ipv4->ipha_dst = tmp4; 6192 } else { 6193 ASSERT(outer_ipv6 != NULL); 6194 tmpaddr = outer_ipv6->ip6_src; 6195 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6196 outer_ipv6->ip6_dst = tmpaddr; 6197 } 6198 } 6199 6200 /* NOTE: Frees message if it returns NULL. */ 6201 if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6, 6202 (ipsec_mp != NULL), ns) == NULL) { 6203 return (B_FALSE); 6204 } 6205 6206 if (is_icmp) { 6207 /* Set things back to normal. */ 6208 if (outer_ipv4 != NULL) { 6209 tmp4 = outer_ipv4->ipha_src; 6210 outer_ipv4->ipha_src = outer_ipv4->ipha_dst; 6211 outer_ipv4->ipha_dst = tmp4; 6212 } else { 6213 /* No need for ASSERT()s now. */ 6214 tmpaddr = outer_ipv6->ip6_src; 6215 outer_ipv6->ip6_src = outer_ipv6->ip6_dst; 6216 outer_ipv6->ip6_dst = tmpaddr; 6217 } 6218 } 6219 6220 if (ipsec_mp != NULL) 6221 freeb(ipsec_mp); 6222 6223 /* 6224 * At this point, we pretend it's a cleartext accepted 6225 * packet. 6226 */ 6227 return (B_TRUE); 6228 } 6229 6230 /* 6231 * AVL comparison routine for our list of tunnel polheads. 6232 */ 6233 static int 6234 tunnel_compare(const void *arg1, const void *arg2) 6235 { 6236 ipsec_tun_pol_t *left, *right; 6237 int rc; 6238 6239 left = (ipsec_tun_pol_t *)arg1; 6240 right = (ipsec_tun_pol_t *)arg2; 6241 6242 rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ); 6243 return (rc == 0 ? rc : (rc > 0 ? 1 : -1)); 6244 } 6245 6246 /* 6247 * Free a tunnel policy node. 
6248 */ 6249 void 6250 itp_free(ipsec_tun_pol_t *node, netstack_t *ns) 6251 { 6252 if (node->itp_policy != NULL) { 6253 IPPH_REFRELE(node->itp_policy, ns); 6254 node->itp_policy = NULL; 6255 } 6256 if (node->itp_inactive != NULL) { 6257 IPPH_REFRELE(node->itp_inactive, ns); 6258 node->itp_inactive = NULL; 6259 } 6260 mutex_destroy(&node->itp_lock); 6261 kmem_free(node, sizeof (*node)); 6262 } 6263 6264 void 6265 itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns) 6266 { 6267 ipsec_stack_t *ipss = ns->netstack_ipsec; 6268 6269 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6270 ipss->ipsec_tunnel_policy_gen++; 6271 ipsec_fragcache_uninit(&node->itp_fragcache); 6272 avl_remove(&ipss->ipsec_tunnel_policies, node); 6273 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6274 ITP_REFRELE(node, ns); 6275 } 6276 6277 /* 6278 * Public interface to look up a tunnel security policy by name. Used by 6279 * spdsock mostly. Returns "node" with a bumped refcnt. 6280 */ 6281 ipsec_tun_pol_t * 6282 get_tunnel_policy(char *name, netstack_t *ns) 6283 { 6284 ipsec_tun_pol_t *node, lookup; 6285 ipsec_stack_t *ipss = ns->netstack_ipsec; 6286 6287 (void) strncpy(lookup.itp_name, name, LIFNAMSIZ); 6288 6289 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6290 node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6291 &lookup, NULL); 6292 if (node != NULL) { 6293 ITP_REFHOLD(node); 6294 } 6295 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6296 6297 return (node); 6298 } 6299 6300 /* 6301 * Public interface to walk all tunnel security polcies. Useful for spdsock 6302 * DUMP operations. iterator() will not consume a reference. 6303 */ 6304 void 6305 itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *), 6306 void *arg, netstack_t *ns) 6307 { 6308 ipsec_tun_pol_t *node; 6309 ipsec_stack_t *ipss = ns->netstack_ipsec; 6310 6311 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); 6312 for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL; 6313 node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) { 6314 iterator(node, arg, ns); 6315 } 6316 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6317 } 6318 6319 /* 6320 * Initialize policy head. This can only fail if there's a memory problem. 6321 */ 6322 static boolean_t 6323 tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns) 6324 { 6325 ipsec_stack_t *ipss = ns->netstack_ipsec; 6326 6327 rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); 6328 iph->iph_refs = 1; 6329 iph->iph_gen = 0; 6330 if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize, 6331 KM_SLEEP, B_FALSE, ns) != 0) { 6332 ipsec_polhead_free_table(iph); 6333 return (B_FALSE); 6334 } 6335 ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize); 6336 return (B_TRUE); 6337 } 6338 6339 /* 6340 * Create a tunnel policy node with "name". Set errno with 6341 * ENOMEM if there's a memory problem, and EEXIST if there's an existing 6342 * node. 
6343 */ 6344 ipsec_tun_pol_t * 6345 create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns) 6346 { 6347 ipsec_tun_pol_t *newbie, *existing; 6348 avl_index_t where; 6349 ipsec_stack_t *ipss = ns->netstack_ipsec; 6350 6351 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP); 6352 if (newbie == NULL) { 6353 *errno = ENOMEM; 6354 return (NULL); 6355 } 6356 if (!ipsec_fragcache_init(&newbie->itp_fragcache)) { 6357 kmem_free(newbie, sizeof (*newbie)); 6358 *errno = ENOMEM; 6359 return (NULL); 6360 } 6361 6362 (void) strncpy(newbie->itp_name, name, LIFNAMSIZ); 6363 6364 rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); 6365 existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, 6366 newbie, &where); 6367 if (existing != NULL) { 6368 itp_free(newbie, ns); 6369 *errno = EEXIST; 6370 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6371 return (NULL); 6372 } 6373 ipss->ipsec_tunnel_policy_gen++; 6374 *gen = ipss->ipsec_tunnel_policy_gen; 6375 newbie->itp_refcnt = 2; /* One for the caller, one for the tree. */ 6376 newbie->itp_next_policy_index = 1; 6377 avl_insert(&ipss->ipsec_tunnel_policies, newbie, where); 6378 mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL); 6379 newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t), 6380 KM_NOSLEEP); 6381 if (newbie->itp_policy == NULL) 6382 goto nomem; 6383 newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t), 6384 KM_NOSLEEP); 6385 if (newbie->itp_inactive == NULL) { 6386 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6387 goto nomem; 6388 } 6389 6390 if (!tunnel_polhead_init(newbie->itp_policy, ns)) { 6391 kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); 6392 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6393 goto nomem; 6394 } else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) { 6395 IPPH_REFRELE(newbie->itp_policy, ns); 6396 kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); 6397 goto nomem; 6398 } 6399 rw_exit(&ipss->ipsec_tunnel_policy_lock); 6400 6401 return (newbie); 6402 nomem: 6403 *errno = ENOMEM; 6404 kmem_free(newbie, sizeof (*newbie)); 6405 return (NULL); 6406 } 6407 6408 /* 6409 * Given two addresses, find a tunnel instance's IPsec policy heads. 6410 * Returns NULL on failure. 6411 */ 6412 ipsec_tun_pol_t * 6413 itp_get_byaddr(uint32_t *laddr, uint32_t *faddr, int af, ip_stack_t *ipst) 6414 { 6415 conn_t *connp; 6416 iptun_t *iptun; 6417 ipsec_tun_pol_t *itp = NULL; 6418 6419 /* Classifiers are used to "src" being foreign. */ 6420 if (af == AF_INET) { 6421 connp = ipcl_iptun_classify_v4((ipaddr_t *)faddr, 6422 (ipaddr_t *)laddr, ipst); 6423 } else { 6424 ASSERT(af == AF_INET6); 6425 ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)laddr)); 6426 ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)faddr)); 6427 connp = ipcl_iptun_classify_v6((in6_addr_t *)faddr, 6428 (in6_addr_t *)laddr, ipst); 6429 } 6430 6431 if (connp == NULL) 6432 return (NULL); 6433 6434 if (IPCL_IS_IPTUN(connp)) { 6435 iptun = connp->conn_iptun; 6436 if (iptun != NULL) { 6437 itp = iptun->iptun_itp; 6438 if (itp != NULL) { 6439 /* Braces due to the macro's nature... */ 6440 ITP_REFHOLD(itp); 6441 } 6442 } /* Else itp is already NULL. 
*/ 6443 } 6444 6445 CONN_DEC_REF(connp); 6446 return (itp); 6447 } 6448 6449 /* 6450 * Frag cache code, based on SunScreen 3.2 source 6451 * screen/kernel/common/screen_fragcache.c 6452 */ 6453 6454 #define IPSEC_FRAG_TTL_MAX 5 6455 /* 6456 * Note that the following parameters create 256 hash buckets 6457 * with 1024 free entries to be distributed. Things are cleaned 6458 * periodically and are attempted to be cleaned when there is no 6459 * free space, but this system errs on the side of dropping packets 6460 * over creating memory exhaustion. We may decide to make hash 6461 * factor a tunable if this proves to be a bad decision. 6462 */ 6463 #define IPSEC_FRAG_HASH_SLOTS (1<<8) 6464 #define IPSEC_FRAG_HASH_FACTOR 4 6465 #define IPSEC_FRAG_HASH_SIZE (IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR) 6466 6467 #define IPSEC_FRAG_HASH_MASK (IPSEC_FRAG_HASH_SLOTS - 1) 6468 #define IPSEC_FRAG_HASH_FUNC(id) (((id) & IPSEC_FRAG_HASH_MASK) ^ \ 6469 (((id) / \ 6470 (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \ 6471 IPSEC_FRAG_HASH_MASK)) 6472 6473 /* Maximum fragments per packet. 48 bytes payload x 1366 packets > 64KB */ 6474 #define IPSEC_MAX_FRAGS 1366 6475 6476 #define V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6477 IPH_OFFSET) << 3) 6478 #define V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \ 6479 IPH_MF) 6480 6481 /* 6482 * Initialize an ipsec fragcache instance. 6483 * Returns B_FALSE if memory allocation fails. 6484 */ 6485 boolean_t 6486 ipsec_fragcache_init(ipsec_fragcache_t *frag) 6487 { 6488 ipsec_fragcache_entry_t *ftemp; 6489 int i; 6490 6491 mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL); 6492 frag->itpf_ptr = (ipsec_fragcache_entry_t **) 6493 kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) * 6494 IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP); 6495 if (frag->itpf_ptr == NULL) 6496 return (B_FALSE); 6497 6498 ftemp = (ipsec_fragcache_entry_t *) 6499 kmem_zalloc(sizeof (ipsec_fragcache_entry_t) * 6500 IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP); 6501 if (ftemp == NULL) { 6502 kmem_free(frag->itpf_ptr, sizeof (ipsec_fragcache_entry_t *) * 6503 IPSEC_FRAG_HASH_SLOTS); 6504 return (B_FALSE); 6505 } 6506 6507 frag->itpf_freelist = NULL; 6508 6509 for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) { 6510 ftemp->itpfe_next = frag->itpf_freelist; 6511 frag->itpf_freelist = ftemp; 6512 ftemp++; 6513 } 6514 6515 frag->itpf_expire_hint = 0; 6516 6517 return (B_TRUE); 6518 } 6519 6520 void 6521 ipsec_fragcache_uninit(ipsec_fragcache_t *frag) 6522 { 6523 ipsec_fragcache_entry_t *fep; 6524 int i; 6525 6526 mutex_enter(&frag->itpf_lock); 6527 if (frag->itpf_ptr) { 6528 /* Delete any existing fragcache entry chains */ 6529 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 6530 fep = (frag->itpf_ptr)[i]; 6531 while (fep != NULL) { 6532 /* Returned fep is next in chain or NULL */ 6533 fep = fragcache_delentry(i, fep, frag); 6534 } 6535 } 6536 /* 6537 * Chase the pointers back to the beginning 6538 * of the memory allocation and then 6539 * get rid of the allocated freelist 6540 */ 6541 while (frag->itpf_freelist->itpfe_next != NULL) 6542 frag->itpf_freelist = frag->itpf_freelist->itpfe_next; 6543 /* 6544 * XXX - If we ever dynamically grow the freelist 6545 * then we'll have to free entries individually 6546 * or determine how many entries or chunks we have 6547 * grown since the initial allocation. 
6548 */ 6549 kmem_free(frag->itpf_freelist, 6550 sizeof (ipsec_fragcache_entry_t) * 6551 IPSEC_FRAG_HASH_SIZE); 6552 /* Free the fragcache structure */ 6553 kmem_free(frag->itpf_ptr, 6554 sizeof (ipsec_fragcache_entry_t *) * 6555 IPSEC_FRAG_HASH_SLOTS); 6556 } 6557 mutex_exit(&frag->itpf_lock); 6558 mutex_destroy(&frag->itpf_lock); 6559 } 6560 6561 /* 6562 * Add a fragment to the fragment cache. Consumes mp if NULL is returned. 6563 * Returns mp if a whole fragment has been assembled, NULL otherwise 6564 */ 6565 6566 mblk_t * 6567 ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, 6568 int outer_hdr_len, ipsec_stack_t *ipss) 6569 { 6570 boolean_t is_v4; 6571 time_t itpf_time; 6572 ipha_t *iph; 6573 ipha_t *oiph; 6574 ip6_t *ip6h = NULL; 6575 uint8_t v6_proto; 6576 uint8_t *v6_proto_p; 6577 uint16_t ip6_hdr_length; 6578 ip6_pkt_t ipp; 6579 ip6_frag_t *fraghdr; 6580 ipsec_fragcache_entry_t *fep; 6581 int i; 6582 mblk_t *nmp, *prevmp; 6583 int firstbyte, lastbyte; 6584 int offset; 6585 int last; 6586 boolean_t inbound = (ipsec_mp != NULL); 6587 mblk_t *first_mp = inbound ? ipsec_mp : mp; 6588 6589 ASSERT(first_mp == mp || first_mp->b_cont == mp); 6590 6591 /* 6592 * You're on the slow path, so insure that every packet in the 6593 * cache is a single-mblk one. 6594 */ 6595 if (mp->b_cont != NULL) { 6596 nmp = msgpullup(mp, -1); 6597 if (nmp == NULL) { 6598 ip_drop_packet(first_mp, inbound, NULL, NULL, 6599 DROPPER(ipss, ipds_spd_nomem), 6600 &ipss->ipsec_spd_dropper); 6601 return (NULL); 6602 } 6603 freemsg(mp); 6604 if (ipsec_mp != NULL) 6605 ipsec_mp->b_cont = nmp; 6606 mp = nmp; 6607 } 6608 6609 mutex_enter(&frag->itpf_lock); 6610 6611 oiph = (ipha_t *)mp->b_rptr; 6612 iph = (ipha_t *)(mp->b_rptr + outer_hdr_len); 6613 6614 if (IPH_HDR_VERSION(iph) == IPV4_VERSION) { 6615 is_v4 = B_TRUE; 6616 } else { 6617 ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION); 6618 ip6h = (ip6_t *)(mp->b_rptr + outer_hdr_len); 6619 6620 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip6_hdr_length, 6621 &v6_proto_p)) { 6622 /* 6623 * Find upper layer protocol. 6624 * If it fails we have a malformed packet 6625 */ 6626 mutex_exit(&frag->itpf_lock); 6627 ip_drop_packet(first_mp, inbound, NULL, NULL, 6628 DROPPER(ipss, ipds_spd_malformed_packet), 6629 &ipss->ipsec_spd_dropper); 6630 return (NULL); 6631 } else { 6632 v6_proto = *v6_proto_p; 6633 } 6634 6635 6636 bzero(&ipp, sizeof (ipp)); 6637 (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); 6638 if (!(ipp.ipp_fields & IPPF_FRAGHDR)) { 6639 /* 6640 * We think this is a fragment, but didn't find 6641 * a fragment header. Something is wrong. 6642 */ 6643 mutex_exit(&frag->itpf_lock); 6644 ip_drop_packet(first_mp, inbound, NULL, NULL, 6645 DROPPER(ipss, ipds_spd_malformed_frag), 6646 &ipss->ipsec_spd_dropper); 6647 return (NULL); 6648 } 6649 fraghdr = ipp.ipp_fraghdr; 6650 is_v4 = B_FALSE; 6651 } 6652 6653 /* Anything to cleanup? */ 6654 6655 /* 6656 * This cleanup call could be put in a timer loop 6657 * but it may actually be just as reasonable a decision to 6658 * leave it here. The disadvantage is this only gets called when 6659 * frags are added. The advantage is that it is not 6660 * susceptible to race conditions like a time-based cleanup 6661 * may be. 
6662 */ 6663 itpf_time = gethrestime_sec(); 6664 if (itpf_time >= frag->itpf_expire_hint) 6665 ipsec_fragcache_clean(frag); 6666 6667 /* Lookup to see if there is an existing entry */ 6668 6669 if (is_v4) 6670 i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident); 6671 else 6672 i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident); 6673 6674 for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) { 6675 if (is_v4) { 6676 ASSERT(iph != NULL); 6677 if ((fep->itpfe_id == iph->ipha_ident) && 6678 (fep->itpfe_src == iph->ipha_src) && 6679 (fep->itpfe_dst == iph->ipha_dst) && 6680 (fep->itpfe_proto == iph->ipha_protocol)) 6681 break; 6682 } else { 6683 ASSERT(fraghdr != NULL); 6684 ASSERT(fep != NULL); 6685 if ((fep->itpfe_id == fraghdr->ip6f_ident) && 6686 IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6, 6687 &ip6h->ip6_src) && 6688 IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6, 6689 &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto)) 6690 break; 6691 } 6692 } 6693 6694 if (is_v4) { 6695 firstbyte = V4_FRAG_OFFSET(iph); 6696 lastbyte = firstbyte + ntohs(iph->ipha_length) - 6697 IPH_HDR_LENGTH(iph); 6698 last = (V4_MORE_FRAGS(iph) == 0); 6699 #ifdef FRAGCACHE_DEBUG 6700 cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, " 6701 "last = %d, id = %d\n", firstbyte, lastbyte, last, 6702 iph->ipha_ident); 6703 #endif 6704 } else { 6705 firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 6706 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 6707 sizeof (ip6_t) - ip6_hdr_length; 6708 last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0; 6709 #ifdef FRAGCACHE_DEBUG 6710 cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, " 6711 "last = %d, id = %d, fraghdr = %p, mp = %p\n", 6712 firstbyte, lastbyte, last, fraghdr->ip6f_ident, 6713 fraghdr, mp); 6714 #endif 6715 } 6716 6717 /* check for bogus fragments and delete the entry */ 6718 if (firstbyte > 0 && firstbyte <= 8) { 6719 if (fep != NULL) 6720 (void) fragcache_delentry(i, fep, frag); 6721 mutex_exit(&frag->itpf_lock); 6722 ip_drop_packet(first_mp, inbound, NULL, NULL, 6723 DROPPER(ipss, ipds_spd_malformed_frag), 6724 &ipss->ipsec_spd_dropper); 6725 return (NULL); 6726 } 6727 6728 /* Not found, allocate a new entry */ 6729 if (fep == NULL) { 6730 if (frag->itpf_freelist == NULL) { 6731 /* see if there is some space */ 6732 ipsec_fragcache_clean(frag); 6733 if (frag->itpf_freelist == NULL) { 6734 mutex_exit(&frag->itpf_lock); 6735 ip_drop_packet(first_mp, inbound, NULL, NULL, 6736 DROPPER(ipss, ipds_spd_nomem), 6737 &ipss->ipsec_spd_dropper); 6738 return (NULL); 6739 } 6740 } 6741 6742 fep = frag->itpf_freelist; 6743 frag->itpf_freelist = fep->itpfe_next; 6744 6745 if (is_v4) { 6746 bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src, 6747 sizeof (struct in_addr)); 6748 bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst, 6749 sizeof (struct in_addr)); 6750 fep->itpfe_id = iph->ipha_ident; 6751 fep->itpfe_proto = iph->ipha_protocol; 6752 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6753 } else { 6754 bcopy((in6_addr_t *)&ip6h->ip6_src, 6755 (in6_addr_t *)&fep->itpfe_src6, 6756 sizeof (struct in6_addr)); 6757 bcopy((in6_addr_t *)&ip6h->ip6_dst, 6758 (in6_addr_t *)&fep->itpfe_dst6, 6759 sizeof (struct in6_addr)); 6760 fep->itpfe_id = fraghdr->ip6f_ident; 6761 fep->itpfe_proto = v6_proto; 6762 i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id); 6763 } 6764 itpf_time = gethrestime_sec(); 6765 fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1; 6766 fep->itpfe_last = 0; 6767 fep->itpfe_fraglist = NULL; 6768 fep->itpfe_depth = 0; 6769 fep->itpfe_next = (frag->itpf_ptr)[i]; 6770 
(frag->itpf_ptr)[i] = fep; 6771 6772 if (frag->itpf_expire_hint > fep->itpfe_exp) 6773 frag->itpf_expire_hint = fep->itpfe_exp; 6774 6775 } 6776 6777 /* Insert it in the frag list */ 6778 /* List is in order by starting offset of fragments */ 6779 6780 prevmp = NULL; 6781 for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) { 6782 ipha_t *niph; 6783 ipha_t *oniph; 6784 ip6_t *nip6h; 6785 ip6_pkt_t nipp; 6786 ip6_frag_t *nfraghdr; 6787 uint16_t nip6_hdr_length; 6788 uint8_t *nv6_proto_p; 6789 int nfirstbyte, nlastbyte; 6790 char *data, *ndata; 6791 mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp); 6792 int hdr_len; 6793 6794 oniph = (ipha_t *)mp->b_rptr; 6795 nip6h = NULL; 6796 niph = NULL; 6797 6798 /* 6799 * Determine outer header type and length and set 6800 * pointers appropriately 6801 */ 6802 6803 if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) { 6804 hdr_len = ((outer_hdr_len != 0) ? 6805 IPH_HDR_LENGTH(oiph) : 0); 6806 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6807 } else { 6808 ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION); 6809 ASSERT(ndata_mp->b_cont == NULL); 6810 nip6h = (ip6_t *)ndata_mp->b_rptr; 6811 (void) ip_hdr_length_nexthdr_v6(ndata_mp, nip6h, 6812 &nip6_hdr_length, &v6_proto_p); 6813 hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0); 6814 } 6815 6816 /* 6817 * Determine inner header type and length and set 6818 * pointers appropriately 6819 */ 6820 6821 if (is_v4) { 6822 if (niph == NULL) { 6823 /* Was v6 outer */ 6824 niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len); 6825 } 6826 nfirstbyte = V4_FRAG_OFFSET(niph); 6827 nlastbyte = nfirstbyte + ntohs(niph->ipha_length) - 6828 IPH_HDR_LENGTH(niph); 6829 } else { 6830 ASSERT(ndata_mp->b_cont == NULL); 6831 nip6h = (ip6_t *)(ndata_mp->b_rptr + hdr_len); 6832 if (!ip_hdr_length_nexthdr_v6(ndata_mp, nip6h, 6833 &nip6_hdr_length, &nv6_proto_p)) { 6834 mutex_exit(&frag->itpf_lock); 6835 ip_drop_packet_chain(nmp, inbound, NULL, NULL, 6836 DROPPER(ipss, ipds_spd_malformed_frag), 6837 &ipss->ipsec_spd_dropper); 6838 ipsec_freemsg_chain(ndata_mp); 6839 return (NULL); 6840 } 6841 bzero(&nipp, sizeof (nipp)); 6842 (void) ip_find_hdr_v6(ndata_mp, nip6h, &nipp, NULL); 6843 nfraghdr = nipp.ipp_fraghdr; 6844 nfirstbyte = ntohs(nfraghdr->ip6f_offlg & 6845 IP6F_OFF_MASK); 6846 nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) + 6847 sizeof (ip6_t) - nip6_hdr_length; 6848 } 6849 6850 /* Check for overlapping fragments */ 6851 if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) { 6852 /* 6853 * Overlap Check: 6854 * ~~~~--------- # Check if the newly 6855 * ~ ndata_mp| # received fragment 6856 * ~~~~--------- # overlaps with the 6857 * ---------~~~~~~ # current fragment. 
6858 * | mp ~ 6859 * ---------~~~~~~ 6860 */ 6861 if (is_v4) { 6862 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6863 firstbyte - nfirstbyte; 6864 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6865 } else { 6866 data = (char *)ip6h + 6867 nip6_hdr_length + firstbyte - 6868 nfirstbyte; 6869 ndata = (char *)nip6h + nip6_hdr_length; 6870 } 6871 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) - 6872 firstbyte)) { 6873 /* Overlapping data does not match */ 6874 (void) fragcache_delentry(i, fep, frag); 6875 mutex_exit(&frag->itpf_lock); 6876 ip_drop_packet(first_mp, inbound, NULL, NULL, 6877 DROPPER(ipss, ipds_spd_overlap_frag), 6878 &ipss->ipsec_spd_dropper); 6879 return (NULL); 6880 } 6881 /* Part of defense for jolt2.c fragmentation attack */ 6882 if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) { 6883 /* 6884 * Check for identical or subset fragments: 6885 * ---------- ~~~~--------~~~~~ 6886 * | nmp | or ~ nmp ~ 6887 * ---------- ~~~~--------~~~~~ 6888 * ---------- ------ 6889 * | mp | | mp | 6890 * ---------- ------ 6891 */ 6892 mutex_exit(&frag->itpf_lock); 6893 ip_drop_packet(first_mp, inbound, NULL, NULL, 6894 DROPPER(ipss, ipds_spd_evil_frag), 6895 &ipss->ipsec_spd_dropper); 6896 return (NULL); 6897 } 6898 6899 } 6900 6901 /* Correct location for this fragment? */ 6902 if (firstbyte <= nfirstbyte) { 6903 /* 6904 * Check if the tail end of the new fragment overlaps 6905 * with the head of the current fragment. 6906 * --------~~~~~~~ 6907 * | nmp ~ 6908 * --------~~~~~~~ 6909 * ~~~~~-------- 6910 * ~ mp | 6911 * ~~~~~-------- 6912 */ 6913 if (lastbyte > nfirstbyte) { 6914 /* Fragments overlap */ 6915 data = (char *)iph + IPH_HDR_LENGTH(iph) + 6916 firstbyte - nfirstbyte; 6917 ndata = (char *)niph + IPH_HDR_LENGTH(niph); 6918 if (is_v4) { 6919 data = (char *)iph + 6920 IPH_HDR_LENGTH(iph) + firstbyte - 6921 nfirstbyte; 6922 ndata = (char *)niph + 6923 IPH_HDR_LENGTH(niph); 6924 } else { 6925 data = (char *)ip6h + 6926 nip6_hdr_length + firstbyte - 6927 nfirstbyte; 6928 ndata = (char *)nip6h + nip6_hdr_length; 6929 } 6930 if (bcmp(data, ndata, MIN(lastbyte, nlastbyte) 6931 - nfirstbyte)) { 6932 /* Overlap mismatch */ 6933 (void) fragcache_delentry(i, fep, frag); 6934 mutex_exit(&frag->itpf_lock); 6935 ip_drop_packet(first_mp, inbound, NULL, 6936 NULL, DROPPER(ipss, 6937 ipds_spd_overlap_frag), 6938 &ipss->ipsec_spd_dropper); 6939 return (NULL); 6940 } 6941 } 6942 6943 /* 6944 * Fragment does not illegally overlap and can now 6945 * be inserted into the chain 6946 */ 6947 break; 6948 } 6949 6950 prevmp = nmp; 6951 } 6952 first_mp->b_next = nmp; 6953 6954 if (prevmp == NULL) { 6955 fep->itpfe_fraglist = first_mp; 6956 } else { 6957 prevmp->b_next = first_mp; 6958 } 6959 if (last) 6960 fep->itpfe_last = 1; 6961 6962 /* Part of defense for jolt2.c fragmentation attack */ 6963 if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) { 6964 (void) fragcache_delentry(i, fep, frag); 6965 mutex_exit(&frag->itpf_lock); 6966 ip_drop_packet(first_mp, inbound, NULL, NULL, 6967 DROPPER(ipss, ipds_spd_max_frags), 6968 &ipss->ipsec_spd_dropper); 6969 return (NULL); 6970 } 6971 6972 /* Check for complete packet */ 6973 6974 if (!fep->itpfe_last) { 6975 mutex_exit(&frag->itpf_lock); 6976 #ifdef FRAGCACHE_DEBUG 6977 cmn_err(CE_WARN, "Fragment cached, not last.\n"); 6978 #endif 6979 return (NULL); 6980 } 6981 6982 #ifdef FRAGCACHE_DEBUG 6983 cmn_err(CE_WARN, "Last fragment cached.\n"); 6984 cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp); 6985 #endif 6986 6987 offset = 0; 6988 for (mp = fep->itpfe_fraglist; 
mp; mp = mp->b_next) { 6989 mblk_t *data_mp = (inbound ? mp->b_cont : mp); 6990 int hdr_len; 6991 6992 oiph = (ipha_t *)data_mp->b_rptr; 6993 ip6h = NULL; 6994 iph = NULL; 6995 6996 if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) { 6997 hdr_len = ((outer_hdr_len != 0) ? 6998 IPH_HDR_LENGTH(oiph) : 0); 6999 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 7000 } else { 7001 ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); 7002 ASSERT(data_mp->b_cont == NULL); 7003 ip6h = (ip6_t *)data_mp->b_rptr; 7004 (void) ip_hdr_length_nexthdr_v6(data_mp, ip6h, 7005 &ip6_hdr_length, &v6_proto_p); 7006 hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0); 7007 } 7008 7009 /* Calculate current fragment start/end */ 7010 if (is_v4) { 7011 if (iph == NULL) { 7012 /* Was v6 outer */ 7013 iph = (ipha_t *)(data_mp->b_rptr + hdr_len); 7014 } 7015 firstbyte = V4_FRAG_OFFSET(iph); 7016 lastbyte = firstbyte + ntohs(iph->ipha_length) - 7017 IPH_HDR_LENGTH(iph); 7018 } else { 7019 ASSERT(data_mp->b_cont == NULL); 7020 ip6h = (ip6_t *)(data_mp->b_rptr + hdr_len); 7021 if (!ip_hdr_length_nexthdr_v6(data_mp, ip6h, 7022 &ip6_hdr_length, &v6_proto_p)) { 7023 mutex_exit(&frag->itpf_lock); 7024 ip_drop_packet_chain(mp, inbound, NULL, NULL, 7025 DROPPER(ipss, ipds_spd_malformed_frag), 7026 &ipss->ipsec_spd_dropper); 7027 return (NULL); 7028 } 7029 v6_proto = *v6_proto_p; 7030 bzero(&ipp, sizeof (ipp)); 7031 (void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL); 7032 fraghdr = ipp.ipp_fraghdr; 7033 firstbyte = ntohs(fraghdr->ip6f_offlg & 7034 IP6F_OFF_MASK); 7035 lastbyte = firstbyte + ntohs(ip6h->ip6_plen) + 7036 sizeof (ip6_t) - ip6_hdr_length; 7037 } 7038 7039 /* 7040 * If this fragment is greater than current offset, 7041 * we have a missing fragment so return NULL 7042 */ 7043 if (firstbyte > offset) { 7044 mutex_exit(&frag->itpf_lock); 7045 #ifdef FRAGCACHE_DEBUG 7046 /* 7047 * Note, this can happen when the last frag 7048 * gets sent through because it is smaller 7049 * than the MTU. It is not necessarily an 7050 * error condition. 7051 */ 7052 cmn_err(CE_WARN, "Frag greater than offset! 
: " 7053 "missing fragment: firstbyte = %d, offset = %d, " 7054 "mp = %p\n", firstbyte, offset, mp); 7055 #endif 7056 return (NULL); 7057 } 7058 7059 /* 7060 * If we are at the last fragment, we have the complete 7061 * packet, so rechain things and return it to caller 7062 * for processing 7063 */ 7064 7065 if ((is_v4 && !V4_MORE_FRAGS(iph)) || 7066 (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) { 7067 mp = fep->itpfe_fraglist; 7068 fep->itpfe_fraglist = NULL; 7069 (void) fragcache_delentry(i, fep, frag); 7070 mutex_exit(&frag->itpf_lock); 7071 7072 if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) > 7073 65535)) || (!is_v4 && (firstbyte + 7074 ntohs(ip6h->ip6_plen) > 65535))) { 7075 /* It is an invalid "ping-o-death" packet */ 7076 /* Discard it */ 7077 ip_drop_packet_chain(mp, inbound, NULL, NULL, 7078 DROPPER(ipss, ipds_spd_evil_frag), 7079 &ipss->ipsec_spd_dropper); 7080 return (NULL); 7081 } 7082 #ifdef FRAGCACHE_DEBUG 7083 cmn_err(CE_WARN, "Fragcache returning mp = %p, " 7084 "mp->b_next = %p", mp, mp->b_next); 7085 #endif 7086 /* 7087 * For inbound case, mp has ipsec_in b_next'd chain 7088 * For outbound case, it is just data mp chain 7089 */ 7090 return (mp); 7091 } 7092 7093 /* 7094 * Update new ending offset if this 7095 * fragment extends the packet 7096 */ 7097 if (offset < lastbyte) 7098 offset = lastbyte; 7099 } 7100 7101 mutex_exit(&frag->itpf_lock); 7102 7103 /* Didn't find last fragment, so return NULL */ 7104 return (NULL); 7105 } 7106 7107 static void 7108 ipsec_fragcache_clean(ipsec_fragcache_t *frag) 7109 { 7110 ipsec_fragcache_entry_t *fep; 7111 int i; 7112 ipsec_fragcache_entry_t *earlyfep = NULL; 7113 time_t itpf_time; 7114 int earlyexp; 7115 int earlyi = 0; 7116 7117 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7118 7119 itpf_time = gethrestime_sec(); 7120 earlyexp = itpf_time + 10000; 7121 7122 for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) { 7123 fep = (frag->itpf_ptr)[i]; 7124 while (fep) { 7125 if (fep->itpfe_exp < itpf_time) { 7126 /* found */ 7127 fep = fragcache_delentry(i, fep, frag); 7128 } else { 7129 if (fep->itpfe_exp < earlyexp) { 7130 earlyfep = fep; 7131 earlyexp = fep->itpfe_exp; 7132 earlyi = i; 7133 } 7134 fep = fep->itpfe_next; 7135 } 7136 } 7137 } 7138 7139 frag->itpf_expire_hint = earlyexp; 7140 7141 /* if (!found) */ 7142 if (frag->itpf_freelist == NULL) 7143 (void) fragcache_delentry(earlyi, earlyfep, frag); 7144 } 7145 7146 static ipsec_fragcache_entry_t * 7147 fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep, 7148 ipsec_fragcache_t *frag) 7149 { 7150 ipsec_fragcache_entry_t *targp; 7151 ipsec_fragcache_entry_t *nextp = fep->itpfe_next; 7152 7153 ASSERT(MUTEX_HELD(&frag->itpf_lock)); 7154 7155 /* Free up any fragment list still in cache entry */ 7156 ipsec_freemsg_chain(fep->itpfe_fraglist); 7157 7158 targp = (frag->itpf_ptr)[slot]; 7159 ASSERT(targp != 0); 7160 7161 if (targp == fep) { 7162 /* unlink from head of hash chain */ 7163 (frag->itpf_ptr)[slot] = nextp; 7164 /* link into free list */ 7165 fep->itpfe_next = frag->itpf_freelist; 7166 frag->itpf_freelist = fep; 7167 return (nextp); 7168 } 7169 7170 /* maybe should use double linked list to make update faster */ 7171 /* must be past front of chain */ 7172 while (targp) { 7173 if (targp->itpfe_next == fep) { 7174 /* unlink from hash chain */ 7175 targp->itpfe_next = nextp; 7176 /* link into free list */ 7177 fep->itpfe_next = frag->itpf_freelist; 7178 frag->itpf_freelist = fep; 7179 return (nextp); 7180 } 7181 targp = targp->itpfe_next; 7182 ASSERT(targp != 0); 7183 } 7184 /* 
NOTREACHED */ 7185 return (NULL); 7186 } 7187