1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2017 Joyent, Inc. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/errno.h> 32 #include <sys/strlog.h> 33 #include <sys/tihdr.h> 34 #include <sys/socket.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/kmem.h> 38 #include <sys/zone.h> 39 #include <sys/sysmacros.h> 40 #include <sys/cmn_err.h> 41 #include <sys/vtrace.h> 42 #include <sys/debug.h> 43 #include <sys/atomic.h> 44 #include <sys/strsun.h> 45 #include <sys/random.h> 46 #include <netinet/in.h> 47 #include <net/if.h> 48 #include <netinet/ip6.h> 49 #include <net/pfkeyv2.h> 50 #include <net/pfpolicy.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/nd.h> 55 #include <inet/ip.h> 56 #include <inet/ip_impl.h> 57 #include <inet/ip6.h> 58 #include <inet/ip_if.h> 59 #include <inet/ip_ndp.h> 60 #include <inet/sadb.h> 61 #include <inet/ipsec_info.h> 62 #include <inet/ipsec_impl.h> 63 #include <inet/ipsecesp.h> 64 #include <inet/ipdrop.h> 65 #include <inet/tcp.h> 66 #include <sys/kstat.h> 67 #include <sys/policy.h> 68 #include <sys/strsun.h> 69 #include <sys/strsubr.h> 70 #include <inet/udp_impl.h> 71 #include <sys/taskq.h> 72 #include <sys/note.h> 73 74 #include <sys/tsol/tnet.h> 75 76 /* 77 * Table of ND variables supported by ipsecesp. These are loaded into 78 * ipsecesp_g_nd in ipsecesp_init_nd. 79 * All of these are alterable, within the min/max values given, at run time. 80 */ 81 static ipsecespparam_t lcl_param_arr[] = { 82 /* min max value name */ 83 { 0, 3, 0, "ipsecesp_debug"}, 84 { 125, 32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"}, 85 { 1, 10, 1, "ipsecesp_reap_delay"}, 86 { 1, SADB_MAX_REPLAY, 64, "ipsecesp_replay_size"}, 87 { 1, 300, 15, "ipsecesp_acquire_timeout"}, 88 { 1, 1800, 90, "ipsecesp_larval_timeout"}, 89 /* Default lifetime values for ACQUIRE messages. */ 90 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_bytes"}, 91 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_bytes"}, 92 { 0, 0xffffffffU, 24000, "ipsecesp_default_soft_addtime"}, 93 { 0, 0xffffffffU, 28800, "ipsecesp_default_hard_addtime"}, 94 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_usetime"}, 95 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_usetime"}, 96 { 0, 1, 0, "ipsecesp_log_unknown_spi"}, 97 { 0, 2, 1, "ipsecesp_padding_check"}, 98 { 0, 600, 20, "ipsecesp_nat_keepalive_interval"}, 99 }; 100 /* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */ 101 102 #define esp0dbg(a) printf a 103 /* NOTE: != 0 instead of > 0 so lint doesn't complain. */ 104 #define esp1dbg(espstack, a) if (espstack->ipsecesp_debug != 0) printf a 105 #define esp2dbg(espstack, a) if (espstack->ipsecesp_debug > 1) printf a 106 #define esp3dbg(espstack, a) if (espstack->ipsecesp_debug > 2) printf a 107 108 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *); 109 static int ipsecesp_close(queue_t *, int, cred_t *); 110 static int ipsecesp_rput(queue_t *, mblk_t *); 111 static int ipsecesp_wput(queue_t *, mblk_t *); 112 static void *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns); 113 static void ipsecesp_stack_fini(netstackid_t stackid, void *arg); 114 115 static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *); 116 static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *); 117 static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *); 118 119 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t, 120 ipsecesp_stack_t *, cred_t *); 121 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t, 122 kstat_named_t **, ipsecesp_stack_t *); 123 static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *, 124 ipsa_t *, uint_t); 125 static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *, 126 ipsa_t *, uchar_t *, uint_t); 127 128 /* Setable in /etc/system */ 129 uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; 130 131 static struct module_info info = { 132 5137, "ipsecesp", 0, INFPSZ, 65536, 1024 133 }; 134 135 static struct qinit rinit = { 136 ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 137 NULL 138 }; 139 140 static struct qinit winit = { 141 ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 142 NULL 143 }; 144 145 struct streamtab ipsecespinfo = { 146 &rinit, &winit, NULL, NULL 147 }; 148 149 static taskq_t *esp_taskq; 150 151 /* 152 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now. 153 * 154 * Question: Do I need this, given that all instance's esps->esps_wq point 155 * to IP? 156 * 157 * Answer: Yes, because I need to know which queue is BOUND to 158 * IPPROTO_ESP 159 */ 160 161 static int esp_kstat_update(kstat_t *, int); 162 163 static boolean_t 164 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) 165 { 166 espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", 167 "net", KSTAT_TYPE_NAMED, 168 sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid); 169 170 if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) 171 return (B_FALSE); 172 173 espstack->esp_kstats = espstack->esp_ksp->ks_data; 174 175 espstack->esp_ksp->ks_update = esp_kstat_update; 176 espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid; 177 178 #define K64 KSTAT_DATA_UINT64 179 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64) 180 181 KI(num_aalgs); 182 KI(num_ealgs); 183 KI(good_auth); 184 KI(bad_auth); 185 KI(bad_padding); 186 KI(replay_failures); 187 KI(replay_early_failures); 188 KI(keysock_in); 189 KI(out_requests); 190 KI(acquire_requests); 191 KI(bytes_expired); 192 KI(out_discards); 193 KI(crypto_sync); 194 KI(crypto_async); 195 KI(crypto_failures); 196 KI(bad_decrypt); 197 KI(sa_port_renumbers); 198 199 #undef KI 200 #undef K64 201 202 kstat_install(espstack->esp_ksp); 203 204 return (B_TRUE); 205 } 206 207 static int 208 esp_kstat_update(kstat_t *kp, int rw) 209 { 210 esp_kstats_t *ekp; 211 netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; 212 netstack_t *ns; 213 ipsec_stack_t *ipss; 214 215 if ((kp == NULL) || (kp->ks_data == NULL)) 216 return (EIO); 217 218 if (rw == KSTAT_WRITE) 219 return (EACCES); 220 221 ns = netstack_find_by_stackid(stackid); 222 if (ns == NULL) 223 return (-1); 224 ipss = ns->netstack_ipsec; 225 if (ipss == NULL) { 226 netstack_rele(ns); 227 return (-1); 228 } 229 ekp = (esp_kstats_t *)kp->ks_data; 230 231 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 232 ekp->esp_stat_num_aalgs.value.ui64 = 233 ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; 234 ekp->esp_stat_num_ealgs.value.ui64 = 235 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 236 rw_exit(&ipss->ipsec_alg_lock); 237 238 netstack_rele(ns); 239 return (0); 240 } 241 242 #ifdef DEBUG 243 /* 244 * Debug routine, useful to see pre-encryption data. 245 */ 246 static char * 247 dump_msg(mblk_t *mp) 248 { 249 char tmp_str[3], tmp_line[256]; 250 251 while (mp != NULL) { 252 unsigned char *ptr; 253 254 printf("mblk address 0x%p, length %ld, db_ref %d " 255 "type %d, base 0x%p, lim 0x%p\n", 256 (void *) mp, (long)(mp->b_wptr - mp->b_rptr), 257 mp->b_datap->db_ref, mp->b_datap->db_type, 258 (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim); 259 ptr = mp->b_rptr; 260 261 tmp_line[0] = '\0'; 262 while (ptr < mp->b_wptr) { 263 uint_t diff; 264 265 diff = (ptr - mp->b_rptr); 266 if (!(diff & 0x1f)) { 267 if (strlen(tmp_line) > 0) { 268 printf("bytes: %s\n", tmp_line); 269 tmp_line[0] = '\0'; 270 } 271 } 272 if (!(diff & 0x3)) 273 (void) strcat(tmp_line, " "); 274 (void) sprintf(tmp_str, "%02x", *ptr); 275 (void) strcat(tmp_line, tmp_str); 276 ptr++; 277 } 278 if (strlen(tmp_line) > 0) 279 printf("bytes: %s\n", tmp_line); 280 281 mp = mp->b_cont; 282 } 283 284 return ("\n"); 285 } 286 287 #else /* DEBUG */ 288 static char * 289 dump_msg(mblk_t *mp) 290 { 291 printf("Find value of mp %p.\n", mp); 292 return ("\n"); 293 } 294 #endif /* DEBUG */ 295 296 /* 297 * Don't have to lock age_interval, as only one thread will access it at 298 * a time, because I control the one function that does with timeout(). 299 */ 300 static void 301 esp_ager(void *arg) 302 { 303 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 304 netstack_t *ns = espstack->ipsecesp_netstack; 305 hrtime_t begin = gethrtime(); 306 307 sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 308 espstack->ipsecesp_reap_delay, ns); 309 sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q, 310 espstack->ipsecesp_reap_delay, ns); 311 312 espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q, 313 esp_ager, espstack, 314 &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max, 315 info.mi_idnum); 316 } 317 318 /* 319 * Get an ESP NDD parameter. 320 */ 321 /* ARGSUSED */ 322 static int 323 ipsecesp_param_get( 324 queue_t *q, 325 mblk_t *mp, 326 caddr_t cp, 327 cred_t *cr) 328 { 329 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 330 uint_t value; 331 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 332 333 mutex_enter(&espstack->ipsecesp_param_lock); 334 value = ipsecesppa->ipsecesp_param_value; 335 mutex_exit(&espstack->ipsecesp_param_lock); 336 337 (void) mi_mpprintf(mp, "%u", value); 338 return (0); 339 } 340 341 /* 342 * This routine sets an NDD variable in a ipsecespparam_t structure. 343 */ 344 /* ARGSUSED */ 345 static int 346 ipsecesp_param_set( 347 queue_t *q, 348 mblk_t *mp, 349 char *value, 350 caddr_t cp, 351 cred_t *cr) 352 { 353 ulong_t new_value; 354 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 355 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 356 357 /* 358 * Fail the request if the new value does not lie within the 359 * required bounds. 360 */ 361 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 362 new_value < ipsecesppa->ipsecesp_param_min || 363 new_value > ipsecesppa->ipsecesp_param_max) { 364 return (EINVAL); 365 } 366 367 /* Set the new value */ 368 mutex_enter(&espstack->ipsecesp_param_lock); 369 ipsecesppa->ipsecesp_param_value = new_value; 370 mutex_exit(&espstack->ipsecesp_param_lock); 371 return (0); 372 } 373 374 /* 375 * Using lifetime NDD variables, fill in an extended combination's 376 * lifetime information. 377 */ 378 void 379 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) 380 { 381 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 382 383 ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes; 384 ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes; 385 ecomb->sadb_x_ecomb_soft_addtime = 386 espstack->ipsecesp_default_soft_addtime; 387 ecomb->sadb_x_ecomb_hard_addtime = 388 espstack->ipsecesp_default_hard_addtime; 389 ecomb->sadb_x_ecomb_soft_usetime = 390 espstack->ipsecesp_default_soft_usetime; 391 ecomb->sadb_x_ecomb_hard_usetime = 392 espstack->ipsecesp_default_hard_usetime; 393 } 394 395 /* 396 * Initialize things for ESP at module load time. 397 */ 398 boolean_t 399 ipsecesp_ddi_init(void) 400 { 401 esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, 402 IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); 403 404 /* 405 * We want to be informed each time a stack is created or 406 * destroyed in the kernel, so we can maintain the 407 * set of ipsecesp_stack_t's. 408 */ 409 netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL, 410 ipsecesp_stack_fini); 411 412 return (B_TRUE); 413 } 414 415 /* 416 * Walk through the param array specified registering each element with the 417 * named dispatch handler. 418 */ 419 static boolean_t 420 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt) 421 { 422 for (; cnt-- > 0; espp++) { 423 if (espp->ipsecesp_param_name != NULL && 424 espp->ipsecesp_param_name[0]) { 425 if (!nd_load(ndp, 426 espp->ipsecesp_param_name, 427 ipsecesp_param_get, ipsecesp_param_set, 428 (caddr_t)espp)) { 429 nd_free(ndp); 430 return (B_FALSE); 431 } 432 } 433 } 434 return (B_TRUE); 435 } 436 437 /* 438 * Initialize things for ESP for each stack instance 439 */ 440 static void * 441 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns) 442 { 443 ipsecesp_stack_t *espstack; 444 ipsecespparam_t *espp; 445 446 espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack), 447 KM_SLEEP); 448 espstack->ipsecesp_netstack = ns; 449 450 espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 451 espstack->ipsecesp_params = espp; 452 bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr)); 453 454 (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp, 455 A_CNT(lcl_param_arr)); 456 457 (void) esp_kstat_init(espstack, stackid); 458 459 espstack->esp_sadb.s_acquire_timeout = 460 &espstack->ipsecesp_acquire_timeout; 461 sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size, 462 espstack->ipsecesp_netstack); 463 464 mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0); 465 466 ip_drop_register(&espstack->esp_dropper, "IPsec ESP"); 467 return (espstack); 468 } 469 470 /* 471 * Destroy things for ESP at module unload time. 472 */ 473 void 474 ipsecesp_ddi_destroy(void) 475 { 476 netstack_unregister(NS_IPSECESP); 477 taskq_destroy(esp_taskq); 478 } 479 480 /* 481 * Destroy things for ESP for one stack instance 482 */ 483 static void 484 ipsecesp_stack_fini(netstackid_t stackid, void *arg) 485 { 486 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 487 488 if (espstack->esp_pfkey_q != NULL) { 489 (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event); 490 } 491 espstack->esp_sadb.s_acquire_timeout = NULL; 492 sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack); 493 ip_drop_unregister(&espstack->esp_dropper); 494 mutex_destroy(&espstack->ipsecesp_param_lock); 495 nd_free(&espstack->ipsecesp_g_nd); 496 497 kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr)); 498 espstack->ipsecesp_params = NULL; 499 kstat_delete_netstack(espstack->esp_ksp, stackid); 500 espstack->esp_ksp = NULL; 501 espstack->esp_kstats = NULL; 502 kmem_free(espstack, sizeof (*espstack)); 503 } 504 505 /* 506 * ESP module open routine, which is here for keysock plumbing. 507 * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old 508 * Days of export control, and fears that ESP would not be allowed 509 * to be shipped at all by default. Eventually, keysock should 510 * either access AH and ESP via modstubs or krtld dependencies, or 511 * perhaps be folded in with AH and ESP into a single IPsec/netsec 512 * module ("netsec" if PF_KEY provides more than AH/ESP keying tables). 513 */ 514 /* ARGSUSED */ 515 static int 516 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 517 { 518 netstack_t *ns; 519 ipsecesp_stack_t *espstack; 520 521 if (secpolicy_ip_config(credp, B_FALSE) != 0) 522 return (EPERM); 523 524 if (q->q_ptr != NULL) 525 return (0); /* Re-open of an already open instance. */ 526 527 if (sflag != MODOPEN) 528 return (EINVAL); 529 530 ns = netstack_find_by_cred(credp); 531 ASSERT(ns != NULL); 532 espstack = ns->netstack_ipsecesp; 533 ASSERT(espstack != NULL); 534 535 q->q_ptr = espstack; 536 WR(q)->q_ptr = q->q_ptr; 537 538 qprocson(q); 539 return (0); 540 } 541 542 /* 543 * ESP module close routine. 544 */ 545 /* ARGSUSED */ 546 static int 547 ipsecesp_close(queue_t *q, int flags __unused, cred_t *credp __unused) 548 { 549 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 550 551 /* 552 * Clean up q_ptr, if needed. 553 */ 554 qprocsoff(q); 555 556 /* Keysock queue check is safe, because of OCEXCL perimeter. */ 557 558 if (q == espstack->esp_pfkey_q) { 559 esp1dbg(espstack, 560 ("ipsecesp_close: Ummm... keysock is closing ESP.\n")); 561 espstack->esp_pfkey_q = NULL; 562 /* Detach qtimeouts. */ 563 (void) quntimeout(q, espstack->esp_event); 564 } 565 566 netstack_rele(espstack->ipsecesp_netstack); 567 return (0); 568 } 569 570 /* 571 * Add a number of bytes to what the SA has protected so far. Return 572 * B_TRUE if the SA can still protect that many bytes. 573 * 574 * Caller must REFRELE the passed-in assoc. This function must REFRELE 575 * any obtained peer SA. 576 */ 577 static boolean_t 578 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) 579 { 580 ipsa_t *inassoc, *outassoc; 581 isaf_t *bucket; 582 boolean_t inrc, outrc, isv6; 583 sadb_t *sp; 584 int outhash; 585 netstack_t *ns = assoc->ipsa_netstack; 586 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 587 588 /* No peer? No problem! */ 589 if (!assoc->ipsa_haspeer) { 590 return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes, 591 B_TRUE)); 592 } 593 594 /* 595 * Otherwise, we want to grab both the original assoc and its peer. 596 * There might be a race for this, but if it's a real race, two 597 * expire messages may occur. We limit this by only sending the 598 * expire message on one of the peers, we'll pick the inbound 599 * arbitrarily. 600 * 601 * If we need tight synchronization on the peer SA, then we need to 602 * reconsider. 603 */ 604 605 /* Use address length to select IPv6/IPv4 */ 606 isv6 = (assoc->ipsa_addrfam == AF_INET6); 607 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 608 609 if (inbound) { 610 inassoc = assoc; 611 if (isv6) { 612 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 613 &inassoc->ipsa_dstaddr)); 614 } else { 615 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 616 &inassoc->ipsa_dstaddr)); 617 } 618 bucket = &sp->sdb_of[outhash]; 619 mutex_enter(&bucket->isaf_lock); 620 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 621 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 622 inassoc->ipsa_addrfam); 623 mutex_exit(&bucket->isaf_lock); 624 if (outassoc == NULL) { 625 /* Q: Do we wish to set haspeer == B_FALSE? */ 626 esp0dbg(("esp_age_bytes: " 627 "can't find peer for inbound.\n")); 628 return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc, 629 bytes, B_TRUE)); 630 } 631 } else { 632 outassoc = assoc; 633 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 634 mutex_enter(&bucket->isaf_lock); 635 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 636 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 637 outassoc->ipsa_addrfam); 638 mutex_exit(&bucket->isaf_lock); 639 if (inassoc == NULL) { 640 /* Q: Do we wish to set haspeer == B_FALSE? */ 641 esp0dbg(("esp_age_bytes: " 642 "can't find peer for outbound.\n")); 643 return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc, 644 bytes, B_TRUE)); 645 } 646 } 647 648 inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE); 649 outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE); 650 651 /* 652 * REFRELE any peer SA. 653 * 654 * Because of the multi-line macro nature of IPSA_REFRELE, keep 655 * them in { }. 656 */ 657 if (inbound) { 658 IPSA_REFRELE(outassoc); 659 } else { 660 IPSA_REFRELE(inassoc); 661 } 662 663 return (inrc && outrc); 664 } 665 666 /* 667 * Do incoming NAT-T manipulations for packet. 668 * Returns NULL if the mblk chain is consumed. 669 */ 670 static mblk_t * 671 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc) 672 { 673 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 674 tcpha_t *tcpha; 675 udpha_t *udpha; 676 /* Initialize to our inbound cksum adjustment... */ 677 uint32_t sum = assoc->ipsa_inbound_cksum; 678 679 switch (ipha->ipha_protocol) { 680 case IPPROTO_TCP: 681 tcpha = (tcpha_t *)(data_mp->b_rptr + 682 IPH_HDR_LENGTH(ipha)); 683 684 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16) 685 sum += ~ntohs(tcpha->tha_sum) & 0xFFFF; 686 DOWN_SUM(sum); 687 DOWN_SUM(sum); 688 tcpha->tha_sum = ~htons(sum); 689 break; 690 case IPPROTO_UDP: 691 udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha)); 692 693 if (udpha->uha_checksum != 0) { 694 /* Adujst if the inbound one was not zero. */ 695 sum += ~ntohs(udpha->uha_checksum) & 0xFFFF; 696 DOWN_SUM(sum); 697 DOWN_SUM(sum); 698 udpha->uha_checksum = ~htons(sum); 699 if (udpha->uha_checksum == 0) 700 udpha->uha_checksum = 0xFFFF; 701 } 702 #undef DOWN_SUM 703 break; 704 case IPPROTO_IP: 705 /* 706 * This case is only an issue for self-encapsulated 707 * packets. So for now, fall through. 708 */ 709 break; 710 } 711 return (data_mp); 712 } 713 714 715 /* 716 * Strip ESP header, check padding, and fix IP header. 717 * Returns B_TRUE on success, B_FALSE if an error occured. 718 */ 719 static boolean_t 720 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, 721 kstat_named_t **counter, ipsecesp_stack_t *espstack) 722 { 723 ipha_t *ipha; 724 ip6_t *ip6h; 725 uint_t divpoint; 726 mblk_t *scratch; 727 uint8_t nexthdr, padlen; 728 uint8_t lastpad; 729 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 730 uint8_t *lastbyte; 731 732 /* 733 * Strip ESP data and fix IP header. 734 * 735 * XXX In case the beginning of esp_inbound() changes to not do a 736 * pullup, this part of the code can remain unchanged. 737 */ 738 if (isv4) { 739 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t)); 740 ipha = (ipha_t *)data_mp->b_rptr; 741 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) + 742 IPH_HDR_LENGTH(ipha)); 743 divpoint = IPH_HDR_LENGTH(ipha); 744 } else { 745 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t)); 746 ip6h = (ip6_t *)data_mp->b_rptr; 747 divpoint = ip_hdr_length_v6(data_mp, ip6h); 748 } 749 750 scratch = data_mp; 751 while (scratch->b_cont != NULL) 752 scratch = scratch->b_cont; 753 754 ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3); 755 756 /* 757 * "Next header" and padding length are the last two bytes in the 758 * ESP-protected datagram, thus the explicit - 1 and - 2. 759 * lastpad is the last byte of the padding, which can be used for 760 * a quick check to see if the padding is correct. 761 */ 762 lastbyte = scratch->b_wptr - 1; 763 nexthdr = *lastbyte--; 764 padlen = *lastbyte--; 765 766 if (isv4) { 767 /* Fix part of the IP header. */ 768 ipha->ipha_protocol = nexthdr; 769 /* 770 * Reality check the padlen. The explicit - 2 is for the 771 * padding length and the next-header bytes. 772 */ 773 if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 - 774 sizeof (esph_t) - ivlen) { 775 ESP_BUMP_STAT(espstack, bad_decrypt); 776 ipsec_rl_strlog(espstack->ipsecesp_netstack, 777 info.mi_idnum, 0, 0, 778 SL_ERROR | SL_WARN, 779 "Corrupt ESP packet (padlen too big).\n"); 780 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 781 padlen)); 782 esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp " 783 "hdr - ivlen(%d) = %d.\n", 784 ntohs(ipha->ipha_length), ivlen, 785 (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) - 786 2 - sizeof (esph_t) - ivlen))); 787 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 788 return (B_FALSE); 789 } 790 791 /* 792 * Fix the rest of the header. The explicit - 2 is for the 793 * padding length and the next-header bytes. 794 */ 795 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen - 796 2 - sizeof (esph_t) - ivlen); 797 ipha->ipha_hdr_checksum = 0; 798 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 799 } else { 800 if (ip6h->ip6_nxt == IPPROTO_ESP) { 801 ip6h->ip6_nxt = nexthdr; 802 } else { 803 ip_pkt_t ipp; 804 805 bzero(&ipp, sizeof (ipp)); 806 (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, 807 NULL); 808 if (ipp.ipp_dstopts != NULL) { 809 ipp.ipp_dstopts->ip6d_nxt = nexthdr; 810 } else if (ipp.ipp_rthdr != NULL) { 811 ipp.ipp_rthdr->ip6r_nxt = nexthdr; 812 } else if (ipp.ipp_hopopts != NULL) { 813 ipp.ipp_hopopts->ip6h_nxt = nexthdr; 814 } else { 815 /* Panic a DEBUG kernel. */ 816 ASSERT(ipp.ipp_hopopts != NULL); 817 /* Otherwise, pretend it's IP + ESP. */ 818 cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n"); 819 ip6h->ip6_nxt = nexthdr; 820 } 821 } 822 823 if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) - 824 ivlen) { 825 ESP_BUMP_STAT(espstack, bad_decrypt); 826 ipsec_rl_strlog(espstack->ipsecesp_netstack, 827 info.mi_idnum, 0, 0, 828 SL_ERROR | SL_WARN, 829 "Corrupt ESP packet (v6 padlen too big).\n"); 830 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 831 padlen)); 832 esp1dbg(espstack, 833 ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = " 834 "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen) 835 + sizeof (ip6_t)), ivlen, 836 (unsigned)(ntohs(ip6h->ip6_plen) - 2 - 837 sizeof (esph_t) - ivlen))); 838 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 839 return (B_FALSE); 840 } 841 842 843 /* 844 * Fix the rest of the header. The explicit - 2 is for the 845 * padding length and the next-header bytes. IPv6 is nice, 846 * because there's no hdr checksum! 847 */ 848 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen - 849 2 - sizeof (esph_t) - ivlen); 850 } 851 852 if (espstack->ipsecesp_padding_check > 0 && padlen > 0) { 853 /* 854 * Weak padding check: compare last-byte to length, they 855 * should be equal. 856 */ 857 lastpad = *lastbyte--; 858 859 if (padlen != lastpad) { 860 ipsec_rl_strlog(espstack->ipsecesp_netstack, 861 info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 862 "Corrupt ESP packet (lastpad != padlen).\n"); 863 esp1dbg(espstack, 864 ("lastpad (%d) not equal to padlen (%d):\n", 865 lastpad, padlen)); 866 ESP_BUMP_STAT(espstack, bad_padding); 867 *counter = DROPPER(ipss, ipds_esp_bad_padding); 868 return (B_FALSE); 869 } 870 871 /* 872 * Strong padding check: Check all pad bytes to see that 873 * they're ascending. Go backwards using a descending counter 874 * to verify. padlen == 1 is checked by previous block, so 875 * only bother if we've more than 1 byte of padding. 876 * Consequently, start the check one byte before the location 877 * of "lastpad". 878 */ 879 if (espstack->ipsecesp_padding_check > 1) { 880 /* 881 * This assert may have to become an if and a pullup 882 * if we start accepting multi-dblk mblks. For now, 883 * though, any packet here will have been pulled up in 884 * esp_inbound. 885 */ 886 ASSERT(MBLKL(scratch) >= lastpad + 3); 887 888 /* 889 * Use "--lastpad" because we already checked the very 890 * last pad byte previously. 891 */ 892 while (--lastpad != 0) { 893 if (lastpad != *lastbyte) { 894 ipsec_rl_strlog( 895 espstack->ipsecesp_netstack, 896 info.mi_idnum, 0, 0, 897 SL_ERROR | SL_WARN, "Corrupt ESP " 898 "packet (bad padding).\n"); 899 esp1dbg(espstack, 900 ("padding not in correct" 901 " format:\n")); 902 ESP_BUMP_STAT(espstack, bad_padding); 903 *counter = DROPPER(ipss, 904 ipds_esp_bad_padding); 905 return (B_FALSE); 906 } 907 lastbyte--; 908 } 909 } 910 } 911 912 /* Trim off the padding. */ 913 ASSERT(data_mp->b_cont == NULL); 914 data_mp->b_wptr -= (padlen + 2); 915 916 /* 917 * Remove the ESP header. 918 * 919 * The above assertions about data_mp's size will make this work. 920 * 921 * XXX Question: If I send up and get back a contiguous mblk, 922 * would it be quicker to bcopy over, or keep doing the dupb stuff? 923 * I go with copying for now. 924 */ 925 926 if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) && 927 IS_P2ALIGNED(ivlen, sizeof (uint32_t))) { 928 uint8_t *start = data_mp->b_rptr; 929 uint32_t *src, *dst; 930 931 src = (uint32_t *)(start + divpoint); 932 dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen); 933 934 ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) && 935 IS_P2ALIGNED(src, sizeof (uint32_t))); 936 937 do { 938 src--; 939 dst--; 940 *dst = *src; 941 } while (src != (uint32_t *)start); 942 943 data_mp->b_rptr = (uchar_t *)dst; 944 } else { 945 uint8_t *start = data_mp->b_rptr; 946 uint8_t *src, *dst; 947 948 src = start + divpoint; 949 dst = src + sizeof (esph_t) + ivlen; 950 951 do { 952 src--; 953 dst--; 954 *dst = *src; 955 } while (src != start); 956 957 data_mp->b_rptr = dst; 958 } 959 960 esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n")); 961 esp2dbg(espstack, (dump_msg(data_mp))); 962 963 return (B_TRUE); 964 } 965 966 /* 967 * Updating use times can be tricky business if the ipsa_haspeer flag is 968 * set. This function is called once in an SA's lifetime. 969 * 970 * Caller has to REFRELE "assoc" which is passed in. This function has 971 * to REFRELE any peer SA that is obtained. 972 */ 973 static void 974 esp_set_usetime(ipsa_t *assoc, boolean_t inbound) 975 { 976 ipsa_t *inassoc, *outassoc; 977 isaf_t *bucket; 978 sadb_t *sp; 979 int outhash; 980 boolean_t isv6; 981 netstack_t *ns = assoc->ipsa_netstack; 982 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 983 984 /* No peer? No problem! */ 985 if (!assoc->ipsa_haspeer) { 986 sadb_set_usetime(assoc); 987 return; 988 } 989 990 /* 991 * Otherwise, we want to grab both the original assoc and its peer. 992 * There might be a race for this, but if it's a real race, the times 993 * will be out-of-synch by at most a second, and since our time 994 * granularity is a second, this won't be a problem. 995 * 996 * If we need tight synchronization on the peer SA, then we need to 997 * reconsider. 998 */ 999 1000 /* Use address length to select IPv6/IPv4 */ 1001 isv6 = (assoc->ipsa_addrfam == AF_INET6); 1002 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 1003 1004 if (inbound) { 1005 inassoc = assoc; 1006 if (isv6) { 1007 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 1008 &inassoc->ipsa_dstaddr)); 1009 } else { 1010 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 1011 &inassoc->ipsa_dstaddr)); 1012 } 1013 bucket = &sp->sdb_of[outhash]; 1014 mutex_enter(&bucket->isaf_lock); 1015 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 1016 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 1017 inassoc->ipsa_addrfam); 1018 mutex_exit(&bucket->isaf_lock); 1019 if (outassoc == NULL) { 1020 /* Q: Do we wish to set haspeer == B_FALSE? */ 1021 esp0dbg(("esp_set_usetime: " 1022 "can't find peer for inbound.\n")); 1023 sadb_set_usetime(inassoc); 1024 return; 1025 } 1026 } else { 1027 outassoc = assoc; 1028 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 1029 mutex_enter(&bucket->isaf_lock); 1030 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 1031 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 1032 outassoc->ipsa_addrfam); 1033 mutex_exit(&bucket->isaf_lock); 1034 if (inassoc == NULL) { 1035 /* Q: Do we wish to set haspeer == B_FALSE? */ 1036 esp0dbg(("esp_set_usetime: " 1037 "can't find peer for outbound.\n")); 1038 sadb_set_usetime(outassoc); 1039 return; 1040 } 1041 } 1042 1043 /* Update usetime on both. */ 1044 sadb_set_usetime(inassoc); 1045 sadb_set_usetime(outassoc); 1046 1047 /* 1048 * REFRELE any peer SA. 1049 * 1050 * Because of the multi-line macro nature of IPSA_REFRELE, keep 1051 * them in { }. 1052 */ 1053 if (inbound) { 1054 IPSA_REFRELE(outassoc); 1055 } else { 1056 IPSA_REFRELE(inassoc); 1057 } 1058 } 1059 1060 /* 1061 * Handle ESP inbound data for IPv4 and IPv6. 1062 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1063 * mblk chain data_mp. 1064 */ 1065 mblk_t * 1066 esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira) 1067 { 1068 esph_t *esph = (esph_t *)arg; 1069 ipsa_t *ipsa = ira->ira_ipsec_esp_sa; 1070 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1071 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1072 ipsec_stack_t *ipss = ns->netstack_ipsec; 1073 1074 /* 1075 * We may wish to check replay in-range-only here as an optimization. 1076 * Include the reality check of ipsa->ipsa_replay > 1077 * ipsa->ipsa_replay_wsize for times when it's the first N packets, 1078 * where N == ipsa->ipsa_replay_wsize. 1079 * 1080 * Another check that may come here later is the "collision" check. 1081 * If legitimate packets flow quickly enough, this won't be a problem, 1082 * but collisions may cause authentication algorithm crunching to 1083 * take place when it doesn't need to. 1084 */ 1085 if (!sadb_replay_peek(ipsa, esph->esph_replay)) { 1086 ESP_BUMP_STAT(espstack, replay_early_failures); 1087 IP_ESP_BUMP_STAT(ipss, in_discards); 1088 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 1089 DROPPER(ipss, ipds_esp_early_replay), 1090 &espstack->esp_dropper); 1091 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1092 return (NULL); 1093 } 1094 1095 /* 1096 * Adjust the IP header's payload length to reflect the removal 1097 * of the ICV. 1098 */ 1099 if (!(ira->ira_flags & IRAF_IS_IPV4)) { 1100 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr; 1101 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - 1102 ipsa->ipsa_mac_len); 1103 } else { 1104 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 1105 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - 1106 ipsa->ipsa_mac_len); 1107 } 1108 1109 /* submit the request to the crypto framework */ 1110 return (esp_submit_req_inbound(data_mp, ira, ipsa, 1111 (uint8_t *)esph - data_mp->b_rptr)); 1112 } 1113 1114 /* XXX refactor me */ 1115 /* 1116 * Handle the SADB_GETSPI message. Create a larval SA. 1117 */ 1118 static void 1119 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 1120 { 1121 ipsa_t *newbie, *target; 1122 isaf_t *outbound, *inbound; 1123 int rc, diagnostic; 1124 sadb_sa_t *assoc; 1125 keysock_out_t *kso; 1126 uint32_t newspi; 1127 1128 /* 1129 * Randomly generate a proposed SPI value 1130 */ 1131 if (cl_inet_getspi != NULL) { 1132 cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid, 1133 IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL); 1134 } else { 1135 (void) random_get_pseudo_bytes((uint8_t *)&newspi, 1136 sizeof (uint32_t)); 1137 } 1138 newbie = sadb_getspi(ksi, newspi, &diagnostic, 1139 espstack->ipsecesp_netstack, IPPROTO_ESP); 1140 1141 if (newbie == NULL) { 1142 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic, 1143 ksi->ks_in_serial); 1144 return; 1145 } else if (newbie == (ipsa_t *)-1) { 1146 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 1147 ksi->ks_in_serial); 1148 return; 1149 } 1150 1151 /* 1152 * XXX - We may randomly collide. We really should recover from this. 1153 * Unfortunately, that could require spending way-too-much-time 1154 * in here. For now, let the user retry. 1155 */ 1156 1157 if (newbie->ipsa_addrfam == AF_INET6) { 1158 outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6, 1159 *(uint32_t *)(newbie->ipsa_dstaddr)); 1160 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6, 1161 newbie->ipsa_spi); 1162 } else { 1163 ASSERT(newbie->ipsa_addrfam == AF_INET); 1164 outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4, 1165 *(uint32_t *)(newbie->ipsa_dstaddr)); 1166 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4, 1167 newbie->ipsa_spi); 1168 } 1169 1170 mutex_enter(&outbound->isaf_lock); 1171 mutex_enter(&inbound->isaf_lock); 1172 1173 /* 1174 * Check for collisions (i.e. did sadb_getspi() return with something 1175 * that already exists?). 1176 * 1177 * Try outbound first. Even though SADB_GETSPI is traditionally 1178 * for inbound SAs, you never know what a user might do. 1179 */ 1180 target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi, 1181 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam); 1182 if (target == NULL) { 1183 target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi, 1184 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, 1185 newbie->ipsa_addrfam); 1186 } 1187 1188 /* 1189 * I don't have collisions elsewhere! 1190 * (Nor will I because I'm still holding inbound/outbound locks.) 1191 */ 1192 1193 if (target != NULL) { 1194 rc = EEXIST; 1195 IPSA_REFRELE(target); 1196 } else { 1197 /* 1198 * sadb_insertassoc() also checks for collisions, so 1199 * if there's a colliding entry, rc will be set 1200 * to EEXIST. 1201 */ 1202 rc = sadb_insertassoc(newbie, inbound); 1203 newbie->ipsa_hardexpiretime = gethrestime_sec(); 1204 newbie->ipsa_hardexpiretime += 1205 espstack->ipsecesp_larval_timeout; 1206 } 1207 1208 /* 1209 * Can exit outbound mutex. Hold inbound until we're done 1210 * with newbie. 1211 */ 1212 mutex_exit(&outbound->isaf_lock); 1213 1214 if (rc != 0) { 1215 mutex_exit(&inbound->isaf_lock); 1216 IPSA_REFRELE(newbie); 1217 sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc, 1218 SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial); 1219 return; 1220 } 1221 1222 1223 /* Can write here because I'm still holding the bucket lock. */ 1224 newbie->ipsa_type = SADB_SATYPE_ESP; 1225 1226 /* 1227 * Construct successful return message. We have one thing going 1228 * for us in PF_KEY v2. That's the fact that 1229 * sizeof (sadb_spirange_t) == sizeof (sadb_sa_t) 1230 */ 1231 assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE]; 1232 assoc->sadb_sa_exttype = SADB_EXT_SA; 1233 assoc->sadb_sa_spi = newbie->ipsa_spi; 1234 *((uint64_t *)(&assoc->sadb_sa_replay)) = 0; 1235 mutex_exit(&inbound->isaf_lock); 1236 1237 /* Convert KEYSOCK_IN to KEYSOCK_OUT. */ 1238 kso = (keysock_out_t *)ksi; 1239 kso->ks_out_len = sizeof (*kso); 1240 kso->ks_out_serial = ksi->ks_in_serial; 1241 kso->ks_out_type = KEYSOCK_OUT; 1242 1243 /* 1244 * Can safely putnext() to esp_pfkey_q, because this is a turnaround 1245 * from the esp_pfkey_q. 1246 */ 1247 putnext(espstack->esp_pfkey_q, mp); 1248 } 1249 1250 /* 1251 * Insert the ESP header into a packet. Duplicate an mblk, and insert a newly 1252 * allocated mblk with the ESP header in between the two. 1253 */ 1254 static boolean_t 1255 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint, 1256 ipsecesp_stack_t *espstack) 1257 { 1258 mblk_t *split_mp = mp; 1259 uint_t wheretodiv = divpoint; 1260 1261 while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) { 1262 wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr); 1263 split_mp = split_mp->b_cont; 1264 ASSERT(split_mp != NULL); 1265 } 1266 1267 if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) { 1268 mblk_t *scratch; 1269 1270 /* "scratch" is the 2nd half, split_mp is the first. */ 1271 scratch = dupb(split_mp); 1272 if (scratch == NULL) { 1273 esp1dbg(espstack, 1274 ("esp_insert_esp: can't allocate scratch.\n")); 1275 return (B_FALSE); 1276 } 1277 /* NOTE: dupb() doesn't set b_cont appropriately. */ 1278 scratch->b_cont = split_mp->b_cont; 1279 scratch->b_rptr += wheretodiv; 1280 split_mp->b_wptr = split_mp->b_rptr + wheretodiv; 1281 split_mp->b_cont = scratch; 1282 } 1283 /* 1284 * At this point, split_mp is exactly "wheretodiv" bytes long, and 1285 * holds the end of the pre-ESP part of the datagram. 1286 */ 1287 esp_mp->b_cont = split_mp->b_cont; 1288 split_mp->b_cont = esp_mp; 1289 1290 return (B_TRUE); 1291 } 1292 1293 /* 1294 * Section 7 of RFC 3947 says: 1295 * 1296 * 7. Recovering from the Expiring NAT Mappings 1297 * 1298 * There are cases where NAT box decides to remove mappings that are still 1299 * alive (for example, when the keepalive interval is too long, or when the 1300 * NAT box is rebooted). To recover from this, ends that are NOT behind 1301 * NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from 1302 * the other end to determine which IP and port addresses should be used. 1303 * The host behind dynamic NAT MUST NOT do this, as otherwise it opens a 1304 * DoS attack possibility because the IP address or port of the other host 1305 * will not change (it is not behind NAT). 1306 * 1307 * Keepalives cannot be used for these purposes, as they are not 1308 * authenticated, but any IKE authenticated IKE packet or ESP packet can be 1309 * used to detect whether the IP address or the port has changed. 1310 * 1311 * The following function will check an SA and its explicitly-set pair to see 1312 * if the NAT-T remote port matches the received packet (which must have 1313 * passed ESP authentication, see esp_in_done() for the caller context). If 1314 * there is a mismatch, the SAs are updated. It is not important if we race 1315 * with a transmitting thread, as if there is a transmitting thread, it will 1316 * merely emit a packet that will most-likely be dropped. 1317 * 1318 * "ports" are ordered src,dst, and assoc is an inbound SA, where src should 1319 * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port. 1320 */ 1321 #ifdef _LITTLE_ENDIAN 1322 #define FIRST_16(x) ((x) & 0xFFFF) 1323 #define NEXT_16(x) (((x) >> 16) & 0xFFFF) 1324 #else 1325 #define FIRST_16(x) (((x) >> 16) & 0xFFFF) 1326 #define NEXT_16(x) ((x) & 0xFFFF) 1327 #endif 1328 static void 1329 esp_port_freshness(uint32_t ports, ipsa_t *assoc) 1330 { 1331 uint16_t remote = FIRST_16(ports); 1332 uint16_t local = NEXT_16(ports); 1333 ipsa_t *outbound_peer; 1334 isaf_t *bucket; 1335 ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp; 1336 1337 /* We found a conn_t, therefore local != 0. */ 1338 ASSERT(local != 0); 1339 /* Assume an IPv4 SA. */ 1340 ASSERT(assoc->ipsa_addrfam == AF_INET); 1341 1342 /* 1343 * On-the-wire rport == 0 means something's very wrong. 1344 * An unpaired SA is also useless to us. 1345 * If we are behind the NAT, don't bother. 1346 * A zero local NAT port defaults to 4500, so check that too. 1347 * And, of course, if the ports already match, we don't need to 1348 * bother. 1349 */ 1350 if (remote == 0 || assoc->ipsa_otherspi == 0 || 1351 (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) || 1352 (assoc->ipsa_remote_nat_port == 0 && 1353 remote == htons(IPPORT_IKE_NATT)) || 1354 remote == assoc->ipsa_remote_nat_port) 1355 return; 1356 1357 /* Try and snag the peer. NOTE: Assume IPv4 for now. */ 1358 bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4), 1359 assoc->ipsa_srcaddr[0]); 1360 mutex_enter(&bucket->isaf_lock); 1361 outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi, 1362 assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET); 1363 mutex_exit(&bucket->isaf_lock); 1364 1365 /* We probably lost a race to a deleting or expiring thread. */ 1366 if (outbound_peer == NULL) 1367 return; 1368 1369 /* 1370 * Hold the mutexes for both SAs so we don't race another inbound 1371 * thread. A lock-entry order shouldn't matter, since all other 1372 * per-ipsa locks are individually held-then-released. 1373 * 1374 * Luckily, this has nothing to do with the remote-NAT address, 1375 * so we don't have to re-scribble the cached-checksum differential. 1376 */ 1377 mutex_enter(&outbound_peer->ipsa_lock); 1378 mutex_enter(&assoc->ipsa_lock); 1379 outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port = 1380 remote; 1381 mutex_exit(&assoc->ipsa_lock); 1382 mutex_exit(&outbound_peer->ipsa_lock); 1383 IPSA_REFRELE(outbound_peer); 1384 ESP_BUMP_STAT(espstack, sa_port_renumbers); 1385 } 1386 /* 1387 * Finish processing of an inbound ESP packet after processing by the 1388 * crypto framework. 1389 * - Remove the ESP header. 1390 * - Send packet back to IP. 1391 * If authentication was performed on the packet, this function is called 1392 * only if the authentication succeeded. 1393 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1394 * mblk chain data_mp. 1395 */ 1396 static mblk_t * 1397 esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic) 1398 { 1399 ipsa_t *assoc; 1400 uint_t espstart; 1401 uint32_t ivlen = 0; 1402 uint_t processed_len; 1403 esph_t *esph; 1404 kstat_named_t *counter; 1405 boolean_t is_natt; 1406 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1407 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1408 ipsec_stack_t *ipss = ns->netstack_ipsec; 1409 1410 assoc = ira->ira_ipsec_esp_sa; 1411 ASSERT(assoc != NULL); 1412 1413 is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 1414 1415 /* get the pointer to the ESP header */ 1416 if (assoc->ipsa_encr_alg == SADB_EALG_NULL) { 1417 /* authentication-only ESP */ 1418 espstart = ic->ic_crypto_data.cd_offset; 1419 processed_len = ic->ic_crypto_data.cd_length; 1420 } else { 1421 /* encryption present */ 1422 ivlen = assoc->ipsa_iv_len; 1423 if (assoc->ipsa_auth_alg == SADB_AALG_NONE) { 1424 /* encryption-only ESP */ 1425 espstart = ic->ic_crypto_data.cd_offset - 1426 sizeof (esph_t) - assoc->ipsa_iv_len; 1427 processed_len = ic->ic_crypto_data.cd_length + 1428 ivlen; 1429 } else { 1430 /* encryption with authentication */ 1431 espstart = ic->ic_crypto_dual_data.dd_offset1; 1432 processed_len = ic->ic_crypto_dual_data.dd_len2 + 1433 ivlen; 1434 } 1435 } 1436 1437 esph = (esph_t *)(data_mp->b_rptr + espstart); 1438 1439 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE || 1440 (assoc->ipsa_flags & IPSA_F_COMBINED)) { 1441 /* 1442 * Authentication passed if we reach this point. 1443 * Packets with authentication will have the ICV 1444 * after the crypto data. Adjust b_wptr before 1445 * making padlen checks. 1446 */ 1447 ESP_BUMP_STAT(espstack, good_auth); 1448 data_mp->b_wptr -= assoc->ipsa_mac_len; 1449 1450 /* 1451 * Check replay window here! 1452 * For right now, assume keysock will set the replay window 1453 * size to zero for SAs that have an unspecified sender. 1454 * This may change... 1455 */ 1456 1457 if (!sadb_replay_check(assoc, esph->esph_replay)) { 1458 /* 1459 * Log the event. As of now we print out an event. 1460 * Do not print the replay failure number, or else 1461 * syslog cannot collate the error messages. Printing 1462 * the replay number that failed opens a denial-of- 1463 * service attack. 1464 */ 1465 ipsec_assocfailure(info.mi_idnum, 0, 0, 1466 SL_ERROR | SL_WARN, 1467 "Replay failed for ESP spi 0x%x, dst %s.\n", 1468 assoc->ipsa_spi, assoc->ipsa_dstaddr, 1469 assoc->ipsa_addrfam, espstack->ipsecesp_netstack); 1470 ESP_BUMP_STAT(espstack, replay_failures); 1471 counter = DROPPER(ipss, ipds_esp_replay); 1472 goto drop_and_bail; 1473 } 1474 1475 if (is_natt) { 1476 ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS); 1477 ASSERT(ira->ira_esp_udp_ports != 0); 1478 esp_port_freshness(ira->ira_esp_udp_ports, assoc); 1479 } 1480 } 1481 1482 esp_set_usetime(assoc, B_TRUE); 1483 1484 if (!esp_age_bytes(assoc, processed_len, B_TRUE)) { 1485 /* The ipsa has hit hard expiration, LOG and AUDIT. */ 1486 ipsec_assocfailure(info.mi_idnum, 0, 0, 1487 SL_ERROR | SL_WARN, 1488 "ESP association 0x%x, dst %s had bytes expire.\n", 1489 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1490 espstack->ipsecesp_netstack); 1491 ESP_BUMP_STAT(espstack, bytes_expired); 1492 counter = DROPPER(ipss, ipds_esp_bytes_expire); 1493 goto drop_and_bail; 1494 } 1495 1496 /* 1497 * Remove ESP header and padding from packet. I hope the compiler 1498 * spews "branch, predict taken" code for this. 1499 */ 1500 1501 if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4), 1502 ivlen, &counter, espstack)) { 1503 1504 if (is_system_labeled() && assoc->ipsa_tsl != NULL) { 1505 if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) { 1506 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 1507 DROPPER(ipss, ipds_ah_nomem), 1508 &espstack->esp_dropper); 1509 BUMP_MIB(ira->ira_ill->ill_ip_mib, 1510 ipIfStatsInDiscards); 1511 return (NULL); 1512 } 1513 } 1514 if (is_natt) 1515 return (esp_fix_natt_checksums(data_mp, assoc)); 1516 1517 if (assoc->ipsa_state == IPSA_STATE_IDLE) { 1518 /* 1519 * Cluster buffering case. Tell caller that we're 1520 * handling the packet. 1521 */ 1522 sadb_buf_pkt(assoc, data_mp, ira); 1523 return (NULL); 1524 } 1525 1526 return (data_mp); 1527 } 1528 1529 esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n")); 1530 drop_and_bail: 1531 IP_ESP_BUMP_STAT(ipss, in_discards); 1532 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter, 1533 &espstack->esp_dropper); 1534 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1535 return (NULL); 1536 } 1537 1538 /* 1539 * Called upon failing the inbound ICV check. The message passed as 1540 * argument is freed. 1541 */ 1542 static void 1543 esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira) 1544 { 1545 ipsa_t *assoc = ira->ira_ipsec_esp_sa; 1546 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1547 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1548 ipsec_stack_t *ipss = ns->netstack_ipsec; 1549 1550 /* 1551 * Log the event. Don't print to the console, block 1552 * potential denial-of-service attack. 1553 */ 1554 ESP_BUMP_STAT(espstack, bad_auth); 1555 1556 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 1557 "ESP Authentication failed for spi 0x%x, dst %s.\n", 1558 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1559 espstack->ipsecesp_netstack); 1560 1561 IP_ESP_BUMP_STAT(ipss, in_discards); 1562 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 1563 DROPPER(ipss, ipds_esp_bad_auth), 1564 &espstack->esp_dropper); 1565 } 1566 1567 1568 /* 1569 * Invoked for outbound packets after ESP processing. If the packet 1570 * also requires AH, performs the AH SA selection and AH processing. 1571 * 1572 * Returns data_mp (possibly with AH added) unless data_mp was consumed 1573 * due to an error, or queued due to async. crypto or an ACQUIRE trigger. 1574 */ 1575 static mblk_t * 1576 esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa) 1577 { 1578 ipsec_action_t *ap; 1579 1580 ap = ixa->ixa_ipsec_action; 1581 if (ap == NULL) { 1582 ipsec_policy_t *pp = ixa->ixa_ipsec_policy; 1583 ap = pp->ipsp_act; 1584 } 1585 1586 if (!ap->ipa_want_ah) 1587 return (data_mp); 1588 1589 /* 1590 * Normally the AH SA would have already been put in place 1591 * but it could have been flushed so we need to look for it. 1592 */ 1593 if (ixa->ixa_ipsec_ah_sa == NULL) { 1594 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) { 1595 sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE); 1596 return (NULL); 1597 } 1598 } 1599 ASSERT(ixa->ixa_ipsec_ah_sa != NULL); 1600 1601 data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa); 1602 return (data_mp); 1603 } 1604 1605 1606 /* 1607 * Kernel crypto framework callback invoked after completion of async 1608 * crypto requests for outbound packets. 1609 */ 1610 static void 1611 esp_kcf_callback_outbound(void *arg, int status) 1612 { 1613 mblk_t *mp = (mblk_t *)arg; 1614 mblk_t *async_mp; 1615 netstack_t *ns; 1616 ipsec_stack_t *ipss; 1617 ipsecesp_stack_t *espstack; 1618 mblk_t *data_mp; 1619 ip_xmit_attr_t ixas; 1620 ipsec_crypto_t *ic; 1621 ill_t *ill; 1622 1623 /* 1624 * First remove the ipsec_crypto_t mblk 1625 * Note that we need to ipsec_free_crypto_data(mp) once done with ic. 1626 */ 1627 async_mp = ipsec_remove_crypto_data(mp, &ic); 1628 ASSERT(async_mp != NULL); 1629 1630 /* 1631 * Extract the ip_xmit_attr_t from the first mblk. 1632 * Verifies that the netstack and ill is still around; could 1633 * have vanished while kEf was doing its work. 1634 * On succesful return we have a nce_t and the ill/ipst can't 1635 * disappear until we do the nce_refrele in ixa_cleanup. 1636 */ 1637 data_mp = async_mp->b_cont; 1638 async_mp->b_cont = NULL; 1639 if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) { 1640 /* Disappeared on us - no ill/ipst for MIB */ 1641 /* We have nowhere to do stats since ixa_ipst could be NULL */ 1642 if (ixas.ixa_nce != NULL) { 1643 ill = ixas.ixa_nce->nce_ill; 1644 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1645 ip_drop_output("ipIfStatsOutDiscards", data_mp, ill); 1646 } 1647 freemsg(data_mp); 1648 goto done; 1649 } 1650 ns = ixas.ixa_ipst->ips_netstack; 1651 espstack = ns->netstack_ipsecesp; 1652 ipss = ns->netstack_ipsec; 1653 ill = ixas.ixa_nce->nce_ill; 1654 1655 if (status == CRYPTO_SUCCESS) { 1656 /* 1657 * If a ICV was computed, it was stored by the 1658 * crypto framework at the end of the packet. 1659 */ 1660 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 1661 1662 esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE); 1663 /* NAT-T packet. */ 1664 if (IPH_HDR_VERSION(ipha) == IP_VERSION && 1665 ipha->ipha_protocol == IPPROTO_UDP) 1666 esp_prepare_udp(ns, data_mp, ipha); 1667 1668 /* do AH processing if needed */ 1669 data_mp = esp_do_outbound_ah(data_mp, &ixas); 1670 if (data_mp == NULL) 1671 goto done; 1672 1673 (void) ip_output_post_ipsec(data_mp, &ixas); 1674 } else { 1675 /* Outbound shouldn't see invalid MAC */ 1676 ASSERT(status != CRYPTO_INVALID_MAC); 1677 1678 esp1dbg(espstack, 1679 ("esp_kcf_callback_outbound: crypto failed with 0x%x\n", 1680 status)); 1681 ESP_BUMP_STAT(espstack, crypto_failures); 1682 ESP_BUMP_STAT(espstack, out_discards); 1683 ip_drop_packet(data_mp, B_FALSE, ill, 1684 DROPPER(ipss, ipds_esp_crypto_failed), 1685 &espstack->esp_dropper); 1686 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1687 } 1688 done: 1689 ixa_cleanup(&ixas); 1690 (void) ipsec_free_crypto_data(mp); 1691 } 1692 1693 /* 1694 * Kernel crypto framework callback invoked after completion of async 1695 * crypto requests for inbound packets. 1696 */ 1697 static void 1698 esp_kcf_callback_inbound(void *arg, int status) 1699 { 1700 mblk_t *mp = (mblk_t *)arg; 1701 mblk_t *async_mp; 1702 netstack_t *ns; 1703 ipsecesp_stack_t *espstack; 1704 ipsec_stack_t *ipss; 1705 mblk_t *data_mp; 1706 ip_recv_attr_t iras; 1707 ipsec_crypto_t *ic; 1708 1709 /* 1710 * First remove the ipsec_crypto_t mblk 1711 * Note that we need to ipsec_free_crypto_data(mp) once done with ic. 1712 */ 1713 async_mp = ipsec_remove_crypto_data(mp, &ic); 1714 ASSERT(async_mp != NULL); 1715 1716 /* 1717 * Extract the ip_recv_attr_t from the first mblk. 1718 * Verifies that the netstack and ill is still around; could 1719 * have vanished while kEf was doing its work. 1720 */ 1721 data_mp = async_mp->b_cont; 1722 async_mp->b_cont = NULL; 1723 if (!ip_recv_attr_from_mblk(async_mp, &iras)) { 1724 /* The ill or ip_stack_t disappeared on us */ 1725 ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL); 1726 freemsg(data_mp); 1727 goto done; 1728 } 1729 1730 ns = iras.ira_ill->ill_ipst->ips_netstack; 1731 espstack = ns->netstack_ipsecesp; 1732 ipss = ns->netstack_ipsec; 1733 1734 if (status == CRYPTO_SUCCESS) { 1735 data_mp = esp_in_done(data_mp, &iras, ic); 1736 if (data_mp == NULL) 1737 goto done; 1738 1739 /* finish IPsec processing */ 1740 ip_input_post_ipsec(data_mp, &iras); 1741 } else if (status == CRYPTO_INVALID_MAC) { 1742 esp_log_bad_auth(data_mp, &iras); 1743 } else { 1744 esp1dbg(espstack, 1745 ("esp_kcf_callback: crypto failed with 0x%x\n", 1746 status)); 1747 ESP_BUMP_STAT(espstack, crypto_failures); 1748 IP_ESP_BUMP_STAT(ipss, in_discards); 1749 ip_drop_packet(data_mp, B_TRUE, iras.ira_ill, 1750 DROPPER(ipss, ipds_esp_crypto_failed), 1751 &espstack->esp_dropper); 1752 BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1753 } 1754 done: 1755 ira_cleanup(&iras, B_TRUE); 1756 (void) ipsec_free_crypto_data(mp); 1757 } 1758 1759 /* 1760 * Invoked on crypto framework failure during inbound and outbound processing. 1761 */ 1762 static void 1763 esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc, 1764 ill_t *ill, ipsecesp_stack_t *espstack) 1765 { 1766 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 1767 1768 esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n", 1769 is_inbound ? "inbound" : "outbound", kef_rc)); 1770 ip_drop_packet(data_mp, is_inbound, ill, 1771 DROPPER(ipss, ipds_esp_crypto_failed), 1772 &espstack->esp_dropper); 1773 ESP_BUMP_STAT(espstack, crypto_failures); 1774 if (is_inbound) 1775 IP_ESP_BUMP_STAT(ipss, in_discards); 1776 else 1777 ESP_BUMP_STAT(espstack, out_discards); 1778 } 1779 1780 /* 1781 * A statement-equivalent macro, _cr MUST point to a modifiable 1782 * crypto_call_req_t. 1783 */ 1784 #define ESP_INIT_CALLREQ(_cr, _mp, _callback) \ 1785 (_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE; \ 1786 (_cr)->cr_callback_arg = (_mp); \ 1787 (_cr)->cr_callback_func = (_callback) 1788 1789 #define ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) { \ 1790 (mac)->cd_format = CRYPTO_DATA_RAW; \ 1791 (mac)->cd_offset = 0; \ 1792 (mac)->cd_length = icvlen; \ 1793 (mac)->cd_raw.iov_base = (char *)icvbuf; \ 1794 (mac)->cd_raw.iov_len = icvlen; \ 1795 } 1796 1797 #define ESP_INIT_CRYPTO_DATA(data, mp, off, len) { \ 1798 if (MBLKL(mp) >= (len) + (off)) { \ 1799 (data)->cd_format = CRYPTO_DATA_RAW; \ 1800 (data)->cd_raw.iov_base = (char *)(mp)->b_rptr; \ 1801 (data)->cd_raw.iov_len = MBLKL(mp); \ 1802 (data)->cd_offset = off; \ 1803 } else { \ 1804 (data)->cd_format = CRYPTO_DATA_MBLK; \ 1805 (data)->cd_mp = mp; \ 1806 (data)->cd_offset = off; \ 1807 } \ 1808 (data)->cd_length = len; \ 1809 } 1810 1811 #define ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) { \ 1812 (data)->dd_format = CRYPTO_DATA_MBLK; \ 1813 (data)->dd_mp = mp; \ 1814 (data)->dd_len1 = len1; \ 1815 (data)->dd_offset1 = off1; \ 1816 (data)->dd_len2 = len2; \ 1817 (data)->dd_offset2 = off2; \ 1818 } 1819 1820 /* 1821 * Returns data_mp if successfully completed the request. Returns 1822 * NULL if it failed (and increments InDiscards) or if it is pending. 1823 */ 1824 static mblk_t * 1825 esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira, 1826 ipsa_t *assoc, uint_t esph_offset) 1827 { 1828 uint_t auth_offset, msg_len, auth_len; 1829 crypto_call_req_t call_req, *callrp; 1830 mblk_t *mp; 1831 esph_t *esph_ptr; 1832 int kef_rc; 1833 uint_t icv_len = assoc->ipsa_mac_len; 1834 crypto_ctx_template_t auth_ctx_tmpl; 1835 boolean_t do_auth, do_encr, force; 1836 uint_t encr_offset, encr_len; 1837 uint_t iv_len = assoc->ipsa_iv_len; 1838 crypto_ctx_template_t encr_ctx_tmpl; 1839 ipsec_crypto_t *ic, icstack; 1840 uchar_t *iv_ptr; 1841 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1842 ipsec_stack_t *ipss = ns->netstack_ipsec; 1843 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1844 1845 do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; 1846 do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; 1847 force = (assoc->ipsa_flags & IPSA_F_ASYNC); 1848 1849 #ifdef IPSEC_LATENCY_TEST 1850 kef_rc = CRYPTO_SUCCESS; 1851 #else 1852 kef_rc = CRYPTO_FAILED; 1853 #endif 1854 1855 /* 1856 * An inbound packet is of the form: 1857 * [IP,options,ESP,IV,data,ICV,pad] 1858 */ 1859 esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset); 1860 iv_ptr = (uchar_t *)(esph_ptr + 1); 1861 /* Packet length starting at IP header ending after ESP ICV. */ 1862 msg_len = MBLKL(esp_mp); 1863 1864 encr_offset = esph_offset + sizeof (esph_t) + iv_len; 1865 encr_len = msg_len - encr_offset; 1866 1867 /* 1868 * Counter mode algs need a nonce. This is setup in sadb_common_add(). 1869 * If for some reason we are using a SA which does not have a nonce 1870 * then we must fail here. 1871 */ 1872 if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) && 1873 (assoc->ipsa_nonce == NULL)) { 1874 ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill, 1875 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 1876 return (NULL); 1877 } 1878 1879 if (force) { 1880 /* We are doing asynch; allocate mblks to hold state */ 1881 if ((mp = ip_recv_attr_to_mblk(ira)) == NULL || 1882 (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) { 1883 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1884 ip_drop_input("ipIfStatsInDiscards", esp_mp, 1885 ira->ira_ill); 1886 return (NULL); 1887 } 1888 linkb(mp, esp_mp); 1889 callrp = &call_req; 1890 ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound); 1891 } else { 1892 /* 1893 * If we know we are going to do sync then ipsec_crypto_t 1894 * should be on the stack. 1895 */ 1896 ic = &icstack; 1897 bzero(ic, sizeof (*ic)); 1898 callrp = NULL; 1899 } 1900 1901 if (do_auth) { 1902 /* authentication context template */ 1903 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, 1904 auth_ctx_tmpl); 1905 1906 /* ICV to be verified */ 1907 ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, 1908 icv_len, esp_mp->b_wptr - icv_len); 1909 1910 /* authentication starts at the ESP header */ 1911 auth_offset = esph_offset; 1912 auth_len = msg_len - auth_offset - icv_len; 1913 if (!do_encr) { 1914 /* authentication only */ 1915 /* initialize input data argument */ 1916 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 1917 esp_mp, auth_offset, auth_len); 1918 1919 /* call the crypto framework */ 1920 kef_rc = crypto_mac_verify(&assoc->ipsa_amech, 1921 &ic->ic_crypto_data, 1922 &assoc->ipsa_kcfauthkey, auth_ctx_tmpl, 1923 &ic->ic_crypto_mac, callrp); 1924 } 1925 } 1926 1927 if (do_encr) { 1928 /* encryption template */ 1929 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR, 1930 encr_ctx_tmpl); 1931 1932 /* Call the nonce update function. Also passes in IV */ 1933 (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len, 1934 iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data); 1935 1936 if (!do_auth) { 1937 /* decryption only */ 1938 /* initialize input data argument */ 1939 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 1940 esp_mp, encr_offset, encr_len); 1941 1942 /* call the crypto framework */ 1943 kef_rc = crypto_decrypt((crypto_mechanism_t *) 1944 &ic->ic_cmm, &ic->ic_crypto_data, 1945 &assoc->ipsa_kcfencrkey, encr_ctx_tmpl, 1946 NULL, callrp); 1947 } 1948 } 1949 1950 if (do_auth && do_encr) { 1951 /* dual operation */ 1952 /* initialize input data argument */ 1953 ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data, 1954 esp_mp, auth_offset, auth_len, 1955 encr_offset, encr_len - icv_len); 1956 1957 /* specify IV */ 1958 ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr; 1959 1960 /* call the framework */ 1961 kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech, 1962 &assoc->ipsa_emech, &ic->ic_crypto_dual_data, 1963 &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey, 1964 auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac, 1965 NULL, callrp); 1966 } 1967 1968 switch (kef_rc) { 1969 case CRYPTO_SUCCESS: 1970 ESP_BUMP_STAT(espstack, crypto_sync); 1971 esp_mp = esp_in_done(esp_mp, ira, ic); 1972 if (force) { 1973 /* Free mp after we are done with ic */ 1974 mp = ipsec_free_crypto_data(mp); 1975 (void) ip_recv_attr_free_mblk(mp); 1976 } 1977 return (esp_mp); 1978 case CRYPTO_QUEUED: 1979 /* esp_kcf_callback_inbound() will be invoked on completion */ 1980 ESP_BUMP_STAT(espstack, crypto_async); 1981 return (NULL); 1982 case CRYPTO_INVALID_MAC: 1983 if (force) { 1984 mp = ipsec_free_crypto_data(mp); 1985 esp_mp = ip_recv_attr_free_mblk(mp); 1986 } 1987 ESP_BUMP_STAT(espstack, crypto_sync); 1988 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1989 esp_log_bad_auth(esp_mp, ira); 1990 /* esp_mp was passed to ip_drop_packet */ 1991 return (NULL); 1992 } 1993 1994 if (force) { 1995 mp = ipsec_free_crypto_data(mp); 1996 esp_mp = ip_recv_attr_free_mblk(mp); 1997 } 1998 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1999 esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack); 2000 /* esp_mp was passed to ip_drop_packet */ 2001 return (NULL); 2002 } 2003 2004 /* 2005 * Compute the IP and UDP checksums -- common code for both keepalives and 2006 * actual ESP-in-UDP packets. Be flexible with multiple mblks because ESP 2007 * uses mblk-insertion to insert the UDP header. 2008 * TODO - If there is an easy way to prep a packet for HW checksums, make 2009 * it happen here. 2010 * Note that this is used before both before calling ip_output_simple and 2011 * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the 2012 * latter. 2013 */ 2014 static void 2015 esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha) 2016 { 2017 int offset; 2018 uint32_t cksum; 2019 uint16_t *arr; 2020 mblk_t *udpmp = mp; 2021 uint_t hlen = IPH_HDR_LENGTH(ipha); 2022 2023 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2024 2025 ipha->ipha_hdr_checksum = 0; 2026 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 2027 2028 if (ns->netstack_udp->us_do_checksum) { 2029 ASSERT(MBLKL(udpmp) >= sizeof (udpha_t)); 2030 /* arr points to the IP header. */ 2031 arr = (uint16_t *)ipha; 2032 IP_STAT(ns->netstack_ip, ip_out_sw_cksum); 2033 IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes, 2034 ntohs(htons(ipha->ipha_length) - hlen)); 2035 /* arr[6-9] are the IP addresses. */ 2036 cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] + 2037 ntohs(htons(ipha->ipha_length) - hlen); 2038 cksum = IP_CSUM(mp, hlen, cksum); 2039 offset = hlen + UDP_CHECKSUM_OFFSET; 2040 while (offset >= MBLKL(udpmp)) { 2041 offset -= MBLKL(udpmp); 2042 udpmp = udpmp->b_cont; 2043 } 2044 /* arr points to the UDP header's checksum field. */ 2045 arr = (uint16_t *)(udpmp->b_rptr + offset); 2046 *arr = cksum; 2047 } 2048 } 2049 2050 /* 2051 * taskq handler so we can send the NAT-T keepalive on a separate thread. 2052 */ 2053 static void 2054 actually_send_keepalive(void *arg) 2055 { 2056 mblk_t *mp = (mblk_t *)arg; 2057 ip_xmit_attr_t ixas; 2058 netstack_t *ns; 2059 netstackid_t stackid; 2060 2061 stackid = (netstackid_t)(uintptr_t)mp->b_prev; 2062 mp->b_prev = NULL; 2063 ns = netstack_find_by_stackid(stackid); 2064 if (ns == NULL) { 2065 /* Disappeared */ 2066 ip_drop_output("ipIfStatsOutDiscards", mp, NULL); 2067 freemsg(mp); 2068 return; 2069 } 2070 2071 bzero(&ixas, sizeof (ixas)); 2072 ixas.ixa_zoneid = ALL_ZONES; 2073 ixas.ixa_cred = kcred; 2074 ixas.ixa_cpid = NOPID; 2075 ixas.ixa_tsl = NULL; 2076 ixas.ixa_ipst = ns->netstack_ip; 2077 /* No ULP checksum; done by esp_prepare_udp */ 2078 ixas.ixa_flags = (IXAF_IS_IPV4 | IXAF_NO_IPSEC | IXAF_VERIFY_SOURCE); 2079 2080 (void) ip_output_simple(mp, &ixas); 2081 ixa_cleanup(&ixas); 2082 netstack_rele(ns); 2083 } 2084 2085 /* 2086 * Send a one-byte UDP NAT-T keepalive. 2087 */ 2088 void 2089 ipsecesp_send_keepalive(ipsa_t *assoc) 2090 { 2091 mblk_t *mp; 2092 ipha_t *ipha; 2093 udpha_t *udpha; 2094 netstack_t *ns = assoc->ipsa_netstack; 2095 2096 ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock)); 2097 2098 mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI); 2099 if (mp == NULL) 2100 return; 2101 ipha = (ipha_t *)mp->b_rptr; 2102 ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 2103 ipha->ipha_type_of_service = 0; 2104 ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1); 2105 /* Use the low-16 of the SPI so we have some clue where it came from. */ 2106 ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1); 2107 ipha->ipha_fragment_offset_and_flags = 0; /* Too small to fragment! */ 2108 ipha->ipha_ttl = 0xFF; 2109 ipha->ipha_protocol = IPPROTO_UDP; 2110 ipha->ipha_hdr_checksum = 0; 2111 ipha->ipha_src = assoc->ipsa_srcaddr[0]; 2112 ipha->ipha_dst = assoc->ipsa_dstaddr[0]; 2113 udpha = (udpha_t *)(ipha + 1); 2114 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2115 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2116 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 2117 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2118 udpha->uha_length = htons(sizeof (udpha_t) + 1); 2119 udpha->uha_checksum = 0; 2120 mp->b_wptr = (uint8_t *)(udpha + 1); 2121 *(mp->b_wptr++) = 0xFF; 2122 2123 esp_prepare_udp(ns, mp, ipha); 2124 2125 /* 2126 * We're holding an isaf_t bucket lock, so pawn off the actual 2127 * packet transmission to another thread. Just in case syncq 2128 * processing causes a same-bucket packet to be processed. 2129 */ 2130 mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid; 2131 2132 if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp, 2133 TQ_NOSLEEP) == TASKQID_INVALID) { 2134 /* Assume no memory if taskq_dispatch() fails. */ 2135 mp->b_prev = NULL; 2136 ip_drop_packet(mp, B_FALSE, NULL, 2137 DROPPER(ns->netstack_ipsec, ipds_esp_nomem), 2138 &ns->netstack_ipsecesp->esp_dropper); 2139 } 2140 } 2141 2142 /* 2143 * Returns mp if successfully completed the request. Returns 2144 * NULL if it failed (and increments InDiscards) or if it is pending. 2145 */ 2146 static mblk_t * 2147 esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc, 2148 uchar_t *icv_buf, uint_t payload_len) 2149 { 2150 uint_t auth_len; 2151 crypto_call_req_t call_req, *callrp; 2152 mblk_t *esp_mp; 2153 esph_t *esph_ptr; 2154 mblk_t *mp; 2155 int kef_rc = CRYPTO_FAILED; 2156 uint_t icv_len = assoc->ipsa_mac_len; 2157 crypto_ctx_template_t auth_ctx_tmpl; 2158 boolean_t do_auth, do_encr, force; 2159 uint_t iv_len = assoc->ipsa_iv_len; 2160 crypto_ctx_template_t encr_ctx_tmpl; 2161 boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 2162 size_t esph_offset = (is_natt ? UDPH_SIZE : 0); 2163 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 2164 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2165 ipsec_crypto_t *ic, icstack; 2166 uchar_t *iv_ptr; 2167 crypto_data_t *cd_ptr = NULL; 2168 ill_t *ill = ixa->ixa_nce->nce_ill; 2169 ipsec_stack_t *ipss = ns->netstack_ipsec; 2170 2171 esp3dbg(espstack, ("esp_submit_req_outbound:%s", 2172 is_natt ? "natt" : "not natt")); 2173 2174 do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; 2175 do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; 2176 force = (assoc->ipsa_flags & IPSA_F_ASYNC); 2177 2178 #ifdef IPSEC_LATENCY_TEST 2179 kef_rc = CRYPTO_SUCCESS; 2180 #else 2181 kef_rc = CRYPTO_FAILED; 2182 #endif 2183 2184 /* 2185 * Outbound IPsec packets are of the form: 2186 * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV] 2187 * unless it's NATT, then it's 2188 * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV] 2189 * Get a pointer to the mblk containing the ESP header. 2190 */ 2191 ASSERT(data_mp->b_cont != NULL); 2192 esp_mp = data_mp->b_cont; 2193 esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset); 2194 iv_ptr = (uchar_t *)(esph_ptr + 1); 2195 2196 /* 2197 * Combined mode algs need a nonce. This is setup in sadb_common_add(). 2198 * If for some reason we are using a SA which does not have a nonce 2199 * then we must fail here. 2200 */ 2201 if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) && 2202 (assoc->ipsa_nonce == NULL)) { 2203 ip_drop_packet(data_mp, B_FALSE, NULL, 2204 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 2205 return (NULL); 2206 } 2207 2208 if (force) { 2209 /* We are doing asynch; allocate mblks to hold state */ 2210 if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL || 2211 (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) { 2212 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2213 ip_drop_output("ipIfStatsOutDiscards", data_mp, ill); 2214 freemsg(data_mp); 2215 return (NULL); 2216 } 2217 2218 linkb(mp, data_mp); 2219 callrp = &call_req; 2220 ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound); 2221 } else { 2222 /* 2223 * If we know we are going to do sync then ipsec_crypto_t 2224 * should be on the stack. 2225 */ 2226 ic = &icstack; 2227 bzero(ic, sizeof (*ic)); 2228 callrp = NULL; 2229 } 2230 2231 2232 if (do_auth) { 2233 /* authentication context template */ 2234 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, 2235 auth_ctx_tmpl); 2236 2237 /* where to store the computed mac */ 2238 ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, 2239 icv_len, icv_buf); 2240 2241 /* authentication starts at the ESP header */ 2242 auth_len = payload_len + iv_len + sizeof (esph_t); 2243 if (!do_encr) { 2244 /* authentication only */ 2245 /* initialize input data argument */ 2246 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 2247 esp_mp, esph_offset, auth_len); 2248 2249 /* call the crypto framework */ 2250 kef_rc = crypto_mac(&assoc->ipsa_amech, 2251 &ic->ic_crypto_data, 2252 &assoc->ipsa_kcfauthkey, auth_ctx_tmpl, 2253 &ic->ic_crypto_mac, callrp); 2254 } 2255 } 2256 2257 if (do_encr) { 2258 /* encryption context template */ 2259 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR, 2260 encr_ctx_tmpl); 2261 /* Call the nonce update function. */ 2262 (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len, 2263 iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data); 2264 2265 if (!do_auth) { 2266 /* encryption only, skip mblk that contains ESP hdr */ 2267 /* initialize input data argument */ 2268 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 2269 esp_mp->b_cont, 0, payload_len); 2270 2271 /* 2272 * For combined mode ciphers, the ciphertext is the same 2273 * size as the clear text, the ICV should follow the 2274 * ciphertext. To convince the kcf to allow in-line 2275 * encryption, with an ICV, use ipsec_out_crypto_mac 2276 * to point to the same buffer as the data. The calling 2277 * function need to ensure the buffer is large enough to 2278 * include the ICV. 2279 * 2280 * The IV is already written to the packet buffer, the 2281 * nonce setup function copied it to the params struct 2282 * for the cipher to use. 2283 */ 2284 if (assoc->ipsa_flags & IPSA_F_COMBINED) { 2285 bcopy(&ic->ic_crypto_data, 2286 &ic->ic_crypto_mac, 2287 sizeof (crypto_data_t)); 2288 ic->ic_crypto_mac.cd_length = 2289 payload_len + icv_len; 2290 cd_ptr = &ic->ic_crypto_mac; 2291 } 2292 2293 /* call the crypto framework */ 2294 kef_rc = crypto_encrypt((crypto_mechanism_t *) 2295 &ic->ic_cmm, &ic->ic_crypto_data, 2296 &assoc->ipsa_kcfencrkey, encr_ctx_tmpl, 2297 cd_ptr, callrp); 2298 2299 } 2300 } 2301 2302 if (do_auth && do_encr) { 2303 /* 2304 * Encryption and authentication: 2305 * Pass the pointer to the mblk chain starting at the ESP 2306 * header to the framework. Skip the ESP header mblk 2307 * for encryption, which is reflected by an encryption 2308 * offset equal to the length of that mblk. Start 2309 * the authentication at the ESP header, i.e. use an 2310 * authentication offset of zero. 2311 */ 2312 ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data, 2313 esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len); 2314 2315 /* specify IV */ 2316 ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr; 2317 2318 /* call the framework */ 2319 kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech, 2320 &assoc->ipsa_amech, NULL, 2321 &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey, 2322 encr_ctx_tmpl, auth_ctx_tmpl, 2323 &ic->ic_crypto_dual_data, 2324 &ic->ic_crypto_mac, callrp); 2325 } 2326 2327 switch (kef_rc) { 2328 case CRYPTO_SUCCESS: 2329 ESP_BUMP_STAT(espstack, crypto_sync); 2330 esp_set_usetime(assoc, B_FALSE); 2331 if (force) { 2332 mp = ipsec_free_crypto_data(mp); 2333 data_mp = ip_xmit_attr_free_mblk(mp); 2334 } 2335 if (is_natt) 2336 esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr); 2337 return (data_mp); 2338 case CRYPTO_QUEUED: 2339 /* esp_kcf_callback_outbound() will be invoked on completion */ 2340 ESP_BUMP_STAT(espstack, crypto_async); 2341 return (NULL); 2342 } 2343 2344 if (force) { 2345 mp = ipsec_free_crypto_data(mp); 2346 data_mp = ip_xmit_attr_free_mblk(mp); 2347 } 2348 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2349 esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack); 2350 /* data_mp was passed to ip_drop_packet */ 2351 return (NULL); 2352 } 2353 2354 /* 2355 * Handle outbound IPsec processing for IPv4 and IPv6 2356 * 2357 * Returns data_mp if successfully completed the request. Returns 2358 * NULL if it failed (and increments InDiscards) or if it is pending. 2359 */ 2360 static mblk_t * 2361 esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa) 2362 { 2363 mblk_t *espmp, *tailmp; 2364 ipha_t *ipha; 2365 ip6_t *ip6h; 2366 esph_t *esph_ptr, *iv_ptr; 2367 uint_t af; 2368 uint8_t *nhp; 2369 uintptr_t divpoint, datalen, adj, padlen, i, alloclen; 2370 uintptr_t esplen = sizeof (esph_t); 2371 uint8_t protocol; 2372 ipsa_t *assoc; 2373 uint_t iv_len, block_size, mac_len = 0; 2374 uchar_t *icv_buf; 2375 udpha_t *udpha; 2376 boolean_t is_natt = B_FALSE; 2377 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 2378 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2379 ipsec_stack_t *ipss = ns->netstack_ipsec; 2380 ill_t *ill = ixa->ixa_nce->nce_ill; 2381 boolean_t need_refrele = B_FALSE; 2382 2383 ESP_BUMP_STAT(espstack, out_requests); 2384 2385 /* 2386 * <sigh> We have to copy the message here, because TCP (for example) 2387 * keeps a dupb() of the message lying around for retransmission. 2388 * Since ESP changes the whole of the datagram, we have to create our 2389 * own copy lest we clobber TCP's data. Since we have to copy anyway, 2390 * we might as well make use of msgpullup() and get the mblk into one 2391 * contiguous piece! 2392 */ 2393 tailmp = msgpullup(data_mp, -1); 2394 if (tailmp == NULL) { 2395 esp0dbg(("esp_outbound: msgpullup() failed, " 2396 "dropping packet.\n")); 2397 ip_drop_packet(data_mp, B_FALSE, ill, 2398 DROPPER(ipss, ipds_esp_nomem), 2399 &espstack->esp_dropper); 2400 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2401 return (NULL); 2402 } 2403 freemsg(data_mp); 2404 data_mp = tailmp; 2405 2406 assoc = ixa->ixa_ipsec_esp_sa; 2407 ASSERT(assoc != NULL); 2408 2409 /* 2410 * Get the outer IP header in shape to escape this system.. 2411 */ 2412 if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) { 2413 /* 2414 * Need to update packet with any CIPSO option and update 2415 * ixa_tsl to capture the new label. 2416 * We allocate a separate ixa for that purpose. 2417 */ 2418 ixa = ip_xmit_attr_duplicate(ixa); 2419 if (ixa == NULL) { 2420 ip_drop_packet(data_mp, B_FALSE, ill, 2421 DROPPER(ipss, ipds_esp_nomem), 2422 &espstack->esp_dropper); 2423 return (NULL); 2424 } 2425 need_refrele = B_TRUE; 2426 2427 label_hold(assoc->ipsa_otsl); 2428 ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl); 2429 2430 data_mp = sadb_whack_label(data_mp, assoc, ixa, 2431 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 2432 if (data_mp == NULL) { 2433 /* Packet dropped by sadb_whack_label */ 2434 ixa_refrele(ixa); 2435 return (NULL); 2436 } 2437 } 2438 2439 /* 2440 * Reality check.... 2441 */ 2442 ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). */ 2443 2444 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2445 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 2446 2447 af = AF_INET; 2448 divpoint = IPH_HDR_LENGTH(ipha); 2449 datalen = ntohs(ipha->ipha_length) - divpoint; 2450 nhp = (uint8_t *)&ipha->ipha_protocol; 2451 } else { 2452 ip_pkt_t ipp; 2453 2454 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 2455 2456 af = AF_INET6; 2457 ip6h = (ip6_t *)ipha; 2458 bzero(&ipp, sizeof (ipp)); 2459 divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL); 2460 if (ipp.ipp_dstopts != NULL && 2461 ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) { 2462 /* 2463 * Destination options are tricky. If we get in here, 2464 * then we have a terminal header following the 2465 * destination options. We need to adjust backwards 2466 * so we insert ESP BEFORE the destination options 2467 * bag. (So that the dstopts get encrypted!) 2468 * 2469 * Since this is for outbound packets only, we know 2470 * that non-terminal destination options only precede 2471 * routing headers. 2472 */ 2473 divpoint -= ipp.ipp_dstoptslen; 2474 } 2475 datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint; 2476 2477 if (ipp.ipp_rthdr != NULL) { 2478 nhp = &ipp.ipp_rthdr->ip6r_nxt; 2479 } else if (ipp.ipp_hopopts != NULL) { 2480 nhp = &ipp.ipp_hopopts->ip6h_nxt; 2481 } else { 2482 ASSERT(divpoint == sizeof (ip6_t)); 2483 /* It's probably IP + ESP. */ 2484 nhp = &ip6h->ip6_nxt; 2485 } 2486 } 2487 2488 mac_len = assoc->ipsa_mac_len; 2489 2490 if (assoc->ipsa_flags & IPSA_F_NATT) { 2491 /* wedge in UDP header */ 2492 is_natt = B_TRUE; 2493 esplen += UDPH_SIZE; 2494 } 2495 2496 /* 2497 * Set up ESP header and encryption padding for ENCR PI request. 2498 */ 2499 2500 /* Determine the padding length. Pad to 4-bytes for no-encryption. */ 2501 if (assoc->ipsa_encr_alg != SADB_EALG_NULL) { 2502 iv_len = assoc->ipsa_iv_len; 2503 block_size = assoc->ipsa_datalen; 2504 2505 /* 2506 * Pad the data to the length of the cipher block size. 2507 * Include the two additional bytes (hence the - 2) for the 2508 * padding length and the next header. Take this into account 2509 * when calculating the actual length of the padding. 2510 */ 2511 ASSERT(ISP2(iv_len)); 2512 padlen = ((unsigned)(block_size - datalen - 2)) & 2513 (block_size - 1); 2514 } else { 2515 iv_len = 0; 2516 padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) & 2517 (sizeof (uint32_t) - 1); 2518 } 2519 2520 /* Allocate ESP header and IV. */ 2521 esplen += iv_len; 2522 2523 /* 2524 * Update association byte-count lifetimes. Don't forget to take 2525 * into account the padding length and next-header (hence the + 2). 2526 * 2527 * Use the amount of data fed into the "encryption algorithm". This 2528 * is the IV, the data length, the padding length, and the final two 2529 * bytes (padlen, and next-header). 2530 * 2531 */ 2532 2533 if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) { 2534 ip_drop_packet(data_mp, B_FALSE, ill, 2535 DROPPER(ipss, ipds_esp_bytes_expire), 2536 &espstack->esp_dropper); 2537 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2538 if (need_refrele) 2539 ixa_refrele(ixa); 2540 return (NULL); 2541 } 2542 2543 espmp = allocb(esplen, BPRI_HI); 2544 if (espmp == NULL) { 2545 ESP_BUMP_STAT(espstack, out_discards); 2546 esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n")); 2547 ip_drop_packet(data_mp, B_FALSE, ill, 2548 DROPPER(ipss, ipds_esp_nomem), 2549 &espstack->esp_dropper); 2550 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2551 if (need_refrele) 2552 ixa_refrele(ixa); 2553 return (NULL); 2554 } 2555 espmp->b_wptr += esplen; 2556 esph_ptr = (esph_t *)espmp->b_rptr; 2557 2558 if (is_natt) { 2559 esp3dbg(espstack, ("esp_outbound: NATT")); 2560 2561 udpha = (udpha_t *)espmp->b_rptr; 2562 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2563 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2564 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 2565 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2566 /* 2567 * Set the checksum to 0, so that the esp_prepare_udp() call 2568 * can do the right thing. 2569 */ 2570 udpha->uha_checksum = 0; 2571 esph_ptr = (esph_t *)(udpha + 1); 2572 } 2573 2574 esph_ptr->esph_spi = assoc->ipsa_spi; 2575 2576 esph_ptr->esph_replay = htonl(atomic_inc_32_nv(&assoc->ipsa_replay)); 2577 if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) { 2578 /* 2579 * XXX We have replay counter wrapping. 2580 * We probably want to nuke this SA (and its peer). 2581 */ 2582 ipsec_assocfailure(info.mi_idnum, 0, 0, 2583 SL_ERROR | SL_CONSOLE | SL_WARN, 2584 "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n", 2585 esph_ptr->esph_spi, assoc->ipsa_dstaddr, af, 2586 espstack->ipsecesp_netstack); 2587 2588 ESP_BUMP_STAT(espstack, out_discards); 2589 sadb_replay_delete(assoc); 2590 ip_drop_packet(data_mp, B_FALSE, ill, 2591 DROPPER(ipss, ipds_esp_replay), 2592 &espstack->esp_dropper); 2593 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2594 if (need_refrele) 2595 ixa_refrele(ixa); 2596 return (NULL); 2597 } 2598 2599 iv_ptr = (esph_ptr + 1); 2600 /* 2601 * iv_ptr points to the mblk which will contain the IV once we have 2602 * written it there. This mblk will be part of a mblk chain that 2603 * will make up the packet. 2604 * 2605 * For counter mode algorithms, the IV is a 64 bit quantity, it 2606 * must NEVER repeat in the lifetime of the SA, otherwise an 2607 * attacker who had recorded enough packets might be able to 2608 * determine some clear text. 2609 * 2610 * To ensure this does not happen, the IV is stored in the SA and 2611 * incremented for each packet, the IV is then copied into the 2612 * "packet" for transmission to the receiving system. The IV will 2613 * also be copied into the nonce, when the packet is encrypted. 2614 * 2615 * CBC mode algorithms use a random IV for each packet. We do not 2616 * require the highest quality random bits, but for best security 2617 * with CBC mode ciphers, the value must be unlikely to repeat and 2618 * must not be known in advance to an adversary capable of influencing 2619 * the clear text. 2620 */ 2621 if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc, 2622 espstack)) { 2623 ip_drop_packet(data_mp, B_FALSE, ill, 2624 DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper); 2625 if (need_refrele) 2626 ixa_refrele(ixa); 2627 return (NULL); 2628 } 2629 2630 /* Fix the IP header. */ 2631 alloclen = padlen + 2 + mac_len; 2632 adj = alloclen + (espmp->b_wptr - espmp->b_rptr); 2633 2634 protocol = *nhp; 2635 2636 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2637 ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj); 2638 if (is_natt) { 2639 *nhp = IPPROTO_UDP; 2640 udpha->uha_length = htons(ntohs(ipha->ipha_length) - 2641 IPH_HDR_LENGTH(ipha)); 2642 } else { 2643 *nhp = IPPROTO_ESP; 2644 } 2645 ipha->ipha_hdr_checksum = 0; 2646 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 2647 } else { 2648 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj); 2649 *nhp = IPPROTO_ESP; 2650 } 2651 2652 /* I've got the two ESP mblks, now insert them. */ 2653 2654 esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n")); 2655 esp2dbg(espstack, (dump_msg(data_mp))); 2656 2657 if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) { 2658 ESP_BUMP_STAT(espstack, out_discards); 2659 /* NOTE: esp_insert_esp() only fails if there's no memory. */ 2660 ip_drop_packet(data_mp, B_FALSE, ill, 2661 DROPPER(ipss, ipds_esp_nomem), 2662 &espstack->esp_dropper); 2663 freeb(espmp); 2664 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2665 if (need_refrele) 2666 ixa_refrele(ixa); 2667 return (NULL); 2668 } 2669 2670 /* Append padding (and leave room for ICV). */ 2671 for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont) 2672 ; 2673 if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) { 2674 tailmp->b_cont = allocb(alloclen, BPRI_HI); 2675 if (tailmp->b_cont == NULL) { 2676 ESP_BUMP_STAT(espstack, out_discards); 2677 esp0dbg(("esp_outbound: Can't allocate tailmp.\n")); 2678 ip_drop_packet(data_mp, B_FALSE, ill, 2679 DROPPER(ipss, ipds_esp_nomem), 2680 &espstack->esp_dropper); 2681 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2682 if (need_refrele) 2683 ixa_refrele(ixa); 2684 return (NULL); 2685 } 2686 tailmp = tailmp->b_cont; 2687 } 2688 2689 /* 2690 * If there's padding, N bytes of padding must be of the form 0x1, 2691 * 0x2, 0x3... 0xN. 2692 */ 2693 for (i = 0; i < padlen; ) { 2694 i++; 2695 *tailmp->b_wptr++ = i; 2696 } 2697 *tailmp->b_wptr++ = i; 2698 *tailmp->b_wptr++ = protocol; 2699 2700 esp2dbg(espstack, ("data_Mp before encryption:\n")); 2701 esp2dbg(espstack, (dump_msg(data_mp))); 2702 2703 /* 2704 * Okay. I've set up the pre-encryption ESP. Let's do it! 2705 */ 2706 2707 if (mac_len > 0) { 2708 ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim); 2709 icv_buf = tailmp->b_wptr; 2710 tailmp->b_wptr += mac_len; 2711 } else { 2712 icv_buf = NULL; 2713 } 2714 2715 data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf, 2716 datalen + padlen + 2); 2717 if (need_refrele) 2718 ixa_refrele(ixa); 2719 return (data_mp); 2720 } 2721 2722 /* 2723 * IP calls this to validate the ICMP errors that 2724 * we got from the network. 2725 */ 2726 mblk_t * 2727 ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira) 2728 { 2729 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 2730 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2731 ipsec_stack_t *ipss = ns->netstack_ipsec; 2732 2733 /* 2734 * Unless we get an entire packet back, this function is useless. 2735 * Why? 2736 * 2737 * 1.) Partial packets are useless, because the "next header" 2738 * is at the end of the decrypted ESP packet. Without the 2739 * whole packet, this is useless. 2740 * 2741 * 2.) If we every use a stateful cipher, such as a stream or a 2742 * one-time pad, we can't do anything. 2743 * 2744 * Since the chances of us getting an entire packet back are very 2745 * very small, we discard here. 2746 */ 2747 IP_ESP_BUMP_STAT(ipss, in_discards); 2748 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 2749 DROPPER(ipss, ipds_esp_icmp), 2750 &espstack->esp_dropper); 2751 return (NULL); 2752 } 2753 2754 /* 2755 * Construct an SADB_REGISTER message with the current algorithms. 2756 * This function gets called when 'ipsecalgs -s' is run or when 2757 * in.iked (or other KMD) starts. 2758 */ 2759 static boolean_t 2760 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial, 2761 ipsecesp_stack_t *espstack, cred_t *cr) 2762 { 2763 mblk_t *pfkey_msg_mp, *keysock_out_mp; 2764 sadb_msg_t *samsg; 2765 sadb_supported_t *sasupp_auth = NULL; 2766 sadb_supported_t *sasupp_encr = NULL; 2767 sadb_alg_t *saalg; 2768 uint_t allocsize = sizeof (*samsg); 2769 uint_t i, numalgs_snap; 2770 int current_aalgs; 2771 ipsec_alginfo_t **authalgs; 2772 uint_t num_aalgs; 2773 int current_ealgs; 2774 ipsec_alginfo_t **encralgs; 2775 uint_t num_ealgs; 2776 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 2777 sadb_sens_t *sens; 2778 size_t sens_len = 0; 2779 sadb_ext_t *nextext; 2780 ts_label_t *sens_tsl = NULL; 2781 2782 /* Allocate the KEYSOCK_OUT. */ 2783 keysock_out_mp = sadb_keysock_out(serial); 2784 if (keysock_out_mp == NULL) { 2785 esp0dbg(("esp_register_out: couldn't allocate mblk.\n")); 2786 return (B_FALSE); 2787 } 2788 2789 if (is_system_labeled() && (cr != NULL)) { 2790 sens_tsl = crgetlabel(cr); 2791 if (sens_tsl != NULL) { 2792 sens_len = sadb_sens_len_from_label(sens_tsl); 2793 allocsize += sens_len; 2794 } 2795 } 2796 2797 /* 2798 * Allocate the PF_KEY message that follows KEYSOCK_OUT. 2799 */ 2800 2801 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 2802 /* 2803 * Fill SADB_REGISTER message's algorithm descriptors. Hold 2804 * down the lock while filling it. 2805 * 2806 * Return only valid algorithms, so the number of algorithms 2807 * to send up may be less than the number of algorithm entries 2808 * in the table. 2809 */ 2810 authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH]; 2811 for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) 2812 if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) 2813 num_aalgs++; 2814 2815 if (num_aalgs != 0) { 2816 allocsize += (num_aalgs * sizeof (*saalg)); 2817 allocsize += sizeof (*sasupp_auth); 2818 } 2819 encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR]; 2820 for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) 2821 if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) 2822 num_ealgs++; 2823 2824 if (num_ealgs != 0) { 2825 allocsize += (num_ealgs * sizeof (*saalg)); 2826 allocsize += sizeof (*sasupp_encr); 2827 } 2828 keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI); 2829 if (keysock_out_mp->b_cont == NULL) { 2830 rw_exit(&ipss->ipsec_alg_lock); 2831 freemsg(keysock_out_mp); 2832 return (B_FALSE); 2833 } 2834 pfkey_msg_mp = keysock_out_mp->b_cont; 2835 pfkey_msg_mp->b_wptr += allocsize; 2836 2837 nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg)); 2838 2839 if (num_aalgs != 0) { 2840 sasupp_auth = (sadb_supported_t *)nextext; 2841 saalg = (sadb_alg_t *)(sasupp_auth + 1); 2842 2843 ASSERT(((ulong_t)saalg & 0x7) == 0); 2844 2845 numalgs_snap = 0; 2846 for (i = 0; 2847 ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); 2848 i++) { 2849 if (authalgs[i] == NULL || !ALG_VALID(authalgs[i])) 2850 continue; 2851 2852 saalg->sadb_alg_id = authalgs[i]->alg_id; 2853 saalg->sadb_alg_ivlen = 0; 2854 saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits; 2855 saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits; 2856 saalg->sadb_x_alg_increment = 2857 authalgs[i]->alg_increment; 2858 saalg->sadb_x_alg_saltbits = SADB_8TO1( 2859 authalgs[i]->alg_saltlen); 2860 numalgs_snap++; 2861 saalg++; 2862 } 2863 ASSERT(numalgs_snap == num_aalgs); 2864 #ifdef DEBUG 2865 /* 2866 * Reality check to make sure I snagged all of the 2867 * algorithms. 2868 */ 2869 for (; i < IPSEC_MAX_ALGS; i++) { 2870 if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) { 2871 cmn_err(CE_PANIC, "esp_register_out()! " 2872 "Missed aalg #%d.\n", i); 2873 } 2874 } 2875 #endif /* DEBUG */ 2876 nextext = (sadb_ext_t *)saalg; 2877 } 2878 2879 if (num_ealgs != 0) { 2880 sasupp_encr = (sadb_supported_t *)nextext; 2881 saalg = (sadb_alg_t *)(sasupp_encr + 1); 2882 2883 numalgs_snap = 0; 2884 for (i = 0; 2885 ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) { 2886 if (encralgs[i] == NULL || !ALG_VALID(encralgs[i])) 2887 continue; 2888 saalg->sadb_alg_id = encralgs[i]->alg_id; 2889 saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen; 2890 saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits; 2891 saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits; 2892 /* 2893 * We could advertise the ICV length, except there 2894 * is not a value in sadb_x_algb to do this. 2895 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen; 2896 */ 2897 saalg->sadb_x_alg_increment = 2898 encralgs[i]->alg_increment; 2899 saalg->sadb_x_alg_saltbits = 2900 SADB_8TO1(encralgs[i]->alg_saltlen); 2901 2902 numalgs_snap++; 2903 saalg++; 2904 } 2905 ASSERT(numalgs_snap == num_ealgs); 2906 #ifdef DEBUG 2907 /* 2908 * Reality check to make sure I snagged all of the 2909 * algorithms. 2910 */ 2911 for (; i < IPSEC_MAX_ALGS; i++) { 2912 if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) { 2913 cmn_err(CE_PANIC, "esp_register_out()! " 2914 "Missed ealg #%d.\n", i); 2915 } 2916 } 2917 #endif /* DEBUG */ 2918 nextext = (sadb_ext_t *)saalg; 2919 } 2920 2921 current_aalgs = num_aalgs; 2922 current_ealgs = num_ealgs; 2923 2924 rw_exit(&ipss->ipsec_alg_lock); 2925 2926 if (sens_tsl != NULL) { 2927 sens = (sadb_sens_t *)nextext; 2928 sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY, 2929 sens_tsl, sens_len); 2930 2931 nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len); 2932 } 2933 2934 /* Now fill the rest of the SADB_REGISTER message. */ 2935 2936 samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr; 2937 samsg->sadb_msg_version = PF_KEY_V2; 2938 samsg->sadb_msg_type = SADB_REGISTER; 2939 samsg->sadb_msg_errno = 0; 2940 samsg->sadb_msg_satype = SADB_SATYPE_ESP; 2941 samsg->sadb_msg_len = SADB_8TO64(allocsize); 2942 samsg->sadb_msg_reserved = 0; 2943 /* 2944 * Assume caller has sufficient sequence/pid number info. If it's one 2945 * from me over a new alg., I could give two hoots about sequence. 2946 */ 2947 samsg->sadb_msg_seq = sequence; 2948 samsg->sadb_msg_pid = pid; 2949 2950 if (sasupp_auth != NULL) { 2951 sasupp_auth->sadb_supported_len = SADB_8TO64( 2952 sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs); 2953 sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; 2954 sasupp_auth->sadb_supported_reserved = 0; 2955 } 2956 2957 if (sasupp_encr != NULL) { 2958 sasupp_encr->sadb_supported_len = SADB_8TO64( 2959 sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs); 2960 sasupp_encr->sadb_supported_exttype = 2961 SADB_EXT_SUPPORTED_ENCRYPT; 2962 sasupp_encr->sadb_supported_reserved = 0; 2963 } 2964 2965 if (espstack->esp_pfkey_q != NULL) 2966 putnext(espstack->esp_pfkey_q, keysock_out_mp); 2967 else { 2968 freemsg(keysock_out_mp); 2969 return (B_FALSE); 2970 } 2971 2972 return (B_TRUE); 2973 } 2974 2975 /* 2976 * Invoked when the algorithm table changes. Causes SADB_REGISTER 2977 * messages continaining the current list of algorithms to be 2978 * sent up to the ESP listeners. 2979 */ 2980 void 2981 ipsecesp_algs_changed(netstack_t *ns) 2982 { 2983 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2984 2985 /* 2986 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners 2987 * everywhere. (The function itself checks for NULL esp_pfkey_q.) 2988 */ 2989 (void) esp_register_out(0, 0, 0, espstack, NULL); 2990 } 2991 2992 /* 2993 * Stub function that taskq_dispatch() invokes to take the mblk (in arg) 2994 * and send() it into ESP and IP again. 2995 */ 2996 static void 2997 inbound_task(void *arg) 2998 { 2999 mblk_t *mp = (mblk_t *)arg; 3000 mblk_t *async_mp; 3001 ip_recv_attr_t iras; 3002 3003 async_mp = mp; 3004 mp = async_mp->b_cont; 3005 async_mp->b_cont = NULL; 3006 if (!ip_recv_attr_from_mblk(async_mp, &iras)) { 3007 /* The ill or ip_stack_t disappeared on us */ 3008 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL); 3009 freemsg(mp); 3010 goto done; 3011 } 3012 3013 esp_inbound_restart(mp, &iras); 3014 done: 3015 ira_cleanup(&iras, B_TRUE); 3016 } 3017 3018 /* 3019 * Restart ESP after the SA has been added. 3020 */ 3021 static void 3022 esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira) 3023 { 3024 esph_t *esph; 3025 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 3026 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3027 3028 esp2dbg(espstack, ("in ESP inbound_task")); 3029 ASSERT(espstack != NULL); 3030 3031 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 3032 if (mp == NULL) 3033 return; 3034 3035 ASSERT(esph != NULL); 3036 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3037 ASSERT(ira->ira_ipsec_esp_sa != NULL); 3038 3039 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira); 3040 if (mp == NULL) { 3041 /* 3042 * Either it failed or is pending. In the former case 3043 * ipIfStatsInDiscards was increased. 3044 */ 3045 return; 3046 } 3047 3048 ip_input_post_ipsec(mp, ira); 3049 } 3050 3051 /* 3052 * Now that weak-key passed, actually ADD the security association, and 3053 * send back a reply ADD message. 3054 */ 3055 static int 3056 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, 3057 int *diagnostic, ipsecesp_stack_t *espstack) 3058 { 3059 isaf_t *primary = NULL, *secondary; 3060 boolean_t clone = B_FALSE, is_inbound = B_FALSE; 3061 ipsa_t *larval = NULL; 3062 ipsacq_t *acqrec; 3063 iacqf_t *acq_bucket; 3064 mblk_t *acq_msgs = NULL; 3065 int rc; 3066 mblk_t *lpkt; 3067 int error; 3068 ipsa_query_t sq; 3069 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 3070 3071 /* 3072 * Locate the appropriate table(s). 3073 */ 3074 sq.spp = &espstack->esp_sadb; /* XXX */ 3075 error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST, 3076 IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, 3077 &sq, diagnostic); 3078 if (error) 3079 return (error); 3080 3081 /* 3082 * Use the direction flags provided by the KMD to determine 3083 * if the inbound or outbound table should be the primary 3084 * for this SA. If these flags were absent then make this 3085 * decision based on the addresses. 3086 */ 3087 if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) { 3088 primary = sq.inbound; 3089 secondary = sq.outbound; 3090 is_inbound = B_TRUE; 3091 if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) 3092 clone = B_TRUE; 3093 } else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) { 3094 primary = sq.outbound; 3095 secondary = sq.inbound; 3096 } 3097 3098 if (primary == NULL) { 3099 /* 3100 * The KMD did not set a direction flag, determine which 3101 * table to insert the SA into based on addresses. 3102 */ 3103 switch (ksi->ks_in_dsttype) { 3104 case KS_IN_ADDR_MBCAST: 3105 clone = B_TRUE; /* All mcast SAs can be bidirectional */ 3106 sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND; 3107 /* FALLTHRU */ 3108 /* 3109 * If the source address is either one of mine, or unspecified 3110 * (which is best summed up by saying "not 'not mine'"), 3111 * then the association is potentially bi-directional, 3112 * in that it can be used for inbound traffic and outbound 3113 * traffic. The best example of such an SA is a multicast 3114 * SA (which allows me to receive the outbound traffic). 3115 */ 3116 case KS_IN_ADDR_ME: 3117 sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND; 3118 primary = sq.inbound; 3119 secondary = sq.outbound; 3120 if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME) 3121 clone = B_TRUE; 3122 is_inbound = B_TRUE; 3123 break; 3124 /* 3125 * If the source address literally not mine (either 3126 * unspecified or not mine), then this SA may have an 3127 * address that WILL be mine after some configuration. 3128 * We pay the price for this by making it a bi-directional 3129 * SA. 3130 */ 3131 case KS_IN_ADDR_NOTME: 3132 sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND; 3133 primary = sq.outbound; 3134 secondary = sq.inbound; 3135 if (ksi->ks_in_srctype != KS_IN_ADDR_ME) { 3136 sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND; 3137 clone = B_TRUE; 3138 } 3139 break; 3140 default: 3141 *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST; 3142 return (EINVAL); 3143 } 3144 } 3145 3146 /* 3147 * Find a ACQUIRE list entry if possible. If we've added an SA that 3148 * suits the needs of an ACQUIRE list entry, we can eliminate the 3149 * ACQUIRE list entry and transmit the enqueued packets. Use the 3150 * high-bit of the sequence number to queue it. Key off destination 3151 * addr, and change acqrec's state. 3152 */ 3153 3154 if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) { 3155 acq_bucket = &(sq.sp->sdb_acq[sq.outhash]); 3156 mutex_enter(&acq_bucket->iacqf_lock); 3157 for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL; 3158 acqrec = acqrec->ipsacq_next) { 3159 mutex_enter(&acqrec->ipsacq_lock); 3160 /* 3161 * Q: I only check sequence. Should I check dst? 3162 * A: Yes, check dest because those are the packets 3163 * that are queued up. 3164 */ 3165 if (acqrec->ipsacq_seq == samsg->sadb_msg_seq && 3166 IPSA_ARE_ADDR_EQUAL(sq.dstaddr, 3167 acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) 3168 break; 3169 mutex_exit(&acqrec->ipsacq_lock); 3170 } 3171 if (acqrec != NULL) { 3172 /* 3173 * AHA! I found an ACQUIRE record for this SA. 3174 * Grab the msg list, and free the acquire record. 3175 * I already am holding the lock for this record, 3176 * so all I have to do is free it. 3177 */ 3178 acq_msgs = acqrec->ipsacq_mp; 3179 acqrec->ipsacq_mp = NULL; 3180 mutex_exit(&acqrec->ipsacq_lock); 3181 sadb_destroy_acquire(acqrec, 3182 espstack->ipsecesp_netstack); 3183 } 3184 mutex_exit(&acq_bucket->iacqf_lock); 3185 } 3186 3187 /* 3188 * Find PF_KEY message, and see if I'm an update. If so, find entry 3189 * in larval list (if there). 3190 */ 3191 if (samsg->sadb_msg_type == SADB_UPDATE) { 3192 mutex_enter(&sq.inbound->isaf_lock); 3193 larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi, 3194 ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family); 3195 mutex_exit(&sq.inbound->isaf_lock); 3196 3197 if ((larval == NULL) || 3198 (larval->ipsa_state != IPSA_STATE_LARVAL)) { 3199 *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND; 3200 if (larval != NULL) { 3201 IPSA_REFRELE(larval); 3202 } 3203 esp0dbg(("Larval update, but larval disappeared.\n")); 3204 return (ESRCH); 3205 } /* Else sadb_common_add unlinks it for me! */ 3206 } 3207 3208 if (larval != NULL) { 3209 /* 3210 * Hold again, because sadb_common_add() consumes a reference, 3211 * and we don't want to clear_lpkt() without a reference. 3212 */ 3213 IPSA_REFHOLD(larval); 3214 } 3215 3216 rc = sadb_common_add(espstack->esp_pfkey_q, 3217 mp, samsg, ksi, primary, secondary, larval, clone, is_inbound, 3218 diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb); 3219 3220 if (larval != NULL) { 3221 if (rc == 0) { 3222 lpkt = sadb_clear_lpkt(larval); 3223 if (lpkt != NULL) { 3224 rc = taskq_dispatch(esp_taskq, inbound_task, 3225 lpkt, TQ_NOSLEEP) == TASKQID_INVALID; 3226 } 3227 } 3228 IPSA_REFRELE(larval); 3229 } 3230 3231 /* 3232 * How much more stack will I create with all of these 3233 * esp_outbound() calls? 3234 */ 3235 3236 /* Handle the packets queued waiting for the SA */ 3237 while (acq_msgs != NULL) { 3238 mblk_t *asyncmp; 3239 mblk_t *data_mp; 3240 ip_xmit_attr_t ixas; 3241 ill_t *ill; 3242 3243 asyncmp = acq_msgs; 3244 acq_msgs = acq_msgs->b_next; 3245 asyncmp->b_next = NULL; 3246 3247 /* 3248 * Extract the ip_xmit_attr_t from the first mblk. 3249 * Verifies that the netstack and ill is still around; could 3250 * have vanished while iked was doing its work. 3251 * On succesful return we have a nce_t and the ill/ipst can't 3252 * disappear until we do the nce_refrele in ixa_cleanup. 3253 */ 3254 data_mp = asyncmp->b_cont; 3255 asyncmp->b_cont = NULL; 3256 if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) { 3257 ESP_BUMP_STAT(espstack, out_discards); 3258 ip_drop_packet(data_mp, B_FALSE, NULL, 3259 DROPPER(ipss, ipds_sadb_acquire_timeout), 3260 &espstack->esp_dropper); 3261 } else if (rc != 0) { 3262 ill = ixas.ixa_nce->nce_ill; 3263 ESP_BUMP_STAT(espstack, out_discards); 3264 ip_drop_packet(data_mp, B_FALSE, ill, 3265 DROPPER(ipss, ipds_sadb_acquire_timeout), 3266 &espstack->esp_dropper); 3267 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 3268 } else { 3269 esp_outbound_finish(data_mp, &ixas); 3270 } 3271 ixa_cleanup(&ixas); 3272 } 3273 3274 return (rc); 3275 } 3276 3277 /* 3278 * Process one of the queued messages (from ipsacq_mp) once the SA 3279 * has been added. 3280 */ 3281 static void 3282 esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa) 3283 { 3284 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 3285 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3286 ipsec_stack_t *ipss = ns->netstack_ipsec; 3287 ill_t *ill = ixa->ixa_nce->nce_ill; 3288 3289 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) { 3290 ESP_BUMP_STAT(espstack, out_discards); 3291 ip_drop_packet(data_mp, B_FALSE, ill, 3292 DROPPER(ipss, ipds_sadb_acquire_timeout), 3293 &espstack->esp_dropper); 3294 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 3295 return; 3296 } 3297 3298 data_mp = esp_outbound(data_mp, ixa); 3299 if (data_mp == NULL) 3300 return; 3301 3302 /* do AH processing if needed */ 3303 data_mp = esp_do_outbound_ah(data_mp, ixa); 3304 if (data_mp == NULL) 3305 return; 3306 3307 (void) ip_output_post_ipsec(data_mp, ixa); 3308 } 3309 3310 /* 3311 * Add new ESP security association. This may become a generic AH/ESP 3312 * routine eventually. 3313 */ 3314 static int 3315 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) 3316 { 3317 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3318 sadb_address_t *srcext = 3319 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3320 sadb_address_t *dstext = 3321 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3322 sadb_address_t *isrcext = 3323 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC]; 3324 sadb_address_t *idstext = 3325 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST]; 3326 sadb_address_t *nttext_loc = 3327 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC]; 3328 sadb_address_t *nttext_rem = 3329 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM]; 3330 sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH]; 3331 sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT]; 3332 struct sockaddr_in *src, *dst; 3333 struct sockaddr_in *natt_loc, *natt_rem; 3334 struct sockaddr_in6 *natt_loc6, *natt_rem6; 3335 sadb_lifetime_t *soft = 3336 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT]; 3337 sadb_lifetime_t *hard = 3338 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD]; 3339 sadb_lifetime_t *idle = 3340 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE]; 3341 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3342 ipsec_stack_t *ipss = ns->netstack_ipsec; 3343 3344 3345 3346 /* I need certain extensions present for an ADD message. */ 3347 if (srcext == NULL) { 3348 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC; 3349 return (EINVAL); 3350 } 3351 if (dstext == NULL) { 3352 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3353 return (EINVAL); 3354 } 3355 if (isrcext == NULL && idstext != NULL) { 3356 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC; 3357 return (EINVAL); 3358 } 3359 if (isrcext != NULL && idstext == NULL) { 3360 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST; 3361 return (EINVAL); 3362 } 3363 if (assoc == NULL) { 3364 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3365 return (EINVAL); 3366 } 3367 if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) { 3368 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY; 3369 return (EINVAL); 3370 } 3371 3372 src = (struct sockaddr_in *)(srcext + 1); 3373 dst = (struct sockaddr_in *)(dstext + 1); 3374 natt_loc = (struct sockaddr_in *)(nttext_loc + 1); 3375 natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1); 3376 natt_rem = (struct sockaddr_in *)(nttext_rem + 1); 3377 natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1); 3378 3379 /* Sundry ADD-specific reality checks. */ 3380 /* XXX STATS : Logging/stats here? */ 3381 3382 if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) && 3383 (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) { 3384 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE; 3385 return (EINVAL); 3386 } 3387 if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) { 3388 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3389 return (EINVAL); 3390 } 3391 3392 #ifndef IPSEC_LATENCY_TEST 3393 if (assoc->sadb_sa_encrypt == SADB_EALG_NULL && 3394 assoc->sadb_sa_auth == SADB_AALG_NONE) { 3395 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3396 return (EINVAL); 3397 } 3398 #endif 3399 3400 if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) { 3401 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; 3402 return (EINVAL); 3403 } 3404 3405 if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) { 3406 return (EINVAL); 3407 } 3408 ASSERT(src->sin_family == dst->sin_family); 3409 3410 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) { 3411 if (nttext_loc == NULL) { 3412 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC; 3413 return (EINVAL); 3414 } 3415 3416 if (natt_loc->sin_family == AF_INET6 && 3417 !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) { 3418 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC; 3419 return (EINVAL); 3420 } 3421 } 3422 3423 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) { 3424 if (nttext_rem == NULL) { 3425 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM; 3426 return (EINVAL); 3427 } 3428 if (natt_rem->sin_family == AF_INET6 && 3429 !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) { 3430 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM; 3431 return (EINVAL); 3432 } 3433 } 3434 3435 3436 /* Stuff I don't support, for now. XXX Diagnostic? */ 3437 if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) 3438 return (EOPNOTSUPP); 3439 3440 if ((*diagnostic = sadb_labelchk(ksi)) != 0) 3441 return (EINVAL); 3442 3443 /* 3444 * XXX Policy : I'm not checking identities at this time, 3445 * but if I did, I'd do them here, before I sent 3446 * the weak key check up to the algorithm. 3447 */ 3448 3449 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 3450 3451 /* 3452 * First locate the authentication algorithm. 3453 */ 3454 #ifdef IPSEC_LATENCY_TEST 3455 if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) { 3456 #else 3457 if (akey != NULL) { 3458 #endif 3459 ipsec_alginfo_t *aalg; 3460 3461 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] 3462 [assoc->sadb_sa_auth]; 3463 if (aalg == NULL || !ALG_VALID(aalg)) { 3464 rw_exit(&ipss->ipsec_alg_lock); 3465 esp1dbg(espstack, ("Couldn't find auth alg #%d.\n", 3466 assoc->sadb_sa_auth)); 3467 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3468 return (EINVAL); 3469 } 3470 3471 /* 3472 * Sanity check key sizes. 3473 * Note: It's not possible to use SADB_AALG_NONE because 3474 * this auth_alg is not defined with ALG_FLAG_VALID. If this 3475 * ever changes, the same check for SADB_AALG_NONE and 3476 * a auth_key != NULL should be made here ( see below). 3477 */ 3478 if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) { 3479 rw_exit(&ipss->ipsec_alg_lock); 3480 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS; 3481 return (EINVAL); 3482 } 3483 ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3484 3485 /* check key and fix parity if needed */ 3486 if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE, 3487 diagnostic) != 0) { 3488 rw_exit(&ipss->ipsec_alg_lock); 3489 return (EINVAL); 3490 } 3491 } 3492 3493 /* 3494 * Then locate the encryption algorithm. 3495 */ 3496 if (ekey != NULL) { 3497 uint_t keybits; 3498 ipsec_alginfo_t *ealg; 3499 3500 ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] 3501 [assoc->sadb_sa_encrypt]; 3502 if (ealg == NULL || !ALG_VALID(ealg)) { 3503 rw_exit(&ipss->ipsec_alg_lock); 3504 esp1dbg(espstack, ("Couldn't find encr alg #%d.\n", 3505 assoc->sadb_sa_encrypt)); 3506 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3507 return (EINVAL); 3508 } 3509 3510 /* 3511 * Sanity check key sizes. If the encryption algorithm is 3512 * SADB_EALG_NULL but the encryption key is NOT 3513 * NULL then complain. 3514 * 3515 * The keying material includes salt bits if required by 3516 * algorithm and optionally the Initial IV, check the 3517 * length of whats left. 3518 */ 3519 keybits = ekey->sadb_key_bits; 3520 keybits -= ekey->sadb_key_reserved; 3521 keybits -= SADB_8TO1(ealg->alg_saltlen); 3522 if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) || 3523 (!ipsec_valid_key_size(keybits, ealg))) { 3524 rw_exit(&ipss->ipsec_alg_lock); 3525 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS; 3526 return (EINVAL); 3527 } 3528 ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3529 3530 /* check key */ 3531 if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE, 3532 diagnostic) != 0) { 3533 rw_exit(&ipss->ipsec_alg_lock); 3534 return (EINVAL); 3535 } 3536 } 3537 rw_exit(&ipss->ipsec_alg_lock); 3538 3539 return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, 3540 diagnostic, espstack)); 3541 } 3542 3543 /* 3544 * Update a security association. Updates come in two varieties. The first 3545 * is an update of lifetimes on a non-larval SA. The second is an update of 3546 * a larval SA, which ends up looking a lot more like an add. 3547 */ 3548 static int 3549 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3550 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type) 3551 { 3552 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3553 mblk_t *buf_pkt; 3554 int rcode; 3555 3556 sadb_address_t *dstext = 3557 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3558 3559 if (dstext == NULL) { 3560 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3561 return (EINVAL); 3562 } 3563 3564 rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb, 3565 diagnostic, espstack->esp_pfkey_q, esp_add_sa, 3566 espstack->ipsecesp_netstack, sadb_msg_type); 3567 3568 if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) || 3569 (rcode != 0)) { 3570 return (rcode); 3571 } 3572 3573 HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec, 3574 espstack->esp_dropper, buf_pkt); 3575 3576 return (rcode); 3577 } 3578 3579 /* XXX refactor me */ 3580 /* 3581 * Delete a security association. This is REALLY likely to be code common to 3582 * both AH and ESP. Find the association, then unlink it. 3583 */ 3584 static int 3585 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3586 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type) 3587 { 3588 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3589 sadb_address_t *dstext = 3590 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3591 sadb_address_t *srcext = 3592 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3593 struct sockaddr_in *sin; 3594 3595 if (assoc == NULL) { 3596 if (dstext != NULL) { 3597 sin = (struct sockaddr_in *)(dstext + 1); 3598 } else if (srcext != NULL) { 3599 sin = (struct sockaddr_in *)(srcext + 1); 3600 } else { 3601 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3602 return (EINVAL); 3603 } 3604 return (sadb_purge_sa(mp, ksi, 3605 (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : 3606 &espstack->esp_sadb.s_v4, diagnostic, 3607 espstack->esp_pfkey_q)); 3608 } 3609 3610 return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic, 3611 espstack->esp_pfkey_q, sadb_msg_type)); 3612 } 3613 3614 /* XXX refactor me */ 3615 /* 3616 * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP 3617 * messages. 3618 */ 3619 static void 3620 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 3621 { 3622 int error; 3623 sadb_msg_t *samsg; 3624 3625 /* 3626 * Dump each fanout, bailing if error is non-zero. 3627 */ 3628 3629 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi, 3630 &espstack->esp_sadb.s_v4); 3631 if (error != 0) 3632 goto bail; 3633 3634 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi, 3635 &espstack->esp_sadb.s_v6); 3636 bail: 3637 ASSERT(mp->b_cont != NULL); 3638 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3639 samsg->sadb_msg_errno = (uint8_t)error; 3640 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, 3641 (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL); 3642 } 3643 3644 /* 3645 * First-cut reality check for an inbound PF_KEY message. 3646 */ 3647 static boolean_t 3648 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi, 3649 ipsecesp_stack_t *espstack) 3650 { 3651 int diagnostic; 3652 3653 if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) { 3654 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT; 3655 goto badmsg; 3656 } 3657 if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL || 3658 ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) { 3659 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT; 3660 goto badmsg; 3661 } 3662 return (B_FALSE); /* False ==> no failures */ 3663 3664 badmsg: 3665 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 3666 ksi->ks_in_serial); 3667 return (B_TRUE); /* True ==> failures */ 3668 } 3669 3670 /* 3671 * ESP parsing of PF_KEY messages. Keysock did most of the really silly 3672 * error cases. What I receive is a fully-formed, syntactically legal 3673 * PF_KEY message. I then need to check semantics... 3674 * 3675 * This code may become common to AH and ESP. Stay tuned. 3676 * 3677 * I also make the assumption that db_ref's are cool. If this assumption 3678 * is wrong, this means that someone other than keysock or me has been 3679 * mucking with PF_KEY messages. 3680 */ 3681 static void 3682 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack) 3683 { 3684 mblk_t *msg = mp->b_cont; 3685 sadb_msg_t *samsg; 3686 keysock_in_t *ksi; 3687 int error; 3688 int diagnostic = SADB_X_DIAGNOSTIC_NONE; 3689 3690 ASSERT(msg != NULL); 3691 3692 samsg = (sadb_msg_t *)msg->b_rptr; 3693 ksi = (keysock_in_t *)mp->b_rptr; 3694 3695 /* 3696 * If applicable, convert unspecified AF_INET6 to unspecified 3697 * AF_INET. And do other address reality checks. 3698 */ 3699 if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp, 3700 espstack->ipsecesp_netstack) || 3701 esp_pfkey_reality_failures(mp, ksi, espstack)) { 3702 return; 3703 } 3704 3705 switch (samsg->sadb_msg_type) { 3706 case SADB_ADD: 3707 error = esp_add_sa(mp, ksi, &diagnostic, 3708 espstack->ipsecesp_netstack); 3709 if (error != 0) { 3710 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3711 diagnostic, ksi->ks_in_serial); 3712 } 3713 /* else esp_add_sa() took care of things. */ 3714 break; 3715 case SADB_DELETE: 3716 case SADB_X_DELPAIR: 3717 case SADB_X_DELPAIR_STATE: 3718 error = esp_del_sa(mp, ksi, &diagnostic, espstack, 3719 samsg->sadb_msg_type); 3720 if (error != 0) { 3721 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3722 diagnostic, ksi->ks_in_serial); 3723 } 3724 /* Else esp_del_sa() took care of things. */ 3725 break; 3726 case SADB_GET: 3727 error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb, 3728 &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type); 3729 if (error != 0) { 3730 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3731 diagnostic, ksi->ks_in_serial); 3732 } 3733 /* Else sadb_get_sa() took care of things. */ 3734 break; 3735 case SADB_FLUSH: 3736 sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack); 3737 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL); 3738 break; 3739 case SADB_REGISTER: 3740 /* 3741 * Hmmm, let's do it! Check for extensions (there should 3742 * be none), extract the fields, call esp_register_out(), 3743 * then either free or report an error. 3744 * 3745 * Keysock takes care of the PF_KEY bookkeeping for this. 3746 */ 3747 if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid, 3748 ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) { 3749 freemsg(mp); 3750 } else { 3751 /* 3752 * Only way this path hits is if there is a memory 3753 * failure. It will not return B_FALSE because of 3754 * lack of esp_pfkey_q if I am in wput(). 3755 */ 3756 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, 3757 diagnostic, ksi->ks_in_serial); 3758 } 3759 break; 3760 case SADB_UPDATE: 3761 case SADB_X_UPDATEPAIR: 3762 /* 3763 * Find a larval, if not there, find a full one and get 3764 * strict. 3765 */ 3766 error = esp_update_sa(mp, ksi, &diagnostic, espstack, 3767 samsg->sadb_msg_type); 3768 if (error != 0) { 3769 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3770 diagnostic, ksi->ks_in_serial); 3771 } 3772 /* else esp_update_sa() took care of things. */ 3773 break; 3774 case SADB_GETSPI: 3775 /* 3776 * Reserve a new larval entry. 3777 */ 3778 esp_getspi(mp, ksi, espstack); 3779 break; 3780 case SADB_ACQUIRE: 3781 /* 3782 * Find larval and/or ACQUIRE record and kill it (them), I'm 3783 * most likely an error. Inbound ACQUIRE messages should only 3784 * have the base header. 3785 */ 3786 sadb_in_acquire(samsg, &espstack->esp_sadb, 3787 espstack->esp_pfkey_q, espstack->ipsecesp_netstack); 3788 freemsg(mp); 3789 break; 3790 case SADB_DUMP: 3791 /* 3792 * Dump all entries. 3793 */ 3794 esp_dump(mp, ksi, espstack); 3795 /* esp_dump will take care of the return message, etc. */ 3796 break; 3797 case SADB_EXPIRE: 3798 /* Should never reach me. */ 3799 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP, 3800 diagnostic, ksi->ks_in_serial); 3801 break; 3802 default: 3803 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, 3804 SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial); 3805 break; 3806 } 3807 } 3808 3809 /* 3810 * Handle case where PF_KEY says it can't find a keysock for one of my 3811 * ACQUIRE messages. 3812 */ 3813 static void 3814 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack) 3815 { 3816 sadb_msg_t *samsg; 3817 keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr; 3818 3819 if (mp->b_cont == NULL) { 3820 freemsg(mp); 3821 return; 3822 } 3823 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3824 3825 /* 3826 * If keysock can't find any registered, delete the acquire record 3827 * immediately, and handle errors. 3828 */ 3829 if (samsg->sadb_msg_type == SADB_ACQUIRE) { 3830 samsg->sadb_msg_errno = kse->ks_err_errno; 3831 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg)); 3832 /* 3833 * Use the write-side of the esp_pfkey_q 3834 */ 3835 sadb_in_acquire(samsg, &espstack->esp_sadb, 3836 WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack); 3837 } 3838 3839 freemsg(mp); 3840 } 3841 3842 /* 3843 * ESP module read put routine. 3844 */ 3845 static int 3846 ipsecesp_rput(queue_t *q, mblk_t *mp) 3847 { 3848 putnext(q, mp); 3849 return (0); 3850 } 3851 3852 /* 3853 * ESP module write put routine. 3854 */ 3855 static int 3856 ipsecesp_wput(queue_t *q, mblk_t *mp) 3857 { 3858 ipsec_info_t *ii; 3859 struct iocblk *iocp; 3860 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 3861 3862 esp3dbg(espstack, ("In esp_wput().\n")); 3863 3864 /* NOTE: Each case must take care of freeing or passing mp. */ 3865 switch (mp->b_datap->db_type) { 3866 case M_CTL: 3867 if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) { 3868 /* Not big enough message. */ 3869 freemsg(mp); 3870 break; 3871 } 3872 ii = (ipsec_info_t *)mp->b_rptr; 3873 3874 switch (ii->ipsec_info_type) { 3875 case KEYSOCK_OUT_ERR: 3876 esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n")); 3877 esp_keysock_no_socket(mp, espstack); 3878 break; 3879 case KEYSOCK_IN: 3880 ESP_BUMP_STAT(espstack, keysock_in); 3881 esp3dbg(espstack, ("Got KEYSOCK_IN message.\n")); 3882 3883 /* Parse the message. */ 3884 esp_parse_pfkey(mp, espstack); 3885 break; 3886 case KEYSOCK_HELLO: 3887 sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp, 3888 esp_ager, (void *)espstack, &espstack->esp_event, 3889 SADB_SATYPE_ESP); 3890 break; 3891 default: 3892 esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n", 3893 ii->ipsec_info_type)); 3894 freemsg(mp); 3895 break; 3896 } 3897 break; 3898 case M_IOCTL: 3899 iocp = (struct iocblk *)mp->b_rptr; 3900 switch (iocp->ioc_cmd) { 3901 case ND_SET: 3902 case ND_GET: 3903 if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) { 3904 qreply(q, mp); 3905 return (0); 3906 } else { 3907 iocp->ioc_error = ENOENT; 3908 } 3909 /* FALLTHRU */ 3910 default: 3911 /* We really don't support any other ioctls, do we? */ 3912 3913 /* Return EINVAL */ 3914 if (iocp->ioc_error != ENOENT) 3915 iocp->ioc_error = EINVAL; 3916 iocp->ioc_count = 0; 3917 mp->b_datap->db_type = M_IOCACK; 3918 qreply(q, mp); 3919 return (0); 3920 } 3921 default: 3922 esp3dbg(espstack, 3923 ("Got default message, type %d, passing to IP.\n", 3924 mp->b_datap->db_type)); 3925 putnext(q, mp); 3926 } 3927 return (0); 3928 } 3929 3930 /* 3931 * Wrapper to allow IP to trigger an ESP association failure message 3932 * during inbound SA selection. 3933 */ 3934 void 3935 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt, 3936 uint32_t spi, void *addr, int af, ip_recv_attr_t *ira) 3937 { 3938 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 3939 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3940 ipsec_stack_t *ipss = ns->netstack_ipsec; 3941 3942 if (espstack->ipsecesp_log_unknown_spi) { 3943 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi, 3944 addr, af, espstack->ipsecesp_netstack); 3945 } 3946 3947 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 3948 DROPPER(ipss, ipds_esp_no_sa), 3949 &espstack->esp_dropper); 3950 } 3951 3952 /* 3953 * Initialize the ESP input and output processing functions. 3954 */ 3955 void 3956 ipsecesp_init_funcs(ipsa_t *sa) 3957 { 3958 if (sa->ipsa_output_func == NULL) 3959 sa->ipsa_output_func = esp_outbound; 3960 if (sa->ipsa_input_func == NULL) 3961 sa->ipsa_input_func = esp_inbound; 3962 } 3963