1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2017 Joyent, Inc. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/errno.h> 32 #include <sys/strlog.h> 33 #include <sys/tihdr.h> 34 #include <sys/socket.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/kmem.h> 38 #include <sys/zone.h> 39 #include <sys/sysmacros.h> 40 #include <sys/cmn_err.h> 41 #include <sys/vtrace.h> 42 #include <sys/debug.h> 43 #include <sys/atomic.h> 44 #include <sys/strsun.h> 45 #include <sys/random.h> 46 #include <netinet/in.h> 47 #include <net/if.h> 48 #include <netinet/ip6.h> 49 #include <net/pfkeyv2.h> 50 #include <net/pfpolicy.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/nd.h> 55 #include <inet/ip.h> 56 #include <inet/ip_impl.h> 57 #include <inet/ip6.h> 58 #include <inet/ip_if.h> 59 #include <inet/ip_ndp.h> 60 #include <inet/sadb.h> 61 #include <inet/ipsec_info.h> 62 #include <inet/ipsec_impl.h> 63 #include <inet/ipsecesp.h> 64 #include <inet/ipdrop.h> 65 #include <inet/tcp.h> 66 #include <sys/kstat.h> 67 #include <sys/policy.h> 68 #include <sys/strsun.h> 69 #include <sys/strsubr.h> 70 #include <inet/udp_impl.h> 71 #include <sys/taskq.h> 72 #include <sys/note.h> 73 74 #include <sys/tsol/tnet.h> 75 76 /* 77 * Table of ND variables supported by ipsecesp. These are loaded into 78 * ipsecesp_g_nd in ipsecesp_init_nd. 79 * All of these are alterable, within the min/max values given, at run time. 80 */ 81 static ipsecespparam_t lcl_param_arr[] = { 82 /* min max value name */ 83 { 0, 3, 0, "ipsecesp_debug"}, 84 { 125, 32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"}, 85 { 1, 10, 1, "ipsecesp_reap_delay"}, 86 { 1, SADB_MAX_REPLAY, 64, "ipsecesp_replay_size"}, 87 { 1, 300, 15, "ipsecesp_acquire_timeout"}, 88 { 1, 1800, 90, "ipsecesp_larval_timeout"}, 89 /* Default lifetime values for ACQUIRE messages. */ 90 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_bytes"}, 91 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_bytes"}, 92 { 0, 0xffffffffU, 24000, "ipsecesp_default_soft_addtime"}, 93 { 0, 0xffffffffU, 28800, "ipsecesp_default_hard_addtime"}, 94 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_usetime"}, 95 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_usetime"}, 96 { 0, 1, 0, "ipsecesp_log_unknown_spi"}, 97 { 0, 2, 1, "ipsecesp_padding_check"}, 98 { 0, 600, 20, "ipsecesp_nat_keepalive_interval"}, 99 }; 100 /* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */ 101 102 #define esp0dbg(a) printf a 103 /* NOTE: != 0 instead of > 0 so lint doesn't complain. */ 104 #define esp1dbg(espstack, a) if (espstack->ipsecesp_debug != 0) printf a 105 #define esp2dbg(espstack, a) if (espstack->ipsecesp_debug > 1) printf a 106 #define esp3dbg(espstack, a) if (espstack->ipsecesp_debug > 2) printf a 107 108 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *); 109 static int ipsecesp_close(queue_t *, int, cred_t *); 110 static void ipsecesp_wput(queue_t *, mblk_t *); 111 static void *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns); 112 static void ipsecesp_stack_fini(netstackid_t stackid, void *arg); 113 114 static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *); 115 static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *); 116 static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *); 117 118 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t, 119 ipsecesp_stack_t *, cred_t *); 120 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t, 121 kstat_named_t **, ipsecesp_stack_t *); 122 static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *, 123 ipsa_t *, uint_t); 124 static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *, 125 ipsa_t *, uchar_t *, uint_t); 126 127 /* Setable in /etc/system */ 128 uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; 129 130 static struct module_info info = { 131 5137, "ipsecesp", 0, INFPSZ, 65536, 1024 132 }; 133 134 static struct qinit rinit = { 135 (pfi_t)putnext, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 136 NULL 137 }; 138 139 static struct qinit winit = { 140 (pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 141 NULL 142 }; 143 144 struct streamtab ipsecespinfo = { 145 &rinit, &winit, NULL, NULL 146 }; 147 148 static taskq_t *esp_taskq; 149 150 /* 151 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now. 152 * 153 * Question: Do I need this, given that all instance's esps->esps_wq point 154 * to IP? 155 * 156 * Answer: Yes, because I need to know which queue is BOUND to 157 * IPPROTO_ESP 158 */ 159 160 static int esp_kstat_update(kstat_t *, int); 161 162 static boolean_t 163 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) 164 { 165 espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", 166 "net", KSTAT_TYPE_NAMED, 167 sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid); 168 169 if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) 170 return (B_FALSE); 171 172 espstack->esp_kstats = espstack->esp_ksp->ks_data; 173 174 espstack->esp_ksp->ks_update = esp_kstat_update; 175 espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid; 176 177 #define K64 KSTAT_DATA_UINT64 178 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64) 179 180 KI(num_aalgs); 181 KI(num_ealgs); 182 KI(good_auth); 183 KI(bad_auth); 184 KI(bad_padding); 185 KI(replay_failures); 186 KI(replay_early_failures); 187 KI(keysock_in); 188 KI(out_requests); 189 KI(acquire_requests); 190 KI(bytes_expired); 191 KI(out_discards); 192 KI(crypto_sync); 193 KI(crypto_async); 194 KI(crypto_failures); 195 KI(bad_decrypt); 196 KI(sa_port_renumbers); 197 198 #undef KI 199 #undef K64 200 201 kstat_install(espstack->esp_ksp); 202 203 return (B_TRUE); 204 } 205 206 static int 207 esp_kstat_update(kstat_t *kp, int rw) 208 { 209 esp_kstats_t *ekp; 210 netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; 211 netstack_t *ns; 212 ipsec_stack_t *ipss; 213 214 if ((kp == NULL) || (kp->ks_data == NULL)) 215 return (EIO); 216 217 if (rw == KSTAT_WRITE) 218 return (EACCES); 219 220 ns = netstack_find_by_stackid(stackid); 221 if (ns == NULL) 222 return (-1); 223 ipss = ns->netstack_ipsec; 224 if (ipss == NULL) { 225 netstack_rele(ns); 226 return (-1); 227 } 228 ekp = (esp_kstats_t *)kp->ks_data; 229 230 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 231 ekp->esp_stat_num_aalgs.value.ui64 = 232 ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; 233 ekp->esp_stat_num_ealgs.value.ui64 = 234 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 235 rw_exit(&ipss->ipsec_alg_lock); 236 237 netstack_rele(ns); 238 return (0); 239 } 240 241 #ifdef DEBUG 242 /* 243 * Debug routine, useful to see pre-encryption data. 244 */ 245 static char * 246 dump_msg(mblk_t *mp) 247 { 248 char tmp_str[3], tmp_line[256]; 249 250 while (mp != NULL) { 251 unsigned char *ptr; 252 253 printf("mblk address 0x%p, length %ld, db_ref %d " 254 "type %d, base 0x%p, lim 0x%p\n", 255 (void *) mp, (long)(mp->b_wptr - mp->b_rptr), 256 mp->b_datap->db_ref, mp->b_datap->db_type, 257 (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim); 258 ptr = mp->b_rptr; 259 260 tmp_line[0] = '\0'; 261 while (ptr < mp->b_wptr) { 262 uint_t diff; 263 264 diff = (ptr - mp->b_rptr); 265 if (!(diff & 0x1f)) { 266 if (strlen(tmp_line) > 0) { 267 printf("bytes: %s\n", tmp_line); 268 tmp_line[0] = '\0'; 269 } 270 } 271 if (!(diff & 0x3)) 272 (void) strcat(tmp_line, " "); 273 (void) sprintf(tmp_str, "%02x", *ptr); 274 (void) strcat(tmp_line, tmp_str); 275 ptr++; 276 } 277 if (strlen(tmp_line) > 0) 278 printf("bytes: %s\n", tmp_line); 279 280 mp = mp->b_cont; 281 } 282 283 return ("\n"); 284 } 285 286 #else /* DEBUG */ 287 static char * 288 dump_msg(mblk_t *mp) 289 { 290 printf("Find value of mp %p.\n", mp); 291 return ("\n"); 292 } 293 #endif /* DEBUG */ 294 295 /* 296 * Don't have to lock age_interval, as only one thread will access it at 297 * a time, because I control the one function that does with timeout(). 298 */ 299 static void 300 esp_ager(void *arg) 301 { 302 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 303 netstack_t *ns = espstack->ipsecesp_netstack; 304 hrtime_t begin = gethrtime(); 305 306 sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 307 espstack->ipsecesp_reap_delay, ns); 308 sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q, 309 espstack->ipsecesp_reap_delay, ns); 310 311 espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q, 312 esp_ager, espstack, 313 &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max, 314 info.mi_idnum); 315 } 316 317 /* 318 * Get an ESP NDD parameter. 319 */ 320 /* ARGSUSED */ 321 static int 322 ipsecesp_param_get( 323 queue_t *q, 324 mblk_t *mp, 325 caddr_t cp, 326 cred_t *cr) 327 { 328 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 329 uint_t value; 330 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 331 332 mutex_enter(&espstack->ipsecesp_param_lock); 333 value = ipsecesppa->ipsecesp_param_value; 334 mutex_exit(&espstack->ipsecesp_param_lock); 335 336 (void) mi_mpprintf(mp, "%u", value); 337 return (0); 338 } 339 340 /* 341 * This routine sets an NDD variable in a ipsecespparam_t structure. 342 */ 343 /* ARGSUSED */ 344 static int 345 ipsecesp_param_set( 346 queue_t *q, 347 mblk_t *mp, 348 char *value, 349 caddr_t cp, 350 cred_t *cr) 351 { 352 ulong_t new_value; 353 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 354 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 355 356 /* 357 * Fail the request if the new value does not lie within the 358 * required bounds. 359 */ 360 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 361 new_value < ipsecesppa->ipsecesp_param_min || 362 new_value > ipsecesppa->ipsecesp_param_max) { 363 return (EINVAL); 364 } 365 366 /* Set the new value */ 367 mutex_enter(&espstack->ipsecesp_param_lock); 368 ipsecesppa->ipsecesp_param_value = new_value; 369 mutex_exit(&espstack->ipsecesp_param_lock); 370 return (0); 371 } 372 373 /* 374 * Using lifetime NDD variables, fill in an extended combination's 375 * lifetime information. 376 */ 377 void 378 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) 379 { 380 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 381 382 ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes; 383 ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes; 384 ecomb->sadb_x_ecomb_soft_addtime = 385 espstack->ipsecesp_default_soft_addtime; 386 ecomb->sadb_x_ecomb_hard_addtime = 387 espstack->ipsecesp_default_hard_addtime; 388 ecomb->sadb_x_ecomb_soft_usetime = 389 espstack->ipsecesp_default_soft_usetime; 390 ecomb->sadb_x_ecomb_hard_usetime = 391 espstack->ipsecesp_default_hard_usetime; 392 } 393 394 /* 395 * Initialize things for ESP at module load time. 396 */ 397 boolean_t 398 ipsecesp_ddi_init(void) 399 { 400 esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, 401 IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); 402 403 /* 404 * We want to be informed each time a stack is created or 405 * destroyed in the kernel, so we can maintain the 406 * set of ipsecesp_stack_t's. 407 */ 408 netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL, 409 ipsecesp_stack_fini); 410 411 return (B_TRUE); 412 } 413 414 /* 415 * Walk through the param array specified registering each element with the 416 * named dispatch handler. 417 */ 418 static boolean_t 419 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt) 420 { 421 for (; cnt-- > 0; espp++) { 422 if (espp->ipsecesp_param_name != NULL && 423 espp->ipsecesp_param_name[0]) { 424 if (!nd_load(ndp, 425 espp->ipsecesp_param_name, 426 ipsecesp_param_get, ipsecesp_param_set, 427 (caddr_t)espp)) { 428 nd_free(ndp); 429 return (B_FALSE); 430 } 431 } 432 } 433 return (B_TRUE); 434 } 435 436 /* 437 * Initialize things for ESP for each stack instance 438 */ 439 static void * 440 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns) 441 { 442 ipsecesp_stack_t *espstack; 443 ipsecespparam_t *espp; 444 445 espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack), 446 KM_SLEEP); 447 espstack->ipsecesp_netstack = ns; 448 449 espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 450 espstack->ipsecesp_params = espp; 451 bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr)); 452 453 (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp, 454 A_CNT(lcl_param_arr)); 455 456 (void) esp_kstat_init(espstack, stackid); 457 458 espstack->esp_sadb.s_acquire_timeout = 459 &espstack->ipsecesp_acquire_timeout; 460 sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size, 461 espstack->ipsecesp_netstack); 462 463 mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0); 464 465 ip_drop_register(&espstack->esp_dropper, "IPsec ESP"); 466 return (espstack); 467 } 468 469 /* 470 * Destroy things for ESP at module unload time. 471 */ 472 void 473 ipsecesp_ddi_destroy(void) 474 { 475 netstack_unregister(NS_IPSECESP); 476 taskq_destroy(esp_taskq); 477 } 478 479 /* 480 * Destroy things for ESP for one stack instance 481 */ 482 static void 483 ipsecesp_stack_fini(netstackid_t stackid, void *arg) 484 { 485 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 486 487 if (espstack->esp_pfkey_q != NULL) { 488 (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event); 489 } 490 espstack->esp_sadb.s_acquire_timeout = NULL; 491 sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack); 492 ip_drop_unregister(&espstack->esp_dropper); 493 mutex_destroy(&espstack->ipsecesp_param_lock); 494 nd_free(&espstack->ipsecesp_g_nd); 495 496 kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr)); 497 espstack->ipsecesp_params = NULL; 498 kstat_delete_netstack(espstack->esp_ksp, stackid); 499 espstack->esp_ksp = NULL; 500 espstack->esp_kstats = NULL; 501 kmem_free(espstack, sizeof (*espstack)); 502 } 503 504 /* 505 * ESP module open routine, which is here for keysock plumbing. 506 * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old 507 * Days of export control, and fears that ESP would not be allowed 508 * to be shipped at all by default. Eventually, keysock should 509 * either access AH and ESP via modstubs or krtld dependencies, or 510 * perhaps be folded in with AH and ESP into a single IPsec/netsec 511 * module ("netsec" if PF_KEY provides more than AH/ESP keying tables). 512 */ 513 /* ARGSUSED */ 514 static int 515 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 516 { 517 netstack_t *ns; 518 ipsecesp_stack_t *espstack; 519 520 if (secpolicy_ip_config(credp, B_FALSE) != 0) 521 return (EPERM); 522 523 if (q->q_ptr != NULL) 524 return (0); /* Re-open of an already open instance. */ 525 526 if (sflag != MODOPEN) 527 return (EINVAL); 528 529 ns = netstack_find_by_cred(credp); 530 ASSERT(ns != NULL); 531 espstack = ns->netstack_ipsecesp; 532 ASSERT(espstack != NULL); 533 534 q->q_ptr = espstack; 535 WR(q)->q_ptr = q->q_ptr; 536 537 qprocson(q); 538 return (0); 539 } 540 541 /* 542 * ESP module close routine. 543 */ 544 /* ARGSUSED */ 545 static int 546 ipsecesp_close(queue_t *q, int flags __unused, cred_t *credp __unused) 547 { 548 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 549 550 /* 551 * Clean up q_ptr, if needed. 552 */ 553 qprocsoff(q); 554 555 /* Keysock queue check is safe, because of OCEXCL perimeter. */ 556 557 if (q == espstack->esp_pfkey_q) { 558 esp1dbg(espstack, 559 ("ipsecesp_close: Ummm... keysock is closing ESP.\n")); 560 espstack->esp_pfkey_q = NULL; 561 /* Detach qtimeouts. */ 562 (void) quntimeout(q, espstack->esp_event); 563 } 564 565 netstack_rele(espstack->ipsecesp_netstack); 566 return (0); 567 } 568 569 /* 570 * Add a number of bytes to what the SA has protected so far. Return 571 * B_TRUE if the SA can still protect that many bytes. 572 * 573 * Caller must REFRELE the passed-in assoc. This function must REFRELE 574 * any obtained peer SA. 575 */ 576 static boolean_t 577 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) 578 { 579 ipsa_t *inassoc, *outassoc; 580 isaf_t *bucket; 581 boolean_t inrc, outrc, isv6; 582 sadb_t *sp; 583 int outhash; 584 netstack_t *ns = assoc->ipsa_netstack; 585 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 586 587 /* No peer? No problem! */ 588 if (!assoc->ipsa_haspeer) { 589 return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes, 590 B_TRUE)); 591 } 592 593 /* 594 * Otherwise, we want to grab both the original assoc and its peer. 595 * There might be a race for this, but if it's a real race, two 596 * expire messages may occur. We limit this by only sending the 597 * expire message on one of the peers, we'll pick the inbound 598 * arbitrarily. 599 * 600 * If we need tight synchronization on the peer SA, then we need to 601 * reconsider. 602 */ 603 604 /* Use address length to select IPv6/IPv4 */ 605 isv6 = (assoc->ipsa_addrfam == AF_INET6); 606 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 607 608 if (inbound) { 609 inassoc = assoc; 610 if (isv6) { 611 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 612 &inassoc->ipsa_dstaddr)); 613 } else { 614 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 615 &inassoc->ipsa_dstaddr)); 616 } 617 bucket = &sp->sdb_of[outhash]; 618 mutex_enter(&bucket->isaf_lock); 619 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 620 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 621 inassoc->ipsa_addrfam); 622 mutex_exit(&bucket->isaf_lock); 623 if (outassoc == NULL) { 624 /* Q: Do we wish to set haspeer == B_FALSE? */ 625 esp0dbg(("esp_age_bytes: " 626 "can't find peer for inbound.\n")); 627 return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc, 628 bytes, B_TRUE)); 629 } 630 } else { 631 outassoc = assoc; 632 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 633 mutex_enter(&bucket->isaf_lock); 634 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 635 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 636 outassoc->ipsa_addrfam); 637 mutex_exit(&bucket->isaf_lock); 638 if (inassoc == NULL) { 639 /* Q: Do we wish to set haspeer == B_FALSE? */ 640 esp0dbg(("esp_age_bytes: " 641 "can't find peer for outbound.\n")); 642 return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc, 643 bytes, B_TRUE)); 644 } 645 } 646 647 inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE); 648 outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE); 649 650 /* 651 * REFRELE any peer SA. 652 * 653 * Because of the multi-line macro nature of IPSA_REFRELE, keep 654 * them in { }. 655 */ 656 if (inbound) { 657 IPSA_REFRELE(outassoc); 658 } else { 659 IPSA_REFRELE(inassoc); 660 } 661 662 return (inrc && outrc); 663 } 664 665 /* 666 * Do incoming NAT-T manipulations for packet. 667 * Returns NULL if the mblk chain is consumed. 668 */ 669 static mblk_t * 670 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc) 671 { 672 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 673 tcpha_t *tcpha; 674 udpha_t *udpha; 675 /* Initialize to our inbound cksum adjustment... */ 676 uint32_t sum = assoc->ipsa_inbound_cksum; 677 678 switch (ipha->ipha_protocol) { 679 case IPPROTO_TCP: 680 tcpha = (tcpha_t *)(data_mp->b_rptr + 681 IPH_HDR_LENGTH(ipha)); 682 683 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16) 684 sum += ~ntohs(tcpha->tha_sum) & 0xFFFF; 685 DOWN_SUM(sum); 686 DOWN_SUM(sum); 687 tcpha->tha_sum = ~htons(sum); 688 break; 689 case IPPROTO_UDP: 690 udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha)); 691 692 if (udpha->uha_checksum != 0) { 693 /* Adujst if the inbound one was not zero. */ 694 sum += ~ntohs(udpha->uha_checksum) & 0xFFFF; 695 DOWN_SUM(sum); 696 DOWN_SUM(sum); 697 udpha->uha_checksum = ~htons(sum); 698 if (udpha->uha_checksum == 0) 699 udpha->uha_checksum = 0xFFFF; 700 } 701 #undef DOWN_SUM 702 break; 703 case IPPROTO_IP: 704 /* 705 * This case is only an issue for self-encapsulated 706 * packets. So for now, fall through. 707 */ 708 break; 709 } 710 return (data_mp); 711 } 712 713 714 /* 715 * Strip ESP header, check padding, and fix IP header. 716 * Returns B_TRUE on success, B_FALSE if an error occured. 717 */ 718 static boolean_t 719 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, 720 kstat_named_t **counter, ipsecesp_stack_t *espstack) 721 { 722 ipha_t *ipha; 723 ip6_t *ip6h; 724 uint_t divpoint; 725 mblk_t *scratch; 726 uint8_t nexthdr, padlen; 727 uint8_t lastpad; 728 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 729 uint8_t *lastbyte; 730 731 /* 732 * Strip ESP data and fix IP header. 733 * 734 * XXX In case the beginning of esp_inbound() changes to not do a 735 * pullup, this part of the code can remain unchanged. 736 */ 737 if (isv4) { 738 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t)); 739 ipha = (ipha_t *)data_mp->b_rptr; 740 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) + 741 IPH_HDR_LENGTH(ipha)); 742 divpoint = IPH_HDR_LENGTH(ipha); 743 } else { 744 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t)); 745 ip6h = (ip6_t *)data_mp->b_rptr; 746 divpoint = ip_hdr_length_v6(data_mp, ip6h); 747 } 748 749 scratch = data_mp; 750 while (scratch->b_cont != NULL) 751 scratch = scratch->b_cont; 752 753 ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3); 754 755 /* 756 * "Next header" and padding length are the last two bytes in the 757 * ESP-protected datagram, thus the explicit - 1 and - 2. 758 * lastpad is the last byte of the padding, which can be used for 759 * a quick check to see if the padding is correct. 760 */ 761 lastbyte = scratch->b_wptr - 1; 762 nexthdr = *lastbyte--; 763 padlen = *lastbyte--; 764 765 if (isv4) { 766 /* Fix part of the IP header. */ 767 ipha->ipha_protocol = nexthdr; 768 /* 769 * Reality check the padlen. The explicit - 2 is for the 770 * padding length and the next-header bytes. 771 */ 772 if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 - 773 sizeof (esph_t) - ivlen) { 774 ESP_BUMP_STAT(espstack, bad_decrypt); 775 ipsec_rl_strlog(espstack->ipsecesp_netstack, 776 info.mi_idnum, 0, 0, 777 SL_ERROR | SL_WARN, 778 "Corrupt ESP packet (padlen too big).\n"); 779 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 780 padlen)); 781 esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp " 782 "hdr - ivlen(%d) = %d.\n", 783 ntohs(ipha->ipha_length), ivlen, 784 (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) - 785 2 - sizeof (esph_t) - ivlen))); 786 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 787 return (B_FALSE); 788 } 789 790 /* 791 * Fix the rest of the header. The explicit - 2 is for the 792 * padding length and the next-header bytes. 793 */ 794 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen - 795 2 - sizeof (esph_t) - ivlen); 796 ipha->ipha_hdr_checksum = 0; 797 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 798 } else { 799 if (ip6h->ip6_nxt == IPPROTO_ESP) { 800 ip6h->ip6_nxt = nexthdr; 801 } else { 802 ip_pkt_t ipp; 803 804 bzero(&ipp, sizeof (ipp)); 805 (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, 806 NULL); 807 if (ipp.ipp_dstopts != NULL) { 808 ipp.ipp_dstopts->ip6d_nxt = nexthdr; 809 } else if (ipp.ipp_rthdr != NULL) { 810 ipp.ipp_rthdr->ip6r_nxt = nexthdr; 811 } else if (ipp.ipp_hopopts != NULL) { 812 ipp.ipp_hopopts->ip6h_nxt = nexthdr; 813 } else { 814 /* Panic a DEBUG kernel. */ 815 ASSERT(ipp.ipp_hopopts != NULL); 816 /* Otherwise, pretend it's IP + ESP. */ 817 cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n"); 818 ip6h->ip6_nxt = nexthdr; 819 } 820 } 821 822 if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) - 823 ivlen) { 824 ESP_BUMP_STAT(espstack, bad_decrypt); 825 ipsec_rl_strlog(espstack->ipsecesp_netstack, 826 info.mi_idnum, 0, 0, 827 SL_ERROR | SL_WARN, 828 "Corrupt ESP packet (v6 padlen too big).\n"); 829 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 830 padlen)); 831 esp1dbg(espstack, 832 ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = " 833 "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen) 834 + sizeof (ip6_t)), ivlen, 835 (unsigned)(ntohs(ip6h->ip6_plen) - 2 - 836 sizeof (esph_t) - ivlen))); 837 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 838 return (B_FALSE); 839 } 840 841 842 /* 843 * Fix the rest of the header. The explicit - 2 is for the 844 * padding length and the next-header bytes. IPv6 is nice, 845 * because there's no hdr checksum! 846 */ 847 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen - 848 2 - sizeof (esph_t) - ivlen); 849 } 850 851 if (espstack->ipsecesp_padding_check > 0 && padlen > 0) { 852 /* 853 * Weak padding check: compare last-byte to length, they 854 * should be equal. 855 */ 856 lastpad = *lastbyte--; 857 858 if (padlen != lastpad) { 859 ipsec_rl_strlog(espstack->ipsecesp_netstack, 860 info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 861 "Corrupt ESP packet (lastpad != padlen).\n"); 862 esp1dbg(espstack, 863 ("lastpad (%d) not equal to padlen (%d):\n", 864 lastpad, padlen)); 865 ESP_BUMP_STAT(espstack, bad_padding); 866 *counter = DROPPER(ipss, ipds_esp_bad_padding); 867 return (B_FALSE); 868 } 869 870 /* 871 * Strong padding check: Check all pad bytes to see that 872 * they're ascending. Go backwards using a descending counter 873 * to verify. padlen == 1 is checked by previous block, so 874 * only bother if we've more than 1 byte of padding. 875 * Consequently, start the check one byte before the location 876 * of "lastpad". 877 */ 878 if (espstack->ipsecesp_padding_check > 1) { 879 /* 880 * This assert may have to become an if and a pullup 881 * if we start accepting multi-dblk mblks. For now, 882 * though, any packet here will have been pulled up in 883 * esp_inbound. 884 */ 885 ASSERT(MBLKL(scratch) >= lastpad + 3); 886 887 /* 888 * Use "--lastpad" because we already checked the very 889 * last pad byte previously. 890 */ 891 while (--lastpad != 0) { 892 if (lastpad != *lastbyte) { 893 ipsec_rl_strlog( 894 espstack->ipsecesp_netstack, 895 info.mi_idnum, 0, 0, 896 SL_ERROR | SL_WARN, "Corrupt ESP " 897 "packet (bad padding).\n"); 898 esp1dbg(espstack, 899 ("padding not in correct" 900 " format:\n")); 901 ESP_BUMP_STAT(espstack, bad_padding); 902 *counter = DROPPER(ipss, 903 ipds_esp_bad_padding); 904 return (B_FALSE); 905 } 906 lastbyte--; 907 } 908 } 909 } 910 911 /* Trim off the padding. */ 912 ASSERT(data_mp->b_cont == NULL); 913 data_mp->b_wptr -= (padlen + 2); 914 915 /* 916 * Remove the ESP header. 917 * 918 * The above assertions about data_mp's size will make this work. 919 * 920 * XXX Question: If I send up and get back a contiguous mblk, 921 * would it be quicker to bcopy over, or keep doing the dupb stuff? 922 * I go with copying for now. 923 */ 924 925 if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) && 926 IS_P2ALIGNED(ivlen, sizeof (uint32_t))) { 927 uint8_t *start = data_mp->b_rptr; 928 uint32_t *src, *dst; 929 930 src = (uint32_t *)(start + divpoint); 931 dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen); 932 933 ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) && 934 IS_P2ALIGNED(src, sizeof (uint32_t))); 935 936 do { 937 src--; 938 dst--; 939 *dst = *src; 940 } while (src != (uint32_t *)start); 941 942 data_mp->b_rptr = (uchar_t *)dst; 943 } else { 944 uint8_t *start = data_mp->b_rptr; 945 uint8_t *src, *dst; 946 947 src = start + divpoint; 948 dst = src + sizeof (esph_t) + ivlen; 949 950 do { 951 src--; 952 dst--; 953 *dst = *src; 954 } while (src != start); 955 956 data_mp->b_rptr = dst; 957 } 958 959 esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n")); 960 esp2dbg(espstack, (dump_msg(data_mp))); 961 962 return (B_TRUE); 963 } 964 965 /* 966 * Updating use times can be tricky business if the ipsa_haspeer flag is 967 * set. This function is called once in an SA's lifetime. 968 * 969 * Caller has to REFRELE "assoc" which is passed in. This function has 970 * to REFRELE any peer SA that is obtained. 971 */ 972 static void 973 esp_set_usetime(ipsa_t *assoc, boolean_t inbound) 974 { 975 ipsa_t *inassoc, *outassoc; 976 isaf_t *bucket; 977 sadb_t *sp; 978 int outhash; 979 boolean_t isv6; 980 netstack_t *ns = assoc->ipsa_netstack; 981 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 982 983 /* No peer? No problem! */ 984 if (!assoc->ipsa_haspeer) { 985 sadb_set_usetime(assoc); 986 return; 987 } 988 989 /* 990 * Otherwise, we want to grab both the original assoc and its peer. 991 * There might be a race for this, but if it's a real race, the times 992 * will be out-of-synch by at most a second, and since our time 993 * granularity is a second, this won't be a problem. 994 * 995 * If we need tight synchronization on the peer SA, then we need to 996 * reconsider. 997 */ 998 999 /* Use address length to select IPv6/IPv4 */ 1000 isv6 = (assoc->ipsa_addrfam == AF_INET6); 1001 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 1002 1003 if (inbound) { 1004 inassoc = assoc; 1005 if (isv6) { 1006 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 1007 &inassoc->ipsa_dstaddr)); 1008 } else { 1009 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 1010 &inassoc->ipsa_dstaddr)); 1011 } 1012 bucket = &sp->sdb_of[outhash]; 1013 mutex_enter(&bucket->isaf_lock); 1014 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 1015 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 1016 inassoc->ipsa_addrfam); 1017 mutex_exit(&bucket->isaf_lock); 1018 if (outassoc == NULL) { 1019 /* Q: Do we wish to set haspeer == B_FALSE? */ 1020 esp0dbg(("esp_set_usetime: " 1021 "can't find peer for inbound.\n")); 1022 sadb_set_usetime(inassoc); 1023 return; 1024 } 1025 } else { 1026 outassoc = assoc; 1027 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 1028 mutex_enter(&bucket->isaf_lock); 1029 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 1030 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 1031 outassoc->ipsa_addrfam); 1032 mutex_exit(&bucket->isaf_lock); 1033 if (inassoc == NULL) { 1034 /* Q: Do we wish to set haspeer == B_FALSE? */ 1035 esp0dbg(("esp_set_usetime: " 1036 "can't find peer for outbound.\n")); 1037 sadb_set_usetime(outassoc); 1038 return; 1039 } 1040 } 1041 1042 /* Update usetime on both. */ 1043 sadb_set_usetime(inassoc); 1044 sadb_set_usetime(outassoc); 1045 1046 /* 1047 * REFRELE any peer SA. 1048 * 1049 * Because of the multi-line macro nature of IPSA_REFRELE, keep 1050 * them in { }. 1051 */ 1052 if (inbound) { 1053 IPSA_REFRELE(outassoc); 1054 } else { 1055 IPSA_REFRELE(inassoc); 1056 } 1057 } 1058 1059 /* 1060 * Handle ESP inbound data for IPv4 and IPv6. 1061 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1062 * mblk chain data_mp. 1063 */ 1064 mblk_t * 1065 esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira) 1066 { 1067 esph_t *esph = (esph_t *)arg; 1068 ipsa_t *ipsa = ira->ira_ipsec_esp_sa; 1069 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1070 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1071 ipsec_stack_t *ipss = ns->netstack_ipsec; 1072 1073 /* 1074 * We may wish to check replay in-range-only here as an optimization. 1075 * Include the reality check of ipsa->ipsa_replay > 1076 * ipsa->ipsa_replay_wsize for times when it's the first N packets, 1077 * where N == ipsa->ipsa_replay_wsize. 1078 * 1079 * Another check that may come here later is the "collision" check. 1080 * If legitimate packets flow quickly enough, this won't be a problem, 1081 * but collisions may cause authentication algorithm crunching to 1082 * take place when it doesn't need to. 1083 */ 1084 if (!sadb_replay_peek(ipsa, esph->esph_replay)) { 1085 ESP_BUMP_STAT(espstack, replay_early_failures); 1086 IP_ESP_BUMP_STAT(ipss, in_discards); 1087 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 1088 DROPPER(ipss, ipds_esp_early_replay), 1089 &espstack->esp_dropper); 1090 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1091 return (NULL); 1092 } 1093 1094 /* 1095 * Adjust the IP header's payload length to reflect the removal 1096 * of the ICV. 1097 */ 1098 if (!(ira->ira_flags & IRAF_IS_IPV4)) { 1099 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr; 1100 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - 1101 ipsa->ipsa_mac_len); 1102 } else { 1103 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 1104 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - 1105 ipsa->ipsa_mac_len); 1106 } 1107 1108 /* submit the request to the crypto framework */ 1109 return (esp_submit_req_inbound(data_mp, ira, ipsa, 1110 (uint8_t *)esph - data_mp->b_rptr)); 1111 } 1112 1113 /* XXX refactor me */ 1114 /* 1115 * Handle the SADB_GETSPI message. Create a larval SA. 1116 */ 1117 static void 1118 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 1119 { 1120 ipsa_t *newbie, *target; 1121 isaf_t *outbound, *inbound; 1122 int rc, diagnostic; 1123 sadb_sa_t *assoc; 1124 keysock_out_t *kso; 1125 uint32_t newspi; 1126 1127 /* 1128 * Randomly generate a proposed SPI value 1129 */ 1130 if (cl_inet_getspi != NULL) { 1131 cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid, 1132 IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL); 1133 } else { 1134 (void) random_get_pseudo_bytes((uint8_t *)&newspi, 1135 sizeof (uint32_t)); 1136 } 1137 newbie = sadb_getspi(ksi, newspi, &diagnostic, 1138 espstack->ipsecesp_netstack, IPPROTO_ESP); 1139 1140 if (newbie == NULL) { 1141 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic, 1142 ksi->ks_in_serial); 1143 return; 1144 } else if (newbie == (ipsa_t *)-1) { 1145 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 1146 ksi->ks_in_serial); 1147 return; 1148 } 1149 1150 /* 1151 * XXX - We may randomly collide. We really should recover from this. 1152 * Unfortunately, that could require spending way-too-much-time 1153 * in here. For now, let the user retry. 1154 */ 1155 1156 if (newbie->ipsa_addrfam == AF_INET6) { 1157 outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6, 1158 *(uint32_t *)(newbie->ipsa_dstaddr)); 1159 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6, 1160 newbie->ipsa_spi); 1161 } else { 1162 ASSERT(newbie->ipsa_addrfam == AF_INET); 1163 outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4, 1164 *(uint32_t *)(newbie->ipsa_dstaddr)); 1165 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4, 1166 newbie->ipsa_spi); 1167 } 1168 1169 mutex_enter(&outbound->isaf_lock); 1170 mutex_enter(&inbound->isaf_lock); 1171 1172 /* 1173 * Check for collisions (i.e. did sadb_getspi() return with something 1174 * that already exists?). 1175 * 1176 * Try outbound first. Even though SADB_GETSPI is traditionally 1177 * for inbound SAs, you never know what a user might do. 1178 */ 1179 target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi, 1180 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam); 1181 if (target == NULL) { 1182 target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi, 1183 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, 1184 newbie->ipsa_addrfam); 1185 } 1186 1187 /* 1188 * I don't have collisions elsewhere! 1189 * (Nor will I because I'm still holding inbound/outbound locks.) 1190 */ 1191 1192 if (target != NULL) { 1193 rc = EEXIST; 1194 IPSA_REFRELE(target); 1195 } else { 1196 /* 1197 * sadb_insertassoc() also checks for collisions, so 1198 * if there's a colliding entry, rc will be set 1199 * to EEXIST. 1200 */ 1201 rc = sadb_insertassoc(newbie, inbound); 1202 newbie->ipsa_hardexpiretime = gethrestime_sec(); 1203 newbie->ipsa_hardexpiretime += 1204 espstack->ipsecesp_larval_timeout; 1205 } 1206 1207 /* 1208 * Can exit outbound mutex. Hold inbound until we're done 1209 * with newbie. 1210 */ 1211 mutex_exit(&outbound->isaf_lock); 1212 1213 if (rc != 0) { 1214 mutex_exit(&inbound->isaf_lock); 1215 IPSA_REFRELE(newbie); 1216 sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc, 1217 SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial); 1218 return; 1219 } 1220 1221 1222 /* Can write here because I'm still holding the bucket lock. */ 1223 newbie->ipsa_type = SADB_SATYPE_ESP; 1224 1225 /* 1226 * Construct successful return message. We have one thing going 1227 * for us in PF_KEY v2. That's the fact that 1228 * sizeof (sadb_spirange_t) == sizeof (sadb_sa_t) 1229 */ 1230 assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE]; 1231 assoc->sadb_sa_exttype = SADB_EXT_SA; 1232 assoc->sadb_sa_spi = newbie->ipsa_spi; 1233 *((uint64_t *)(&assoc->sadb_sa_replay)) = 0; 1234 mutex_exit(&inbound->isaf_lock); 1235 1236 /* Convert KEYSOCK_IN to KEYSOCK_OUT. */ 1237 kso = (keysock_out_t *)ksi; 1238 kso->ks_out_len = sizeof (*kso); 1239 kso->ks_out_serial = ksi->ks_in_serial; 1240 kso->ks_out_type = KEYSOCK_OUT; 1241 1242 /* 1243 * Can safely putnext() to esp_pfkey_q, because this is a turnaround 1244 * from the esp_pfkey_q. 1245 */ 1246 putnext(espstack->esp_pfkey_q, mp); 1247 } 1248 1249 /* 1250 * Insert the ESP header into a packet. Duplicate an mblk, and insert a newly 1251 * allocated mblk with the ESP header in between the two. 1252 */ 1253 static boolean_t 1254 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint, 1255 ipsecesp_stack_t *espstack) 1256 { 1257 mblk_t *split_mp = mp; 1258 uint_t wheretodiv = divpoint; 1259 1260 while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) { 1261 wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr); 1262 split_mp = split_mp->b_cont; 1263 ASSERT(split_mp != NULL); 1264 } 1265 1266 if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) { 1267 mblk_t *scratch; 1268 1269 /* "scratch" is the 2nd half, split_mp is the first. */ 1270 scratch = dupb(split_mp); 1271 if (scratch == NULL) { 1272 esp1dbg(espstack, 1273 ("esp_insert_esp: can't allocate scratch.\n")); 1274 return (B_FALSE); 1275 } 1276 /* NOTE: dupb() doesn't set b_cont appropriately. */ 1277 scratch->b_cont = split_mp->b_cont; 1278 scratch->b_rptr += wheretodiv; 1279 split_mp->b_wptr = split_mp->b_rptr + wheretodiv; 1280 split_mp->b_cont = scratch; 1281 } 1282 /* 1283 * At this point, split_mp is exactly "wheretodiv" bytes long, and 1284 * holds the end of the pre-ESP part of the datagram. 1285 */ 1286 esp_mp->b_cont = split_mp->b_cont; 1287 split_mp->b_cont = esp_mp; 1288 1289 return (B_TRUE); 1290 } 1291 1292 /* 1293 * Section 7 of RFC 3947 says: 1294 * 1295 * 7. Recovering from the Expiring NAT Mappings 1296 * 1297 * There are cases where NAT box decides to remove mappings that are still 1298 * alive (for example, when the keepalive interval is too long, or when the 1299 * NAT box is rebooted). To recover from this, ends that are NOT behind 1300 * NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from 1301 * the other end to determine which IP and port addresses should be used. 1302 * The host behind dynamic NAT MUST NOT do this, as otherwise it opens a 1303 * DoS attack possibility because the IP address or port of the other host 1304 * will not change (it is not behind NAT). 1305 * 1306 * Keepalives cannot be used for these purposes, as they are not 1307 * authenticated, but any IKE authenticated IKE packet or ESP packet can be 1308 * used to detect whether the IP address or the port has changed. 1309 * 1310 * The following function will check an SA and its explicitly-set pair to see 1311 * if the NAT-T remote port matches the received packet (which must have 1312 * passed ESP authentication, see esp_in_done() for the caller context). If 1313 * there is a mismatch, the SAs are updated. It is not important if we race 1314 * with a transmitting thread, as if there is a transmitting thread, it will 1315 * merely emit a packet that will most-likely be dropped. 1316 * 1317 * "ports" are ordered src,dst, and assoc is an inbound SA, where src should 1318 * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port. 1319 */ 1320 #ifdef _LITTLE_ENDIAN 1321 #define FIRST_16(x) ((x) & 0xFFFF) 1322 #define NEXT_16(x) (((x) >> 16) & 0xFFFF) 1323 #else 1324 #define FIRST_16(x) (((x) >> 16) & 0xFFFF) 1325 #define NEXT_16(x) ((x) & 0xFFFF) 1326 #endif 1327 static void 1328 esp_port_freshness(uint32_t ports, ipsa_t *assoc) 1329 { 1330 uint16_t remote = FIRST_16(ports); 1331 uint16_t local = NEXT_16(ports); 1332 ipsa_t *outbound_peer; 1333 isaf_t *bucket; 1334 ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp; 1335 1336 /* We found a conn_t, therefore local != 0. */ 1337 ASSERT(local != 0); 1338 /* Assume an IPv4 SA. */ 1339 ASSERT(assoc->ipsa_addrfam == AF_INET); 1340 1341 /* 1342 * On-the-wire rport == 0 means something's very wrong. 1343 * An unpaired SA is also useless to us. 1344 * If we are behind the NAT, don't bother. 1345 * A zero local NAT port defaults to 4500, so check that too. 1346 * And, of course, if the ports already match, we don't need to 1347 * bother. 1348 */ 1349 if (remote == 0 || assoc->ipsa_otherspi == 0 || 1350 (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) || 1351 (assoc->ipsa_remote_nat_port == 0 && 1352 remote == htons(IPPORT_IKE_NATT)) || 1353 remote == assoc->ipsa_remote_nat_port) 1354 return; 1355 1356 /* Try and snag the peer. NOTE: Assume IPv4 for now. */ 1357 bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4), 1358 assoc->ipsa_srcaddr[0]); 1359 mutex_enter(&bucket->isaf_lock); 1360 outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi, 1361 assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET); 1362 mutex_exit(&bucket->isaf_lock); 1363 1364 /* We probably lost a race to a deleting or expiring thread. */ 1365 if (outbound_peer == NULL) 1366 return; 1367 1368 /* 1369 * Hold the mutexes for both SAs so we don't race another inbound 1370 * thread. A lock-entry order shouldn't matter, since all other 1371 * per-ipsa locks are individually held-then-released. 1372 * 1373 * Luckily, this has nothing to do with the remote-NAT address, 1374 * so we don't have to re-scribble the cached-checksum differential. 1375 */ 1376 mutex_enter(&outbound_peer->ipsa_lock); 1377 mutex_enter(&assoc->ipsa_lock); 1378 outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port = 1379 remote; 1380 mutex_exit(&assoc->ipsa_lock); 1381 mutex_exit(&outbound_peer->ipsa_lock); 1382 IPSA_REFRELE(outbound_peer); 1383 ESP_BUMP_STAT(espstack, sa_port_renumbers); 1384 } 1385 /* 1386 * Finish processing of an inbound ESP packet after processing by the 1387 * crypto framework. 1388 * - Remove the ESP header. 1389 * - Send packet back to IP. 1390 * If authentication was performed on the packet, this function is called 1391 * only if the authentication succeeded. 1392 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1393 * mblk chain data_mp. 1394 */ 1395 static mblk_t * 1396 esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic) 1397 { 1398 ipsa_t *assoc; 1399 uint_t espstart; 1400 uint32_t ivlen = 0; 1401 uint_t processed_len; 1402 esph_t *esph; 1403 kstat_named_t *counter; 1404 boolean_t is_natt; 1405 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1406 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1407 ipsec_stack_t *ipss = ns->netstack_ipsec; 1408 1409 assoc = ira->ira_ipsec_esp_sa; 1410 ASSERT(assoc != NULL); 1411 1412 is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 1413 1414 /* get the pointer to the ESP header */ 1415 if (assoc->ipsa_encr_alg == SADB_EALG_NULL) { 1416 /* authentication-only ESP */ 1417 espstart = ic->ic_crypto_data.cd_offset; 1418 processed_len = ic->ic_crypto_data.cd_length; 1419 } else { 1420 /* encryption present */ 1421 ivlen = assoc->ipsa_iv_len; 1422 if (assoc->ipsa_auth_alg == SADB_AALG_NONE) { 1423 /* encryption-only ESP */ 1424 espstart = ic->ic_crypto_data.cd_offset - 1425 sizeof (esph_t) - assoc->ipsa_iv_len; 1426 processed_len = ic->ic_crypto_data.cd_length + 1427 ivlen; 1428 } else { 1429 /* encryption with authentication */ 1430 espstart = ic->ic_crypto_dual_data.dd_offset1; 1431 processed_len = ic->ic_crypto_dual_data.dd_len2 + 1432 ivlen; 1433 } 1434 } 1435 1436 esph = (esph_t *)(data_mp->b_rptr + espstart); 1437 1438 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE || 1439 (assoc->ipsa_flags & IPSA_F_COMBINED)) { 1440 /* 1441 * Authentication passed if we reach this point. 1442 * Packets with authentication will have the ICV 1443 * after the crypto data. Adjust b_wptr before 1444 * making padlen checks. 1445 */ 1446 ESP_BUMP_STAT(espstack, good_auth); 1447 data_mp->b_wptr -= assoc->ipsa_mac_len; 1448 1449 /* 1450 * Check replay window here! 1451 * For right now, assume keysock will set the replay window 1452 * size to zero for SAs that have an unspecified sender. 1453 * This may change... 1454 */ 1455 1456 if (!sadb_replay_check(assoc, esph->esph_replay)) { 1457 /* 1458 * Log the event. As of now we print out an event. 1459 * Do not print the replay failure number, or else 1460 * syslog cannot collate the error messages. Printing 1461 * the replay number that failed opens a denial-of- 1462 * service attack. 1463 */ 1464 ipsec_assocfailure(info.mi_idnum, 0, 0, 1465 SL_ERROR | SL_WARN, 1466 "Replay failed for ESP spi 0x%x, dst %s.\n", 1467 assoc->ipsa_spi, assoc->ipsa_dstaddr, 1468 assoc->ipsa_addrfam, espstack->ipsecesp_netstack); 1469 ESP_BUMP_STAT(espstack, replay_failures); 1470 counter = DROPPER(ipss, ipds_esp_replay); 1471 goto drop_and_bail; 1472 } 1473 1474 if (is_natt) { 1475 ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS); 1476 ASSERT(ira->ira_esp_udp_ports != 0); 1477 esp_port_freshness(ira->ira_esp_udp_ports, assoc); 1478 } 1479 } 1480 1481 esp_set_usetime(assoc, B_TRUE); 1482 1483 if (!esp_age_bytes(assoc, processed_len, B_TRUE)) { 1484 /* The ipsa has hit hard expiration, LOG and AUDIT. */ 1485 ipsec_assocfailure(info.mi_idnum, 0, 0, 1486 SL_ERROR | SL_WARN, 1487 "ESP association 0x%x, dst %s had bytes expire.\n", 1488 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1489 espstack->ipsecesp_netstack); 1490 ESP_BUMP_STAT(espstack, bytes_expired); 1491 counter = DROPPER(ipss, ipds_esp_bytes_expire); 1492 goto drop_and_bail; 1493 } 1494 1495 /* 1496 * Remove ESP header and padding from packet. I hope the compiler 1497 * spews "branch, predict taken" code for this. 1498 */ 1499 1500 if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4), 1501 ivlen, &counter, espstack)) { 1502 1503 if (is_system_labeled() && assoc->ipsa_tsl != NULL) { 1504 if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) { 1505 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 1506 DROPPER(ipss, ipds_ah_nomem), 1507 &espstack->esp_dropper); 1508 BUMP_MIB(ira->ira_ill->ill_ip_mib, 1509 ipIfStatsInDiscards); 1510 return (NULL); 1511 } 1512 } 1513 if (is_natt) 1514 return (esp_fix_natt_checksums(data_mp, assoc)); 1515 1516 if (assoc->ipsa_state == IPSA_STATE_IDLE) { 1517 /* 1518 * Cluster buffering case. Tell caller that we're 1519 * handling the packet. 1520 */ 1521 sadb_buf_pkt(assoc, data_mp, ira); 1522 return (NULL); 1523 } 1524 1525 return (data_mp); 1526 } 1527 1528 esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n")); 1529 drop_and_bail: 1530 IP_ESP_BUMP_STAT(ipss, in_discards); 1531 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter, 1532 &espstack->esp_dropper); 1533 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1534 return (NULL); 1535 } 1536 1537 /* 1538 * Called upon failing the inbound ICV check. The message passed as 1539 * argument is freed. 1540 */ 1541 static void 1542 esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira) 1543 { 1544 ipsa_t *assoc = ira->ira_ipsec_esp_sa; 1545 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1546 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1547 ipsec_stack_t *ipss = ns->netstack_ipsec; 1548 1549 /* 1550 * Log the event. Don't print to the console, block 1551 * potential denial-of-service attack. 1552 */ 1553 ESP_BUMP_STAT(espstack, bad_auth); 1554 1555 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 1556 "ESP Authentication failed for spi 0x%x, dst %s.\n", 1557 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1558 espstack->ipsecesp_netstack); 1559 1560 IP_ESP_BUMP_STAT(ipss, in_discards); 1561 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 1562 DROPPER(ipss, ipds_esp_bad_auth), 1563 &espstack->esp_dropper); 1564 } 1565 1566 1567 /* 1568 * Invoked for outbound packets after ESP processing. If the packet 1569 * also requires AH, performs the AH SA selection and AH processing. 1570 * 1571 * Returns data_mp (possibly with AH added) unless data_mp was consumed 1572 * due to an error, or queued due to async. crypto or an ACQUIRE trigger. 1573 */ 1574 static mblk_t * 1575 esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa) 1576 { 1577 ipsec_action_t *ap; 1578 1579 ap = ixa->ixa_ipsec_action; 1580 if (ap == NULL) { 1581 ipsec_policy_t *pp = ixa->ixa_ipsec_policy; 1582 ap = pp->ipsp_act; 1583 } 1584 1585 if (!ap->ipa_want_ah) 1586 return (data_mp); 1587 1588 /* 1589 * Normally the AH SA would have already been put in place 1590 * but it could have been flushed so we need to look for it. 1591 */ 1592 if (ixa->ixa_ipsec_ah_sa == NULL) { 1593 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) { 1594 sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE); 1595 return (NULL); 1596 } 1597 } 1598 ASSERT(ixa->ixa_ipsec_ah_sa != NULL); 1599 1600 data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa); 1601 return (data_mp); 1602 } 1603 1604 1605 /* 1606 * Kernel crypto framework callback invoked after completion of async 1607 * crypto requests for outbound packets. 1608 */ 1609 static void 1610 esp_kcf_callback_outbound(void *arg, int status) 1611 { 1612 mblk_t *mp = (mblk_t *)arg; 1613 mblk_t *async_mp; 1614 netstack_t *ns; 1615 ipsec_stack_t *ipss; 1616 ipsecesp_stack_t *espstack; 1617 mblk_t *data_mp; 1618 ip_xmit_attr_t ixas; 1619 ipsec_crypto_t *ic; 1620 ill_t *ill; 1621 1622 /* 1623 * First remove the ipsec_crypto_t mblk 1624 * Note that we need to ipsec_free_crypto_data(mp) once done with ic. 1625 */ 1626 async_mp = ipsec_remove_crypto_data(mp, &ic); 1627 ASSERT(async_mp != NULL); 1628 1629 /* 1630 * Extract the ip_xmit_attr_t from the first mblk. 1631 * Verifies that the netstack and ill is still around; could 1632 * have vanished while kEf was doing its work. 1633 * On succesful return we have a nce_t and the ill/ipst can't 1634 * disappear until we do the nce_refrele in ixa_cleanup. 1635 */ 1636 data_mp = async_mp->b_cont; 1637 async_mp->b_cont = NULL; 1638 if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) { 1639 /* Disappeared on us - no ill/ipst for MIB */ 1640 /* We have nowhere to do stats since ixa_ipst could be NULL */ 1641 if (ixas.ixa_nce != NULL) { 1642 ill = ixas.ixa_nce->nce_ill; 1643 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1644 ip_drop_output("ipIfStatsOutDiscards", data_mp, ill); 1645 } 1646 freemsg(data_mp); 1647 goto done; 1648 } 1649 ns = ixas.ixa_ipst->ips_netstack; 1650 espstack = ns->netstack_ipsecesp; 1651 ipss = ns->netstack_ipsec; 1652 ill = ixas.ixa_nce->nce_ill; 1653 1654 if (status == CRYPTO_SUCCESS) { 1655 /* 1656 * If a ICV was computed, it was stored by the 1657 * crypto framework at the end of the packet. 1658 */ 1659 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 1660 1661 esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE); 1662 /* NAT-T packet. */ 1663 if (IPH_HDR_VERSION(ipha) == IP_VERSION && 1664 ipha->ipha_protocol == IPPROTO_UDP) 1665 esp_prepare_udp(ns, data_mp, ipha); 1666 1667 /* do AH processing if needed */ 1668 data_mp = esp_do_outbound_ah(data_mp, &ixas); 1669 if (data_mp == NULL) 1670 goto done; 1671 1672 (void) ip_output_post_ipsec(data_mp, &ixas); 1673 } else { 1674 /* Outbound shouldn't see invalid MAC */ 1675 ASSERT(status != CRYPTO_INVALID_MAC); 1676 1677 esp1dbg(espstack, 1678 ("esp_kcf_callback_outbound: crypto failed with 0x%x\n", 1679 status)); 1680 ESP_BUMP_STAT(espstack, crypto_failures); 1681 ESP_BUMP_STAT(espstack, out_discards); 1682 ip_drop_packet(data_mp, B_FALSE, ill, 1683 DROPPER(ipss, ipds_esp_crypto_failed), 1684 &espstack->esp_dropper); 1685 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1686 } 1687 done: 1688 ixa_cleanup(&ixas); 1689 (void) ipsec_free_crypto_data(mp); 1690 } 1691 1692 /* 1693 * Kernel crypto framework callback invoked after completion of async 1694 * crypto requests for inbound packets. 1695 */ 1696 static void 1697 esp_kcf_callback_inbound(void *arg, int status) 1698 { 1699 mblk_t *mp = (mblk_t *)arg; 1700 mblk_t *async_mp; 1701 netstack_t *ns; 1702 ipsecesp_stack_t *espstack; 1703 ipsec_stack_t *ipss; 1704 mblk_t *data_mp; 1705 ip_recv_attr_t iras; 1706 ipsec_crypto_t *ic; 1707 1708 /* 1709 * First remove the ipsec_crypto_t mblk 1710 * Note that we need to ipsec_free_crypto_data(mp) once done with ic. 1711 */ 1712 async_mp = ipsec_remove_crypto_data(mp, &ic); 1713 ASSERT(async_mp != NULL); 1714 1715 /* 1716 * Extract the ip_recv_attr_t from the first mblk. 1717 * Verifies that the netstack and ill is still around; could 1718 * have vanished while kEf was doing its work. 1719 */ 1720 data_mp = async_mp->b_cont; 1721 async_mp->b_cont = NULL; 1722 if (!ip_recv_attr_from_mblk(async_mp, &iras)) { 1723 /* The ill or ip_stack_t disappeared on us */ 1724 ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL); 1725 freemsg(data_mp); 1726 goto done; 1727 } 1728 1729 ns = iras.ira_ill->ill_ipst->ips_netstack; 1730 espstack = ns->netstack_ipsecesp; 1731 ipss = ns->netstack_ipsec; 1732 1733 if (status == CRYPTO_SUCCESS) { 1734 data_mp = esp_in_done(data_mp, &iras, ic); 1735 if (data_mp == NULL) 1736 goto done; 1737 1738 /* finish IPsec processing */ 1739 ip_input_post_ipsec(data_mp, &iras); 1740 } else if (status == CRYPTO_INVALID_MAC) { 1741 esp_log_bad_auth(data_mp, &iras); 1742 } else { 1743 esp1dbg(espstack, 1744 ("esp_kcf_callback: crypto failed with 0x%x\n", 1745 status)); 1746 ESP_BUMP_STAT(espstack, crypto_failures); 1747 IP_ESP_BUMP_STAT(ipss, in_discards); 1748 ip_drop_packet(data_mp, B_TRUE, iras.ira_ill, 1749 DROPPER(ipss, ipds_esp_crypto_failed), 1750 &espstack->esp_dropper); 1751 BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1752 } 1753 done: 1754 ira_cleanup(&iras, B_TRUE); 1755 (void) ipsec_free_crypto_data(mp); 1756 } 1757 1758 /* 1759 * Invoked on crypto framework failure during inbound and outbound processing. 1760 */ 1761 static void 1762 esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc, 1763 ill_t *ill, ipsecesp_stack_t *espstack) 1764 { 1765 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 1766 1767 esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n", 1768 is_inbound ? "inbound" : "outbound", kef_rc)); 1769 ip_drop_packet(data_mp, is_inbound, ill, 1770 DROPPER(ipss, ipds_esp_crypto_failed), 1771 &espstack->esp_dropper); 1772 ESP_BUMP_STAT(espstack, crypto_failures); 1773 if (is_inbound) 1774 IP_ESP_BUMP_STAT(ipss, in_discards); 1775 else 1776 ESP_BUMP_STAT(espstack, out_discards); 1777 } 1778 1779 /* 1780 * A statement-equivalent macro, _cr MUST point to a modifiable 1781 * crypto_call_req_t. 1782 */ 1783 #define ESP_INIT_CALLREQ(_cr, _mp, _callback) \ 1784 (_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE; \ 1785 (_cr)->cr_callback_arg = (_mp); \ 1786 (_cr)->cr_callback_func = (_callback) 1787 1788 #define ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) { \ 1789 (mac)->cd_format = CRYPTO_DATA_RAW; \ 1790 (mac)->cd_offset = 0; \ 1791 (mac)->cd_length = icvlen; \ 1792 (mac)->cd_raw.iov_base = (char *)icvbuf; \ 1793 (mac)->cd_raw.iov_len = icvlen; \ 1794 } 1795 1796 #define ESP_INIT_CRYPTO_DATA(data, mp, off, len) { \ 1797 if (MBLKL(mp) >= (len) + (off)) { \ 1798 (data)->cd_format = CRYPTO_DATA_RAW; \ 1799 (data)->cd_raw.iov_base = (char *)(mp)->b_rptr; \ 1800 (data)->cd_raw.iov_len = MBLKL(mp); \ 1801 (data)->cd_offset = off; \ 1802 } else { \ 1803 (data)->cd_format = CRYPTO_DATA_MBLK; \ 1804 (data)->cd_mp = mp; \ 1805 (data)->cd_offset = off; \ 1806 } \ 1807 (data)->cd_length = len; \ 1808 } 1809 1810 #define ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) { \ 1811 (data)->dd_format = CRYPTO_DATA_MBLK; \ 1812 (data)->dd_mp = mp; \ 1813 (data)->dd_len1 = len1; \ 1814 (data)->dd_offset1 = off1; \ 1815 (data)->dd_len2 = len2; \ 1816 (data)->dd_offset2 = off2; \ 1817 } 1818 1819 /* 1820 * Returns data_mp if successfully completed the request. Returns 1821 * NULL if it failed (and increments InDiscards) or if it is pending. 1822 */ 1823 static mblk_t * 1824 esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira, 1825 ipsa_t *assoc, uint_t esph_offset) 1826 { 1827 uint_t auth_offset, msg_len, auth_len; 1828 crypto_call_req_t call_req, *callrp; 1829 mblk_t *mp; 1830 esph_t *esph_ptr; 1831 int kef_rc; 1832 uint_t icv_len = assoc->ipsa_mac_len; 1833 crypto_ctx_template_t auth_ctx_tmpl; 1834 boolean_t do_auth, do_encr, force; 1835 uint_t encr_offset, encr_len; 1836 uint_t iv_len = assoc->ipsa_iv_len; 1837 crypto_ctx_template_t encr_ctx_tmpl; 1838 ipsec_crypto_t *ic, icstack; 1839 uchar_t *iv_ptr; 1840 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1841 ipsec_stack_t *ipss = ns->netstack_ipsec; 1842 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1843 1844 do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; 1845 do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; 1846 force = (assoc->ipsa_flags & IPSA_F_ASYNC); 1847 1848 #ifdef IPSEC_LATENCY_TEST 1849 kef_rc = CRYPTO_SUCCESS; 1850 #else 1851 kef_rc = CRYPTO_FAILED; 1852 #endif 1853 1854 /* 1855 * An inbound packet is of the form: 1856 * [IP,options,ESP,IV,data,ICV,pad] 1857 */ 1858 esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset); 1859 iv_ptr = (uchar_t *)(esph_ptr + 1); 1860 /* Packet length starting at IP header ending after ESP ICV. */ 1861 msg_len = MBLKL(esp_mp); 1862 1863 encr_offset = esph_offset + sizeof (esph_t) + iv_len; 1864 encr_len = msg_len - encr_offset; 1865 1866 /* 1867 * Counter mode algs need a nonce. This is setup in sadb_common_add(). 1868 * If for some reason we are using a SA which does not have a nonce 1869 * then we must fail here. 1870 */ 1871 if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) && 1872 (assoc->ipsa_nonce == NULL)) { 1873 ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill, 1874 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 1875 return (NULL); 1876 } 1877 1878 if (force) { 1879 /* We are doing asynch; allocate mblks to hold state */ 1880 if ((mp = ip_recv_attr_to_mblk(ira)) == NULL || 1881 (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) { 1882 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1883 ip_drop_input("ipIfStatsInDiscards", esp_mp, 1884 ira->ira_ill); 1885 return (NULL); 1886 } 1887 linkb(mp, esp_mp); 1888 callrp = &call_req; 1889 ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound); 1890 } else { 1891 /* 1892 * If we know we are going to do sync then ipsec_crypto_t 1893 * should be on the stack. 1894 */ 1895 ic = &icstack; 1896 bzero(ic, sizeof (*ic)); 1897 callrp = NULL; 1898 } 1899 1900 if (do_auth) { 1901 /* authentication context template */ 1902 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, 1903 auth_ctx_tmpl); 1904 1905 /* ICV to be verified */ 1906 ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, 1907 icv_len, esp_mp->b_wptr - icv_len); 1908 1909 /* authentication starts at the ESP header */ 1910 auth_offset = esph_offset; 1911 auth_len = msg_len - auth_offset - icv_len; 1912 if (!do_encr) { 1913 /* authentication only */ 1914 /* initialize input data argument */ 1915 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 1916 esp_mp, auth_offset, auth_len); 1917 1918 /* call the crypto framework */ 1919 kef_rc = crypto_mac_verify(&assoc->ipsa_amech, 1920 &ic->ic_crypto_data, 1921 &assoc->ipsa_kcfauthkey, auth_ctx_tmpl, 1922 &ic->ic_crypto_mac, callrp); 1923 } 1924 } 1925 1926 if (do_encr) { 1927 /* encryption template */ 1928 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR, 1929 encr_ctx_tmpl); 1930 1931 /* Call the nonce update function. Also passes in IV */ 1932 (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len, 1933 iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data); 1934 1935 if (!do_auth) { 1936 /* decryption only */ 1937 /* initialize input data argument */ 1938 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 1939 esp_mp, encr_offset, encr_len); 1940 1941 /* call the crypto framework */ 1942 kef_rc = crypto_decrypt((crypto_mechanism_t *) 1943 &ic->ic_cmm, &ic->ic_crypto_data, 1944 &assoc->ipsa_kcfencrkey, encr_ctx_tmpl, 1945 NULL, callrp); 1946 } 1947 } 1948 1949 if (do_auth && do_encr) { 1950 /* dual operation */ 1951 /* initialize input data argument */ 1952 ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data, 1953 esp_mp, auth_offset, auth_len, 1954 encr_offset, encr_len - icv_len); 1955 1956 /* specify IV */ 1957 ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr; 1958 1959 /* call the framework */ 1960 kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech, 1961 &assoc->ipsa_emech, &ic->ic_crypto_dual_data, 1962 &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey, 1963 auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac, 1964 NULL, callrp); 1965 } 1966 1967 switch (kef_rc) { 1968 case CRYPTO_SUCCESS: 1969 ESP_BUMP_STAT(espstack, crypto_sync); 1970 esp_mp = esp_in_done(esp_mp, ira, ic); 1971 if (force) { 1972 /* Free mp after we are done with ic */ 1973 mp = ipsec_free_crypto_data(mp); 1974 (void) ip_recv_attr_free_mblk(mp); 1975 } 1976 return (esp_mp); 1977 case CRYPTO_QUEUED: 1978 /* esp_kcf_callback_inbound() will be invoked on completion */ 1979 ESP_BUMP_STAT(espstack, crypto_async); 1980 return (NULL); 1981 case CRYPTO_INVALID_MAC: 1982 if (force) { 1983 mp = ipsec_free_crypto_data(mp); 1984 esp_mp = ip_recv_attr_free_mblk(mp); 1985 } 1986 ESP_BUMP_STAT(espstack, crypto_sync); 1987 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1988 esp_log_bad_auth(esp_mp, ira); 1989 /* esp_mp was passed to ip_drop_packet */ 1990 return (NULL); 1991 } 1992 1993 if (force) { 1994 mp = ipsec_free_crypto_data(mp); 1995 esp_mp = ip_recv_attr_free_mblk(mp); 1996 } 1997 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1998 esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack); 1999 /* esp_mp was passed to ip_drop_packet */ 2000 return (NULL); 2001 } 2002 2003 /* 2004 * Compute the IP and UDP checksums -- common code for both keepalives and 2005 * actual ESP-in-UDP packets. Be flexible with multiple mblks because ESP 2006 * uses mblk-insertion to insert the UDP header. 2007 * TODO - If there is an easy way to prep a packet for HW checksums, make 2008 * it happen here. 2009 * Note that this is used before both before calling ip_output_simple and 2010 * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the 2011 * latter. 2012 */ 2013 static void 2014 esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha) 2015 { 2016 int offset; 2017 uint32_t cksum; 2018 uint16_t *arr; 2019 mblk_t *udpmp = mp; 2020 uint_t hlen = IPH_HDR_LENGTH(ipha); 2021 2022 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2023 2024 ipha->ipha_hdr_checksum = 0; 2025 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 2026 2027 if (ns->netstack_udp->us_do_checksum) { 2028 ASSERT(MBLKL(udpmp) >= sizeof (udpha_t)); 2029 /* arr points to the IP header. */ 2030 arr = (uint16_t *)ipha; 2031 IP_STAT(ns->netstack_ip, ip_out_sw_cksum); 2032 IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes, 2033 ntohs(htons(ipha->ipha_length) - hlen)); 2034 /* arr[6-9] are the IP addresses. */ 2035 cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] + 2036 ntohs(htons(ipha->ipha_length) - hlen); 2037 cksum = IP_CSUM(mp, hlen, cksum); 2038 offset = hlen + UDP_CHECKSUM_OFFSET; 2039 while (offset >= MBLKL(udpmp)) { 2040 offset -= MBLKL(udpmp); 2041 udpmp = udpmp->b_cont; 2042 } 2043 /* arr points to the UDP header's checksum field. */ 2044 arr = (uint16_t *)(udpmp->b_rptr + offset); 2045 *arr = cksum; 2046 } 2047 } 2048 2049 /* 2050 * taskq handler so we can send the NAT-T keepalive on a separate thread. 2051 */ 2052 static void 2053 actually_send_keepalive(void *arg) 2054 { 2055 mblk_t *mp = (mblk_t *)arg; 2056 ip_xmit_attr_t ixas; 2057 netstack_t *ns; 2058 netstackid_t stackid; 2059 2060 stackid = (netstackid_t)(uintptr_t)mp->b_prev; 2061 mp->b_prev = NULL; 2062 ns = netstack_find_by_stackid(stackid); 2063 if (ns == NULL) { 2064 /* Disappeared */ 2065 ip_drop_output("ipIfStatsOutDiscards", mp, NULL); 2066 freemsg(mp); 2067 return; 2068 } 2069 2070 bzero(&ixas, sizeof (ixas)); 2071 ixas.ixa_zoneid = ALL_ZONES; 2072 ixas.ixa_cred = kcred; 2073 ixas.ixa_cpid = NOPID; 2074 ixas.ixa_tsl = NULL; 2075 ixas.ixa_ipst = ns->netstack_ip; 2076 /* No ULP checksum; done by esp_prepare_udp */ 2077 ixas.ixa_flags = (IXAF_IS_IPV4 | IXAF_NO_IPSEC | IXAF_VERIFY_SOURCE); 2078 2079 (void) ip_output_simple(mp, &ixas); 2080 ixa_cleanup(&ixas); 2081 netstack_rele(ns); 2082 } 2083 2084 /* 2085 * Send a one-byte UDP NAT-T keepalive. 2086 */ 2087 void 2088 ipsecesp_send_keepalive(ipsa_t *assoc) 2089 { 2090 mblk_t *mp; 2091 ipha_t *ipha; 2092 udpha_t *udpha; 2093 netstack_t *ns = assoc->ipsa_netstack; 2094 2095 ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock)); 2096 2097 mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI); 2098 if (mp == NULL) 2099 return; 2100 ipha = (ipha_t *)mp->b_rptr; 2101 ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 2102 ipha->ipha_type_of_service = 0; 2103 ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1); 2104 /* Use the low-16 of the SPI so we have some clue where it came from. */ 2105 ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1); 2106 ipha->ipha_fragment_offset_and_flags = 0; /* Too small to fragment! */ 2107 ipha->ipha_ttl = 0xFF; 2108 ipha->ipha_protocol = IPPROTO_UDP; 2109 ipha->ipha_hdr_checksum = 0; 2110 ipha->ipha_src = assoc->ipsa_srcaddr[0]; 2111 ipha->ipha_dst = assoc->ipsa_dstaddr[0]; 2112 udpha = (udpha_t *)(ipha + 1); 2113 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2114 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2115 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 2116 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2117 udpha->uha_length = htons(sizeof (udpha_t) + 1); 2118 udpha->uha_checksum = 0; 2119 mp->b_wptr = (uint8_t *)(udpha + 1); 2120 *(mp->b_wptr++) = 0xFF; 2121 2122 esp_prepare_udp(ns, mp, ipha); 2123 2124 /* 2125 * We're holding an isaf_t bucket lock, so pawn off the actual 2126 * packet transmission to another thread. Just in case syncq 2127 * processing causes a same-bucket packet to be processed. 2128 */ 2129 mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid; 2130 2131 if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp, 2132 TQ_NOSLEEP) == 0) { 2133 /* Assume no memory if taskq_dispatch() fails. */ 2134 mp->b_prev = NULL; 2135 ip_drop_packet(mp, B_FALSE, NULL, 2136 DROPPER(ns->netstack_ipsec, ipds_esp_nomem), 2137 &ns->netstack_ipsecesp->esp_dropper); 2138 } 2139 } 2140 2141 /* 2142 * Returns mp if successfully completed the request. Returns 2143 * NULL if it failed (and increments InDiscards) or if it is pending. 2144 */ 2145 static mblk_t * 2146 esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc, 2147 uchar_t *icv_buf, uint_t payload_len) 2148 { 2149 uint_t auth_len; 2150 crypto_call_req_t call_req, *callrp; 2151 mblk_t *esp_mp; 2152 esph_t *esph_ptr; 2153 mblk_t *mp; 2154 int kef_rc = CRYPTO_FAILED; 2155 uint_t icv_len = assoc->ipsa_mac_len; 2156 crypto_ctx_template_t auth_ctx_tmpl; 2157 boolean_t do_auth, do_encr, force; 2158 uint_t iv_len = assoc->ipsa_iv_len; 2159 crypto_ctx_template_t encr_ctx_tmpl; 2160 boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 2161 size_t esph_offset = (is_natt ? UDPH_SIZE : 0); 2162 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 2163 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2164 ipsec_crypto_t *ic, icstack; 2165 uchar_t *iv_ptr; 2166 crypto_data_t *cd_ptr = NULL; 2167 ill_t *ill = ixa->ixa_nce->nce_ill; 2168 ipsec_stack_t *ipss = ns->netstack_ipsec; 2169 2170 esp3dbg(espstack, ("esp_submit_req_outbound:%s", 2171 is_natt ? "natt" : "not natt")); 2172 2173 do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; 2174 do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; 2175 force = (assoc->ipsa_flags & IPSA_F_ASYNC); 2176 2177 #ifdef IPSEC_LATENCY_TEST 2178 kef_rc = CRYPTO_SUCCESS; 2179 #else 2180 kef_rc = CRYPTO_FAILED; 2181 #endif 2182 2183 /* 2184 * Outbound IPsec packets are of the form: 2185 * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV] 2186 * unless it's NATT, then it's 2187 * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV] 2188 * Get a pointer to the mblk containing the ESP header. 2189 */ 2190 ASSERT(data_mp->b_cont != NULL); 2191 esp_mp = data_mp->b_cont; 2192 esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset); 2193 iv_ptr = (uchar_t *)(esph_ptr + 1); 2194 2195 /* 2196 * Combined mode algs need a nonce. This is setup in sadb_common_add(). 2197 * If for some reason we are using a SA which does not have a nonce 2198 * then we must fail here. 2199 */ 2200 if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) && 2201 (assoc->ipsa_nonce == NULL)) { 2202 ip_drop_packet(data_mp, B_FALSE, NULL, 2203 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 2204 return (NULL); 2205 } 2206 2207 if (force) { 2208 /* We are doing asynch; allocate mblks to hold state */ 2209 if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL || 2210 (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) { 2211 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2212 ip_drop_output("ipIfStatsOutDiscards", data_mp, ill); 2213 freemsg(data_mp); 2214 return (NULL); 2215 } 2216 2217 linkb(mp, data_mp); 2218 callrp = &call_req; 2219 ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound); 2220 } else { 2221 /* 2222 * If we know we are going to do sync then ipsec_crypto_t 2223 * should be on the stack. 2224 */ 2225 ic = &icstack; 2226 bzero(ic, sizeof (*ic)); 2227 callrp = NULL; 2228 } 2229 2230 2231 if (do_auth) { 2232 /* authentication context template */ 2233 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, 2234 auth_ctx_tmpl); 2235 2236 /* where to store the computed mac */ 2237 ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, 2238 icv_len, icv_buf); 2239 2240 /* authentication starts at the ESP header */ 2241 auth_len = payload_len + iv_len + sizeof (esph_t); 2242 if (!do_encr) { 2243 /* authentication only */ 2244 /* initialize input data argument */ 2245 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 2246 esp_mp, esph_offset, auth_len); 2247 2248 /* call the crypto framework */ 2249 kef_rc = crypto_mac(&assoc->ipsa_amech, 2250 &ic->ic_crypto_data, 2251 &assoc->ipsa_kcfauthkey, auth_ctx_tmpl, 2252 &ic->ic_crypto_mac, callrp); 2253 } 2254 } 2255 2256 if (do_encr) { 2257 /* encryption context template */ 2258 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR, 2259 encr_ctx_tmpl); 2260 /* Call the nonce update function. */ 2261 (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len, 2262 iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data); 2263 2264 if (!do_auth) { 2265 /* encryption only, skip mblk that contains ESP hdr */ 2266 /* initialize input data argument */ 2267 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 2268 esp_mp->b_cont, 0, payload_len); 2269 2270 /* 2271 * For combined mode ciphers, the ciphertext is the same 2272 * size as the clear text, the ICV should follow the 2273 * ciphertext. To convince the kcf to allow in-line 2274 * encryption, with an ICV, use ipsec_out_crypto_mac 2275 * to point to the same buffer as the data. The calling 2276 * function need to ensure the buffer is large enough to 2277 * include the ICV. 2278 * 2279 * The IV is already written to the packet buffer, the 2280 * nonce setup function copied it to the params struct 2281 * for the cipher to use. 2282 */ 2283 if (assoc->ipsa_flags & IPSA_F_COMBINED) { 2284 bcopy(&ic->ic_crypto_data, 2285 &ic->ic_crypto_mac, 2286 sizeof (crypto_data_t)); 2287 ic->ic_crypto_mac.cd_length = 2288 payload_len + icv_len; 2289 cd_ptr = &ic->ic_crypto_mac; 2290 } 2291 2292 /* call the crypto framework */ 2293 kef_rc = crypto_encrypt((crypto_mechanism_t *) 2294 &ic->ic_cmm, &ic->ic_crypto_data, 2295 &assoc->ipsa_kcfencrkey, encr_ctx_tmpl, 2296 cd_ptr, callrp); 2297 2298 } 2299 } 2300 2301 if (do_auth && do_encr) { 2302 /* 2303 * Encryption and authentication: 2304 * Pass the pointer to the mblk chain starting at the ESP 2305 * header to the framework. Skip the ESP header mblk 2306 * for encryption, which is reflected by an encryption 2307 * offset equal to the length of that mblk. Start 2308 * the authentication at the ESP header, i.e. use an 2309 * authentication offset of zero. 2310 */ 2311 ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data, 2312 esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len); 2313 2314 /* specify IV */ 2315 ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr; 2316 2317 /* call the framework */ 2318 kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech, 2319 &assoc->ipsa_amech, NULL, 2320 &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey, 2321 encr_ctx_tmpl, auth_ctx_tmpl, 2322 &ic->ic_crypto_dual_data, 2323 &ic->ic_crypto_mac, callrp); 2324 } 2325 2326 switch (kef_rc) { 2327 case CRYPTO_SUCCESS: 2328 ESP_BUMP_STAT(espstack, crypto_sync); 2329 esp_set_usetime(assoc, B_FALSE); 2330 if (force) { 2331 mp = ipsec_free_crypto_data(mp); 2332 data_mp = ip_xmit_attr_free_mblk(mp); 2333 } 2334 if (is_natt) 2335 esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr); 2336 return (data_mp); 2337 case CRYPTO_QUEUED: 2338 /* esp_kcf_callback_outbound() will be invoked on completion */ 2339 ESP_BUMP_STAT(espstack, crypto_async); 2340 return (NULL); 2341 } 2342 2343 if (force) { 2344 mp = ipsec_free_crypto_data(mp); 2345 data_mp = ip_xmit_attr_free_mblk(mp); 2346 } 2347 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2348 esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack); 2349 /* data_mp was passed to ip_drop_packet */ 2350 return (NULL); 2351 } 2352 2353 /* 2354 * Handle outbound IPsec processing for IPv4 and IPv6 2355 * 2356 * Returns data_mp if successfully completed the request. Returns 2357 * NULL if it failed (and increments InDiscards) or if it is pending. 2358 */ 2359 static mblk_t * 2360 esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa) 2361 { 2362 mblk_t *espmp, *tailmp; 2363 ipha_t *ipha; 2364 ip6_t *ip6h; 2365 esph_t *esph_ptr, *iv_ptr; 2366 uint_t af; 2367 uint8_t *nhp; 2368 uintptr_t divpoint, datalen, adj, padlen, i, alloclen; 2369 uintptr_t esplen = sizeof (esph_t); 2370 uint8_t protocol; 2371 ipsa_t *assoc; 2372 uint_t iv_len, block_size, mac_len = 0; 2373 uchar_t *icv_buf; 2374 udpha_t *udpha; 2375 boolean_t is_natt = B_FALSE; 2376 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 2377 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2378 ipsec_stack_t *ipss = ns->netstack_ipsec; 2379 ill_t *ill = ixa->ixa_nce->nce_ill; 2380 boolean_t need_refrele = B_FALSE; 2381 2382 ESP_BUMP_STAT(espstack, out_requests); 2383 2384 /* 2385 * <sigh> We have to copy the message here, because TCP (for example) 2386 * keeps a dupb() of the message lying around for retransmission. 2387 * Since ESP changes the whole of the datagram, we have to create our 2388 * own copy lest we clobber TCP's data. Since we have to copy anyway, 2389 * we might as well make use of msgpullup() and get the mblk into one 2390 * contiguous piece! 2391 */ 2392 tailmp = msgpullup(data_mp, -1); 2393 if (tailmp == NULL) { 2394 esp0dbg(("esp_outbound: msgpullup() failed, " 2395 "dropping packet.\n")); 2396 ip_drop_packet(data_mp, B_FALSE, ill, 2397 DROPPER(ipss, ipds_esp_nomem), 2398 &espstack->esp_dropper); 2399 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2400 return (NULL); 2401 } 2402 freemsg(data_mp); 2403 data_mp = tailmp; 2404 2405 assoc = ixa->ixa_ipsec_esp_sa; 2406 ASSERT(assoc != NULL); 2407 2408 /* 2409 * Get the outer IP header in shape to escape this system.. 2410 */ 2411 if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) { 2412 /* 2413 * Need to update packet with any CIPSO option and update 2414 * ixa_tsl to capture the new label. 2415 * We allocate a separate ixa for that purpose. 2416 */ 2417 ixa = ip_xmit_attr_duplicate(ixa); 2418 if (ixa == NULL) { 2419 ip_drop_packet(data_mp, B_FALSE, ill, 2420 DROPPER(ipss, ipds_esp_nomem), 2421 &espstack->esp_dropper); 2422 return (NULL); 2423 } 2424 need_refrele = B_TRUE; 2425 2426 label_hold(assoc->ipsa_otsl); 2427 ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl); 2428 2429 data_mp = sadb_whack_label(data_mp, assoc, ixa, 2430 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 2431 if (data_mp == NULL) { 2432 /* Packet dropped by sadb_whack_label */ 2433 ixa_refrele(ixa); 2434 return (NULL); 2435 } 2436 } 2437 2438 /* 2439 * Reality check.... 2440 */ 2441 ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). */ 2442 2443 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2444 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 2445 2446 af = AF_INET; 2447 divpoint = IPH_HDR_LENGTH(ipha); 2448 datalen = ntohs(ipha->ipha_length) - divpoint; 2449 nhp = (uint8_t *)&ipha->ipha_protocol; 2450 } else { 2451 ip_pkt_t ipp; 2452 2453 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 2454 2455 af = AF_INET6; 2456 ip6h = (ip6_t *)ipha; 2457 bzero(&ipp, sizeof (ipp)); 2458 divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL); 2459 if (ipp.ipp_dstopts != NULL && 2460 ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) { 2461 /* 2462 * Destination options are tricky. If we get in here, 2463 * then we have a terminal header following the 2464 * destination options. We need to adjust backwards 2465 * so we insert ESP BEFORE the destination options 2466 * bag. (So that the dstopts get encrypted!) 2467 * 2468 * Since this is for outbound packets only, we know 2469 * that non-terminal destination options only precede 2470 * routing headers. 2471 */ 2472 divpoint -= ipp.ipp_dstoptslen; 2473 } 2474 datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint; 2475 2476 if (ipp.ipp_rthdr != NULL) { 2477 nhp = &ipp.ipp_rthdr->ip6r_nxt; 2478 } else if (ipp.ipp_hopopts != NULL) { 2479 nhp = &ipp.ipp_hopopts->ip6h_nxt; 2480 } else { 2481 ASSERT(divpoint == sizeof (ip6_t)); 2482 /* It's probably IP + ESP. */ 2483 nhp = &ip6h->ip6_nxt; 2484 } 2485 } 2486 2487 mac_len = assoc->ipsa_mac_len; 2488 2489 if (assoc->ipsa_flags & IPSA_F_NATT) { 2490 /* wedge in UDP header */ 2491 is_natt = B_TRUE; 2492 esplen += UDPH_SIZE; 2493 } 2494 2495 /* 2496 * Set up ESP header and encryption padding for ENCR PI request. 2497 */ 2498 2499 /* Determine the padding length. Pad to 4-bytes for no-encryption. */ 2500 if (assoc->ipsa_encr_alg != SADB_EALG_NULL) { 2501 iv_len = assoc->ipsa_iv_len; 2502 block_size = assoc->ipsa_datalen; 2503 2504 /* 2505 * Pad the data to the length of the cipher block size. 2506 * Include the two additional bytes (hence the - 2) for the 2507 * padding length and the next header. Take this into account 2508 * when calculating the actual length of the padding. 2509 */ 2510 ASSERT(ISP2(iv_len)); 2511 padlen = ((unsigned)(block_size - datalen - 2)) & 2512 (block_size - 1); 2513 } else { 2514 iv_len = 0; 2515 padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) & 2516 (sizeof (uint32_t) - 1); 2517 } 2518 2519 /* Allocate ESP header and IV. */ 2520 esplen += iv_len; 2521 2522 /* 2523 * Update association byte-count lifetimes. Don't forget to take 2524 * into account the padding length and next-header (hence the + 2). 2525 * 2526 * Use the amount of data fed into the "encryption algorithm". This 2527 * is the IV, the data length, the padding length, and the final two 2528 * bytes (padlen, and next-header). 2529 * 2530 */ 2531 2532 if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) { 2533 ip_drop_packet(data_mp, B_FALSE, ill, 2534 DROPPER(ipss, ipds_esp_bytes_expire), 2535 &espstack->esp_dropper); 2536 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2537 if (need_refrele) 2538 ixa_refrele(ixa); 2539 return (NULL); 2540 } 2541 2542 espmp = allocb(esplen, BPRI_HI); 2543 if (espmp == NULL) { 2544 ESP_BUMP_STAT(espstack, out_discards); 2545 esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n")); 2546 ip_drop_packet(data_mp, B_FALSE, ill, 2547 DROPPER(ipss, ipds_esp_nomem), 2548 &espstack->esp_dropper); 2549 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2550 if (need_refrele) 2551 ixa_refrele(ixa); 2552 return (NULL); 2553 } 2554 espmp->b_wptr += esplen; 2555 esph_ptr = (esph_t *)espmp->b_rptr; 2556 2557 if (is_natt) { 2558 esp3dbg(espstack, ("esp_outbound: NATT")); 2559 2560 udpha = (udpha_t *)espmp->b_rptr; 2561 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2562 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2563 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 2564 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2565 /* 2566 * Set the checksum to 0, so that the esp_prepare_udp() call 2567 * can do the right thing. 2568 */ 2569 udpha->uha_checksum = 0; 2570 esph_ptr = (esph_t *)(udpha + 1); 2571 } 2572 2573 esph_ptr->esph_spi = assoc->ipsa_spi; 2574 2575 esph_ptr->esph_replay = htonl(atomic_inc_32_nv(&assoc->ipsa_replay)); 2576 if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) { 2577 /* 2578 * XXX We have replay counter wrapping. 2579 * We probably want to nuke this SA (and its peer). 2580 */ 2581 ipsec_assocfailure(info.mi_idnum, 0, 0, 2582 SL_ERROR | SL_CONSOLE | SL_WARN, 2583 "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n", 2584 esph_ptr->esph_spi, assoc->ipsa_dstaddr, af, 2585 espstack->ipsecesp_netstack); 2586 2587 ESP_BUMP_STAT(espstack, out_discards); 2588 sadb_replay_delete(assoc); 2589 ip_drop_packet(data_mp, B_FALSE, ill, 2590 DROPPER(ipss, ipds_esp_replay), 2591 &espstack->esp_dropper); 2592 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2593 if (need_refrele) 2594 ixa_refrele(ixa); 2595 return (NULL); 2596 } 2597 2598 iv_ptr = (esph_ptr + 1); 2599 /* 2600 * iv_ptr points to the mblk which will contain the IV once we have 2601 * written it there. This mblk will be part of a mblk chain that 2602 * will make up the packet. 2603 * 2604 * For counter mode algorithms, the IV is a 64 bit quantity, it 2605 * must NEVER repeat in the lifetime of the SA, otherwise an 2606 * attacker who had recorded enough packets might be able to 2607 * determine some clear text. 2608 * 2609 * To ensure this does not happen, the IV is stored in the SA and 2610 * incremented for each packet, the IV is then copied into the 2611 * "packet" for transmission to the receiving system. The IV will 2612 * also be copied into the nonce, when the packet is encrypted. 2613 * 2614 * CBC mode algorithms use a random IV for each packet. We do not 2615 * require the highest quality random bits, but for best security 2616 * with CBC mode ciphers, the value must be unlikely to repeat and 2617 * must not be known in advance to an adversary capable of influencing 2618 * the clear text. 2619 */ 2620 if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc, 2621 espstack)) { 2622 ip_drop_packet(data_mp, B_FALSE, ill, 2623 DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper); 2624 if (need_refrele) 2625 ixa_refrele(ixa); 2626 return (NULL); 2627 } 2628 2629 /* Fix the IP header. */ 2630 alloclen = padlen + 2 + mac_len; 2631 adj = alloclen + (espmp->b_wptr - espmp->b_rptr); 2632 2633 protocol = *nhp; 2634 2635 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2636 ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj); 2637 if (is_natt) { 2638 *nhp = IPPROTO_UDP; 2639 udpha->uha_length = htons(ntohs(ipha->ipha_length) - 2640 IPH_HDR_LENGTH(ipha)); 2641 } else { 2642 *nhp = IPPROTO_ESP; 2643 } 2644 ipha->ipha_hdr_checksum = 0; 2645 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 2646 } else { 2647 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj); 2648 *nhp = IPPROTO_ESP; 2649 } 2650 2651 /* I've got the two ESP mblks, now insert them. */ 2652 2653 esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n")); 2654 esp2dbg(espstack, (dump_msg(data_mp))); 2655 2656 if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) { 2657 ESP_BUMP_STAT(espstack, out_discards); 2658 /* NOTE: esp_insert_esp() only fails if there's no memory. */ 2659 ip_drop_packet(data_mp, B_FALSE, ill, 2660 DROPPER(ipss, ipds_esp_nomem), 2661 &espstack->esp_dropper); 2662 freeb(espmp); 2663 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2664 if (need_refrele) 2665 ixa_refrele(ixa); 2666 return (NULL); 2667 } 2668 2669 /* Append padding (and leave room for ICV). */ 2670 for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont) 2671 ; 2672 if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) { 2673 tailmp->b_cont = allocb(alloclen, BPRI_HI); 2674 if (tailmp->b_cont == NULL) { 2675 ESP_BUMP_STAT(espstack, out_discards); 2676 esp0dbg(("esp_outbound: Can't allocate tailmp.\n")); 2677 ip_drop_packet(data_mp, B_FALSE, ill, 2678 DROPPER(ipss, ipds_esp_nomem), 2679 &espstack->esp_dropper); 2680 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2681 if (need_refrele) 2682 ixa_refrele(ixa); 2683 return (NULL); 2684 } 2685 tailmp = tailmp->b_cont; 2686 } 2687 2688 /* 2689 * If there's padding, N bytes of padding must be of the form 0x1, 2690 * 0x2, 0x3... 0xN. 2691 */ 2692 for (i = 0; i < padlen; ) { 2693 i++; 2694 *tailmp->b_wptr++ = i; 2695 } 2696 *tailmp->b_wptr++ = i; 2697 *tailmp->b_wptr++ = protocol; 2698 2699 esp2dbg(espstack, ("data_Mp before encryption:\n")); 2700 esp2dbg(espstack, (dump_msg(data_mp))); 2701 2702 /* 2703 * Okay. I've set up the pre-encryption ESP. Let's do it! 2704 */ 2705 2706 if (mac_len > 0) { 2707 ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim); 2708 icv_buf = tailmp->b_wptr; 2709 tailmp->b_wptr += mac_len; 2710 } else { 2711 icv_buf = NULL; 2712 } 2713 2714 data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf, 2715 datalen + padlen + 2); 2716 if (need_refrele) 2717 ixa_refrele(ixa); 2718 return (data_mp); 2719 } 2720 2721 /* 2722 * IP calls this to validate the ICMP errors that 2723 * we got from the network. 2724 */ 2725 mblk_t * 2726 ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira) 2727 { 2728 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 2729 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2730 ipsec_stack_t *ipss = ns->netstack_ipsec; 2731 2732 /* 2733 * Unless we get an entire packet back, this function is useless. 2734 * Why? 2735 * 2736 * 1.) Partial packets are useless, because the "next header" 2737 * is at the end of the decrypted ESP packet. Without the 2738 * whole packet, this is useless. 2739 * 2740 * 2.) If we every use a stateful cipher, such as a stream or a 2741 * one-time pad, we can't do anything. 2742 * 2743 * Since the chances of us getting an entire packet back are very 2744 * very small, we discard here. 2745 */ 2746 IP_ESP_BUMP_STAT(ipss, in_discards); 2747 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 2748 DROPPER(ipss, ipds_esp_icmp), 2749 &espstack->esp_dropper); 2750 return (NULL); 2751 } 2752 2753 /* 2754 * Construct an SADB_REGISTER message with the current algorithms. 2755 * This function gets called when 'ipsecalgs -s' is run or when 2756 * in.iked (or other KMD) starts. 2757 */ 2758 static boolean_t 2759 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial, 2760 ipsecesp_stack_t *espstack, cred_t *cr) 2761 { 2762 mblk_t *pfkey_msg_mp, *keysock_out_mp; 2763 sadb_msg_t *samsg; 2764 sadb_supported_t *sasupp_auth = NULL; 2765 sadb_supported_t *sasupp_encr = NULL; 2766 sadb_alg_t *saalg; 2767 uint_t allocsize = sizeof (*samsg); 2768 uint_t i, numalgs_snap; 2769 int current_aalgs; 2770 ipsec_alginfo_t **authalgs; 2771 uint_t num_aalgs; 2772 int current_ealgs; 2773 ipsec_alginfo_t **encralgs; 2774 uint_t num_ealgs; 2775 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 2776 sadb_sens_t *sens; 2777 size_t sens_len = 0; 2778 sadb_ext_t *nextext; 2779 ts_label_t *sens_tsl = NULL; 2780 2781 /* Allocate the KEYSOCK_OUT. */ 2782 keysock_out_mp = sadb_keysock_out(serial); 2783 if (keysock_out_mp == NULL) { 2784 esp0dbg(("esp_register_out: couldn't allocate mblk.\n")); 2785 return (B_FALSE); 2786 } 2787 2788 if (is_system_labeled() && (cr != NULL)) { 2789 sens_tsl = crgetlabel(cr); 2790 if (sens_tsl != NULL) { 2791 sens_len = sadb_sens_len_from_label(sens_tsl); 2792 allocsize += sens_len; 2793 } 2794 } 2795 2796 /* 2797 * Allocate the PF_KEY message that follows KEYSOCK_OUT. 2798 */ 2799 2800 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 2801 /* 2802 * Fill SADB_REGISTER message's algorithm descriptors. Hold 2803 * down the lock while filling it. 2804 * 2805 * Return only valid algorithms, so the number of algorithms 2806 * to send up may be less than the number of algorithm entries 2807 * in the table. 2808 */ 2809 authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH]; 2810 for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) 2811 if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) 2812 num_aalgs++; 2813 2814 if (num_aalgs != 0) { 2815 allocsize += (num_aalgs * sizeof (*saalg)); 2816 allocsize += sizeof (*sasupp_auth); 2817 } 2818 encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR]; 2819 for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) 2820 if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) 2821 num_ealgs++; 2822 2823 if (num_ealgs != 0) { 2824 allocsize += (num_ealgs * sizeof (*saalg)); 2825 allocsize += sizeof (*sasupp_encr); 2826 } 2827 keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI); 2828 if (keysock_out_mp->b_cont == NULL) { 2829 rw_exit(&ipss->ipsec_alg_lock); 2830 freemsg(keysock_out_mp); 2831 return (B_FALSE); 2832 } 2833 pfkey_msg_mp = keysock_out_mp->b_cont; 2834 pfkey_msg_mp->b_wptr += allocsize; 2835 2836 nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg)); 2837 2838 if (num_aalgs != 0) { 2839 sasupp_auth = (sadb_supported_t *)nextext; 2840 saalg = (sadb_alg_t *)(sasupp_auth + 1); 2841 2842 ASSERT(((ulong_t)saalg & 0x7) == 0); 2843 2844 numalgs_snap = 0; 2845 for (i = 0; 2846 ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); 2847 i++) { 2848 if (authalgs[i] == NULL || !ALG_VALID(authalgs[i])) 2849 continue; 2850 2851 saalg->sadb_alg_id = authalgs[i]->alg_id; 2852 saalg->sadb_alg_ivlen = 0; 2853 saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits; 2854 saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits; 2855 saalg->sadb_x_alg_increment = 2856 authalgs[i]->alg_increment; 2857 saalg->sadb_x_alg_saltbits = SADB_8TO1( 2858 authalgs[i]->alg_saltlen); 2859 numalgs_snap++; 2860 saalg++; 2861 } 2862 ASSERT(numalgs_snap == num_aalgs); 2863 #ifdef DEBUG 2864 /* 2865 * Reality check to make sure I snagged all of the 2866 * algorithms. 2867 */ 2868 for (; i < IPSEC_MAX_ALGS; i++) { 2869 if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) { 2870 cmn_err(CE_PANIC, "esp_register_out()! " 2871 "Missed aalg #%d.\n", i); 2872 } 2873 } 2874 #endif /* DEBUG */ 2875 nextext = (sadb_ext_t *)saalg; 2876 } 2877 2878 if (num_ealgs != 0) { 2879 sasupp_encr = (sadb_supported_t *)nextext; 2880 saalg = (sadb_alg_t *)(sasupp_encr + 1); 2881 2882 numalgs_snap = 0; 2883 for (i = 0; 2884 ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) { 2885 if (encralgs[i] == NULL || !ALG_VALID(encralgs[i])) 2886 continue; 2887 saalg->sadb_alg_id = encralgs[i]->alg_id; 2888 saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen; 2889 saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits; 2890 saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits; 2891 /* 2892 * We could advertise the ICV length, except there 2893 * is not a value in sadb_x_algb to do this. 2894 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen; 2895 */ 2896 saalg->sadb_x_alg_increment = 2897 encralgs[i]->alg_increment; 2898 saalg->sadb_x_alg_saltbits = 2899 SADB_8TO1(encralgs[i]->alg_saltlen); 2900 2901 numalgs_snap++; 2902 saalg++; 2903 } 2904 ASSERT(numalgs_snap == num_ealgs); 2905 #ifdef DEBUG 2906 /* 2907 * Reality check to make sure I snagged all of the 2908 * algorithms. 2909 */ 2910 for (; i < IPSEC_MAX_ALGS; i++) { 2911 if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) { 2912 cmn_err(CE_PANIC, "esp_register_out()! " 2913 "Missed ealg #%d.\n", i); 2914 } 2915 } 2916 #endif /* DEBUG */ 2917 nextext = (sadb_ext_t *)saalg; 2918 } 2919 2920 current_aalgs = num_aalgs; 2921 current_ealgs = num_ealgs; 2922 2923 rw_exit(&ipss->ipsec_alg_lock); 2924 2925 if (sens_tsl != NULL) { 2926 sens = (sadb_sens_t *)nextext; 2927 sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY, 2928 sens_tsl, sens_len); 2929 2930 nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len); 2931 } 2932 2933 /* Now fill the rest of the SADB_REGISTER message. */ 2934 2935 samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr; 2936 samsg->sadb_msg_version = PF_KEY_V2; 2937 samsg->sadb_msg_type = SADB_REGISTER; 2938 samsg->sadb_msg_errno = 0; 2939 samsg->sadb_msg_satype = SADB_SATYPE_ESP; 2940 samsg->sadb_msg_len = SADB_8TO64(allocsize); 2941 samsg->sadb_msg_reserved = 0; 2942 /* 2943 * Assume caller has sufficient sequence/pid number info. If it's one 2944 * from me over a new alg., I could give two hoots about sequence. 2945 */ 2946 samsg->sadb_msg_seq = sequence; 2947 samsg->sadb_msg_pid = pid; 2948 2949 if (sasupp_auth != NULL) { 2950 sasupp_auth->sadb_supported_len = SADB_8TO64( 2951 sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs); 2952 sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; 2953 sasupp_auth->sadb_supported_reserved = 0; 2954 } 2955 2956 if (sasupp_encr != NULL) { 2957 sasupp_encr->sadb_supported_len = SADB_8TO64( 2958 sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs); 2959 sasupp_encr->sadb_supported_exttype = 2960 SADB_EXT_SUPPORTED_ENCRYPT; 2961 sasupp_encr->sadb_supported_reserved = 0; 2962 } 2963 2964 if (espstack->esp_pfkey_q != NULL) 2965 putnext(espstack->esp_pfkey_q, keysock_out_mp); 2966 else { 2967 freemsg(keysock_out_mp); 2968 return (B_FALSE); 2969 } 2970 2971 return (B_TRUE); 2972 } 2973 2974 /* 2975 * Invoked when the algorithm table changes. Causes SADB_REGISTER 2976 * messages continaining the current list of algorithms to be 2977 * sent up to the ESP listeners. 2978 */ 2979 void 2980 ipsecesp_algs_changed(netstack_t *ns) 2981 { 2982 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2983 2984 /* 2985 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners 2986 * everywhere. (The function itself checks for NULL esp_pfkey_q.) 2987 */ 2988 (void) esp_register_out(0, 0, 0, espstack, NULL); 2989 } 2990 2991 /* 2992 * Stub function that taskq_dispatch() invokes to take the mblk (in arg) 2993 * and send() it into ESP and IP again. 2994 */ 2995 static void 2996 inbound_task(void *arg) 2997 { 2998 mblk_t *mp = (mblk_t *)arg; 2999 mblk_t *async_mp; 3000 ip_recv_attr_t iras; 3001 3002 async_mp = mp; 3003 mp = async_mp->b_cont; 3004 async_mp->b_cont = NULL; 3005 if (!ip_recv_attr_from_mblk(async_mp, &iras)) { 3006 /* The ill or ip_stack_t disappeared on us */ 3007 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL); 3008 freemsg(mp); 3009 goto done; 3010 } 3011 3012 esp_inbound_restart(mp, &iras); 3013 done: 3014 ira_cleanup(&iras, B_TRUE); 3015 } 3016 3017 /* 3018 * Restart ESP after the SA has been added. 3019 */ 3020 static void 3021 esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira) 3022 { 3023 esph_t *esph; 3024 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 3025 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3026 3027 esp2dbg(espstack, ("in ESP inbound_task")); 3028 ASSERT(espstack != NULL); 3029 3030 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 3031 if (mp == NULL) 3032 return; 3033 3034 ASSERT(esph != NULL); 3035 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3036 ASSERT(ira->ira_ipsec_esp_sa != NULL); 3037 3038 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira); 3039 if (mp == NULL) { 3040 /* 3041 * Either it failed or is pending. In the former case 3042 * ipIfStatsInDiscards was increased. 3043 */ 3044 return; 3045 } 3046 3047 ip_input_post_ipsec(mp, ira); 3048 } 3049 3050 /* 3051 * Now that weak-key passed, actually ADD the security association, and 3052 * send back a reply ADD message. 3053 */ 3054 static int 3055 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, 3056 int *diagnostic, ipsecesp_stack_t *espstack) 3057 { 3058 isaf_t *primary = NULL, *secondary; 3059 boolean_t clone = B_FALSE, is_inbound = B_FALSE; 3060 ipsa_t *larval = NULL; 3061 ipsacq_t *acqrec; 3062 iacqf_t *acq_bucket; 3063 mblk_t *acq_msgs = NULL; 3064 int rc; 3065 mblk_t *lpkt; 3066 int error; 3067 ipsa_query_t sq; 3068 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 3069 3070 /* 3071 * Locate the appropriate table(s). 3072 */ 3073 sq.spp = &espstack->esp_sadb; /* XXX */ 3074 error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST, 3075 IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, 3076 &sq, diagnostic); 3077 if (error) 3078 return (error); 3079 3080 /* 3081 * Use the direction flags provided by the KMD to determine 3082 * if the inbound or outbound table should be the primary 3083 * for this SA. If these flags were absent then make this 3084 * decision based on the addresses. 3085 */ 3086 if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) { 3087 primary = sq.inbound; 3088 secondary = sq.outbound; 3089 is_inbound = B_TRUE; 3090 if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) 3091 clone = B_TRUE; 3092 } else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) { 3093 primary = sq.outbound; 3094 secondary = sq.inbound; 3095 } 3096 3097 if (primary == NULL) { 3098 /* 3099 * The KMD did not set a direction flag, determine which 3100 * table to insert the SA into based on addresses. 3101 */ 3102 switch (ksi->ks_in_dsttype) { 3103 case KS_IN_ADDR_MBCAST: 3104 clone = B_TRUE; /* All mcast SAs can be bidirectional */ 3105 sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND; 3106 /* FALLTHRU */ 3107 /* 3108 * If the source address is either one of mine, or unspecified 3109 * (which is best summed up by saying "not 'not mine'"), 3110 * then the association is potentially bi-directional, 3111 * in that it can be used for inbound traffic and outbound 3112 * traffic. The best example of such an SA is a multicast 3113 * SA (which allows me to receive the outbound traffic). 3114 */ 3115 case KS_IN_ADDR_ME: 3116 sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND; 3117 primary = sq.inbound; 3118 secondary = sq.outbound; 3119 if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME) 3120 clone = B_TRUE; 3121 is_inbound = B_TRUE; 3122 break; 3123 /* 3124 * If the source address literally not mine (either 3125 * unspecified or not mine), then this SA may have an 3126 * address that WILL be mine after some configuration. 3127 * We pay the price for this by making it a bi-directional 3128 * SA. 3129 */ 3130 case KS_IN_ADDR_NOTME: 3131 sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND; 3132 primary = sq.outbound; 3133 secondary = sq.inbound; 3134 if (ksi->ks_in_srctype != KS_IN_ADDR_ME) { 3135 sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND; 3136 clone = B_TRUE; 3137 } 3138 break; 3139 default: 3140 *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST; 3141 return (EINVAL); 3142 } 3143 } 3144 3145 /* 3146 * Find a ACQUIRE list entry if possible. If we've added an SA that 3147 * suits the needs of an ACQUIRE list entry, we can eliminate the 3148 * ACQUIRE list entry and transmit the enqueued packets. Use the 3149 * high-bit of the sequence number to queue it. Key off destination 3150 * addr, and change acqrec's state. 3151 */ 3152 3153 if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) { 3154 acq_bucket = &(sq.sp->sdb_acq[sq.outhash]); 3155 mutex_enter(&acq_bucket->iacqf_lock); 3156 for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL; 3157 acqrec = acqrec->ipsacq_next) { 3158 mutex_enter(&acqrec->ipsacq_lock); 3159 /* 3160 * Q: I only check sequence. Should I check dst? 3161 * A: Yes, check dest because those are the packets 3162 * that are queued up. 3163 */ 3164 if (acqrec->ipsacq_seq == samsg->sadb_msg_seq && 3165 IPSA_ARE_ADDR_EQUAL(sq.dstaddr, 3166 acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) 3167 break; 3168 mutex_exit(&acqrec->ipsacq_lock); 3169 } 3170 if (acqrec != NULL) { 3171 /* 3172 * AHA! I found an ACQUIRE record for this SA. 3173 * Grab the msg list, and free the acquire record. 3174 * I already am holding the lock for this record, 3175 * so all I have to do is free it. 3176 */ 3177 acq_msgs = acqrec->ipsacq_mp; 3178 acqrec->ipsacq_mp = NULL; 3179 mutex_exit(&acqrec->ipsacq_lock); 3180 sadb_destroy_acquire(acqrec, 3181 espstack->ipsecesp_netstack); 3182 } 3183 mutex_exit(&acq_bucket->iacqf_lock); 3184 } 3185 3186 /* 3187 * Find PF_KEY message, and see if I'm an update. If so, find entry 3188 * in larval list (if there). 3189 */ 3190 if (samsg->sadb_msg_type == SADB_UPDATE) { 3191 mutex_enter(&sq.inbound->isaf_lock); 3192 larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi, 3193 ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family); 3194 mutex_exit(&sq.inbound->isaf_lock); 3195 3196 if ((larval == NULL) || 3197 (larval->ipsa_state != IPSA_STATE_LARVAL)) { 3198 *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND; 3199 if (larval != NULL) { 3200 IPSA_REFRELE(larval); 3201 } 3202 esp0dbg(("Larval update, but larval disappeared.\n")); 3203 return (ESRCH); 3204 } /* Else sadb_common_add unlinks it for me! */ 3205 } 3206 3207 if (larval != NULL) { 3208 /* 3209 * Hold again, because sadb_common_add() consumes a reference, 3210 * and we don't want to clear_lpkt() without a reference. 3211 */ 3212 IPSA_REFHOLD(larval); 3213 } 3214 3215 rc = sadb_common_add(espstack->esp_pfkey_q, 3216 mp, samsg, ksi, primary, secondary, larval, clone, is_inbound, 3217 diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb); 3218 3219 if (larval != NULL) { 3220 if (rc == 0) { 3221 lpkt = sadb_clear_lpkt(larval); 3222 if (lpkt != NULL) { 3223 rc = !taskq_dispatch(esp_taskq, inbound_task, 3224 lpkt, TQ_NOSLEEP); 3225 } 3226 } 3227 IPSA_REFRELE(larval); 3228 } 3229 3230 /* 3231 * How much more stack will I create with all of these 3232 * esp_outbound() calls? 3233 */ 3234 3235 /* Handle the packets queued waiting for the SA */ 3236 while (acq_msgs != NULL) { 3237 mblk_t *asyncmp; 3238 mblk_t *data_mp; 3239 ip_xmit_attr_t ixas; 3240 ill_t *ill; 3241 3242 asyncmp = acq_msgs; 3243 acq_msgs = acq_msgs->b_next; 3244 asyncmp->b_next = NULL; 3245 3246 /* 3247 * Extract the ip_xmit_attr_t from the first mblk. 3248 * Verifies that the netstack and ill is still around; could 3249 * have vanished while iked was doing its work. 3250 * On succesful return we have a nce_t and the ill/ipst can't 3251 * disappear until we do the nce_refrele in ixa_cleanup. 3252 */ 3253 data_mp = asyncmp->b_cont; 3254 asyncmp->b_cont = NULL; 3255 if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) { 3256 ESP_BUMP_STAT(espstack, out_discards); 3257 ip_drop_packet(data_mp, B_FALSE, NULL, 3258 DROPPER(ipss, ipds_sadb_acquire_timeout), 3259 &espstack->esp_dropper); 3260 } else if (rc != 0) { 3261 ill = ixas.ixa_nce->nce_ill; 3262 ESP_BUMP_STAT(espstack, out_discards); 3263 ip_drop_packet(data_mp, B_FALSE, ill, 3264 DROPPER(ipss, ipds_sadb_acquire_timeout), 3265 &espstack->esp_dropper); 3266 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 3267 } else { 3268 esp_outbound_finish(data_mp, &ixas); 3269 } 3270 ixa_cleanup(&ixas); 3271 } 3272 3273 return (rc); 3274 } 3275 3276 /* 3277 * Process one of the queued messages (from ipsacq_mp) once the SA 3278 * has been added. 3279 */ 3280 static void 3281 esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa) 3282 { 3283 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 3284 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3285 ipsec_stack_t *ipss = ns->netstack_ipsec; 3286 ill_t *ill = ixa->ixa_nce->nce_ill; 3287 3288 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) { 3289 ESP_BUMP_STAT(espstack, out_discards); 3290 ip_drop_packet(data_mp, B_FALSE, ill, 3291 DROPPER(ipss, ipds_sadb_acquire_timeout), 3292 &espstack->esp_dropper); 3293 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 3294 return; 3295 } 3296 3297 data_mp = esp_outbound(data_mp, ixa); 3298 if (data_mp == NULL) 3299 return; 3300 3301 /* do AH processing if needed */ 3302 data_mp = esp_do_outbound_ah(data_mp, ixa); 3303 if (data_mp == NULL) 3304 return; 3305 3306 (void) ip_output_post_ipsec(data_mp, ixa); 3307 } 3308 3309 /* 3310 * Add new ESP security association. This may become a generic AH/ESP 3311 * routine eventually. 3312 */ 3313 static int 3314 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) 3315 { 3316 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3317 sadb_address_t *srcext = 3318 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3319 sadb_address_t *dstext = 3320 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3321 sadb_address_t *isrcext = 3322 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC]; 3323 sadb_address_t *idstext = 3324 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST]; 3325 sadb_address_t *nttext_loc = 3326 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC]; 3327 sadb_address_t *nttext_rem = 3328 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM]; 3329 sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH]; 3330 sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT]; 3331 struct sockaddr_in *src, *dst; 3332 struct sockaddr_in *natt_loc, *natt_rem; 3333 struct sockaddr_in6 *natt_loc6, *natt_rem6; 3334 sadb_lifetime_t *soft = 3335 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT]; 3336 sadb_lifetime_t *hard = 3337 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD]; 3338 sadb_lifetime_t *idle = 3339 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE]; 3340 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3341 ipsec_stack_t *ipss = ns->netstack_ipsec; 3342 3343 3344 3345 /* I need certain extensions present for an ADD message. */ 3346 if (srcext == NULL) { 3347 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC; 3348 return (EINVAL); 3349 } 3350 if (dstext == NULL) { 3351 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3352 return (EINVAL); 3353 } 3354 if (isrcext == NULL && idstext != NULL) { 3355 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC; 3356 return (EINVAL); 3357 } 3358 if (isrcext != NULL && idstext == NULL) { 3359 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST; 3360 return (EINVAL); 3361 } 3362 if (assoc == NULL) { 3363 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3364 return (EINVAL); 3365 } 3366 if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) { 3367 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY; 3368 return (EINVAL); 3369 } 3370 3371 src = (struct sockaddr_in *)(srcext + 1); 3372 dst = (struct sockaddr_in *)(dstext + 1); 3373 natt_loc = (struct sockaddr_in *)(nttext_loc + 1); 3374 natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1); 3375 natt_rem = (struct sockaddr_in *)(nttext_rem + 1); 3376 natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1); 3377 3378 /* Sundry ADD-specific reality checks. */ 3379 /* XXX STATS : Logging/stats here? */ 3380 3381 if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) && 3382 (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) { 3383 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE; 3384 return (EINVAL); 3385 } 3386 if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) { 3387 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3388 return (EINVAL); 3389 } 3390 3391 #ifndef IPSEC_LATENCY_TEST 3392 if (assoc->sadb_sa_encrypt == SADB_EALG_NULL && 3393 assoc->sadb_sa_auth == SADB_AALG_NONE) { 3394 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3395 return (EINVAL); 3396 } 3397 #endif 3398 3399 if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) { 3400 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; 3401 return (EINVAL); 3402 } 3403 3404 if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) { 3405 return (EINVAL); 3406 } 3407 ASSERT(src->sin_family == dst->sin_family); 3408 3409 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) { 3410 if (nttext_loc == NULL) { 3411 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC; 3412 return (EINVAL); 3413 } 3414 3415 if (natt_loc->sin_family == AF_INET6 && 3416 !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) { 3417 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC; 3418 return (EINVAL); 3419 } 3420 } 3421 3422 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) { 3423 if (nttext_rem == NULL) { 3424 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM; 3425 return (EINVAL); 3426 } 3427 if (natt_rem->sin_family == AF_INET6 && 3428 !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) { 3429 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM; 3430 return (EINVAL); 3431 } 3432 } 3433 3434 3435 /* Stuff I don't support, for now. XXX Diagnostic? */ 3436 if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) 3437 return (EOPNOTSUPP); 3438 3439 if ((*diagnostic = sadb_labelchk(ksi)) != 0) 3440 return (EINVAL); 3441 3442 /* 3443 * XXX Policy : I'm not checking identities at this time, 3444 * but if I did, I'd do them here, before I sent 3445 * the weak key check up to the algorithm. 3446 */ 3447 3448 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 3449 3450 /* 3451 * First locate the authentication algorithm. 3452 */ 3453 #ifdef IPSEC_LATENCY_TEST 3454 if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) { 3455 #else 3456 if (akey != NULL) { 3457 #endif 3458 ipsec_alginfo_t *aalg; 3459 3460 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] 3461 [assoc->sadb_sa_auth]; 3462 if (aalg == NULL || !ALG_VALID(aalg)) { 3463 rw_exit(&ipss->ipsec_alg_lock); 3464 esp1dbg(espstack, ("Couldn't find auth alg #%d.\n", 3465 assoc->sadb_sa_auth)); 3466 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3467 return (EINVAL); 3468 } 3469 3470 /* 3471 * Sanity check key sizes. 3472 * Note: It's not possible to use SADB_AALG_NONE because 3473 * this auth_alg is not defined with ALG_FLAG_VALID. If this 3474 * ever changes, the same check for SADB_AALG_NONE and 3475 * a auth_key != NULL should be made here ( see below). 3476 */ 3477 if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) { 3478 rw_exit(&ipss->ipsec_alg_lock); 3479 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS; 3480 return (EINVAL); 3481 } 3482 ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3483 3484 /* check key and fix parity if needed */ 3485 if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE, 3486 diagnostic) != 0) { 3487 rw_exit(&ipss->ipsec_alg_lock); 3488 return (EINVAL); 3489 } 3490 } 3491 3492 /* 3493 * Then locate the encryption algorithm. 3494 */ 3495 if (ekey != NULL) { 3496 uint_t keybits; 3497 ipsec_alginfo_t *ealg; 3498 3499 ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] 3500 [assoc->sadb_sa_encrypt]; 3501 if (ealg == NULL || !ALG_VALID(ealg)) { 3502 rw_exit(&ipss->ipsec_alg_lock); 3503 esp1dbg(espstack, ("Couldn't find encr alg #%d.\n", 3504 assoc->sadb_sa_encrypt)); 3505 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3506 return (EINVAL); 3507 } 3508 3509 /* 3510 * Sanity check key sizes. If the encryption algorithm is 3511 * SADB_EALG_NULL but the encryption key is NOT 3512 * NULL then complain. 3513 * 3514 * The keying material includes salt bits if required by 3515 * algorithm and optionally the Initial IV, check the 3516 * length of whats left. 3517 */ 3518 keybits = ekey->sadb_key_bits; 3519 keybits -= ekey->sadb_key_reserved; 3520 keybits -= SADB_8TO1(ealg->alg_saltlen); 3521 if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) || 3522 (!ipsec_valid_key_size(keybits, ealg))) { 3523 rw_exit(&ipss->ipsec_alg_lock); 3524 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS; 3525 return (EINVAL); 3526 } 3527 ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3528 3529 /* check key */ 3530 if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE, 3531 diagnostic) != 0) { 3532 rw_exit(&ipss->ipsec_alg_lock); 3533 return (EINVAL); 3534 } 3535 } 3536 rw_exit(&ipss->ipsec_alg_lock); 3537 3538 return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, 3539 diagnostic, espstack)); 3540 } 3541 3542 /* 3543 * Update a security association. Updates come in two varieties. The first 3544 * is an update of lifetimes on a non-larval SA. The second is an update of 3545 * a larval SA, which ends up looking a lot more like an add. 3546 */ 3547 static int 3548 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3549 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type) 3550 { 3551 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3552 mblk_t *buf_pkt; 3553 int rcode; 3554 3555 sadb_address_t *dstext = 3556 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3557 3558 if (dstext == NULL) { 3559 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3560 return (EINVAL); 3561 } 3562 3563 rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb, 3564 diagnostic, espstack->esp_pfkey_q, esp_add_sa, 3565 espstack->ipsecesp_netstack, sadb_msg_type); 3566 3567 if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) || 3568 (rcode != 0)) { 3569 return (rcode); 3570 } 3571 3572 HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec, 3573 espstack->esp_dropper, buf_pkt); 3574 3575 return (rcode); 3576 } 3577 3578 /* XXX refactor me */ 3579 /* 3580 * Delete a security association. This is REALLY likely to be code common to 3581 * both AH and ESP. Find the association, then unlink it. 3582 */ 3583 static int 3584 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3585 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type) 3586 { 3587 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3588 sadb_address_t *dstext = 3589 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3590 sadb_address_t *srcext = 3591 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3592 struct sockaddr_in *sin; 3593 3594 if (assoc == NULL) { 3595 if (dstext != NULL) { 3596 sin = (struct sockaddr_in *)(dstext + 1); 3597 } else if (srcext != NULL) { 3598 sin = (struct sockaddr_in *)(srcext + 1); 3599 } else { 3600 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3601 return (EINVAL); 3602 } 3603 return (sadb_purge_sa(mp, ksi, 3604 (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : 3605 &espstack->esp_sadb.s_v4, diagnostic, 3606 espstack->esp_pfkey_q)); 3607 } 3608 3609 return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic, 3610 espstack->esp_pfkey_q, sadb_msg_type)); 3611 } 3612 3613 /* XXX refactor me */ 3614 /* 3615 * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP 3616 * messages. 3617 */ 3618 static void 3619 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 3620 { 3621 int error; 3622 sadb_msg_t *samsg; 3623 3624 /* 3625 * Dump each fanout, bailing if error is non-zero. 3626 */ 3627 3628 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi, 3629 &espstack->esp_sadb.s_v4); 3630 if (error != 0) 3631 goto bail; 3632 3633 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi, 3634 &espstack->esp_sadb.s_v6); 3635 bail: 3636 ASSERT(mp->b_cont != NULL); 3637 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3638 samsg->sadb_msg_errno = (uint8_t)error; 3639 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, 3640 (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL); 3641 } 3642 3643 /* 3644 * First-cut reality check for an inbound PF_KEY message. 3645 */ 3646 static boolean_t 3647 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi, 3648 ipsecesp_stack_t *espstack) 3649 { 3650 int diagnostic; 3651 3652 if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) { 3653 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT; 3654 goto badmsg; 3655 } 3656 if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL || 3657 ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) { 3658 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT; 3659 goto badmsg; 3660 } 3661 return (B_FALSE); /* False ==> no failures */ 3662 3663 badmsg: 3664 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 3665 ksi->ks_in_serial); 3666 return (B_TRUE); /* True ==> failures */ 3667 } 3668 3669 /* 3670 * ESP parsing of PF_KEY messages. Keysock did most of the really silly 3671 * error cases. What I receive is a fully-formed, syntactically legal 3672 * PF_KEY message. I then need to check semantics... 3673 * 3674 * This code may become common to AH and ESP. Stay tuned. 3675 * 3676 * I also make the assumption that db_ref's are cool. If this assumption 3677 * is wrong, this means that someone other than keysock or me has been 3678 * mucking with PF_KEY messages. 3679 */ 3680 static void 3681 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack) 3682 { 3683 mblk_t *msg = mp->b_cont; 3684 sadb_msg_t *samsg; 3685 keysock_in_t *ksi; 3686 int error; 3687 int diagnostic = SADB_X_DIAGNOSTIC_NONE; 3688 3689 ASSERT(msg != NULL); 3690 3691 samsg = (sadb_msg_t *)msg->b_rptr; 3692 ksi = (keysock_in_t *)mp->b_rptr; 3693 3694 /* 3695 * If applicable, convert unspecified AF_INET6 to unspecified 3696 * AF_INET. And do other address reality checks. 3697 */ 3698 if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp, 3699 espstack->ipsecesp_netstack) || 3700 esp_pfkey_reality_failures(mp, ksi, espstack)) { 3701 return; 3702 } 3703 3704 switch (samsg->sadb_msg_type) { 3705 case SADB_ADD: 3706 error = esp_add_sa(mp, ksi, &diagnostic, 3707 espstack->ipsecesp_netstack); 3708 if (error != 0) { 3709 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3710 diagnostic, ksi->ks_in_serial); 3711 } 3712 /* else esp_add_sa() took care of things. */ 3713 break; 3714 case SADB_DELETE: 3715 case SADB_X_DELPAIR: 3716 case SADB_X_DELPAIR_STATE: 3717 error = esp_del_sa(mp, ksi, &diagnostic, espstack, 3718 samsg->sadb_msg_type); 3719 if (error != 0) { 3720 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3721 diagnostic, ksi->ks_in_serial); 3722 } 3723 /* Else esp_del_sa() took care of things. */ 3724 break; 3725 case SADB_GET: 3726 error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb, 3727 &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type); 3728 if (error != 0) { 3729 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3730 diagnostic, ksi->ks_in_serial); 3731 } 3732 /* Else sadb_get_sa() took care of things. */ 3733 break; 3734 case SADB_FLUSH: 3735 sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack); 3736 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL); 3737 break; 3738 case SADB_REGISTER: 3739 /* 3740 * Hmmm, let's do it! Check for extensions (there should 3741 * be none), extract the fields, call esp_register_out(), 3742 * then either free or report an error. 3743 * 3744 * Keysock takes care of the PF_KEY bookkeeping for this. 3745 */ 3746 if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid, 3747 ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) { 3748 freemsg(mp); 3749 } else { 3750 /* 3751 * Only way this path hits is if there is a memory 3752 * failure. It will not return B_FALSE because of 3753 * lack of esp_pfkey_q if I am in wput(). 3754 */ 3755 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, 3756 diagnostic, ksi->ks_in_serial); 3757 } 3758 break; 3759 case SADB_UPDATE: 3760 case SADB_X_UPDATEPAIR: 3761 /* 3762 * Find a larval, if not there, find a full one and get 3763 * strict. 3764 */ 3765 error = esp_update_sa(mp, ksi, &diagnostic, espstack, 3766 samsg->sadb_msg_type); 3767 if (error != 0) { 3768 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3769 diagnostic, ksi->ks_in_serial); 3770 } 3771 /* else esp_update_sa() took care of things. */ 3772 break; 3773 case SADB_GETSPI: 3774 /* 3775 * Reserve a new larval entry. 3776 */ 3777 esp_getspi(mp, ksi, espstack); 3778 break; 3779 case SADB_ACQUIRE: 3780 /* 3781 * Find larval and/or ACQUIRE record and kill it (them), I'm 3782 * most likely an error. Inbound ACQUIRE messages should only 3783 * have the base header. 3784 */ 3785 sadb_in_acquire(samsg, &espstack->esp_sadb, 3786 espstack->esp_pfkey_q, espstack->ipsecesp_netstack); 3787 freemsg(mp); 3788 break; 3789 case SADB_DUMP: 3790 /* 3791 * Dump all entries. 3792 */ 3793 esp_dump(mp, ksi, espstack); 3794 /* esp_dump will take care of the return message, etc. */ 3795 break; 3796 case SADB_EXPIRE: 3797 /* Should never reach me. */ 3798 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP, 3799 diagnostic, ksi->ks_in_serial); 3800 break; 3801 default: 3802 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, 3803 SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial); 3804 break; 3805 } 3806 } 3807 3808 /* 3809 * Handle case where PF_KEY says it can't find a keysock for one of my 3810 * ACQUIRE messages. 3811 */ 3812 static void 3813 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack) 3814 { 3815 sadb_msg_t *samsg; 3816 keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr; 3817 3818 if (mp->b_cont == NULL) { 3819 freemsg(mp); 3820 return; 3821 } 3822 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3823 3824 /* 3825 * If keysock can't find any registered, delete the acquire record 3826 * immediately, and handle errors. 3827 */ 3828 if (samsg->sadb_msg_type == SADB_ACQUIRE) { 3829 samsg->sadb_msg_errno = kse->ks_err_errno; 3830 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg)); 3831 /* 3832 * Use the write-side of the esp_pfkey_q 3833 */ 3834 sadb_in_acquire(samsg, &espstack->esp_sadb, 3835 WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack); 3836 } 3837 3838 freemsg(mp); 3839 } 3840 3841 /* 3842 * ESP module write put routine. 3843 */ 3844 static void 3845 ipsecesp_wput(queue_t *q, mblk_t *mp) 3846 { 3847 ipsec_info_t *ii; 3848 struct iocblk *iocp; 3849 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 3850 3851 esp3dbg(espstack, ("In esp_wput().\n")); 3852 3853 /* NOTE: Each case must take care of freeing or passing mp. */ 3854 switch (mp->b_datap->db_type) { 3855 case M_CTL: 3856 if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) { 3857 /* Not big enough message. */ 3858 freemsg(mp); 3859 break; 3860 } 3861 ii = (ipsec_info_t *)mp->b_rptr; 3862 3863 switch (ii->ipsec_info_type) { 3864 case KEYSOCK_OUT_ERR: 3865 esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n")); 3866 esp_keysock_no_socket(mp, espstack); 3867 break; 3868 case KEYSOCK_IN: 3869 ESP_BUMP_STAT(espstack, keysock_in); 3870 esp3dbg(espstack, ("Got KEYSOCK_IN message.\n")); 3871 3872 /* Parse the message. */ 3873 esp_parse_pfkey(mp, espstack); 3874 break; 3875 case KEYSOCK_HELLO: 3876 sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp, 3877 esp_ager, (void *)espstack, &espstack->esp_event, 3878 SADB_SATYPE_ESP); 3879 break; 3880 default: 3881 esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n", 3882 ii->ipsec_info_type)); 3883 freemsg(mp); 3884 break; 3885 } 3886 break; 3887 case M_IOCTL: 3888 iocp = (struct iocblk *)mp->b_rptr; 3889 switch (iocp->ioc_cmd) { 3890 case ND_SET: 3891 case ND_GET: 3892 if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) { 3893 qreply(q, mp); 3894 return; 3895 } else { 3896 iocp->ioc_error = ENOENT; 3897 } 3898 /* FALLTHRU */ 3899 default: 3900 /* We really don't support any other ioctls, do we? */ 3901 3902 /* Return EINVAL */ 3903 if (iocp->ioc_error != ENOENT) 3904 iocp->ioc_error = EINVAL; 3905 iocp->ioc_count = 0; 3906 mp->b_datap->db_type = M_IOCACK; 3907 qreply(q, mp); 3908 return; 3909 } 3910 default: 3911 esp3dbg(espstack, 3912 ("Got default message, type %d, passing to IP.\n", 3913 mp->b_datap->db_type)); 3914 putnext(q, mp); 3915 } 3916 } 3917 3918 /* 3919 * Wrapper to allow IP to trigger an ESP association failure message 3920 * during inbound SA selection. 3921 */ 3922 void 3923 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt, 3924 uint32_t spi, void *addr, int af, ip_recv_attr_t *ira) 3925 { 3926 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 3927 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3928 ipsec_stack_t *ipss = ns->netstack_ipsec; 3929 3930 if (espstack->ipsecesp_log_unknown_spi) { 3931 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi, 3932 addr, af, espstack->ipsecesp_netstack); 3933 } 3934 3935 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 3936 DROPPER(ipss, ipds_esp_no_sa), 3937 &espstack->esp_dropper); 3938 } 3939 3940 /* 3941 * Initialize the ESP input and output processing functions. 3942 */ 3943 void 3944 ipsecesp_init_funcs(ipsa_t *sa) 3945 { 3946 if (sa->ipsa_output_func == NULL) 3947 sa->ipsa_output_func = esp_outbound; 3948 if (sa->ipsa_input_func == NULL) 3949 sa->ipsa_input_func = esp_inbound; 3950 } 3951