1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2017 Joyent, Inc. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/errno.h> 32 #include <sys/strlog.h> 33 #include <sys/tihdr.h> 34 #include <sys/socket.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/kmem.h> 38 #include <sys/zone.h> 39 #include <sys/sysmacros.h> 40 #include <sys/cmn_err.h> 41 #include <sys/vtrace.h> 42 #include <sys/debug.h> 43 #include <sys/atomic.h> 44 #include <sys/strsun.h> 45 #include <sys/random.h> 46 #include <netinet/in.h> 47 #include <net/if.h> 48 #include <netinet/ip6.h> 49 #include <net/pfkeyv2.h> 50 #include <net/pfpolicy.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/nd.h> 55 #include <inet/ip.h> 56 #include <inet/ip_impl.h> 57 #include <inet/ip6.h> 58 #include <inet/ip_if.h> 59 #include <inet/ip_ndp.h> 60 #include <inet/sadb.h> 61 #include <inet/ipsec_info.h> 62 #include <inet/ipsec_impl.h> 63 #include <inet/ipsecesp.h> 64 #include <inet/ipdrop.h> 65 #include <inet/tcp.h> 66 #include <sys/kstat.h> 67 #include <sys/policy.h> 68 #include <sys/strsun.h> 69 #include <sys/strsubr.h> 70 #include <inet/udp_impl.h> 71 #include <sys/taskq.h> 72 #include <sys/note.h> 73 74 #include <sys/tsol/tnet.h> 75 76 /* 77 * Table of ND variables supported by ipsecesp. These are loaded into 78 * ipsecesp_g_nd in ipsecesp_init_nd. 79 * All of these are alterable, within the min/max values given, at run time. 80 */ 81 static ipsecespparam_t lcl_param_arr[] = { 82 /* min max value name */ 83 { 0, 3, 0, "ipsecesp_debug"}, 84 { 125, 32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"}, 85 { 1, 10, 1, "ipsecesp_reap_delay"}, 86 { 1, SADB_MAX_REPLAY, 64, "ipsecesp_replay_size"}, 87 { 1, 300, 15, "ipsecesp_acquire_timeout"}, 88 { 1, 1800, 90, "ipsecesp_larval_timeout"}, 89 /* Default lifetime values for ACQUIRE messages. */ 90 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_bytes"}, 91 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_bytes"}, 92 { 0, 0xffffffffU, 24000, "ipsecesp_default_soft_addtime"}, 93 { 0, 0xffffffffU, 28800, "ipsecesp_default_hard_addtime"}, 94 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_usetime"}, 95 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_usetime"}, 96 { 0, 1, 0, "ipsecesp_log_unknown_spi"}, 97 { 0, 2, 1, "ipsecesp_padding_check"}, 98 { 0, 600, 20, "ipsecesp_nat_keepalive_interval"}, 99 }; 100 /* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */ 101 102 #define esp0dbg(a) printf a 103 /* NOTE: != 0 instead of > 0 so lint doesn't complain. */ 104 #define esp1dbg(espstack, a) if (espstack->ipsecesp_debug != 0) printf a 105 #define esp2dbg(espstack, a) if (espstack->ipsecesp_debug > 1) printf a 106 #define esp3dbg(espstack, a) if (espstack->ipsecesp_debug > 2) printf a 107 108 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *); 109 static int ipsecesp_close(queue_t *, int, cred_t *); 110 static int ipsecesp_rput(queue_t *, mblk_t *); 111 static int ipsecesp_wput(queue_t *, mblk_t *); 112 static void *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns); 113 static void ipsecesp_stack_fini(netstackid_t stackid, void *arg); 114 115 static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *); 116 static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *); 117 static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *); 118 119 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t, 120 ipsecesp_stack_t *, cred_t *); 121 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t, 122 kstat_named_t **, ipsecesp_stack_t *); 123 static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *, 124 ipsa_t *, uint_t); 125 static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *, 126 ipsa_t *, uchar_t *, uint_t); 127 128 /* Setable in /etc/system */ 129 uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; 130 131 static struct module_info info = { 132 5137, "ipsecesp", 0, INFPSZ, 65536, 1024 133 }; 134 135 static struct qinit rinit = { 136 ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 137 NULL 138 }; 139 140 static struct qinit winit = { 141 ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 142 NULL 143 }; 144 145 struct streamtab ipsecespinfo = { 146 &rinit, &winit, NULL, NULL 147 }; 148 149 static taskq_t *esp_taskq; 150 151 /* 152 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now. 153 * 154 * Question: Do I need this, given that all instance's esps->esps_wq point 155 * to IP? 156 * 157 * Answer: Yes, because I need to know which queue is BOUND to 158 * IPPROTO_ESP 159 */ 160 161 static int esp_kstat_update(kstat_t *, int); 162 163 static boolean_t 164 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) 165 { 166 espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", 167 "net", KSTAT_TYPE_NAMED, 168 sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid); 169 170 if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) 171 return (B_FALSE); 172 173 espstack->esp_kstats = espstack->esp_ksp->ks_data; 174 175 espstack->esp_ksp->ks_update = esp_kstat_update; 176 espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid; 177 178 #define K64 KSTAT_DATA_UINT64 179 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64) 180 181 KI(num_aalgs); 182 KI(num_ealgs); 183 KI(good_auth); 184 KI(bad_auth); 185 KI(bad_padding); 186 KI(replay_failures); 187 KI(replay_early_failures); 188 KI(keysock_in); 189 KI(out_requests); 190 KI(acquire_requests); 191 KI(bytes_expired); 192 KI(out_discards); 193 KI(crypto_sync); 194 KI(crypto_async); 195 KI(crypto_failures); 196 KI(bad_decrypt); 197 KI(sa_port_renumbers); 198 199 #undef KI 200 #undef K64 201 202 kstat_install(espstack->esp_ksp); 203 204 return (B_TRUE); 205 } 206 207 static int 208 esp_kstat_update(kstat_t *kp, int rw) 209 { 210 esp_kstats_t *ekp; 211 netstackid_t stackid; 212 netstack_t *ns; 213 ipsec_stack_t *ipss; 214 215 if ((kp == NULL) || (kp->ks_data == NULL)) 216 return (EIO); 217 218 if (rw == KSTAT_WRITE) 219 return (EACCES); 220 221 stackid = (zoneid_t)(uintptr_t)kp->ks_private; 222 ns = netstack_find_by_stackid(stackid); 223 if (ns == NULL) 224 return (-1); 225 ipss = ns->netstack_ipsec; 226 if (ipss == NULL) { 227 netstack_rele(ns); 228 return (-1); 229 } 230 ekp = (esp_kstats_t *)kp->ks_data; 231 232 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 233 ekp->esp_stat_num_aalgs.value.ui64 = 234 ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; 235 ekp->esp_stat_num_ealgs.value.ui64 = 236 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 237 rw_exit(&ipss->ipsec_alg_lock); 238 239 netstack_rele(ns); 240 return (0); 241 } 242 243 #ifdef DEBUG 244 /* 245 * Debug routine, useful to see pre-encryption data. 246 */ 247 static char * 248 dump_msg(mblk_t *mp) 249 { 250 char tmp_str[3], tmp_line[256]; 251 252 while (mp != NULL) { 253 unsigned char *ptr; 254 255 printf("mblk address 0x%p, length %ld, db_ref %d " 256 "type %d, base 0x%p, lim 0x%p\n", 257 (void *) mp, (long)(mp->b_wptr - mp->b_rptr), 258 mp->b_datap->db_ref, mp->b_datap->db_type, 259 (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim); 260 ptr = mp->b_rptr; 261 262 tmp_line[0] = '\0'; 263 while (ptr < mp->b_wptr) { 264 uint_t diff; 265 266 diff = (ptr - mp->b_rptr); 267 if (!(diff & 0x1f)) { 268 if (strlen(tmp_line) > 0) { 269 printf("bytes: %s\n", tmp_line); 270 tmp_line[0] = '\0'; 271 } 272 } 273 if (!(diff & 0x3)) 274 (void) strcat(tmp_line, " "); 275 (void) sprintf(tmp_str, "%02x", *ptr); 276 (void) strcat(tmp_line, tmp_str); 277 ptr++; 278 } 279 if (strlen(tmp_line) > 0) 280 printf("bytes: %s\n", tmp_line); 281 282 mp = mp->b_cont; 283 } 284 285 return ("\n"); 286 } 287 288 #else /* DEBUG */ 289 static char * 290 dump_msg(mblk_t *mp) 291 { 292 printf("Find value of mp %p.\n", mp); 293 return ("\n"); 294 } 295 #endif /* DEBUG */ 296 297 /* 298 * Don't have to lock age_interval, as only one thread will access it at 299 * a time, because I control the one function that does with timeout(). 300 */ 301 static void 302 esp_ager(void *arg) 303 { 304 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 305 netstack_t *ns = espstack->ipsecesp_netstack; 306 hrtime_t begin = gethrtime(); 307 308 sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 309 espstack->ipsecesp_reap_delay, ns); 310 sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q, 311 espstack->ipsecesp_reap_delay, ns); 312 313 espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q, 314 esp_ager, espstack, 315 &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max, 316 info.mi_idnum); 317 } 318 319 /* 320 * Get an ESP NDD parameter. 321 */ 322 /* ARGSUSED */ 323 static int 324 ipsecesp_param_get( 325 queue_t *q, 326 mblk_t *mp, 327 caddr_t cp, 328 cred_t *cr) 329 { 330 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 331 uint_t value; 332 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 333 334 mutex_enter(&espstack->ipsecesp_param_lock); 335 value = ipsecesppa->ipsecesp_param_value; 336 mutex_exit(&espstack->ipsecesp_param_lock); 337 338 (void) mi_mpprintf(mp, "%u", value); 339 return (0); 340 } 341 342 /* 343 * This routine sets an NDD variable in a ipsecespparam_t structure. 344 */ 345 /* ARGSUSED */ 346 static int 347 ipsecesp_param_set( 348 queue_t *q, 349 mblk_t *mp, 350 char *value, 351 caddr_t cp, 352 cred_t *cr) 353 { 354 ulong_t new_value; 355 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 356 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 357 358 /* 359 * Fail the request if the new value does not lie within the 360 * required bounds. 361 */ 362 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 363 new_value < ipsecesppa->ipsecesp_param_min || 364 new_value > ipsecesppa->ipsecesp_param_max) { 365 return (EINVAL); 366 } 367 368 /* Set the new value */ 369 mutex_enter(&espstack->ipsecesp_param_lock); 370 ipsecesppa->ipsecesp_param_value = new_value; 371 mutex_exit(&espstack->ipsecesp_param_lock); 372 return (0); 373 } 374 375 /* 376 * Using lifetime NDD variables, fill in an extended combination's 377 * lifetime information. 378 */ 379 void 380 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) 381 { 382 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 383 384 ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes; 385 ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes; 386 ecomb->sadb_x_ecomb_soft_addtime = 387 espstack->ipsecesp_default_soft_addtime; 388 ecomb->sadb_x_ecomb_hard_addtime = 389 espstack->ipsecesp_default_hard_addtime; 390 ecomb->sadb_x_ecomb_soft_usetime = 391 espstack->ipsecesp_default_soft_usetime; 392 ecomb->sadb_x_ecomb_hard_usetime = 393 espstack->ipsecesp_default_hard_usetime; 394 } 395 396 /* 397 * Initialize things for ESP at module load time. 398 */ 399 boolean_t 400 ipsecesp_ddi_init(void) 401 { 402 esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, 403 IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); 404 405 /* 406 * We want to be informed each time a stack is created or 407 * destroyed in the kernel, so we can maintain the 408 * set of ipsecesp_stack_t's. 409 */ 410 netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL, 411 ipsecesp_stack_fini); 412 413 return (B_TRUE); 414 } 415 416 /* 417 * Walk through the param array specified registering each element with the 418 * named dispatch handler. 419 */ 420 static boolean_t 421 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt) 422 { 423 for (; cnt-- > 0; espp++) { 424 if (espp->ipsecesp_param_name != NULL && 425 espp->ipsecesp_param_name[0]) { 426 if (!nd_load(ndp, 427 espp->ipsecesp_param_name, 428 ipsecesp_param_get, ipsecesp_param_set, 429 (caddr_t)espp)) { 430 nd_free(ndp); 431 return (B_FALSE); 432 } 433 } 434 } 435 return (B_TRUE); 436 } 437 438 /* 439 * Initialize things for ESP for each stack instance 440 */ 441 static void * 442 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns) 443 { 444 ipsecesp_stack_t *espstack; 445 ipsecespparam_t *espp; 446 447 espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack), 448 KM_SLEEP); 449 espstack->ipsecesp_netstack = ns; 450 451 espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 452 espstack->ipsecesp_params = espp; 453 bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr)); 454 455 (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp, 456 A_CNT(lcl_param_arr)); 457 458 (void) esp_kstat_init(espstack, stackid); 459 460 espstack->esp_sadb.s_acquire_timeout = 461 &espstack->ipsecesp_acquire_timeout; 462 sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size, 463 espstack->ipsecesp_netstack); 464 465 mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0); 466 467 ip_drop_register(&espstack->esp_dropper, "IPsec ESP"); 468 return (espstack); 469 } 470 471 /* 472 * Destroy things for ESP at module unload time. 473 */ 474 void 475 ipsecesp_ddi_destroy(void) 476 { 477 netstack_unregister(NS_IPSECESP); 478 taskq_destroy(esp_taskq); 479 } 480 481 /* 482 * Destroy things for ESP for one stack instance 483 */ 484 static void 485 ipsecesp_stack_fini(netstackid_t stackid, void *arg) 486 { 487 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 488 489 if (espstack->esp_pfkey_q != NULL) { 490 (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event); 491 } 492 espstack->esp_sadb.s_acquire_timeout = NULL; 493 sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack); 494 ip_drop_unregister(&espstack->esp_dropper); 495 mutex_destroy(&espstack->ipsecesp_param_lock); 496 nd_free(&espstack->ipsecesp_g_nd); 497 498 kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr)); 499 espstack->ipsecesp_params = NULL; 500 kstat_delete_netstack(espstack->esp_ksp, stackid); 501 espstack->esp_ksp = NULL; 502 espstack->esp_kstats = NULL; 503 kmem_free(espstack, sizeof (*espstack)); 504 } 505 506 /* 507 * ESP module open routine, which is here for keysock plumbing. 508 * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old 509 * Days of export control, and fears that ESP would not be allowed 510 * to be shipped at all by default. Eventually, keysock should 511 * either access AH and ESP via modstubs or krtld dependencies, or 512 * perhaps be folded in with AH and ESP into a single IPsec/netsec 513 * module ("netsec" if PF_KEY provides more than AH/ESP keying tables). 514 */ 515 /* ARGSUSED */ 516 static int 517 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 518 { 519 netstack_t *ns; 520 ipsecesp_stack_t *espstack; 521 522 if (secpolicy_ip_config(credp, B_FALSE) != 0) 523 return (EPERM); 524 525 if (q->q_ptr != NULL) 526 return (0); /* Re-open of an already open instance. */ 527 528 if (sflag != MODOPEN) 529 return (EINVAL); 530 531 ns = netstack_find_by_cred(credp); 532 ASSERT(ns != NULL); 533 espstack = ns->netstack_ipsecesp; 534 ASSERT(espstack != NULL); 535 536 q->q_ptr = espstack; 537 WR(q)->q_ptr = q->q_ptr; 538 539 qprocson(q); 540 return (0); 541 } 542 543 /* 544 * ESP module close routine. 545 */ 546 /* ARGSUSED */ 547 static int 548 ipsecesp_close(queue_t *q, int flags __unused, cred_t *credp __unused) 549 { 550 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 551 552 /* 553 * Clean up q_ptr, if needed. 554 */ 555 qprocsoff(q); 556 557 /* Keysock queue check is safe, because of OCEXCL perimeter. */ 558 559 if (q == espstack->esp_pfkey_q) { 560 esp1dbg(espstack, 561 ("ipsecesp_close: Ummm... keysock is closing ESP.\n")); 562 espstack->esp_pfkey_q = NULL; 563 /* Detach qtimeouts. */ 564 (void) quntimeout(q, espstack->esp_event); 565 } 566 567 netstack_rele(espstack->ipsecesp_netstack); 568 return (0); 569 } 570 571 /* 572 * Add a number of bytes to what the SA has protected so far. Return 573 * B_TRUE if the SA can still protect that many bytes. 574 * 575 * Caller must REFRELE the passed-in assoc. This function must REFRELE 576 * any obtained peer SA. 577 */ 578 static boolean_t 579 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) 580 { 581 ipsa_t *inassoc, *outassoc; 582 isaf_t *bucket; 583 boolean_t inrc, outrc, isv6; 584 sadb_t *sp; 585 int outhash; 586 netstack_t *ns = assoc->ipsa_netstack; 587 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 588 589 /* No peer? No problem! */ 590 if (!assoc->ipsa_haspeer) { 591 return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes, 592 B_TRUE)); 593 } 594 595 /* 596 * Otherwise, we want to grab both the original assoc and its peer. 597 * There might be a race for this, but if it's a real race, two 598 * expire messages may occur. We limit this by only sending the 599 * expire message on one of the peers, we'll pick the inbound 600 * arbitrarily. 601 * 602 * If we need tight synchronization on the peer SA, then we need to 603 * reconsider. 604 */ 605 606 /* Use address length to select IPv6/IPv4 */ 607 isv6 = (assoc->ipsa_addrfam == AF_INET6); 608 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 609 610 if (inbound) { 611 inassoc = assoc; 612 if (isv6) { 613 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 614 &inassoc->ipsa_dstaddr)); 615 } else { 616 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 617 &inassoc->ipsa_dstaddr)); 618 } 619 bucket = &sp->sdb_of[outhash]; 620 mutex_enter(&bucket->isaf_lock); 621 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 622 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 623 inassoc->ipsa_addrfam); 624 mutex_exit(&bucket->isaf_lock); 625 if (outassoc == NULL) { 626 /* Q: Do we wish to set haspeer == B_FALSE? */ 627 esp0dbg(("esp_age_bytes: " 628 "can't find peer for inbound.\n")); 629 return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc, 630 bytes, B_TRUE)); 631 } 632 } else { 633 outassoc = assoc; 634 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 635 mutex_enter(&bucket->isaf_lock); 636 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 637 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 638 outassoc->ipsa_addrfam); 639 mutex_exit(&bucket->isaf_lock); 640 if (inassoc == NULL) { 641 /* Q: Do we wish to set haspeer == B_FALSE? */ 642 esp0dbg(("esp_age_bytes: " 643 "can't find peer for outbound.\n")); 644 return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc, 645 bytes, B_TRUE)); 646 } 647 } 648 649 inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE); 650 outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE); 651 652 /* 653 * REFRELE any peer SA. 654 * 655 * Because of the multi-line macro nature of IPSA_REFRELE, keep 656 * them in { }. 657 */ 658 if (inbound) { 659 IPSA_REFRELE(outassoc); 660 } else { 661 IPSA_REFRELE(inassoc); 662 } 663 664 return (inrc && outrc); 665 } 666 667 /* 668 * Do incoming NAT-T manipulations for packet. 669 * Returns NULL if the mblk chain is consumed. 670 */ 671 static mblk_t * 672 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc) 673 { 674 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 675 tcpha_t *tcpha; 676 udpha_t *udpha; 677 /* Initialize to our inbound cksum adjustment... */ 678 uint32_t sum = assoc->ipsa_inbound_cksum; 679 680 switch (ipha->ipha_protocol) { 681 case IPPROTO_TCP: 682 tcpha = (tcpha_t *)(data_mp->b_rptr + 683 IPH_HDR_LENGTH(ipha)); 684 685 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16) 686 sum += ~ntohs(tcpha->tha_sum) & 0xFFFF; 687 DOWN_SUM(sum); 688 DOWN_SUM(sum); 689 tcpha->tha_sum = ~htons(sum); 690 break; 691 case IPPROTO_UDP: 692 udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha)); 693 694 if (udpha->uha_checksum != 0) { 695 /* Adujst if the inbound one was not zero. */ 696 sum += ~ntohs(udpha->uha_checksum) & 0xFFFF; 697 DOWN_SUM(sum); 698 DOWN_SUM(sum); 699 udpha->uha_checksum = ~htons(sum); 700 if (udpha->uha_checksum == 0) 701 udpha->uha_checksum = 0xFFFF; 702 } 703 #undef DOWN_SUM 704 break; 705 case IPPROTO_IP: 706 /* 707 * This case is only an issue for self-encapsulated 708 * packets. So for now, fall through. 709 */ 710 break; 711 } 712 return (data_mp); 713 } 714 715 716 /* 717 * Strip ESP header, check padding, and fix IP header. 718 * Returns B_TRUE on success, B_FALSE if an error occured. 719 */ 720 static boolean_t 721 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, 722 kstat_named_t **counter, ipsecesp_stack_t *espstack) 723 { 724 ipha_t *ipha; 725 ip6_t *ip6h; 726 uint_t divpoint; 727 mblk_t *scratch; 728 uint8_t nexthdr, padlen; 729 uint8_t lastpad; 730 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 731 uint8_t *lastbyte; 732 733 /* 734 * Strip ESP data and fix IP header. 735 * 736 * XXX In case the beginning of esp_inbound() changes to not do a 737 * pullup, this part of the code can remain unchanged. 738 */ 739 if (isv4) { 740 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t)); 741 ipha = (ipha_t *)data_mp->b_rptr; 742 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) + 743 IPH_HDR_LENGTH(ipha)); 744 divpoint = IPH_HDR_LENGTH(ipha); 745 } else { 746 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t)); 747 ip6h = (ip6_t *)data_mp->b_rptr; 748 divpoint = ip_hdr_length_v6(data_mp, ip6h); 749 } 750 751 scratch = data_mp; 752 while (scratch->b_cont != NULL) 753 scratch = scratch->b_cont; 754 755 ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3); 756 757 /* 758 * "Next header" and padding length are the last two bytes in the 759 * ESP-protected datagram, thus the explicit - 1 and - 2. 760 * lastpad is the last byte of the padding, which can be used for 761 * a quick check to see if the padding is correct. 762 */ 763 lastbyte = scratch->b_wptr - 1; 764 nexthdr = *lastbyte--; 765 padlen = *lastbyte--; 766 767 if (isv4) { 768 /* Fix part of the IP header. */ 769 ipha->ipha_protocol = nexthdr; 770 /* 771 * Reality check the padlen. The explicit - 2 is for the 772 * padding length and the next-header bytes. 773 */ 774 if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 - 775 sizeof (esph_t) - ivlen) { 776 ESP_BUMP_STAT(espstack, bad_decrypt); 777 ipsec_rl_strlog(espstack->ipsecesp_netstack, 778 info.mi_idnum, 0, 0, 779 SL_ERROR | SL_WARN, 780 "Corrupt ESP packet (padlen too big).\n"); 781 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 782 padlen)); 783 esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp " 784 "hdr - ivlen(%d) = %d.\n", 785 ntohs(ipha->ipha_length), ivlen, 786 (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) - 787 2 - sizeof (esph_t) - ivlen))); 788 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 789 return (B_FALSE); 790 } 791 792 /* 793 * Fix the rest of the header. The explicit - 2 is for the 794 * padding length and the next-header bytes. 795 */ 796 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen - 797 2 - sizeof (esph_t) - ivlen); 798 ipha->ipha_hdr_checksum = 0; 799 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 800 } else { 801 if (ip6h->ip6_nxt == IPPROTO_ESP) { 802 ip6h->ip6_nxt = nexthdr; 803 } else { 804 ip_pkt_t ipp; 805 806 bzero(&ipp, sizeof (ipp)); 807 (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, 808 NULL); 809 if (ipp.ipp_dstopts != NULL) { 810 ipp.ipp_dstopts->ip6d_nxt = nexthdr; 811 } else if (ipp.ipp_rthdr != NULL) { 812 ipp.ipp_rthdr->ip6r_nxt = nexthdr; 813 } else if (ipp.ipp_hopopts != NULL) { 814 ipp.ipp_hopopts->ip6h_nxt = nexthdr; 815 } else { 816 /* Panic a DEBUG kernel. */ 817 ASSERT(ipp.ipp_hopopts != NULL); 818 /* Otherwise, pretend it's IP + ESP. */ 819 cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n"); 820 ip6h->ip6_nxt = nexthdr; 821 } 822 } 823 824 if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) - 825 ivlen) { 826 ESP_BUMP_STAT(espstack, bad_decrypt); 827 ipsec_rl_strlog(espstack->ipsecesp_netstack, 828 info.mi_idnum, 0, 0, 829 SL_ERROR | SL_WARN, 830 "Corrupt ESP packet (v6 padlen too big).\n"); 831 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 832 padlen)); 833 esp1dbg(espstack, 834 ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = " 835 "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen) 836 + sizeof (ip6_t)), ivlen, 837 (unsigned)(ntohs(ip6h->ip6_plen) - 2 - 838 sizeof (esph_t) - ivlen))); 839 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 840 return (B_FALSE); 841 } 842 843 844 /* 845 * Fix the rest of the header. The explicit - 2 is for the 846 * padding length and the next-header bytes. IPv6 is nice, 847 * because there's no hdr checksum! 848 */ 849 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen - 850 2 - sizeof (esph_t) - ivlen); 851 } 852 853 if (espstack->ipsecesp_padding_check > 0 && padlen > 0) { 854 /* 855 * Weak padding check: compare last-byte to length, they 856 * should be equal. 857 */ 858 lastpad = *lastbyte--; 859 860 if (padlen != lastpad) { 861 ipsec_rl_strlog(espstack->ipsecesp_netstack, 862 info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 863 "Corrupt ESP packet (lastpad != padlen).\n"); 864 esp1dbg(espstack, 865 ("lastpad (%d) not equal to padlen (%d):\n", 866 lastpad, padlen)); 867 ESP_BUMP_STAT(espstack, bad_padding); 868 *counter = DROPPER(ipss, ipds_esp_bad_padding); 869 return (B_FALSE); 870 } 871 872 /* 873 * Strong padding check: Check all pad bytes to see that 874 * they're ascending. Go backwards using a descending counter 875 * to verify. padlen == 1 is checked by previous block, so 876 * only bother if we've more than 1 byte of padding. 877 * Consequently, start the check one byte before the location 878 * of "lastpad". 879 */ 880 if (espstack->ipsecesp_padding_check > 1) { 881 /* 882 * This assert may have to become an if and a pullup 883 * if we start accepting multi-dblk mblks. For now, 884 * though, any packet here will have been pulled up in 885 * esp_inbound. 886 */ 887 ASSERT(MBLKL(scratch) >= lastpad + 3); 888 889 /* 890 * Use "--lastpad" because we already checked the very 891 * last pad byte previously. 892 */ 893 while (--lastpad != 0) { 894 if (lastpad != *lastbyte) { 895 ipsec_rl_strlog( 896 espstack->ipsecesp_netstack, 897 info.mi_idnum, 0, 0, 898 SL_ERROR | SL_WARN, "Corrupt ESP " 899 "packet (bad padding).\n"); 900 esp1dbg(espstack, 901 ("padding not in correct" 902 " format:\n")); 903 ESP_BUMP_STAT(espstack, bad_padding); 904 *counter = DROPPER(ipss, 905 ipds_esp_bad_padding); 906 return (B_FALSE); 907 } 908 lastbyte--; 909 } 910 } 911 } 912 913 /* Trim off the padding. */ 914 ASSERT(data_mp->b_cont == NULL); 915 data_mp->b_wptr -= (padlen + 2); 916 917 /* 918 * Remove the ESP header. 919 * 920 * The above assertions about data_mp's size will make this work. 921 * 922 * XXX Question: If I send up and get back a contiguous mblk, 923 * would it be quicker to bcopy over, or keep doing the dupb stuff? 924 * I go with copying for now. 925 */ 926 927 if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) && 928 IS_P2ALIGNED(ivlen, sizeof (uint32_t))) { 929 uint8_t *start = data_mp->b_rptr; 930 uint32_t *src, *dst; 931 932 src = (uint32_t *)(start + divpoint); 933 dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen); 934 935 ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) && 936 IS_P2ALIGNED(src, sizeof (uint32_t))); 937 938 do { 939 src--; 940 dst--; 941 *dst = *src; 942 } while (src != (uint32_t *)start); 943 944 data_mp->b_rptr = (uchar_t *)dst; 945 } else { 946 uint8_t *start = data_mp->b_rptr; 947 uint8_t *src, *dst; 948 949 src = start + divpoint; 950 dst = src + sizeof (esph_t) + ivlen; 951 952 do { 953 src--; 954 dst--; 955 *dst = *src; 956 } while (src != start); 957 958 data_mp->b_rptr = dst; 959 } 960 961 esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n")); 962 esp2dbg(espstack, (dump_msg(data_mp))); 963 964 return (B_TRUE); 965 } 966 967 /* 968 * Updating use times can be tricky business if the ipsa_haspeer flag is 969 * set. This function is called once in an SA's lifetime. 970 * 971 * Caller has to REFRELE "assoc" which is passed in. This function has 972 * to REFRELE any peer SA that is obtained. 973 */ 974 static void 975 esp_set_usetime(ipsa_t *assoc, boolean_t inbound) 976 { 977 ipsa_t *inassoc, *outassoc; 978 isaf_t *bucket; 979 sadb_t *sp; 980 int outhash; 981 boolean_t isv6; 982 netstack_t *ns = assoc->ipsa_netstack; 983 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 984 985 /* No peer? No problem! */ 986 if (!assoc->ipsa_haspeer) { 987 sadb_set_usetime(assoc); 988 return; 989 } 990 991 /* 992 * Otherwise, we want to grab both the original assoc and its peer. 993 * There might be a race for this, but if it's a real race, the times 994 * will be out-of-synch by at most a second, and since our time 995 * granularity is a second, this won't be a problem. 996 * 997 * If we need tight synchronization on the peer SA, then we need to 998 * reconsider. 999 */ 1000 1001 /* Use address length to select IPv6/IPv4 */ 1002 isv6 = (assoc->ipsa_addrfam == AF_INET6); 1003 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 1004 1005 if (inbound) { 1006 inassoc = assoc; 1007 if (isv6) { 1008 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 1009 &inassoc->ipsa_dstaddr)); 1010 } else { 1011 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 1012 &inassoc->ipsa_dstaddr)); 1013 } 1014 bucket = &sp->sdb_of[outhash]; 1015 mutex_enter(&bucket->isaf_lock); 1016 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 1017 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 1018 inassoc->ipsa_addrfam); 1019 mutex_exit(&bucket->isaf_lock); 1020 if (outassoc == NULL) { 1021 /* Q: Do we wish to set haspeer == B_FALSE? */ 1022 esp0dbg(("esp_set_usetime: " 1023 "can't find peer for inbound.\n")); 1024 sadb_set_usetime(inassoc); 1025 return; 1026 } 1027 } else { 1028 outassoc = assoc; 1029 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 1030 mutex_enter(&bucket->isaf_lock); 1031 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 1032 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 1033 outassoc->ipsa_addrfam); 1034 mutex_exit(&bucket->isaf_lock); 1035 if (inassoc == NULL) { 1036 /* Q: Do we wish to set haspeer == B_FALSE? */ 1037 esp0dbg(("esp_set_usetime: " 1038 "can't find peer for outbound.\n")); 1039 sadb_set_usetime(outassoc); 1040 return; 1041 } 1042 } 1043 1044 /* Update usetime on both. */ 1045 sadb_set_usetime(inassoc); 1046 sadb_set_usetime(outassoc); 1047 1048 /* 1049 * REFRELE any peer SA. 1050 * 1051 * Because of the multi-line macro nature of IPSA_REFRELE, keep 1052 * them in { }. 1053 */ 1054 if (inbound) { 1055 IPSA_REFRELE(outassoc); 1056 } else { 1057 IPSA_REFRELE(inassoc); 1058 } 1059 } 1060 1061 /* 1062 * Handle ESP inbound data for IPv4 and IPv6. 1063 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1064 * mblk chain data_mp. 1065 */ 1066 mblk_t * 1067 esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira) 1068 { 1069 esph_t *esph = (esph_t *)arg; 1070 ipsa_t *ipsa = ira->ira_ipsec_esp_sa; 1071 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1072 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1073 ipsec_stack_t *ipss = ns->netstack_ipsec; 1074 1075 /* 1076 * We may wish to check replay in-range-only here as an optimization. 1077 * Include the reality check of ipsa->ipsa_replay > 1078 * ipsa->ipsa_replay_wsize for times when it's the first N packets, 1079 * where N == ipsa->ipsa_replay_wsize. 1080 * 1081 * Another check that may come here later is the "collision" check. 1082 * If legitimate packets flow quickly enough, this won't be a problem, 1083 * but collisions may cause authentication algorithm crunching to 1084 * take place when it doesn't need to. 1085 */ 1086 if (!sadb_replay_peek(ipsa, esph->esph_replay)) { 1087 ESP_BUMP_STAT(espstack, replay_early_failures); 1088 IP_ESP_BUMP_STAT(ipss, in_discards); 1089 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 1090 DROPPER(ipss, ipds_esp_early_replay), 1091 &espstack->esp_dropper); 1092 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1093 return (NULL); 1094 } 1095 1096 /* 1097 * Adjust the IP header's payload length to reflect the removal 1098 * of the ICV. 1099 */ 1100 if (!(ira->ira_flags & IRAF_IS_IPV4)) { 1101 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr; 1102 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - 1103 ipsa->ipsa_mac_len); 1104 } else { 1105 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 1106 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - 1107 ipsa->ipsa_mac_len); 1108 } 1109 1110 /* submit the request to the crypto framework */ 1111 return (esp_submit_req_inbound(data_mp, ira, ipsa, 1112 (uint8_t *)esph - data_mp->b_rptr)); 1113 } 1114 1115 /* XXX refactor me */ 1116 /* 1117 * Handle the SADB_GETSPI message. Create a larval SA. 1118 */ 1119 static void 1120 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 1121 { 1122 ipsa_t *newbie, *target; 1123 isaf_t *outbound, *inbound; 1124 int rc, diagnostic; 1125 sadb_sa_t *assoc; 1126 keysock_out_t *kso; 1127 uint32_t newspi; 1128 1129 /* 1130 * Randomly generate a proposed SPI value 1131 */ 1132 if (cl_inet_getspi != NULL) { 1133 cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid, 1134 IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL); 1135 } else { 1136 (void) random_get_pseudo_bytes((uint8_t *)&newspi, 1137 sizeof (uint32_t)); 1138 } 1139 newbie = sadb_getspi(ksi, newspi, &diagnostic, 1140 espstack->ipsecesp_netstack, IPPROTO_ESP); 1141 1142 if (newbie == NULL) { 1143 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic, 1144 ksi->ks_in_serial); 1145 return; 1146 } else if (newbie == (ipsa_t *)-1) { 1147 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 1148 ksi->ks_in_serial); 1149 return; 1150 } 1151 1152 /* 1153 * XXX - We may randomly collide. We really should recover from this. 1154 * Unfortunately, that could require spending way-too-much-time 1155 * in here. For now, let the user retry. 1156 */ 1157 1158 if (newbie->ipsa_addrfam == AF_INET6) { 1159 outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6, 1160 *(uint32_t *)(newbie->ipsa_dstaddr)); 1161 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6, 1162 newbie->ipsa_spi); 1163 } else { 1164 ASSERT(newbie->ipsa_addrfam == AF_INET); 1165 outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4, 1166 *(uint32_t *)(newbie->ipsa_dstaddr)); 1167 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4, 1168 newbie->ipsa_spi); 1169 } 1170 1171 mutex_enter(&outbound->isaf_lock); 1172 mutex_enter(&inbound->isaf_lock); 1173 1174 /* 1175 * Check for collisions (i.e. did sadb_getspi() return with something 1176 * that already exists?). 1177 * 1178 * Try outbound first. Even though SADB_GETSPI is traditionally 1179 * for inbound SAs, you never know what a user might do. 1180 */ 1181 target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi, 1182 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam); 1183 if (target == NULL) { 1184 target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi, 1185 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, 1186 newbie->ipsa_addrfam); 1187 } 1188 1189 /* 1190 * I don't have collisions elsewhere! 1191 * (Nor will I because I'm still holding inbound/outbound locks.) 1192 */ 1193 1194 if (target != NULL) { 1195 rc = EEXIST; 1196 IPSA_REFRELE(target); 1197 } else { 1198 /* 1199 * sadb_insertassoc() also checks for collisions, so 1200 * if there's a colliding entry, rc will be set 1201 * to EEXIST. 1202 */ 1203 rc = sadb_insertassoc(newbie, inbound); 1204 newbie->ipsa_hardexpiretime = gethrestime_sec(); 1205 newbie->ipsa_hardexpiretime += 1206 espstack->ipsecesp_larval_timeout; 1207 } 1208 1209 /* 1210 * Can exit outbound mutex. Hold inbound until we're done 1211 * with newbie. 1212 */ 1213 mutex_exit(&outbound->isaf_lock); 1214 1215 if (rc != 0) { 1216 mutex_exit(&inbound->isaf_lock); 1217 IPSA_REFRELE(newbie); 1218 sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc, 1219 SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial); 1220 return; 1221 } 1222 1223 1224 /* Can write here because I'm still holding the bucket lock. */ 1225 newbie->ipsa_type = SADB_SATYPE_ESP; 1226 1227 /* 1228 * Construct successful return message. We have one thing going 1229 * for us in PF_KEY v2. That's the fact that 1230 * sizeof (sadb_spirange_t) == sizeof (sadb_sa_t) 1231 */ 1232 assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE]; 1233 assoc->sadb_sa_exttype = SADB_EXT_SA; 1234 assoc->sadb_sa_spi = newbie->ipsa_spi; 1235 *((uint64_t *)(&assoc->sadb_sa_replay)) = 0; 1236 mutex_exit(&inbound->isaf_lock); 1237 1238 /* Convert KEYSOCK_IN to KEYSOCK_OUT. */ 1239 kso = (keysock_out_t *)ksi; 1240 kso->ks_out_len = sizeof (*kso); 1241 kso->ks_out_serial = ksi->ks_in_serial; 1242 kso->ks_out_type = KEYSOCK_OUT; 1243 1244 /* 1245 * Can safely putnext() to esp_pfkey_q, because this is a turnaround 1246 * from the esp_pfkey_q. 1247 */ 1248 putnext(espstack->esp_pfkey_q, mp); 1249 } 1250 1251 /* 1252 * Insert the ESP header into a packet. Duplicate an mblk, and insert a newly 1253 * allocated mblk with the ESP header in between the two. 1254 */ 1255 static boolean_t 1256 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint, 1257 ipsecesp_stack_t *espstack) 1258 { 1259 mblk_t *split_mp = mp; 1260 uint_t wheretodiv = divpoint; 1261 1262 while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) { 1263 wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr); 1264 split_mp = split_mp->b_cont; 1265 ASSERT(split_mp != NULL); 1266 } 1267 1268 if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) { 1269 mblk_t *scratch; 1270 1271 /* "scratch" is the 2nd half, split_mp is the first. */ 1272 scratch = dupb(split_mp); 1273 if (scratch == NULL) { 1274 esp1dbg(espstack, 1275 ("esp_insert_esp: can't allocate scratch.\n")); 1276 return (B_FALSE); 1277 } 1278 /* NOTE: dupb() doesn't set b_cont appropriately. */ 1279 scratch->b_cont = split_mp->b_cont; 1280 scratch->b_rptr += wheretodiv; 1281 split_mp->b_wptr = split_mp->b_rptr + wheretodiv; 1282 split_mp->b_cont = scratch; 1283 } 1284 /* 1285 * At this point, split_mp is exactly "wheretodiv" bytes long, and 1286 * holds the end of the pre-ESP part of the datagram. 1287 */ 1288 esp_mp->b_cont = split_mp->b_cont; 1289 split_mp->b_cont = esp_mp; 1290 1291 return (B_TRUE); 1292 } 1293 1294 /* 1295 * Section 7 of RFC 3947 says: 1296 * 1297 * 7. Recovering from the Expiring NAT Mappings 1298 * 1299 * There are cases where NAT box decides to remove mappings that are still 1300 * alive (for example, when the keepalive interval is too long, or when the 1301 * NAT box is rebooted). To recover from this, ends that are NOT behind 1302 * NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from 1303 * the other end to determine which IP and port addresses should be used. 1304 * The host behind dynamic NAT MUST NOT do this, as otherwise it opens a 1305 * DoS attack possibility because the IP address or port of the other host 1306 * will not change (it is not behind NAT). 1307 * 1308 * Keepalives cannot be used for these purposes, as they are not 1309 * authenticated, but any IKE authenticated IKE packet or ESP packet can be 1310 * used to detect whether the IP address or the port has changed. 1311 * 1312 * The following function will check an SA and its explicitly-set pair to see 1313 * if the NAT-T remote port matches the received packet (which must have 1314 * passed ESP authentication, see esp_in_done() for the caller context). If 1315 * there is a mismatch, the SAs are updated. It is not important if we race 1316 * with a transmitting thread, as if there is a transmitting thread, it will 1317 * merely emit a packet that will most-likely be dropped. 1318 * 1319 * "ports" are ordered src,dst, and assoc is an inbound SA, where src should 1320 * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port. 1321 */ 1322 #ifdef _LITTLE_ENDIAN 1323 #define FIRST_16(x) ((x) & 0xFFFF) 1324 #define NEXT_16(x) (((x) >> 16) & 0xFFFF) 1325 #else 1326 #define FIRST_16(x) (((x) >> 16) & 0xFFFF) 1327 #define NEXT_16(x) ((x) & 0xFFFF) 1328 #endif 1329 static void 1330 esp_port_freshness(uint32_t ports, ipsa_t *assoc) 1331 { 1332 uint16_t remote = FIRST_16(ports); 1333 uint16_t local = NEXT_16(ports); 1334 ipsa_t *outbound_peer; 1335 isaf_t *bucket; 1336 ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp; 1337 1338 /* We found a conn_t, therefore local != 0. */ 1339 ASSERT(local != 0); 1340 /* Assume an IPv4 SA. */ 1341 ASSERT(assoc->ipsa_addrfam == AF_INET); 1342 1343 /* 1344 * On-the-wire rport == 0 means something's very wrong. 1345 * An unpaired SA is also useless to us. 1346 * If we are behind the NAT, don't bother. 1347 * A zero local NAT port defaults to 4500, so check that too. 1348 * And, of course, if the ports already match, we don't need to 1349 * bother. 1350 */ 1351 if (remote == 0 || assoc->ipsa_otherspi == 0 || 1352 (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) || 1353 (assoc->ipsa_remote_nat_port == 0 && 1354 remote == htons(IPPORT_IKE_NATT)) || 1355 remote == assoc->ipsa_remote_nat_port) 1356 return; 1357 1358 /* Try and snag the peer. NOTE: Assume IPv4 for now. */ 1359 bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4), 1360 assoc->ipsa_srcaddr[0]); 1361 mutex_enter(&bucket->isaf_lock); 1362 outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi, 1363 assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET); 1364 mutex_exit(&bucket->isaf_lock); 1365 1366 /* We probably lost a race to a deleting or expiring thread. */ 1367 if (outbound_peer == NULL) 1368 return; 1369 1370 /* 1371 * Hold the mutexes for both SAs so we don't race another inbound 1372 * thread. A lock-entry order shouldn't matter, since all other 1373 * per-ipsa locks are individually held-then-released. 1374 * 1375 * Luckily, this has nothing to do with the remote-NAT address, 1376 * so we don't have to re-scribble the cached-checksum differential. 1377 */ 1378 mutex_enter(&outbound_peer->ipsa_lock); 1379 mutex_enter(&assoc->ipsa_lock); 1380 outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port = 1381 remote; 1382 mutex_exit(&assoc->ipsa_lock); 1383 mutex_exit(&outbound_peer->ipsa_lock); 1384 IPSA_REFRELE(outbound_peer); 1385 ESP_BUMP_STAT(espstack, sa_port_renumbers); 1386 } 1387 /* 1388 * Finish processing of an inbound ESP packet after processing by the 1389 * crypto framework. 1390 * - Remove the ESP header. 1391 * - Send packet back to IP. 1392 * If authentication was performed on the packet, this function is called 1393 * only if the authentication succeeded. 1394 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1395 * mblk chain data_mp. 1396 */ 1397 static mblk_t * 1398 esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic) 1399 { 1400 ipsa_t *assoc; 1401 uint_t espstart; 1402 uint32_t ivlen = 0; 1403 uint_t processed_len; 1404 esph_t *esph; 1405 kstat_named_t *counter; 1406 boolean_t is_natt; 1407 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1408 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1409 ipsec_stack_t *ipss = ns->netstack_ipsec; 1410 1411 assoc = ira->ira_ipsec_esp_sa; 1412 ASSERT(assoc != NULL); 1413 1414 is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 1415 1416 /* get the pointer to the ESP header */ 1417 if (assoc->ipsa_encr_alg == SADB_EALG_NULL) { 1418 /* authentication-only ESP */ 1419 espstart = ic->ic_crypto_data.cd_offset; 1420 processed_len = ic->ic_crypto_data.cd_length; 1421 } else { 1422 /* encryption present */ 1423 ivlen = assoc->ipsa_iv_len; 1424 if (assoc->ipsa_auth_alg == SADB_AALG_NONE) { 1425 /* encryption-only ESP */ 1426 espstart = ic->ic_crypto_data.cd_offset - 1427 sizeof (esph_t) - assoc->ipsa_iv_len; 1428 processed_len = ic->ic_crypto_data.cd_length + 1429 ivlen; 1430 } else { 1431 /* encryption with authentication */ 1432 espstart = ic->ic_crypto_dual_data.dd_offset1; 1433 processed_len = ic->ic_crypto_dual_data.dd_len2 + 1434 ivlen; 1435 } 1436 } 1437 1438 esph = (esph_t *)(data_mp->b_rptr + espstart); 1439 1440 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE || 1441 (assoc->ipsa_flags & IPSA_F_COMBINED)) { 1442 /* 1443 * Authentication passed if we reach this point. 1444 * Packets with authentication will have the ICV 1445 * after the crypto data. Adjust b_wptr before 1446 * making padlen checks. 1447 */ 1448 ESP_BUMP_STAT(espstack, good_auth); 1449 data_mp->b_wptr -= assoc->ipsa_mac_len; 1450 1451 /* 1452 * Check replay window here! 1453 * For right now, assume keysock will set the replay window 1454 * size to zero for SAs that have an unspecified sender. 1455 * This may change... 1456 */ 1457 1458 if (!sadb_replay_check(assoc, esph->esph_replay)) { 1459 /* 1460 * Log the event. As of now we print out an event. 1461 * Do not print the replay failure number, or else 1462 * syslog cannot collate the error messages. Printing 1463 * the replay number that failed opens a denial-of- 1464 * service attack. 1465 */ 1466 ipsec_assocfailure(info.mi_idnum, 0, 0, 1467 SL_ERROR | SL_WARN, 1468 "Replay failed for ESP spi 0x%x, dst %s.\n", 1469 assoc->ipsa_spi, assoc->ipsa_dstaddr, 1470 assoc->ipsa_addrfam, espstack->ipsecesp_netstack); 1471 ESP_BUMP_STAT(espstack, replay_failures); 1472 counter = DROPPER(ipss, ipds_esp_replay); 1473 goto drop_and_bail; 1474 } 1475 1476 if (is_natt) { 1477 ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS); 1478 ASSERT(ira->ira_esp_udp_ports != 0); 1479 esp_port_freshness(ira->ira_esp_udp_ports, assoc); 1480 } 1481 } 1482 1483 esp_set_usetime(assoc, B_TRUE); 1484 1485 if (!esp_age_bytes(assoc, processed_len, B_TRUE)) { 1486 /* The ipsa has hit hard expiration, LOG and AUDIT. */ 1487 ipsec_assocfailure(info.mi_idnum, 0, 0, 1488 SL_ERROR | SL_WARN, 1489 "ESP association 0x%x, dst %s had bytes expire.\n", 1490 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1491 espstack->ipsecesp_netstack); 1492 ESP_BUMP_STAT(espstack, bytes_expired); 1493 counter = DROPPER(ipss, ipds_esp_bytes_expire); 1494 goto drop_and_bail; 1495 } 1496 1497 /* 1498 * Remove ESP header and padding from packet. I hope the compiler 1499 * spews "branch, predict taken" code for this. 1500 */ 1501 1502 if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4), 1503 ivlen, &counter, espstack)) { 1504 1505 if (is_system_labeled() && assoc->ipsa_tsl != NULL) { 1506 if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) { 1507 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 1508 DROPPER(ipss, ipds_ah_nomem), 1509 &espstack->esp_dropper); 1510 BUMP_MIB(ira->ira_ill->ill_ip_mib, 1511 ipIfStatsInDiscards); 1512 return (NULL); 1513 } 1514 } 1515 if (is_natt) 1516 return (esp_fix_natt_checksums(data_mp, assoc)); 1517 1518 if (assoc->ipsa_state == IPSA_STATE_IDLE) { 1519 /* 1520 * Cluster buffering case. Tell caller that we're 1521 * handling the packet. 1522 */ 1523 sadb_buf_pkt(assoc, data_mp, ira); 1524 return (NULL); 1525 } 1526 1527 return (data_mp); 1528 } 1529 1530 esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n")); 1531 drop_and_bail: 1532 IP_ESP_BUMP_STAT(ipss, in_discards); 1533 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter, 1534 &espstack->esp_dropper); 1535 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1536 return (NULL); 1537 } 1538 1539 /* 1540 * Called upon failing the inbound ICV check. The message passed as 1541 * argument is freed. 1542 */ 1543 static void 1544 esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira) 1545 { 1546 ipsa_t *assoc = ira->ira_ipsec_esp_sa; 1547 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1548 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1549 ipsec_stack_t *ipss = ns->netstack_ipsec; 1550 1551 /* 1552 * Log the event. Don't print to the console, block 1553 * potential denial-of-service attack. 1554 */ 1555 ESP_BUMP_STAT(espstack, bad_auth); 1556 1557 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 1558 "ESP Authentication failed for spi 0x%x, dst %s.\n", 1559 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1560 espstack->ipsecesp_netstack); 1561 1562 IP_ESP_BUMP_STAT(ipss, in_discards); 1563 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 1564 DROPPER(ipss, ipds_esp_bad_auth), 1565 &espstack->esp_dropper); 1566 } 1567 1568 1569 /* 1570 * Invoked for outbound packets after ESP processing. If the packet 1571 * also requires AH, performs the AH SA selection and AH processing. 1572 * 1573 * Returns data_mp (possibly with AH added) unless data_mp was consumed 1574 * due to an error, or queued due to async. crypto or an ACQUIRE trigger. 1575 */ 1576 static mblk_t * 1577 esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa) 1578 { 1579 ipsec_action_t *ap; 1580 1581 ap = ixa->ixa_ipsec_action; 1582 if (ap == NULL) { 1583 ipsec_policy_t *pp = ixa->ixa_ipsec_policy; 1584 ap = pp->ipsp_act; 1585 } 1586 1587 if (!ap->ipa_want_ah) 1588 return (data_mp); 1589 1590 /* 1591 * Normally the AH SA would have already been put in place 1592 * but it could have been flushed so we need to look for it. 1593 */ 1594 if (ixa->ixa_ipsec_ah_sa == NULL) { 1595 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) { 1596 sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE); 1597 return (NULL); 1598 } 1599 } 1600 ASSERT(ixa->ixa_ipsec_ah_sa != NULL); 1601 1602 data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa); 1603 return (data_mp); 1604 } 1605 1606 1607 /* 1608 * Kernel crypto framework callback invoked after completion of async 1609 * crypto requests for outbound packets. 1610 */ 1611 static void 1612 esp_kcf_callback_outbound(void *arg, int status) 1613 { 1614 mblk_t *mp = (mblk_t *)arg; 1615 mblk_t *async_mp; 1616 netstack_t *ns; 1617 ipsec_stack_t *ipss; 1618 ipsecesp_stack_t *espstack; 1619 mblk_t *data_mp; 1620 ip_xmit_attr_t ixas; 1621 ipsec_crypto_t *ic; 1622 ill_t *ill; 1623 1624 /* 1625 * First remove the ipsec_crypto_t mblk 1626 * Note that we need to ipsec_free_crypto_data(mp) once done with ic. 1627 */ 1628 async_mp = ipsec_remove_crypto_data(mp, &ic); 1629 ASSERT(async_mp != NULL); 1630 1631 /* 1632 * Extract the ip_xmit_attr_t from the first mblk. 1633 * Verifies that the netstack and ill is still around; could 1634 * have vanished while kEf was doing its work. 1635 * On succesful return we have a nce_t and the ill/ipst can't 1636 * disappear until we do the nce_refrele in ixa_cleanup. 1637 */ 1638 data_mp = async_mp->b_cont; 1639 async_mp->b_cont = NULL; 1640 if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) { 1641 /* Disappeared on us - no ill/ipst for MIB */ 1642 /* We have nowhere to do stats since ixa_ipst could be NULL */ 1643 if (ixas.ixa_nce != NULL) { 1644 ill = ixas.ixa_nce->nce_ill; 1645 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1646 ip_drop_output("ipIfStatsOutDiscards", data_mp, ill); 1647 } 1648 freemsg(data_mp); 1649 goto done; 1650 } 1651 ns = ixas.ixa_ipst->ips_netstack; 1652 espstack = ns->netstack_ipsecesp; 1653 ipss = ns->netstack_ipsec; 1654 ill = ixas.ixa_nce->nce_ill; 1655 1656 if (status == CRYPTO_SUCCESS) { 1657 /* 1658 * If a ICV was computed, it was stored by the 1659 * crypto framework at the end of the packet. 1660 */ 1661 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 1662 1663 esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE); 1664 /* NAT-T packet. */ 1665 if (IPH_HDR_VERSION(ipha) == IP_VERSION && 1666 ipha->ipha_protocol == IPPROTO_UDP) 1667 esp_prepare_udp(ns, data_mp, ipha); 1668 1669 /* do AH processing if needed */ 1670 data_mp = esp_do_outbound_ah(data_mp, &ixas); 1671 if (data_mp == NULL) 1672 goto done; 1673 1674 (void) ip_output_post_ipsec(data_mp, &ixas); 1675 } else { 1676 /* Outbound shouldn't see invalid MAC */ 1677 ASSERT(status != CRYPTO_INVALID_MAC); 1678 1679 esp1dbg(espstack, 1680 ("esp_kcf_callback_outbound: crypto failed with 0x%x\n", 1681 status)); 1682 ESP_BUMP_STAT(espstack, crypto_failures); 1683 ESP_BUMP_STAT(espstack, out_discards); 1684 ip_drop_packet(data_mp, B_FALSE, ill, 1685 DROPPER(ipss, ipds_esp_crypto_failed), 1686 &espstack->esp_dropper); 1687 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1688 } 1689 done: 1690 ixa_cleanup(&ixas); 1691 (void) ipsec_free_crypto_data(mp); 1692 } 1693 1694 /* 1695 * Kernel crypto framework callback invoked after completion of async 1696 * crypto requests for inbound packets. 1697 */ 1698 static void 1699 esp_kcf_callback_inbound(void *arg, int status) 1700 { 1701 mblk_t *mp = (mblk_t *)arg; 1702 mblk_t *async_mp; 1703 netstack_t *ns; 1704 ipsecesp_stack_t *espstack; 1705 ipsec_stack_t *ipss; 1706 mblk_t *data_mp; 1707 ip_recv_attr_t iras; 1708 ipsec_crypto_t *ic; 1709 1710 /* 1711 * First remove the ipsec_crypto_t mblk 1712 * Note that we need to ipsec_free_crypto_data(mp) once done with ic. 1713 */ 1714 async_mp = ipsec_remove_crypto_data(mp, &ic); 1715 ASSERT(async_mp != NULL); 1716 1717 /* 1718 * Extract the ip_recv_attr_t from the first mblk. 1719 * Verifies that the netstack and ill is still around; could 1720 * have vanished while kEf was doing its work. 1721 */ 1722 data_mp = async_mp->b_cont; 1723 async_mp->b_cont = NULL; 1724 if (!ip_recv_attr_from_mblk(async_mp, &iras)) { 1725 /* The ill or ip_stack_t disappeared on us */ 1726 ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL); 1727 freemsg(data_mp); 1728 goto done; 1729 } 1730 1731 ns = iras.ira_ill->ill_ipst->ips_netstack; 1732 espstack = ns->netstack_ipsecesp; 1733 ipss = ns->netstack_ipsec; 1734 1735 if (status == CRYPTO_SUCCESS) { 1736 data_mp = esp_in_done(data_mp, &iras, ic); 1737 if (data_mp == NULL) 1738 goto done; 1739 1740 /* finish IPsec processing */ 1741 ip_input_post_ipsec(data_mp, &iras); 1742 } else if (status == CRYPTO_INVALID_MAC) { 1743 esp_log_bad_auth(data_mp, &iras); 1744 } else { 1745 esp1dbg(espstack, 1746 ("esp_kcf_callback: crypto failed with 0x%x\n", 1747 status)); 1748 ESP_BUMP_STAT(espstack, crypto_failures); 1749 IP_ESP_BUMP_STAT(ipss, in_discards); 1750 ip_drop_packet(data_mp, B_TRUE, iras.ira_ill, 1751 DROPPER(ipss, ipds_esp_crypto_failed), 1752 &espstack->esp_dropper); 1753 BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1754 } 1755 done: 1756 ira_cleanup(&iras, B_TRUE); 1757 (void) ipsec_free_crypto_data(mp); 1758 } 1759 1760 /* 1761 * Invoked on crypto framework failure during inbound and outbound processing. 1762 */ 1763 static void 1764 esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc, 1765 ill_t *ill, ipsecesp_stack_t *espstack) 1766 { 1767 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 1768 1769 esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n", 1770 is_inbound ? "inbound" : "outbound", kef_rc)); 1771 ip_drop_packet(data_mp, is_inbound, ill, 1772 DROPPER(ipss, ipds_esp_crypto_failed), 1773 &espstack->esp_dropper); 1774 ESP_BUMP_STAT(espstack, crypto_failures); 1775 if (is_inbound) 1776 IP_ESP_BUMP_STAT(ipss, in_discards); 1777 else 1778 ESP_BUMP_STAT(espstack, out_discards); 1779 } 1780 1781 /* 1782 * A statement-equivalent macro, _cr MUST point to a modifiable 1783 * crypto_call_req_t. 1784 */ 1785 #define ESP_INIT_CALLREQ(_cr, _mp, _callback) \ 1786 (_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE; \ 1787 (_cr)->cr_callback_arg = (_mp); \ 1788 (_cr)->cr_callback_func = (_callback) 1789 1790 #define ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) { \ 1791 (mac)->cd_format = CRYPTO_DATA_RAW; \ 1792 (mac)->cd_offset = 0; \ 1793 (mac)->cd_length = icvlen; \ 1794 (mac)->cd_raw.iov_base = (char *)icvbuf; \ 1795 (mac)->cd_raw.iov_len = icvlen; \ 1796 } 1797 1798 #define ESP_INIT_CRYPTO_DATA(data, mp, off, len) { \ 1799 if (MBLKL(mp) >= (len) + (off)) { \ 1800 (data)->cd_format = CRYPTO_DATA_RAW; \ 1801 (data)->cd_raw.iov_base = (char *)(mp)->b_rptr; \ 1802 (data)->cd_raw.iov_len = MBLKL(mp); \ 1803 (data)->cd_offset = off; \ 1804 } else { \ 1805 (data)->cd_format = CRYPTO_DATA_MBLK; \ 1806 (data)->cd_mp = mp; \ 1807 (data)->cd_offset = off; \ 1808 } \ 1809 (data)->cd_length = len; \ 1810 } 1811 1812 #define ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) { \ 1813 (data)->dd_format = CRYPTO_DATA_MBLK; \ 1814 (data)->dd_mp = mp; \ 1815 (data)->dd_len1 = len1; \ 1816 (data)->dd_offset1 = off1; \ 1817 (data)->dd_len2 = len2; \ 1818 (data)->dd_offset2 = off2; \ 1819 } 1820 1821 /* 1822 * Returns data_mp if successfully completed the request. Returns 1823 * NULL if it failed (and increments InDiscards) or if it is pending. 1824 */ 1825 static mblk_t * 1826 esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira, 1827 ipsa_t *assoc, uint_t esph_offset) 1828 { 1829 uint_t auth_offset, msg_len, auth_len; 1830 crypto_call_req_t call_req, *callrp; 1831 mblk_t *mp; 1832 esph_t *esph_ptr; 1833 int kef_rc; 1834 uint_t icv_len = assoc->ipsa_mac_len; 1835 crypto_ctx_template_t auth_ctx_tmpl; 1836 boolean_t do_auth, do_encr, force; 1837 uint_t encr_offset, encr_len; 1838 uint_t iv_len = assoc->ipsa_iv_len; 1839 crypto_ctx_template_t encr_ctx_tmpl; 1840 ipsec_crypto_t *ic, icstack; 1841 uchar_t *iv_ptr; 1842 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1843 ipsec_stack_t *ipss = ns->netstack_ipsec; 1844 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1845 1846 do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; 1847 do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; 1848 force = (assoc->ipsa_flags & IPSA_F_ASYNC); 1849 1850 #ifdef IPSEC_LATENCY_TEST 1851 kef_rc = CRYPTO_SUCCESS; 1852 #else 1853 kef_rc = CRYPTO_FAILED; 1854 #endif 1855 1856 /* 1857 * An inbound packet is of the form: 1858 * [IP,options,ESP,IV,data,ICV,pad] 1859 */ 1860 esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset); 1861 iv_ptr = (uchar_t *)(esph_ptr + 1); 1862 /* Packet length starting at IP header ending after ESP ICV. */ 1863 msg_len = MBLKL(esp_mp); 1864 1865 encr_offset = esph_offset + sizeof (esph_t) + iv_len; 1866 encr_len = msg_len - encr_offset; 1867 1868 /* 1869 * Counter mode algs need a nonce. This is setup in sadb_common_add(). 1870 * If for some reason we are using a SA which does not have a nonce 1871 * then we must fail here. 1872 */ 1873 if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) && 1874 (assoc->ipsa_nonce == NULL)) { 1875 ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill, 1876 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 1877 return (NULL); 1878 } 1879 1880 if (force) { 1881 /* We are doing asynch; allocate mblks to hold state */ 1882 if ((mp = ip_recv_attr_to_mblk(ira)) == NULL || 1883 (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) { 1884 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1885 ip_drop_input("ipIfStatsInDiscards", esp_mp, 1886 ira->ira_ill); 1887 return (NULL); 1888 } 1889 linkb(mp, esp_mp); 1890 callrp = &call_req; 1891 ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound); 1892 } else { 1893 /* 1894 * If we know we are going to do sync then ipsec_crypto_t 1895 * should be on the stack. 1896 */ 1897 ic = &icstack; 1898 bzero(ic, sizeof (*ic)); 1899 callrp = NULL; 1900 } 1901 1902 if (do_auth) { 1903 /* authentication context template */ 1904 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, 1905 auth_ctx_tmpl); 1906 1907 /* ICV to be verified */ 1908 ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, 1909 icv_len, esp_mp->b_wptr - icv_len); 1910 1911 /* authentication starts at the ESP header */ 1912 auth_offset = esph_offset; 1913 auth_len = msg_len - auth_offset - icv_len; 1914 if (!do_encr) { 1915 /* authentication only */ 1916 /* initialize input data argument */ 1917 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 1918 esp_mp, auth_offset, auth_len); 1919 1920 /* call the crypto framework */ 1921 kef_rc = crypto_mac_verify(&assoc->ipsa_amech, 1922 &ic->ic_crypto_data, 1923 &assoc->ipsa_kcfauthkey, auth_ctx_tmpl, 1924 &ic->ic_crypto_mac, callrp); 1925 } 1926 } 1927 1928 if (do_encr) { 1929 /* encryption template */ 1930 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR, 1931 encr_ctx_tmpl); 1932 1933 /* Call the nonce update function. Also passes in IV */ 1934 (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len, 1935 iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data); 1936 1937 if (!do_auth) { 1938 /* decryption only */ 1939 /* initialize input data argument */ 1940 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 1941 esp_mp, encr_offset, encr_len); 1942 1943 /* call the crypto framework */ 1944 kef_rc = crypto_decrypt((crypto_mechanism_t *) 1945 &ic->ic_cmm, &ic->ic_crypto_data, 1946 &assoc->ipsa_kcfencrkey, encr_ctx_tmpl, 1947 NULL, callrp); 1948 } 1949 } 1950 1951 if (do_auth && do_encr) { 1952 /* dual operation */ 1953 /* initialize input data argument */ 1954 ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data, 1955 esp_mp, auth_offset, auth_len, 1956 encr_offset, encr_len - icv_len); 1957 1958 /* specify IV */ 1959 ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr; 1960 1961 /* call the framework */ 1962 kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech, 1963 &assoc->ipsa_emech, &ic->ic_crypto_dual_data, 1964 &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey, 1965 auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac, 1966 NULL, callrp); 1967 } 1968 1969 switch (kef_rc) { 1970 case CRYPTO_SUCCESS: 1971 ESP_BUMP_STAT(espstack, crypto_sync); 1972 esp_mp = esp_in_done(esp_mp, ira, ic); 1973 if (force) { 1974 /* Free mp after we are done with ic */ 1975 mp = ipsec_free_crypto_data(mp); 1976 (void) ip_recv_attr_free_mblk(mp); 1977 } 1978 return (esp_mp); 1979 case CRYPTO_QUEUED: 1980 /* esp_kcf_callback_inbound() will be invoked on completion */ 1981 ESP_BUMP_STAT(espstack, crypto_async); 1982 return (NULL); 1983 case CRYPTO_INVALID_MAC: 1984 if (force) { 1985 mp = ipsec_free_crypto_data(mp); 1986 esp_mp = ip_recv_attr_free_mblk(mp); 1987 } 1988 ESP_BUMP_STAT(espstack, crypto_sync); 1989 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1990 esp_log_bad_auth(esp_mp, ira); 1991 /* esp_mp was passed to ip_drop_packet */ 1992 return (NULL); 1993 } 1994 1995 if (force) { 1996 mp = ipsec_free_crypto_data(mp); 1997 esp_mp = ip_recv_attr_free_mblk(mp); 1998 } 1999 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 2000 esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack); 2001 /* esp_mp was passed to ip_drop_packet */ 2002 return (NULL); 2003 } 2004 2005 /* 2006 * Compute the IP and UDP checksums -- common code for both keepalives and 2007 * actual ESP-in-UDP packets. Be flexible with multiple mblks because ESP 2008 * uses mblk-insertion to insert the UDP header. 2009 * TODO - If there is an easy way to prep a packet for HW checksums, make 2010 * it happen here. 2011 * Note that this is used before both before calling ip_output_simple and 2012 * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the 2013 * latter. 2014 */ 2015 static void 2016 esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha) 2017 { 2018 int offset; 2019 uint32_t cksum; 2020 uint16_t *arr; 2021 mblk_t *udpmp = mp; 2022 uint_t hlen = IPH_HDR_LENGTH(ipha); 2023 2024 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2025 2026 ipha->ipha_hdr_checksum = 0; 2027 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 2028 2029 if (ns->netstack_udp->us_do_checksum) { 2030 ASSERT(MBLKL(udpmp) >= sizeof (udpha_t)); 2031 /* arr points to the IP header. */ 2032 arr = (uint16_t *)ipha; 2033 IP_STAT(ns->netstack_ip, ip_out_sw_cksum); 2034 IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes, 2035 ntohs(htons(ipha->ipha_length) - hlen)); 2036 /* arr[6-9] are the IP addresses. */ 2037 cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] + 2038 ntohs(htons(ipha->ipha_length) - hlen); 2039 cksum = IP_CSUM(mp, hlen, cksum); 2040 offset = hlen + UDP_CHECKSUM_OFFSET; 2041 while (offset >= MBLKL(udpmp)) { 2042 offset -= MBLKL(udpmp); 2043 udpmp = udpmp->b_cont; 2044 } 2045 /* arr points to the UDP header's checksum field. */ 2046 arr = (uint16_t *)(udpmp->b_rptr + offset); 2047 *arr = cksum; 2048 } 2049 } 2050 2051 /* 2052 * taskq handler so we can send the NAT-T keepalive on a separate thread. 2053 */ 2054 static void 2055 actually_send_keepalive(void *arg) 2056 { 2057 mblk_t *mp = (mblk_t *)arg; 2058 ip_xmit_attr_t ixas; 2059 netstack_t *ns; 2060 netstackid_t stackid; 2061 2062 stackid = (netstackid_t)(uintptr_t)mp->b_prev; 2063 mp->b_prev = NULL; 2064 ns = netstack_find_by_stackid(stackid); 2065 if (ns == NULL) { 2066 /* Disappeared */ 2067 ip_drop_output("ipIfStatsOutDiscards", mp, NULL); 2068 freemsg(mp); 2069 return; 2070 } 2071 2072 bzero(&ixas, sizeof (ixas)); 2073 ixas.ixa_zoneid = ALL_ZONES; 2074 ixas.ixa_cred = kcred; 2075 ixas.ixa_cpid = NOPID; 2076 ixas.ixa_tsl = NULL; 2077 ixas.ixa_ipst = ns->netstack_ip; 2078 /* No ULP checksum; done by esp_prepare_udp */ 2079 ixas.ixa_flags = (IXAF_IS_IPV4 | IXAF_NO_IPSEC | IXAF_VERIFY_SOURCE); 2080 2081 (void) ip_output_simple(mp, &ixas); 2082 ixa_cleanup(&ixas); 2083 netstack_rele(ns); 2084 } 2085 2086 /* 2087 * Send a one-byte UDP NAT-T keepalive. 2088 */ 2089 void 2090 ipsecesp_send_keepalive(ipsa_t *assoc) 2091 { 2092 mblk_t *mp; 2093 ipha_t *ipha; 2094 udpha_t *udpha; 2095 netstack_t *ns = assoc->ipsa_netstack; 2096 2097 ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock)); 2098 2099 mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI); 2100 if (mp == NULL) 2101 return; 2102 ipha = (ipha_t *)mp->b_rptr; 2103 ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 2104 ipha->ipha_type_of_service = 0; 2105 ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1); 2106 /* Use the low-16 of the SPI so we have some clue where it came from. */ 2107 ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1); 2108 ipha->ipha_fragment_offset_and_flags = 0; /* Too small to fragment! */ 2109 ipha->ipha_ttl = 0xFF; 2110 ipha->ipha_protocol = IPPROTO_UDP; 2111 ipha->ipha_hdr_checksum = 0; 2112 ipha->ipha_src = assoc->ipsa_srcaddr[0]; 2113 ipha->ipha_dst = assoc->ipsa_dstaddr[0]; 2114 udpha = (udpha_t *)(ipha + 1); 2115 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2116 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2117 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 2118 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2119 udpha->uha_length = htons(sizeof (udpha_t) + 1); 2120 udpha->uha_checksum = 0; 2121 mp->b_wptr = (uint8_t *)(udpha + 1); 2122 *(mp->b_wptr++) = 0xFF; 2123 2124 esp_prepare_udp(ns, mp, ipha); 2125 2126 /* 2127 * We're holding an isaf_t bucket lock, so pawn off the actual 2128 * packet transmission to another thread. Just in case syncq 2129 * processing causes a same-bucket packet to be processed. 2130 */ 2131 mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid; 2132 2133 if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp, 2134 TQ_NOSLEEP) == TASKQID_INVALID) { 2135 /* Assume no memory if taskq_dispatch() fails. */ 2136 mp->b_prev = NULL; 2137 ip_drop_packet(mp, B_FALSE, NULL, 2138 DROPPER(ns->netstack_ipsec, ipds_esp_nomem), 2139 &ns->netstack_ipsecesp->esp_dropper); 2140 } 2141 } 2142 2143 /* 2144 * Returns mp if successfully completed the request. Returns 2145 * NULL if it failed (and increments InDiscards) or if it is pending. 2146 */ 2147 static mblk_t * 2148 esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc, 2149 uchar_t *icv_buf, uint_t payload_len) 2150 { 2151 uint_t auth_len; 2152 crypto_call_req_t call_req, *callrp; 2153 mblk_t *esp_mp; 2154 esph_t *esph_ptr; 2155 mblk_t *mp; 2156 int kef_rc = CRYPTO_FAILED; 2157 uint_t icv_len = assoc->ipsa_mac_len; 2158 crypto_ctx_template_t auth_ctx_tmpl; 2159 boolean_t do_auth, do_encr, force; 2160 uint_t iv_len = assoc->ipsa_iv_len; 2161 crypto_ctx_template_t encr_ctx_tmpl; 2162 boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 2163 size_t esph_offset = (is_natt ? UDPH_SIZE : 0); 2164 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 2165 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2166 ipsec_crypto_t *ic, icstack; 2167 uchar_t *iv_ptr; 2168 crypto_data_t *cd_ptr = NULL; 2169 ill_t *ill = ixa->ixa_nce->nce_ill; 2170 ipsec_stack_t *ipss = ns->netstack_ipsec; 2171 2172 esp3dbg(espstack, ("esp_submit_req_outbound:%s", 2173 is_natt ? "natt" : "not natt")); 2174 2175 do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; 2176 do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; 2177 force = (assoc->ipsa_flags & IPSA_F_ASYNC); 2178 2179 #ifdef IPSEC_LATENCY_TEST 2180 kef_rc = CRYPTO_SUCCESS; 2181 #else 2182 kef_rc = CRYPTO_FAILED; 2183 #endif 2184 2185 /* 2186 * Outbound IPsec packets are of the form: 2187 * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV] 2188 * unless it's NATT, then it's 2189 * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV] 2190 * Get a pointer to the mblk containing the ESP header. 2191 */ 2192 ASSERT(data_mp->b_cont != NULL); 2193 esp_mp = data_mp->b_cont; 2194 esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset); 2195 iv_ptr = (uchar_t *)(esph_ptr + 1); 2196 2197 /* 2198 * Combined mode algs need a nonce. This is setup in sadb_common_add(). 2199 * If for some reason we are using a SA which does not have a nonce 2200 * then we must fail here. 2201 */ 2202 if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) && 2203 (assoc->ipsa_nonce == NULL)) { 2204 ip_drop_packet(data_mp, B_FALSE, NULL, 2205 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 2206 return (NULL); 2207 } 2208 2209 if (force) { 2210 /* We are doing asynch; allocate mblks to hold state */ 2211 if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL || 2212 (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) { 2213 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2214 ip_drop_output("ipIfStatsOutDiscards", data_mp, ill); 2215 freemsg(data_mp); 2216 return (NULL); 2217 } 2218 2219 linkb(mp, data_mp); 2220 callrp = &call_req; 2221 ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound); 2222 } else { 2223 /* 2224 * If we know we are going to do sync then ipsec_crypto_t 2225 * should be on the stack. 2226 */ 2227 ic = &icstack; 2228 bzero(ic, sizeof (*ic)); 2229 callrp = NULL; 2230 } 2231 2232 2233 if (do_auth) { 2234 /* authentication context template */ 2235 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, 2236 auth_ctx_tmpl); 2237 2238 /* where to store the computed mac */ 2239 ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, 2240 icv_len, icv_buf); 2241 2242 /* authentication starts at the ESP header */ 2243 auth_len = payload_len + iv_len + sizeof (esph_t); 2244 if (!do_encr) { 2245 /* authentication only */ 2246 /* initialize input data argument */ 2247 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 2248 esp_mp, esph_offset, auth_len); 2249 2250 /* call the crypto framework */ 2251 kef_rc = crypto_mac(&assoc->ipsa_amech, 2252 &ic->ic_crypto_data, 2253 &assoc->ipsa_kcfauthkey, auth_ctx_tmpl, 2254 &ic->ic_crypto_mac, callrp); 2255 } 2256 } 2257 2258 if (do_encr) { 2259 /* encryption context template */ 2260 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR, 2261 encr_ctx_tmpl); 2262 /* Call the nonce update function. */ 2263 (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len, 2264 iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data); 2265 2266 if (!do_auth) { 2267 /* encryption only, skip mblk that contains ESP hdr */ 2268 /* initialize input data argument */ 2269 ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data, 2270 esp_mp->b_cont, 0, payload_len); 2271 2272 /* 2273 * For combined mode ciphers, the ciphertext is the same 2274 * size as the clear text, the ICV should follow the 2275 * ciphertext. To convince the kcf to allow in-line 2276 * encryption, with an ICV, use ipsec_out_crypto_mac 2277 * to point to the same buffer as the data. The calling 2278 * function need to ensure the buffer is large enough to 2279 * include the ICV. 2280 * 2281 * The IV is already written to the packet buffer, the 2282 * nonce setup function copied it to the params struct 2283 * for the cipher to use. 2284 */ 2285 if (assoc->ipsa_flags & IPSA_F_COMBINED) { 2286 bcopy(&ic->ic_crypto_data, 2287 &ic->ic_crypto_mac, 2288 sizeof (crypto_data_t)); 2289 ic->ic_crypto_mac.cd_length = 2290 payload_len + icv_len; 2291 cd_ptr = &ic->ic_crypto_mac; 2292 } 2293 2294 /* call the crypto framework */ 2295 kef_rc = crypto_encrypt((crypto_mechanism_t *) 2296 &ic->ic_cmm, &ic->ic_crypto_data, 2297 &assoc->ipsa_kcfencrkey, encr_ctx_tmpl, 2298 cd_ptr, callrp); 2299 2300 } 2301 } 2302 2303 if (do_auth && do_encr) { 2304 /* 2305 * Encryption and authentication: 2306 * Pass the pointer to the mblk chain starting at the ESP 2307 * header to the framework. Skip the ESP header mblk 2308 * for encryption, which is reflected by an encryption 2309 * offset equal to the length of that mblk. Start 2310 * the authentication at the ESP header, i.e. use an 2311 * authentication offset of zero. 2312 */ 2313 ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data, 2314 esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len); 2315 2316 /* specify IV */ 2317 ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr; 2318 2319 /* call the framework */ 2320 kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech, 2321 &assoc->ipsa_amech, NULL, 2322 &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey, 2323 encr_ctx_tmpl, auth_ctx_tmpl, 2324 &ic->ic_crypto_dual_data, 2325 &ic->ic_crypto_mac, callrp); 2326 } 2327 2328 switch (kef_rc) { 2329 case CRYPTO_SUCCESS: 2330 ESP_BUMP_STAT(espstack, crypto_sync); 2331 esp_set_usetime(assoc, B_FALSE); 2332 if (force) { 2333 mp = ipsec_free_crypto_data(mp); 2334 data_mp = ip_xmit_attr_free_mblk(mp); 2335 } 2336 if (is_natt) 2337 esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr); 2338 return (data_mp); 2339 case CRYPTO_QUEUED: 2340 /* esp_kcf_callback_outbound() will be invoked on completion */ 2341 ESP_BUMP_STAT(espstack, crypto_async); 2342 return (NULL); 2343 } 2344 2345 if (force) { 2346 mp = ipsec_free_crypto_data(mp); 2347 data_mp = ip_xmit_attr_free_mblk(mp); 2348 } 2349 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2350 esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack); 2351 /* data_mp was passed to ip_drop_packet */ 2352 return (NULL); 2353 } 2354 2355 /* 2356 * Handle outbound IPsec processing for IPv4 and IPv6 2357 * 2358 * Returns data_mp if successfully completed the request. Returns 2359 * NULL if it failed (and increments InDiscards) or if it is pending. 2360 */ 2361 static mblk_t * 2362 esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa) 2363 { 2364 mblk_t *espmp, *tailmp; 2365 ipha_t *ipha; 2366 ip6_t *ip6h; 2367 esph_t *esph_ptr, *iv_ptr; 2368 uint_t af; 2369 uint8_t *nhp; 2370 uintptr_t divpoint, datalen, adj, padlen, i, alloclen; 2371 uintptr_t esplen = sizeof (esph_t); 2372 uint8_t protocol; 2373 ipsa_t *assoc; 2374 uint_t iv_len, block_size, mac_len = 0; 2375 uchar_t *icv_buf; 2376 udpha_t *udpha; 2377 boolean_t is_natt = B_FALSE; 2378 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 2379 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2380 ipsec_stack_t *ipss = ns->netstack_ipsec; 2381 ill_t *ill = ixa->ixa_nce->nce_ill; 2382 boolean_t need_refrele = B_FALSE; 2383 2384 ESP_BUMP_STAT(espstack, out_requests); 2385 2386 /* 2387 * <sigh> We have to copy the message here, because TCP (for example) 2388 * keeps a dupb() of the message lying around for retransmission. 2389 * Since ESP changes the whole of the datagram, we have to create our 2390 * own copy lest we clobber TCP's data. Since we have to copy anyway, 2391 * we might as well make use of msgpullup() and get the mblk into one 2392 * contiguous piece! 2393 */ 2394 tailmp = msgpullup(data_mp, -1); 2395 if (tailmp == NULL) { 2396 esp0dbg(("esp_outbound: msgpullup() failed, " 2397 "dropping packet.\n")); 2398 ip_drop_packet(data_mp, B_FALSE, ill, 2399 DROPPER(ipss, ipds_esp_nomem), 2400 &espstack->esp_dropper); 2401 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2402 return (NULL); 2403 } 2404 freemsg(data_mp); 2405 data_mp = tailmp; 2406 2407 assoc = ixa->ixa_ipsec_esp_sa; 2408 ASSERT(assoc != NULL); 2409 2410 /* 2411 * Get the outer IP header in shape to escape this system.. 2412 */ 2413 if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) { 2414 /* 2415 * Need to update packet with any CIPSO option and update 2416 * ixa_tsl to capture the new label. 2417 * We allocate a separate ixa for that purpose. 2418 */ 2419 ixa = ip_xmit_attr_duplicate(ixa); 2420 if (ixa == NULL) { 2421 ip_drop_packet(data_mp, B_FALSE, ill, 2422 DROPPER(ipss, ipds_esp_nomem), 2423 &espstack->esp_dropper); 2424 return (NULL); 2425 } 2426 need_refrele = B_TRUE; 2427 2428 label_hold(assoc->ipsa_otsl); 2429 ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl); 2430 2431 data_mp = sadb_whack_label(data_mp, assoc, ixa, 2432 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper); 2433 if (data_mp == NULL) { 2434 /* Packet dropped by sadb_whack_label */ 2435 ixa_refrele(ixa); 2436 return (NULL); 2437 } 2438 } 2439 2440 /* 2441 * Reality check.... 2442 */ 2443 ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). */ 2444 2445 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2446 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 2447 2448 af = AF_INET; 2449 divpoint = IPH_HDR_LENGTH(ipha); 2450 datalen = ntohs(ipha->ipha_length) - divpoint; 2451 nhp = (uint8_t *)&ipha->ipha_protocol; 2452 } else { 2453 ip_pkt_t ipp; 2454 2455 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 2456 2457 af = AF_INET6; 2458 ip6h = (ip6_t *)ipha; 2459 bzero(&ipp, sizeof (ipp)); 2460 divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL); 2461 if (ipp.ipp_dstopts != NULL && 2462 ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) { 2463 /* 2464 * Destination options are tricky. If we get in here, 2465 * then we have a terminal header following the 2466 * destination options. We need to adjust backwards 2467 * so we insert ESP BEFORE the destination options 2468 * bag. (So that the dstopts get encrypted!) 2469 * 2470 * Since this is for outbound packets only, we know 2471 * that non-terminal destination options only precede 2472 * routing headers. 2473 */ 2474 divpoint -= ipp.ipp_dstoptslen; 2475 } 2476 datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint; 2477 2478 if (ipp.ipp_rthdr != NULL) { 2479 nhp = &ipp.ipp_rthdr->ip6r_nxt; 2480 } else if (ipp.ipp_hopopts != NULL) { 2481 nhp = &ipp.ipp_hopopts->ip6h_nxt; 2482 } else { 2483 ASSERT(divpoint == sizeof (ip6_t)); 2484 /* It's probably IP + ESP. */ 2485 nhp = &ip6h->ip6_nxt; 2486 } 2487 } 2488 2489 mac_len = assoc->ipsa_mac_len; 2490 2491 if (assoc->ipsa_flags & IPSA_F_NATT) { 2492 /* wedge in UDP header */ 2493 is_natt = B_TRUE; 2494 esplen += UDPH_SIZE; 2495 } 2496 2497 /* 2498 * Set up ESP header and encryption padding for ENCR PI request. 2499 */ 2500 2501 /* Determine the padding length. Pad to 4-bytes for no-encryption. */ 2502 if (assoc->ipsa_encr_alg != SADB_EALG_NULL) { 2503 iv_len = assoc->ipsa_iv_len; 2504 block_size = assoc->ipsa_datalen; 2505 2506 /* 2507 * Pad the data to the length of the cipher block size. 2508 * Include the two additional bytes (hence the - 2) for the 2509 * padding length and the next header. Take this into account 2510 * when calculating the actual length of the padding. 2511 */ 2512 ASSERT(ISP2(iv_len)); 2513 padlen = ((unsigned)(block_size - datalen - 2)) & 2514 (block_size - 1); 2515 } else { 2516 iv_len = 0; 2517 padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) & 2518 (sizeof (uint32_t) - 1); 2519 } 2520 2521 /* Allocate ESP header and IV. */ 2522 esplen += iv_len; 2523 2524 /* 2525 * Update association byte-count lifetimes. Don't forget to take 2526 * into account the padding length and next-header (hence the + 2). 2527 * 2528 * Use the amount of data fed into the "encryption algorithm". This 2529 * is the IV, the data length, the padding length, and the final two 2530 * bytes (padlen, and next-header). 2531 * 2532 */ 2533 2534 if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) { 2535 ip_drop_packet(data_mp, B_FALSE, ill, 2536 DROPPER(ipss, ipds_esp_bytes_expire), 2537 &espstack->esp_dropper); 2538 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2539 if (need_refrele) 2540 ixa_refrele(ixa); 2541 return (NULL); 2542 } 2543 2544 espmp = allocb(esplen, BPRI_HI); 2545 if (espmp == NULL) { 2546 ESP_BUMP_STAT(espstack, out_discards); 2547 esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n")); 2548 ip_drop_packet(data_mp, B_FALSE, ill, 2549 DROPPER(ipss, ipds_esp_nomem), 2550 &espstack->esp_dropper); 2551 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2552 if (need_refrele) 2553 ixa_refrele(ixa); 2554 return (NULL); 2555 } 2556 espmp->b_wptr += esplen; 2557 esph_ptr = (esph_t *)espmp->b_rptr; 2558 2559 if (is_natt) { 2560 esp3dbg(espstack, ("esp_outbound: NATT")); 2561 2562 udpha = (udpha_t *)espmp->b_rptr; 2563 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2564 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2565 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 2566 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2567 /* 2568 * Set the checksum to 0, so that the esp_prepare_udp() call 2569 * can do the right thing. 2570 */ 2571 udpha->uha_checksum = 0; 2572 esph_ptr = (esph_t *)(udpha + 1); 2573 } 2574 2575 esph_ptr->esph_spi = assoc->ipsa_spi; 2576 2577 esph_ptr->esph_replay = htonl(atomic_inc_32_nv(&assoc->ipsa_replay)); 2578 if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) { 2579 /* 2580 * XXX We have replay counter wrapping. 2581 * We probably want to nuke this SA (and its peer). 2582 */ 2583 ipsec_assocfailure(info.mi_idnum, 0, 0, 2584 SL_ERROR | SL_CONSOLE | SL_WARN, 2585 "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n", 2586 esph_ptr->esph_spi, assoc->ipsa_dstaddr, af, 2587 espstack->ipsecesp_netstack); 2588 2589 ESP_BUMP_STAT(espstack, out_discards); 2590 sadb_replay_delete(assoc); 2591 ip_drop_packet(data_mp, B_FALSE, ill, 2592 DROPPER(ipss, ipds_esp_replay), 2593 &espstack->esp_dropper); 2594 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2595 if (need_refrele) 2596 ixa_refrele(ixa); 2597 return (NULL); 2598 } 2599 2600 iv_ptr = (esph_ptr + 1); 2601 /* 2602 * iv_ptr points to the mblk which will contain the IV once we have 2603 * written it there. This mblk will be part of a mblk chain that 2604 * will make up the packet. 2605 * 2606 * For counter mode algorithms, the IV is a 64 bit quantity, it 2607 * must NEVER repeat in the lifetime of the SA, otherwise an 2608 * attacker who had recorded enough packets might be able to 2609 * determine some clear text. 2610 * 2611 * To ensure this does not happen, the IV is stored in the SA and 2612 * incremented for each packet, the IV is then copied into the 2613 * "packet" for transmission to the receiving system. The IV will 2614 * also be copied into the nonce, when the packet is encrypted. 2615 * 2616 * CBC mode algorithms use a random IV for each packet. We do not 2617 * require the highest quality random bits, but for best security 2618 * with CBC mode ciphers, the value must be unlikely to repeat and 2619 * must not be known in advance to an adversary capable of influencing 2620 * the clear text. 2621 */ 2622 if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc, 2623 espstack)) { 2624 ip_drop_packet(data_mp, B_FALSE, ill, 2625 DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper); 2626 if (need_refrele) 2627 ixa_refrele(ixa); 2628 return (NULL); 2629 } 2630 2631 /* Fix the IP header. */ 2632 alloclen = padlen + 2 + mac_len; 2633 adj = alloclen + (espmp->b_wptr - espmp->b_rptr); 2634 2635 protocol = *nhp; 2636 2637 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2638 ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj); 2639 if (is_natt) { 2640 *nhp = IPPROTO_UDP; 2641 udpha->uha_length = htons(ntohs(ipha->ipha_length) - 2642 IPH_HDR_LENGTH(ipha)); 2643 } else { 2644 *nhp = IPPROTO_ESP; 2645 } 2646 ipha->ipha_hdr_checksum = 0; 2647 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 2648 } else { 2649 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj); 2650 *nhp = IPPROTO_ESP; 2651 } 2652 2653 /* I've got the two ESP mblks, now insert them. */ 2654 2655 esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n")); 2656 esp2dbg(espstack, (dump_msg(data_mp))); 2657 2658 if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) { 2659 ESP_BUMP_STAT(espstack, out_discards); 2660 /* NOTE: esp_insert_esp() only fails if there's no memory. */ 2661 ip_drop_packet(data_mp, B_FALSE, ill, 2662 DROPPER(ipss, ipds_esp_nomem), 2663 &espstack->esp_dropper); 2664 freeb(espmp); 2665 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2666 if (need_refrele) 2667 ixa_refrele(ixa); 2668 return (NULL); 2669 } 2670 2671 /* Append padding (and leave room for ICV). */ 2672 for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont) 2673 ; 2674 if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) { 2675 tailmp->b_cont = allocb(alloclen, BPRI_HI); 2676 if (tailmp->b_cont == NULL) { 2677 ESP_BUMP_STAT(espstack, out_discards); 2678 esp0dbg(("esp_outbound: Can't allocate tailmp.\n")); 2679 ip_drop_packet(data_mp, B_FALSE, ill, 2680 DROPPER(ipss, ipds_esp_nomem), 2681 &espstack->esp_dropper); 2682 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2683 if (need_refrele) 2684 ixa_refrele(ixa); 2685 return (NULL); 2686 } 2687 tailmp = tailmp->b_cont; 2688 } 2689 2690 /* 2691 * If there's padding, N bytes of padding must be of the form 0x1, 2692 * 0x2, 0x3... 0xN. 2693 */ 2694 for (i = 0; i < padlen; ) { 2695 i++; 2696 *tailmp->b_wptr++ = i; 2697 } 2698 *tailmp->b_wptr++ = i; 2699 *tailmp->b_wptr++ = protocol; 2700 2701 esp2dbg(espstack, ("data_Mp before encryption:\n")); 2702 esp2dbg(espstack, (dump_msg(data_mp))); 2703 2704 /* 2705 * Okay. I've set up the pre-encryption ESP. Let's do it! 2706 */ 2707 2708 if (mac_len > 0) { 2709 ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim); 2710 icv_buf = tailmp->b_wptr; 2711 tailmp->b_wptr += mac_len; 2712 } else { 2713 icv_buf = NULL; 2714 } 2715 2716 data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf, 2717 datalen + padlen + 2); 2718 if (need_refrele) 2719 ixa_refrele(ixa); 2720 return (data_mp); 2721 } 2722 2723 /* 2724 * IP calls this to validate the ICMP errors that 2725 * we got from the network. 2726 */ 2727 mblk_t * 2728 ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira) 2729 { 2730 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 2731 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2732 ipsec_stack_t *ipss = ns->netstack_ipsec; 2733 2734 /* 2735 * Unless we get an entire packet back, this function is useless. 2736 * Why? 2737 * 2738 * 1.) Partial packets are useless, because the "next header" 2739 * is at the end of the decrypted ESP packet. Without the 2740 * whole packet, this is useless. 2741 * 2742 * 2.) If we every use a stateful cipher, such as a stream or a 2743 * one-time pad, we can't do anything. 2744 * 2745 * Since the chances of us getting an entire packet back are very 2746 * very small, we discard here. 2747 */ 2748 IP_ESP_BUMP_STAT(ipss, in_discards); 2749 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 2750 DROPPER(ipss, ipds_esp_icmp), 2751 &espstack->esp_dropper); 2752 return (NULL); 2753 } 2754 2755 /* 2756 * Construct an SADB_REGISTER message with the current algorithms. 2757 * This function gets called when 'ipsecalgs -s' is run or when 2758 * in.iked (or other KMD) starts. 2759 */ 2760 static boolean_t 2761 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial, 2762 ipsecesp_stack_t *espstack, cred_t *cr) 2763 { 2764 mblk_t *pfkey_msg_mp, *keysock_out_mp; 2765 sadb_msg_t *samsg; 2766 sadb_supported_t *sasupp_auth = NULL; 2767 sadb_supported_t *sasupp_encr = NULL; 2768 sadb_alg_t *saalg; 2769 uint_t allocsize = sizeof (*samsg); 2770 uint_t i, numalgs_snap; 2771 int current_aalgs; 2772 ipsec_alginfo_t **authalgs; 2773 uint_t num_aalgs; 2774 int current_ealgs; 2775 ipsec_alginfo_t **encralgs; 2776 uint_t num_ealgs; 2777 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 2778 sadb_sens_t *sens; 2779 size_t sens_len = 0; 2780 sadb_ext_t *nextext; 2781 ts_label_t *sens_tsl = NULL; 2782 2783 /* Allocate the KEYSOCK_OUT. */ 2784 keysock_out_mp = sadb_keysock_out(serial); 2785 if (keysock_out_mp == NULL) { 2786 esp0dbg(("esp_register_out: couldn't allocate mblk.\n")); 2787 return (B_FALSE); 2788 } 2789 2790 if (is_system_labeled() && (cr != NULL)) { 2791 sens_tsl = crgetlabel(cr); 2792 if (sens_tsl != NULL) { 2793 sens_len = sadb_sens_len_from_label(sens_tsl); 2794 allocsize += sens_len; 2795 } 2796 } 2797 2798 /* 2799 * Allocate the PF_KEY message that follows KEYSOCK_OUT. 2800 */ 2801 2802 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 2803 /* 2804 * Fill SADB_REGISTER message's algorithm descriptors. Hold 2805 * down the lock while filling it. 2806 * 2807 * Return only valid algorithms, so the number of algorithms 2808 * to send up may be less than the number of algorithm entries 2809 * in the table. 2810 */ 2811 authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH]; 2812 for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) 2813 if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) 2814 num_aalgs++; 2815 2816 if (num_aalgs != 0) { 2817 allocsize += (num_aalgs * sizeof (*saalg)); 2818 allocsize += sizeof (*sasupp_auth); 2819 } 2820 encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR]; 2821 for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) 2822 if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) 2823 num_ealgs++; 2824 2825 if (num_ealgs != 0) { 2826 allocsize += (num_ealgs * sizeof (*saalg)); 2827 allocsize += sizeof (*sasupp_encr); 2828 } 2829 keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI); 2830 if (keysock_out_mp->b_cont == NULL) { 2831 rw_exit(&ipss->ipsec_alg_lock); 2832 freemsg(keysock_out_mp); 2833 return (B_FALSE); 2834 } 2835 pfkey_msg_mp = keysock_out_mp->b_cont; 2836 pfkey_msg_mp->b_wptr += allocsize; 2837 2838 nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg)); 2839 2840 if (num_aalgs != 0) { 2841 sasupp_auth = (sadb_supported_t *)nextext; 2842 saalg = (sadb_alg_t *)(sasupp_auth + 1); 2843 2844 ASSERT(((ulong_t)saalg & 0x7) == 0); 2845 2846 numalgs_snap = 0; 2847 for (i = 0; 2848 ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); 2849 i++) { 2850 if (authalgs[i] == NULL || !ALG_VALID(authalgs[i])) 2851 continue; 2852 2853 saalg->sadb_alg_id = authalgs[i]->alg_id; 2854 saalg->sadb_alg_ivlen = 0; 2855 saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits; 2856 saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits; 2857 saalg->sadb_x_alg_increment = 2858 authalgs[i]->alg_increment; 2859 saalg->sadb_x_alg_saltbits = SADB_8TO1( 2860 authalgs[i]->alg_saltlen); 2861 numalgs_snap++; 2862 saalg++; 2863 } 2864 ASSERT(numalgs_snap == num_aalgs); 2865 #ifdef DEBUG 2866 /* 2867 * Reality check to make sure I snagged all of the 2868 * algorithms. 2869 */ 2870 for (; i < IPSEC_MAX_ALGS; i++) { 2871 if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) { 2872 cmn_err(CE_PANIC, "esp_register_out()! " 2873 "Missed aalg #%d.\n", i); 2874 } 2875 } 2876 #endif /* DEBUG */ 2877 nextext = (sadb_ext_t *)saalg; 2878 } 2879 2880 if (num_ealgs != 0) { 2881 sasupp_encr = (sadb_supported_t *)nextext; 2882 saalg = (sadb_alg_t *)(sasupp_encr + 1); 2883 2884 numalgs_snap = 0; 2885 for (i = 0; 2886 ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) { 2887 if (encralgs[i] == NULL || !ALG_VALID(encralgs[i])) 2888 continue; 2889 saalg->sadb_alg_id = encralgs[i]->alg_id; 2890 saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen; 2891 saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits; 2892 saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits; 2893 /* 2894 * We could advertise the ICV length, except there 2895 * is not a value in sadb_x_algb to do this. 2896 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen; 2897 */ 2898 saalg->sadb_x_alg_increment = 2899 encralgs[i]->alg_increment; 2900 saalg->sadb_x_alg_saltbits = 2901 SADB_8TO1(encralgs[i]->alg_saltlen); 2902 2903 numalgs_snap++; 2904 saalg++; 2905 } 2906 ASSERT(numalgs_snap == num_ealgs); 2907 #ifdef DEBUG 2908 /* 2909 * Reality check to make sure I snagged all of the 2910 * algorithms. 2911 */ 2912 for (; i < IPSEC_MAX_ALGS; i++) { 2913 if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) { 2914 cmn_err(CE_PANIC, "esp_register_out()! " 2915 "Missed ealg #%d.\n", i); 2916 } 2917 } 2918 #endif /* DEBUG */ 2919 nextext = (sadb_ext_t *)saalg; 2920 } 2921 2922 current_aalgs = num_aalgs; 2923 current_ealgs = num_ealgs; 2924 2925 rw_exit(&ipss->ipsec_alg_lock); 2926 2927 if (sens_tsl != NULL) { 2928 sens = (sadb_sens_t *)nextext; 2929 sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY, 2930 sens_tsl, sens_len); 2931 2932 nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len); 2933 } 2934 2935 /* Now fill the rest of the SADB_REGISTER message. */ 2936 2937 samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr; 2938 samsg->sadb_msg_version = PF_KEY_V2; 2939 samsg->sadb_msg_type = SADB_REGISTER; 2940 samsg->sadb_msg_errno = 0; 2941 samsg->sadb_msg_satype = SADB_SATYPE_ESP; 2942 samsg->sadb_msg_len = SADB_8TO64(allocsize); 2943 samsg->sadb_msg_reserved = 0; 2944 /* 2945 * Assume caller has sufficient sequence/pid number info. If it's one 2946 * from me over a new alg., I could give two hoots about sequence. 2947 */ 2948 samsg->sadb_msg_seq = sequence; 2949 samsg->sadb_msg_pid = pid; 2950 2951 if (sasupp_auth != NULL) { 2952 sasupp_auth->sadb_supported_len = SADB_8TO64( 2953 sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs); 2954 sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; 2955 sasupp_auth->sadb_supported_reserved = 0; 2956 } 2957 2958 if (sasupp_encr != NULL) { 2959 sasupp_encr->sadb_supported_len = SADB_8TO64( 2960 sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs); 2961 sasupp_encr->sadb_supported_exttype = 2962 SADB_EXT_SUPPORTED_ENCRYPT; 2963 sasupp_encr->sadb_supported_reserved = 0; 2964 } 2965 2966 if (espstack->esp_pfkey_q != NULL) 2967 putnext(espstack->esp_pfkey_q, keysock_out_mp); 2968 else { 2969 freemsg(keysock_out_mp); 2970 return (B_FALSE); 2971 } 2972 2973 return (B_TRUE); 2974 } 2975 2976 /* 2977 * Invoked when the algorithm table changes. Causes SADB_REGISTER 2978 * messages continaining the current list of algorithms to be 2979 * sent up to the ESP listeners. 2980 */ 2981 void 2982 ipsecesp_algs_changed(netstack_t *ns) 2983 { 2984 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2985 2986 /* 2987 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners 2988 * everywhere. (The function itself checks for NULL esp_pfkey_q.) 2989 */ 2990 (void) esp_register_out(0, 0, 0, espstack, NULL); 2991 } 2992 2993 /* 2994 * Stub function that taskq_dispatch() invokes to take the mblk (in arg) 2995 * and send() it into ESP and IP again. 2996 */ 2997 static void 2998 inbound_task(void *arg) 2999 { 3000 mblk_t *mp = (mblk_t *)arg; 3001 mblk_t *async_mp; 3002 ip_recv_attr_t iras; 3003 3004 async_mp = mp; 3005 mp = async_mp->b_cont; 3006 async_mp->b_cont = NULL; 3007 if (!ip_recv_attr_from_mblk(async_mp, &iras)) { 3008 /* The ill or ip_stack_t disappeared on us */ 3009 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL); 3010 freemsg(mp); 3011 goto done; 3012 } 3013 3014 esp_inbound_restart(mp, &iras); 3015 done: 3016 ira_cleanup(&iras, B_TRUE); 3017 } 3018 3019 /* 3020 * Restart ESP after the SA has been added. 3021 */ 3022 static void 3023 esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira) 3024 { 3025 esph_t *esph; 3026 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 3027 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3028 3029 esp2dbg(espstack, ("in ESP inbound_task")); 3030 ASSERT(espstack != NULL); 3031 3032 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 3033 if (mp == NULL) 3034 return; 3035 3036 ASSERT(esph != NULL); 3037 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3038 ASSERT(ira->ira_ipsec_esp_sa != NULL); 3039 3040 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira); 3041 if (mp == NULL) { 3042 /* 3043 * Either it failed or is pending. In the former case 3044 * ipIfStatsInDiscards was increased. 3045 */ 3046 return; 3047 } 3048 3049 ip_input_post_ipsec(mp, ira); 3050 } 3051 3052 /* 3053 * Now that weak-key passed, actually ADD the security association, and 3054 * send back a reply ADD message. 3055 */ 3056 static int 3057 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, 3058 int *diagnostic, ipsecesp_stack_t *espstack) 3059 { 3060 isaf_t *primary = NULL, *secondary; 3061 boolean_t clone = B_FALSE, is_inbound = B_FALSE; 3062 ipsa_t *larval = NULL; 3063 ipsacq_t *acqrec; 3064 iacqf_t *acq_bucket; 3065 mblk_t *acq_msgs = NULL; 3066 int rc; 3067 mblk_t *lpkt; 3068 int error; 3069 ipsa_query_t sq; 3070 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 3071 3072 /* 3073 * Locate the appropriate table(s). 3074 */ 3075 sq.spp = &espstack->esp_sadb; /* XXX */ 3076 error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST, 3077 IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, 3078 &sq, diagnostic); 3079 if (error) 3080 return (error); 3081 3082 /* 3083 * Use the direction flags provided by the KMD to determine 3084 * if the inbound or outbound table should be the primary 3085 * for this SA. If these flags were absent then make this 3086 * decision based on the addresses. 3087 */ 3088 if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) { 3089 primary = sq.inbound; 3090 secondary = sq.outbound; 3091 is_inbound = B_TRUE; 3092 if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) 3093 clone = B_TRUE; 3094 } else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) { 3095 primary = sq.outbound; 3096 secondary = sq.inbound; 3097 } 3098 3099 if (primary == NULL) { 3100 /* 3101 * The KMD did not set a direction flag, determine which 3102 * table to insert the SA into based on addresses. 3103 */ 3104 switch (ksi->ks_in_dsttype) { 3105 case KS_IN_ADDR_MBCAST: 3106 clone = B_TRUE; /* All mcast SAs can be bidirectional */ 3107 sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND; 3108 /* FALLTHRU */ 3109 /* 3110 * If the source address is either one of mine, or unspecified 3111 * (which is best summed up by saying "not 'not mine'"), 3112 * then the association is potentially bi-directional, 3113 * in that it can be used for inbound traffic and outbound 3114 * traffic. The best example of such an SA is a multicast 3115 * SA (which allows me to receive the outbound traffic). 3116 */ 3117 case KS_IN_ADDR_ME: 3118 sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND; 3119 primary = sq.inbound; 3120 secondary = sq.outbound; 3121 if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME) 3122 clone = B_TRUE; 3123 is_inbound = B_TRUE; 3124 break; 3125 /* 3126 * If the source address literally not mine (either 3127 * unspecified or not mine), then this SA may have an 3128 * address that WILL be mine after some configuration. 3129 * We pay the price for this by making it a bi-directional 3130 * SA. 3131 */ 3132 case KS_IN_ADDR_NOTME: 3133 sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND; 3134 primary = sq.outbound; 3135 secondary = sq.inbound; 3136 if (ksi->ks_in_srctype != KS_IN_ADDR_ME) { 3137 sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND; 3138 clone = B_TRUE; 3139 } 3140 break; 3141 default: 3142 *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST; 3143 return (EINVAL); 3144 } 3145 } 3146 3147 /* 3148 * Find a ACQUIRE list entry if possible. If we've added an SA that 3149 * suits the needs of an ACQUIRE list entry, we can eliminate the 3150 * ACQUIRE list entry and transmit the enqueued packets. Use the 3151 * high-bit of the sequence number to queue it. Key off destination 3152 * addr, and change acqrec's state. 3153 */ 3154 3155 if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) { 3156 acq_bucket = &(sq.sp->sdb_acq[sq.outhash]); 3157 mutex_enter(&acq_bucket->iacqf_lock); 3158 for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL; 3159 acqrec = acqrec->ipsacq_next) { 3160 mutex_enter(&acqrec->ipsacq_lock); 3161 /* 3162 * Q: I only check sequence. Should I check dst? 3163 * A: Yes, check dest because those are the packets 3164 * that are queued up. 3165 */ 3166 if (acqrec->ipsacq_seq == samsg->sadb_msg_seq && 3167 IPSA_ARE_ADDR_EQUAL(sq.dstaddr, 3168 acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) 3169 break; 3170 mutex_exit(&acqrec->ipsacq_lock); 3171 } 3172 if (acqrec != NULL) { 3173 /* 3174 * AHA! I found an ACQUIRE record for this SA. 3175 * Grab the msg list, and free the acquire record. 3176 * I already am holding the lock for this record, 3177 * so all I have to do is free it. 3178 */ 3179 acq_msgs = acqrec->ipsacq_mp; 3180 acqrec->ipsacq_mp = NULL; 3181 mutex_exit(&acqrec->ipsacq_lock); 3182 sadb_destroy_acquire(acqrec, 3183 espstack->ipsecesp_netstack); 3184 } 3185 mutex_exit(&acq_bucket->iacqf_lock); 3186 } 3187 3188 /* 3189 * Find PF_KEY message, and see if I'm an update. If so, find entry 3190 * in larval list (if there). 3191 */ 3192 if (samsg->sadb_msg_type == SADB_UPDATE) { 3193 mutex_enter(&sq.inbound->isaf_lock); 3194 larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi, 3195 ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family); 3196 mutex_exit(&sq.inbound->isaf_lock); 3197 3198 if ((larval == NULL) || 3199 (larval->ipsa_state != IPSA_STATE_LARVAL)) { 3200 *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND; 3201 if (larval != NULL) { 3202 IPSA_REFRELE(larval); 3203 } 3204 esp0dbg(("Larval update, but larval disappeared.\n")); 3205 return (ESRCH); 3206 } /* Else sadb_common_add unlinks it for me! */ 3207 } 3208 3209 if (larval != NULL) { 3210 /* 3211 * Hold again, because sadb_common_add() consumes a reference, 3212 * and we don't want to clear_lpkt() without a reference. 3213 */ 3214 IPSA_REFHOLD(larval); 3215 } 3216 3217 rc = sadb_common_add(espstack->esp_pfkey_q, 3218 mp, samsg, ksi, primary, secondary, larval, clone, is_inbound, 3219 diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb); 3220 3221 if (larval != NULL) { 3222 if (rc == 0) { 3223 lpkt = sadb_clear_lpkt(larval); 3224 if (lpkt != NULL) { 3225 rc = taskq_dispatch(esp_taskq, inbound_task, 3226 lpkt, TQ_NOSLEEP) == TASKQID_INVALID; 3227 } 3228 } 3229 IPSA_REFRELE(larval); 3230 } 3231 3232 /* 3233 * How much more stack will I create with all of these 3234 * esp_outbound() calls? 3235 */ 3236 3237 /* Handle the packets queued waiting for the SA */ 3238 while (acq_msgs != NULL) { 3239 mblk_t *asyncmp; 3240 mblk_t *data_mp; 3241 ip_xmit_attr_t ixas; 3242 ill_t *ill; 3243 3244 asyncmp = acq_msgs; 3245 acq_msgs = acq_msgs->b_next; 3246 asyncmp->b_next = NULL; 3247 3248 /* 3249 * Extract the ip_xmit_attr_t from the first mblk. 3250 * Verifies that the netstack and ill is still around; could 3251 * have vanished while iked was doing its work. 3252 * On succesful return we have a nce_t and the ill/ipst can't 3253 * disappear until we do the nce_refrele in ixa_cleanup. 3254 */ 3255 data_mp = asyncmp->b_cont; 3256 asyncmp->b_cont = NULL; 3257 if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) { 3258 ESP_BUMP_STAT(espstack, out_discards); 3259 ip_drop_packet(data_mp, B_FALSE, NULL, 3260 DROPPER(ipss, ipds_sadb_acquire_timeout), 3261 &espstack->esp_dropper); 3262 } else if (rc != 0) { 3263 ill = ixas.ixa_nce->nce_ill; 3264 ESP_BUMP_STAT(espstack, out_discards); 3265 ip_drop_packet(data_mp, B_FALSE, ill, 3266 DROPPER(ipss, ipds_sadb_acquire_timeout), 3267 &espstack->esp_dropper); 3268 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 3269 } else { 3270 esp_outbound_finish(data_mp, &ixas); 3271 } 3272 ixa_cleanup(&ixas); 3273 } 3274 3275 return (rc); 3276 } 3277 3278 /* 3279 * Process one of the queued messages (from ipsacq_mp) once the SA 3280 * has been added. 3281 */ 3282 static void 3283 esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa) 3284 { 3285 netstack_t *ns = ixa->ixa_ipst->ips_netstack; 3286 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3287 ipsec_stack_t *ipss = ns->netstack_ipsec; 3288 ill_t *ill = ixa->ixa_nce->nce_ill; 3289 3290 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) { 3291 ESP_BUMP_STAT(espstack, out_discards); 3292 ip_drop_packet(data_mp, B_FALSE, ill, 3293 DROPPER(ipss, ipds_sadb_acquire_timeout), 3294 &espstack->esp_dropper); 3295 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 3296 return; 3297 } 3298 3299 data_mp = esp_outbound(data_mp, ixa); 3300 if (data_mp == NULL) 3301 return; 3302 3303 /* do AH processing if needed */ 3304 data_mp = esp_do_outbound_ah(data_mp, ixa); 3305 if (data_mp == NULL) 3306 return; 3307 3308 (void) ip_output_post_ipsec(data_mp, ixa); 3309 } 3310 3311 /* 3312 * Add new ESP security association. This may become a generic AH/ESP 3313 * routine eventually. 3314 */ 3315 static int 3316 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) 3317 { 3318 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3319 sadb_address_t *srcext = 3320 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3321 sadb_address_t *dstext = 3322 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3323 sadb_address_t *isrcext = 3324 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC]; 3325 sadb_address_t *idstext = 3326 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST]; 3327 sadb_address_t *nttext_loc = 3328 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC]; 3329 sadb_address_t *nttext_rem = 3330 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM]; 3331 sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH]; 3332 sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT]; 3333 struct sockaddr_in *src, *dst; 3334 struct sockaddr_in *natt_loc, *natt_rem; 3335 struct sockaddr_in6 *natt_loc6, *natt_rem6; 3336 sadb_lifetime_t *soft = 3337 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT]; 3338 sadb_lifetime_t *hard = 3339 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD]; 3340 sadb_lifetime_t *idle = 3341 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE]; 3342 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3343 ipsec_stack_t *ipss = ns->netstack_ipsec; 3344 3345 3346 3347 /* I need certain extensions present for an ADD message. */ 3348 if (srcext == NULL) { 3349 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC; 3350 return (EINVAL); 3351 } 3352 if (dstext == NULL) { 3353 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3354 return (EINVAL); 3355 } 3356 if (isrcext == NULL && idstext != NULL) { 3357 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC; 3358 return (EINVAL); 3359 } 3360 if (isrcext != NULL && idstext == NULL) { 3361 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST; 3362 return (EINVAL); 3363 } 3364 if (assoc == NULL) { 3365 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3366 return (EINVAL); 3367 } 3368 if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) { 3369 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY; 3370 return (EINVAL); 3371 } 3372 3373 src = (struct sockaddr_in *)(srcext + 1); 3374 dst = (struct sockaddr_in *)(dstext + 1); 3375 natt_loc = (struct sockaddr_in *)(nttext_loc + 1); 3376 natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1); 3377 natt_rem = (struct sockaddr_in *)(nttext_rem + 1); 3378 natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1); 3379 3380 /* Sundry ADD-specific reality checks. */ 3381 /* XXX STATS : Logging/stats here? */ 3382 3383 if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) && 3384 (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) { 3385 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE; 3386 return (EINVAL); 3387 } 3388 if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) { 3389 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3390 return (EINVAL); 3391 } 3392 3393 #ifndef IPSEC_LATENCY_TEST 3394 if (assoc->sadb_sa_encrypt == SADB_EALG_NULL && 3395 assoc->sadb_sa_auth == SADB_AALG_NONE) { 3396 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3397 return (EINVAL); 3398 } 3399 #endif 3400 3401 if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) { 3402 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; 3403 return (EINVAL); 3404 } 3405 3406 if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) { 3407 return (EINVAL); 3408 } 3409 ASSERT(src->sin_family == dst->sin_family); 3410 3411 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) { 3412 if (nttext_loc == NULL) { 3413 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC; 3414 return (EINVAL); 3415 } 3416 3417 if (natt_loc->sin_family == AF_INET6 && 3418 !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) { 3419 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC; 3420 return (EINVAL); 3421 } 3422 } 3423 3424 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) { 3425 if (nttext_rem == NULL) { 3426 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM; 3427 return (EINVAL); 3428 } 3429 if (natt_rem->sin_family == AF_INET6 && 3430 !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) { 3431 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM; 3432 return (EINVAL); 3433 } 3434 } 3435 3436 3437 /* Stuff I don't support, for now. XXX Diagnostic? */ 3438 if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) 3439 return (EOPNOTSUPP); 3440 3441 if ((*diagnostic = sadb_labelchk(ksi)) != 0) 3442 return (EINVAL); 3443 3444 /* 3445 * XXX Policy : I'm not checking identities at this time, 3446 * but if I did, I'd do them here, before I sent 3447 * the weak key check up to the algorithm. 3448 */ 3449 3450 rw_enter(&ipss->ipsec_alg_lock, RW_READER); 3451 3452 /* 3453 * First locate the authentication algorithm. 3454 */ 3455 #ifdef IPSEC_LATENCY_TEST 3456 if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) { 3457 #else 3458 if (akey != NULL) { 3459 #endif 3460 ipsec_alginfo_t *aalg; 3461 3462 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] 3463 [assoc->sadb_sa_auth]; 3464 if (aalg == NULL || !ALG_VALID(aalg)) { 3465 rw_exit(&ipss->ipsec_alg_lock); 3466 esp1dbg(espstack, ("Couldn't find auth alg #%d.\n", 3467 assoc->sadb_sa_auth)); 3468 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3469 return (EINVAL); 3470 } 3471 3472 /* 3473 * Sanity check key sizes. 3474 * Note: It's not possible to use SADB_AALG_NONE because 3475 * this auth_alg is not defined with ALG_FLAG_VALID. If this 3476 * ever changes, the same check for SADB_AALG_NONE and 3477 * a auth_key != NULL should be made here ( see below). 3478 */ 3479 if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) { 3480 rw_exit(&ipss->ipsec_alg_lock); 3481 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS; 3482 return (EINVAL); 3483 } 3484 ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3485 3486 /* check key and fix parity if needed */ 3487 if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE, 3488 diagnostic) != 0) { 3489 rw_exit(&ipss->ipsec_alg_lock); 3490 return (EINVAL); 3491 } 3492 } 3493 3494 /* 3495 * Then locate the encryption algorithm. 3496 */ 3497 if (ekey != NULL) { 3498 uint_t keybits; 3499 ipsec_alginfo_t *ealg; 3500 3501 ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] 3502 [assoc->sadb_sa_encrypt]; 3503 if (ealg == NULL || !ALG_VALID(ealg)) { 3504 rw_exit(&ipss->ipsec_alg_lock); 3505 esp1dbg(espstack, ("Couldn't find encr alg #%d.\n", 3506 assoc->sadb_sa_encrypt)); 3507 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3508 return (EINVAL); 3509 } 3510 3511 /* 3512 * Sanity check key sizes. If the encryption algorithm is 3513 * SADB_EALG_NULL but the encryption key is NOT 3514 * NULL then complain. 3515 * 3516 * The keying material includes salt bits if required by 3517 * algorithm and optionally the Initial IV, check the 3518 * length of whats left. 3519 */ 3520 keybits = ekey->sadb_key_bits; 3521 keybits -= ekey->sadb_key_reserved; 3522 keybits -= SADB_8TO1(ealg->alg_saltlen); 3523 if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) || 3524 (!ipsec_valid_key_size(keybits, ealg))) { 3525 rw_exit(&ipss->ipsec_alg_lock); 3526 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS; 3527 return (EINVAL); 3528 } 3529 ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3530 3531 /* check key */ 3532 if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE, 3533 diagnostic) != 0) { 3534 rw_exit(&ipss->ipsec_alg_lock); 3535 return (EINVAL); 3536 } 3537 } 3538 rw_exit(&ipss->ipsec_alg_lock); 3539 3540 return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, 3541 diagnostic, espstack)); 3542 } 3543 3544 /* 3545 * Update a security association. Updates come in two varieties. The first 3546 * is an update of lifetimes on a non-larval SA. The second is an update of 3547 * a larval SA, which ends up looking a lot more like an add. 3548 */ 3549 static int 3550 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3551 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type) 3552 { 3553 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3554 mblk_t *buf_pkt; 3555 int rcode; 3556 3557 sadb_address_t *dstext = 3558 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3559 3560 if (dstext == NULL) { 3561 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3562 return (EINVAL); 3563 } 3564 3565 rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb, 3566 diagnostic, espstack->esp_pfkey_q, esp_add_sa, 3567 espstack->ipsecesp_netstack, sadb_msg_type); 3568 3569 if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) || 3570 (rcode != 0)) { 3571 return (rcode); 3572 } 3573 3574 HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec, 3575 espstack->esp_dropper, buf_pkt); 3576 3577 return (rcode); 3578 } 3579 3580 /* XXX refactor me */ 3581 /* 3582 * Delete a security association. This is REALLY likely to be code common to 3583 * both AH and ESP. Find the association, then unlink it. 3584 */ 3585 static int 3586 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3587 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type) 3588 { 3589 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3590 sadb_address_t *dstext = 3591 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3592 sadb_address_t *srcext = 3593 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3594 struct sockaddr_in *sin; 3595 3596 if (assoc == NULL) { 3597 if (dstext != NULL) { 3598 sin = (struct sockaddr_in *)(dstext + 1); 3599 } else if (srcext != NULL) { 3600 sin = (struct sockaddr_in *)(srcext + 1); 3601 } else { 3602 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3603 return (EINVAL); 3604 } 3605 return (sadb_purge_sa(mp, ksi, 3606 (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : 3607 &espstack->esp_sadb.s_v4, diagnostic, 3608 espstack->esp_pfkey_q)); 3609 } 3610 3611 return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic, 3612 espstack->esp_pfkey_q, sadb_msg_type)); 3613 } 3614 3615 /* XXX refactor me */ 3616 /* 3617 * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP 3618 * messages. 3619 */ 3620 static void 3621 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 3622 { 3623 int error; 3624 sadb_msg_t *samsg; 3625 3626 /* 3627 * Dump each fanout, bailing if error is non-zero. 3628 */ 3629 3630 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi, 3631 &espstack->esp_sadb.s_v4); 3632 if (error != 0) 3633 goto bail; 3634 3635 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi, 3636 &espstack->esp_sadb.s_v6); 3637 bail: 3638 ASSERT(mp->b_cont != NULL); 3639 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3640 samsg->sadb_msg_errno = (uint8_t)error; 3641 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, 3642 (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL); 3643 } 3644 3645 /* 3646 * First-cut reality check for an inbound PF_KEY message. 3647 */ 3648 static boolean_t 3649 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi, 3650 ipsecesp_stack_t *espstack) 3651 { 3652 int diagnostic; 3653 3654 if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) { 3655 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT; 3656 goto badmsg; 3657 } 3658 if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL || 3659 ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) { 3660 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT; 3661 goto badmsg; 3662 } 3663 return (B_FALSE); /* False ==> no failures */ 3664 3665 badmsg: 3666 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 3667 ksi->ks_in_serial); 3668 return (B_TRUE); /* True ==> failures */ 3669 } 3670 3671 /* 3672 * ESP parsing of PF_KEY messages. Keysock did most of the really silly 3673 * error cases. What I receive is a fully-formed, syntactically legal 3674 * PF_KEY message. I then need to check semantics... 3675 * 3676 * This code may become common to AH and ESP. Stay tuned. 3677 * 3678 * I also make the assumption that db_ref's are cool. If this assumption 3679 * is wrong, this means that someone other than keysock or me has been 3680 * mucking with PF_KEY messages. 3681 */ 3682 static void 3683 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack) 3684 { 3685 mblk_t *msg = mp->b_cont; 3686 sadb_msg_t *samsg; 3687 keysock_in_t *ksi; 3688 int error; 3689 int diagnostic = SADB_X_DIAGNOSTIC_NONE; 3690 3691 ASSERT(msg != NULL); 3692 3693 samsg = (sadb_msg_t *)msg->b_rptr; 3694 ksi = (keysock_in_t *)mp->b_rptr; 3695 3696 /* 3697 * If applicable, convert unspecified AF_INET6 to unspecified 3698 * AF_INET. And do other address reality checks. 3699 */ 3700 if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp, 3701 espstack->ipsecesp_netstack) || 3702 esp_pfkey_reality_failures(mp, ksi, espstack)) { 3703 return; 3704 } 3705 3706 switch (samsg->sadb_msg_type) { 3707 case SADB_ADD: 3708 error = esp_add_sa(mp, ksi, &diagnostic, 3709 espstack->ipsecesp_netstack); 3710 if (error != 0) { 3711 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3712 diagnostic, ksi->ks_in_serial); 3713 } 3714 /* else esp_add_sa() took care of things. */ 3715 break; 3716 case SADB_DELETE: 3717 case SADB_X_DELPAIR: 3718 case SADB_X_DELPAIR_STATE: 3719 error = esp_del_sa(mp, ksi, &diagnostic, espstack, 3720 samsg->sadb_msg_type); 3721 if (error != 0) { 3722 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3723 diagnostic, ksi->ks_in_serial); 3724 } 3725 /* Else esp_del_sa() took care of things. */ 3726 break; 3727 case SADB_GET: 3728 error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb, 3729 &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type); 3730 if (error != 0) { 3731 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3732 diagnostic, ksi->ks_in_serial); 3733 } 3734 /* Else sadb_get_sa() took care of things. */ 3735 break; 3736 case SADB_FLUSH: 3737 sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack); 3738 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL); 3739 break; 3740 case SADB_REGISTER: 3741 /* 3742 * Hmmm, let's do it! Check for extensions (there should 3743 * be none), extract the fields, call esp_register_out(), 3744 * then either free or report an error. 3745 * 3746 * Keysock takes care of the PF_KEY bookkeeping for this. 3747 */ 3748 if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid, 3749 ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) { 3750 freemsg(mp); 3751 } else { 3752 /* 3753 * Only way this path hits is if there is a memory 3754 * failure. It will not return B_FALSE because of 3755 * lack of esp_pfkey_q if I am in wput(). 3756 */ 3757 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, 3758 diagnostic, ksi->ks_in_serial); 3759 } 3760 break; 3761 case SADB_UPDATE: 3762 case SADB_X_UPDATEPAIR: 3763 /* 3764 * Find a larval, if not there, find a full one and get 3765 * strict. 3766 */ 3767 error = esp_update_sa(mp, ksi, &diagnostic, espstack, 3768 samsg->sadb_msg_type); 3769 if (error != 0) { 3770 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3771 diagnostic, ksi->ks_in_serial); 3772 } 3773 /* else esp_update_sa() took care of things. */ 3774 break; 3775 case SADB_GETSPI: 3776 /* 3777 * Reserve a new larval entry. 3778 */ 3779 esp_getspi(mp, ksi, espstack); 3780 break; 3781 case SADB_ACQUIRE: 3782 /* 3783 * Find larval and/or ACQUIRE record and kill it (them), I'm 3784 * most likely an error. Inbound ACQUIRE messages should only 3785 * have the base header. 3786 */ 3787 sadb_in_acquire(samsg, &espstack->esp_sadb, 3788 espstack->esp_pfkey_q, espstack->ipsecesp_netstack); 3789 freemsg(mp); 3790 break; 3791 case SADB_DUMP: 3792 /* 3793 * Dump all entries. 3794 */ 3795 esp_dump(mp, ksi, espstack); 3796 /* esp_dump will take care of the return message, etc. */ 3797 break; 3798 case SADB_EXPIRE: 3799 /* Should never reach me. */ 3800 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP, 3801 diagnostic, ksi->ks_in_serial); 3802 break; 3803 default: 3804 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, 3805 SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial); 3806 break; 3807 } 3808 } 3809 3810 /* 3811 * Handle case where PF_KEY says it can't find a keysock for one of my 3812 * ACQUIRE messages. 3813 */ 3814 static void 3815 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack) 3816 { 3817 sadb_msg_t *samsg; 3818 keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr; 3819 3820 if (mp->b_cont == NULL) { 3821 freemsg(mp); 3822 return; 3823 } 3824 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3825 3826 /* 3827 * If keysock can't find any registered, delete the acquire record 3828 * immediately, and handle errors. 3829 */ 3830 if (samsg->sadb_msg_type == SADB_ACQUIRE) { 3831 samsg->sadb_msg_errno = kse->ks_err_errno; 3832 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg)); 3833 /* 3834 * Use the write-side of the esp_pfkey_q 3835 */ 3836 sadb_in_acquire(samsg, &espstack->esp_sadb, 3837 WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack); 3838 } 3839 3840 freemsg(mp); 3841 } 3842 3843 /* 3844 * ESP module read put routine. 3845 */ 3846 static int 3847 ipsecesp_rput(queue_t *q, mblk_t *mp) 3848 { 3849 putnext(q, mp); 3850 return (0); 3851 } 3852 3853 /* 3854 * ESP module write put routine. 3855 */ 3856 static int 3857 ipsecesp_wput(queue_t *q, mblk_t *mp) 3858 { 3859 ipsec_info_t *ii; 3860 struct iocblk *iocp; 3861 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 3862 3863 esp3dbg(espstack, ("In esp_wput().\n")); 3864 3865 /* NOTE: Each case must take care of freeing or passing mp. */ 3866 switch (mp->b_datap->db_type) { 3867 case M_CTL: 3868 if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) { 3869 /* Not big enough message. */ 3870 freemsg(mp); 3871 break; 3872 } 3873 ii = (ipsec_info_t *)mp->b_rptr; 3874 3875 switch (ii->ipsec_info_type) { 3876 case KEYSOCK_OUT_ERR: 3877 esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n")); 3878 esp_keysock_no_socket(mp, espstack); 3879 break; 3880 case KEYSOCK_IN: 3881 ESP_BUMP_STAT(espstack, keysock_in); 3882 esp3dbg(espstack, ("Got KEYSOCK_IN message.\n")); 3883 3884 /* Parse the message. */ 3885 esp_parse_pfkey(mp, espstack); 3886 break; 3887 case KEYSOCK_HELLO: 3888 sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp, 3889 esp_ager, (void *)espstack, &espstack->esp_event, 3890 SADB_SATYPE_ESP); 3891 break; 3892 default: 3893 esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n", 3894 ii->ipsec_info_type)); 3895 freemsg(mp); 3896 break; 3897 } 3898 break; 3899 case M_IOCTL: 3900 iocp = (struct iocblk *)mp->b_rptr; 3901 switch (iocp->ioc_cmd) { 3902 case ND_SET: 3903 case ND_GET: 3904 if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) { 3905 qreply(q, mp); 3906 return (0); 3907 } else { 3908 iocp->ioc_error = ENOENT; 3909 } 3910 /* FALLTHRU */ 3911 default: 3912 /* We really don't support any other ioctls, do we? */ 3913 3914 /* Return EINVAL */ 3915 if (iocp->ioc_error != ENOENT) 3916 iocp->ioc_error = EINVAL; 3917 iocp->ioc_count = 0; 3918 mp->b_datap->db_type = M_IOCACK; 3919 qreply(q, mp); 3920 return (0); 3921 } 3922 default: 3923 esp3dbg(espstack, 3924 ("Got default message, type %d, passing to IP.\n", 3925 mp->b_datap->db_type)); 3926 putnext(q, mp); 3927 } 3928 return (0); 3929 } 3930 3931 /* 3932 * Wrapper to allow IP to trigger an ESP association failure message 3933 * during inbound SA selection. 3934 */ 3935 void 3936 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt, 3937 uint32_t spi, void *addr, int af, ip_recv_attr_t *ira) 3938 { 3939 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 3940 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3941 ipsec_stack_t *ipss = ns->netstack_ipsec; 3942 3943 if (espstack->ipsecesp_log_unknown_spi) { 3944 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi, 3945 addr, af, espstack->ipsecesp_netstack); 3946 } 3947 3948 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 3949 DROPPER(ipss, ipds_esp_no_sa), 3950 &espstack->esp_dropper); 3951 } 3952 3953 /* 3954 * Initialize the ESP input and output processing functions. 3955 */ 3956 void 3957 ipsecesp_init_funcs(ipsa_t *sa) 3958 { 3959 if (sa->ipsa_output_func == NULL) 3960 sa->ipsa_output_func = esp_outbound; 3961 if (sa->ipsa_input_func == NULL) 3962 sa->ipsa_input_func = esp_inbound; 3963 } 3964