1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/errno.h> 30 #include <sys/strlog.h> 31 #include <sys/tihdr.h> 32 #include <sys/socket.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/kmem.h> 36 #include <sys/zone.h> 37 #include <sys/sysmacros.h> 38 #include <sys/cmn_err.h> 39 #include <sys/vtrace.h> 40 #include <sys/debug.h> 41 #include <sys/atomic.h> 42 #include <sys/strsun.h> 43 #include <sys/random.h> 44 #include <netinet/in.h> 45 #include <net/if.h> 46 #include <netinet/ip6.h> 47 #include <net/pfkeyv2.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/nd.h> 52 #include <inet/ip.h> 53 #include <inet/ip_impl.h> 54 #include <inet/ip6.h> 55 #include <inet/sadb.h> 56 #include <inet/ipsec_info.h> 57 #include <inet/ipsec_impl.h> 58 #include <inet/ipsecesp.h> 59 #include <inet/ipdrop.h> 60 #include <inet/tcp.h> 61 #include <sys/kstat.h> 62 #include <sys/policy.h> 63 #include <sys/strsun.h> 64 #include <inet/udp_impl.h> 65 
#include <sys/taskq.h>
#include <sys/note.h>

#include <sys/iphada.h>

/*
 * Table of ND variables supported by ipsecesp.  Each netstack gets its own
 * copy of this table (see ipsecesp_stack_init()), and the entries of that
 * copy are registered with ipsecesp_g_nd by ipsecesp_param_register().
 * All of these are alterable, within the min/max values given, at run time.
 *
 * NOTE: the accessor macros below index this table by position, so the
 * order of the entries and the macro indices must be kept in sync.
 */
static	ipsecespparam_t	lcl_param_arr[] = {
	/* min	max			value			name */
	{ 0,	3,			0,	"ipsecesp_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
	{ 1,	10,			1,	"ipsecesp_reap_delay"},
	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecesp_replay_size"},
	{ 1,	300,			15,	"ipsecesp_acquire_timeout"},
	{ 1,	1800,			90,	"ipsecesp_larval_timeout"},
	/* Default lifetime values for ACQUIRE messages. */
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
	{ 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
	{ 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
	{ 0,	1,		0,	"ipsecesp_log_unknown_spi"},
	{ 0,	2,		1,	"ipsecesp_padding_check"},
	{ 0,	600,		20,	"ipsecesp_nat_keepalive_interval"},
};

/*
 * Accessors for the per-stack parameter copy.  They are meant to be used as
 * "espstack->ipsecesp_xxx", which expands to a member of
 * espstack->ipsecesp_params[].
 */
#define	ipsecesp_debug	ipsecesp_params[0].ipsecesp_param_value
#define	ipsecesp_age_interval ipsecesp_params[1].ipsecesp_param_value
#define	ipsecesp_age_int_max	ipsecesp_params[1].ipsecesp_param_max
#define	ipsecesp_reap_delay	ipsecesp_params[2].ipsecesp_param_value
#define	ipsecesp_replay_size	ipsecesp_params[3].ipsecesp_param_value
#define	ipsecesp_acquire_timeout	\
	ipsecesp_params[4].ipsecesp_param_value
#define	ipsecesp_larval_timeout	\
	ipsecesp_params[5].ipsecesp_param_value
#define	ipsecesp_default_soft_bytes	\
	ipsecesp_params[6].ipsecesp_param_value
#define	ipsecesp_default_hard_bytes	\
	ipsecesp_params[7].ipsecesp_param_value
#define	ipsecesp_default_soft_addtime	\
	ipsecesp_params[8].ipsecesp_param_value
#define	ipsecesp_default_hard_addtime	\
	ipsecesp_params[9].ipsecesp_param_value
#define	ipsecesp_default_soft_usetime	\
	ipsecesp_params[10].ipsecesp_param_value
#define	ipsecesp_default_hard_usetime	\
	ipsecesp_params[11].ipsecesp_param_value
#define	ipsecesp_log_unknown_spi	\
	ipsecesp_params[12].ipsecesp_param_value
#define	ipsecesp_padding_check	\
	ipsecesp_params[13].ipsecesp_param_value
/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */

/*
 * Debug printing.  "a" is a fully parenthesized printf() argument list,
 * e.g. esp1dbg(espstack, ("value is %d\n", v));
 *
 * esp0dbg() always prints; espNdbg() prints only when the per-stack
 * ipsecesp_debug tunable is at least N.  The conditional forms are wrapped
 * in do { } while (0) so that they behave as a single statement and cannot
 * capture a following "else".
 */
#define	esp0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	esp1dbg(espstack, a)					\
do {								\
	if ((espstack)->ipsecesp_debug != 0)			\
		printf a;					\
_NOTE(CONSTCOND)						\
} while (0)
#define	esp2dbg(espstack, a)					\
do {								\
	if ((espstack)->ipsecesp_debug > 1)			\
		printf a;					\
_NOTE(CONSTCOND)						\
} while (0)
#define	esp3dbg(espstack, a)					\
do {								\
	if ((espstack)->ipsecesp_debug > 2)			\
		printf a;					\
_NOTE(CONSTCOND)						\
} while (0)

/* STREAMS entry points and netstack hooks. */
static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ipsecesp_close(queue_t *);
static void ipsecesp_rput(queue_t *, mblk_t *);
static void ipsecesp_wput(queue_t *, mblk_t *);
static void	*ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	ipsecesp_stack_fini(netstackid_t stackid, void *arg);
static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);

static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t);
static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *,
    boolean_t, ipsa_t *);

static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
    ipsecesp_stack_t *);
static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
    kstat_named_t **, ipsecesp_stack_t *);
static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t);
static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *,
    uint_t);
extern void (*cl_inet_getspi)(netstackid_t, uint8_t, uint8_t *, size_t,
    void *);

/* Setable in /etc/system */
uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; 152 153 static struct module_info info = { 154 5137, "ipsecesp", 0, INFPSZ, 65536, 1024 155 }; 156 157 static struct qinit rinit = { 158 (pfi_t)ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 159 NULL 160 }; 161 162 static struct qinit winit = { 163 (pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 164 NULL 165 }; 166 167 struct streamtab ipsecespinfo = { 168 &rinit, &winit, NULL, NULL 169 }; 170 171 static taskq_t *esp_taskq; 172 173 /* 174 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now. 175 * 176 * Question: Do I need this, given that all instance's esps->esps_wq point 177 * to IP? 178 * 179 * Answer: Yes, because I need to know which queue is BOUND to 180 * IPPROTO_ESP 181 */ 182 183 /* 184 * Stats. This may eventually become a full-blown SNMP MIB once that spec 185 * stabilizes. 186 */ 187 188 typedef struct esp_kstats_s { 189 kstat_named_t esp_stat_num_aalgs; 190 kstat_named_t esp_stat_good_auth; 191 kstat_named_t esp_stat_bad_auth; 192 kstat_named_t esp_stat_bad_padding; 193 kstat_named_t esp_stat_replay_failures; 194 kstat_named_t esp_stat_replay_early_failures; 195 kstat_named_t esp_stat_keysock_in; 196 kstat_named_t esp_stat_out_requests; 197 kstat_named_t esp_stat_acquire_requests; 198 kstat_named_t esp_stat_bytes_expired; 199 kstat_named_t esp_stat_out_discards; 200 kstat_named_t esp_stat_in_accelerated; 201 kstat_named_t esp_stat_out_accelerated; 202 kstat_named_t esp_stat_noaccel; 203 kstat_named_t esp_stat_crypto_sync; 204 kstat_named_t esp_stat_crypto_async; 205 kstat_named_t esp_stat_crypto_failures; 206 kstat_named_t esp_stat_num_ealgs; 207 kstat_named_t esp_stat_bad_decrypt; 208 kstat_named_t esp_stat_sa_port_renumbers; 209 } esp_kstats_t; 210 211 /* 212 * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if 213 * kstat_create_netstack for espstack->esp_ksp succeeds, but when it 214 * fails, it will be NULL. 
Note this is done for all stack instances, 215 * so it *could* fail. hence a non-NULL checking is done for 216 * ESP_BUMP_STAT and ESP_DEBUMP_STAT 217 */ 218 #define ESP_BUMP_STAT(espstack, x) \ 219 do { \ 220 if (espstack->esp_kstats != NULL) \ 221 (espstack->esp_kstats->esp_stat_ ## x).value.ui64++; \ 222 _NOTE(CONSTCOND) \ 223 } while (0) 224 225 #define ESP_DEBUMP_STAT(espstack, x) \ 226 do { \ 227 if (espstack->esp_kstats != NULL) \ 228 (espstack->esp_kstats->esp_stat_ ## x).value.ui64--; \ 229 _NOTE(CONSTCOND) \ 230 } while (0) 231 232 static int esp_kstat_update(kstat_t *, int); 233 234 static boolean_t 235 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) 236 { 237 espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", 238 "net", KSTAT_TYPE_NAMED, 239 sizeof (esp_kstats_t) / sizeof (kstat_named_t), 240 KSTAT_FLAG_PERSISTENT, stackid); 241 242 if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) 243 return (B_FALSE); 244 245 espstack->esp_kstats = espstack->esp_ksp->ks_data; 246 247 espstack->esp_ksp->ks_update = esp_kstat_update; 248 espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid; 249 250 #define K64 KSTAT_DATA_UINT64 251 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64) 252 253 KI(num_aalgs); 254 KI(num_ealgs); 255 KI(good_auth); 256 KI(bad_auth); 257 KI(bad_padding); 258 KI(replay_failures); 259 KI(replay_early_failures); 260 KI(keysock_in); 261 KI(out_requests); 262 KI(acquire_requests); 263 KI(bytes_expired); 264 KI(out_discards); 265 KI(in_accelerated); 266 KI(out_accelerated); 267 KI(noaccel); 268 KI(crypto_sync); 269 KI(crypto_async); 270 KI(crypto_failures); 271 KI(bad_decrypt); 272 KI(sa_port_renumbers); 273 274 #undef KI 275 #undef K64 276 277 kstat_install(espstack->esp_ksp); 278 279 return (B_TRUE); 280 } 281 282 static int 283 esp_kstat_update(kstat_t *kp, int rw) 284 { 285 esp_kstats_t *ekp; 286 netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; 287 
netstack_t *ns; 288 ipsec_stack_t *ipss; 289 290 if ((kp == NULL) || (kp->ks_data == NULL)) 291 return (EIO); 292 293 if (rw == KSTAT_WRITE) 294 return (EACCES); 295 296 ns = netstack_find_by_stackid(stackid); 297 if (ns == NULL) 298 return (-1); 299 ipss = ns->netstack_ipsec; 300 if (ipss == NULL) { 301 netstack_rele(ns); 302 return (-1); 303 } 304 ekp = (esp_kstats_t *)kp->ks_data; 305 306 mutex_enter(&ipss->ipsec_alg_lock); 307 ekp->esp_stat_num_aalgs.value.ui64 = 308 ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; 309 ekp->esp_stat_num_ealgs.value.ui64 = 310 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 311 mutex_exit(&ipss->ipsec_alg_lock); 312 313 netstack_rele(ns); 314 return (0); 315 } 316 317 #ifdef DEBUG 318 /* 319 * Debug routine, useful to see pre-encryption data. 320 */ 321 static char * 322 dump_msg(mblk_t *mp) 323 { 324 char tmp_str[3], tmp_line[256]; 325 326 while (mp != NULL) { 327 unsigned char *ptr; 328 329 printf("mblk address 0x%p, length %ld, db_ref %d " 330 "type %d, base 0x%p, lim 0x%p\n", 331 (void *) mp, (long)(mp->b_wptr - mp->b_rptr), 332 mp->b_datap->db_ref, mp->b_datap->db_type, 333 (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim); 334 ptr = mp->b_rptr; 335 336 tmp_line[0] = '\0'; 337 while (ptr < mp->b_wptr) { 338 uint_t diff; 339 340 diff = (ptr - mp->b_rptr); 341 if (!(diff & 0x1f)) { 342 if (strlen(tmp_line) > 0) { 343 printf("bytes: %s\n", tmp_line); 344 tmp_line[0] = '\0'; 345 } 346 } 347 if (!(diff & 0x3)) 348 (void) strcat(tmp_line, " "); 349 (void) sprintf(tmp_str, "%02x", *ptr); 350 (void) strcat(tmp_line, tmp_str); 351 ptr++; 352 } 353 if (strlen(tmp_line) > 0) 354 printf("bytes: %s\n", tmp_line); 355 356 mp = mp->b_cont; 357 } 358 359 return ("\n"); 360 } 361 362 #else /* DEBUG */ 363 static char * 364 dump_msg(mblk_t *mp) 365 { 366 printf("Find value of mp %p.\n", mp); 367 return ("\n"); 368 } 369 #endif /* DEBUG */ 370 371 /* 372 * Don't have to lock age_interval, as only one thread will access it at 373 * a time, because I control the 
one function that does with timeout(). 374 */ 375 static void 376 esp_ager(void *arg) 377 { 378 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 379 netstack_t *ns = espstack->ipsecesp_netstack; 380 hrtime_t begin = gethrtime(); 381 382 sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 383 espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); 384 sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q, 385 espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); 386 387 espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q, 388 esp_ager, espstack, 389 &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max, 390 info.mi_idnum); 391 } 392 393 /* 394 * Get an ESP NDD parameter. 395 */ 396 /* ARGSUSED */ 397 static int 398 ipsecesp_param_get(q, mp, cp, cr) 399 queue_t *q; 400 mblk_t *mp; 401 caddr_t cp; 402 cred_t *cr; 403 { 404 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 405 uint_t value; 406 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 407 408 mutex_enter(&espstack->ipsecesp_param_lock); 409 value = ipsecesppa->ipsecesp_param_value; 410 mutex_exit(&espstack->ipsecesp_param_lock); 411 412 (void) mi_mpprintf(mp, "%u", value); 413 return (0); 414 } 415 416 /* 417 * This routine sets an NDD variable in a ipsecespparam_t structure. 418 */ 419 /* ARGSUSED */ 420 static int 421 ipsecesp_param_set(q, mp, value, cp, cr) 422 queue_t *q; 423 mblk_t *mp; 424 char *value; 425 caddr_t cp; 426 cred_t *cr; 427 { 428 ulong_t new_value; 429 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 430 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 431 432 /* 433 * Fail the request if the new value does not lie within the 434 * required bounds. 
435 */ 436 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 437 new_value < ipsecesppa->ipsecesp_param_min || 438 new_value > ipsecesppa->ipsecesp_param_max) { 439 return (EINVAL); 440 } 441 442 /* Set the new value */ 443 mutex_enter(&espstack->ipsecesp_param_lock); 444 ipsecesppa->ipsecesp_param_value = new_value; 445 mutex_exit(&espstack->ipsecesp_param_lock); 446 return (0); 447 } 448 449 /* 450 * Using lifetime NDD variables, fill in an extended combination's 451 * lifetime information. 452 */ 453 void 454 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) 455 { 456 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 457 458 ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes; 459 ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes; 460 ecomb->sadb_x_ecomb_soft_addtime = 461 espstack->ipsecesp_default_soft_addtime; 462 ecomb->sadb_x_ecomb_hard_addtime = 463 espstack->ipsecesp_default_hard_addtime; 464 ecomb->sadb_x_ecomb_soft_usetime = 465 espstack->ipsecesp_default_soft_usetime; 466 ecomb->sadb_x_ecomb_hard_usetime = 467 espstack->ipsecesp_default_hard_usetime; 468 } 469 470 /* 471 * Initialize things for ESP at module load time. 472 */ 473 boolean_t 474 ipsecesp_ddi_init(void) 475 { 476 esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, 477 IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); 478 479 /* 480 * We want to be informed each time a stack is created or 481 * destroyed in the kernel, so we can maintain the 482 * set of ipsecesp_stack_t's. 483 */ 484 netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL, 485 ipsecesp_stack_fini); 486 487 return (B_TRUE); 488 } 489 490 /* 491 * Walk through the param array specified registering each element with the 492 * named dispatch handler. 
493 */ 494 static boolean_t 495 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt) 496 { 497 for (; cnt-- > 0; espp++) { 498 if (espp->ipsecesp_param_name != NULL && 499 espp->ipsecesp_param_name[0]) { 500 if (!nd_load(ndp, 501 espp->ipsecesp_param_name, 502 ipsecesp_param_get, ipsecesp_param_set, 503 (caddr_t)espp)) { 504 nd_free(ndp); 505 return (B_FALSE); 506 } 507 } 508 } 509 return (B_TRUE); 510 } 511 /* 512 * Initialize things for ESP for each stack instance 513 */ 514 static void * 515 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns) 516 { 517 ipsecesp_stack_t *espstack; 518 ipsecespparam_t *espp; 519 520 espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack), 521 KM_SLEEP); 522 espstack->ipsecesp_netstack = ns; 523 524 espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 525 espstack->ipsecesp_params = espp; 526 bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr)); 527 528 (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp, 529 A_CNT(lcl_param_arr)); 530 531 (void) esp_kstat_init(espstack, stackid); 532 533 espstack->esp_sadb.s_acquire_timeout = 534 &espstack->ipsecesp_acquire_timeout; 535 espstack->esp_sadb.s_acqfn = esp_send_acquire; 536 sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size, 537 espstack->ipsecesp_netstack); 538 539 mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0); 540 541 ip_drop_register(&espstack->esp_dropper, "IPsec ESP"); 542 return (espstack); 543 } 544 545 /* 546 * Destroy things for ESP at module unload time. 
547 */ 548 void 549 ipsecesp_ddi_destroy(void) 550 { 551 netstack_unregister(NS_IPSECESP); 552 taskq_destroy(esp_taskq); 553 } 554 555 /* 556 * Destroy things for ESP for one stack instance 557 */ 558 static void 559 ipsecesp_stack_fini(netstackid_t stackid, void *arg) 560 { 561 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 562 563 if (espstack->esp_pfkey_q != NULL) { 564 (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event); 565 } 566 espstack->esp_sadb.s_acqfn = NULL; 567 espstack->esp_sadb.s_acquire_timeout = NULL; 568 sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack); 569 ip_drop_unregister(&espstack->esp_dropper); 570 mutex_destroy(&espstack->ipsecesp_param_lock); 571 nd_free(&espstack->ipsecesp_g_nd); 572 573 kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr)); 574 espstack->ipsecesp_params = NULL; 575 kstat_delete_netstack(espstack->esp_ksp, stackid); 576 espstack->esp_ksp = NULL; 577 espstack->esp_kstats = NULL; 578 kmem_free(espstack, sizeof (*espstack)); 579 } 580 581 /* 582 * ESP module open routine. 583 */ 584 /* ARGSUSED */ 585 static int 586 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 587 { 588 netstack_t *ns; 589 ipsecesp_stack_t *espstack; 590 591 if (secpolicy_ip_config(credp, B_FALSE) != 0) 592 return (EPERM); 593 594 if (q->q_ptr != NULL) 595 return (0); /* Re-open of an already open instance. */ 596 597 if (sflag != MODOPEN) 598 return (EINVAL); 599 600 ns = netstack_find_by_cred(credp); 601 ASSERT(ns != NULL); 602 espstack = ns->netstack_ipsecesp; 603 ASSERT(espstack != NULL); 604 605 /* 606 * ASSUMPTIONS (because I'm MT_OCEXCL): 607 * 608 * * I'm being pushed on top of IP for all my opens (incl. #1). 609 * * Only ipsecesp_open() can write into esp_sadb.s_ip_q. 610 * * Because of this, I can check lazily for esp_sadb.s_ip_q. 611 * 612 * If these assumptions are wrong, I'm in BIG trouble... 
613 */ 614 615 q->q_ptr = espstack; 616 WR(q)->q_ptr = q->q_ptr; 617 618 if (espstack->esp_sadb.s_ip_q == NULL) { 619 struct T_unbind_req *tur; 620 621 espstack->esp_sadb.s_ip_q = WR(q); 622 /* Allocate an unbind... */ 623 espstack->esp_ip_unbind = allocb(sizeof (struct T_unbind_req), 624 BPRI_HI); 625 626 /* 627 * Send down T_BIND_REQ to bind IPPROTO_ESP. 628 * Handle the ACK here in ESP. 629 */ 630 qprocson(q); 631 if (espstack->esp_ip_unbind == NULL || 632 !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) { 633 if (espstack->esp_ip_unbind != NULL) { 634 freeb(espstack->esp_ip_unbind); 635 espstack->esp_ip_unbind = NULL; 636 } 637 q->q_ptr = NULL; 638 netstack_rele(espstack->ipsecesp_netstack); 639 return (ENOMEM); 640 } 641 642 espstack->esp_ip_unbind->b_datap->db_type = M_PROTO; 643 tur = (struct T_unbind_req *)espstack->esp_ip_unbind->b_rptr; 644 tur->PRIM_type = T_UNBIND_REQ; 645 } else { 646 qprocson(q); 647 } 648 649 /* 650 * For now, there's not much I can do. I'll be getting a message 651 * passed down to me from keysock (in my wput), and a T_BIND_ACK 652 * up from IP (in my rput). 653 */ 654 655 return (0); 656 } 657 658 /* 659 * ESP module close routine. 660 */ 661 static int 662 ipsecesp_close(queue_t *q) 663 { 664 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 665 666 /* 667 * If esp_sadb.s_ip_q is attached to this instance, send a 668 * T_UNBIND_REQ to IP for the instance before doing 669 * a qprocsoff(). 670 */ 671 if (WR(q) == espstack->esp_sadb.s_ip_q && 672 espstack->esp_ip_unbind != NULL) { 673 putnext(WR(q), espstack->esp_ip_unbind); 674 espstack->esp_ip_unbind = NULL; 675 } 676 677 /* 678 * Clean up q_ptr, if needed. 679 */ 680 qprocsoff(q); 681 682 /* Keysock queue check is safe, because of OCEXCL perimeter. */ 683 684 if (q == espstack->esp_pfkey_q) { 685 esp1dbg(espstack, 686 ("ipsecesp_close: Ummm... keysock is closing ESP.\n")); 687 espstack->esp_pfkey_q = NULL; 688 /* Detach qtimeouts. 
*/ 689 (void) quntimeout(q, espstack->esp_event); 690 } 691 692 if (WR(q) == espstack->esp_sadb.s_ip_q) { 693 /* 694 * If the esp_sadb.s_ip_q is attached to this instance, find 695 * another. The OCEXCL outer perimeter helps us here. 696 */ 697 espstack->esp_sadb.s_ip_q = NULL; 698 699 /* 700 * Find a replacement queue for esp_sadb.s_ip_q. 701 */ 702 if (espstack->esp_pfkey_q != NULL && 703 espstack->esp_pfkey_q != RD(q)) { 704 /* 705 * See if we can use the pfkey_q. 706 */ 707 espstack->esp_sadb.s_ip_q = WR(espstack->esp_pfkey_q); 708 } 709 710 if (espstack->esp_sadb.s_ip_q == NULL || 711 !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) { 712 esp1dbg(espstack, ("ipsecesp: Can't reassign ip_q.\n")); 713 espstack->esp_sadb.s_ip_q = NULL; 714 } else { 715 espstack->esp_ip_unbind = 716 allocb(sizeof (struct T_unbind_req), BPRI_HI); 717 718 if (espstack->esp_ip_unbind != NULL) { 719 struct T_unbind_req *tur; 720 721 espstack->esp_ip_unbind->b_datap->db_type = 722 M_PROTO; 723 tur = (struct T_unbind_req *) 724 espstack->esp_ip_unbind->b_rptr; 725 tur->PRIM_type = T_UNBIND_REQ; 726 } 727 /* If it's NULL, I can't do much here. */ 728 } 729 } 730 731 netstack_rele(espstack->ipsecesp_netstack); 732 return (0); 733 } 734 735 /* 736 * Add a number of bytes to what the SA has protected so far. Return 737 * B_TRUE if the SA can still protect that many bytes. 738 * 739 * Caller must REFRELE the passed-in assoc. This function must REFRELE 740 * any obtained peer SA. 741 */ 742 static boolean_t 743 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) 744 { 745 ipsa_t *inassoc, *outassoc; 746 isaf_t *bucket; 747 boolean_t inrc, outrc, isv6; 748 sadb_t *sp; 749 int outhash; 750 netstack_t *ns = assoc->ipsa_netstack; 751 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 752 753 /* No peer? No problem! 
*/ 754 if (!assoc->ipsa_haspeer) { 755 return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes, 756 B_TRUE)); 757 } 758 759 /* 760 * Otherwise, we want to grab both the original assoc and its peer. 761 * There might be a race for this, but if it's a real race, two 762 * expire messages may occur. We limit this by only sending the 763 * expire message on one of the peers, we'll pick the inbound 764 * arbitrarily. 765 * 766 * If we need tight synchronization on the peer SA, then we need to 767 * reconsider. 768 */ 769 770 /* Use address length to select IPv6/IPv4 */ 771 isv6 = (assoc->ipsa_addrfam == AF_INET6); 772 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 773 774 if (inbound) { 775 inassoc = assoc; 776 if (isv6) { 777 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 778 &inassoc->ipsa_dstaddr)); 779 } else { 780 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 781 &inassoc->ipsa_dstaddr)); 782 } 783 bucket = &sp->sdb_of[outhash]; 784 mutex_enter(&bucket->isaf_lock); 785 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 786 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 787 inassoc->ipsa_addrfam); 788 mutex_exit(&bucket->isaf_lock); 789 if (outassoc == NULL) { 790 /* Q: Do we wish to set haspeer == B_FALSE? */ 791 esp0dbg(("esp_age_bytes: " 792 "can't find peer for inbound.\n")); 793 return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc, 794 bytes, B_TRUE)); 795 } 796 } else { 797 outassoc = assoc; 798 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 799 mutex_enter(&bucket->isaf_lock); 800 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 801 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 802 outassoc->ipsa_addrfam); 803 mutex_exit(&bucket->isaf_lock); 804 if (inassoc == NULL) { 805 /* Q: Do we wish to set haspeer == B_FALSE? 
*/ 806 esp0dbg(("esp_age_bytes: " 807 "can't find peer for outbound.\n")); 808 return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc, 809 bytes, B_TRUE)); 810 } 811 } 812 813 inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE); 814 outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE); 815 816 /* 817 * REFRELE any peer SA. 818 * 819 * Because of the multi-line macro nature of IPSA_REFRELE, keep 820 * them in { }. 821 */ 822 if (inbound) { 823 IPSA_REFRELE(outassoc); 824 } else { 825 IPSA_REFRELE(inassoc); 826 } 827 828 return (inrc && outrc); 829 } 830 831 /* 832 * Do incoming NAT-T manipulations for packet. 833 */ 834 static ipsec_status_t 835 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc) 836 { 837 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 838 tcpha_t *tcph; 839 udpha_t *udpha; 840 /* Initialize to our inbound cksum adjustment... */ 841 uint32_t sum = assoc->ipsa_inbound_cksum; 842 843 switch (ipha->ipha_protocol) { 844 case IPPROTO_TCP: 845 tcph = (tcpha_t *)(data_mp->b_rptr + 846 IPH_HDR_LENGTH(ipha)); 847 848 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16) 849 sum += ~ntohs(tcph->tha_sum) & 0xFFFF; 850 DOWN_SUM(sum); 851 DOWN_SUM(sum); 852 tcph->tha_sum = ~htons(sum); 853 break; 854 case IPPROTO_UDP: 855 udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha)); 856 857 if (udpha->uha_checksum != 0) { 858 /* Adujst if the inbound one was not zero. */ 859 sum += ~ntohs(udpha->uha_checksum) & 0xFFFF; 860 DOWN_SUM(sum); 861 DOWN_SUM(sum); 862 udpha->uha_checksum = ~htons(sum); 863 if (udpha->uha_checksum == 0) 864 udpha->uha_checksum = 0xFFFF; 865 } 866 #undef DOWN_SUM 867 break; 868 case IPPROTO_IP: 869 /* 870 * This case is only an issue for self-encapsulated 871 * packets. So for now, fall through. 872 */ 873 break; 874 } 875 return (IPSEC_STATUS_SUCCESS); 876 } 877 878 879 /* 880 * Strip ESP header, check padding, and fix IP header. 881 * Returns B_TRUE on success, B_FALSE if an error occured. 
882 */ 883 static boolean_t 884 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, 885 kstat_named_t **counter, ipsecesp_stack_t *espstack) 886 { 887 ipha_t *ipha; 888 ip6_t *ip6h; 889 uint_t divpoint; 890 mblk_t *scratch; 891 uint8_t nexthdr, padlen; 892 uint8_t lastpad; 893 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 894 uint8_t *lastbyte; 895 896 /* 897 * Strip ESP data and fix IP header. 898 * 899 * XXX In case the beginning of esp_inbound() changes to not do a 900 * pullup, this part of the code can remain unchanged. 901 */ 902 if (isv4) { 903 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t)); 904 ipha = (ipha_t *)data_mp->b_rptr; 905 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) + 906 IPH_HDR_LENGTH(ipha)); 907 divpoint = IPH_HDR_LENGTH(ipha); 908 } else { 909 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t)); 910 ip6h = (ip6_t *)data_mp->b_rptr; 911 divpoint = ip_hdr_length_v6(data_mp, ip6h); 912 } 913 914 scratch = data_mp; 915 while (scratch->b_cont != NULL) 916 scratch = scratch->b_cont; 917 918 ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3); 919 920 /* 921 * "Next header" and padding length are the last two bytes in the 922 * ESP-protected datagram, thus the explicit - 1 and - 2. 923 * lastpad is the last byte of the padding, which can be used for 924 * a quick check to see if the padding is correct. 925 */ 926 lastbyte = scratch->b_wptr - 1; 927 nexthdr = *lastbyte--; 928 padlen = *lastbyte--; 929 930 if (isv4) { 931 /* Fix part of the IP header. */ 932 ipha->ipha_protocol = nexthdr; 933 /* 934 * Reality check the padlen. The explicit - 2 is for the 935 * padding length and the next-header bytes. 
936 */ 937 if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 - 938 sizeof (esph_t) - ivlen) { 939 ESP_BUMP_STAT(espstack, bad_decrypt); 940 ipsec_rl_strlog(espstack->ipsecesp_netstack, 941 info.mi_idnum, 0, 0, 942 SL_ERROR | SL_WARN, 943 "Corrupt ESP packet (padlen too big).\n"); 944 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 945 padlen)); 946 esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp " 947 "hdr - ivlen(%d) = %d.\n", 948 ntohs(ipha->ipha_length), ivlen, 949 (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) - 950 2 - sizeof (esph_t) - ivlen))); 951 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 952 return (B_FALSE); 953 } 954 955 /* 956 * Fix the rest of the header. The explicit - 2 is for the 957 * padding length and the next-header bytes. 958 */ 959 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen - 960 2 - sizeof (esph_t) - ivlen); 961 ipha->ipha_hdr_checksum = 0; 962 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 963 } else { 964 if (ip6h->ip6_nxt == IPPROTO_ESP) { 965 ip6h->ip6_nxt = nexthdr; 966 } else { 967 ip6_pkt_t ipp; 968 969 bzero(&ipp, sizeof (ipp)); 970 (void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL); 971 if (ipp.ipp_dstopts != NULL) { 972 ipp.ipp_dstopts->ip6d_nxt = nexthdr; 973 } else if (ipp.ipp_rthdr != NULL) { 974 ipp.ipp_rthdr->ip6r_nxt = nexthdr; 975 } else if (ipp.ipp_hopopts != NULL) { 976 ipp.ipp_hopopts->ip6h_nxt = nexthdr; 977 } else { 978 /* Panic a DEBUG kernel. */ 979 ASSERT(ipp.ipp_hopopts != NULL); 980 /* Otherwise, pretend it's IP + ESP. 
*/ 981 cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n"); 982 ip6h->ip6_nxt = nexthdr; 983 } 984 } 985 986 if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) - 987 ivlen) { 988 ESP_BUMP_STAT(espstack, bad_decrypt); 989 ipsec_rl_strlog(espstack->ipsecesp_netstack, 990 info.mi_idnum, 0, 0, 991 SL_ERROR | SL_WARN, 992 "Corrupt ESP packet (v6 padlen too big).\n"); 993 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 994 padlen)); 995 esp1dbg(espstack, 996 ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = " 997 "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen) 998 + sizeof (ip6_t)), ivlen, 999 (unsigned)(ntohs(ip6h->ip6_plen) - 2 - 1000 sizeof (esph_t) - ivlen))); 1001 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 1002 return (B_FALSE); 1003 } 1004 1005 1006 /* 1007 * Fix the rest of the header. The explicit - 2 is for the 1008 * padding length and the next-header bytes. IPv6 is nice, 1009 * because there's no hdr checksum! 1010 */ 1011 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen - 1012 2 - sizeof (esph_t) - ivlen); 1013 } 1014 1015 if (espstack->ipsecesp_padding_check > 0 && padlen > 0) { 1016 /* 1017 * Weak padding check: compare last-byte to length, they 1018 * should be equal. 1019 */ 1020 lastpad = *lastbyte--; 1021 1022 if (padlen != lastpad) { 1023 ipsec_rl_strlog(espstack->ipsecesp_netstack, 1024 info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 1025 "Corrupt ESP packet (lastpad != padlen).\n"); 1026 esp1dbg(espstack, 1027 ("lastpad (%d) not equal to padlen (%d):\n", 1028 lastpad, padlen)); 1029 ESP_BUMP_STAT(espstack, bad_padding); 1030 *counter = DROPPER(ipss, ipds_esp_bad_padding); 1031 return (B_FALSE); 1032 } 1033 1034 /* 1035 * Strong padding check: Check all pad bytes to see that 1036 * they're ascending. Go backwards using a descending counter 1037 * to verify. padlen == 1 is checked by previous block, so 1038 * only bother if we've more than 1 byte of padding. 1039 * Consequently, start the check one byte before the location 1040 * of "lastpad". 
1041 */ 1042 if (espstack->ipsecesp_padding_check > 1) { 1043 /* 1044 * This assert may have to become an if and a pullup 1045 * if we start accepting multi-dblk mblks. For now, 1046 * though, any packet here will have been pulled up in 1047 * esp_inbound. 1048 */ 1049 ASSERT(MBLKL(scratch) >= lastpad + 3); 1050 1051 /* 1052 * Use "--lastpad" because we already checked the very 1053 * last pad byte previously. 1054 */ 1055 while (--lastpad != 0) { 1056 if (lastpad != *lastbyte) { 1057 ipsec_rl_strlog( 1058 espstack->ipsecesp_netstack, 1059 info.mi_idnum, 0, 0, 1060 SL_ERROR | SL_WARN, "Corrupt ESP " 1061 "packet (bad padding).\n"); 1062 esp1dbg(espstack, 1063 ("padding not in correct" 1064 " format:\n")); 1065 ESP_BUMP_STAT(espstack, bad_padding); 1066 *counter = DROPPER(ipss, 1067 ipds_esp_bad_padding); 1068 return (B_FALSE); 1069 } 1070 lastbyte--; 1071 } 1072 } 1073 } 1074 1075 /* Trim off the padding. */ 1076 ASSERT(data_mp->b_cont == NULL); 1077 data_mp->b_wptr -= (padlen + 2); 1078 1079 /* 1080 * Remove the ESP header. 1081 * 1082 * The above assertions about data_mp's size will make this work. 1083 * 1084 * XXX Question: If I send up and get back a contiguous mblk, 1085 * would it be quicker to bcopy over, or keep doing the dupb stuff? 1086 * I go with copying for now. 
1087 */ 1088 1089 if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) && 1090 IS_P2ALIGNED(ivlen, sizeof (uint32_t))) { 1091 uint8_t *start = data_mp->b_rptr; 1092 uint32_t *src, *dst; 1093 1094 src = (uint32_t *)(start + divpoint); 1095 dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen); 1096 1097 ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) && 1098 IS_P2ALIGNED(src, sizeof (uint32_t))); 1099 1100 do { 1101 src--; 1102 dst--; 1103 *dst = *src; 1104 } while (src != (uint32_t *)start); 1105 1106 data_mp->b_rptr = (uchar_t *)dst; 1107 } else { 1108 uint8_t *start = data_mp->b_rptr; 1109 uint8_t *src, *dst; 1110 1111 src = start + divpoint; 1112 dst = src + sizeof (esph_t) + ivlen; 1113 1114 do { 1115 src--; 1116 dst--; 1117 *dst = *src; 1118 } while (src != start); 1119 1120 data_mp->b_rptr = dst; 1121 } 1122 1123 esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n")); 1124 esp2dbg(espstack, (dump_msg(data_mp))); 1125 1126 return (B_TRUE); 1127 } 1128 1129 /* 1130 * Updating use times can be tricky business if the ipsa_haspeer flag is 1131 * set. This function is called once in an SA's lifetime. 1132 * 1133 * Caller has to REFRELE "assoc" which is passed in. This function has 1134 * to REFRELE any peer SA that is obtained. 1135 */ 1136 static void 1137 esp_set_usetime(ipsa_t *assoc, boolean_t inbound) 1138 { 1139 ipsa_t *inassoc, *outassoc; 1140 isaf_t *bucket; 1141 sadb_t *sp; 1142 int outhash; 1143 boolean_t isv6; 1144 netstack_t *ns = assoc->ipsa_netstack; 1145 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1146 1147 /* No peer? No problem! */ 1148 if (!assoc->ipsa_haspeer) { 1149 sadb_set_usetime(assoc); 1150 return; 1151 } 1152 1153 /* 1154 * Otherwise, we want to grab both the original assoc and its peer. 1155 * There might be a race for this, but if it's a real race, the times 1156 * will be out-of-synch by at most a second, and since our time 1157 * granularity is a second, this won't be a problem. 
1158 * 1159 * If we need tight synchronization on the peer SA, then we need to 1160 * reconsider. 1161 */ 1162 1163 /* Use address length to select IPv6/IPv4 */ 1164 isv6 = (assoc->ipsa_addrfam == AF_INET6); 1165 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 1166 1167 if (inbound) { 1168 inassoc = assoc; 1169 if (isv6) { 1170 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 1171 &inassoc->ipsa_dstaddr)); 1172 } else { 1173 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 1174 &inassoc->ipsa_dstaddr)); 1175 } 1176 bucket = &sp->sdb_of[outhash]; 1177 mutex_enter(&bucket->isaf_lock); 1178 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 1179 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 1180 inassoc->ipsa_addrfam); 1181 mutex_exit(&bucket->isaf_lock); 1182 if (outassoc == NULL) { 1183 /* Q: Do we wish to set haspeer == B_FALSE? */ 1184 esp0dbg(("esp_set_usetime: " 1185 "can't find peer for inbound.\n")); 1186 sadb_set_usetime(inassoc); 1187 return; 1188 } 1189 } else { 1190 outassoc = assoc; 1191 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 1192 mutex_enter(&bucket->isaf_lock); 1193 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 1194 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 1195 outassoc->ipsa_addrfam); 1196 mutex_exit(&bucket->isaf_lock); 1197 if (inassoc == NULL) { 1198 /* Q: Do we wish to set haspeer == B_FALSE? */ 1199 esp0dbg(("esp_set_usetime: " 1200 "can't find peer for outbound.\n")); 1201 sadb_set_usetime(outassoc); 1202 return; 1203 } 1204 } 1205 1206 /* Update usetime on both. */ 1207 sadb_set_usetime(inassoc); 1208 sadb_set_usetime(outassoc); 1209 1210 /* 1211 * REFRELE any peer SA. 1212 * 1213 * Because of the multi-line macro nature of IPSA_REFRELE, keep 1214 * them in { }. 1215 */ 1216 if (inbound) { 1217 IPSA_REFRELE(outassoc); 1218 } else { 1219 IPSA_REFRELE(inassoc); 1220 } 1221 } 1222 1223 /* 1224 * Handle ESP inbound data for IPv4 and IPv6. 
1225 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1226 * mblk chain ipsec_in_mp. 1227 */ 1228 ipsec_status_t 1229 esp_inbound(mblk_t *ipsec_in_mp, void *arg) 1230 { 1231 mblk_t *data_mp = ipsec_in_mp->b_cont; 1232 ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr; 1233 esph_t *esph = (esph_t *)arg; 1234 ipsa_t *ipsa = ii->ipsec_in_esp_sa; 1235 netstack_t *ns = ii->ipsec_in_ns; 1236 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1237 ipsec_stack_t *ipss = ns->netstack_ipsec; 1238 1239 /* 1240 * We may wish to check replay in-range-only here as an optimization. 1241 * Include the reality check of ipsa->ipsa_replay > 1242 * ipsa->ipsa_replay_wsize for times when it's the first N packets, 1243 * where N == ipsa->ipsa_replay_wsize. 1244 * 1245 * Another check that may come here later is the "collision" check. 1246 * If legitimate packets flow quickly enough, this won't be a problem, 1247 * but collisions may cause authentication algorithm crunching to 1248 * take place when it doesn't need to. 1249 */ 1250 if (!sadb_replay_peek(ipsa, esph->esph_replay)) { 1251 ESP_BUMP_STAT(espstack, replay_early_failures); 1252 IP_ESP_BUMP_STAT(ipss, in_discards); 1253 /* 1254 * TODO: Extract inbound interface from the IPSEC_IN 1255 * message's ii->ipsec_in_rill_index. 1256 */ 1257 ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, 1258 DROPPER(ipss, ipds_esp_early_replay), 1259 &espstack->esp_dropper); 1260 return (IPSEC_STATUS_FAILED); 1261 } 1262 1263 /* 1264 * Has this packet already been processed by a hardware 1265 * IPsec accelerator? 
1266 */ 1267 if (ii->ipsec_in_accelerated) { 1268 ipsec_status_t rv; 1269 esp3dbg(espstack, 1270 ("esp_inbound: pkt processed by ill=%d isv6=%d\n", 1271 ii->ipsec_in_ill_index, !ii->ipsec_in_v4)); 1272 rv = esp_inbound_accelerated(ipsec_in_mp, 1273 data_mp, ii->ipsec_in_v4, ipsa); 1274 return (rv); 1275 } 1276 ESP_BUMP_STAT(espstack, noaccel); 1277 1278 /* 1279 * Adjust the IP header's payload length to reflect the removal 1280 * of the ICV. 1281 */ 1282 if (!ii->ipsec_in_v4) { 1283 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr; 1284 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - 1285 ipsa->ipsa_mac_len); 1286 } else { 1287 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 1288 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - 1289 ipsa->ipsa_mac_len); 1290 } 1291 1292 /* submit the request to the crypto framework */ 1293 return (esp_submit_req_inbound(ipsec_in_mp, ipsa, 1294 (uint8_t *)esph - data_mp->b_rptr)); 1295 } 1296 1297 /* 1298 * Perform the really difficult work of inserting the proposed situation. 1299 * Called while holding the algorithm lock. 1300 */ 1301 static void 1302 esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) 1303 { 1304 sadb_comb_t *comb = (sadb_comb_t *)(prop + 1); 1305 ipsec_out_t *io; 1306 ipsec_action_t *ap; 1307 ipsec_prot_t *prot; 1308 netstack_t *ns; 1309 ipsecesp_stack_t *espstack; 1310 ipsec_stack_t *ipss; 1311 1312 io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr; 1313 ASSERT(io->ipsec_out_type == IPSEC_OUT); 1314 ns = io->ipsec_out_ns; 1315 espstack = ns->netstack_ipsecesp; 1316 ipss = ns->netstack_ipsec; 1317 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 1318 1319 prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; 1320 prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t)); 1321 *(uint32_t *)(&prop->sadb_prop_replay) = 0; /* Quick zero-out! */ 1322 1323 prop->sadb_prop_replay = espstack->ipsecesp_replay_size; 1324 1325 /* 1326 * Based upon algorithm properties, and what-not, prioritize 1327 * a proposal. 
If the IPSEC_OUT message has an algorithm specified, 1328 * use it first and foremost. 1329 * 1330 * For each action in policy list 1331 * Add combination. If I've hit limit, return. 1332 */ 1333 1334 for (ap = acqrec->ipsacq_act; ap != NULL; 1335 ap = ap->ipa_next) { 1336 ipsec_alginfo_t *ealg = NULL; 1337 ipsec_alginfo_t *aalg = NULL; 1338 1339 if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY) 1340 continue; 1341 1342 prot = &ap->ipa_act.ipa_apply; 1343 1344 if (!(prot->ipp_use_esp)) 1345 continue; 1346 1347 if (prot->ipp_esp_auth_alg != 0) { 1348 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] 1349 [prot->ipp_esp_auth_alg]; 1350 if (aalg == NULL || !ALG_VALID(aalg)) 1351 continue; 1352 } 1353 1354 ASSERT(prot->ipp_encr_alg > 0); 1355 ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] 1356 [prot->ipp_encr_alg]; 1357 if (ealg == NULL || !ALG_VALID(ealg)) 1358 continue; 1359 1360 comb->sadb_comb_flags = 0; 1361 comb->sadb_comb_reserved = 0; 1362 comb->sadb_comb_encrypt = ealg->alg_id; 1363 comb->sadb_comb_encrypt_minbits = 1364 MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits); 1365 comb->sadb_comb_encrypt_maxbits = 1366 MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits); 1367 if (aalg == NULL) { 1368 comb->sadb_comb_auth = 0; 1369 comb->sadb_comb_auth_minbits = 0; 1370 comb->sadb_comb_auth_maxbits = 0; 1371 } else { 1372 comb->sadb_comb_auth = aalg->alg_id; 1373 comb->sadb_comb_auth_minbits = 1374 MAX(prot->ipp_espa_minbits, aalg->alg_ef_minbits); 1375 comb->sadb_comb_auth_maxbits = 1376 MIN(prot->ipp_espa_maxbits, aalg->alg_ef_maxbits); 1377 } 1378 1379 /* 1380 * The following may be based on algorithm 1381 * properties, but in the meantime, we just pick 1382 * some good, sensible numbers. Key mgmt. can 1383 * (and perhaps should) be the place to finalize 1384 * such decisions. 1385 */ 1386 1387 /* 1388 * No limits on allocations, since we really don't 1389 * support that concept currently. 
1390 */ 1391 comb->sadb_comb_soft_allocations = 0; 1392 comb->sadb_comb_hard_allocations = 0; 1393 1394 /* 1395 * These may want to come from policy rule.. 1396 */ 1397 comb->sadb_comb_soft_bytes = 1398 espstack->ipsecesp_default_soft_bytes; 1399 comb->sadb_comb_hard_bytes = 1400 espstack->ipsecesp_default_hard_bytes; 1401 comb->sadb_comb_soft_addtime = 1402 espstack->ipsecesp_default_soft_addtime; 1403 comb->sadb_comb_hard_addtime = 1404 espstack->ipsecesp_default_hard_addtime; 1405 comb->sadb_comb_soft_usetime = 1406 espstack->ipsecesp_default_soft_usetime; 1407 comb->sadb_comb_hard_usetime = 1408 espstack->ipsecesp_default_hard_usetime; 1409 1410 prop->sadb_prop_len += SADB_8TO64(sizeof (*comb)); 1411 if (--combs == 0) 1412 break; /* out of space.. */ 1413 comb++; 1414 } 1415 } 1416 1417 /* 1418 * Prepare and actually send the SADB_ACQUIRE message to PF_KEY. 1419 */ 1420 static void 1421 esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns) 1422 { 1423 uint_t combs; 1424 sadb_msg_t *samsg; 1425 sadb_prop_t *prop; 1426 mblk_t *pfkeymp, *msgmp; 1427 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1428 ipsec_stack_t *ipss = ns->netstack_ipsec; 1429 1430 ESP_BUMP_STAT(espstack, acquire_requests); 1431 1432 if (espstack->esp_pfkey_q == NULL) { 1433 mutex_exit(&acqrec->ipsacq_lock); 1434 return; 1435 } 1436 1437 /* Set up ACQUIRE. */ 1438 pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP, 1439 ns->netstack_ipsec); 1440 if (pfkeymp == NULL) { 1441 esp0dbg(("sadb_setup_acquire failed.\n")); 1442 mutex_exit(&acqrec->ipsacq_lock); 1443 return; 1444 } 1445 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 1446 combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] * 1447 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 1448 msgmp = pfkeymp->b_cont; 1449 samsg = (sadb_msg_t *)(msgmp->b_rptr); 1450 1451 /* Insert proposal here. 
*/ 1452 1453 prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len); 1454 esp_insert_prop(prop, acqrec, combs); 1455 samsg->sadb_msg_len += prop->sadb_prop_len; 1456 msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len); 1457 1458 mutex_exit(&ipss->ipsec_alg_lock); 1459 1460 /* 1461 * Must mutex_exit() before sending PF_KEY message up, in 1462 * order to avoid recursive mutex_enter() if there are no registered 1463 * listeners. 1464 * 1465 * Once I've sent the message, I'm cool anyway. 1466 */ 1467 mutex_exit(&acqrec->ipsacq_lock); 1468 if (extended != NULL) { 1469 putnext(espstack->esp_pfkey_q, extended); 1470 } 1471 putnext(espstack->esp_pfkey_q, pfkeymp); 1472 } 1473 1474 /* 1475 * Handle the SADB_GETSPI message. Create a larval SA. 1476 */ 1477 static void 1478 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 1479 { 1480 ipsa_t *newbie, *target; 1481 isaf_t *outbound, *inbound; 1482 int rc, diagnostic; 1483 sadb_sa_t *assoc; 1484 keysock_out_t *kso; 1485 uint32_t newspi; 1486 1487 /* 1488 * Randomly generate a proposed SPI value 1489 */ 1490 if (cl_inet_getspi != NULL) { 1491 cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid, 1492 IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL); 1493 } else { 1494 (void) random_get_pseudo_bytes((uint8_t *)&newspi, 1495 sizeof (uint32_t)); 1496 } 1497 newbie = sadb_getspi(ksi, newspi, &diagnostic, 1498 espstack->ipsecesp_netstack, IPPROTO_ESP); 1499 1500 if (newbie == NULL) { 1501 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic, 1502 ksi->ks_in_serial); 1503 return; 1504 } else if (newbie == (ipsa_t *)-1) { 1505 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 1506 ksi->ks_in_serial); 1507 return; 1508 } 1509 1510 /* 1511 * XXX - We may randomly collide. We really should recover from this. 1512 * Unfortunately, that could require spending way-too-much-time 1513 * in here. For now, let the user retry. 
1514 */ 1515 1516 if (newbie->ipsa_addrfam == AF_INET6) { 1517 outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6, 1518 *(uint32_t *)(newbie->ipsa_dstaddr)); 1519 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6, 1520 newbie->ipsa_spi); 1521 } else { 1522 ASSERT(newbie->ipsa_addrfam == AF_INET); 1523 outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4, 1524 *(uint32_t *)(newbie->ipsa_dstaddr)); 1525 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4, 1526 newbie->ipsa_spi); 1527 } 1528 1529 mutex_enter(&outbound->isaf_lock); 1530 mutex_enter(&inbound->isaf_lock); 1531 1532 /* 1533 * Check for collisions (i.e. did sadb_getspi() return with something 1534 * that already exists?). 1535 * 1536 * Try outbound first. Even though SADB_GETSPI is traditionally 1537 * for inbound SAs, you never know what a user might do. 1538 */ 1539 target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi, 1540 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam); 1541 if (target == NULL) { 1542 target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi, 1543 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, 1544 newbie->ipsa_addrfam); 1545 } 1546 1547 /* 1548 * I don't have collisions elsewhere! 1549 * (Nor will I because I'm still holding inbound/outbound locks.) 1550 */ 1551 1552 if (target != NULL) { 1553 rc = EEXIST; 1554 IPSA_REFRELE(target); 1555 } else { 1556 /* 1557 * sadb_insertassoc() also checks for collisions, so 1558 * if there's a colliding entry, rc will be set 1559 * to EEXIST. 1560 */ 1561 rc = sadb_insertassoc(newbie, inbound); 1562 newbie->ipsa_hardexpiretime = gethrestime_sec(); 1563 newbie->ipsa_hardexpiretime += 1564 espstack->ipsecesp_larval_timeout; 1565 } 1566 1567 /* 1568 * Can exit outbound mutex. Hold inbound until we're done 1569 * with newbie. 
1570 */ 1571 mutex_exit(&outbound->isaf_lock); 1572 1573 if (rc != 0) { 1574 mutex_exit(&inbound->isaf_lock); 1575 IPSA_REFRELE(newbie); 1576 sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc, 1577 SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial); 1578 return; 1579 } 1580 1581 1582 /* Can write here because I'm still holding the bucket lock. */ 1583 newbie->ipsa_type = SADB_SATYPE_ESP; 1584 1585 /* 1586 * Construct successful return message. We have one thing going 1587 * for us in PF_KEY v2. That's the fact that 1588 * sizeof (sadb_spirange_t) == sizeof (sadb_sa_t) 1589 */ 1590 assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE]; 1591 assoc->sadb_sa_exttype = SADB_EXT_SA; 1592 assoc->sadb_sa_spi = newbie->ipsa_spi; 1593 *((uint64_t *)(&assoc->sadb_sa_replay)) = 0; 1594 mutex_exit(&inbound->isaf_lock); 1595 1596 /* Convert KEYSOCK_IN to KEYSOCK_OUT. */ 1597 kso = (keysock_out_t *)ksi; 1598 kso->ks_out_len = sizeof (*kso); 1599 kso->ks_out_serial = ksi->ks_in_serial; 1600 kso->ks_out_type = KEYSOCK_OUT; 1601 1602 /* 1603 * Can safely putnext() to esp_pfkey_q, because this is a turnaround 1604 * from the esp_pfkey_q. 1605 */ 1606 putnext(espstack->esp_pfkey_q, mp); 1607 } 1608 1609 /* 1610 * Insert the ESP header into a packet. Duplicate an mblk, and insert a newly 1611 * allocated mblk with the ESP header in between the two. 1612 */ 1613 static boolean_t 1614 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint, 1615 ipsecesp_stack_t *espstack) 1616 { 1617 mblk_t *split_mp = mp; 1618 uint_t wheretodiv = divpoint; 1619 1620 while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) { 1621 wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr); 1622 split_mp = split_mp->b_cont; 1623 ASSERT(split_mp != NULL); 1624 } 1625 1626 if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) { 1627 mblk_t *scratch; 1628 1629 /* "scratch" is the 2nd half, split_mp is the first. 
*/ 1630 scratch = dupb(split_mp); 1631 if (scratch == NULL) { 1632 esp1dbg(espstack, 1633 ("esp_insert_esp: can't allocate scratch.\n")); 1634 return (B_FALSE); 1635 } 1636 /* NOTE: dupb() doesn't set b_cont appropriately. */ 1637 scratch->b_cont = split_mp->b_cont; 1638 scratch->b_rptr += wheretodiv; 1639 split_mp->b_wptr = split_mp->b_rptr + wheretodiv; 1640 split_mp->b_cont = scratch; 1641 } 1642 /* 1643 * At this point, split_mp is exactly "wheretodiv" bytes long, and 1644 * holds the end of the pre-ESP part of the datagram. 1645 */ 1646 esp_mp->b_cont = split_mp->b_cont; 1647 split_mp->b_cont = esp_mp; 1648 1649 return (B_TRUE); 1650 } 1651 1652 /* 1653 * Section 7 of RFC 3947 says: 1654 * 1655 * 7. Recovering from the Expiring NAT Mappings 1656 * 1657 * There are cases where NAT box decides to remove mappings that are still 1658 * alive (for example, when the keepalive interval is too long, or when the 1659 * NAT box is rebooted). To recover from this, ends that are NOT behind 1660 * NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from 1661 * the other end to determine which IP and port addresses should be used. 1662 * The host behind dynamic NAT MUST NOT do this, as otherwise it opens a 1663 * DoS attack possibility because the IP address or port of the other host 1664 * will not change (it is not behind NAT). 1665 * 1666 * Keepalives cannot be used for these purposes, as they are not 1667 * authenticated, but any IKE authenticated IKE packet or ESP packet can be 1668 * used to detect whether the IP address or the port has changed. 1669 * 1670 * The following function will check an SA and its explicitly-set pair to see 1671 * if the NAT-T remote port matches the received packet (which must have 1672 * passed ESP authentication, see esp_in_done() for the caller context). If 1673 * there is a mismatch, the SAs are updated. 
It is not important if we race 1674 * with a transmitting thread, as if there is a transmitting thread, it will 1675 * merely emit a packet that will most-likely be dropped. 1676 * 1677 * "ports" are ordered src,dst, and assoc is an inbound SA, where src should 1678 * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port. 1679 */ 1680 #ifdef _LITTLE_ENDIAN 1681 #define FIRST_16(x) ((x) & 0xFFFF) 1682 #define NEXT_16(x) (((x) >> 16) & 0xFFFF) 1683 #else 1684 #define FIRST_16(x) (((x) >> 16) & 0xFFFF) 1685 #define NEXT_16(x) ((x) & 0xFFFF) 1686 #endif 1687 static void 1688 esp_port_freshness(uint32_t ports, ipsa_t *assoc) 1689 { 1690 uint16_t remote = FIRST_16(ports); 1691 uint16_t local = NEXT_16(ports); 1692 ipsa_t *outbound_peer; 1693 isaf_t *bucket; 1694 ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp; 1695 1696 /* We found a conn_t, therefore local != 0. */ 1697 ASSERT(local != 0); 1698 /* Assume an IPv4 SA. */ 1699 ASSERT(assoc->ipsa_addrfam == AF_INET); 1700 1701 /* 1702 * On-the-wire rport == 0 means something's very wrong. 1703 * An unpaired SA is also useless to us. 1704 * If we are behind the NAT, don't bother. 1705 * A zero local NAT port defaults to 4500, so check that too. 1706 * And, of course, if the ports already match, we don't need to 1707 * bother. 1708 */ 1709 if (remote == 0 || assoc->ipsa_otherspi == 0 || 1710 (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) || 1711 (assoc->ipsa_remote_nat_port == 0 && 1712 remote == htons(IPPORT_IKE_NATT)) || 1713 remote == assoc->ipsa_remote_nat_port) 1714 return; 1715 1716 /* Try and snag the peer. NOTE: Assume IPv4 for now. 
*/ 1717 bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4), 1718 assoc->ipsa_srcaddr[0]); 1719 mutex_enter(&bucket->isaf_lock); 1720 outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi, 1721 assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET); 1722 mutex_exit(&bucket->isaf_lock); 1723 1724 /* We probably lost a race to a deleting or expiring thread. */ 1725 if (outbound_peer == NULL) 1726 return; 1727 1728 /* 1729 * Hold the mutexes for both SAs so we don't race another inbound 1730 * thread. A lock-entry order shouldn't matter, since all other 1731 * per-ipsa locks are individually held-then-released. 1732 * 1733 * Luckily, this has nothing to do with the remote-NAT address, 1734 * so we don't have to re-scribble the cached-checksum differential. 1735 */ 1736 mutex_enter(&outbound_peer->ipsa_lock); 1737 mutex_enter(&assoc->ipsa_lock); 1738 outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port = 1739 remote; 1740 mutex_exit(&assoc->ipsa_lock); 1741 mutex_exit(&outbound_peer->ipsa_lock); 1742 IPSA_REFRELE(outbound_peer); 1743 ESP_BUMP_STAT(espstack, sa_port_renumbers); 1744 } 1745 /* 1746 * Finish processing of an inbound ESP packet after processing by the 1747 * crypto framework. 1748 * - Remove the ESP header. 1749 * - Send packet back to IP. 1750 * If authentication was performed on the packet, this function is called 1751 * only if the authentication succeeded. 1752 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1753 * mblk chain ipsec_in_mp. 
1754 */ 1755 static ipsec_status_t 1756 esp_in_done(mblk_t *ipsec_in_mp) 1757 { 1758 ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr; 1759 mblk_t *data_mp; 1760 ipsa_t *assoc; 1761 uint_t espstart; 1762 uint32_t ivlen = 0; 1763 uint_t processed_len; 1764 esph_t *esph; 1765 kstat_named_t *counter; 1766 boolean_t is_natt; 1767 netstack_t *ns = ii->ipsec_in_ns; 1768 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1769 ipsec_stack_t *ipss = ns->netstack_ipsec; 1770 1771 assoc = ii->ipsec_in_esp_sa; 1772 ASSERT(assoc != NULL); 1773 1774 is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 1775 1776 /* get the pointer to the ESP header */ 1777 if (assoc->ipsa_encr_alg == SADB_EALG_NULL) { 1778 /* authentication-only ESP */ 1779 espstart = ii->ipsec_in_crypto_data.cd_offset; 1780 processed_len = ii->ipsec_in_crypto_data.cd_length; 1781 } else { 1782 /* encryption present */ 1783 ivlen = assoc->ipsa_iv_len; 1784 if (assoc->ipsa_auth_alg == SADB_AALG_NONE) { 1785 /* encryption-only ESP */ 1786 espstart = ii->ipsec_in_crypto_data.cd_offset - 1787 sizeof (esph_t) - assoc->ipsa_iv_len; 1788 processed_len = ii->ipsec_in_crypto_data.cd_length + 1789 ivlen; 1790 } else { 1791 /* encryption with authentication */ 1792 espstart = ii->ipsec_in_crypto_dual_data.dd_offset1; 1793 processed_len = ii->ipsec_in_crypto_dual_data.dd_len2 + 1794 ivlen; 1795 } 1796 } 1797 1798 data_mp = ipsec_in_mp->b_cont; 1799 esph = (esph_t *)(data_mp->b_rptr + espstart); 1800 1801 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) { 1802 /* authentication passed if we reach this point */ 1803 ESP_BUMP_STAT(espstack, good_auth); 1804 data_mp->b_wptr -= assoc->ipsa_mac_len; 1805 1806 /* 1807 * Check replay window here! 1808 * For right now, assume keysock will set the replay window 1809 * size to zero for SAs that have an unspecified sender. 1810 * This may change... 1811 */ 1812 1813 if (!sadb_replay_check(assoc, esph->esph_replay)) { 1814 /* 1815 * Log the event. As of now we print out an event. 
1816 * Do not print the replay failure number, or else 1817 * syslog cannot collate the error messages. Printing 1818 * the replay number that failed opens a denial-of- 1819 * service attack. 1820 */ 1821 ipsec_assocfailure(info.mi_idnum, 0, 0, 1822 SL_ERROR | SL_WARN, 1823 "Replay failed for ESP spi 0x%x, dst %s.\n", 1824 assoc->ipsa_spi, assoc->ipsa_dstaddr, 1825 assoc->ipsa_addrfam, espstack->ipsecesp_netstack); 1826 ESP_BUMP_STAT(espstack, replay_failures); 1827 counter = DROPPER(ipss, ipds_esp_replay); 1828 goto drop_and_bail; 1829 } 1830 1831 if (is_natt) 1832 esp_port_freshness(ii->ipsec_in_esp_udp_ports, assoc); 1833 } 1834 1835 esp_set_usetime(assoc, B_TRUE); 1836 1837 if (!esp_age_bytes(assoc, processed_len, B_TRUE)) { 1838 /* The ipsa has hit hard expiration, LOG and AUDIT. */ 1839 ipsec_assocfailure(info.mi_idnum, 0, 0, 1840 SL_ERROR | SL_WARN, 1841 "ESP association 0x%x, dst %s had bytes expire.\n", 1842 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1843 espstack->ipsecesp_netstack); 1844 ESP_BUMP_STAT(espstack, bytes_expired); 1845 counter = DROPPER(ipss, ipds_esp_bytes_expire); 1846 goto drop_and_bail; 1847 } 1848 1849 /* 1850 * Remove ESP header and padding from packet. I hope the compiler 1851 * spews "branch, predict taken" code for this. 1852 */ 1853 1854 if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter, 1855 espstack)) { 1856 if (is_natt) 1857 return (esp_fix_natt_checksums(data_mp, assoc)); 1858 1859 if (assoc->ipsa_state == IPSA_STATE_IDLE) { 1860 /* 1861 * Cluster buffering case. Tell caller that we're 1862 * handling the packet. 1863 */ 1864 sadb_buf_pkt(assoc, ipsec_in_mp, ns); 1865 return (IPSEC_STATUS_PENDING); 1866 } 1867 1868 return (IPSEC_STATUS_SUCCESS); 1869 } 1870 1871 esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n")); 1872 drop_and_bail: 1873 IP_ESP_BUMP_STAT(ipss, in_discards); 1874 /* 1875 * TODO: Extract inbound interface from the IPSEC_IN message's 1876 * ii->ipsec_in_rill_index. 
1877 */ 1878 ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, counter, 1879 &espstack->esp_dropper); 1880 return (IPSEC_STATUS_FAILED); 1881 } 1882 1883 /* 1884 * Called upon failing the inbound ICV check. The message passed as 1885 * argument is freed. 1886 */ 1887 static void 1888 esp_log_bad_auth(mblk_t *ipsec_in) 1889 { 1890 ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr; 1891 ipsa_t *assoc = ii->ipsec_in_esp_sa; 1892 netstack_t *ns = ii->ipsec_in_ns; 1893 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1894 ipsec_stack_t *ipss = ns->netstack_ipsec; 1895 1896 /* 1897 * Log the event. Don't print to the console, block 1898 * potential denial-of-service attack. 1899 */ 1900 ESP_BUMP_STAT(espstack, bad_auth); 1901 1902 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 1903 "ESP Authentication failed for spi 0x%x, dst %s.\n", 1904 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1905 espstack->ipsecesp_netstack); 1906 1907 IP_ESP_BUMP_STAT(ipss, in_discards); 1908 /* 1909 * TODO: Extract inbound interface from the IPSEC_IN 1910 * message's ii->ipsec_in_rill_index. 1911 */ 1912 ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, 1913 DROPPER(ipss, ipds_esp_bad_auth), 1914 &espstack->esp_dropper); 1915 } 1916 1917 1918 /* 1919 * Invoked for outbound packets after ESP processing. If the packet 1920 * also requires AH, performs the AH SA selection and AH processing. 1921 * Returns B_TRUE if the AH processing was not needed or if it was 1922 * performed successfully. Returns B_FALSE and consumes the passed mblk 1923 * if AH processing was required but could not be performed. 
1924 */ 1925 static boolean_t 1926 esp_do_outbound_ah(mblk_t *ipsec_mp) 1927 { 1928 ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr; 1929 ipsec_status_t ipsec_rc; 1930 ipsec_action_t *ap; 1931 1932 ap = io->ipsec_out_act; 1933 if (ap == NULL) { 1934 ipsec_policy_t *pp = io->ipsec_out_policy; 1935 ap = pp->ipsp_act; 1936 } 1937 1938 if (!ap->ipa_want_ah) 1939 return (B_TRUE); 1940 1941 ASSERT(io->ipsec_out_ah_done == B_FALSE); 1942 1943 if (io->ipsec_out_ah_sa == NULL) { 1944 if (!ipsec_outbound_sa(ipsec_mp, IPPROTO_AH)) { 1945 sadb_acquire(ipsec_mp, io, B_TRUE, B_FALSE); 1946 return (B_FALSE); 1947 } 1948 } 1949 ASSERT(io->ipsec_out_ah_sa != NULL); 1950 1951 io->ipsec_out_ah_done = B_TRUE; 1952 ipsec_rc = io->ipsec_out_ah_sa->ipsa_output_func(ipsec_mp); 1953 return (ipsec_rc == IPSEC_STATUS_SUCCESS); 1954 } 1955 1956 1957 /* 1958 * Kernel crypto framework callback invoked after completion of async 1959 * crypto requests. 1960 */ 1961 static void 1962 esp_kcf_callback(void *arg, int status) 1963 { 1964 mblk_t *ipsec_mp = (mblk_t *)arg; 1965 ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1966 ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr; 1967 boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN); 1968 netstackid_t stackid; 1969 netstack_t *ns, *ns_arg; 1970 ipsecesp_stack_t *espstack; 1971 ipsec_stack_t *ipss; 1972 1973 ASSERT(ipsec_mp->b_cont != NULL); 1974 1975 if (is_inbound) { 1976 stackid = ii->ipsec_in_stackid; 1977 ns_arg = ii->ipsec_in_ns; 1978 } else { 1979 stackid = io->ipsec_out_stackid; 1980 ns_arg = io->ipsec_out_ns; 1981 } 1982 1983 /* 1984 * Verify that the netstack is still around; could have vanished 1985 * while kEf was doing its work. 
1986 */ 1987 ns = netstack_find_by_stackid(stackid); 1988 if (ns == NULL || ns != ns_arg) { 1989 /* Disappeared on us */ 1990 if (ns != NULL) 1991 netstack_rele(ns); 1992 freemsg(ipsec_mp); 1993 return; 1994 } 1995 1996 espstack = ns->netstack_ipsecesp; 1997 ipss = ns->netstack_ipsec; 1998 1999 if (status == CRYPTO_SUCCESS) { 2000 if (is_inbound) { 2001 if (esp_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) { 2002 netstack_rele(ns); 2003 return; 2004 } 2005 /* finish IPsec processing */ 2006 ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL); 2007 } else { 2008 /* 2009 * If a ICV was computed, it was stored by the 2010 * crypto framework at the end of the packet. 2011 */ 2012 ipha_t *ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr; 2013 2014 esp_set_usetime(io->ipsec_out_esp_sa, B_FALSE); 2015 /* NAT-T packet. */ 2016 if (ipha->ipha_protocol == IPPROTO_UDP) 2017 esp_prepare_udp(ns, ipsec_mp->b_cont, ipha); 2018 2019 /* do AH processing if needed */ 2020 if (!esp_do_outbound_ah(ipsec_mp)) { 2021 netstack_rele(ns); 2022 return; 2023 } 2024 /* finish IPsec processing */ 2025 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 2026 ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL, 2027 NULL); 2028 } else { 2029 ip6_t *ip6h = (ip6_t *)ipha; 2030 ip_wput_ipsec_out_v6(NULL, ipsec_mp, ip6h, 2031 NULL, NULL); 2032 } 2033 } 2034 2035 } else if (status == CRYPTO_INVALID_MAC) { 2036 esp_log_bad_auth(ipsec_mp); 2037 2038 } else { 2039 esp1dbg(espstack, 2040 ("esp_kcf_callback: crypto failed with 0x%x\n", 2041 status)); 2042 ESP_BUMP_STAT(espstack, crypto_failures); 2043 if (is_inbound) 2044 IP_ESP_BUMP_STAT(ipss, in_discards); 2045 else 2046 ESP_BUMP_STAT(espstack, out_discards); 2047 ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL, 2048 DROPPER(ipss, ipds_esp_crypto_failed), 2049 &espstack->esp_dropper); 2050 } 2051 netstack_rele(ns); 2052 } 2053 2054 /* 2055 * Invoked on crypto framework failure during inbound and outbound processing. 
*/
static void
esp_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc,
    ipsecesp_stack_t *espstack)
{
	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
	    is_inbound ? "inbound" : "outbound", kef_rc));
	/* Hand the message to the drop path, tagged as a crypto failure. */
	ip_drop_packet(mp, is_inbound, NULL, NULL,
	    DROPPER(ipss, ipds_esp_crypto_failed),
	    &espstack->esp_dropper);
	ESP_BUMP_STAT(espstack, crypto_failures);
	/*
	 * The discard counter is direction specific: inbound is bumped
	 * through the ipsec stack, outbound through the ESP stack.
	 */
	if (is_inbound)
		IP_ESP_BUMP_STAT(ipss, in_discards);
	else
		ESP_BUMP_STAT(espstack, out_discards);
}

/*
 * Initialize a crypto_call_req_t for an ESP request.
 *
 * NOTE: the expansion refers to a variable named ipsec_mp that is not a
 * macro argument; it must be in scope wherever the macro is used.  It is
 * stored as the callback argument, with esp_kcf_callback as the callback
 * function.
 */
#define	ESP_INIT_CALLREQ(_cr) {						\
	(_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_RESTRICTED;		\
	(_cr)->cr_callback_arg = ipsec_mp;				\
	(_cr)->cr_callback_func = esp_kcf_callback;			\
}

/*
 * Describe the ICV as a raw buffer of icvlen bytes starting at icvbuf.
 */
#define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {			\
	(mac)->cd_format = CRYPTO_DATA_RAW;				\
	(mac)->cd_offset = 0;						\
	(mac)->cd_length = icvlen;					\
	(mac)->cd_raw.iov_base = (char *)icvbuf;			\
	(mac)->cd_raw.iov_len = icvlen;					\
}

/*
 * Describe len bytes of data starting off bytes into mp.  If the first
 * mblk alone holds at least off + len bytes it is described as a raw
 * buffer; otherwise the mblk (chain) itself is handed over.  cd_length is
 * set to len in both cases.
 */
#define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) {			\
	if (MBLKL(mp) >= (len) + (off)) {				\
		(data)->cd_format = CRYPTO_DATA_RAW;			\
		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;		\
		(data)->cd_raw.iov_len = MBLKL(mp);			\
		(data)->cd_offset = off;				\
	} else {							\
		(data)->cd_format = CRYPTO_DATA_MBLK;			\
		(data)->cd_mp = mp;					\
		(data)->cd_offset = off;				\
	}								\
	(data)->cd_length = len;					\
}

/*
 * Describe a dual (two-region) operation over the mblk mp: region one is
 * (off1, len1), region two is (off2, len2).
 */
#define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {	\
	(data)->dd_format = CRYPTO_DATA_MBLK;				\
	(data)->dd_mp = mp;						\
	(data)->dd_len1 = len1;						\
	(data)->dd_offset1 = off1;					\
	(data)->dd_len2 = len2;						\
	(data)->dd_offset2 = off2;					\
}

/*
 * Submit an inbound ESP packet to the crypto framework.
 *
 * ipsec_mp is the IPSEC_IN message whose b_cont holds the packet, assoc is
 * the SA being used, and esph_offset is the offset of the ESP header from
 * the start of that data mblk.
 *
 * Depending on which algorithms the SA has, this issues a MAC verify, a
 * decrypt, or a combined verify+decrypt request.  If the SA has neither,
 * no request is issued and kef_rc keeps its initial CRYPTO_FAILED value.
 *
 * Returns the result of esp_in_done() on synchronous success,
 * IPSEC_STATUS_PENDING if the request was queued, and IPSEC_STATUS_FAILED
 * otherwise (bad ICV, or any other failure, which goes through
 * esp_crypto_failed()).
 */
static ipsec_status_t
esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset)
{
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	boolean_t do_auth;
	uint_t auth_offset, msg_len, auth_len;
	crypto_call_req_t call_req;
	mblk_t *esp_mp;
	int kef_rc = CRYPTO_FAILED;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_encr;
	uint_t encr_offset, encr_len;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT(ii->ipsec_in_type == IPSEC_IN);

	/*
	 * In case kEF queues and calls back, make sure we have the
	 * netstackid_t for verification that the IP instance is still around
	 * in esp_kcf_callback().
	 */
	ASSERT(ii->ipsec_in_stackid == ns->netstack_stackid);

	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;

	/*
	 * An inbound packet is of the form:
	 * IPSEC_IN -> [IP,options,ESP,IV,data,ICV,pad]
	 */
	esp_mp = ipsec_mp->b_cont;
	msg_len = MBLKL(esp_mp);

	ESP_INIT_CALLREQ(&call_req);

	if (do_auth) {
		/* force asynchronous processing? */
		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
		    IPSEC_ALGS_EXEC_ASYNC)
			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;

		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* ICV to be verified: the last icv_len bytes of the mblk */
		ESP_INIT_CRYPTO_MAC(&ii->ipsec_in_crypto_mac,
		    icv_len, esp_mp->b_wptr - icv_len);

		/* authentication starts at the ESP header */
		auth_offset = esph_offset;
		auth_len = msg_len - auth_offset - icv_len;
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
			    esp_mp, auth_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
			    &ii->ipsec_in_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &ii->ipsec_in_crypto_mac, &call_req);
		}
	}

	if (do_encr) {
		/* force asynchronous processing? */
		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
		    IPSEC_ALGS_EXEC_ASYNC)
			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;

		/* encryption template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);

		/* skip IV, since it is passed separately */
		encr_offset = esph_offset + sizeof (esph_t) + iv_len;
		encr_len = msg_len - encr_offset;

		if (!do_auth) {
			/* decryption only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
			    esp_mp, encr_offset, encr_len);

			/* specify IV: it sits right after the ESP header */
			ii->ipsec_in_crypto_data.cd_miscdata =
			    (char *)esp_mp->b_rptr + sizeof (esph_t) +
			    esph_offset;

			/* call the crypto framework */
			kef_rc = crypto_decrypt(&assoc->ipsa_emech,
			    &ii->ipsec_in_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    NULL, &call_req);
		}
	}

	if (do_auth && do_encr) {
		/* dual operation */
		/*
		 * initialize input data argument; the decrypted region
		 * excludes the trailing ICV (encr_len - icv_len).
		 */
		ESP_INIT_CRYPTO_DUAL_DATA(&ii->ipsec_in_crypto_dual_data,
		    esp_mp, auth_offset, auth_len,
		    encr_offset, encr_len - icv_len);

		/* specify IV */
		ii->ipsec_in_crypto_dual_data.dd_miscdata =
		    (char *)esp_mp->b_rptr + sizeof (esph_t) + esph_offset;

		/* call the framework */
		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
		    &assoc->ipsa_emech, &ii->ipsec_in_crypto_dual_data,
		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
		    auth_ctx_tmpl, encr_ctx_tmpl, &ii->ipsec_in_crypto_mac,
		    NULL, &call_req);
	}

	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		return (esp_in_done(ipsec_mp));
	case CRYPTO_QUEUED:
		/* esp_kcf_callback() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (IPSEC_STATUS_PENDING);
	case CRYPTO_INVALID_MAC:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_log_bad_auth(ipsec_mp);
		return (IPSEC_STATUS_FAILED);
	}

	/* Any other return code is treated as a generic crypto failure. */
	esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc, espstack);
	return (IPSEC_STATUS_FAILED);
}

/*
 * Compute the IP and UDP checksums -- common code for both keepalives and
 * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
 * uses mblk-insertion to insert the UDP header.
 * TODO - If there is an easy way to prep a packet for HW checksums, make
 * it happen here.
 *
 * The IP header checksum is always recomputed; the UDP checksum is only
 * computed when the UDP stack's us_do_checksum is set.
 */
static void
esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
{
	int offset;
	uint32_t cksum;
	uint16_t *arr;
	mblk_t *udpmp = mp;
	uint_t hlen = IPH_HDR_LENGTH(ipha);

	ASSERT(MBLKL(mp) >= sizeof (ipha_t));

	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);

	if (ns->netstack_udp->us_do_checksum) {
		ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
		/* arr points to the IP header. */
		arr = (uint16_t *)ipha;
		IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
		IP_STAT_UPDATE(ns->netstack_ip, ip_udp_out_sw_cksum_bytes,
		    ntohs(htons(ipha->ipha_length) - hlen));
		/* arr[6-9] are the IP addresses. */
		cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
		    ntohs(htons(ipha->ipha_length) - hlen);
		cksum = IP_CSUM(mp, hlen, cksum);
		/*
		 * Walk the chain to the mblk that actually contains the
		 * UDP checksum field; it need not be the first one.
		 */
		offset = hlen + UDP_CHECKSUM_OFFSET;
		while (offset >= MBLKL(udpmp)) {
			offset -= MBLKL(udpmp);
			udpmp = udpmp->b_cont;
		}
		/* arr points to the UDP header's checksum field. */
		arr = (uint16_t *)(udpmp->b_rptr + offset);
		*arr = cksum;
	}
}

/*
 * taskq handler so we can send the NAT-T keepalive on a separate thread.
 *
 * arg is the IPSEC_OUT message built by ipsecesp_send_keepalive().  The
 * message is freed instead of sent if the netstack recorded in it can no
 * longer be found, or is not the same netstack the message points at.
 */
static void
actually_send_keepalive(void *arg)
{
	mblk_t *ipsec_mp = (mblk_t *)arg;
	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
	ipha_t *ipha;
	netstack_t *ns;

	ASSERT(DB_TYPE(ipsec_mp) == M_CTL);
	ASSERT(io->ipsec_out_type == IPSEC_OUT);
	ASSERT(ipsec_mp->b_cont != NULL);
	ASSERT(DB_TYPE(ipsec_mp->b_cont) == M_DATA);

	ns = netstack_find_by_stackid(io->ipsec_out_stackid);
	if (ns == NULL || ns != io->ipsec_out_ns) {
		/* Just freemsg(). */
		if (ns != NULL)
			netstack_rele(ns);
		freemsg(ipsec_mp);
		return;
	}

	ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr;
	ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL, NULL);
	/* Drop the reference taken by netstack_find_by_stackid(). */
	netstack_rele(ns);
}

/*
 * Send a one-byte UDP NAT-T keepalive.  Construct an IPSEC_OUT too that'll
 * get fed into esp_send_udp/ip_wput_ipsec_out.
2329 */ 2330 void 2331 ipsecesp_send_keepalive(ipsa_t *assoc) 2332 { 2333 mblk_t *mp = NULL, *ipsec_mp = NULL; 2334 ipha_t *ipha; 2335 udpha_t *udpha; 2336 ipsec_out_t *io; 2337 2338 ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock)); 2339 2340 mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI); 2341 if (mp == NULL) 2342 return; 2343 ipha = (ipha_t *)mp->b_rptr; 2344 ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 2345 ipha->ipha_type_of_service = 0; 2346 ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1); 2347 /* Use the low-16 of the SPI so we have some clue where it came from. */ 2348 ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1); 2349 ipha->ipha_fragment_offset_and_flags = 0; /* Too small to fragment! */ 2350 ipha->ipha_ttl = 0xFF; 2351 ipha->ipha_protocol = IPPROTO_UDP; 2352 ipha->ipha_hdr_checksum = 0; 2353 ipha->ipha_src = assoc->ipsa_srcaddr[0]; 2354 ipha->ipha_dst = assoc->ipsa_dstaddr[0]; 2355 udpha = (udpha_t *)(ipha + 1); 2356 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2357 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2358 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 2359 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2360 udpha->uha_length = htons(sizeof (udpha_t) + 1); 2361 udpha->uha_checksum = 0; 2362 mp->b_wptr = (uint8_t *)(udpha + 1); 2363 *(mp->b_wptr++) = 0xFF; 2364 2365 ipsec_mp = ipsec_alloc_ipsec_out(assoc->ipsa_netstack); 2366 if (ipsec_mp == NULL) { 2367 freeb(mp); 2368 return; 2369 } 2370 ipsec_mp->b_cont = mp; 2371 io = (ipsec_out_t *)ipsec_mp->b_rptr; 2372 io->ipsec_out_zoneid = 2373 netstackid_to_zoneid(assoc->ipsa_netstack->netstack_stackid); 2374 io->ipsec_out_stackid = assoc->ipsa_netstack->netstack_stackid; 2375 2376 esp_prepare_udp(assoc->ipsa_netstack, mp, ipha); 2377 /* 2378 * We're holding an isaf_t bucket lock, so pawn off the actual 2379 * packet transmission to another thread. 
Just in case syncq 2380 * processing causes a same-bucket packet to be processed. 2381 */ 2382 if (taskq_dispatch(esp_taskq, actually_send_keepalive, ipsec_mp, 2383 TQ_NOSLEEP) == 0) { 2384 /* Assume no memory if taskq_dispatch() fails. */ 2385 ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, 2386 DROPPER(assoc->ipsa_netstack->netstack_ipsec, 2387 ipds_esp_nomem), 2388 &assoc->ipsa_netstack->netstack_ipsecesp->esp_dropper); 2389 } 2390 } 2391 2392 static ipsec_status_t 2393 esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf, 2394 uint_t payload_len) 2395 { 2396 ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr; 2397 uint_t auth_len; 2398 crypto_call_req_t call_req; 2399 mblk_t *esp_mp; 2400 int kef_rc = CRYPTO_FAILED; 2401 uint_t icv_len = assoc->ipsa_mac_len; 2402 crypto_ctx_template_t auth_ctx_tmpl; 2403 boolean_t do_auth; 2404 boolean_t do_encr; 2405 uint_t iv_len = assoc->ipsa_iv_len; 2406 crypto_ctx_template_t encr_ctx_tmpl; 2407 boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 2408 size_t esph_offset = (is_natt ? UDPH_SIZE : 0); 2409 netstack_t *ns = io->ipsec_out_ns; 2410 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2411 ipsec_stack_t *ipss = ns->netstack_ipsec; 2412 2413 esp3dbg(espstack, ("esp_submit_req_outbound:%s", 2414 is_natt ? "natt" : "not natt")); 2415 2416 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2417 2418 /* 2419 * In case kEF queues and calls back, keep netstackid_t for 2420 * verification that the IP instance is still around in 2421 * esp_kcf_callback(). 
2422 */ 2423 io->ipsec_out_stackid = ns->netstack_stackid; 2424 2425 do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; 2426 do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; 2427 2428 /* 2429 * Outbound IPsec packets are of the form: 2430 * IPSEC_OUT -> [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV] 2431 * unless it's NATT, then it's 2432 * IPSEC_OUT -> [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV] 2433 * Get a pointer to the mblk containing the ESP header. 2434 */ 2435 ASSERT(ipsec_mp->b_cont != NULL && ipsec_mp->b_cont->b_cont != NULL); 2436 esp_mp = ipsec_mp->b_cont->b_cont; 2437 2438 ESP_INIT_CALLREQ(&call_req); 2439 2440 if (do_auth) { 2441 /* force asynchronous processing? */ 2442 if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] == 2443 IPSEC_ALGS_EXEC_ASYNC) 2444 call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE; 2445 2446 /* authentication context template */ 2447 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, 2448 auth_ctx_tmpl); 2449 2450 /* where to store the computed mac */ 2451 ESP_INIT_CRYPTO_MAC(&io->ipsec_out_crypto_mac, 2452 icv_len, icv_buf); 2453 2454 /* authentication starts at the ESP header */ 2455 auth_len = payload_len + iv_len + sizeof (esph_t); 2456 if (!do_encr) { 2457 /* authentication only */ 2458 /* initialize input data argument */ 2459 ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data, 2460 esp_mp, esph_offset, auth_len); 2461 2462 /* call the crypto framework */ 2463 kef_rc = crypto_mac(&assoc->ipsa_amech, 2464 &io->ipsec_out_crypto_data, 2465 &assoc->ipsa_kcfauthkey, auth_ctx_tmpl, 2466 &io->ipsec_out_crypto_mac, &call_req); 2467 } 2468 } 2469 2470 if (do_encr) { 2471 /* force asynchronous processing? 
*/ 2472 if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] == 2473 IPSEC_ALGS_EXEC_ASYNC) 2474 call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE; 2475 2476 /* encryption context template */ 2477 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR, 2478 encr_ctx_tmpl); 2479 2480 if (!do_auth) { 2481 /* encryption only, skip mblk that contains ESP hdr */ 2482 /* initialize input data argument */ 2483 ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data, 2484 esp_mp->b_cont, 0, payload_len); 2485 2486 /* specify IV */ 2487 io->ipsec_out_crypto_data.cd_miscdata = 2488 (char *)esp_mp->b_rptr + sizeof (esph_t) + 2489 esph_offset; 2490 2491 /* call the crypto framework */ 2492 kef_rc = crypto_encrypt(&assoc->ipsa_emech, 2493 &io->ipsec_out_crypto_data, 2494 &assoc->ipsa_kcfencrkey, encr_ctx_tmpl, 2495 NULL, &call_req); 2496 } 2497 } 2498 2499 if (do_auth && do_encr) { 2500 /* 2501 * Encryption and authentication: 2502 * Pass the pointer to the mblk chain starting at the ESP 2503 * header to the framework. Skip the ESP header mblk 2504 * for encryption, which is reflected by an encryption 2505 * offset equal to the length of that mblk. Start 2506 * the authentication at the ESP header, i.e. use an 2507 * authentication offset of zero. 
2508 */ 2509 ESP_INIT_CRYPTO_DUAL_DATA(&io->ipsec_out_crypto_dual_data, 2510 esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len); 2511 2512 /* specify IV */ 2513 io->ipsec_out_crypto_dual_data.dd_miscdata = 2514 (char *)esp_mp->b_rptr + sizeof (esph_t) + esph_offset; 2515 2516 /* call the framework */ 2517 kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech, 2518 &assoc->ipsa_amech, NULL, 2519 &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey, 2520 encr_ctx_tmpl, auth_ctx_tmpl, 2521 &io->ipsec_out_crypto_dual_data, 2522 &io->ipsec_out_crypto_mac, &call_req); 2523 } 2524 2525 switch (kef_rc) { 2526 case CRYPTO_SUCCESS: 2527 ESP_BUMP_STAT(espstack, crypto_sync); 2528 esp_set_usetime(assoc, B_FALSE); 2529 if (is_natt) 2530 esp_prepare_udp(ns, ipsec_mp->b_cont, 2531 (ipha_t *)ipsec_mp->b_cont->b_rptr); 2532 return (IPSEC_STATUS_SUCCESS); 2533 case CRYPTO_QUEUED: 2534 /* esp_kcf_callback() will be invoked on completion */ 2535 ESP_BUMP_STAT(espstack, crypto_async); 2536 return (IPSEC_STATUS_PENDING); 2537 } 2538 2539 esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc, espstack); 2540 return (IPSEC_STATUS_FAILED); 2541 } 2542 2543 /* 2544 * Handle outbound IPsec processing for IPv4 and IPv6 2545 * On success returns B_TRUE, on failure returns B_FALSE and frees the 2546 * mblk chain ipsec_in_mp. 
2547 */ 2548 static ipsec_status_t 2549 esp_outbound(mblk_t *mp) 2550 { 2551 mblk_t *ipsec_out_mp, *data_mp, *espmp, *tailmp; 2552 ipsec_out_t *io; 2553 ipha_t *ipha; 2554 ip6_t *ip6h; 2555 esph_t *esph; 2556 uint_t af; 2557 uint8_t *nhp; 2558 uintptr_t divpoint, datalen, adj, padlen, i, alloclen; 2559 uintptr_t esplen = sizeof (esph_t); 2560 uint8_t protocol; 2561 ipsa_t *assoc; 2562 uint_t iv_len, mac_len = 0; 2563 uchar_t *icv_buf; 2564 udpha_t *udpha; 2565 boolean_t is_natt = B_FALSE; 2566 netstack_t *ns; 2567 ipsecesp_stack_t *espstack; 2568 ipsec_stack_t *ipss; 2569 2570 ipsec_out_mp = mp; 2571 data_mp = ipsec_out_mp->b_cont; 2572 2573 io = (ipsec_out_t *)ipsec_out_mp->b_rptr; 2574 ns = io->ipsec_out_ns; 2575 espstack = ns->netstack_ipsecesp; 2576 ipss = ns->netstack_ipsec; 2577 2578 ESP_BUMP_STAT(espstack, out_requests); 2579 2580 /* 2581 * <sigh> We have to copy the message here, because TCP (for example) 2582 * keeps a dupb() of the message lying around for retransmission. 2583 * Since ESP changes the whole of the datagram, we have to create our 2584 * own copy lest we clobber TCP's data. Since we have to copy anyway, 2585 * we might as well make use of msgpullup() and get the mblk into one 2586 * contiguous piece! 2587 */ 2588 ipsec_out_mp->b_cont = msgpullup(data_mp, -1); 2589 if (ipsec_out_mp->b_cont == NULL) { 2590 esp0dbg(("esp_outbound: msgpullup() failed, " 2591 "dropping packet.\n")); 2592 ipsec_out_mp->b_cont = data_mp; 2593 /* 2594 * TODO: Find the outbound IRE for this packet and 2595 * pass it to ip_drop_packet(). 2596 */ 2597 ip_drop_packet(ipsec_out_mp, B_FALSE, NULL, NULL, 2598 DROPPER(ipss, ipds_esp_nomem), 2599 &espstack->esp_dropper); 2600 return (IPSEC_STATUS_FAILED); 2601 } else { 2602 freemsg(data_mp); 2603 data_mp = ipsec_out_mp->b_cont; 2604 } 2605 2606 /* 2607 * Reality check.... 2608 */ 2609 2610 ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). 
*/ 2611 2612 if (io->ipsec_out_v4) { 2613 af = AF_INET; 2614 divpoint = IPH_HDR_LENGTH(ipha); 2615 datalen = ntohs(ipha->ipha_length) - divpoint; 2616 nhp = (uint8_t *)&ipha->ipha_protocol; 2617 } else { 2618 ip6_pkt_t ipp; 2619 2620 af = AF_INET6; 2621 ip6h = (ip6_t *)ipha; 2622 bzero(&ipp, sizeof (ipp)); 2623 divpoint = ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL); 2624 if (ipp.ipp_dstopts != NULL && 2625 ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) { 2626 /* 2627 * Destination options are tricky. If we get in here, 2628 * then we have a terminal header following the 2629 * destination options. We need to adjust backwards 2630 * so we insert ESP BEFORE the destination options 2631 * bag. (So that the dstopts get encrypted!) 2632 * 2633 * Since this is for outbound packets only, we know 2634 * that non-terminal destination options only precede 2635 * routing headers. 2636 */ 2637 divpoint -= ipp.ipp_dstoptslen; 2638 } 2639 datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint; 2640 2641 if (ipp.ipp_rthdr != NULL) { 2642 nhp = &ipp.ipp_rthdr->ip6r_nxt; 2643 } else if (ipp.ipp_hopopts != NULL) { 2644 nhp = &ipp.ipp_hopopts->ip6h_nxt; 2645 } else { 2646 ASSERT(divpoint == sizeof (ip6_t)); 2647 /* It's probably IP + ESP. */ 2648 nhp = &ip6h->ip6_nxt; 2649 } 2650 } 2651 assoc = io->ipsec_out_esp_sa; 2652 ASSERT(assoc != NULL); 2653 2654 if (assoc->ipsa_auth_alg != SADB_AALG_NONE) 2655 mac_len = assoc->ipsa_mac_len; 2656 2657 if (assoc->ipsa_flags & IPSA_F_NATT) { 2658 /* wedge in fake UDP */ 2659 is_natt = B_TRUE; 2660 esplen += UDPH_SIZE; 2661 } 2662 2663 /* 2664 * Set up ESP header and encryption padding for ENCR PI request. 2665 */ 2666 2667 /* Determine the padding length. Pad to 4-bytes for no-encryption. */ 2668 if (assoc->ipsa_encr_alg != SADB_EALG_NULL) { 2669 iv_len = assoc->ipsa_iv_len; 2670 2671 /* 2672 * Include the two additional bytes (hence the - 2) for the 2673 * padding length and the next header. 
Take this into account 2674 * when calculating the actual length of the padding. 2675 */ 2676 ASSERT(ISP2(iv_len)); 2677 padlen = ((unsigned)(iv_len - datalen - 2)) & (iv_len - 1); 2678 } else { 2679 iv_len = 0; 2680 padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) & 2681 (sizeof (uint32_t) - 1); 2682 } 2683 2684 /* Allocate ESP header and IV. */ 2685 esplen += iv_len; 2686 2687 /* 2688 * Update association byte-count lifetimes. Don't forget to take 2689 * into account the padding length and next-header (hence the + 2). 2690 * 2691 * Use the amount of data fed into the "encryption algorithm". This 2692 * is the IV, the data length, the padding length, and the final two 2693 * bytes (padlen, and next-header). 2694 * 2695 */ 2696 2697 if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) { 2698 /* 2699 * TODO: Find the outbound IRE for this packet and 2700 * pass it to ip_drop_packet(). 2701 */ 2702 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2703 DROPPER(ipss, ipds_esp_bytes_expire), 2704 &espstack->esp_dropper); 2705 return (IPSEC_STATUS_FAILED); 2706 } 2707 2708 espmp = allocb(esplen, BPRI_HI); 2709 if (espmp == NULL) { 2710 ESP_BUMP_STAT(espstack, out_discards); 2711 esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n")); 2712 /* 2713 * TODO: Find the outbound IRE for this packet and 2714 * pass it to ip_drop_packet(). 2715 */ 2716 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2717 DROPPER(ipss, ipds_esp_nomem), 2718 &espstack->esp_dropper); 2719 return (IPSEC_STATUS_FAILED); 2720 } 2721 espmp->b_wptr += esplen; 2722 esph = (esph_t *)espmp->b_rptr; 2723 2724 if (is_natt) { 2725 esp3dbg(espstack, ("esp_outbound: NATT")); 2726 2727 udpha = (udpha_t *)espmp->b_rptr; 2728 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2729 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2730 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 
2731 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2732 /* 2733 * Set the checksum to 0, so that the esp_prepare_udp() call 2734 * can do the right thing. 2735 */ 2736 udpha->uha_checksum = 0; 2737 esph = (esph_t *)(udpha + 1); 2738 } 2739 2740 esph->esph_spi = assoc->ipsa_spi; 2741 2742 esph->esph_replay = htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1)); 2743 if (esph->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) { 2744 /* 2745 * XXX We have replay counter wrapping. 2746 * We probably want to nuke this SA (and its peer). 2747 */ 2748 ipsec_assocfailure(info.mi_idnum, 0, 0, 2749 SL_ERROR | SL_CONSOLE | SL_WARN, 2750 "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n", 2751 esph->esph_spi, assoc->ipsa_dstaddr, af, 2752 espstack->ipsecesp_netstack); 2753 2754 ESP_BUMP_STAT(espstack, out_discards); 2755 sadb_replay_delete(assoc); 2756 /* 2757 * TODO: Find the outbound IRE for this packet and 2758 * pass it to ip_drop_packet(). 2759 */ 2760 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2761 DROPPER(ipss, ipds_esp_replay), 2762 &espstack->esp_dropper); 2763 return (IPSEC_STATUS_FAILED); 2764 } 2765 2766 /* 2767 * Set the IV to a random quantity. We do not require the 2768 * highest quality random bits, but for best security with CBC 2769 * mode ciphers, the value must be unlikely to repeat and also 2770 * must not be known in advance to an adversary capable of 2771 * influencing the plaintext. 2772 */ 2773 (void) random_get_pseudo_bytes((uint8_t *)(esph + 1), iv_len); 2774 2775 /* Fix the IP header. 
*/ 2776 alloclen = padlen + 2 + mac_len; 2777 adj = alloclen + (espmp->b_wptr - espmp->b_rptr); 2778 2779 protocol = *nhp; 2780 2781 if (io->ipsec_out_v4) { 2782 ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj); 2783 if (is_natt) { 2784 *nhp = IPPROTO_UDP; 2785 udpha->uha_length = htons(ntohs(ipha->ipha_length) - 2786 IPH_HDR_LENGTH(ipha)); 2787 } else { 2788 *nhp = IPPROTO_ESP; 2789 } 2790 ipha->ipha_hdr_checksum = 0; 2791 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 2792 } else { 2793 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj); 2794 *nhp = IPPROTO_ESP; 2795 } 2796 2797 /* I've got the two ESP mblks, now insert them. */ 2798 2799 esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n")); 2800 esp2dbg(espstack, (dump_msg(data_mp))); 2801 2802 if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) { 2803 ESP_BUMP_STAT(espstack, out_discards); 2804 /* NOTE: esp_insert_esp() only fails if there's no memory. */ 2805 /* 2806 * TODO: Find the outbound IRE for this packet and 2807 * pass it to ip_drop_packet(). 2808 */ 2809 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2810 DROPPER(ipss, ipds_esp_nomem), 2811 &espstack->esp_dropper); 2812 freeb(espmp); 2813 return (IPSEC_STATUS_FAILED); 2814 } 2815 2816 /* Append padding (and leave room for ICV). */ 2817 for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont) 2818 ; 2819 if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) { 2820 tailmp->b_cont = allocb(alloclen, BPRI_HI); 2821 if (tailmp->b_cont == NULL) { 2822 ESP_BUMP_STAT(espstack, out_discards); 2823 esp0dbg(("esp_outbound: Can't allocate tailmp.\n")); 2824 /* 2825 * TODO: Find the outbound IRE for this packet and 2826 * pass it to ip_drop_packet(). 
2827 */ 2828 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2829 DROPPER(ipss, ipds_esp_nomem), 2830 &espstack->esp_dropper); 2831 return (IPSEC_STATUS_FAILED); 2832 } 2833 tailmp = tailmp->b_cont; 2834 } 2835 2836 /* 2837 * If there's padding, N bytes of padding must be of the form 0x1, 2838 * 0x2, 0x3... 0xN. 2839 */ 2840 for (i = 0; i < padlen; ) { 2841 i++; 2842 *tailmp->b_wptr++ = i; 2843 } 2844 *tailmp->b_wptr++ = i; 2845 *tailmp->b_wptr++ = protocol; 2846 2847 esp2dbg(espstack, ("data_Mp before encryption:\n")); 2848 esp2dbg(espstack, (dump_msg(data_mp))); 2849 2850 /* 2851 * The packet is eligible for hardware acceleration if the 2852 * following conditions are satisfied: 2853 * 2854 * 1. the packet will not be fragmented 2855 * 2. the provider supports the algorithms specified by SA 2856 * 3. there is no pending control message being exchanged 2857 * 4. snoop is not attached 2858 * 5. the destination address is not a multicast address 2859 * 2860 * All five of these conditions are checked by IP prior to 2861 * sending the packet to ESP. 2862 * 2863 * But We, and We Alone, can, nay MUST check if the packet 2864 * is over NATT, and then disqualify it from hardware 2865 * acceleration. 2866 */ 2867 2868 if (io->ipsec_out_is_capab_ill && !(assoc->ipsa_flags & IPSA_F_NATT)) { 2869 return (esp_outbound_accelerated(ipsec_out_mp, mac_len)); 2870 } 2871 ESP_BUMP_STAT(espstack, noaccel); 2872 2873 /* 2874 * Okay. I've set up the pre-encryption ESP. Let's do it! 2875 */ 2876 2877 if (mac_len > 0) { 2878 ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim); 2879 icv_buf = tailmp->b_wptr; 2880 tailmp->b_wptr += mac_len; 2881 } else { 2882 icv_buf = NULL; 2883 } 2884 2885 return (esp_submit_req_outbound(ipsec_out_mp, assoc, icv_buf, 2886 datalen + padlen + 2)); 2887 } 2888 2889 /* 2890 * IP calls this to validate the ICMP errors that 2891 * we got from the network. 
*/
ipsec_status_t
ipsecesp_icmp_error(mblk_t *ipsec_mp)
{
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN);
	netstack_t	*ns;
	ipsecesp_stack_t *espstack;
	ipsec_stack_t	*ipss;

	/*
	 * The M_CTL may be either an IPSEC_IN or an IPSEC_OUT; pick the
	 * netstack pointer from whichever structure it really is.
	 */
	if (is_inbound) {
		ns = ii->ipsec_in_ns;
	} else {
		ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;

		ns = io->ipsec_out_ns;
	}
	espstack = ns->netstack_ipsecesp;
	ipss = ns->netstack_ipsec;

	/*
	 * Unless we get an entire packet back, this function is useless.
	 * Why?
	 *
	 * 1.)	Partial packets are useless, because the "next header"
	 *	is at the end of the decrypted ESP packet.  Without the
	 *	whole packet, this is useless.
	 *
	 * 2.)	If we ever use a stateful cipher, such as a stream or a
	 *	one-time pad, we can't do anything.
	 *
	 * Since the chances of us getting an entire packet back are very
	 * very small, we discard here.
	 */
	IP_ESP_BUMP_STAT(ipss, in_discards);
	ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
	    DROPPER(ipss, ipds_esp_icmp),
	    &espstack->esp_dropper);
	return (IPSEC_STATUS_FAILED);
}

/*
 * ESP module read put routine.
 *
 * M_PROTO/M_PCPROTO messages are treated as TPI replies and are always
 * freed here; every other message type is passed along with putnext().
 */
/* ARGSUSED */
static void
ipsecesp_rput(queue_t *q, mblk_t *mp)
{
	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;

	ASSERT(mp->b_datap->db_type != M_CTL);	/* No more IRE_DB_REQ. */

	switch (mp->b_datap->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* TPI message of some sort. */
		switch (*((t_scalar_t *)mp->b_rptr)) {
		case T_BIND_ACK:
			esp3dbg(espstack,
			    ("Thank you IP from ESP for T_BIND_ACK\n"));
			break;
		case T_ERROR_ACK:
			cmn_err(CE_WARN,
			    "ipsecesp:  ESP received T_ERROR_ACK from IP.");
			/*
			 * Make esp_sadb.s_ip_q NULL, and in the
			 * future, perhaps try again.
			 */
			espstack->esp_sadb.s_ip_q = NULL;
			break;
		case T_OK_ACK:
			/* Probably from a (rarely sent) T_UNBIND_REQ. */
			break;
		default:
			esp0dbg(("Unknown M_{,PC}PROTO message.\n"));
		}
		freemsg(mp);
		break;
	default:
		/* For now, passthru message. */
		esp2dbg(espstack, ("ESP got unknown mblk type %d.\n",
		    mp->b_datap->db_type));
		putnext(q, mp);
	}
}

/*
 * Construct an SADB_REGISTER message with the current algorithms.
 *
 * sequence and pid are copied into the PF_KEY message header; serial is
 * passed to sadb_keysock_out() to build the leading KEYSOCK_OUT mblk.
 *
 * Returns B_TRUE if the message was sent up esp_pfkey_q.  Returns B_FALSE
 * if an allocation failed or if esp_pfkey_q is NULL (in which case the
 * message that was built is freed).
 */
static boolean_t
esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
    ipsecesp_stack_t *espstack)
{
	mblk_t *pfkey_msg_mp, *keysock_out_mp;
	sadb_msg_t *samsg;
	sadb_supported_t *sasupp_auth = NULL;
	sadb_supported_t *sasupp_encr = NULL;
	sadb_alg_t *saalg;
	uint_t allocsize = sizeof (*samsg);
	uint_t i, numalgs_snap;
	int current_aalgs;
	ipsec_alginfo_t **authalgs;
	uint_t num_aalgs;
	int current_ealgs;
	ipsec_alginfo_t **encralgs;
	uint_t num_ealgs;
	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	/* Allocate the KEYSOCK_OUT. */
	keysock_out_mp = sadb_keysock_out(serial);
	if (keysock_out_mp == NULL) {
		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
		return (B_FALSE);
	}

	/*
	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
	 */

	mutex_enter(&ipss->ipsec_alg_lock);

	/*
	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
	 * down the lock while filling it.
	 *
	 * Return only valid algorithms, so the number of algorithms
	 * to send up may be less than the number of algorithm entries
	 * in the table.
	 */
	/* First pass: count the valid algorithms to size the message. */
	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
			num_aalgs++;

	if (num_aalgs != 0) {
		allocsize += (num_aalgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp_auth);
	}
	encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
			num_ealgs++;

	if (num_ealgs != 0) {
		allocsize += (num_ealgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp_encr);
	}
	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
	if (keysock_out_mp->b_cont == NULL) {
		mutex_exit(&ipss->ipsec_alg_lock);
		freemsg(keysock_out_mp);
		return (B_FALSE);
	}

	pfkey_msg_mp = keysock_out_mp->b_cont;
	pfkey_msg_mp->b_wptr += allocsize;
	/*
	 * Second pass: the supported-auth extension (if any) directly
	 * follows the sadb_msg_t header, and its descriptors follow it.
	 */
	if (num_aalgs != 0) {
		sasupp_auth = (sadb_supported_t *)
		    (pfkey_msg_mp->b_rptr + sizeof (*samsg));
		saalg = (sadb_alg_t *)(sasupp_auth + 1);

		ASSERT(((ulong_t)saalg & 0x7) == 0);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
		    i++) {
			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
				continue;

			saalg->sadb_alg_id = authalgs[i]->alg_id;
			saalg->sadb_alg_ivlen = 0;
			saalg->sadb_alg_minbits	= authalgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits	= authalgs[i]->alg_ef_maxbits;
			saalg->sadb_x_alg_defincr = authalgs[i]->alg_ef_default;
			saalg->sadb_x_alg_increment =
			    authalgs[i]->alg_increment;
			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_aalgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++) {
			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
				cmn_err(CE_PANIC, "esp_register_out()! "
				    "Missed aalg #%d.\n", i);
			}
		}
#endif /* DEBUG */
	} else {
		saalg = (sadb_alg_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
	}

	/*
	 * The supported-encr extension starts wherever saalg was left:
	 * after the last auth descriptor, or right after the header.
	 */
	if (num_ealgs != 0) {
		sasupp_encr = (sadb_supported_t *)saalg;
		saalg = (sadb_alg_t *)(sasupp_encr + 1);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
				continue;
			saalg->sadb_alg_id = encralgs[i]->alg_id;
			saalg->sadb_alg_ivlen = encralgs[i]->alg_datalen;
			saalg->sadb_alg_minbits	= encralgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits	= encralgs[i]->alg_ef_maxbits;
			saalg->sadb_x_alg_defincr = encralgs[i]->alg_ef_default;
			saalg->sadb_x_alg_increment =
			    encralgs[i]->alg_increment;
			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_ealgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++) {
			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
				cmn_err(CE_PANIC, "esp_register_out()! "
				    "Missed ealg #%d.\n", i);
			}
		}
#endif /* DEBUG */
	}

	current_aalgs = num_aalgs;
	current_ealgs = num_ealgs;

	mutex_exit(&ipss->ipsec_alg_lock);

	/* Now fill the rest of the SADB_REGISTER message. */

	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
	samsg->sadb_msg_version = PF_KEY_V2;
	samsg->sadb_msg_type = SADB_REGISTER;
	samsg->sadb_msg_errno = 0;
	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
	samsg->sadb_msg_len = SADB_8TO64(allocsize);
	samsg->sadb_msg_reserved = 0;
	/*
	 * Assume caller has sufficient sequence/pid number info.  If it's one
	 * from me over a new alg., I could give two hoots about sequence.
	 */
	samsg->sadb_msg_seq = sequence;
	samsg->sadb_msg_pid = pid;

	if (sasupp_auth != NULL) {
		sasupp_auth->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
		sasupp_auth->sadb_supported_reserved = 0;
	}

	if (sasupp_encr != NULL) {
		sasupp_encr->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
		sasupp_encr->sadb_supported_exttype =
		    SADB_EXT_SUPPORTED_ENCRYPT;
		sasupp_encr->sadb_supported_reserved = 0;
	}

	if (espstack->esp_pfkey_q != NULL)
		putnext(espstack->esp_pfkey_q, keysock_out_mp);
	else {
		freemsg(keysock_out_mp);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Invoked when the algorithm table changes.  Causes SADB_REGISTER
 * messages containing the current list of algorithms to be
 * sent up to the ESP listeners.
 */
void
ipsecesp_algs_changed(netstack_t *ns)
{
	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;

	/*
	 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
	 * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
	 */
	(void) esp_register_out(0, 0, 0, espstack);
}

/*
 * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
 * and run it through ESP inbound processing again.
 *
 * The message is freed without processing if the netstack recorded in the
 * IPSEC_IN can no longer be found or is not the one the message points at.
 * Otherwise the SA's input function is called and, if it reports
 * IPSEC_STATUS_SUCCESS, the packet is handed to ip_fanout_proto_again().
 */
static void
inbound_task(void *arg)
{
	esph_t *esph;
	mblk_t *mp = (mblk_t *)arg;
	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
	netstack_t *ns;
	ipsecesp_stack_t *espstack;
	int ipsec_rc;

	ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
	if (ns == NULL || ns != ii->ipsec_in_ns) {
		/* Just freemsg(). */
		if (ns != NULL)
			netstack_rele(ns);
		freemsg(mp);
		return;
	}

	espstack = ns->netstack_ipsecesp;

	esp2dbg(espstack, ("in ESP inbound_task"));
	ASSERT(espstack != NULL);

	esph = ipsec_inbound_esp_sa(mp, ns);
	if (esph != NULL) {
		ASSERT(ii->ipsec_in_esp_sa != NULL);
		ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func(mp, esph);
		if (ipsec_rc == IPSEC_STATUS_SUCCESS)
			ip_fanout_proto_again(mp, NULL, NULL, NULL);
	}
	/* Drop the reference taken by netstack_find_by_stackid(). */
	netstack_rele(ns);
}

/*
 * Now that weak-key passed, actually ADD the security association, and
 * send back a reply ADD message.
 */
static int
esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
    int *diagnostic, ipsecesp_stack_t *espstack)
{
	isaf_t *primary = NULL, *secondary, *inbound, *outbound;
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
	struct sockaddr_in *dst;
	struct sockaddr_in6 *dst6;
	boolean_t is_ipv4, clone = B_FALSE, is_inbound = B_FALSE;
	uint32_t *dstaddr;
	ipsa_t *larval = NULL;
	ipsacq_t *acqrec;
	iacqf_t *acq_bucket;
	mblk_t *acq_msgs = NULL;
	int rc;
	sadb_t *sp;
	int outhash;
	mblk_t *lpkt;
	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	/*
	 * Locate the appropriate table(s).
3252 */ 3253 3254 dst = (struct sockaddr_in *)(dstext + 1); 3255 dst6 = (struct sockaddr_in6 *)dst; 3256 is_ipv4 = (dst->sin_family == AF_INET); 3257 if (is_ipv4) { 3258 sp = &espstack->esp_sadb.s_v4; 3259 dstaddr = (uint32_t *)(&dst->sin_addr); 3260 outhash = OUTBOUND_HASH_V4(sp, *(ipaddr_t *)dstaddr); 3261 } else { 3262 sp = &espstack->esp_sadb.s_v6; 3263 dstaddr = (uint32_t *)(&dst6->sin6_addr); 3264 outhash = OUTBOUND_HASH_V6(sp, *(in6_addr_t *)dstaddr); 3265 } 3266 3267 inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi); 3268 outbound = &sp->sdb_of[outhash]; 3269 3270 /* 3271 * Use the direction flags provided by the KMD to determine 3272 * if the inbound or outbound table should be the primary 3273 * for this SA. If these flags were absent then make this 3274 * decision based on the addresses. 3275 */ 3276 if (assoc->sadb_sa_flags & IPSA_F_INBOUND) { 3277 primary = inbound; 3278 secondary = outbound; 3279 is_inbound = B_TRUE; 3280 if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND) 3281 clone = B_TRUE; 3282 } else { 3283 if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND) { 3284 primary = outbound; 3285 secondary = inbound; 3286 } 3287 } 3288 3289 if (primary == NULL) { 3290 /* 3291 * The KMD did not set a direction flag, determine which 3292 * table to insert the SA into based on addresses. 3293 */ 3294 switch (ksi->ks_in_dsttype) { 3295 case KS_IN_ADDR_MBCAST: 3296 clone = B_TRUE; /* All mcast SAs can be bidirectional */ 3297 assoc->sadb_sa_flags |= IPSA_F_OUTBOUND; 3298 /* FALLTHRU */ 3299 /* 3300 * If the source address is either one of mine, or unspecified 3301 * (which is best summed up by saying "not 'not mine'"), 3302 * then the association is potentially bi-directional, 3303 * in that it can be used for inbound traffic and outbound 3304 * traffic. The best example of such an SA is a multicast 3305 * SA (which allows me to receive the outbound traffic). 
3306 */ 3307 case KS_IN_ADDR_ME: 3308 assoc->sadb_sa_flags |= IPSA_F_INBOUND; 3309 primary = inbound; 3310 secondary = outbound; 3311 if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME) 3312 clone = B_TRUE; 3313 is_inbound = B_TRUE; 3314 break; 3315 /* 3316 * If the source address literally not mine (either 3317 * unspecified or not mine), then this SA may have an 3318 * address that WILL be mine after some configuration. 3319 * We pay the price for this by making it a bi-directional 3320 * SA. 3321 */ 3322 case KS_IN_ADDR_NOTME: 3323 assoc->sadb_sa_flags |= IPSA_F_OUTBOUND; 3324 primary = outbound; 3325 secondary = inbound; 3326 if (ksi->ks_in_srctype != KS_IN_ADDR_ME) { 3327 assoc->sadb_sa_flags |= IPSA_F_INBOUND; 3328 clone = B_TRUE; 3329 } 3330 break; 3331 default: 3332 *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST; 3333 return (EINVAL); 3334 } 3335 } 3336 3337 /* 3338 * Find a ACQUIRE list entry if possible. If we've added an SA that 3339 * suits the needs of an ACQUIRE list entry, we can eliminate the 3340 * ACQUIRE list entry and transmit the enqueued packets. Use the 3341 * high-bit of the sequence number to queue it. Key off destination 3342 * addr, and change acqrec's state. 3343 */ 3344 3345 if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) { 3346 acq_bucket = &sp->sdb_acq[outhash]; 3347 mutex_enter(&acq_bucket->iacqf_lock); 3348 for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL; 3349 acqrec = acqrec->ipsacq_next) { 3350 mutex_enter(&acqrec->ipsacq_lock); 3351 /* 3352 * Q: I only check sequence. Should I check dst? 3353 * A: Yes, check dest because those are the packets 3354 * that are queued up. 3355 */ 3356 if (acqrec->ipsacq_seq == samsg->sadb_msg_seq && 3357 IPSA_ARE_ADDR_EQUAL(dstaddr, 3358 acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) 3359 break; 3360 mutex_exit(&acqrec->ipsacq_lock); 3361 } 3362 if (acqrec != NULL) { 3363 /* 3364 * AHA! I found an ACQUIRE record for this SA. 3365 * Grab the msg list, and free the acquire record. 
3366 * I already am holding the lock for this record, 3367 * so all I have to do is free it. 3368 */ 3369 acq_msgs = acqrec->ipsacq_mp; 3370 acqrec->ipsacq_mp = NULL; 3371 mutex_exit(&acqrec->ipsacq_lock); 3372 sadb_destroy_acquire(acqrec, 3373 espstack->ipsecesp_netstack); 3374 } 3375 mutex_exit(&acq_bucket->iacqf_lock); 3376 } 3377 3378 /* 3379 * Find PF_KEY message, and see if I'm an update. If so, find entry 3380 * in larval list (if there). 3381 */ 3382 3383 if (samsg->sadb_msg_type == SADB_UPDATE) { 3384 mutex_enter(&inbound->isaf_lock); 3385 larval = ipsec_getassocbyspi(inbound, assoc->sadb_sa_spi, 3386 ALL_ZEROES_PTR, dstaddr, dst->sin_family); 3387 mutex_exit(&inbound->isaf_lock); 3388 3389 if ((larval == NULL) || 3390 (larval->ipsa_state != IPSA_STATE_LARVAL)) { 3391 *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND; 3392 if (larval != NULL) { 3393 IPSA_REFRELE(larval); 3394 } 3395 esp0dbg(("Larval update, but larval disappeared.\n")); 3396 return (ESRCH); 3397 } /* Else sadb_common_add unlinks it for me! */ 3398 } 3399 3400 lpkt = NULL; 3401 if (larval != NULL) 3402 lpkt = sadb_clear_lpkt(larval); 3403 3404 rc = sadb_common_add(espstack->esp_sadb.s_ip_q, espstack->esp_pfkey_q, 3405 mp, samsg, ksi, primary, secondary, larval, clone, is_inbound, 3406 diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb); 3407 3408 if (rc == 0 && lpkt != NULL) 3409 rc = !taskq_dispatch(esp_taskq, inbound_task, lpkt, TQ_NOSLEEP); 3410 3411 if (rc != 0) { 3412 ip_drop_packet(lpkt, B_TRUE, NULL, NULL, 3413 DROPPER(ipss, ipds_sadb_inlarval_timeout), 3414 &espstack->esp_dropper); 3415 } 3416 3417 /* 3418 * How much more stack will I create with all of these 3419 * esp_outbound() calls? 
3420 */ 3421 3422 while (acq_msgs != NULL) { 3423 mblk_t *mp = acq_msgs; 3424 3425 acq_msgs = acq_msgs->b_next; 3426 mp->b_next = NULL; 3427 if (rc == 0) { 3428 if (ipsec_outbound_sa(mp, IPPROTO_ESP)) { 3429 ((ipsec_out_t *)(mp->b_rptr))-> 3430 ipsec_out_esp_done = B_TRUE; 3431 if (esp_outbound(mp) == IPSEC_STATUS_SUCCESS) { 3432 ipha_t *ipha; 3433 3434 /* do AH processing if needed */ 3435 if (!esp_do_outbound_ah(mp)) 3436 continue; 3437 3438 ipha = (ipha_t *)mp->b_cont->b_rptr; 3439 3440 /* finish IPsec processing */ 3441 if (is_ipv4) { 3442 ip_wput_ipsec_out(NULL, mp, 3443 ipha, NULL, NULL); 3444 } else { 3445 ip6_t *ip6h = (ip6_t *)ipha; 3446 ip_wput_ipsec_out_v6(NULL, 3447 mp, ip6h, NULL, NULL); 3448 } 3449 } 3450 continue; 3451 } 3452 } 3453 ESP_BUMP_STAT(espstack, out_discards); 3454 ip_drop_packet(mp, B_FALSE, NULL, NULL, 3455 DROPPER(ipss, ipds_sadb_acquire_timeout), 3456 &espstack->esp_dropper); 3457 } 3458 3459 return (rc); 3460 } 3461 3462 /* 3463 * Add new ESP security association. This may become a generic AH/ESP 3464 * routine eventually. 
3465 */ 3466 static int 3467 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) 3468 { 3469 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3470 sadb_address_t *srcext = 3471 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3472 sadb_address_t *dstext = 3473 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3474 sadb_address_t *isrcext = 3475 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC]; 3476 sadb_address_t *idstext = 3477 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST]; 3478 sadb_address_t *nttext_loc = 3479 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC]; 3480 sadb_address_t *nttext_rem = 3481 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM]; 3482 sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH]; 3483 sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT]; 3484 struct sockaddr_in *src, *dst; 3485 struct sockaddr_in *natt_loc, *natt_rem; 3486 struct sockaddr_in6 *natt_loc6, *natt_rem6; 3487 sadb_lifetime_t *soft = 3488 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT]; 3489 sadb_lifetime_t *hard = 3490 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD]; 3491 sadb_lifetime_t *idle = 3492 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE]; 3493 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3494 ipsec_stack_t *ipss = ns->netstack_ipsec; 3495 3496 /* I need certain extensions present for an ADD message. 
*/ 3497 if (srcext == NULL) { 3498 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC; 3499 return (EINVAL); 3500 } 3501 if (dstext == NULL) { 3502 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3503 return (EINVAL); 3504 } 3505 if (isrcext == NULL && idstext != NULL) { 3506 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC; 3507 return (EINVAL); 3508 } 3509 if (isrcext != NULL && idstext == NULL) { 3510 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST; 3511 return (EINVAL); 3512 } 3513 if (assoc == NULL) { 3514 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3515 return (EINVAL); 3516 } 3517 if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) { 3518 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY; 3519 return (EINVAL); 3520 } 3521 3522 src = (struct sockaddr_in *)(srcext + 1); 3523 dst = (struct sockaddr_in *)(dstext + 1); 3524 natt_loc = (struct sockaddr_in *)(nttext_loc + 1); 3525 natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1); 3526 natt_rem = (struct sockaddr_in *)(nttext_rem + 1); 3527 natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1); 3528 3529 /* Sundry ADD-specific reality checks. */ 3530 /* XXX STATS : Logging/stats here? 
*/ 3531 3532 if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) && 3533 (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) { 3534 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE; 3535 return (EINVAL); 3536 } 3537 if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) { 3538 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3539 return (EINVAL); 3540 } 3541 3542 if (assoc->sadb_sa_encrypt == SADB_EALG_NULL && 3543 assoc->sadb_sa_auth == SADB_AALG_NONE) { 3544 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3545 return (EINVAL); 3546 } 3547 3548 if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) { 3549 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; 3550 return (EINVAL); 3551 } 3552 3553 if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) { 3554 return (EINVAL); 3555 } 3556 ASSERT(src->sin_family == dst->sin_family); 3557 3558 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) { 3559 if (nttext_loc == NULL) { 3560 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC; 3561 return (EINVAL); 3562 } 3563 3564 if (natt_loc->sin_family == AF_INET6 && 3565 !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) { 3566 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC; 3567 return (EINVAL); 3568 } 3569 } 3570 3571 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) { 3572 if (nttext_rem == NULL) { 3573 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM; 3574 return (EINVAL); 3575 } 3576 if (natt_rem->sin_family == AF_INET6 && 3577 !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) { 3578 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM; 3579 return (EINVAL); 3580 } 3581 } 3582 3583 3584 /* Stuff I don't support, for now. XXX Diagnostic? */ 3585 if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL || 3586 ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL) 3587 return (EOPNOTSUPP); 3588 3589 /* 3590 * XXX Policy : I'm not checking identities or sensitivity 3591 * labels at this time, but if I did, I'd do them here, before I sent 3592 * the weak key check up to the algorithm. 
3593 */ 3594 3595 mutex_enter(&ipss->ipsec_alg_lock); 3596 3597 /* 3598 * First locate the authentication algorithm. 3599 */ 3600 if (akey != NULL) { 3601 ipsec_alginfo_t *aalg; 3602 3603 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] 3604 [assoc->sadb_sa_auth]; 3605 if (aalg == NULL || !ALG_VALID(aalg)) { 3606 mutex_exit(&ipss->ipsec_alg_lock); 3607 esp1dbg(espstack, ("Couldn't find auth alg #%d.\n", 3608 assoc->sadb_sa_auth)); 3609 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3610 return (EINVAL); 3611 } 3612 3613 /* 3614 * Sanity check key sizes. 3615 * Note: It's not possible to use SADB_AALG_NONE because 3616 * this auth_alg is not defined with ALG_FLAG_VALID. If this 3617 * ever changes, the same check for SADB_AALG_NONE and 3618 * a auth_key != NULL should be made here ( see below). 3619 */ 3620 if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) { 3621 mutex_exit(&ipss->ipsec_alg_lock); 3622 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS; 3623 return (EINVAL); 3624 } 3625 ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3626 3627 /* check key and fix parity if needed */ 3628 if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE, 3629 diagnostic) != 0) { 3630 mutex_exit(&ipss->ipsec_alg_lock); 3631 return (EINVAL); 3632 } 3633 } 3634 3635 /* 3636 * Then locate the encryption algorithm. 3637 */ 3638 if (ekey != NULL) { 3639 ipsec_alginfo_t *ealg; 3640 3641 ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] 3642 [assoc->sadb_sa_encrypt]; 3643 if (ealg == NULL || !ALG_VALID(ealg)) { 3644 mutex_exit(&ipss->ipsec_alg_lock); 3645 esp1dbg(espstack, ("Couldn't find encr alg #%d.\n", 3646 assoc->sadb_sa_encrypt)); 3647 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3648 return (EINVAL); 3649 } 3650 3651 /* 3652 * Sanity check key sizes. If the encryption algorithm is 3653 * SADB_EALG_NULL but the encryption key is NOT 3654 * NULL then complain. 
3655 */ 3656 if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) || 3657 (!ipsec_valid_key_size(ekey->sadb_key_bits, ealg))) { 3658 mutex_exit(&ipss->ipsec_alg_lock); 3659 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS; 3660 return (EINVAL); 3661 } 3662 ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3663 3664 /* check key */ 3665 if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE, 3666 diagnostic) != 0) { 3667 mutex_exit(&ipss->ipsec_alg_lock); 3668 return (EINVAL); 3669 } 3670 } 3671 mutex_exit(&ipss->ipsec_alg_lock); 3672 3673 return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, 3674 diagnostic, espstack)); 3675 } 3676 3677 /* 3678 * Update a security association. Updates come in two varieties. The first 3679 * is an update of lifetimes on a non-larval SA. The second is an update of 3680 * a larval SA, which ends up looking a lot more like an add. 3681 */ 3682 static int 3683 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3684 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type) 3685 { 3686 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3687 mblk_t *buf_pkt; 3688 int rcode; 3689 3690 sadb_address_t *dstext = 3691 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3692 3693 if (dstext == NULL) { 3694 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3695 return (EINVAL); 3696 } 3697 3698 rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb, 3699 diagnostic, espstack->esp_pfkey_q, esp_add_sa, 3700 espstack->ipsecesp_netstack, sadb_msg_type); 3701 3702 if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) || 3703 (rcode != 0)) { 3704 return (rcode); 3705 } 3706 3707 HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec, 3708 espstack->esp_dropper, buf_pkt); 3709 3710 return (rcode); 3711 } 3712 3713 /* 3714 * Delete a security association. This is REALLY likely to be code common to 3715 * both AH and ESP. Find the association, then unlink it. 
3716 */ 3717 static int 3718 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3719 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type) 3720 { 3721 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3722 sadb_address_t *dstext = 3723 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3724 sadb_address_t *srcext = 3725 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3726 struct sockaddr_in *sin; 3727 3728 if (assoc == NULL) { 3729 if (dstext != NULL) { 3730 sin = (struct sockaddr_in *)(dstext + 1); 3731 } else if (srcext != NULL) { 3732 sin = (struct sockaddr_in *)(srcext + 1); 3733 } else { 3734 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3735 return (EINVAL); 3736 } 3737 return (sadb_purge_sa(mp, ksi, 3738 (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : 3739 &espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 3740 espstack->esp_sadb.s_ip_q)); 3741 } 3742 3743 return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic, 3744 espstack->esp_pfkey_q, sadb_msg_type)); 3745 } 3746 3747 /* 3748 * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP 3749 * messages. 3750 */ 3751 static void 3752 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 3753 { 3754 int error; 3755 sadb_msg_t *samsg; 3756 3757 /* 3758 * Dump each fanout, bailing if error is non-zero. 3759 */ 3760 3761 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi, 3762 &espstack->esp_sadb.s_v4); 3763 if (error != 0) 3764 goto bail; 3765 3766 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi, 3767 &espstack->esp_sadb.s_v6); 3768 bail: 3769 ASSERT(mp->b_cont != NULL); 3770 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3771 samsg->sadb_msg_errno = (uint8_t)error; 3772 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, 3773 (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL); 3774 } 3775 3776 /* 3777 * First-cut reality check for an inbound PF_KEY message. 
3778 */ 3779 static boolean_t 3780 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi, 3781 ipsecesp_stack_t *espstack) 3782 { 3783 int diagnostic; 3784 3785 if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) { 3786 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT; 3787 goto badmsg; 3788 } 3789 if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL || 3790 ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) { 3791 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT; 3792 goto badmsg; 3793 } 3794 return (B_FALSE); /* False ==> no failures */ 3795 3796 badmsg: 3797 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 3798 ksi->ks_in_serial); 3799 return (B_TRUE); /* True ==> failures */ 3800 } 3801 3802 /* 3803 * ESP parsing of PF_KEY messages. Keysock did most of the really silly 3804 * error cases. What I receive is a fully-formed, syntactically legal 3805 * PF_KEY message. I then need to check semantics... 3806 * 3807 * This code may become common to AH and ESP. Stay tuned. 3808 * 3809 * I also make the assumption that db_ref's are cool. If this assumption 3810 * is wrong, this means that someone other than keysock or me has been 3811 * mucking with PF_KEY messages. 3812 */ 3813 static void 3814 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack) 3815 { 3816 mblk_t *msg = mp->b_cont; 3817 sadb_msg_t *samsg; 3818 keysock_in_t *ksi; 3819 int error; 3820 int diagnostic = SADB_X_DIAGNOSTIC_NONE; 3821 3822 ASSERT(msg != NULL); 3823 3824 samsg = (sadb_msg_t *)msg->b_rptr; 3825 ksi = (keysock_in_t *)mp->b_rptr; 3826 3827 /* 3828 * If applicable, convert unspecified AF_INET6 to unspecified 3829 * AF_INET. And do other address reality checks. 
3830 */ 3831 if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp, 3832 espstack->ipsecesp_netstack) || 3833 esp_pfkey_reality_failures(mp, ksi, espstack)) { 3834 return; 3835 } 3836 3837 switch (samsg->sadb_msg_type) { 3838 case SADB_ADD: 3839 error = esp_add_sa(mp, ksi, &diagnostic, 3840 espstack->ipsecesp_netstack); 3841 if (error != 0) { 3842 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3843 diagnostic, ksi->ks_in_serial); 3844 } 3845 /* else esp_add_sa() took care of things. */ 3846 break; 3847 case SADB_DELETE: 3848 case SADB_X_DELPAIR: 3849 case SADB_X_DELPAIR_STATE: 3850 error = esp_del_sa(mp, ksi, &diagnostic, espstack, 3851 samsg->sadb_msg_type); 3852 if (error != 0) { 3853 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3854 diagnostic, ksi->ks_in_serial); 3855 } 3856 /* Else esp_del_sa() took care of things. */ 3857 break; 3858 case SADB_GET: 3859 error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb, 3860 &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type); 3861 if (error != 0) { 3862 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3863 diagnostic, ksi->ks_in_serial); 3864 } 3865 /* Else sadb_get_sa() took care of things. */ 3866 break; 3867 case SADB_FLUSH: 3868 sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack); 3869 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL); 3870 break; 3871 case SADB_REGISTER: 3872 /* 3873 * Hmmm, let's do it! Check for extensions (there should 3874 * be none), extract the fields, call esp_register_out(), 3875 * then either free or report an error. 3876 * 3877 * Keysock takes care of the PF_KEY bookkeeping for this. 3878 */ 3879 if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid, 3880 ksi->ks_in_serial, espstack)) { 3881 freemsg(mp); 3882 } else { 3883 /* 3884 * Only way this path hits is if there is a memory 3885 * failure. It will not return B_FALSE because of 3886 * lack of esp_pfkey_q if I am in wput(). 
3887 */ 3888 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, 3889 diagnostic, ksi->ks_in_serial); 3890 } 3891 break; 3892 case SADB_UPDATE: 3893 case SADB_X_UPDATEPAIR: 3894 /* 3895 * Find a larval, if not there, find a full one and get 3896 * strict. 3897 */ 3898 error = esp_update_sa(mp, ksi, &diagnostic, espstack, 3899 samsg->sadb_msg_type); 3900 if (error != 0) { 3901 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3902 diagnostic, ksi->ks_in_serial); 3903 } 3904 /* else esp_update_sa() took care of things. */ 3905 break; 3906 case SADB_GETSPI: 3907 /* 3908 * Reserve a new larval entry. 3909 */ 3910 esp_getspi(mp, ksi, espstack); 3911 break; 3912 case SADB_ACQUIRE: 3913 /* 3914 * Find larval and/or ACQUIRE record and kill it (them), I'm 3915 * most likely an error. Inbound ACQUIRE messages should only 3916 * have the base header. 3917 */ 3918 sadb_in_acquire(samsg, &espstack->esp_sadb, 3919 espstack->esp_pfkey_q, espstack->ipsecesp_netstack); 3920 freemsg(mp); 3921 break; 3922 case SADB_DUMP: 3923 /* 3924 * Dump all entries. 3925 */ 3926 esp_dump(mp, ksi, espstack); 3927 /* esp_dump will take care of the return message, etc. */ 3928 break; 3929 case SADB_EXPIRE: 3930 /* Should never reach me. */ 3931 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP, 3932 diagnostic, ksi->ks_in_serial); 3933 break; 3934 default: 3935 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, 3936 SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial); 3937 break; 3938 } 3939 } 3940 3941 /* 3942 * Handle case where PF_KEY says it can't find a keysock for one of my 3943 * ACQUIRE messages. 
3944 */ 3945 static void 3946 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack) 3947 { 3948 sadb_msg_t *samsg; 3949 keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr; 3950 3951 if (mp->b_cont == NULL) { 3952 freemsg(mp); 3953 return; 3954 } 3955 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3956 3957 /* 3958 * If keysock can't find any registered, delete the acquire record 3959 * immediately, and handle errors. 3960 */ 3961 if (samsg->sadb_msg_type == SADB_ACQUIRE) { 3962 samsg->sadb_msg_errno = kse->ks_err_errno; 3963 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg)); 3964 /* 3965 * Use the write-side of the esp_pfkey_q, in case there is 3966 * no esp_sadb.s_ip_q. 3967 */ 3968 sadb_in_acquire(samsg, &espstack->esp_sadb, 3969 WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack); 3970 } 3971 3972 freemsg(mp); 3973 } 3974 3975 /* 3976 * ESP module write put routine. 3977 */ 3978 static void 3979 ipsecesp_wput(queue_t *q, mblk_t *mp) 3980 { 3981 ipsec_info_t *ii; 3982 struct iocblk *iocp; 3983 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 3984 3985 esp3dbg(espstack, ("In esp_wput().\n")); 3986 3987 /* NOTE: Each case must take care of freeing or passing mp. */ 3988 switch (mp->b_datap->db_type) { 3989 case M_CTL: 3990 if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) { 3991 /* Not big enough message. */ 3992 freemsg(mp); 3993 break; 3994 } 3995 ii = (ipsec_info_t *)mp->b_rptr; 3996 3997 switch (ii->ipsec_info_type) { 3998 case KEYSOCK_OUT_ERR: 3999 esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n")); 4000 esp_keysock_no_socket(mp, espstack); 4001 break; 4002 case KEYSOCK_IN: 4003 ESP_BUMP_STAT(espstack, keysock_in); 4004 esp3dbg(espstack, ("Got KEYSOCK_IN message.\n")); 4005 4006 /* Parse the message. 
*/ 4007 esp_parse_pfkey(mp, espstack); 4008 break; 4009 case KEYSOCK_HELLO: 4010 sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp, 4011 esp_ager, (void *)espstack, &espstack->esp_event, 4012 SADB_SATYPE_ESP); 4013 break; 4014 default: 4015 esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n", 4016 ii->ipsec_info_type)); 4017 freemsg(mp); 4018 break; 4019 } 4020 break; 4021 case M_IOCTL: 4022 iocp = (struct iocblk *)mp->b_rptr; 4023 switch (iocp->ioc_cmd) { 4024 case ND_SET: 4025 case ND_GET: 4026 if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) { 4027 qreply(q, mp); 4028 return; 4029 } else { 4030 iocp->ioc_error = ENOENT; 4031 } 4032 /* FALLTHRU */ 4033 default: 4034 /* We really don't support any other ioctls, do we? */ 4035 4036 /* Return EINVAL */ 4037 if (iocp->ioc_error != ENOENT) 4038 iocp->ioc_error = EINVAL; 4039 iocp->ioc_count = 0; 4040 mp->b_datap->db_type = M_IOCACK; 4041 qreply(q, mp); 4042 return; 4043 } 4044 default: 4045 esp3dbg(espstack, 4046 ("Got default message, type %d, passing to IP.\n", 4047 mp->b_datap->db_type)); 4048 putnext(q, mp); 4049 } 4050 } 4051 4052 /* 4053 * Process an outbound ESP packet that can be accelerated by a IPsec 4054 * hardware acceleration capable Provider. 4055 * The caller already inserted and initialized the ESP header. 4056 * This function allocates a tagging M_CTL, and adds room at the end 4057 * of the packet to hold the ICV if authentication is needed. 4058 * 4059 * On success returns B_TRUE, on failure returns B_FALSE and frees the 4060 * mblk chain ipsec_out. 
4061 */ 4062 static ipsec_status_t 4063 esp_outbound_accelerated(mblk_t *ipsec_out, uint_t icv_len) 4064 { 4065 ipsec_out_t *io; 4066 mblk_t *lastmp; 4067 netstack_t *ns; 4068 ipsecesp_stack_t *espstack; 4069 ipsec_stack_t *ipss; 4070 4071 io = (ipsec_out_t *)ipsec_out->b_rptr; 4072 ns = io->ipsec_out_ns; 4073 espstack = ns->netstack_ipsecesp; 4074 ipss = ns->netstack_ipsec; 4075 4076 ESP_BUMP_STAT(espstack, out_accelerated); 4077 4078 /* mark packet as being accelerated in IPSEC_OUT */ 4079 ASSERT(io->ipsec_out_accelerated == B_FALSE); 4080 io->ipsec_out_accelerated = B_TRUE; 4081 4082 /* 4083 * add room at the end of the packet for the ICV if needed 4084 */ 4085 if (icv_len > 0) { 4086 /* go to last mblk */ 4087 lastmp = ipsec_out; /* For following while loop. */ 4088 do { 4089 lastmp = lastmp->b_cont; 4090 } while (lastmp->b_cont != NULL); 4091 4092 /* if not enough available room, allocate new mblk */ 4093 if ((lastmp->b_wptr + icv_len) > lastmp->b_datap->db_lim) { 4094 lastmp->b_cont = allocb(icv_len, BPRI_HI); 4095 if (lastmp->b_cont == NULL) { 4096 ESP_BUMP_STAT(espstack, out_discards); 4097 ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL, 4098 DROPPER(ipss, ipds_esp_nomem), 4099 &espstack->esp_dropper); 4100 return (IPSEC_STATUS_FAILED); 4101 } 4102 lastmp = lastmp->b_cont; 4103 } 4104 lastmp->b_wptr += icv_len; 4105 } 4106 4107 return (IPSEC_STATUS_SUCCESS); 4108 } 4109 4110 /* 4111 * Process an inbound accelerated ESP packet. 4112 * On success returns B_TRUE, on failure returns B_FALSE and frees the 4113 * mblk chain ipsec_in. 
4114 */ 4115 static ipsec_status_t 4116 esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, 4117 ipsa_t *assoc) 4118 { 4119 ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr; 4120 mblk_t *hada_mp; 4121 uint32_t icv_len = 0; 4122 da_ipsec_t *hada; 4123 ipha_t *ipha; 4124 ip6_t *ip6h; 4125 kstat_named_t *counter; 4126 netstack_t *ns = ii->ipsec_in_ns; 4127 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 4128 ipsec_stack_t *ipss = ns->netstack_ipsec; 4129 4130 ESP_BUMP_STAT(espstack, in_accelerated); 4131 4132 hada_mp = ii->ipsec_in_da; 4133 ASSERT(hada_mp != NULL); 4134 hada = (da_ipsec_t *)hada_mp->b_rptr; 4135 4136 /* 4137 * We only support one level of decapsulation in hardware, so 4138 * nuke the pointer. 4139 */ 4140 ii->ipsec_in_da = NULL; 4141 ii->ipsec_in_accelerated = B_FALSE; 4142 4143 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) { 4144 /* 4145 * ESP with authentication. We expect the Provider to have 4146 * computed the ICV and placed it in the hardware acceleration 4147 * data attributes. 4148 * 4149 * Extract ICV length from attributes M_CTL and sanity check 4150 * its value. We allow the mblk to be smaller than da_ipsec_t 4151 * for a small ICV, as long as the entire ICV fits within the 4152 * mblk. 4153 * 4154 * Also ensures that the ICV length computed by Provider 4155 * corresponds to the ICV length of the agorithm specified by 4156 * the SA. 
4157 */ 4158 icv_len = hada->da_icv_len; 4159 if ((icv_len != assoc->ipsa_mac_len) || 4160 (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) < 4161 (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) { 4162 esp0dbg(("esp_inbound_accelerated: " 4163 "ICV len (%u) incorrect or mblk too small (%u)\n", 4164 icv_len, (uint32_t)(MBLKL(hada_mp)))); 4165 counter = DROPPER(ipss, ipds_esp_bad_auth); 4166 goto esp_in_discard; 4167 } 4168 } 4169 4170 /* get pointers to IP header */ 4171 if (isv4) { 4172 ipha = (ipha_t *)data_mp->b_rptr; 4173 } else { 4174 ip6h = (ip6_t *)data_mp->b_rptr; 4175 } 4176 4177 /* 4178 * Compare ICV in ESP packet vs ICV computed by adapter. 4179 * We also remove the ICV from the end of the packet since 4180 * it will no longer be needed. 4181 * 4182 * Assume that esp_inbound() already ensured that the pkt 4183 * was in one mblk. 4184 */ 4185 ASSERT(data_mp->b_cont == NULL); 4186 data_mp->b_wptr -= icv_len; 4187 /* adjust IP header */ 4188 if (isv4) 4189 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - icv_len); 4190 else 4191 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - icv_len); 4192 if (icv_len && bcmp(hada->da_icv, data_mp->b_wptr, icv_len)) { 4193 int af; 4194 void *addr; 4195 4196 if (isv4) { 4197 addr = &ipha->ipha_dst; 4198 af = AF_INET; 4199 } else { 4200 addr = &ip6h->ip6_dst; 4201 af = AF_INET6; 4202 } 4203 4204 /* 4205 * Log the event. Don't print to the console, block 4206 * potential denial-of-service attack. 4207 */ 4208 ESP_BUMP_STAT(espstack, bad_auth); 4209 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 4210 "ESP Authentication failed spi %x, dst_addr %s", 4211 assoc->ipsa_spi, addr, af, espstack->ipsecesp_netstack); 4212 counter = DROPPER(ipss, ipds_esp_bad_auth); 4213 goto esp_in_discard; 4214 } 4215 4216 esp3dbg(espstack, ("esp_inbound_accelerated: ESP authentication " 4217 "succeeded, checking replay\n")); 4218 4219 ipsec_in->b_cont = data_mp; 4220 4221 /* 4222 * Remove ESP header and padding from packet. 
4223 */ 4224 if (!esp_strip_header(data_mp, ii->ipsec_in_v4, assoc->ipsa_iv_len, 4225 &counter, espstack)) { 4226 esp1dbg(espstack, ("esp_inbound_accelerated: " 4227 "esp_strip_header() failed\n")); 4228 goto esp_in_discard; 4229 } 4230 4231 freeb(hada_mp); 4232 4233 /* 4234 * Account for usage.. 4235 */ 4236 if (!esp_age_bytes(assoc, msgdsize(data_mp), B_TRUE)) { 4237 /* The ipsa has hit hard expiration, LOG and AUDIT. */ 4238 ESP_BUMP_STAT(espstack, bytes_expired); 4239 IP_ESP_BUMP_STAT(ipss, in_discards); 4240 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 4241 "ESP association 0x%x, dst %s had bytes expire.\n", 4242 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 4243 espstack->ipsecesp_netstack); 4244 ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, 4245 DROPPER(ipss, ipds_esp_bytes_expire), 4246 &espstack->esp_dropper); 4247 return (IPSEC_STATUS_FAILED); 4248 } 4249 4250 /* done processing the packet */ 4251 return (IPSEC_STATUS_SUCCESS); 4252 4253 esp_in_discard: 4254 IP_ESP_BUMP_STAT(ipss, in_discards); 4255 freeb(hada_mp); 4256 4257 ipsec_in->b_cont = data_mp; /* For ip_drop_packet()'s sake... */ 4258 ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, 4259 &espstack->esp_dropper); 4260 4261 return (IPSEC_STATUS_FAILED); 4262 } 4263 4264 /* 4265 * Wrapper to allow IP to trigger an ESP association failure message 4266 * during inbound SA selection. 
4267 */ 4268 void 4269 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt, 4270 uint32_t spi, void *addr, int af, ipsecesp_stack_t *espstack) 4271 { 4272 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 4273 4274 if (espstack->ipsecesp_log_unknown_spi) { 4275 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi, 4276 addr, af, espstack->ipsecesp_netstack); 4277 } 4278 4279 ip_drop_packet(mp, B_TRUE, NULL, NULL, 4280 DROPPER(ipss, ipds_esp_no_sa), 4281 &espstack->esp_dropper); 4282 } 4283 4284 /* 4285 * Initialize the ESP input and output processing functions. 4286 */ 4287 void 4288 ipsecesp_init_funcs(ipsa_t *sa) 4289 { 4290 if (sa->ipsa_output_func == NULL) 4291 sa->ipsa_output_func = esp_outbound; 4292 if (sa->ipsa_input_func == NULL) 4293 sa->ipsa_input_func = esp_inbound; 4294 } 4295