1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/errno.h> 30 #include <sys/strlog.h> 31 #include <sys/tihdr.h> 32 #include <sys/socket.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/kmem.h> 36 #include <sys/zone.h> 37 #include <sys/sysmacros.h> 38 #include <sys/cmn_err.h> 39 #include <sys/vtrace.h> 40 #include <sys/debug.h> 41 #include <sys/atomic.h> 42 #include <sys/strsun.h> 43 #include <sys/random.h> 44 #include <netinet/in.h> 45 #include <net/if.h> 46 #include <netinet/ip6.h> 47 #include <net/pfkeyv2.h> 48 #include <net/pfpolicy.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/nd.h> 53 #include <inet/ip.h> 54 #include <inet/ip_impl.h> 55 #include <inet/ip6.h> 56 #include <inet/sadb.h> 57 #include <inet/ipsec_info.h> 58 #include <inet/ipsec_impl.h> 59 #include <inet/ipsecesp.h> 60 #include <inet/ipdrop.h> 61 #include <inet/tcp.h> 62 #include <sys/kstat.h> 63 #include <sys/policy.h> 64 #include <sys/strsun.h> 65 
#include <sys/strsubr.h>
#include <inet/udp_impl.h>
#include <sys/taskq.h>
#include <sys/note.h>

#include <sys/iphada.h>

#include <sys/tsol/tnet.h>

/*
 * Table of ND variables supported by ipsecesp. These are loaded into
 * ipsecesp_g_nd in ipsecesp_init_nd.
 * All of these are alterable, within the min/max values given, at run time.
 */
static ipsecespparam_t lcl_param_arr[] = {
	/* min	max			value	name */
	{ 0,	3,	0,	"ipsecesp_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
	{ 1,	10,	1,	"ipsecesp_reap_delay"},
	{ 1,	SADB_MAX_REPLAY, 64,	"ipsecesp_replay_size"},
	{ 1,	300,	15,	"ipsecesp_acquire_timeout"},
	{ 1,	1800,	90,	"ipsecesp_larval_timeout"},
	/* Default lifetime values for ACQUIRE messages. */
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
	{ 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
	{ 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
	{ 0,	1,	0,	"ipsecesp_log_unknown_spi"},
	{ 0,	2,	1,	"ipsecesp_padding_check"},
	{ 0,	600,	20,	"ipsecesp_nat_keepalive_interval"},
};

/*
 * Convenience accessors for the per-stack copy of the parameter table
 * (espstack->ipsecesp_params).  Index order must match lcl_param_arr above.
 */
#define	ipsecesp_debug	ipsecesp_params[0].ipsecesp_param_value
#define	ipsecesp_age_interval ipsecesp_params[1].ipsecesp_param_value
#define	ipsecesp_age_int_max	ipsecesp_params[1].ipsecesp_param_max
#define	ipsecesp_reap_delay	ipsecesp_params[2].ipsecesp_param_value
#define	ipsecesp_replay_size	ipsecesp_params[3].ipsecesp_param_value
#define	ipsecesp_acquire_timeout	\
	ipsecesp_params[4].ipsecesp_param_value
#define	ipsecesp_larval_timeout	\
	ipsecesp_params[5].ipsecesp_param_value
#define	ipsecesp_default_soft_bytes	\
	ipsecesp_params[6].ipsecesp_param_value
#define	ipsecesp_default_hard_bytes	\
	ipsecesp_params[7].ipsecesp_param_value
#define	ipsecesp_default_soft_addtime	\
	ipsecesp_params[8].ipsecesp_param_value
#define	ipsecesp_default_hard_addtime	\
	ipsecesp_params[9].ipsecesp_param_value
#define	ipsecesp_default_soft_usetime	\
	ipsecesp_params[10].ipsecesp_param_value
#define	ipsecesp_default_hard_usetime	\
	ipsecesp_params[11].ipsecesp_param_value
#define	ipsecesp_log_unknown_spi	\
	ipsecesp_params[12].ipsecesp_param_value
#define	ipsecesp_padding_check	\
	ipsecesp_params[13].ipsecesp_param_value
/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */

/* Debug printfs, gated on the per-stack ipsecesp_debug level (0..3). */
#define	esp0dbg(a)	printf a
/* NOTE: != 0 instead of > 0 so lint doesn't complain. */
#define	esp1dbg(espstack, a)	if (espstack->ipsecesp_debug != 0) printf a
#define	esp2dbg(espstack, a)	if (espstack->ipsecesp_debug > 1) printf a
#define	esp3dbg(espstack, a)	if (espstack->ipsecesp_debug > 2) printf a

static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ipsecesp_close(queue_t *);
static void ipsecesp_rput(queue_t *, mblk_t *);
static void ipsecesp_wput(queue_t *, mblk_t *);
static void	*ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	ipsecesp_stack_fini(netstackid_t stackid, void *arg);
static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);

static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t);
static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *,
    boolean_t, ipsa_t *);

static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
    ipsecesp_stack_t *, mblk_t *);
static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
    kstat_named_t **, ipsecesp_stack_t *);
static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t);
static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *,
    uint_t);
/* Clustering hook for SPI allocation; set by the cluster framework. */
extern void (*cl_inet_getspi)(netstackid_t, uint8_t, uint8_t *, size_t,
    void *);

/* Setable in /etc/system */
uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;

static struct module_info info = {
	5137, "ipsecesp", 0, INFPSZ, 65536, 1024
};

static struct qinit rinit = {
	(pfi_t)ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

static struct qinit winit = {
	(pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

struct streamtab ipsecespinfo = {
	&rinit, &winit, NULL, NULL
};

static taskq_t *esp_taskq;

/*
 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
 *
 * Question:	Do I need this, given that all instance's esps->esps_wq point
 *		to IP?
 *
 * Answer:	Yes, because I need to know which queue is BOUND to
 *		IPPROTO_ESP
 */

/*
 * Stats.  This may eventually become a full-blown SNMP MIB once that spec
 * stabilizes.
190 */ 191 192 typedef struct esp_kstats_s { 193 kstat_named_t esp_stat_num_aalgs; 194 kstat_named_t esp_stat_good_auth; 195 kstat_named_t esp_stat_bad_auth; 196 kstat_named_t esp_stat_bad_padding; 197 kstat_named_t esp_stat_replay_failures; 198 kstat_named_t esp_stat_replay_early_failures; 199 kstat_named_t esp_stat_keysock_in; 200 kstat_named_t esp_stat_out_requests; 201 kstat_named_t esp_stat_acquire_requests; 202 kstat_named_t esp_stat_bytes_expired; 203 kstat_named_t esp_stat_out_discards; 204 kstat_named_t esp_stat_in_accelerated; 205 kstat_named_t esp_stat_out_accelerated; 206 kstat_named_t esp_stat_noaccel; 207 kstat_named_t esp_stat_crypto_sync; 208 kstat_named_t esp_stat_crypto_async; 209 kstat_named_t esp_stat_crypto_failures; 210 kstat_named_t esp_stat_num_ealgs; 211 kstat_named_t esp_stat_bad_decrypt; 212 kstat_named_t esp_stat_sa_port_renumbers; 213 } esp_kstats_t; 214 215 /* 216 * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if 217 * kstat_create_netstack for espstack->esp_ksp succeeds, but when it 218 * fails, it will be NULL. Note this is done for all stack instances, 219 * so it *could* fail. 
hence a non-NULL checking is done for 220 * ESP_BUMP_STAT and ESP_DEBUMP_STAT 221 */ 222 #define ESP_BUMP_STAT(espstack, x) \ 223 do { \ 224 if (espstack->esp_kstats != NULL) \ 225 (espstack->esp_kstats->esp_stat_ ## x).value.ui64++; \ 226 _NOTE(CONSTCOND) \ 227 } while (0) 228 229 #define ESP_DEBUMP_STAT(espstack, x) \ 230 do { \ 231 if (espstack->esp_kstats != NULL) \ 232 (espstack->esp_kstats->esp_stat_ ## x).value.ui64--; \ 233 _NOTE(CONSTCOND) \ 234 } while (0) 235 236 static int esp_kstat_update(kstat_t *, int); 237 238 static boolean_t 239 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) 240 { 241 espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", 242 "net", KSTAT_TYPE_NAMED, 243 sizeof (esp_kstats_t) / sizeof (kstat_named_t), 244 KSTAT_FLAG_PERSISTENT, stackid); 245 246 if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) 247 return (B_FALSE); 248 249 espstack->esp_kstats = espstack->esp_ksp->ks_data; 250 251 espstack->esp_ksp->ks_update = esp_kstat_update; 252 espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid; 253 254 #define K64 KSTAT_DATA_UINT64 255 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64) 256 257 KI(num_aalgs); 258 KI(num_ealgs); 259 KI(good_auth); 260 KI(bad_auth); 261 KI(bad_padding); 262 KI(replay_failures); 263 KI(replay_early_failures); 264 KI(keysock_in); 265 KI(out_requests); 266 KI(acquire_requests); 267 KI(bytes_expired); 268 KI(out_discards); 269 KI(in_accelerated); 270 KI(out_accelerated); 271 KI(noaccel); 272 KI(crypto_sync); 273 KI(crypto_async); 274 KI(crypto_failures); 275 KI(bad_decrypt); 276 KI(sa_port_renumbers); 277 278 #undef KI 279 #undef K64 280 281 kstat_install(espstack->esp_ksp); 282 283 return (B_TRUE); 284 } 285 286 static int 287 esp_kstat_update(kstat_t *kp, int rw) 288 { 289 esp_kstats_t *ekp; 290 netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; 291 netstack_t *ns; 292 ipsec_stack_t *ipss; 293 294 if ((kp == NULL) || 
(kp->ks_data == NULL)) 295 return (EIO); 296 297 if (rw == KSTAT_WRITE) 298 return (EACCES); 299 300 ns = netstack_find_by_stackid(stackid); 301 if (ns == NULL) 302 return (-1); 303 ipss = ns->netstack_ipsec; 304 if (ipss == NULL) { 305 netstack_rele(ns); 306 return (-1); 307 } 308 ekp = (esp_kstats_t *)kp->ks_data; 309 310 mutex_enter(&ipss->ipsec_alg_lock); 311 ekp->esp_stat_num_aalgs.value.ui64 = 312 ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; 313 ekp->esp_stat_num_ealgs.value.ui64 = 314 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 315 mutex_exit(&ipss->ipsec_alg_lock); 316 317 netstack_rele(ns); 318 return (0); 319 } 320 321 #ifdef DEBUG 322 /* 323 * Debug routine, useful to see pre-encryption data. 324 */ 325 static char * 326 dump_msg(mblk_t *mp) 327 { 328 char tmp_str[3], tmp_line[256]; 329 330 while (mp != NULL) { 331 unsigned char *ptr; 332 333 printf("mblk address 0x%p, length %ld, db_ref %d " 334 "type %d, base 0x%p, lim 0x%p\n", 335 (void *) mp, (long)(mp->b_wptr - mp->b_rptr), 336 mp->b_datap->db_ref, mp->b_datap->db_type, 337 (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim); 338 ptr = mp->b_rptr; 339 340 tmp_line[0] = '\0'; 341 while (ptr < mp->b_wptr) { 342 uint_t diff; 343 344 diff = (ptr - mp->b_rptr); 345 if (!(diff & 0x1f)) { 346 if (strlen(tmp_line) > 0) { 347 printf("bytes: %s\n", tmp_line); 348 tmp_line[0] = '\0'; 349 } 350 } 351 if (!(diff & 0x3)) 352 (void) strcat(tmp_line, " "); 353 (void) sprintf(tmp_str, "%02x", *ptr); 354 (void) strcat(tmp_line, tmp_str); 355 ptr++; 356 } 357 if (strlen(tmp_line) > 0) 358 printf("bytes: %s\n", tmp_line); 359 360 mp = mp->b_cont; 361 } 362 363 return ("\n"); 364 } 365 366 #else /* DEBUG */ 367 static char * 368 dump_msg(mblk_t *mp) 369 { 370 printf("Find value of mp %p.\n", mp); 371 return ("\n"); 372 } 373 #endif /* DEBUG */ 374 375 /* 376 * Don't have to lock age_interval, as only one thread will access it at 377 * a time, because I control the one function that does with timeout(). 
378 */ 379 static void 380 esp_ager(void *arg) 381 { 382 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 383 netstack_t *ns = espstack->ipsecesp_netstack; 384 hrtime_t begin = gethrtime(); 385 386 sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 387 espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); 388 sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q, 389 espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); 390 391 espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q, 392 esp_ager, espstack, 393 &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max, 394 info.mi_idnum); 395 } 396 397 /* 398 * Get an ESP NDD parameter. 399 */ 400 /* ARGSUSED */ 401 static int 402 ipsecesp_param_get(q, mp, cp, cr) 403 queue_t *q; 404 mblk_t *mp; 405 caddr_t cp; 406 cred_t *cr; 407 { 408 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 409 uint_t value; 410 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 411 412 mutex_enter(&espstack->ipsecesp_param_lock); 413 value = ipsecesppa->ipsecesp_param_value; 414 mutex_exit(&espstack->ipsecesp_param_lock); 415 416 (void) mi_mpprintf(mp, "%u", value); 417 return (0); 418 } 419 420 /* 421 * This routine sets an NDD variable in a ipsecespparam_t structure. 422 */ 423 /* ARGSUSED */ 424 static int 425 ipsecesp_param_set(q, mp, value, cp, cr) 426 queue_t *q; 427 mblk_t *mp; 428 char *value; 429 caddr_t cp; 430 cred_t *cr; 431 { 432 ulong_t new_value; 433 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 434 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 435 436 /* 437 * Fail the request if the new value does not lie within the 438 * required bounds. 
439 */ 440 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 441 new_value < ipsecesppa->ipsecesp_param_min || 442 new_value > ipsecesppa->ipsecesp_param_max) { 443 return (EINVAL); 444 } 445 446 /* Set the new value */ 447 mutex_enter(&espstack->ipsecesp_param_lock); 448 ipsecesppa->ipsecesp_param_value = new_value; 449 mutex_exit(&espstack->ipsecesp_param_lock); 450 return (0); 451 } 452 453 /* 454 * Using lifetime NDD variables, fill in an extended combination's 455 * lifetime information. 456 */ 457 void 458 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) 459 { 460 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 461 462 ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes; 463 ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes; 464 ecomb->sadb_x_ecomb_soft_addtime = 465 espstack->ipsecesp_default_soft_addtime; 466 ecomb->sadb_x_ecomb_hard_addtime = 467 espstack->ipsecesp_default_hard_addtime; 468 ecomb->sadb_x_ecomb_soft_usetime = 469 espstack->ipsecesp_default_soft_usetime; 470 ecomb->sadb_x_ecomb_hard_usetime = 471 espstack->ipsecesp_default_hard_usetime; 472 } 473 474 /* 475 * Initialize things for ESP at module load time. 476 */ 477 boolean_t 478 ipsecesp_ddi_init(void) 479 { 480 esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, 481 IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); 482 483 /* 484 * We want to be informed each time a stack is created or 485 * destroyed in the kernel, so we can maintain the 486 * set of ipsecesp_stack_t's. 487 */ 488 netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL, 489 ipsecesp_stack_fini); 490 491 return (B_TRUE); 492 } 493 494 /* 495 * Walk through the param array specified registering each element with the 496 * named dispatch handler. 
 */
static boolean_t
ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
{
	for (; cnt-- > 0; espp++) {
		/* Skip entries with empty names; they are placeholders. */
		if (espp->ipsecesp_param_name != NULL &&
		    espp->ipsecesp_param_name[0]) {
			if (!nd_load(ndp,
			    espp->ipsecesp_param_name,
			    ipsecesp_param_get, ipsecesp_param_set,
			    (caddr_t)espp)) {
				/* On any failure, tear down what we loaded. */
				nd_free(ndp);
				return (B_FALSE);
			}
		}
	}
	return (B_TRUE);
}

/*
 * Initialize things for ESP for each stack instance.
 * Allocates the per-stack parameter table (a private copy of
 * lcl_param_arr), registers ND variables, creates kstats, and sets up
 * the per-stack SADB and ip_drop registration.
 */
static void *
ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsecesp_stack_t	*espstack;
	ipsecespparam_t	*espp;

	espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
	    KM_SLEEP);
	espstack->ipsecesp_netstack = ns;

	espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
	espstack->ipsecesp_params = espp;
	bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));

	(void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
	    A_CNT(lcl_param_arr));

	/* kstat failure is non-fatal; BUMP/DEBUMP macros check for NULL. */
	(void) esp_kstat_init(espstack, stackid);

	espstack->esp_sadb.s_acquire_timeout =
	    &espstack->ipsecesp_acquire_timeout;
	espstack->esp_sadb.s_acqfn = esp_send_acquire;
	sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
	    espstack->ipsecesp_netstack);

	mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);

	ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
	return (espstack);
}

/*
 * Destroy things for ESP at module unload time.
 */
void
ipsecesp_ddi_destroy(void)
{
	netstack_unregister(NS_IPSECESP);
	taskq_destroy(esp_taskq);
}

/*
 * Destroy things for ESP for one stack instance.
 * Undoes ipsecesp_stack_init(): cancels the ager timeout, tears down the
 * SADB, ip_drop registration, ND table, kstats, and frees the stack.
 */
static void
ipsecesp_stack_fini(netstackid_t stackid, void *arg)
{
	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;

	if (espstack->esp_pfkey_q != NULL) {
		(void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
	}
	espstack->esp_sadb.s_acqfn = NULL;
	espstack->esp_sadb.s_acquire_timeout = NULL;
	sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
	ip_drop_unregister(&espstack->esp_dropper);
	mutex_destroy(&espstack->ipsecesp_param_lock);
	nd_free(&espstack->ipsecesp_g_nd);

	kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
	espstack->ipsecesp_params = NULL;
	kstat_delete_netstack(espstack->esp_ksp, stackid);
	espstack->esp_ksp = NULL;
	espstack->esp_kstats = NULL;
	kmem_free(espstack, sizeof (*espstack));
}

/*
 * ESP module open routine.
 * Requires PRIV_SYS_IP_CONFIG; only MODOPEN (module push) is supported.
 * The first open in a stack claims esp_sadb.s_ip_q and binds IPPROTO_ESP.
 */
/* ARGSUSED */
static int
ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	netstack_t		*ns;
	ipsecesp_stack_t	*espstack;

	if (secpolicy_ip_config(credp, B_FALSE) != 0)
		return (EPERM);

	if (q->q_ptr != NULL)
		return (0);  /* Re-open of an already open instance. */

	if (sflag != MODOPEN)
		return (EINVAL);

	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	espstack = ns->netstack_ipsecesp;
	ASSERT(espstack != NULL);

	/*
	 * ASSUMPTIONS (because I'm MT_OCEXCL):
	 *
	 *	* I'm being pushed on top of IP for all my opens (incl. #1).
	 *	* Only ipsecesp_open() can write into esp_sadb.s_ip_q.
	 *	* Because of this, I can check lazily for esp_sadb.s_ip_q.
	 *
	 *  If these assumptions are wrong, I'm in BIG trouble...
	 */

	q->q_ptr = espstack;
	WR(q)->q_ptr = q->q_ptr;

	if (espstack->esp_sadb.s_ip_q == NULL) {
		struct T_unbind_req *tur;

		espstack->esp_sadb.s_ip_q = WR(q);
		/* Allocate an unbind... */
		espstack->esp_ip_unbind = allocb(sizeof (struct T_unbind_req),
		    BPRI_HI);

		/*
		 * Send down T_BIND_REQ to bind IPPROTO_ESP.
		 * Handle the ACK here in ESP.
		 */
		qprocson(q);
		if (espstack->esp_ip_unbind == NULL ||
		    !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) {
			/* On failure, undo the allocb and the open. */
			if (espstack->esp_ip_unbind != NULL) {
				freeb(espstack->esp_ip_unbind);
				espstack->esp_ip_unbind = NULL;
			}
			q->q_ptr = NULL;
			netstack_rele(espstack->ipsecesp_netstack);
			return (ENOMEM);
		}

		/* Pre-build the T_UNBIND_REQ for use at close time. */
		espstack->esp_ip_unbind->b_datap->db_type = M_PROTO;
		tur = (struct T_unbind_req *)espstack->esp_ip_unbind->b_rptr;
		tur->PRIM_type = T_UNBIND_REQ;
	} else {
		qprocson(q);
	}

	/*
	 * For now, there's not much I can do.  I'll be getting a message
	 * passed down to me from keysock (in my wput), and a T_BIND_ACK
	 * up from IP (in my rput).
	 */

	return (0);
}

/*
 * ESP module close routine.
 * Sends the pre-built T_UNBIND_REQ if this instance holds the bound
 * queue, and tries to re-home esp_sadb.s_ip_q onto the pfkey queue.
 */
static int
ipsecesp_close(queue_t *q)
{
	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

	/*
	 * If esp_sadb.s_ip_q is attached to this instance, send a
	 * T_UNBIND_REQ to IP for the instance before doing
	 * a qprocsoff().
	 */
	if (WR(q) == espstack->esp_sadb.s_ip_q &&
	    espstack->esp_ip_unbind != NULL) {
		putnext(WR(q), espstack->esp_ip_unbind);
		espstack->esp_ip_unbind = NULL;
	}

	/*
	 * Clean up q_ptr, if needed.
	 */
	qprocsoff(q);

	/* Keysock queue check is safe, because of OCEXCL perimeter. */

	if (q == espstack->esp_pfkey_q) {
		esp1dbg(espstack,
		    ("ipsecesp_close: Ummm... keysock is closing ESP.\n"));
		espstack->esp_pfkey_q = NULL;
		/* Detach qtimeouts. */
		(void) quntimeout(q, espstack->esp_event);
	}

	if (WR(q) == espstack->esp_sadb.s_ip_q) {
		/*
		 * If the esp_sadb.s_ip_q is attached to this instance, find
		 * another.  The OCEXCL outer perimeter helps us here.
		 */
		espstack->esp_sadb.s_ip_q = NULL;

		/*
		 * Find a replacement queue for esp_sadb.s_ip_q.
		 */
		if (espstack->esp_pfkey_q != NULL &&
		    espstack->esp_pfkey_q != RD(q)) {
			/*
			 * See if we can use the pfkey_q.
			 */
			espstack->esp_sadb.s_ip_q = WR(espstack->esp_pfkey_q);
		}

		if (espstack->esp_sadb.s_ip_q == NULL ||
		    !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) {
			esp1dbg(espstack, ("ipsecesp: Can't reassign ip_q.\n"));
			espstack->esp_sadb.s_ip_q = NULL;
		} else {
			/* Rebuild the unbind message for the new queue. */
			espstack->esp_ip_unbind =
			    allocb(sizeof (struct T_unbind_req), BPRI_HI);

			if (espstack->esp_ip_unbind != NULL) {
				struct T_unbind_req *tur;

				espstack->esp_ip_unbind->b_datap->db_type =
				    M_PROTO;
				tur = (struct T_unbind_req *)
				    espstack->esp_ip_unbind->b_rptr;
				tur->PRIM_type = T_UNBIND_REQ;
			}
			/* If it's NULL, I can't do much here. */
		}
	}

	netstack_rele(espstack->ipsecesp_netstack);
	return (0);
}

/*
 * Add a number of bytes to what the SA has protected so far.  Return
 * B_TRUE if the SA can still protect that many bytes.
 *
 * Caller must REFRELE the passed-in assoc.  This function must REFRELE
 * any obtained peer SA.
 */
static boolean_t
esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
{
	ipsa_t *inassoc, *outassoc;
	isaf_t *bucket;
	boolean_t inrc, outrc, isv6;
	sadb_t *sp;
	int outhash;
	netstack_t	*ns = assoc->ipsa_netstack;
	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;

	/* No peer?  No problem! */
	if (!assoc->ipsa_haspeer) {
		return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
		    B_TRUE));
	}

	/*
	 * Otherwise, we want to grab both the original assoc and its peer.
	 * There might be a race for this, but if it's a real race, two
	 * expire messages may occur.  We limit this by only sending the
	 * expire message on one of the peers, we'll pick the inbound
	 * arbitrarily.
	 *
	 * If we need tight synchronization on the peer SA, then we need to
	 * reconsider.
	 */

	/* Use address length to select IPv6/IPv4 */
	isv6 = (assoc->ipsa_addrfam == AF_INET6);
	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;

	if (inbound) {
		inassoc = assoc;
		if (isv6) {
			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
			    &inassoc->ipsa_dstaddr));
		} else {
			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
			    &inassoc->ipsa_dstaddr));
		}
		bucket = &sp->sdb_of[outhash];
		mutex_enter(&bucket->isaf_lock);
		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
		    inassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (outassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_age_bytes: "
			    "can't find peer for inbound.\n"));
			return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
			    bytes, B_TRUE));
		}
	} else {
		outassoc = assoc;
		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
		mutex_enter(&bucket->isaf_lock);
		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
		    outassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (inassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_age_bytes: "
			    "can't find peer for outbound.\n"));
			return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
			    bytes, B_TRUE));
		}
	}

	inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
	outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);

	/*
	 * REFRELE any peer SA.
	 *
	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
	 * them in { }.
	 */
	if (inbound) {
		IPSA_REFRELE(outassoc);
	} else {
		IPSA_REFRELE(inassoc);
	}

	return (inrc && outrc);
}

/*
 * Do incoming NAT-T manipulations for packet.
 * Folds the SA's stored inbound checksum adjustment into the TCP/UDP
 * checksum of the decapsulated packet (one's-complement arithmetic).
 */
static ipsec_status_t
esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
{
	ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
	tcpha_t *tcph;
	udpha_t *udpha;
	/* Initialize to our inbound cksum adjustment... */
	uint32_t sum = assoc->ipsa_inbound_cksum;

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
		tcph = (tcpha_t *)(data_mp->b_rptr +
		    IPH_HDR_LENGTH(ipha));

#define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
		sum += ~ntohs(tcph->tha_sum) & 0xFFFF;
		DOWN_SUM(sum);
		DOWN_SUM(sum);
		tcph->tha_sum = ~htons(sum);
		break;
	case IPPROTO_UDP:
		udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));

		if (udpha->uha_checksum != 0) {
			/* Adujst if the inbound one was not zero. */
			sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
			DOWN_SUM(sum);
			DOWN_SUM(sum);
			udpha->uha_checksum = ~htons(sum);
			/* 0 means "no checksum" for UDP; use all-ones. */
			if (udpha->uha_checksum == 0)
				udpha->uha_checksum = 0xFFFF;
		}
#undef DOWN_SUM
		break;
	case IPPROTO_IP:
		/*
		 * This case is only an issue for self-encapsulated
		 * packets.  So for now, fall through.
		 */
		break;
	}
	return (IPSEC_STATUS_SUCCESS);
}


/*
 * Strip ESP header, check padding, and fix IP header.
 * Returns B_TRUE on success, B_FALSE if an error occurred.
886 */ 887 static boolean_t 888 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, 889 kstat_named_t **counter, ipsecesp_stack_t *espstack) 890 { 891 ipha_t *ipha; 892 ip6_t *ip6h; 893 uint_t divpoint; 894 mblk_t *scratch; 895 uint8_t nexthdr, padlen; 896 uint8_t lastpad; 897 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 898 uint8_t *lastbyte; 899 900 /* 901 * Strip ESP data and fix IP header. 902 * 903 * XXX In case the beginning of esp_inbound() changes to not do a 904 * pullup, this part of the code can remain unchanged. 905 */ 906 if (isv4) { 907 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t)); 908 ipha = (ipha_t *)data_mp->b_rptr; 909 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) + 910 IPH_HDR_LENGTH(ipha)); 911 divpoint = IPH_HDR_LENGTH(ipha); 912 } else { 913 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t)); 914 ip6h = (ip6_t *)data_mp->b_rptr; 915 divpoint = ip_hdr_length_v6(data_mp, ip6h); 916 } 917 918 scratch = data_mp; 919 while (scratch->b_cont != NULL) 920 scratch = scratch->b_cont; 921 922 ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3); 923 924 /* 925 * "Next header" and padding length are the last two bytes in the 926 * ESP-protected datagram, thus the explicit - 1 and - 2. 927 * lastpad is the last byte of the padding, which can be used for 928 * a quick check to see if the padding is correct. 929 */ 930 lastbyte = scratch->b_wptr - 1; 931 nexthdr = *lastbyte--; 932 padlen = *lastbyte--; 933 934 if (isv4) { 935 /* Fix part of the IP header. */ 936 ipha->ipha_protocol = nexthdr; 937 /* 938 * Reality check the padlen. The explicit - 2 is for the 939 * padding length and the next-header bytes. 
940 */ 941 if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 - 942 sizeof (esph_t) - ivlen) { 943 ESP_BUMP_STAT(espstack, bad_decrypt); 944 ipsec_rl_strlog(espstack->ipsecesp_netstack, 945 info.mi_idnum, 0, 0, 946 SL_ERROR | SL_WARN, 947 "Corrupt ESP packet (padlen too big).\n"); 948 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 949 padlen)); 950 esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp " 951 "hdr - ivlen(%d) = %d.\n", 952 ntohs(ipha->ipha_length), ivlen, 953 (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) - 954 2 - sizeof (esph_t) - ivlen))); 955 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 956 return (B_FALSE); 957 } 958 959 /* 960 * Fix the rest of the header. The explicit - 2 is for the 961 * padding length and the next-header bytes. 962 */ 963 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen - 964 2 - sizeof (esph_t) - ivlen); 965 ipha->ipha_hdr_checksum = 0; 966 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 967 } else { 968 if (ip6h->ip6_nxt == IPPROTO_ESP) { 969 ip6h->ip6_nxt = nexthdr; 970 } else { 971 ip6_pkt_t ipp; 972 973 bzero(&ipp, sizeof (ipp)); 974 (void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL); 975 if (ipp.ipp_dstopts != NULL) { 976 ipp.ipp_dstopts->ip6d_nxt = nexthdr; 977 } else if (ipp.ipp_rthdr != NULL) { 978 ipp.ipp_rthdr->ip6r_nxt = nexthdr; 979 } else if (ipp.ipp_hopopts != NULL) { 980 ipp.ipp_hopopts->ip6h_nxt = nexthdr; 981 } else { 982 /* Panic a DEBUG kernel. */ 983 ASSERT(ipp.ipp_hopopts != NULL); 984 /* Otherwise, pretend it's IP + ESP. 
*/ 985 cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n"); 986 ip6h->ip6_nxt = nexthdr; 987 } 988 } 989 990 if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) - 991 ivlen) { 992 ESP_BUMP_STAT(espstack, bad_decrypt); 993 ipsec_rl_strlog(espstack->ipsecesp_netstack, 994 info.mi_idnum, 0, 0, 995 SL_ERROR | SL_WARN, 996 "Corrupt ESP packet (v6 padlen too big).\n"); 997 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 998 padlen)); 999 esp1dbg(espstack, 1000 ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = " 1001 "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen) 1002 + sizeof (ip6_t)), ivlen, 1003 (unsigned)(ntohs(ip6h->ip6_plen) - 2 - 1004 sizeof (esph_t) - ivlen))); 1005 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 1006 return (B_FALSE); 1007 } 1008 1009 1010 /* 1011 * Fix the rest of the header. The explicit - 2 is for the 1012 * padding length and the next-header bytes. IPv6 is nice, 1013 * because there's no hdr checksum! 1014 */ 1015 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen - 1016 2 - sizeof (esph_t) - ivlen); 1017 } 1018 1019 if (espstack->ipsecesp_padding_check > 0 && padlen > 0) { 1020 /* 1021 * Weak padding check: compare last-byte to length, they 1022 * should be equal. 1023 */ 1024 lastpad = *lastbyte--; 1025 1026 if (padlen != lastpad) { 1027 ipsec_rl_strlog(espstack->ipsecesp_netstack, 1028 info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 1029 "Corrupt ESP packet (lastpad != padlen).\n"); 1030 esp1dbg(espstack, 1031 ("lastpad (%d) not equal to padlen (%d):\n", 1032 lastpad, padlen)); 1033 ESP_BUMP_STAT(espstack, bad_padding); 1034 *counter = DROPPER(ipss, ipds_esp_bad_padding); 1035 return (B_FALSE); 1036 } 1037 1038 /* 1039 * Strong padding check: Check all pad bytes to see that 1040 * they're ascending. Go backwards using a descending counter 1041 * to verify. padlen == 1 is checked by previous block, so 1042 * only bother if we've more than 1 byte of padding. 1043 * Consequently, start the check one byte before the location 1044 * of "lastpad". 
1045 */ 1046 if (espstack->ipsecesp_padding_check > 1) { 1047 /* 1048 * This assert may have to become an if and a pullup 1049 * if we start accepting multi-dblk mblks. For now, 1050 * though, any packet here will have been pulled up in 1051 * esp_inbound. 1052 */ 1053 ASSERT(MBLKL(scratch) >= lastpad + 3); 1054 1055 /* 1056 * Use "--lastpad" because we already checked the very 1057 * last pad byte previously. 1058 */ 1059 while (--lastpad != 0) { 1060 if (lastpad != *lastbyte) { 1061 ipsec_rl_strlog( 1062 espstack->ipsecesp_netstack, 1063 info.mi_idnum, 0, 0, 1064 SL_ERROR | SL_WARN, "Corrupt ESP " 1065 "packet (bad padding).\n"); 1066 esp1dbg(espstack, 1067 ("padding not in correct" 1068 " format:\n")); 1069 ESP_BUMP_STAT(espstack, bad_padding); 1070 *counter = DROPPER(ipss, 1071 ipds_esp_bad_padding); 1072 return (B_FALSE); 1073 } 1074 lastbyte--; 1075 } 1076 } 1077 } 1078 1079 /* Trim off the padding. */ 1080 ASSERT(data_mp->b_cont == NULL); 1081 data_mp->b_wptr -= (padlen + 2); 1082 1083 /* 1084 * Remove the ESP header. 1085 * 1086 * The above assertions about data_mp's size will make this work. 1087 * 1088 * XXX Question: If I send up and get back a contiguous mblk, 1089 * would it be quicker to bcopy over, or keep doing the dupb stuff? 1090 * I go with copying for now. 
1091 */ 1092 1093 if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) && 1094 IS_P2ALIGNED(ivlen, sizeof (uint32_t))) { 1095 uint8_t *start = data_mp->b_rptr; 1096 uint32_t *src, *dst; 1097 1098 src = (uint32_t *)(start + divpoint); 1099 dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen); 1100 1101 ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) && 1102 IS_P2ALIGNED(src, sizeof (uint32_t))); 1103 1104 do { 1105 src--; 1106 dst--; 1107 *dst = *src; 1108 } while (src != (uint32_t *)start); 1109 1110 data_mp->b_rptr = (uchar_t *)dst; 1111 } else { 1112 uint8_t *start = data_mp->b_rptr; 1113 uint8_t *src, *dst; 1114 1115 src = start + divpoint; 1116 dst = src + sizeof (esph_t) + ivlen; 1117 1118 do { 1119 src--; 1120 dst--; 1121 *dst = *src; 1122 } while (src != start); 1123 1124 data_mp->b_rptr = dst; 1125 } 1126 1127 esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n")); 1128 esp2dbg(espstack, (dump_msg(data_mp))); 1129 1130 return (B_TRUE); 1131 } 1132 1133 /* 1134 * Updating use times can be tricky business if the ipsa_haspeer flag is 1135 * set. This function is called once in an SA's lifetime. 1136 * 1137 * Caller has to REFRELE "assoc" which is passed in. This function has 1138 * to REFRELE any peer SA that is obtained. 1139 */ 1140 static void 1141 esp_set_usetime(ipsa_t *assoc, boolean_t inbound) 1142 { 1143 ipsa_t *inassoc, *outassoc; 1144 isaf_t *bucket; 1145 sadb_t *sp; 1146 int outhash; 1147 boolean_t isv6; 1148 netstack_t *ns = assoc->ipsa_netstack; 1149 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1150 1151 /* No peer? No problem! */ 1152 if (!assoc->ipsa_haspeer) { 1153 sadb_set_usetime(assoc); 1154 return; 1155 } 1156 1157 /* 1158 * Otherwise, we want to grab both the original assoc and its peer. 1159 * There might be a race for this, but if it's a real race, the times 1160 * will be out-of-synch by at most a second, and since our time 1161 * granularity is a second, this won't be a problem. 
1162 * 1163 * If we need tight synchronization on the peer SA, then we need to 1164 * reconsider. 1165 */ 1166 1167 /* Use address length to select IPv6/IPv4 */ 1168 isv6 = (assoc->ipsa_addrfam == AF_INET6); 1169 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 1170 1171 if (inbound) { 1172 inassoc = assoc; 1173 if (isv6) { 1174 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 1175 &inassoc->ipsa_dstaddr)); 1176 } else { 1177 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 1178 &inassoc->ipsa_dstaddr)); 1179 } 1180 bucket = &sp->sdb_of[outhash]; 1181 mutex_enter(&bucket->isaf_lock); 1182 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 1183 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 1184 inassoc->ipsa_addrfam); 1185 mutex_exit(&bucket->isaf_lock); 1186 if (outassoc == NULL) { 1187 /* Q: Do we wish to set haspeer == B_FALSE? */ 1188 esp0dbg(("esp_set_usetime: " 1189 "can't find peer for inbound.\n")); 1190 sadb_set_usetime(inassoc); 1191 return; 1192 } 1193 } else { 1194 outassoc = assoc; 1195 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 1196 mutex_enter(&bucket->isaf_lock); 1197 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 1198 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 1199 outassoc->ipsa_addrfam); 1200 mutex_exit(&bucket->isaf_lock); 1201 if (inassoc == NULL) { 1202 /* Q: Do we wish to set haspeer == B_FALSE? */ 1203 esp0dbg(("esp_set_usetime: " 1204 "can't find peer for outbound.\n")); 1205 sadb_set_usetime(outassoc); 1206 return; 1207 } 1208 } 1209 1210 /* Update usetime on both. */ 1211 sadb_set_usetime(inassoc); 1212 sadb_set_usetime(outassoc); 1213 1214 /* 1215 * REFRELE any peer SA. 1216 * 1217 * Because of the multi-line macro nature of IPSA_REFRELE, keep 1218 * them in { }. 1219 */ 1220 if (inbound) { 1221 IPSA_REFRELE(outassoc); 1222 } else { 1223 IPSA_REFRELE(inassoc); 1224 } 1225 } 1226 1227 /* 1228 * Handle ESP inbound data for IPv4 and IPv6. 
 * On success returns B_TRUE, on failure returns B_FALSE and frees the
 * mblk chain ipsec_in_mp.
 */
ipsec_status_t
esp_inbound(mblk_t *ipsec_in_mp, void *arg)
{
	mblk_t *data_mp = ipsec_in_mp->b_cont;
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
	esph_t *esph = (esph_t *)arg;
	ipsa_t *ipsa = ii->ipsec_in_esp_sa;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/*
	 * We may wish to check replay in-range-only here as an optimization.
	 * Include the reality check of ipsa->ipsa_replay >
	 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
	 * where N == ipsa->ipsa_replay_wsize.
	 *
	 * Another check that may come here later is the "collision" check.
	 * If legitimate packets flow quickly enough, this won't be a problem,
	 * but collisions may cause authentication algorithm crunching to
	 * take place when it doesn't need to.
	 */

	/*
	 * Cheap replay "peek" before any crypto work:  drops obvious
	 * replays without touching the crypto framework.
	 */
	if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
		ESP_BUMP_STAT(espstack, replay_early_failures);
		IP_ESP_BUMP_STAT(ipss, in_discards);
		/*
		 * TODO: Extract inbound interface from the IPSEC_IN
		 * message's ii->ipsec_in_rill_index.
		 */
		ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL,
		    DROPPER(ipss, ipds_esp_early_replay),
		    &espstack->esp_dropper);
		return (IPSEC_STATUS_FAILED);
	}

	/*
	 * Has this packet already been processed by a hardware
	 * IPsec accelerator?
	 */
	if (ii->ipsec_in_accelerated) {
		ipsec_status_t rv;
		esp3dbg(espstack,
		    ("esp_inbound: pkt processed by ill=%d isv6=%d\n",
		    ii->ipsec_in_ill_index, !ii->ipsec_in_v4));
		rv = esp_inbound_accelerated(ipsec_in_mp,
		    data_mp, ii->ipsec_in_v4, ipsa);
		return (rv);
	}
	ESP_BUMP_STAT(espstack, noaccel);

	/*
	 * Adjust the IP header's payload length to reflect the removal
	 * of the ICV.
	 */
	if (!ii->ipsec_in_v4) {
		ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
		    ipsa->ipsa_mac_len);
	} else {
		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
		ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
		    ipsa->ipsa_mac_len);
	}

	/* submit the request to the crypto framework */
	return (esp_submit_req_inbound(ipsec_in_mp, ipsa,
	    (uint8_t *)esph - data_mp->b_rptr));
}

/*
 * Perform the really difficult work of inserting the proposed situation.
 * Called while holding the algorithm lock.
 */
static void
esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs)
{
	sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
	ipsec_out_t *io;
	ipsec_action_t *ap;
	ipsec_prot_t *prot;
	netstack_t *ns;
	ipsecesp_stack_t *espstack;
	ipsec_stack_t *ipss;

	io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr;
	ASSERT(io->ipsec_out_type == IPSEC_OUT);
	ns = io->ipsec_out_ns;
	espstack = ns->netstack_ipsecesp;
	ipss = ns->netstack_ipsec;
	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));

	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
	prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */

	prop->sadb_prop_replay = espstack->ipsecesp_replay_size;

	/*
	 * Based upon algorithm properties, and what-not, prioritize
	 * a proposal.
If the IPSEC_OUT message has an algorithm specified,
	 * use it first and foremost.
	 *
	 * For each action in policy list
	 *	Add combination.  If I've hit limit, return.
	 */

	for (ap = acqrec->ipsacq_act; ap != NULL;
	    ap = ap->ipa_next) {
		ipsec_alginfo_t *ealg = NULL;
		ipsec_alginfo_t *aalg = NULL;

		if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
			continue;

		prot = &ap->ipa_act.ipa_apply;

		if (!(prot->ipp_use_esp))
			continue;

		/*
		 * Skip actions whose auth or encryption algorithm is not
		 * currently loaded/valid on this netstack.
		 */
		if (prot->ipp_esp_auth_alg != 0) {
			aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
			    [prot->ipp_esp_auth_alg];
			if (aalg == NULL || !ALG_VALID(aalg))
				continue;
		}

		ASSERT(prot->ipp_encr_alg > 0);
		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
		    [prot->ipp_encr_alg];
		if (ealg == NULL || !ALG_VALID(ealg))
			continue;

		comb->sadb_comb_flags = 0;
		comb->sadb_comb_reserved = 0;
		comb->sadb_comb_encrypt = ealg->alg_id;
		comb->sadb_comb_encrypt_minbits =
		    MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
		comb->sadb_comb_encrypt_maxbits =
		    MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);

		if (aalg == NULL) {
			comb->sadb_comb_auth = 0;
			comb->sadb_comb_auth_minbits = 0;
			comb->sadb_comb_auth_maxbits = 0;
		} else {
			comb->sadb_comb_auth = aalg->alg_id;
			comb->sadb_comb_auth_minbits =
			    MAX(prot->ipp_espa_minbits, aalg->alg_ef_minbits);
			comb->sadb_comb_auth_maxbits =
			    MIN(prot->ipp_espa_maxbits, aalg->alg_ef_maxbits);
		}

		/*
		 * The following may be based on algorithm
		 * properties, but in the meantime, we just pick
		 * some good, sensible numbers.  Key mgmt. can
		 * (and perhaps should) be the place to finalize
		 * such decisions.
		 */

		/*
		 * No limits on allocations, since we really don't
		 * support that concept currently.
		 */
		comb->sadb_comb_soft_allocations = 0;
		comb->sadb_comb_hard_allocations = 0;

		/*
		 * These may want to come from policy rule..
		 */
		comb->sadb_comb_soft_bytes =
		    espstack->ipsecesp_default_soft_bytes;
		comb->sadb_comb_hard_bytes =
		    espstack->ipsecesp_default_hard_bytes;
		comb->sadb_comb_soft_addtime =
		    espstack->ipsecesp_default_soft_addtime;
		comb->sadb_comb_hard_addtime =
		    espstack->ipsecesp_default_hard_addtime;
		comb->sadb_comb_soft_usetime =
		    espstack->ipsecesp_default_soft_usetime;
		comb->sadb_comb_hard_usetime =
		    espstack->ipsecesp_default_hard_usetime;

		/*
		 * "combs" is the number of sadb_comb_t slots the caller
		 * allocated; stop once we've filled the last one.
		 */
		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
		if (--combs == 0)
			break;	/* out of space.. */
		comb++;
	}
}

/*
 * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
 */
static void
esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns)
{
	uint_t combs;
	sadb_msg_t *samsg;
	sadb_prop_t *prop;
	mblk_t *pfkeymp, *msgmp;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ESP_BUMP_STAT(espstack, acquire_requests);

	/* Entered with acqrec's ipsacq_lock held; released on every path. */
	if (espstack->esp_pfkey_q == NULL) {
		mutex_exit(&acqrec->ipsacq_lock);
		return;
	}

	/* Set up ACQUIRE. */
	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP,
	    ns->netstack_ipsec);
	if (pfkeymp == NULL) {
		esp0dbg(("sadb_setup_acquire failed.\n"));
		mutex_exit(&acqrec->ipsacq_lock);
		return;
	}
	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
	combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
	msgmp = pfkeymp->b_cont;
	samsg = (sadb_msg_t *)(msgmp->b_rptr);

	/* Insert proposal here.
	 */

	prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
	esp_insert_prop(prop, acqrec, combs);
	samsg->sadb_msg_len += prop->sadb_prop_len;
	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);

	mutex_exit(&ipss->ipsec_alg_lock);

	/*
	 * Must mutex_exit() before sending PF_KEY message up, in
	 * order to avoid recursive mutex_enter() if there are no registered
	 * listeners.
	 *
	 * Once I've sent the message, I'm cool anyway.
	 */
	mutex_exit(&acqrec->ipsacq_lock);
	if (extended != NULL) {
		putnext(espstack->esp_pfkey_q, extended);
	}
	putnext(espstack->esp_pfkey_q, pfkeymp);
}

/* XXX refactor me */
/*
 * Handle the SADB_GETSPI message.  Create a larval SA.
 */
static void
esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
{
	ipsa_t *newbie, *target;
	isaf_t *outbound, *inbound;
	int rc, diagnostic;
	sadb_sa_t *assoc;
	keysock_out_t *kso;
	uint32_t newspi;

	/*
	 * Randomly generate a proposed SPI value.  In a cluster, defer to
	 * the cluster hook so the SPI is unique across nodes.
	 */
	if (cl_inet_getspi != NULL) {
		cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
		    IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
	} else {
		(void) random_get_pseudo_bytes((uint8_t *)&newspi,
		    sizeof (uint32_t));
	}
	newbie = sadb_getspi(ksi, newspi, &diagnostic,
	    espstack->ipsecesp_netstack, IPPROTO_ESP);

	/* sadb_getspi() returns NULL on allocation failure, -1 on EINVAL. */
	if (newbie == NULL) {
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
		    ksi->ks_in_serial);
		return;
	} else if (newbie == (ipsa_t *)-1) {
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
		    ksi->ks_in_serial);
		return;
	}

	/*
	 * XXX - We may randomly collide.  We really should recover from this.
	 * Unfortunately, that could require spending way-too-much-time
	 * in here.  For now, let the user retry.
	 */

	if (newbie->ipsa_addrfam == AF_INET6) {
		outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
		    *(uint32_t *)(newbie->ipsa_dstaddr));
		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
		    newbie->ipsa_spi);
	} else {
		ASSERT(newbie->ipsa_addrfam == AF_INET);
		outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
		    *(uint32_t *)(newbie->ipsa_dstaddr));
		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
		    newbie->ipsa_spi);
	}

	mutex_enter(&outbound->isaf_lock);
	mutex_enter(&inbound->isaf_lock);

	/*
	 * Check for collisions (i.e. did sadb_getspi() return with something
	 * that already exists?).
	 *
	 * Try outbound first.  Even though SADB_GETSPI is traditionally
	 * for inbound SAs, you never know what a user might do.
	 */
	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
	if (target == NULL) {
		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
		    newbie->ipsa_addrfam);
	}

	/*
	 * I don't have collisions elsewhere!
	 * (Nor will I because I'm still holding inbound/outbound locks.)
	 */

	if (target != NULL) {
		rc = EEXIST;
		IPSA_REFRELE(target);
	} else {
		/*
		 * sadb_insertassoc() also checks for collisions, so
		 * if there's a colliding entry, rc will be set
		 * to EEXIST.
		 */
		rc = sadb_insertassoc(newbie, inbound);
		/* Larval SA:  expire it unless it's completed in time. */
		newbie->ipsa_hardexpiretime = gethrestime_sec();
		newbie->ipsa_hardexpiretime +=
		    espstack->ipsecesp_larval_timeout;
	}

	/*
	 * Can exit outbound mutex.  Hold inbound until we're done
	 * with newbie.
	 */
	mutex_exit(&outbound->isaf_lock);

	if (rc != 0) {
		mutex_exit(&inbound->isaf_lock);
		IPSA_REFRELE(newbie);
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
		    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
		return;
	}


	/* Can write here because I'm still holding the bucket lock. */
	newbie->ipsa_type = SADB_SATYPE_ESP;

	/*
	 * Construct successful return message.  We have one thing going
	 * for us in PF_KEY v2.  That's the fact that
	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
	 * so the SPIRANGE extension can be rewritten in place as an SA ext.
	 */
	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
	assoc->sadb_sa_exttype = SADB_EXT_SA;
	assoc->sadb_sa_spi = newbie->ipsa_spi;
	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
	mutex_exit(&inbound->isaf_lock);

	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
	kso = (keysock_out_t *)ksi;
	kso->ks_out_len = sizeof (*kso);
	kso->ks_out_serial = ksi->ks_in_serial;
	kso->ks_out_type = KEYSOCK_OUT;

	/*
	 * Can safely putnext() to esp_pfkey_q, because this is a turnaround
	 * from the esp_pfkey_q.
	 */
	putnext(espstack->esp_pfkey_q, mp);
}

/*
 * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
 * allocated mblk with the ESP header in between the two.
 */
static boolean_t
esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
    ipsecesp_stack_t *espstack)
{
	mblk_t *split_mp = mp;
	uint_t wheretodiv = divpoint;

	/* Walk the chain to the mblk containing the division point. */
	while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
		wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
		split_mp = split_mp->b_cont;
		ASSERT(split_mp != NULL);
	}

	if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
		mblk_t *scratch;

		/* "scratch" is the 2nd half, split_mp is the first. */
		scratch = dupb(split_mp);
		if (scratch == NULL) {
			esp1dbg(espstack,
			    ("esp_insert_esp: can't allocate scratch.\n"));
			return (B_FALSE);
		}
		/* NOTE:  dupb() doesn't set b_cont appropriately. */
		scratch->b_cont = split_mp->b_cont;
		scratch->b_rptr += wheretodiv;
		split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
		split_mp->b_cont = scratch;
	}
	/*
	 * At this point, split_mp is exactly "wheretodiv" bytes long, and
	 * holds the end of the pre-ESP part of the datagram.
	 */
	esp_mp->b_cont = split_mp->b_cont;
	split_mp->b_cont = esp_mp;

	return (B_TRUE);
}

/*
 * Section 7 of RFC 3947 says:
 *
 * 7.  Recovering from the Expiring NAT Mappings
 *
 * There are cases where NAT box decides to remove mappings that are still
 * alive (for example, when the keepalive interval is too long, or when the
 * NAT box is rebooted).  To recover from this, ends that are NOT behind
 * NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
 * the other end to determine which IP and port addresses should be used.
 * The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
 * DoS attack possibility because the IP address or port of the other host
 * will not change (it is not behind NAT).
 *
 * Keepalives cannot be used for these purposes, as they are not
 * authenticated, but any IKE authenticated IKE packet or ESP packet can be
 * used to detect whether the IP address or the port has changed.
 *
 * The following function will check an SA and its explicitly-set pair to see
 * if the NAT-T remote port matches the received packet (which must have
 * passed ESP authentication, see esp_in_done() for the caller context).  If
 * there is a mismatch, the SAs are updated.
It is not important if we race
 * with a transmitting thread, as if there is a transmitting thread, it will
 * merely emit a packet that will most-likely be dropped.
 *
 * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
 * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
 */

/*
 * FIRST_16/NEXT_16 split the two network-order 16-bit ports packed into a
 * uint32_t, independent of host byte order.
 */
#ifdef _LITTLE_ENDIAN
#define	FIRST_16(x) ((x) & 0xFFFF)
#define	NEXT_16(x) (((x) >> 16) & 0xFFFF)
#else
#define	FIRST_16(x) (((x) >> 16) & 0xFFFF)
#define	NEXT_16(x) ((x) & 0xFFFF)
#endif
static void
esp_port_freshness(uint32_t ports, ipsa_t *assoc)
{
	uint16_t remote = FIRST_16(ports);
	uint16_t local = NEXT_16(ports);
	ipsa_t *outbound_peer;
	isaf_t *bucket;
	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;

	/* We found a conn_t, therefore local != 0. */
	ASSERT(local != 0);
	/* Assume an IPv4 SA. */
	ASSERT(assoc->ipsa_addrfam == AF_INET);

	/*
	 * On-the-wire rport == 0 means something's very wrong.
	 * An unpaired SA is also useless to us.
	 * If we are behind the NAT, don't bother.
	 * A zero local NAT port defaults to 4500, so check that too.
	 * And, of course, if the ports already match, we don't need to
	 * bother.
	 */
	if (remote == 0 || assoc->ipsa_otherspi == 0 ||
	    (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
	    (assoc->ipsa_remote_nat_port == 0 &&
	    remote == htons(IPPORT_IKE_NATT)) ||
	    remote == assoc->ipsa_remote_nat_port)
		return;

	/* Try and snag the peer.   NOTE:  Assume IPv4 for now. */
	bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
	    assoc->ipsa_srcaddr[0]);
	mutex_enter(&bucket->isaf_lock);
	outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
	    assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
	mutex_exit(&bucket->isaf_lock);

	/* We probably lost a race to a deleting or expiring thread. */
	if (outbound_peer == NULL)
		return;

	/*
	 * Hold the mutexes for both SAs so we don't race another inbound
	 * thread.  A lock-entry order shouldn't matter, since all other
	 * per-ipsa locks are individually held-then-released.
	 *
	 * Luckily, this has nothing to do with the remote-NAT address,
	 * so we don't have to re-scribble the cached-checksum differential.
	 */
	mutex_enter(&outbound_peer->ipsa_lock);
	mutex_enter(&assoc->ipsa_lock);
	outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
	    remote;
	mutex_exit(&assoc->ipsa_lock);
	mutex_exit(&outbound_peer->ipsa_lock);
	IPSA_REFRELE(outbound_peer);
	ESP_BUMP_STAT(espstack, sa_port_renumbers);
}
/*
 * Finish processing of an inbound ESP packet after processing by the
 * crypto framework.
 * - Remove the ESP header.
 * - Send packet back to IP.
 * If authentication was performed on the packet, this function is called
 * only if the authentication succeeded.
 * On success returns B_TRUE, on failure returns B_FALSE and frees the
 * mblk chain ipsec_in_mp.
 */
static ipsec_status_t
esp_in_done(mblk_t *ipsec_in_mp)
{
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
	mblk_t *data_mp;
	ipsa_t *assoc;
	uint_t espstart;
	uint32_t ivlen = 0;
	uint_t processed_len;
	esph_t *esph;
	kstat_named_t *counter;	/* set before every goto drop_and_bail */
	boolean_t is_natt;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	assoc = ii->ipsec_in_esp_sa;
	ASSERT(assoc != NULL);

	is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);

	/*
	 * Get the pointer to the ESP header.  Where the crypto framework
	 * left its offsets depends on which operations were requested.
	 */
	if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
		/* authentication-only ESP */
		espstart = ii->ipsec_in_crypto_data.cd_offset;
		processed_len = ii->ipsec_in_crypto_data.cd_length;
	} else {
		/* encryption present */
		ivlen = assoc->ipsa_iv_len;
		if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
			/* encryption-only ESP */
			espstart = ii->ipsec_in_crypto_data.cd_offset -
			    sizeof (esph_t) - assoc->ipsa_iv_len;
			processed_len = ii->ipsec_in_crypto_data.cd_length +
			    ivlen;
		} else {
			/* encryption with authentication */
			espstart = ii->ipsec_in_crypto_dual_data.dd_offset1;
			processed_len = ii->ipsec_in_crypto_dual_data.dd_len2 +
			    ivlen;
		}
	}

	data_mp = ipsec_in_mp->b_cont;
	esph = (esph_t *)(data_mp->b_rptr + espstart);

	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE ||
	    (assoc->ipsa_flags & IPSA_F_COMBINED)) {
		/*
		 * Authentication passed if we reach this point.
		 * Packets with authentication will have the ICV
		 * after the crypto data.  Adjust b_wptr before
		 * making padlen checks.
		 */
		ESP_BUMP_STAT(espstack, good_auth);
		data_mp->b_wptr -= assoc->ipsa_mac_len;

		/*
		 * Check replay window here!
		 * For right now, assume keysock will set the replay window
		 * size to zero for SAs that have an unspecified sender.
		 * This may change...
		 */

		if (!sadb_replay_check(assoc, esph->esph_replay)) {
			/*
			 * Log the event.  As of now we print out an event.
			 * Do not print the replay failure number, or else
			 * syslog cannot collate the error messages.  Printing
			 * the replay number that failed opens a denial-of-
			 * service attack.
			 */
			ipsec_assocfailure(info.mi_idnum, 0, 0,
			    SL_ERROR | SL_WARN,
			    "Replay failed for ESP spi 0x%x, dst %s.\n",
			    assoc->ipsa_spi, assoc->ipsa_dstaddr,
			    assoc->ipsa_addrfam, espstack->ipsecesp_netstack);
			ESP_BUMP_STAT(espstack, replay_failures);
			counter = DROPPER(ipss, ipds_esp_replay);
			goto drop_and_bail;
		}

		/* Only a verified packet may renumber NAT-T ports. */
		if (is_natt)
			esp_port_freshness(ii->ipsec_in_esp_udp_ports, assoc);
	}

	esp_set_usetime(assoc, B_TRUE);

	if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
		/* The ipsa has hit hard expiration, LOG and AUDIT. */
		ipsec_assocfailure(info.mi_idnum, 0, 0,
		    SL_ERROR | SL_WARN,
		    "ESP association 0x%x, dst %s had bytes expire.\n",
		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
		    espstack->ipsecesp_netstack);
		ESP_BUMP_STAT(espstack, bytes_expired);
		counter = DROPPER(ipss, ipds_esp_bytes_expire);
		goto drop_and_bail;
	}

	/*
	 * Remove ESP header and padding from packet.  I hope the compiler
	 * spews "branch, predict taken" code for this.
	 */

	if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter,
	    espstack)) {

		if (is_system_labeled()) {
			cred_t *cr = assoc->ipsa_cred;

			if (cr != NULL) {
				mblk_setcred(data_mp, cr, NOPID);
			}

		}
		if (is_natt)
			return (esp_fix_natt_checksums(data_mp, assoc));

		ASSERT(!is_system_labeled() || (DB_CRED(data_mp) != NULL));

		if (assoc->ipsa_state == IPSA_STATE_IDLE) {
			/*
			 * Cluster buffering case.  Tell caller that we're
			 * handling the packet.
			 */
			sadb_buf_pkt(assoc, ipsec_in_mp, ns);
			return (IPSEC_STATUS_PENDING);
		}

		return (IPSEC_STATUS_SUCCESS);
	}

	esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n"));
drop_and_bail:
	IP_ESP_BUMP_STAT(ipss, in_discards);
	/*
	 * TODO: Extract inbound interface from the IPSEC_IN message's
	 * ii->ipsec_in_rill_index.
	 */
	ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, counter,
	    &espstack->esp_dropper);
	return (IPSEC_STATUS_FAILED);
}

/*
 * Called upon failing the inbound ICV check.  The message passed as
 * argument is freed.
 */
static void
esp_log_bad_auth(mblk_t *ipsec_in)
{
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr;
	ipsa_t *assoc = ii->ipsec_in_esp_sa;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/*
	 * Log the event.  Don't print to the console, block
	 * potential denial-of-service attack.
	 */
	ESP_BUMP_STAT(espstack, bad_auth);

	ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
	    "ESP Authentication failed for spi 0x%x, dst %s.\n",
	    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
	    espstack->ipsecesp_netstack);

	IP_ESP_BUMP_STAT(ipss, in_discards);
	/*
	 * TODO: Extract inbound interface from the IPSEC_IN
	 * message's ii->ipsec_in_rill_index.
	 */
	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
	    DROPPER(ipss, ipds_esp_bad_auth),
	    &espstack->esp_dropper);
}


/*
 * Invoked for outbound packets after ESP processing. If the packet
 * also requires AH, performs the AH SA selection and AH processing.
 * Returns B_TRUE if the AH processing was not needed or if it was
 * performed successfully. Returns B_FALSE and consumes the passed mblk
 * if AH processing was required but could not be performed.
 */
static boolean_t
esp_do_outbound_ah(mblk_t *ipsec_mp)
{
	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
	ipsec_status_t ipsec_rc;
	ipsec_action_t *ap;

	/* Fall back to the policy's action if none is cached on the mp. */
	ap = io->ipsec_out_act;
	if (ap == NULL) {
		ipsec_policy_t *pp = io->ipsec_out_policy;
		ap = pp->ipsp_act;
	}

	if (!ap->ipa_want_ah)
		return (B_TRUE);

	ASSERT(io->ipsec_out_ah_done == B_FALSE);

	/* No AH SA yet?  Kick off an ACQUIRE; the packet is consumed. */
	if (io->ipsec_out_ah_sa == NULL) {
		if (!ipsec_outbound_sa(ipsec_mp, IPPROTO_AH)) {
			sadb_acquire(ipsec_mp, io, B_TRUE, B_FALSE);
			return (B_FALSE);
		}
	}
	ASSERT(io->ipsec_out_ah_sa != NULL);

	io->ipsec_out_ah_done = B_TRUE;
	ipsec_rc = io->ipsec_out_ah_sa->ipsa_output_func(ipsec_mp);
	return (ipsec_rc == IPSEC_STATUS_SUCCESS);
}


/*
 * Kernel crypto framework callback invoked after completion of async
 * crypto requests.
 */
static void
esp_kcf_callback(void *arg, int status)
{
	mblk_t *ipsec_mp = (mblk_t *)arg;
	/*
	 * ii and io alias the same b_rptr; the ipsec_in_type/ipsec_out_type
	 * field (read via ii below) tells us which one is really there.
	 */
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
	boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN);
	netstackid_t stackid;
	netstack_t *ns, *ns_arg;
	ipsecesp_stack_t *espstack;
	ipsec_stack_t *ipss;

	ASSERT(ipsec_mp->b_cont != NULL);

	if (is_inbound) {
		stackid = ii->ipsec_in_stackid;
		ns_arg = ii->ipsec_in_ns;
	} else {
		stackid = io->ipsec_out_stackid;
		ns_arg = io->ipsec_out_ns;
	}

	/*
	 * Verify that the netstack is still around; could have vanished
	 * while kEF was doing its work.
	 */
	ns = netstack_find_by_stackid(stackid);
	if (ns == NULL || ns != ns_arg) {
		/* Disappeared on us */
		if (ns != NULL)
			netstack_rele(ns);
		freemsg(ipsec_mp);
		return;
	}

	espstack = ns->netstack_ipsecesp;
	ipss = ns->netstack_ipsec;

	if (status == CRYPTO_SUCCESS) {
		if (is_inbound) {
			if (esp_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) {
				netstack_rele(ns);
				return;
			}
			/* finish IPsec processing */
			ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL);
		} else {
			/*
			 * If a ICV was computed, it was stored by the
			 * crypto framework at the end of the packet.
			 */
			ipha_t *ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr;

			esp_set_usetime(io->ipsec_out_esp_sa, B_FALSE);
			/* NAT-T packet.
			 */
			if (ipha->ipha_protocol == IPPROTO_UDP)
				esp_prepare_udp(ns, ipsec_mp->b_cont, ipha);

			/* do AH processing if needed */
			if (!esp_do_outbound_ah(ipsec_mp)) {
				netstack_rele(ns);
				return;
			}
			/* finish IPsec processing */
			if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
				ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL,
				    NULL);
			} else {
				ip6_t *ip6h = (ip6_t *)ipha;
				ip_wput_ipsec_out_v6(NULL, ipsec_mp, ip6h,
				    NULL, NULL);
			}
		}

	} else if (status == CRYPTO_INVALID_MAC) {
		esp_log_bad_auth(ipsec_mp);

	} else {
		esp1dbg(espstack,
		    ("esp_kcf_callback: crypto failed with 0x%x\n",
		    status));
		ESP_BUMP_STAT(espstack, crypto_failures);
		if (is_inbound)
			IP_ESP_BUMP_STAT(ipss, in_discards);
		else
			ESP_BUMP_STAT(espstack, out_discards);
		ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL,
		    DROPPER(ipss, ipds_esp_crypto_failed),
		    &espstack->esp_dropper);
	}
	netstack_rele(ns);
}

/*
 * Invoked on crypto framework failure during inbound and outbound processing.
 * Drops (and thus frees) the passed message and bumps the relevant stats.
 */
static void
esp_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc,
    ipsecesp_stack_t *espstack)
{
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
	    is_inbound ? "inbound" : "outbound", kef_rc));
	ip_drop_packet(mp, is_inbound, NULL, NULL,
	    DROPPER(ipss, ipds_esp_crypto_failed),
	    &espstack->esp_dropper);
	ESP_BUMP_STAT(espstack, crypto_failures);
	if (is_inbound)
		IP_ESP_BUMP_STAT(ipss, in_discards);
	else
		ESP_BUMP_STAT(espstack, out_discards);
}

/*
 * NOTE: ESP_INIT_CALLREQ expands a reference to a local variable named
 * "ipsec_mp" — it must be in scope at every expansion site.
 */
#define	ESP_INIT_CALLREQ(_cr) { \
	(_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_RESTRICTED; \
	(_cr)->cr_callback_arg = ipsec_mp; \
	(_cr)->cr_callback_func = esp_kcf_callback; \
}

#define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) { \
	(mac)->cd_format = CRYPTO_DATA_RAW; \
	(mac)->cd_offset = 0; \
	(mac)->cd_length = icvlen; \
	(mac)->cd_raw.iov_base = (char *)icvbuf; \
	(mac)->cd_raw.iov_len = icvlen; \
}

/*
 * Use the cheaper RAW crypto-data form when the region of interest fits
 * inside the first mblk; otherwise hand the whole mblk chain to kEF.
 */
#define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) { \
	if (MBLKL(mp) >= (len) + (off)) { \
		(data)->cd_format = CRYPTO_DATA_RAW; \
		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr; \
		(data)->cd_raw.iov_len = MBLKL(mp); \
		(data)->cd_offset = off; \
	} else { \
		(data)->cd_format = CRYPTO_DATA_MBLK; \
		(data)->cd_mp = mp; \
		(data)->cd_offset = off; \
	} \
	(data)->cd_length = len; \
}

#define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) { \
	(data)->dd_format = CRYPTO_DATA_MBLK; \
	(data)->dd_mp = mp; \
	(data)->dd_len1 = len1; \
	(data)->dd_offset1 = off1; \
	(data)->dd_len2 = len2; \
	(data)->dd_offset2 = off2; \
}

static ipsec_status_t
esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset)
{
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	boolean_t do_auth;
	uint_t auth_offset, msg_len, auth_len;
	crypto_call_req_t call_req;
	mblk_t *esp_mp;
	esph_t *esph_ptr;
	int kef_rc = CRYPTO_FAILED;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_encr;
	uint_t encr_offset, encr_len;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	uchar_t *iv_ptr;

	ASSERT(ii->ipsec_in_type == IPSEC_IN);

	/*
	 * In case kEF queues and calls back, keep netstackid_t for
	 * verification that the IP instance is still around in
	 * esp_kcf_callback().
	 */
	ASSERT(ii->ipsec_in_stackid == ns->netstack_stackid);

	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;

	/*
	 * An inbound packet is of the form:
	 * IPSEC_IN -> [IP,options,ESP,IV,data,ICV,pad]
	 */
	esp_mp = ipsec_mp->b_cont;
	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
	/* The IV immediately follows the fixed ESP header. */
	iv_ptr = (uchar_t *)(esph_ptr + 1);
	/* Packet length starting at IP header ending after ESP ICV. */
	msg_len = MBLKL(esp_mp);

	encr_offset = esph_offset + sizeof (esph_t) + iv_len;
	encr_len = msg_len - encr_offset;

	ESP_INIT_CALLREQ(&call_req);

	/*
	 * Counter mode algs need a nonce. This is setup in sadb_common_add().
	 * If for some reason we are using a SA which does not have a nonce
	 * then we must fail here.
	 */
	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
	    (assoc->ipsa_nonce == NULL)) {
		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
		return (IPSEC_STATUS_FAILED);
	}

	if (do_auth) {
		/* force asynchronous processing? */
		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
		    IPSEC_ALGS_EXEC_ASYNC)
			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;

		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* ICV to be verified */
		ESP_INIT_CRYPTO_MAC(&ii->ipsec_in_crypto_mac,
		    icv_len, esp_mp->b_wptr - icv_len);

		/* authentication starts at the ESP header */
		auth_offset = esph_offset;
		auth_len = msg_len - auth_offset - icv_len;
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
			    esp_mp, auth_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
			    &ii->ipsec_in_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &ii->ipsec_in_crypto_mac, &call_req);
		}
	}

	if (do_encr) {
		/* force asynchronous processing? */
		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
		    IPSEC_ALGS_EXEC_ASYNC)
			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;

		/* encryption template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);

		/* Call the nonce update function. Also passes in IV */
		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
		    iv_ptr, &ii->ipsec_in_cmm, &ii->ipsec_in_crypto_data);

		if (!do_auth) {
			/* decryption only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
			    esp_mp, encr_offset, encr_len);

			/* call the crypto framework */
			kef_rc = crypto_decrypt((crypto_mechanism_t *)
			    &ii->ipsec_in_cmm, &ii->ipsec_in_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    NULL, &call_req);
		}
	}

	if (do_auth && do_encr) {
		/* dual operation: verify ICV, then decrypt, in one call */
		/* initialize input data argument */
		ESP_INIT_CRYPTO_DUAL_DATA(&ii->ipsec_in_crypto_dual_data,
		    esp_mp, auth_offset, auth_len,
		    encr_offset, encr_len - icv_len);

		/* specify IV */
		ii->ipsec_in_crypto_dual_data.dd_miscdata = (char *)iv_ptr;

		/* call the framework */
		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
		    &assoc->ipsa_emech, &ii->ipsec_in_crypto_dual_data,
		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
		    auth_ctx_tmpl, encr_ctx_tmpl, &ii->ipsec_in_crypto_mac,
		    NULL, &call_req);
	}

	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		return (esp_in_done(ipsec_mp));
	case CRYPTO_QUEUED:
		/* esp_kcf_callback() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (IPSEC_STATUS_PENDING);
	case CRYPTO_INVALID_MAC:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_log_bad_auth(ipsec_mp);
		return (IPSEC_STATUS_FAILED);
	}

	/* Any other kEF return code is a hard failure; drop the packet. */
	esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc, espstack);
	return (IPSEC_STATUS_FAILED);
}

/*
 * Compute the IP and UDP checksums -- common code for both keepalives and
 * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
 * uses mblk-insertion to insert the UDP header.
 * TODO - If there is an easy way to prep a packet for HW checksums, make
 * it happen here.
 */
static void
esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
{
	int offset;
	uint32_t cksum;
	uint16_t *arr;
	mblk_t *udpmp = mp;
	uint_t hlen = IPH_HDR_LENGTH(ipha);

	ASSERT(MBLKL(mp) >= sizeof (ipha_t));

	/* Recompute the IP header checksum over the current header. */
	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);

	if (ns->netstack_udp->us_do_checksum) {
		ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
		/* arr points to the IP header. */
		arr = (uint16_t *)ipha;
		IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
		IP_STAT_UPDATE(ns->netstack_ip, ip_udp_out_sw_cksum_bytes,
		    ntohs(htons(ipha->ipha_length) - hlen));
		/* arr[6-9] are the IP addresses. */
		cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
		    ntohs(htons(ipha->ipha_length) - hlen);
		cksum = IP_CSUM(mp, hlen, cksum);
		offset = hlen + UDP_CHECKSUM_OFFSET;
		/* Walk the chain to the mblk holding the checksum field. */
		while (offset >= MBLKL(udpmp)) {
			offset -= MBLKL(udpmp);
			udpmp = udpmp->b_cont;
		}
		/* arr points to the UDP header's checksum field. */
		arr = (uint16_t *)(udpmp->b_rptr + offset);
		*arr = cksum;
	}
}

/*
 * taskq handler so we can send the NAT-T keepalive on a separate thread.
 */
static void
actually_send_keepalive(void *arg)
{
	mblk_t *ipsec_mp = (mblk_t *)arg;
	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
	ipha_t *ipha;
	netstack_t *ns;

	ASSERT(DB_TYPE(ipsec_mp) == M_CTL);
	ASSERT(io->ipsec_out_type == IPSEC_OUT);
	ASSERT(ipsec_mp->b_cont != NULL);
	ASSERT(DB_TYPE(ipsec_mp->b_cont) == M_DATA);

	/* Verify the netstack is still the one the packet was built for. */
	ns = netstack_find_by_stackid(io->ipsec_out_stackid);
	if (ns == NULL || ns != io->ipsec_out_ns) {
		/* Just freemsg(). */
		if (ns != NULL)
			netstack_rele(ns);
		freemsg(ipsec_mp);
		return;
	}

	ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr;
	ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL, NULL);
	netstack_rele(ns);
}

/*
 * Send a one-byte UDP NAT-T keepalive.  Construct an IPSEC_OUT too that'll
 * get fed into esp_send_udp/ip_wput_ipsec_out.
 */
void
ipsecesp_send_keepalive(ipsa_t *assoc)
{
	mblk_t *mp = NULL, *ipsec_mp = NULL;
	ipha_t *ipha;
	udpha_t *udpha;
	ipsec_out_t *io;

	ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));

	/* IP header + UDP header + one payload byte. */
	mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
	if (mp == NULL)
		return;
	ipha = (ipha_t *)mp->b_rptr;
	ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
	/* Use the low-16 of the SPI so we have some clue where it came from. */
	ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
	ipha->ipha_fragment_offset_and_flags = 0; /* Too small to fragment! */
	ipha->ipha_ttl = 0xFF;
	ipha->ipha_protocol = IPPROTO_UDP;
	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_src = assoc->ipsa_srcaddr[0];
	ipha->ipha_dst = assoc->ipsa_dstaddr[0];
	udpha = (udpha_t *)(ipha + 1);
	/* Default both ports to IKE NAT-T unless the SA recorded others. */
	udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
	    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
	udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
	    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
	udpha->uha_length = htons(sizeof (udpha_t) + 1);
	udpha->uha_checksum = 0;
	mp->b_wptr = (uint8_t *)(udpha + 1);
	/* The single keepalive payload byte is 0xFF. */
	*(mp->b_wptr++) = 0xFF;

	ipsec_mp = ipsec_alloc_ipsec_out(assoc->ipsa_netstack);
	if (ipsec_mp == NULL) {
		freeb(mp);
		return;
	}
	ipsec_mp->b_cont = mp;
	io = (ipsec_out_t *)ipsec_mp->b_rptr;
	io->ipsec_out_zoneid =
	    netstackid_to_zoneid(assoc->ipsa_netstack->netstack_stackid);
	io->ipsec_out_stackid = assoc->ipsa_netstack->netstack_stackid;

	esp_prepare_udp(assoc->ipsa_netstack, mp, ipha);
	/*
	 * We're holding an isaf_t bucket lock, so pawn off the actual
	 * packet transmission to another thread.  Just in case syncq
	 * processing causes a same-bucket packet to be processed.
	 */
	if (taskq_dispatch(esp_taskq, actually_send_keepalive, ipsec_mp,
	    TQ_NOSLEEP) == 0) {
		/* Assume no memory if taskq_dispatch() fails. */
		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
		    DROPPER(assoc->ipsa_netstack->netstack_ipsec,
		    ipds_esp_nomem),
		    &assoc->ipsa_netstack->netstack_ipsecesp->esp_dropper);
	}
}

/*
 * Submit an outbound packet to the crypto framework for ESP encryption
 * and/or ICV computation.  icv_buf points at the reserved ICV space at
 * the tail of the packet (NULL when no auth algorithm is configured).
 * Returns IPSEC_STATUS_SUCCESS, IPSEC_STATUS_PENDING (kEF queued) or
 * IPSEC_STATUS_FAILED (packet already dropped).
 */
static ipsec_status_t
esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf,
    uint_t payload_len)
{
	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
	uint_t auth_len;
	crypto_call_req_t call_req;
	mblk_t *esp_mp, *data_mp, *ip_mp;
	esph_t *esph_ptr;
	int kef_rc = CRYPTO_FAILED;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_auth;
	boolean_t do_encr;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
	size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
	netstack_t *ns = io->ipsec_out_ns;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	uchar_t *iv_ptr;
	crypto_data_t *cd_ptr = NULL;

	esp3dbg(espstack, ("esp_submit_req_outbound:%s",
	    is_natt ? "natt" : "not natt"));

	ASSERT(io->ipsec_out_type == IPSEC_OUT);

	/*
	 * In case kEF queues and calls back, keep netstackid_t for
	 * verification that the IP instance is still around in
	 * esp_kcf_callback().
	 */
	io->ipsec_out_stackid = ns->netstack_stackid;

	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;

	/*
	 * Outbound IPsec packets are of the form:
	 * IPSEC_OUT -> [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
	 * unless it's NATT, then it's
	 * IPSEC_OUT -> [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
	 * Get a pointer to the mblk containing the ESP header.
	 */
	ip_mp = ipsec_mp->b_cont;
	esp_mp = ipsec_mp->b_cont->b_cont;
	ASSERT(ip_mp != NULL && esp_mp != NULL);
	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
	iv_ptr = (uchar_t *)(esph_ptr + 1);
	data_mp = ipsec_mp->b_cont->b_cont->b_cont;

	/*
	 * Combined mode algs need a nonce. This is setup in sadb_common_add().
	 * If for some reason we are using a SA which does not have a nonce
	 * then we must fail here.
	 */
	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
	    (assoc->ipsa_nonce == NULL)) {
		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
		return (IPSEC_STATUS_FAILED);
	}

	ESP_INIT_CALLREQ(&call_req);

	if (do_auth) {
		/* force asynchronous processing?
 */
		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
		    IPSEC_ALGS_EXEC_ASYNC)
			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;

		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* where to store the computed mac */
		ESP_INIT_CRYPTO_MAC(&io->ipsec_out_crypto_mac,
		    icv_len, icv_buf);

		/* authentication starts at the ESP header */
		auth_len = payload_len + iv_len + sizeof (esph_t);
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data,
			    esp_mp, esph_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac(&assoc->ipsa_amech,
			    &io->ipsec_out_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &io->ipsec_out_crypto_mac, &call_req);
		}
	}

	if (do_encr) {
		/* force asynchronous processing? */
		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
		    IPSEC_ALGS_EXEC_ASYNC)
			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;

		/* encryption context template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);
		/* Call the nonce update function. */
		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
		    iv_ptr, &io->ipsec_out_cmm, &io->ipsec_out_crypto_data);

		if (!do_auth) {
			/* encryption only, skip mblk that contains ESP hdr */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data,
			    data_mp, 0, payload_len);

			/*
			 * For combined mode ciphers, the ciphertext is the same
			 * size as the clear text, the ICV should follow the
			 * ciphertext. To convince the kcf to allow in-line
			 * encryption, with an ICV, use ipsec_out_crypto_mac
			 * to point to the same buffer as the data. The calling
			 * function need to ensure the buffer is large enough to
			 * include the ICV.
			 *
			 * The IV is already written to the packet buffer, the
			 * nonce setup function copied it to the params struct
			 * for the cipher to use.
			 */
			if (assoc->ipsa_flags & IPSA_F_COMBINED) {
				bcopy(&io->ipsec_out_crypto_data,
				    &io->ipsec_out_crypto_mac,
				    sizeof (crypto_data_t));
				io->ipsec_out_crypto_mac.cd_length =
				    payload_len + icv_len;
				cd_ptr = &io->ipsec_out_crypto_mac;
			}

			/* call the crypto framework */
			kef_rc = crypto_encrypt((crypto_mechanism_t *)
			    &io->ipsec_out_cmm,
			    &io->ipsec_out_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    cd_ptr, &call_req);

		}
	}

	if (do_auth && do_encr) {
		/*
		 * Encryption and authentication:
		 * Pass the pointer to the mblk chain starting at the ESP
		 * header to the framework. Skip the ESP header mblk
		 * for encryption, which is reflected by an encryption
		 * offset equal to the length of that mblk. Start
		 * the authentication at the ESP header, i.e. use an
		 * authentication offset of zero.
		 */
		ESP_INIT_CRYPTO_DUAL_DATA(&io->ipsec_out_crypto_dual_data,
		    esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);

		/* specify IV */
		io->ipsec_out_crypto_dual_data.dd_miscdata = (char *)iv_ptr;

		/* call the framework */
		kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
		    &assoc->ipsa_amech, NULL,
		    &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
		    encr_ctx_tmpl, auth_ctx_tmpl,
		    &io->ipsec_out_crypto_dual_data,
		    &io->ipsec_out_crypto_mac, &call_req);
	}

	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_set_usetime(assoc, B_FALSE);
		if (is_natt)
			esp_prepare_udp(ns, ipsec_mp->b_cont,
			    (ipha_t *)ipsec_mp->b_cont->b_rptr);
		return (IPSEC_STATUS_SUCCESS);
	case CRYPTO_QUEUED:
		/* esp_kcf_callback() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (IPSEC_STATUS_PENDING);
	}

	/* Any other kEF return code is a hard failure; drop the packet. */
	esp_crypto_failed(ipsec_mp, B_FALSE, kef_rc, espstack);
	return (IPSEC_STATUS_FAILED);
}

/*
 * Handle outbound IPsec processing for IPv4 and IPv6.
 * NOTE(review): returns an ipsec_status_t (SUCCESS/PENDING/FAILED),
 * not B_TRUE/B_FALSE as previously documented; on failure the mblk
 * chain has already been freed via ip_drop_packet().
2624 */ 2625 static ipsec_status_t 2626 esp_outbound(mblk_t *mp) 2627 { 2628 mblk_t *ipsec_out_mp, *data_mp, *espmp, *tailmp; 2629 ipsec_out_t *io; 2630 ipha_t *ipha; 2631 ip6_t *ip6h; 2632 esph_t *esph_ptr, *iv_ptr; 2633 uint_t af; 2634 uint8_t *nhp; 2635 uintptr_t divpoint, datalen, adj, padlen, i, alloclen; 2636 uintptr_t esplen = sizeof (esph_t); 2637 uint8_t protocol; 2638 ipsa_t *assoc; 2639 uint_t iv_len, block_size, mac_len = 0; 2640 uchar_t *icv_buf; 2641 udpha_t *udpha; 2642 boolean_t is_natt = B_FALSE; 2643 netstack_t *ns; 2644 ipsecesp_stack_t *espstack; 2645 ipsec_stack_t *ipss; 2646 2647 ipsec_out_mp = mp; 2648 data_mp = ipsec_out_mp->b_cont; 2649 2650 io = (ipsec_out_t *)ipsec_out_mp->b_rptr; 2651 ns = io->ipsec_out_ns; 2652 espstack = ns->netstack_ipsecesp; 2653 ipss = ns->netstack_ipsec; 2654 2655 ESP_BUMP_STAT(espstack, out_requests); 2656 2657 /* 2658 * <sigh> We have to copy the message here, because TCP (for example) 2659 * keeps a dupb() of the message lying around for retransmission. 2660 * Since ESP changes the whole of the datagram, we have to create our 2661 * own copy lest we clobber TCP's data. Since we have to copy anyway, 2662 * we might as well make use of msgpullup() and get the mblk into one 2663 * contiguous piece! 2664 */ 2665 ipsec_out_mp->b_cont = msgpullup(data_mp, -1); 2666 if (ipsec_out_mp->b_cont == NULL) { 2667 esp0dbg(("esp_outbound: msgpullup() failed, " 2668 "dropping packet.\n")); 2669 ipsec_out_mp->b_cont = data_mp; 2670 /* 2671 * TODO: Find the outbound IRE for this packet and 2672 * pass it to ip_drop_packet(). 2673 */ 2674 ip_drop_packet(ipsec_out_mp, B_FALSE, NULL, NULL, 2675 DROPPER(ipss, ipds_esp_nomem), 2676 &espstack->esp_dropper); 2677 return (IPSEC_STATUS_FAILED); 2678 } else { 2679 freemsg(data_mp); 2680 data_mp = ipsec_out_mp->b_cont; 2681 } 2682 2683 assoc = io->ipsec_out_esp_sa; 2684 ASSERT(assoc != NULL); 2685 2686 /* 2687 * Get the outer IP header in shape to escape this system.. 
2688 */ 2689 if (is_system_labeled() && (assoc->ipsa_ocred != NULL)) { 2690 int whack; 2691 2692 mblk_setcred(data_mp, assoc->ipsa_ocred, NOPID); 2693 if (io->ipsec_out_v4) 2694 whack = sadb_whack_label(&data_mp, assoc); 2695 else 2696 whack = sadb_whack_label_v6(&data_mp, assoc); 2697 if (whack != 0) { 2698 ip_drop_packet(ipsec_out_mp, B_FALSE, NULL, 2699 NULL, DROPPER(ipss, ipds_esp_nomem), 2700 &espstack->esp_dropper); 2701 return (IPSEC_STATUS_FAILED); 2702 } 2703 ipsec_out_mp->b_cont = data_mp; 2704 } 2705 2706 2707 /* 2708 * Reality check.... 2709 */ 2710 ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). */ 2711 2712 if (io->ipsec_out_v4) { 2713 af = AF_INET; 2714 divpoint = IPH_HDR_LENGTH(ipha); 2715 datalen = ntohs(ipha->ipha_length) - divpoint; 2716 nhp = (uint8_t *)&ipha->ipha_protocol; 2717 } else { 2718 ip6_pkt_t ipp; 2719 2720 af = AF_INET6; 2721 ip6h = (ip6_t *)ipha; 2722 bzero(&ipp, sizeof (ipp)); 2723 divpoint = ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL); 2724 if (ipp.ipp_dstopts != NULL && 2725 ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) { 2726 /* 2727 * Destination options are tricky. If we get in here, 2728 * then we have a terminal header following the 2729 * destination options. We need to adjust backwards 2730 * so we insert ESP BEFORE the destination options 2731 * bag. (So that the dstopts get encrypted!) 2732 * 2733 * Since this is for outbound packets only, we know 2734 * that non-terminal destination options only precede 2735 * routing headers. 2736 */ 2737 divpoint -= ipp.ipp_dstoptslen; 2738 } 2739 datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint; 2740 2741 if (ipp.ipp_rthdr != NULL) { 2742 nhp = &ipp.ipp_rthdr->ip6r_nxt; 2743 } else if (ipp.ipp_hopopts != NULL) { 2744 nhp = &ipp.ipp_hopopts->ip6h_nxt; 2745 } else { 2746 ASSERT(divpoint == sizeof (ip6_t)); 2747 /* It's probably IP + ESP. 
*/ 2748 nhp = &ip6h->ip6_nxt; 2749 } 2750 } 2751 2752 mac_len = assoc->ipsa_mac_len; 2753 2754 if (assoc->ipsa_flags & IPSA_F_NATT) { 2755 /* wedge in UDP header */ 2756 is_natt = B_TRUE; 2757 esplen += UDPH_SIZE; 2758 } 2759 2760 /* 2761 * Set up ESP header and encryption padding for ENCR PI request. 2762 */ 2763 2764 /* Determine the padding length. Pad to 4-bytes for no-encryption. */ 2765 if (assoc->ipsa_encr_alg != SADB_EALG_NULL) { 2766 iv_len = assoc->ipsa_iv_len; 2767 block_size = assoc->ipsa_datalen; 2768 2769 /* 2770 * Pad the data to the length of the cipher block size. 2771 * Include the two additional bytes (hence the - 2) for the 2772 * padding length and the next header. Take this into account 2773 * when calculating the actual length of the padding. 2774 */ 2775 ASSERT(ISP2(iv_len)); 2776 padlen = ((unsigned)(block_size - datalen - 2)) & 2777 (block_size - 1); 2778 } else { 2779 iv_len = 0; 2780 padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) & 2781 (sizeof (uint32_t) - 1); 2782 } 2783 2784 /* Allocate ESP header and IV. */ 2785 esplen += iv_len; 2786 2787 /* 2788 * Update association byte-count lifetimes. Don't forget to take 2789 * into account the padding length and next-header (hence the + 2). 2790 * 2791 * Use the amount of data fed into the "encryption algorithm". This 2792 * is the IV, the data length, the padding length, and the final two 2793 * bytes (padlen, and next-header). 2794 * 2795 */ 2796 2797 if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) { 2798 /* 2799 * TODO: Find the outbound IRE for this packet and 2800 * pass it to ip_drop_packet(). 
2801 */ 2802 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2803 DROPPER(ipss, ipds_esp_bytes_expire), 2804 &espstack->esp_dropper); 2805 return (IPSEC_STATUS_FAILED); 2806 } 2807 2808 espmp = allocb(esplen, BPRI_HI); 2809 if (espmp == NULL) { 2810 ESP_BUMP_STAT(espstack, out_discards); 2811 esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n")); 2812 /* 2813 * TODO: Find the outbound IRE for this packet and 2814 * pass it to ip_drop_packet(). 2815 */ 2816 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2817 DROPPER(ipss, ipds_esp_nomem), 2818 &espstack->esp_dropper); 2819 return (IPSEC_STATUS_FAILED); 2820 } 2821 espmp->b_wptr += esplen; 2822 esph_ptr = (esph_t *)espmp->b_rptr; 2823 2824 if (is_natt) { 2825 esp3dbg(espstack, ("esp_outbound: NATT")); 2826 2827 udpha = (udpha_t *)espmp->b_rptr; 2828 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? 2829 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); 2830 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? 2831 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); 2832 /* 2833 * Set the checksum to 0, so that the esp_prepare_udp() call 2834 * can do the right thing. 2835 */ 2836 udpha->uha_checksum = 0; 2837 esph_ptr = (esph_t *)(udpha + 1); 2838 } 2839 2840 esph_ptr->esph_spi = assoc->ipsa_spi; 2841 2842 esph_ptr->esph_replay = htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1)); 2843 if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) { 2844 /* 2845 * XXX We have replay counter wrapping. 2846 * We probably want to nuke this SA (and its peer). 2847 */ 2848 ipsec_assocfailure(info.mi_idnum, 0, 0, 2849 SL_ERROR | SL_CONSOLE | SL_WARN, 2850 "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n", 2851 esph_ptr->esph_spi, assoc->ipsa_dstaddr, af, 2852 espstack->ipsecesp_netstack); 2853 2854 ESP_BUMP_STAT(espstack, out_discards); 2855 sadb_replay_delete(assoc); 2856 /* 2857 * TODO: Find the outbound IRE for this packet and 2858 * pass it to ip_drop_packet(). 
2859 */ 2860 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2861 DROPPER(ipss, ipds_esp_replay), 2862 &espstack->esp_dropper); 2863 return (IPSEC_STATUS_FAILED); 2864 } 2865 2866 iv_ptr = (esph_ptr + 1); 2867 /* 2868 * iv_ptr points to the mblk which will contain the IV once we have 2869 * written it there. This mblk will be part of a mblk chain that 2870 * will make up the packet. 2871 * 2872 * For counter mode algorithms, the IV is a 64 bit quantity, it 2873 * must NEVER repeat in the lifetime of the SA, otherwise an 2874 * attacker who had recorded enough packets might be able to 2875 * determine some clear text. 2876 * 2877 * To ensure this does not happen, the IV is stored in the SA and 2878 * incremented for each packet, the IV is then copied into the 2879 * "packet" for transmission to the receiving system. The IV will 2880 * also be copied into the nonce, when the packet is encrypted. 2881 * 2882 * CBC mode algorithms use a random IV for each packet. We do not 2883 * require the highest quality random bits, but for best security 2884 * with CBC mode ciphers, the value must be unlikely to repeat and 2885 * must not be known in advance to an adversary capable of influencing 2886 * the clear text. 2887 */ 2888 if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc, 2889 espstack)) { 2890 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2891 DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper); 2892 return (IPSEC_STATUS_FAILED); 2893 } 2894 2895 /* Fix the IP header. 
*/ 2896 alloclen = padlen + 2 + mac_len; 2897 adj = alloclen + (espmp->b_wptr - espmp->b_rptr); 2898 2899 protocol = *nhp; 2900 2901 if (io->ipsec_out_v4) { 2902 ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj); 2903 if (is_natt) { 2904 *nhp = IPPROTO_UDP; 2905 udpha->uha_length = htons(ntohs(ipha->ipha_length) - 2906 IPH_HDR_LENGTH(ipha)); 2907 } else { 2908 *nhp = IPPROTO_ESP; 2909 } 2910 ipha->ipha_hdr_checksum = 0; 2911 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 2912 } else { 2913 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj); 2914 *nhp = IPPROTO_ESP; 2915 } 2916 2917 /* I've got the two ESP mblks, now insert them. */ 2918 2919 esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n")); 2920 esp2dbg(espstack, (dump_msg(data_mp))); 2921 2922 if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) { 2923 ESP_BUMP_STAT(espstack, out_discards); 2924 /* NOTE: esp_insert_esp() only fails if there's no memory. */ 2925 /* 2926 * TODO: Find the outbound IRE for this packet and 2927 * pass it to ip_drop_packet(). 2928 */ 2929 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2930 DROPPER(ipss, ipds_esp_nomem), 2931 &espstack->esp_dropper); 2932 freeb(espmp); 2933 return (IPSEC_STATUS_FAILED); 2934 } 2935 2936 /* Append padding (and leave room for ICV). */ 2937 for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont) 2938 ; 2939 if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) { 2940 tailmp->b_cont = allocb(alloclen, BPRI_HI); 2941 if (tailmp->b_cont == NULL) { 2942 ESP_BUMP_STAT(espstack, out_discards); 2943 esp0dbg(("esp_outbound: Can't allocate tailmp.\n")); 2944 /* 2945 * TODO: Find the outbound IRE for this packet and 2946 * pass it to ip_drop_packet(). 
2947 */ 2948 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2949 DROPPER(ipss, ipds_esp_nomem), 2950 &espstack->esp_dropper); 2951 return (IPSEC_STATUS_FAILED); 2952 } 2953 tailmp = tailmp->b_cont; 2954 } 2955 2956 /* 2957 * If there's padding, N bytes of padding must be of the form 0x1, 2958 * 0x2, 0x3... 0xN. 2959 */ 2960 for (i = 0; i < padlen; ) { 2961 i++; 2962 *tailmp->b_wptr++ = i; 2963 } 2964 *tailmp->b_wptr++ = i; 2965 *tailmp->b_wptr++ = protocol; 2966 2967 esp2dbg(espstack, ("data_Mp before encryption:\n")); 2968 esp2dbg(espstack, (dump_msg(data_mp))); 2969 2970 /* 2971 * The packet is eligible for hardware acceleration if the 2972 * following conditions are satisfied: 2973 * 2974 * 1. the packet will not be fragmented 2975 * 2. the provider supports the algorithms specified by SA 2976 * 3. there is no pending control message being exchanged 2977 * 4. snoop is not attached 2978 * 5. the destination address is not a multicast address 2979 * 2980 * All five of these conditions are checked by IP prior to 2981 * sending the packet to ESP. 2982 * 2983 * But We, and We Alone, can, nay MUST check if the packet 2984 * is over NATT, and then disqualify it from hardware 2985 * acceleration. 2986 */ 2987 2988 if (io->ipsec_out_is_capab_ill && !(assoc->ipsa_flags & IPSA_F_NATT)) { 2989 return (esp_outbound_accelerated(ipsec_out_mp, mac_len)); 2990 } 2991 ESP_BUMP_STAT(espstack, noaccel); 2992 2993 /* 2994 * Okay. I've set up the pre-encryption ESP. Let's do it! 2995 */ 2996 2997 if (mac_len > 0) { 2998 ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim); 2999 icv_buf = tailmp->b_wptr; 3000 tailmp->b_wptr += mac_len; 3001 } else { 3002 icv_buf = NULL; 3003 } 3004 3005 return (esp_submit_req_outbound(ipsec_out_mp, assoc, icv_buf, 3006 datalen + padlen + 2)); 3007 } 3008 3009 /* 3010 * IP calls this to validate the ICMP errors that 3011 * we got from the network. 
 */
ipsec_status_t
ipsecesp_icmp_error(mblk_t *ipsec_mp)
{
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN);
	netstack_t *ns;
	ipsecesp_stack_t *espstack;
	ipsec_stack_t *ipss;

	if (is_inbound) {
		ns = ii->ipsec_in_ns;
	} else {
		ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;

		ns = io->ipsec_out_ns;
	}
	espstack = ns->netstack_ipsecesp;
	ipss = ns->netstack_ipsec;

	/*
	 * Unless we get an entire packet back, this function is useless.
	 * Why?
	 *
	 * 1.) Partial packets are useless, because the "next header"
	 * is at the end of the decrypted ESP packet. Without the
	 * whole packet, this is useless.
	 *
	 * 2.) If we every use a stateful cipher, such as a stream or a
	 * one-time pad, we can't do anything.
	 *
	 * Since the chances of us getting an entire packet back are very
	 * very small, we discard here.
	 */
	IP_ESP_BUMP_STAT(ipss, in_discards);
	ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
	    DROPPER(ipss, ipds_esp_icmp),
	    &espstack->esp_dropper);
	return (IPSEC_STATUS_FAILED);
}

/*
 * ESP module read put routine.  Handles TPI acknowledgements from IP;
 * anything else is passed through unchanged.
 */
/* ARGSUSED */
static void
ipsecesp_rput(queue_t *q, mblk_t *mp)
{
	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

	ASSERT(mp->b_datap->db_type != M_CTL); /* No more IRE_DB_REQ. */

	switch (mp->b_datap->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* TPI message of some sort. */
		switch (*((t_scalar_t *)mp->b_rptr)) {
		case T_BIND_ACK:
			esp3dbg(espstack,
			    ("Thank you IP from ESP for T_BIND_ACK\n"));
			break;
		case T_ERROR_ACK:
			cmn_err(CE_WARN,
			    "ipsecesp: ESP received T_ERROR_ACK from IP.");
			/*
			 * Make esp_sadb.s_ip_q NULL, and in the
			 * future, perhaps try again.
			 */
			espstack->esp_sadb.s_ip_q = NULL;
			break;
		case T_OK_ACK:
			/* Probably from a (rarely sent) T_UNBIND_REQ. */
			break;
		default:
			esp0dbg(("Unknown M_{,PC}PROTO message.\n"));
		}
		freemsg(mp);
		break;
	default:
		/* For now, passthru message. */
		esp2dbg(espstack, ("ESP got unknown mblk type %d.\n",
		    mp->b_datap->db_type));
		putnext(q, mp);
	}
}

/*
 * Construct an SADB_REGISTER message with the current algorithms.
 * This function gets called when 'ipsecalgs -s' is run or when
 * in.iked (or other KMD) starts.
 */
static boolean_t
esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
    ipsecesp_stack_t *espstack, mblk_t *in_mp)
{
	mblk_t *pfkey_msg_mp, *keysock_out_mp;
	sadb_msg_t *samsg;
	sadb_supported_t *sasupp_auth = NULL;
	sadb_supported_t *sasupp_encr = NULL;
	sadb_alg_t *saalg;
	uint_t allocsize = sizeof (*samsg);
	uint_t i, numalgs_snap;
	int current_aalgs;
	ipsec_alginfo_t **authalgs;
	uint_t num_aalgs;
	int current_ealgs;
	ipsec_alginfo_t **encralgs;
	uint_t num_ealgs;
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
	sadb_sens_t *sens;
	size_t sens_len = 0;
	sadb_ext_t *nextext;
	cred_t *sens_cr = NULL;

	/* Allocate the KEYSOCK_OUT. */
	keysock_out_mp = sadb_keysock_out(serial);
	if (keysock_out_mp == NULL) {
		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
		return (B_FALSE);
	}

	/* On labeled systems, reserve room for a sensitivity extension. */
	if (is_system_labeled() && (in_mp != NULL)) {
		sens_cr = msg_getcred(in_mp, NULL);

		if (sens_cr != NULL) {
			sens_len = sadb_sens_len_from_cred(sens_cr);
			allocsize += sens_len;
		}
	}

	/*
	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
	 */

	mutex_enter(&ipss->ipsec_alg_lock);
	/*
	 * Fill SADB_REGISTER message's algorithm descriptors.
Hold
	 * down the lock while filling it.
	 *
	 * Return only valid algorithms, so the number of algorithms
	 * to send up may be less than the number of algorithm entries
	 * in the table.
	 */
	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
			num_aalgs++;

	if (num_aalgs != 0) {
		allocsize += (num_aalgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp_auth);
	}
	encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
			num_ealgs++;

	if (num_ealgs != 0) {
		allocsize += (num_ealgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp_encr);
	}
	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
	if (keysock_out_mp->b_cont == NULL) {
		mutex_exit(&ipss->ipsec_alg_lock);
		freemsg(keysock_out_mp);
		return (B_FALSE);
	}
	pfkey_msg_mp = keysock_out_mp->b_cont;
	pfkey_msg_mp->b_wptr += allocsize;

	/* Extensions start right after the fixed sadb_msg_t header. */
	nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));

	if (num_aalgs != 0) {
		sasupp_auth = (sadb_supported_t *)nextext;
		saalg = (sadb_alg_t *)(sasupp_auth + 1);

		ASSERT(((ulong_t)saalg & 0x7) == 0);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
		    i++) {
			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
				continue;

			saalg->sadb_alg_id = authalgs[i]->alg_id;
			saalg->sadb_alg_ivlen = 0;
			saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
			saalg->sadb_x_alg_increment =
			    authalgs[i]->alg_increment;
			saalg->sadb_x_alg_saltbits = SADB_8TO1(
			    authalgs[i]->alg_saltlen);
			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_aalgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++) {
			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
				cmn_err(CE_PANIC, "esp_register_out()! "
				    "Missed aalg #%d.\n", i);
			}
		}
#endif /* DEBUG */
		nextext = (sadb_ext_t *)saalg;
	}

	if (num_ealgs != 0) {
		sasupp_encr = (sadb_supported_t *)nextext;
		saalg = (sadb_alg_t *)(sasupp_encr + 1);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
				continue;
			saalg->sadb_alg_id = encralgs[i]->alg_id;
			saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
			saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits;
			/*
			 * We could advertise the ICV length, except there
			 * is not a value in sadb_x_algb to do this.
			 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
			 */
			saalg->sadb_x_alg_increment =
			    encralgs[i]->alg_increment;
			saalg->sadb_x_alg_saltbits =
			    SADB_8TO1(encralgs[i]->alg_saltlen);

			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_ealgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++) {
			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
				cmn_err(CE_PANIC, "esp_register_out()! "
				    "Missed ealg #%d.\n", i);
			}
		}
#endif /* DEBUG */
		nextext = (sadb_ext_t *)saalg;
	}

	current_aalgs = num_aalgs;
	current_ealgs = num_ealgs;

	mutex_exit(&ipss->ipsec_alg_lock);

	if (sens_cr != NULL) {
		sens = (sadb_sens_t *)nextext;
		sadb_sens_from_cred(sens, SADB_EXT_SENSITIVITY,
		    sens_cr, sens_len);

		nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
	}

	/* Now fill the rest of the SADB_REGISTER message. */

	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
	samsg->sadb_msg_version = PF_KEY_V2;
	samsg->sadb_msg_type = SADB_REGISTER;
	samsg->sadb_msg_errno = 0;
	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
	samsg->sadb_msg_len = SADB_8TO64(allocsize);
	samsg->sadb_msg_reserved = 0;
	/*
	 * Assume caller has sufficient sequence/pid number info. If it's one
	 * from me over a new alg., I could give two hoots about sequence.
	 */
	samsg->sadb_msg_seq = sequence;
	samsg->sadb_msg_pid = pid;

	if (sasupp_auth != NULL) {
		sasupp_auth->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
		sasupp_auth->sadb_supported_reserved = 0;
	}

	if (sasupp_encr != NULL) {
		sasupp_encr->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
		sasupp_encr->sadb_supported_exttype =
		    SADB_EXT_SUPPORTED_ENCRYPT;
		sasupp_encr->sadb_supported_reserved = 0;
	}

	if (espstack->esp_pfkey_q != NULL)
		putnext(espstack->esp_pfkey_q, keysock_out_mp);
	else {
		freemsg(keysock_out_mp);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Invoked when the algorithm table changes. Causes SADB_REGISTER
 * messages containing the current list of algorithms to be
 * sent up to the ESP listeners.
 */
void
ipsecesp_algs_changed(netstack_t *ns)
{
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

	/*
	 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
	 * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
	 */
	(void) esp_register_out(0, 0, 0, espstack, NULL);
}

/*
 * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
 * and put() it into AH and STREAMS again.
 *
 * Runs asynchronously, so the netstack is re-looked-up by stack id and
 * compared against the one recorded in the IPSEC_IN; if the stack has
 * gone away in the meantime, the packet is simply freed.
 */
static void
inbound_task(void *arg)
{
	esph_t *esph;
	mblk_t *mp = (mblk_t *)arg;
	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
	netstack_t *ns;
	ipsecesp_stack_t *espstack;
	int ipsec_rc;

	ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
	if (ns == NULL || ns != ii->ipsec_in_ns) {
		/* Just freemsg(). */
		if (ns != NULL)
			netstack_rele(ns);
		freemsg(mp);
		return;
	}

	espstack = ns->netstack_ipsecesp;

	esp2dbg(espstack, ("in ESP inbound_task"));
	ASSERT(espstack != NULL);

	/*
	 * NOTE(review): when the SA lookup fails (esph == NULL), mp is
	 * assumed to have been consumed by ipsec_inbound_esp_sa() —
	 * verify, since it is not freed here on that path.
	 */
	esph = ipsec_inbound_esp_sa(mp, ns);
	if (esph != NULL) {
		ASSERT(ii->ipsec_in_esp_sa != NULL);
		ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func(mp, esph);
		if (ipsec_rc == IPSEC_STATUS_SUCCESS)
			ip_fanout_proto_again(mp, NULL, NULL, NULL);
	}
	netstack_rele(ns);
}

/*
 * Now that weak-key passed, actually ADD the security association, and
 * send back a reply ADD message.
 */
static int
esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
    int *diagnostic, ipsecesp_stack_t *espstack)
{
	isaf_t *primary = NULL, *secondary;
	boolean_t clone = B_FALSE, is_inbound = B_FALSE;
	ipsa_t *larval = NULL;
	ipsacq_t *acqrec;
	iacqf_t *acq_bucket;
	mblk_t *acq_msgs = NULL;
	int rc;
	mblk_t *lpkt;
	int error;
	ipsa_query_t sq;
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	/*
	 * Locate the appropriate table(s).
	 */
	sq.spp = &espstack->esp_sadb;	/* XXX */
	error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
	    IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
	    &sq, diagnostic);
	if (error)
		return (error);

	/*
	 * Use the direction flags provided by the KMD to determine
	 * if the inbound or outbound table should be the primary
	 * for this SA. If these flags were absent then make this
	 * decision based on the addresses.
	 */
	if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
		primary = sq.inbound;
		secondary = sq.outbound;
		is_inbound = B_TRUE;
		if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
			clone = B_TRUE;
	} else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
		primary = sq.outbound;
		secondary = sq.inbound;
	}

	if (primary == NULL) {
		/*
		 * The KMD did not set a direction flag, determine which
		 * table to insert the SA into based on addresses.
		 */
		switch (ksi->ks_in_dsttype) {
		case KS_IN_ADDR_MBCAST:
			clone = B_TRUE;	/* All mcast SAs can be bidirectional */
			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
			/* FALLTHRU */
		/*
		 * If the source address is either one of mine, or unspecified
		 * (which is best summed up by saying "not 'not mine'"),
		 * then the association is potentially bi-directional,
		 * in that it can be used for inbound traffic and outbound
		 * traffic.  The best example of such an SA is a multicast
		 * SA (which allows me to receive the outbound traffic).
		 */
		case KS_IN_ADDR_ME:
			sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
			primary = sq.inbound;
			secondary = sq.outbound;
			if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
				clone = B_TRUE;
			is_inbound = B_TRUE;
			break;
		/*
		 * If the source address literally not mine (either
		 * unspecified or not mine), then this SA may have an
		 * address that WILL be mine after some configuration.
		 * We pay the price for this by making it a bi-directional
		 * SA.
		 */
		case KS_IN_ADDR_NOTME:
			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
			primary = sq.outbound;
			secondary = sq.inbound;
			if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
				sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
				clone = B_TRUE;
			}
			break;
		default:
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
			return (EINVAL);
		}
	}

	/*
	 * Find a ACQUIRE list entry if possible.  If we've added an SA that
	 * suits the needs of an ACQUIRE list entry, we can eliminate the
	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
	 * high-bit of the sequence number to queue it.  Key off destination
	 * addr, and change acqrec's state.
	 */

	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
		acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
		mutex_enter(&acq_bucket->iacqf_lock);
		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
		    acqrec = acqrec->ipsacq_next) {
			/* Bucket lock is held across the record locks. */
			mutex_enter(&acqrec->ipsacq_lock);
			/*
			 * Q:  I only check sequence.  Should I check dst?
			 * A: Yes, check dest because those are the packets
			 *    that are queued up.
			 */
			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
			    IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
				break;
			mutex_exit(&acqrec->ipsacq_lock);
		}
		if (acqrec != NULL) {
			/*
			 * AHA!  I found an ACQUIRE record for this SA.
			 * Grab the msg list, and free the acquire record.
			 * I already am holding the lock for this record,
			 * so all I have to do is free it.
			 */
			acq_msgs = acqrec->ipsacq_mp;
			acqrec->ipsacq_mp = NULL;
			mutex_exit(&acqrec->ipsacq_lock);
			sadb_destroy_acquire(acqrec,
			    espstack->ipsecesp_netstack);
		}
		mutex_exit(&acq_bucket->iacqf_lock);
	}

	/*
	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
	 * in larval list (if there).
	 */
	if (samsg->sadb_msg_type == SADB_UPDATE) {
		mutex_enter(&sq.inbound->isaf_lock);
		larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
		    ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
		mutex_exit(&sq.inbound->isaf_lock);

		if ((larval == NULL) ||
		    (larval->ipsa_state != IPSA_STATE_LARVAL)) {
			*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
			if (larval != NULL) {
				IPSA_REFRELE(larval);
			}
			esp0dbg(("Larval update, but larval disappeared.\n"));
			return (ESRCH);
		} /* Else sadb_common_add unlinks it for me! */
	}

	lpkt = NULL;
	if (larval != NULL)
		lpkt = sadb_clear_lpkt(larval);

	rc = sadb_common_add(espstack->esp_sadb.s_ip_q, espstack->esp_pfkey_q,
	    mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
	    diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);

	/* A failed dispatch (returns 0) forces the drop path below. */
	if (rc == 0 && lpkt != NULL)
		rc = !taskq_dispatch(esp_taskq, inbound_task, lpkt, TQ_NOSLEEP);

	/*
	 * NOTE(review): if sadb_common_add() failed with no buffered
	 * larval packet, lpkt is still NULL here; this relies on
	 * ip_drop_packet() tolerating a NULL mp — verify.
	 */
	if (rc != 0) {
		ip_drop_packet(lpkt, B_TRUE, NULL, NULL,
		    DROPPER(ipss, ipds_sadb_inlarval_timeout),
		    &espstack->esp_dropper);
	}

	/*
	 * How much more stack will I create with all of these
	 * esp_outbound() calls?
	 */

	while (acq_msgs != NULL) {
		/* Note: shadows the 'mp' parameter within this loop body. */
		mblk_t *mp = acq_msgs;

		acq_msgs = acq_msgs->b_next;
		mp->b_next = NULL;
		if (rc == 0) {
			if (ipsec_outbound_sa(mp, IPPROTO_ESP)) {
				((ipsec_out_t *)(mp->b_rptr))->
				    ipsec_out_esp_done = B_TRUE;
				if (esp_outbound(mp) == IPSEC_STATUS_SUCCESS) {
					ipha_t *ipha;

					/* do AH processing if needed */
					if (!esp_do_outbound_ah(mp))
						continue;

					ipha = (ipha_t *)mp->b_cont->b_rptr;

					/* finish IPsec processing */
					if (IPH_HDR_VERSION(ipha) ==
					    IP_VERSION) {
						ip_wput_ipsec_out(NULL, mp,
						    ipha, NULL, NULL);
					} else {
						ip6_t *ip6h = (ip6_t *)ipha;
						ip_wput_ipsec_out_v6(NULL,
						    mp, ip6h, NULL, NULL);
					}
				}
				continue;
			}
		}
		ESP_BUMP_STAT(espstack, out_discards);
		ip_drop_packet(mp, B_FALSE, NULL, NULL,
		    DROPPER(ipss, ipds_sadb_acquire_timeout),
		    &espstack->esp_dropper);
	}

	return (rc);
}

/*
 * Add new ESP security association.  This may become a generic AH/ESP
 * routine eventually.
 */
static int
esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
{
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	sadb_address_t *srcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
	sadb_address_t *isrcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
	sadb_address_t *idstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
	sadb_address_t *nttext_loc =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
	sadb_address_t *nttext_rem =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
	struct sockaddr_in *src, *dst;
	struct sockaddr_in *natt_loc, *natt_rem;
	struct sockaddr_in6 *natt_loc6, *natt_rem6;
	sadb_lifetime_t *soft =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
	sadb_lifetime_t *hard =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
	sadb_lifetime_t *idle =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/* I need certain extensions present for an ADD message. */
	if (srcext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
		return (EINVAL);
	}
	if (dstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
		return (EINVAL);
	}
	if (isrcext == NULL && idstext != NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
		return (EINVAL);
	}
	if (isrcext != NULL && idstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
		return (EINVAL);
	}
	if (assoc == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
		return (EINVAL);
	}
	if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
		return (EINVAL);
	}

	/*
	 * NOTE(review): the NAT-T pointers are computed here from
	 * possibly-NULL extensions; they are only dereferenced below
	 * after the corresponding SADB_X_SAFLAGS_NATT_* flag check has
	 * confirmed the extension is present.
	 */
	src = (struct sockaddr_in *)(srcext + 1);
	dst = (struct sockaddr_in *)(dstext + 1);
	natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
	natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
	natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
	natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);

	/* Sundry ADD-specific reality checks. */
	/* XXX STATS :  Logging/stats here? */

	if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
		return (EINVAL);
	}
	if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
		return (EINVAL);
	}

	if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
	    assoc->sadb_sa_auth == SADB_AALG_NONE) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
		return (EINVAL);
	}

	if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
		return (EINVAL);
	}

	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
		return (EINVAL);
	}
	ASSERT(src->sin_family == dst->sin_family);

	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
		if (nttext_loc == NULL) {
			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
			return (EINVAL);
		}

		if (natt_loc->sin_family == AF_INET6 &&
		    !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
			return (EINVAL);
		}
	}

	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
		if (nttext_rem == NULL) {
			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
			return (EINVAL);
		}
		if (natt_rem->sin_family == AF_INET6 &&
		    !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
			return (EINVAL);
		}
	}


	/* Stuff I don't support, for now.  XXX Diagnostic? */
	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
		return (EOPNOTSUPP);

	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
		return (EINVAL);

	/*
	 * XXX Policy :  I'm not checking identities at this time,
	 * but if I did, I'd do them here, before I sent
	 * the weak key check up to the algorithm.
	 */

	mutex_enter(&ipss->ipsec_alg_lock);

	/*
	 * First locate the authentication algorithm.
	 */
	if (akey != NULL) {
		ipsec_alginfo_t *aalg;

		aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
		    [assoc->sadb_sa_auth];
		if (aalg == NULL || !ALG_VALID(aalg)) {
			mutex_exit(&ipss->ipsec_alg_lock);
			esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
			    assoc->sadb_sa_auth));
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
			return (EINVAL);
		}

		/*
		 * Sanity check key sizes.
		 * Note: It's not possible to use SADB_AALG_NONE because
		 * this auth_alg is not defined with ALG_FLAG_VALID. If this
		 * ever changes, the same check for SADB_AALG_NONE and
		 * a auth_key != NULL should be made here ( see below).
		 */
		if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
			mutex_exit(&ipss->ipsec_alg_lock);
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
			return (EINVAL);
		}
		ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);

		/* check key and fix parity if needed */
		if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
		    diagnostic) != 0) {
			mutex_exit(&ipss->ipsec_alg_lock);
			return (EINVAL);
		}
	}

	/*
	 * Then locate the encryption algorithm.
	 */
	if (ekey != NULL) {
		uint_t keybits;
		ipsec_alginfo_t *ealg;

		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
		    [assoc->sadb_sa_encrypt];
		if (ealg == NULL || !ALG_VALID(ealg)) {
			mutex_exit(&ipss->ipsec_alg_lock);
			esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
			    assoc->sadb_sa_encrypt));
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
			return (EINVAL);
		}

		/*
		 * Sanity check key sizes. If the encryption algorithm is
		 * SADB_EALG_NULL but the encryption key is NOT
		 * NULL then complain.
		 *
		 * The keying material includes salt bits if required by
		 * algorithm and optionally the Initial IV, check the
		 * length of what's left.
		 */
		keybits = ekey->sadb_key_bits;
		keybits -= ekey->sadb_key_reserved;
		keybits -= SADB_8TO1(ealg->alg_saltlen);
		if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
		    (!ipsec_valid_key_size(keybits, ealg))) {
			mutex_exit(&ipss->ipsec_alg_lock);
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
			return (EINVAL);
		}
		ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);

		/* check key */
		if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
		    diagnostic) != 0) {
			mutex_exit(&ipss->ipsec_alg_lock);
			return (EINVAL);
		}
	}
	mutex_exit(&ipss->ipsec_alg_lock);

	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
	    diagnostic, espstack));
}

/*
 * Update a security association.  Updates come in two varieties.  The first
 * is an update of lifetimes on a non-larval SA.  The second is an update of
 * a larval SA, which ends up looking a lot more like an add.
 */
static int
esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
    ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
{
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	mblk_t *buf_pkt;
	int rcode;

	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];

	if (dstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
		return (EINVAL);
	}

	rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
	    diagnostic, espstack->esp_pfkey_q, esp_add_sa,
	    espstack->ipsecesp_netstack, sadb_msg_type);

	/*
	 * NOTE(review): assoc is dereferenced here without a NULL check;
	 * assumes sadb_update_sa() returns non-zero when SADB_EXT_SA is
	 * absent — verify.
	 */
	if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
	    (rcode != 0)) {
		return (rcode);
	}

	/* Drain any packet buffered on the (formerly larval) SA. */
	HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
	    espstack->esp_dropper, buf_pkt);

	return (rcode);
}

/* XXX refactor me */
/*
 * Delete a security association.  This is REALLY likely to be code common to
 * both AH and ESP.  Find the association, then unlink it.
 *
 * With no SA extension present, this becomes a purge keyed off whichever
 * address extension was supplied (destination preferred).
 */
static int
esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
    ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
{
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
	sadb_address_t *srcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
	struct sockaddr_in *sin;

	if (assoc == NULL) {
		if (dstext != NULL) {
			sin = (struct sockaddr_in *)(dstext + 1);
		} else if (srcext != NULL) {
			sin = (struct sockaddr_in *)(srcext + 1);
		} else {
			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
			return (EINVAL);
		}
		return (sadb_purge_sa(mp, ksi,
		    (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
		    &espstack->esp_sadb.s_v4, diagnostic,
		    espstack->esp_pfkey_q, espstack->esp_sadb.s_ip_q));
	}

	return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
	    espstack->esp_pfkey_q, sadb_msg_type));
}

/* XXX refactor me */
/*
 * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
 * messages.
 */
static void
esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
{
	int error;
	sadb_msg_t *samsg;

	/*
	 * Dump each fanout, bailing if error is non-zero.
	 */

	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
	    &espstack->esp_sadb.s_v4);
	if (error != 0)
		goto bail;

	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
	    &espstack->esp_sadb.s_v6);
bail:
	/* Echo the original message back with the (possibly zero) errno. */
	ASSERT(mp->b_cont != NULL);
	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
	samsg->sadb_msg_errno = (uint8_t)error;
	sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
	    (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
}

/*
 * First-cut reality check for an inbound PF_KEY message.
 * Returns B_TRUE (and sends the error reply itself) on failure.
 */
static boolean_t
esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
    ipsecesp_stack_t *espstack)
{
	int diagnostic;

	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
		goto badmsg;
	}
	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
		goto badmsg;
	}
	return (B_FALSE);	/* False ==> no failures */

badmsg:
	sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
	    ksi->ks_in_serial);
	return (B_TRUE);	/* True ==> failures */
}

/*
 * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
 * error cases.
 * What I receive is a fully-formed, syntactically legal
 * PF_KEY message.  I then need to check semantics...
 *
 * This code may become common to AH and ESP.  Stay tuned.
 *
 * I also make the assumption that db_ref's are cool.  If this assumption
 * is wrong, this means that someone other than keysock or me has been
 * mucking with PF_KEY messages.
 */
static void
esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
{
	mblk_t *msg = mp->b_cont;
	sadb_msg_t *samsg;
	keysock_in_t *ksi;
	int error;
	int diagnostic = SADB_X_DIAGNOSTIC_NONE;

	ASSERT(msg != NULL);

	samsg = (sadb_msg_t *)msg->b_rptr;
	ksi = (keysock_in_t *)mp->b_rptr;

	/*
	 * If applicable, convert unspecified AF_INET6 to unspecified
	 * AF_INET.  And do other address reality checks.
	 * Both helpers send the error reply themselves on failure.
	 */
	if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
	    espstack->ipsecesp_netstack) ||
	    esp_pfkey_reality_failures(mp, ksi, espstack)) {
		return;
	}

	switch (samsg->sadb_msg_type) {
	case SADB_ADD:
		error = esp_add_sa(mp, ksi, &diagnostic,
		    espstack->ipsecesp_netstack);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* else esp_add_sa() took care of things. */
		break;
	case SADB_DELETE:
	case SADB_X_DELPAIR:
	case SADB_X_DELPAIR_STATE:
		error = esp_del_sa(mp, ksi, &diagnostic, espstack,
		    samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* Else esp_del_sa() took care of things. */
		break;
	case SADB_GET:
		error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
		    &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* Else sadb_delget_sa() took care of things. */
		break;
	case SADB_FLUSH:
		sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
		sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
		break;
	case SADB_REGISTER:
		/*
		 * Hmmm, let's do it!  Check for extensions (there should
		 * be none), extract the fields, call esp_register_out(),
		 * then either free or report an error.
		 *
		 * Keysock takes care of the PF_KEY bookkeeping for this.
		 */
		if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
		    ksi->ks_in_serial, espstack, mp)) {
			freemsg(mp);
		} else {
			/*
			 * Only way this path hits is if there is a memory
			 * failure.  It will not return B_FALSE because of
			 * lack of esp_pfkey_q if I am in wput().
			 */
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
			    diagnostic, ksi->ks_in_serial);
		}
		break;
	case SADB_UPDATE:
	case SADB_X_UPDATEPAIR:
		/*
		 * Find a larval, if not there, find a full one and get
		 * strict.
		 */
		error = esp_update_sa(mp, ksi, &diagnostic, espstack,
		    samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* else esp_update_sa() took care of things. */
		break;
	case SADB_GETSPI:
		/*
		 * Reserve a new larval entry.
		 */
		esp_getspi(mp, ksi, espstack);
		break;
	case SADB_ACQUIRE:
		/*
		 * Find larval and/or ACQUIRE record and kill it (them), I'm
		 * most likely an error.  Inbound ACQUIRE messages should only
		 * have the base header.
		 */
		sadb_in_acquire(samsg, &espstack->esp_sadb,
		    espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
		freemsg(mp);
		break;
	case SADB_DUMP:
		/*
		 * Dump all entries.
		 */
		esp_dump(mp, ksi, espstack);
		/* esp_dump will take care of the return message, etc. */
		break;
	case SADB_EXPIRE:
		/* Should never reach me. */
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
		    diagnostic, ksi->ks_in_serial);
		break;
	default:
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
		break;
	}
}

/*
 * Handle case where PF_KEY says it can't find a keysock for one of my
 * ACQUIRE messages.
 */
static void
esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
{
	sadb_msg_t *samsg;
	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;

	if (mp->b_cont == NULL) {
		freemsg(mp);
		return;
	}
	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;

	/*
	 * If keysock can't find any registered, delete the acquire record
	 * immediately, and handle errors.
	 */
	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
		samsg->sadb_msg_errno = kse->ks_err_errno;
		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
		/*
		 * Use the write-side of the esp_pfkey_q, in case there is
		 * no esp_sadb.s_ip_q.
		 */
		sadb_in_acquire(samsg, &espstack->esp_sadb,
		    WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
	}

	freemsg(mp);
}

/*
 * ESP module write put routine.
 */
static void
ipsecesp_wput(queue_t *q, mblk_t *mp)
{
	ipsec_info_t *ii;
	struct iocblk *iocp;
	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

	esp3dbg(espstack, ("In esp_wput().\n"));

	/* NOTE:  Each case must take care of freeing or passing mp. */
	switch (mp->b_datap->db_type) {
	case M_CTL:
		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
			/* Not big enough message. */
			freemsg(mp);
			break;
		}
		ii = (ipsec_info_t *)mp->b_rptr;

		switch (ii->ipsec_info_type) {
		case KEYSOCK_OUT_ERR:
			esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
			esp_keysock_no_socket(mp, espstack);
			break;
		case KEYSOCK_IN:
			ESP_BUMP_STAT(espstack, keysock_in);
			esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));

			/* Parse the message. */
			esp_parse_pfkey(mp, espstack);
			break;
		case KEYSOCK_HELLO:
			sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
			    esp_ager, (void *)espstack, &espstack->esp_event,
			    SADB_SATYPE_ESP);
			break;
		default:
			esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
			    ii->ipsec_info_type));
			freemsg(mp);
			break;
		}
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case ND_SET:
		case ND_GET:
			if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
				qreply(q, mp);
				return;
			} else {
				iocp->ioc_error = ENOENT;
			}
			/* FALLTHRU */
		default:
			/* We really don't support any other ioctls, do we? */

			/* Return EINVAL */
			if (iocp->ioc_error != ENOENT)
				iocp->ioc_error = EINVAL;
			iocp->ioc_count = 0;
			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		}
	default:
		esp3dbg(espstack,
		    ("Got default message, type %d, passing to IP.\n",
		    mp->b_datap->db_type));
		putnext(q, mp);
	}
}

/*
 * Process an outbound ESP packet that can be accelerated by an IPsec
 * hardware acceleration capable Provider.
 * The caller already inserted and initialized the ESP header.
 * This function allocates a tagging M_CTL, and adds room at the end
 * of the packet to hold the ICV if authentication is needed.
4205 * 4206 * On success returns B_TRUE, on failure returns B_FALSE and frees the 4207 * mblk chain ipsec_out. 4208 */ 4209 static ipsec_status_t 4210 esp_outbound_accelerated(mblk_t *ipsec_out, uint_t icv_len) 4211 { 4212 ipsec_out_t *io; 4213 mblk_t *lastmp; 4214 netstack_t *ns; 4215 ipsecesp_stack_t *espstack; 4216 ipsec_stack_t *ipss; 4217 4218 io = (ipsec_out_t *)ipsec_out->b_rptr; 4219 ns = io->ipsec_out_ns; 4220 espstack = ns->netstack_ipsecesp; 4221 ipss = ns->netstack_ipsec; 4222 4223 ESP_BUMP_STAT(espstack, out_accelerated); 4224 4225 /* mark packet as being accelerated in IPSEC_OUT */ 4226 ASSERT(io->ipsec_out_accelerated == B_FALSE); 4227 io->ipsec_out_accelerated = B_TRUE; 4228 4229 /* 4230 * add room at the end of the packet for the ICV if needed 4231 */ 4232 if (icv_len > 0) { 4233 /* go to last mblk */ 4234 lastmp = ipsec_out; /* For following while loop. */ 4235 do { 4236 lastmp = lastmp->b_cont; 4237 } while (lastmp->b_cont != NULL); 4238 4239 /* if not enough available room, allocate new mblk */ 4240 if ((lastmp->b_wptr + icv_len) > lastmp->b_datap->db_lim) { 4241 lastmp->b_cont = allocb(icv_len, BPRI_HI); 4242 if (lastmp->b_cont == NULL) { 4243 ESP_BUMP_STAT(espstack, out_discards); 4244 ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL, 4245 DROPPER(ipss, ipds_esp_nomem), 4246 &espstack->esp_dropper); 4247 return (IPSEC_STATUS_FAILED); 4248 } 4249 lastmp = lastmp->b_cont; 4250 } 4251 lastmp->b_wptr += icv_len; 4252 } 4253 4254 return (IPSEC_STATUS_SUCCESS); 4255 } 4256 4257 /* 4258 * Process an inbound accelerated ESP packet. 4259 * On success returns B_TRUE, on failure returns B_FALSE and frees the 4260 * mblk chain ipsec_in. 
 */
/*
 * NOTE(review): ipsec_in is the IPSEC_IN M_CTL carrying the hardware
 * acceleration attributes (ii->ipsec_in_da); data_mp is the (single-mblk)
 * ESP datagram; isv4 selects IPv4 vs. IPv6 header fixups; assoc is the SA
 * the packet matched.  On every failure path the whole ipsec_in chain is
 * consumed via ip_drop_packet().
 */
static ipsec_status_t
esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4,
    ipsa_t *assoc)
{
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr;
	mblk_t *hada_mp;
	uint32_t icv_len = 0;
	da_ipsec_t *hada;
	ipha_t *ipha;
	ip6_t *ip6h;
	kstat_named_t *counter;	/* set on every goto esp_in_discard path */
	netstack_t	*ns = ii->ipsec_in_ns;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ESP_BUMP_STAT(espstack, in_accelerated);

	/* The Provider's acceleration attributes travel in a separate mblk. */
	hada_mp = ii->ipsec_in_da;
	ASSERT(hada_mp != NULL);
	hada = (da_ipsec_t *)hada_mp->b_rptr;

	/*
	 * We only support one level of decapsulation in hardware, so
	 * nuke the pointer.
	 */
	ii->ipsec_in_da = NULL;
	ii->ipsec_in_accelerated = B_FALSE;

	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) {
		/*
		 * ESP with authentication. We expect the Provider to have
		 * computed the ICV and placed it in the hardware acceleration
		 * data attributes.
		 *
		 * Extract ICV length from attributes M_CTL and sanity check
		 * its value. We allow the mblk to be smaller than da_ipsec_t
		 * for a small ICV, as long as the entire ICV fits within the
		 * mblk.
		 *
		 * Also ensures that the ICV length computed by Provider
		 * corresponds to the ICV length of the algorithm specified by
		 * the SA.
		 */
		icv_len = hada->da_icv_len;
		if ((icv_len != assoc->ipsa_mac_len) ||
		    (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) <
		    (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) {
			esp0dbg(("esp_inbound_accelerated: "
			    "ICV len (%u) incorrect or mblk too small (%u)\n",
			    icv_len, (uint32_t)(MBLKL(hada_mp))));
			counter = DROPPER(ipss, ipds_esp_bad_auth);
			goto esp_in_discard;
		}
	}

	/* get pointers to IP header; exactly one is set, keyed off isv4 */
	if (isv4) {
		ipha = (ipha_t *)data_mp->b_rptr;
	} else {
		ip6h = (ip6_t *)data_mp->b_rptr;
	}

	/*
	 * Compare ICV in ESP packet vs ICV computed by adapter.
	 * We also remove the ICV from the end of the packet since
	 * it will no longer be needed.
	 *
	 * Assume that esp_inbound() already ensured that the pkt
	 * was in one mblk.
	 */
	ASSERT(data_mp->b_cont == NULL);
	data_mp->b_wptr -= icv_len;
	/* adjust IP header length to account for the trimmed ICV */
	if (isv4)
		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - icv_len);
	else
		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - icv_len);
	/* icv_len == 0 (no auth) skips the comparison entirely */
	if (icv_len && bcmp(hada->da_icv, data_mp->b_wptr, icv_len)) {
		int af;
		void *addr;

		if (isv4) {
			addr = &ipha->ipha_dst;
			af = AF_INET;
		} else {
			addr = &ip6h->ip6_dst;
			af = AF_INET6;
		}

		/*
		 * Log the event. Don't print to the console, block
		 * potential denial-of-service attack.
		 */
		ESP_BUMP_STAT(espstack, bad_auth);
		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
		    "ESP Authentication failed spi %x, dst_addr %s",
		    assoc->ipsa_spi, addr, af, espstack->ipsecesp_netstack);
		counter = DROPPER(ipss, ipds_esp_bad_auth);
		goto esp_in_discard;
	}

	esp3dbg(espstack, ("esp_inbound_accelerated: ESP authentication "
	    "succeeded, checking replay\n"));

	/* reattach the data so drop/strip paths see the whole message */
	ipsec_in->b_cont = data_mp;

	/*
	 * Remove ESP header and padding from packet.
	 * On failure esp_strip_header() sets counter for the discard path.
	 */
	if (!esp_strip_header(data_mp, ii->ipsec_in_v4, assoc->ipsa_iv_len,
	    &counter, espstack)) {
		esp1dbg(espstack, ("esp_inbound_accelerated: "
		    "esp_strip_header() failed\n"));
		goto esp_in_discard;
	}

	/* acceleration attributes are no longer needed */
	freeb(hada_mp);

	/* propagate the SA's label credentials on labeled systems */
	if (is_system_labeled() && (assoc->ipsa_cred != NULL))
		mblk_setcred(data_mp, assoc->ipsa_cred, NOPID);

	/*
	 * Account for usage..
	 */
	if (!esp_age_bytes(assoc, msgdsize(data_mp), B_TRUE)) {
		/* The ipsa has hit hard expiration, LOG and AUDIT. */
		ESP_BUMP_STAT(espstack, bytes_expired);
		IP_ESP_BUMP_STAT(ipss, in_discards);
		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
		    "ESP association 0x%x, dst %s had bytes expire.\n",
		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
		    espstack->ipsecesp_netstack);
		ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
		    DROPPER(ipss, ipds_esp_bytes_expire),
		    &espstack->esp_dropper);
		return (IPSEC_STATUS_FAILED);
	}

	/* done processing the packet */
	return (IPSEC_STATUS_SUCCESS);

esp_in_discard:
	IP_ESP_BUMP_STAT(ipss, in_discards);
	freeb(hada_mp);

	ipsec_in->b_cont = data_mp;	/* For ip_drop_packet()'s sake... */
	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter,
	    &espstack->esp_dropper);

	return (IPSEC_STATUS_FAILED);
}

/*
 * Wrapper to allow IP to trigger an ESP association failure message
 * during inbound SA selection.
4417 */ 4418 void 4419 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt, 4420 uint32_t spi, void *addr, int af, ipsecesp_stack_t *espstack) 4421 { 4422 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 4423 4424 if (espstack->ipsecesp_log_unknown_spi) { 4425 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi, 4426 addr, af, espstack->ipsecesp_netstack); 4427 } 4428 4429 ip_drop_packet(mp, B_TRUE, NULL, NULL, 4430 DROPPER(ipss, ipds_esp_no_sa), 4431 &espstack->esp_dropper); 4432 } 4433 4434 /* 4435 * Initialize the ESP input and output processing functions. 4436 */ 4437 void 4438 ipsecesp_init_funcs(ipsa_t *sa) 4439 { 4440 if (sa->ipsa_output_func == NULL) 4441 sa->ipsa_output_func = esp_outbound; 4442 if (sa->ipsa_input_func == NULL) 4443 sa->ipsa_input_func = esp_inbound; 4444 } 4445