1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/errno.h> 32 #include <sys/strlog.h> 33 #include <sys/tihdr.h> 34 #include <sys/socket.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/kmem.h> 38 #include <sys/zone.h> 39 #include <sys/sysmacros.h> 40 #include <sys/cmn_err.h> 41 #include <sys/vtrace.h> 42 #include <sys/debug.h> 43 #include <sys/atomic.h> 44 #include <sys/strsun.h> 45 #include <sys/random.h> 46 #include <netinet/in.h> 47 #include <net/if.h> 48 #include <netinet/ip6.h> 49 #include <net/pfkeyv2.h> 50 51 #include <inet/common.h> 52 #include <inet/mi.h> 53 #include <inet/nd.h> 54 #include <inet/ip.h> 55 #include <inet/ip6.h> 56 #include <inet/sadb.h> 57 #include <inet/ipsec_info.h> 58 #include <inet/ipsec_impl.h> 59 #include <inet/ipsecesp.h> 60 #include <inet/ipdrop.h> 61 #include <inet/tcp.h> 62 #include <sys/kstat.h> 63 #include <sys/policy.h> 64 #include <sys/strsun.h> 65 #include 
<inet/udp_impl.h> 66 #include <sys/taskq.h> 67 #include <sys/note.h> 68 69 #include <sys/iphada.h> 70 71 /* 72 * Table of ND variables supported by ipsecesp. These are loaded into 73 * ipsecesp_g_nd in ipsecesp_init_nd. 74 * All of these are alterable, within the min/max values given, at run time. 75 */ 76 static ipsecespparam_t lcl_param_arr[] = { 77 /* min max value name */ 78 { 0, 3, 0, "ipsecesp_debug"}, 79 { 125, 32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"}, 80 { 1, 10, 1, "ipsecesp_reap_delay"}, 81 { 1, SADB_MAX_REPLAY, 64, "ipsecesp_replay_size"}, 82 { 1, 300, 15, "ipsecesp_acquire_timeout"}, 83 { 1, 1800, 90, "ipsecesp_larval_timeout"}, 84 /* Default lifetime values for ACQUIRE messages. */ 85 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_bytes"}, 86 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_bytes"}, 87 { 0, 0xffffffffU, 24000, "ipsecesp_default_soft_addtime"}, 88 { 0, 0xffffffffU, 28800, "ipsecesp_default_hard_addtime"}, 89 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_usetime"}, 90 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_usetime"}, 91 { 0, 1, 0, "ipsecesp_log_unknown_spi"}, 92 { 0, 2, 1, "ipsecesp_padding_check"}, 93 }; 94 #define ipsecesp_debug ipsecesp_params[0].ipsecesp_param_value 95 #define ipsecesp_age_interval ipsecesp_params[1].ipsecesp_param_value 96 #define ipsecesp_age_int_max ipsecesp_params[1].ipsecesp_param_max 97 #define ipsecesp_reap_delay ipsecesp_params[2].ipsecesp_param_value 98 #define ipsecesp_replay_size ipsecesp_params[3].ipsecesp_param_value 99 #define ipsecesp_acquire_timeout \ 100 ipsecesp_params[4].ipsecesp_param_value 101 #define ipsecesp_larval_timeout \ 102 ipsecesp_params[5].ipsecesp_param_value 103 #define ipsecesp_default_soft_bytes \ 104 ipsecesp_params[6].ipsecesp_param_value 105 #define ipsecesp_default_hard_bytes \ 106 ipsecesp_params[7].ipsecesp_param_value 107 #define ipsecesp_default_soft_addtime \ 108 ipsecesp_params[8].ipsecesp_param_value 109 #define ipsecesp_default_hard_addtime \ 110 
ipsecesp_params[9].ipsecesp_param_value 111 #define ipsecesp_default_soft_usetime \ 112 ipsecesp_params[10].ipsecesp_param_value 113 #define ipsecesp_default_hard_usetime \ 114 ipsecesp_params[11].ipsecesp_param_value 115 #define ipsecesp_log_unknown_spi \ 116 ipsecesp_params[12].ipsecesp_param_value 117 #define ipsecesp_padding_check \ 118 ipsecesp_params[13].ipsecesp_param_value 119 120 #define esp0dbg(a) printf a 121 /* NOTE: != 0 instead of > 0 so lint doesn't complain. */ 122 #define esp1dbg(espstack, a) if (espstack->ipsecesp_debug != 0) printf a 123 #define esp2dbg(espstack, a) if (espstack->ipsecesp_debug > 1) printf a 124 #define esp3dbg(espstack, a) if (espstack->ipsecesp_debug > 2) printf a 125 126 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *); 127 static int ipsecesp_close(queue_t *); 128 static void ipsecesp_rput(queue_t *, mblk_t *); 129 static void ipsecesp_wput(queue_t *, mblk_t *); 130 static void *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns); 131 static void ipsecesp_stack_fini(netstackid_t stackid, void *arg); 132 static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *); 133 134 static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t); 135 static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *, 136 boolean_t, ipsa_t *); 137 138 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t, 139 ipsecesp_stack_t *); 140 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t, 141 kstat_named_t **, ipsecesp_stack_t *); 142 static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t); 143 static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *, 144 uint_t); 145 146 /* Setable in /etc/system */ 147 uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; 148 149 static struct module_info info = { 150 5137, "ipsecesp", 0, INFPSZ, 65536, 1024 151 }; 152 153 static struct qinit rinit = { 154 (pfi_t)ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 155 
NULL 156 }; 157 158 static struct qinit winit = { 159 (pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info, 160 NULL 161 }; 162 163 struct streamtab ipsecespinfo = { 164 &rinit, &winit, NULL, NULL 165 }; 166 167 static taskq_t *esp_taskq; 168 169 /* 170 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now. 171 * 172 * Question: Do I need this, given that all instance's esps->esps_wq point 173 * to IP? 174 * 175 * Answer: Yes, because I need to know which queue is BOUND to 176 * IPPROTO_ESP 177 */ 178 179 /* 180 * Stats. This may eventually become a full-blown SNMP MIB once that spec 181 * stabilizes. 182 */ 183 184 typedef struct esp_kstats_s { 185 kstat_named_t esp_stat_num_aalgs; 186 kstat_named_t esp_stat_good_auth; 187 kstat_named_t esp_stat_bad_auth; 188 kstat_named_t esp_stat_bad_padding; 189 kstat_named_t esp_stat_replay_failures; 190 kstat_named_t esp_stat_replay_early_failures; 191 kstat_named_t esp_stat_keysock_in; 192 kstat_named_t esp_stat_out_requests; 193 kstat_named_t esp_stat_acquire_requests; 194 kstat_named_t esp_stat_bytes_expired; 195 kstat_named_t esp_stat_out_discards; 196 kstat_named_t esp_stat_in_accelerated; 197 kstat_named_t esp_stat_out_accelerated; 198 kstat_named_t esp_stat_noaccel; 199 kstat_named_t esp_stat_crypto_sync; 200 kstat_named_t esp_stat_crypto_async; 201 kstat_named_t esp_stat_crypto_failures; 202 kstat_named_t esp_stat_num_ealgs; 203 kstat_named_t esp_stat_bad_decrypt; 204 } esp_kstats_t; 205 206 /* 207 * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if 208 * kstat_create_netstack for espstack->esp_ksp succeeds, but when it 209 * fails, it will be NULL. Note this is done for all stack instances, 210 * so it *could* fail. 
hence a non-NULL checking is done for 211 * ESP_BUMP_STAT and ESP_DEBUMP_STAT 212 */ 213 #define ESP_BUMP_STAT(espstack, x) \ 214 do { \ 215 if (espstack->esp_kstats != NULL) \ 216 (espstack->esp_kstats->esp_stat_ ## x).value.ui64++; \ 217 _NOTE(CONSTCOND) \ 218 } while (0) 219 220 #define ESP_DEBUMP_STAT(espstack, x) \ 221 do { \ 222 if (espstack->esp_kstats != NULL) \ 223 (espstack->esp_kstats->esp_stat_ ## x).value.ui64--; \ 224 _NOTE(CONSTCOND) \ 225 } while (0) 226 227 static int esp_kstat_update(kstat_t *, int); 228 229 static boolean_t 230 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) 231 { 232 espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", 233 "net", KSTAT_TYPE_NAMED, 234 sizeof (esp_kstats_t) / sizeof (kstat_named_t), 235 KSTAT_FLAG_PERSISTENT, stackid); 236 237 if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) 238 return (B_FALSE); 239 240 espstack->esp_kstats = espstack->esp_ksp->ks_data; 241 242 espstack->esp_ksp->ks_update = esp_kstat_update; 243 espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid; 244 245 #define K64 KSTAT_DATA_UINT64 246 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64) 247 248 KI(num_aalgs); 249 KI(num_ealgs); 250 KI(good_auth); 251 KI(bad_auth); 252 KI(bad_padding); 253 KI(replay_failures); 254 KI(replay_early_failures); 255 KI(keysock_in); 256 KI(out_requests); 257 KI(acquire_requests); 258 KI(bytes_expired); 259 KI(out_discards); 260 KI(in_accelerated); 261 KI(out_accelerated); 262 KI(noaccel); 263 KI(crypto_sync); 264 KI(crypto_async); 265 KI(crypto_failures); 266 KI(bad_decrypt); 267 268 #undef KI 269 #undef K64 270 271 kstat_install(espstack->esp_ksp); 272 273 return (B_TRUE); 274 } 275 276 static int 277 esp_kstat_update(kstat_t *kp, int rw) 278 { 279 esp_kstats_t *ekp; 280 netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; 281 netstack_t *ns; 282 ipsec_stack_t *ipss; 283 284 if ((kp == NULL) || (kp->ks_data == NULL)) 285 
return (EIO); 286 287 if (rw == KSTAT_WRITE) 288 return (EACCES); 289 290 ns = netstack_find_by_stackid(stackid); 291 if (ns == NULL) 292 return (-1); 293 ipss = ns->netstack_ipsec; 294 if (ipss == NULL) { 295 netstack_rele(ns); 296 return (-1); 297 } 298 ekp = (esp_kstats_t *)kp->ks_data; 299 300 mutex_enter(&ipss->ipsec_alg_lock); 301 ekp->esp_stat_num_aalgs.value.ui64 = 302 ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; 303 ekp->esp_stat_num_ealgs.value.ui64 = 304 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 305 mutex_exit(&ipss->ipsec_alg_lock); 306 307 netstack_rele(ns); 308 return (0); 309 } 310 311 #ifdef DEBUG 312 /* 313 * Debug routine, useful to see pre-encryption data. 314 */ 315 static char * 316 dump_msg(mblk_t *mp) 317 { 318 char tmp_str[3], tmp_line[256]; 319 320 while (mp != NULL) { 321 unsigned char *ptr; 322 323 printf("mblk address 0x%p, length %ld, db_ref %d " 324 "type %d, base 0x%p, lim 0x%p\n", 325 (void *) mp, (long)(mp->b_wptr - mp->b_rptr), 326 mp->b_datap->db_ref, mp->b_datap->db_type, 327 (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim); 328 ptr = mp->b_rptr; 329 330 tmp_line[0] = '\0'; 331 while (ptr < mp->b_wptr) { 332 uint_t diff; 333 334 diff = (ptr - mp->b_rptr); 335 if (!(diff & 0x1f)) { 336 if (strlen(tmp_line) > 0) { 337 printf("bytes: %s\n", tmp_line); 338 tmp_line[0] = '\0'; 339 } 340 } 341 if (!(diff & 0x3)) 342 (void) strcat(tmp_line, " "); 343 (void) sprintf(tmp_str, "%02x", *ptr); 344 (void) strcat(tmp_line, tmp_str); 345 ptr++; 346 } 347 if (strlen(tmp_line) > 0) 348 printf("bytes: %s\n", tmp_line); 349 350 mp = mp->b_cont; 351 } 352 353 return ("\n"); 354 } 355 356 #else /* DEBUG */ 357 static char * 358 dump_msg(mblk_t *mp) 359 { 360 printf("Find value of mp %p.\n", mp); 361 return ("\n"); 362 } 363 #endif /* DEBUG */ 364 365 /* 366 * Don't have to lock age_interval, as only one thread will access it at 367 * a time, because I control the one function that does with timeout(). 
368 */ 369 static void 370 esp_ager(void *arg) 371 { 372 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 373 netstack_t *ns = espstack->ipsecesp_netstack; 374 hrtime_t begin = gethrtime(); 375 376 sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 377 espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); 378 sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q, 379 espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); 380 381 espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q, 382 esp_ager, espstack, 383 &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max, 384 info.mi_idnum); 385 } 386 387 /* 388 * Get an ESP NDD parameter. 389 */ 390 /* ARGSUSED */ 391 static int 392 ipsecesp_param_get(q, mp, cp, cr) 393 queue_t *q; 394 mblk_t *mp; 395 caddr_t cp; 396 cred_t *cr; 397 { 398 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 399 uint_t value; 400 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 401 402 mutex_enter(&espstack->ipsecesp_param_lock); 403 value = ipsecesppa->ipsecesp_param_value; 404 mutex_exit(&espstack->ipsecesp_param_lock); 405 406 (void) mi_mpprintf(mp, "%u", value); 407 return (0); 408 } 409 410 /* 411 * This routine sets an NDD variable in a ipsecespparam_t structure. 412 */ 413 /* ARGSUSED */ 414 static int 415 ipsecesp_param_set(q, mp, value, cp, cr) 416 queue_t *q; 417 mblk_t *mp; 418 char *value; 419 caddr_t cp; 420 cred_t *cr; 421 { 422 ulong_t new_value; 423 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 424 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 425 426 /* 427 * Fail the request if the new value does not lie within the 428 * required bounds. 
429 */ 430 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 431 new_value < ipsecesppa->ipsecesp_param_min || 432 new_value > ipsecesppa->ipsecesp_param_max) { 433 return (EINVAL); 434 } 435 436 /* Set the new value */ 437 mutex_enter(&espstack->ipsecesp_param_lock); 438 ipsecesppa->ipsecesp_param_value = new_value; 439 mutex_exit(&espstack->ipsecesp_param_lock); 440 return (0); 441 } 442 443 /* 444 * Using lifetime NDD variables, fill in an extended combination's 445 * lifetime information. 446 */ 447 void 448 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) 449 { 450 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 451 452 ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes; 453 ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes; 454 ecomb->sadb_x_ecomb_soft_addtime = 455 espstack->ipsecesp_default_soft_addtime; 456 ecomb->sadb_x_ecomb_hard_addtime = 457 espstack->ipsecesp_default_hard_addtime; 458 ecomb->sadb_x_ecomb_soft_usetime = 459 espstack->ipsecesp_default_soft_usetime; 460 ecomb->sadb_x_ecomb_hard_usetime = 461 espstack->ipsecesp_default_hard_usetime; 462 } 463 464 /* 465 * Initialize things for ESP at module load time. 466 */ 467 boolean_t 468 ipsecesp_ddi_init(void) 469 { 470 esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, 471 IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); 472 473 /* 474 * We want to be informed each time a stack is created or 475 * destroyed in the kernel, so we can maintain the 476 * set of ipsecesp_stack_t's. 477 */ 478 netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL, 479 ipsecesp_stack_fini); 480 481 return (B_TRUE); 482 } 483 484 /* 485 * Walk through the param array specified registering each element with the 486 * named dispatch handler. 
487 */ 488 static boolean_t 489 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt) 490 { 491 for (; cnt-- > 0; espp++) { 492 if (espp->ipsecesp_param_name != NULL && 493 espp->ipsecesp_param_name[0]) { 494 if (!nd_load(ndp, 495 espp->ipsecesp_param_name, 496 ipsecesp_param_get, ipsecesp_param_set, 497 (caddr_t)espp)) { 498 nd_free(ndp); 499 return (B_FALSE); 500 } 501 } 502 } 503 return (B_TRUE); 504 } 505 /* 506 * Initialize things for ESP for each stack instance 507 */ 508 static void * 509 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns) 510 { 511 ipsecesp_stack_t *espstack; 512 ipsecespparam_t *espp; 513 514 espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack), 515 KM_SLEEP); 516 espstack->ipsecesp_netstack = ns; 517 518 espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 519 espstack->ipsecesp_params = espp; 520 bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr)); 521 522 (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp, 523 A_CNT(lcl_param_arr)); 524 525 (void) esp_kstat_init(espstack, stackid); 526 527 espstack->esp_sadb.s_acquire_timeout = 528 &espstack->ipsecesp_acquire_timeout; 529 espstack->esp_sadb.s_acqfn = esp_send_acquire; 530 sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size, 531 espstack->ipsecesp_netstack); 532 533 mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0); 534 535 ip_drop_register(&espstack->esp_dropper, "IPsec ESP"); 536 return (espstack); 537 } 538 539 /* 540 * Destroy things for ESP at module unload time. 
541 */ 542 void 543 ipsecesp_ddi_destroy(void) 544 { 545 netstack_unregister(NS_IPSECESP); 546 taskq_destroy(esp_taskq); 547 } 548 549 /* 550 * Destroy things for ESP for one stack instance 551 */ 552 static void 553 ipsecesp_stack_fini(netstackid_t stackid, void *arg) 554 { 555 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 556 557 if (espstack->esp_pfkey_q != NULL) { 558 (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event); 559 } 560 espstack->esp_sadb.s_acqfn = NULL; 561 espstack->esp_sadb.s_acquire_timeout = NULL; 562 sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack); 563 ip_drop_unregister(&espstack->esp_dropper); 564 mutex_destroy(&espstack->ipsecesp_param_lock); 565 nd_free(&espstack->ipsecesp_g_nd); 566 567 kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr)); 568 espstack->ipsecesp_params = NULL; 569 kstat_delete_netstack(espstack->esp_ksp, stackid); 570 espstack->esp_ksp = NULL; 571 espstack->esp_kstats = NULL; 572 kmem_free(espstack, sizeof (*espstack)); 573 } 574 575 /* 576 * ESP module open routine. 577 */ 578 /* ARGSUSED */ 579 static int 580 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 581 { 582 netstack_t *ns; 583 ipsecesp_stack_t *espstack; 584 585 if (secpolicy_ip_config(credp, B_FALSE) != 0) { 586 esp0dbg(("Non-privileged user trying to open ipsecesp.\n")); 587 return (EPERM); 588 } 589 590 if (q->q_ptr != NULL) 591 return (0); /* Re-open of an already open instance. */ 592 593 if (sflag != MODOPEN) 594 return (EINVAL); 595 596 ns = netstack_find_by_cred(credp); 597 ASSERT(ns != NULL); 598 espstack = ns->netstack_ipsecesp; 599 ASSERT(espstack != NULL); 600 601 /* 602 * ASSUMPTIONS (because I'm MT_OCEXCL): 603 * 604 * * I'm being pushed on top of IP for all my opens (incl. #1). 605 * * Only ipsecesp_open() can write into esp_sadb.s_ip_q. 606 * * Because of this, I can check lazily for esp_sadb.s_ip_q. 607 * 608 * If these assumptions are wrong, I'm in BIG trouble... 
609 */ 610 611 q->q_ptr = espstack; 612 WR(q)->q_ptr = q->q_ptr; 613 614 if (espstack->esp_sadb.s_ip_q == NULL) { 615 struct T_unbind_req *tur; 616 617 espstack->esp_sadb.s_ip_q = WR(q); 618 /* Allocate an unbind... */ 619 espstack->esp_ip_unbind = allocb(sizeof (struct T_unbind_req), 620 BPRI_HI); 621 622 /* 623 * Send down T_BIND_REQ to bind IPPROTO_ESP. 624 * Handle the ACK here in ESP. 625 */ 626 qprocson(q); 627 if (espstack->esp_ip_unbind == NULL || 628 !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) { 629 if (espstack->esp_ip_unbind != NULL) { 630 freeb(espstack->esp_ip_unbind); 631 espstack->esp_ip_unbind = NULL; 632 } 633 q->q_ptr = NULL; 634 netstack_rele(espstack->ipsecesp_netstack); 635 return (ENOMEM); 636 } 637 638 espstack->esp_ip_unbind->b_datap->db_type = M_PROTO; 639 tur = (struct T_unbind_req *)espstack->esp_ip_unbind->b_rptr; 640 tur->PRIM_type = T_UNBIND_REQ; 641 } else { 642 qprocson(q); 643 } 644 645 /* 646 * For now, there's not much I can do. I'll be getting a message 647 * passed down to me from keysock (in my wput), and a T_BIND_ACK 648 * up from IP (in my rput). 649 */ 650 651 return (0); 652 } 653 654 /* 655 * ESP module close routine. 656 */ 657 static int 658 ipsecesp_close(queue_t *q) 659 { 660 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 661 662 /* 663 * If esp_sadb.s_ip_q is attached to this instance, send a 664 * T_UNBIND_REQ to IP for the instance before doing 665 * a qprocsoff(). 666 */ 667 if (WR(q) == espstack->esp_sadb.s_ip_q && 668 espstack->esp_ip_unbind != NULL) { 669 putnext(WR(q), espstack->esp_ip_unbind); 670 espstack->esp_ip_unbind = NULL; 671 } 672 673 /* 674 * Clean up q_ptr, if needed. 675 */ 676 qprocsoff(q); 677 678 /* Keysock queue check is safe, because of OCEXCL perimeter. */ 679 680 if (q == espstack->esp_pfkey_q) { 681 esp1dbg(espstack, 682 ("ipsecesp_close: Ummm... keysock is closing ESP.\n")); 683 espstack->esp_pfkey_q = NULL; 684 /* Detach qtimeouts. 
*/ 685 (void) quntimeout(q, espstack->esp_event); 686 } 687 688 if (WR(q) == espstack->esp_sadb.s_ip_q) { 689 /* 690 * If the esp_sadb.s_ip_q is attached to this instance, find 691 * another. The OCEXCL outer perimeter helps us here. 692 */ 693 espstack->esp_sadb.s_ip_q = NULL; 694 695 /* 696 * Find a replacement queue for esp_sadb.s_ip_q. 697 */ 698 if (espstack->esp_pfkey_q != NULL && 699 espstack->esp_pfkey_q != RD(q)) { 700 /* 701 * See if we can use the pfkey_q. 702 */ 703 espstack->esp_sadb.s_ip_q = WR(espstack->esp_pfkey_q); 704 } 705 706 if (espstack->esp_sadb.s_ip_q == NULL || 707 !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) { 708 esp1dbg(espstack, ("ipsecesp: Can't reassign ip_q.\n")); 709 espstack->esp_sadb.s_ip_q = NULL; 710 } else { 711 espstack->esp_ip_unbind = 712 allocb(sizeof (struct T_unbind_req), BPRI_HI); 713 714 if (espstack->esp_ip_unbind != NULL) { 715 struct T_unbind_req *tur; 716 717 espstack->esp_ip_unbind->b_datap->db_type = 718 M_PROTO; 719 tur = (struct T_unbind_req *) 720 espstack->esp_ip_unbind->b_rptr; 721 tur->PRIM_type = T_UNBIND_REQ; 722 } 723 /* If it's NULL, I can't do much here. */ 724 } 725 } 726 727 netstack_rele(espstack->ipsecesp_netstack); 728 return (0); 729 } 730 731 /* 732 * Add a number of bytes to what the SA has protected so far. Return 733 * B_TRUE if the SA can still protect that many bytes. 734 * 735 * Caller must REFRELE the passed-in assoc. This function must REFRELE 736 * any obtained peer SA. 737 */ 738 static boolean_t 739 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) 740 { 741 ipsa_t *inassoc, *outassoc; 742 isaf_t *bucket; 743 boolean_t inrc, outrc, isv6; 744 sadb_t *sp; 745 int outhash; 746 netstack_t *ns = assoc->ipsa_netstack; 747 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 748 749 /* No peer? No problem! 
*/ 750 if (!assoc->ipsa_haspeer) { 751 return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes, 752 B_TRUE)); 753 } 754 755 /* 756 * Otherwise, we want to grab both the original assoc and its peer. 757 * There might be a race for this, but if it's a real race, two 758 * expire messages may occur. We limit this by only sending the 759 * expire message on one of the peers, we'll pick the inbound 760 * arbitrarily. 761 * 762 * If we need tight synchronization on the peer SA, then we need to 763 * reconsider. 764 */ 765 766 /* Use address length to select IPv6/IPv4 */ 767 isv6 = (assoc->ipsa_addrfam == AF_INET6); 768 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 769 770 if (inbound) { 771 inassoc = assoc; 772 if (isv6) { 773 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 774 &inassoc->ipsa_dstaddr)); 775 } else { 776 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 777 &inassoc->ipsa_dstaddr)); 778 } 779 bucket = &sp->sdb_of[outhash]; 780 mutex_enter(&bucket->isaf_lock); 781 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 782 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 783 inassoc->ipsa_addrfam); 784 mutex_exit(&bucket->isaf_lock); 785 if (outassoc == NULL) { 786 /* Q: Do we wish to set haspeer == B_FALSE? */ 787 esp0dbg(("esp_age_bytes: " 788 "can't find peer for inbound.\n")); 789 return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc, 790 bytes, B_TRUE)); 791 } 792 } else { 793 outassoc = assoc; 794 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 795 mutex_enter(&bucket->isaf_lock); 796 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 797 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 798 outassoc->ipsa_addrfam); 799 mutex_exit(&bucket->isaf_lock); 800 if (inassoc == NULL) { 801 /* Q: Do we wish to set haspeer == B_FALSE? 
*/ 802 esp0dbg(("esp_age_bytes: " 803 "can't find peer for outbound.\n")); 804 return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc, 805 bytes, B_TRUE)); 806 } 807 } 808 809 inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE); 810 outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE); 811 812 /* 813 * REFRELE any peer SA. 814 * 815 * Because of the multi-line macro nature of IPSA_REFRELE, keep 816 * them in { }. 817 */ 818 if (inbound) { 819 IPSA_REFRELE(outassoc); 820 } else { 821 IPSA_REFRELE(inassoc); 822 } 823 824 return (inrc && outrc); 825 } 826 827 /* 828 * Do incoming NAT-T manipulations for packet. 829 */ 830 static ipsec_status_t 831 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc) 832 { 833 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 834 tcpha_t *tcph; 835 udpha_t *udpha; 836 /* Initialize to our inbound cksum adjustment... */ 837 uint32_t sum = assoc->ipsa_inbound_cksum; 838 839 switch (ipha->ipha_protocol) { 840 case IPPROTO_TCP: 841 tcph = (tcpha_t *)(data_mp->b_rptr + 842 IPH_HDR_LENGTH(ipha)); 843 844 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16) 845 sum += ~ntohs(tcph->tha_sum) & 0xFFFF; 846 DOWN_SUM(sum); 847 DOWN_SUM(sum); 848 tcph->tha_sum = ~htons(sum); 849 break; 850 case IPPROTO_UDP: 851 udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha)); 852 853 if (udpha->uha_checksum != 0) { 854 /* Adujst if the inbound one was not zero. */ 855 sum += ~ntohs(udpha->uha_checksum) & 0xFFFF; 856 DOWN_SUM(sum); 857 DOWN_SUM(sum); 858 udpha->uha_checksum = ~htons(sum); 859 if (udpha->uha_checksum == 0) 860 udpha->uha_checksum = 0xFFFF; 861 } 862 #undef DOWN_SUM 863 break; 864 case IPPROTO_IP: 865 /* 866 * This case is only an issue for self-encapsulated 867 * packets. So for now, fall through. 868 */ 869 break; 870 } 871 return (IPSEC_STATUS_SUCCESS); 872 } 873 874 875 /* 876 * Strip ESP header, check padding, and fix IP header. 877 * Returns B_TRUE on success, B_FALSE if an error occured. 
 *
 * data_mp  - the packet, starting at the IPv4/IPv6 header.
 * isv4     - B_TRUE if the packet starts with an ipha_t, else an ip6_t.
 * ivlen    - length of the IV that follows the ESP header.
 * counter  - on failure, set to the drop counter describing the reason.
 * espstack - ESP stack instance (stats, debug level, padding_check level).
 *
 * On success the pad bytes, pad-length byte and next-header byte are
 * trimmed from the tail, the IP header(s) are slid forward over the ESP
 * header and IV, and the IP length / next-protocol fields are rewritten.
 */
static boolean_t
esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
    kstat_named_t **counter, ipsecesp_stack_t *espstack)
{
	ipha_t *ipha;
	ip6_t *ip6h;
	uint_t divpoint;
	mblk_t *scratch;
	uint8_t nexthdr, padlen;
	uint8_t lastpad;
	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
	uint8_t *lastbyte;

	/*
	 * Strip ESP data and fix IP header.
	 *
	 * XXX In case the beginning of esp_inbound() changes to not do a
	 * pullup, this part of the code can remain unchanged.
	 */
	/* divpoint: byte offset of the ESP header within data_mp. */
	if (isv4) {
		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
		ipha = (ipha_t *)data_mp->b_rptr;
		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
		    IPH_HDR_LENGTH(ipha));
		divpoint = IPH_HDR_LENGTH(ipha);
	} else {
		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
		ip6h = (ip6_t *)data_mp->b_rptr;
		divpoint = ip_hdr_length_v6(data_mp, ip6h);
	}

	/* Walk to the last mblk; the ESP trailer lives at its tail. */
	scratch = data_mp;
	while (scratch->b_cont != NULL)
		scratch = scratch->b_cont;

	ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);

	/*
	 * "Next header" and padding length are the last two bytes in the
	 * ESP-protected datagram, thus the explicit - 1 and - 2.
	 * lastpad is the last byte of the padding, which can be used for
	 * a quick check to see if the padding is correct.
	 */
	lastbyte = scratch->b_wptr - 1;
	nexthdr = *lastbyte--;
	padlen = *lastbyte--;

	if (isv4) {
		/* Fix part of the IP header. */
		ipha->ipha_protocol = nexthdr;
		/*
		 * Reality check the padlen.  The explicit - 2 is for the
		 * padding length and the next-header bytes.
		 *
		 * NOTE(review): the bound subtracts sizeof (ipha_t), not
		 * divpoint, and the arithmetic is unsigned; confirm this
		 * is intended for packets carrying IP options or with a
		 * very small ipha_length.
		 */
		if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
		    sizeof (esph_t) - ivlen) {
			ESP_BUMP_STAT(espstack, bad_decrypt);
			ipsec_rl_strlog(espstack->ipsecesp_netstack,
			    info.mi_idnum, 0, 0,
			    SL_ERROR | SL_WARN,
			    "Corrupt ESP packet (padlen too big).\n");
			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
			    padlen));
			esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
			    "hdr - ivlen(%d) = %d.\n",
			    ntohs(ipha->ipha_length), ivlen,
			    (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
			    2 - sizeof (esph_t) - ivlen)));
			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
			return (B_FALSE);
		}

		/*
		 * Fix the rest of the header.  The explicit - 2 is for the
		 * padding length and the next-header bytes.
		 */
		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
		    2 - sizeof (esph_t) - ivlen);
		ipha->ipha_hdr_checksum = 0;
		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
	} else {
		/*
		 * Rewrite whichever "next header" field currently names
		 * ESP: the fixed header's, or else that of the extension
		 * header found by ip_find_hdr_v6().
		 */
		if (ip6h->ip6_nxt == IPPROTO_ESP) {
			ip6h->ip6_nxt = nexthdr;
		} else {
			ip6_pkt_t ipp;

			bzero(&ipp, sizeof (ipp));
			(void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL);
			if (ipp.ipp_dstopts != NULL) {
				ipp.ipp_dstopts->ip6d_nxt = nexthdr;
			} else if (ipp.ipp_rthdr != NULL) {
				ipp.ipp_rthdr->ip6r_nxt = nexthdr;
			} else if (ipp.ipp_hopopts != NULL) {
				ipp.ipp_hopopts->ip6h_nxt = nexthdr;
			} else {
				/* Panic a DEBUG kernel. */
				ASSERT(ipp.ipp_hopopts != NULL);
				/* Otherwise, pretend it's IP + ESP. */
				cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
				ip6h->ip6_nxt = nexthdr;
			}
		}

		if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
		    ivlen) {
			ESP_BUMP_STAT(espstack, bad_decrypt);
			ipsec_rl_strlog(espstack->ipsecesp_netstack,
			    info.mi_idnum, 0, 0,
			    SL_ERROR | SL_WARN,
			    "Corrupt ESP packet (v6 padlen too big).\n");
			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
			    padlen));
			esp1dbg(espstack, ("pkt len(%u) - ip hdr - esp "
			    "hdr - ivlen(%d) = %u.\n",
			    (unsigned)(ntohs(ip6h->ip6_plen)
			    + sizeof (ip6_t)), ivlen,
			    (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
			    sizeof (esph_t) - ivlen)));
			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
			return (B_FALSE);
		}


		/*
		 * Fix the rest of the header.  The explicit - 2 is for the
		 * padding length and the next-header bytes.  IPv6 is nice,
		 * because there's no hdr checksum!
		 */
		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
		    2 - sizeof (esph_t) - ivlen);
	}

	/* ipsecesp_padding_check: 0 = none, 1 = weak, 2 = weak + strong. */
	if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
		/*
		 * Weak padding check: compare last-byte to length, they
		 * should be equal.
		 */
		lastpad = *lastbyte--;

		if (padlen != lastpad) {
			ipsec_rl_strlog(espstack->ipsecesp_netstack,
			    info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
			    "Corrupt ESP packet (lastpad != padlen).\n");
			esp1dbg(espstack,
			    ("lastpad (%d) not equal to padlen (%d):\n",
			    lastpad, padlen));
			ESP_BUMP_STAT(espstack, bad_padding);
			*counter = DROPPER(ipss, ipds_esp_bad_padding);
			return (B_FALSE);
		}

		/*
		 * Strong padding check: Check all pad bytes to see that
		 * they're ascending.  Go backwards using a descending counter
		 * to verify.  padlen == 1 is checked by previous block, so
		 * only bother if we've more than 1 byte of padding.
		 * Consequently, start the check one byte before the location
		 * of "lastpad".
		 */
		if (espstack->ipsecesp_padding_check > 1) {
			/*
			 * This assert may have to become an if and a pullup
			 * if we start accepting multi-dblk mblks. For now,
			 * though, any packet here will have been pulled up in
			 * esp_inbound.
			 */
			ASSERT(MBLKL(scratch) >= lastpad + 3);

			/*
			 * Use "--lastpad" because we already checked the very
			 * last pad byte previously.
			 */
			while (--lastpad != 0) {
				if (lastpad != *lastbyte) {
					ipsec_rl_strlog(
					    espstack->ipsecesp_netstack,
					    info.mi_idnum, 0, 0,
					    SL_ERROR | SL_WARN, "Corrupt ESP "
					    "packet (bad padding).\n");
					esp1dbg(espstack,
					    ("padding not in correct"
					    " format:\n"));
					ESP_BUMP_STAT(espstack, bad_padding);
					*counter = DROPPER(ipss,
					    ipds_esp_bad_padding);
					return (B_FALSE);
				}
				lastbyte--;
			}
		}
	}

	/* Trim off the padding. */
	ASSERT(data_mp->b_cont == NULL);
	data_mp->b_wptr -= (padlen + 2);

	/*
	 * Remove the ESP header.
	 *
	 * The above assertions about data_mp's size will make this work.
	 *
	 * XXX  Question:  If I send up and get back a contiguous mblk,
	 * would it be quicker to bcopy over, or keep doing the dupb stuff?
	 * I go with copying for now.
	 */

	/*
	 * Both branches slide the first divpoint bytes (the IP headers)
	 * forward by sizeof (esph_t) + ivlen, copying from the end toward
	 * the start so that overlapping source and destination are safe.
	 * The first branch copies a 32-bit word at a time when b_rptr and
	 * ivlen are 4-byte aligned; the second copies byte by byte.
	 */
	if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
	    IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
		uint8_t *start = data_mp->b_rptr;
		uint32_t *src, *dst;

		src = (uint32_t *)(start + divpoint);
		dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);

		ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
		    IS_P2ALIGNED(src, sizeof (uint32_t)));

		do {
			src--;
			dst--;
			*dst = *src;
		} while (src != (uint32_t *)start);

		data_mp->b_rptr = (uchar_t *)dst;
	} else {
		uint8_t *start = data_mp->b_rptr;
		uint8_t *src, *dst;

		src = start + divpoint;
		dst = src + sizeof (esph_t) + ivlen;

		do {
			src--;
			dst--;
			*dst = *src;
		} while (src != start);

		data_mp->b_rptr = dst;
	}

	esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
	esp2dbg(espstack, (dump_msg(data_mp)));

	return (B_TRUE);
}

/*
 * Updating use times can be tricky business if the ipsa_haspeer flag is
 * set.  This function is called once in an SA's lifetime.
 *
 * "inbound" says which direction "assoc" belongs to; the peer, if any, is
 * looked up in the opposite-direction table.  If the peer cannot be found,
 * only "assoc" has its use time set.
 *
 * Caller has to REFRELE "assoc" which is passed in.  This function has
 * to REFRELE any peer SA that is obtained.
 */
static void
esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
{
	ipsa_t *inassoc, *outassoc;
	isaf_t *bucket;
	sadb_t *sp;
	int outhash;
	boolean_t isv6;
	netstack_t		*ns = assoc->ipsa_netstack;
	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;

	/* No peer?  No problem! */
	if (!assoc->ipsa_haspeer) {
		sadb_set_usetime(assoc);
		return;
	}

	/*
	 * Otherwise, we want to grab both the original assoc and its peer.
	 * There might be a race for this, but if it's a real race, the times
	 * will be out-of-synch by at most a second, and since our time
	 * granularity is a second, this won't be a problem.
	 *
	 * If we need tight synchronization on the peer SA, then we need to
	 * reconsider.
	 */

	/* Use address length to select IPv6/IPv4 */
	isv6 = (assoc->ipsa_addrfam == AF_INET6);
	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;

	if (inbound) {
		inassoc = assoc;
		/* The outbound table is hashed by destination address. */
		if (isv6) {
			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
			    &inassoc->ipsa_dstaddr));
		} else {
			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
			    &inassoc->ipsa_dstaddr));
		}
		bucket = &sp->sdb_of[outhash];
		mutex_enter(&bucket->isaf_lock);
		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
		    inassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (outassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_set_usetime: "
			    "can't find peer for inbound.\n"));
			sadb_set_usetime(inassoc);
			return;
		}
	} else {
		outassoc = assoc;
		/* The inbound table is bucketed by SPI. */
		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
		mutex_enter(&bucket->isaf_lock);
		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
		    outassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (inassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_set_usetime: "
			    "can't find peer for outbound.\n"));
			sadb_set_usetime(outassoc);
			return;
		}
	}

	/* Update usetime on both. */
	sadb_set_usetime(inassoc);
	sadb_set_usetime(outassoc);

	/*
	 * REFRELE any peer SA.
	 *
	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
	 * them in { }.
	 */
	if (inbound) {
		IPSA_REFRELE(outassoc);
	} else {
		IPSA_REFRELE(inassoc);
	}
}

/*
 * Handle ESP inbound data for IPv4 and IPv6.
1221 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1222 * mblk chain ipsec_in_mp. 1223 */ 1224 ipsec_status_t 1225 esp_inbound(mblk_t *ipsec_in_mp, void *arg) 1226 { 1227 mblk_t *data_mp = ipsec_in_mp->b_cont; 1228 ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr; 1229 esph_t *esph = (esph_t *)arg; 1230 ipsa_t *ipsa = ii->ipsec_in_esp_sa; 1231 netstack_t *ns = ii->ipsec_in_ns; 1232 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1233 ipsec_stack_t *ipss = ns->netstack_ipsec; 1234 1235 if (ipsa->ipsa_usetime == 0) 1236 esp_set_usetime(ipsa, B_TRUE); 1237 1238 /* 1239 * We may wish to check replay in-range-only here as an optimization. 1240 * Include the reality check of ipsa->ipsa_replay > 1241 * ipsa->ipsa_replay_wsize for times when it's the first N packets, 1242 * where N == ipsa->ipsa_replay_wsize. 1243 * 1244 * Another check that may come here later is the "collision" check. 1245 * If legitimate packets flow quickly enough, this won't be a problem, 1246 * but collisions may cause authentication algorithm crunching to 1247 * take place when it doesn't need to. 1248 */ 1249 if (!sadb_replay_peek(ipsa, esph->esph_replay)) { 1250 ESP_BUMP_STAT(espstack, replay_early_failures); 1251 IP_ESP_BUMP_STAT(ipss, in_discards); 1252 /* 1253 * TODO: Extract inbound interface from the IPSEC_IN 1254 * message's ii->ipsec_in_rill_index. 1255 */ 1256 ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, 1257 DROPPER(ipss, ipds_esp_early_replay), 1258 &espstack->esp_dropper); 1259 return (IPSEC_STATUS_FAILED); 1260 } 1261 1262 /* 1263 * Has this packet already been processed by a hardware 1264 * IPsec accelerator? 
1265 */ 1266 if (ii->ipsec_in_accelerated) { 1267 ipsec_status_t rv; 1268 esp3dbg(espstack, 1269 ("esp_inbound: pkt processed by ill=%d isv6=%d\n", 1270 ii->ipsec_in_ill_index, !ii->ipsec_in_v4)); 1271 rv = esp_inbound_accelerated(ipsec_in_mp, 1272 data_mp, ii->ipsec_in_v4, ipsa); 1273 return (rv); 1274 } 1275 ESP_BUMP_STAT(espstack, noaccel); 1276 1277 /* 1278 * Adjust the IP header's payload length to reflect the removal 1279 * of the ICV. 1280 */ 1281 if (!ii->ipsec_in_v4) { 1282 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr; 1283 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - 1284 ipsa->ipsa_mac_len); 1285 } else { 1286 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 1287 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - 1288 ipsa->ipsa_mac_len); 1289 } 1290 1291 /* submit the request to the crypto framework */ 1292 return (esp_submit_req_inbound(ipsec_in_mp, ipsa, 1293 (uint8_t *)esph - data_mp->b_rptr)); 1294 } 1295 1296 /* 1297 * Perform the really difficult work of inserting the proposed situation. 1298 * Called while holding the algorithm lock. 1299 */ 1300 static void 1301 esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) 1302 { 1303 sadb_comb_t *comb = (sadb_comb_t *)(prop + 1); 1304 ipsec_out_t *io; 1305 ipsec_action_t *ap; 1306 ipsec_prot_t *prot; 1307 netstack_t *ns; 1308 ipsecesp_stack_t *espstack; 1309 ipsec_stack_t *ipss; 1310 1311 io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr; 1312 ASSERT(io->ipsec_out_type == IPSEC_OUT); 1313 ns = io->ipsec_out_ns; 1314 espstack = ns->netstack_ipsecesp; 1315 ipss = ns->netstack_ipsec; 1316 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 1317 1318 prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; 1319 prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t)); 1320 *(uint32_t *)(&prop->sadb_prop_replay) = 0; /* Quick zero-out! */ 1321 1322 prop->sadb_prop_replay = espstack->ipsecesp_replay_size; 1323 1324 /* 1325 * Based upon algorithm properties, and what-not, prioritize 1326 * a proposal. 
If the IPSEC_OUT message has an algorithm specified, 1327 * use it first and foremost. 1328 * 1329 * For each action in policy list 1330 * Add combination. If I've hit limit, return. 1331 */ 1332 1333 for (ap = acqrec->ipsacq_act; ap != NULL; 1334 ap = ap->ipa_next) { 1335 ipsec_alginfo_t *ealg = NULL; 1336 ipsec_alginfo_t *aalg = NULL; 1337 1338 if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY) 1339 continue; 1340 1341 prot = &ap->ipa_act.ipa_apply; 1342 1343 if (!(prot->ipp_use_esp)) 1344 continue; 1345 1346 if (prot->ipp_esp_auth_alg != 0) { 1347 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] 1348 [prot->ipp_esp_auth_alg]; 1349 if (aalg == NULL || !ALG_VALID(aalg)) 1350 continue; 1351 } 1352 1353 ASSERT(prot->ipp_encr_alg > 0); 1354 ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] 1355 [prot->ipp_encr_alg]; 1356 if (ealg == NULL || !ALG_VALID(ealg)) 1357 continue; 1358 1359 comb->sadb_comb_flags = 0; 1360 comb->sadb_comb_reserved = 0; 1361 comb->sadb_comb_encrypt = ealg->alg_id; 1362 comb->sadb_comb_encrypt_minbits = 1363 MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits); 1364 comb->sadb_comb_encrypt_maxbits = 1365 MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits); 1366 if (aalg == NULL) { 1367 comb->sadb_comb_auth = 0; 1368 comb->sadb_comb_auth_minbits = 0; 1369 comb->sadb_comb_auth_maxbits = 0; 1370 } else { 1371 comb->sadb_comb_auth = aalg->alg_id; 1372 comb->sadb_comb_auth_minbits = 1373 MAX(prot->ipp_espa_minbits, aalg->alg_ef_minbits); 1374 comb->sadb_comb_auth_maxbits = 1375 MIN(prot->ipp_espa_maxbits, aalg->alg_ef_maxbits); 1376 } 1377 1378 /* 1379 * The following may be based on algorithm 1380 * properties, but in the meantime, we just pick 1381 * some good, sensible numbers. Key mgmt. can 1382 * (and perhaps should) be the place to finalize 1383 * such decisions. 1384 */ 1385 1386 /* 1387 * No limits on allocations, since we really don't 1388 * support that concept currently. 
1389 */ 1390 comb->sadb_comb_soft_allocations = 0; 1391 comb->sadb_comb_hard_allocations = 0; 1392 1393 /* 1394 * These may want to come from policy rule.. 1395 */ 1396 comb->sadb_comb_soft_bytes = 1397 espstack->ipsecesp_default_soft_bytes; 1398 comb->sadb_comb_hard_bytes = 1399 espstack->ipsecesp_default_hard_bytes; 1400 comb->sadb_comb_soft_addtime = 1401 espstack->ipsecesp_default_soft_addtime; 1402 comb->sadb_comb_hard_addtime = 1403 espstack->ipsecesp_default_hard_addtime; 1404 comb->sadb_comb_soft_usetime = 1405 espstack->ipsecesp_default_soft_usetime; 1406 comb->sadb_comb_hard_usetime = 1407 espstack->ipsecesp_default_hard_usetime; 1408 1409 prop->sadb_prop_len += SADB_8TO64(sizeof (*comb)); 1410 if (--combs == 0) 1411 break; /* out of space.. */ 1412 comb++; 1413 } 1414 } 1415 1416 /* 1417 * Prepare and actually send the SADB_ACQUIRE message to PF_KEY. 1418 */ 1419 static void 1420 esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns) 1421 { 1422 uint_t combs; 1423 sadb_msg_t *samsg; 1424 sadb_prop_t *prop; 1425 mblk_t *pfkeymp, *msgmp; 1426 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1427 ipsec_stack_t *ipss = ns->netstack_ipsec; 1428 1429 ESP_BUMP_STAT(espstack, acquire_requests); 1430 1431 if (espstack->esp_pfkey_q == NULL) 1432 return; 1433 1434 /* Set up ACQUIRE. */ 1435 pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP, 1436 ns->netstack_ipsec); 1437 if (pfkeymp == NULL) { 1438 esp0dbg(("sadb_setup_acquire failed.\n")); 1439 return; 1440 } 1441 ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); 1442 combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] * 1443 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 1444 msgmp = pfkeymp->b_cont; 1445 samsg = (sadb_msg_t *)(msgmp->b_rptr); 1446 1447 /* Insert proposal here. 
*/ 1448 1449 prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len); 1450 esp_insert_prop(prop, acqrec, combs); 1451 samsg->sadb_msg_len += prop->sadb_prop_len; 1452 msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len); 1453 1454 mutex_exit(&ipss->ipsec_alg_lock); 1455 1456 /* 1457 * Must mutex_exit() before sending PF_KEY message up, in 1458 * order to avoid recursive mutex_enter() if there are no registered 1459 * listeners. 1460 * 1461 * Once I've sent the message, I'm cool anyway. 1462 */ 1463 mutex_exit(&acqrec->ipsacq_lock); 1464 if (extended != NULL) { 1465 putnext(espstack->esp_pfkey_q, extended); 1466 } 1467 putnext(espstack->esp_pfkey_q, pfkeymp); 1468 } 1469 1470 /* 1471 * Handle the SADB_GETSPI message. Create a larval SA. 1472 */ 1473 static void 1474 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 1475 { 1476 ipsa_t *newbie, *target; 1477 isaf_t *outbound, *inbound; 1478 int rc, diagnostic; 1479 sadb_sa_t *assoc; 1480 keysock_out_t *kso; 1481 uint32_t newspi; 1482 1483 /* 1484 * Randomly generate a proposed SPI value 1485 */ 1486 (void) random_get_pseudo_bytes((uint8_t *)&newspi, sizeof (uint32_t)); 1487 newbie = sadb_getspi(ksi, newspi, &diagnostic, 1488 espstack->ipsecesp_netstack); 1489 1490 if (newbie == NULL) { 1491 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic, 1492 ksi->ks_in_serial); 1493 return; 1494 } else if (newbie == (ipsa_t *)-1) { 1495 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 1496 ksi->ks_in_serial); 1497 return; 1498 } 1499 1500 /* 1501 * XXX - We may randomly collide. We really should recover from this. 1502 * Unfortunately, that could require spending way-too-much-time 1503 * in here. For now, let the user retry. 
1504 */ 1505 1506 if (newbie->ipsa_addrfam == AF_INET6) { 1507 outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6, 1508 *(uint32_t *)(newbie->ipsa_dstaddr)); 1509 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6, 1510 newbie->ipsa_spi); 1511 } else { 1512 ASSERT(newbie->ipsa_addrfam == AF_INET); 1513 outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4, 1514 *(uint32_t *)(newbie->ipsa_dstaddr)); 1515 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4, 1516 newbie->ipsa_spi); 1517 } 1518 1519 mutex_enter(&outbound->isaf_lock); 1520 mutex_enter(&inbound->isaf_lock); 1521 1522 /* 1523 * Check for collisions (i.e. did sadb_getspi() return with something 1524 * that already exists?). 1525 * 1526 * Try outbound first. Even though SADB_GETSPI is traditionally 1527 * for inbound SAs, you never know what a user might do. 1528 */ 1529 target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi, 1530 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam); 1531 if (target == NULL) { 1532 target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi, 1533 newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, 1534 newbie->ipsa_addrfam); 1535 } 1536 1537 /* 1538 * I don't have collisions elsewhere! 1539 * (Nor will I because I'm still holding inbound/outbound locks.) 1540 */ 1541 1542 if (target != NULL) { 1543 rc = EEXIST; 1544 IPSA_REFRELE(target); 1545 } else { 1546 /* 1547 * sadb_insertassoc() also checks for collisions, so 1548 * if there's a colliding entry, rc will be set 1549 * to EEXIST. 1550 */ 1551 rc = sadb_insertassoc(newbie, inbound); 1552 (void) drv_getparm(TIME, &newbie->ipsa_hardexpiretime); 1553 newbie->ipsa_hardexpiretime += 1554 espstack->ipsecesp_larval_timeout; 1555 } 1556 1557 /* 1558 * Can exit outbound mutex. Hold inbound until we're done 1559 * with newbie. 
1560 */ 1561 mutex_exit(&outbound->isaf_lock); 1562 1563 if (rc != 0) { 1564 mutex_exit(&inbound->isaf_lock); 1565 IPSA_REFRELE(newbie); 1566 sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc, 1567 SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial); 1568 return; 1569 } 1570 1571 1572 /* Can write here because I'm still holding the bucket lock. */ 1573 newbie->ipsa_type = SADB_SATYPE_ESP; 1574 1575 /* 1576 * Construct successful return message. We have one thing going 1577 * for us in PF_KEY v2. That's the fact that 1578 * sizeof (sadb_spirange_t) == sizeof (sadb_sa_t) 1579 */ 1580 assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE]; 1581 assoc->sadb_sa_exttype = SADB_EXT_SA; 1582 assoc->sadb_sa_spi = newbie->ipsa_spi; 1583 *((uint64_t *)(&assoc->sadb_sa_replay)) = 0; 1584 mutex_exit(&inbound->isaf_lock); 1585 1586 /* Convert KEYSOCK_IN to KEYSOCK_OUT. */ 1587 kso = (keysock_out_t *)ksi; 1588 kso->ks_out_len = sizeof (*kso); 1589 kso->ks_out_serial = ksi->ks_in_serial; 1590 kso->ks_out_type = KEYSOCK_OUT; 1591 1592 /* 1593 * Can safely putnext() to esp_pfkey_q, because this is a turnaround 1594 * from the esp_pfkey_q. 1595 */ 1596 putnext(espstack->esp_pfkey_q, mp); 1597 } 1598 1599 /* 1600 * Insert the ESP header into a packet. Duplicate an mblk, and insert a newly 1601 * allocated mblk with the ESP header in between the two. 1602 */ 1603 static boolean_t 1604 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint, 1605 ipsecesp_stack_t *espstack) 1606 { 1607 mblk_t *split_mp = mp; 1608 uint_t wheretodiv = divpoint; 1609 1610 while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) { 1611 wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr); 1612 split_mp = split_mp->b_cont; 1613 ASSERT(split_mp != NULL); 1614 } 1615 1616 if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) { 1617 mblk_t *scratch; 1618 1619 /* "scratch" is the 2nd half, split_mp is the first. 
*/ 1620 scratch = dupb(split_mp); 1621 if (scratch == NULL) { 1622 esp1dbg(espstack, 1623 ("esp_insert_esp: can't allocate scratch.\n")); 1624 return (B_FALSE); 1625 } 1626 /* NOTE: dupb() doesn't set b_cont appropriately. */ 1627 scratch->b_cont = split_mp->b_cont; 1628 scratch->b_rptr += wheretodiv; 1629 split_mp->b_wptr = split_mp->b_rptr + wheretodiv; 1630 split_mp->b_cont = scratch; 1631 } 1632 /* 1633 * At this point, split_mp is exactly "wheretodiv" bytes long, and 1634 * holds the end of the pre-ESP part of the datagram. 1635 */ 1636 esp_mp->b_cont = split_mp->b_cont; 1637 split_mp->b_cont = esp_mp; 1638 1639 return (B_TRUE); 1640 } 1641 1642 /* 1643 * Finish processing of an inbound ESP packet after processing by the 1644 * crypto framework. 1645 * - Remove the ESP header. 1646 * - Send packet back to IP. 1647 * If authentication was performed on the packet, this function is called 1648 * only if the authentication succeeded. 1649 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1650 * mblk chain ipsec_in_mp. 
1651 */ 1652 static ipsec_status_t 1653 esp_in_done(mblk_t *ipsec_in_mp) 1654 { 1655 ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr; 1656 mblk_t *data_mp; 1657 ipsa_t *assoc; 1658 uint_t espstart; 1659 uint32_t ivlen = 0; 1660 uint_t processed_len; 1661 esph_t *esph; 1662 kstat_named_t *counter; 1663 boolean_t is_natt; 1664 netstack_t *ns = ii->ipsec_in_ns; 1665 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1666 ipsec_stack_t *ipss = ns->netstack_ipsec; 1667 1668 assoc = ii->ipsec_in_esp_sa; 1669 ASSERT(assoc != NULL); 1670 1671 is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 1672 1673 /* get the pointer to the ESP header */ 1674 if (assoc->ipsa_encr_alg == SADB_EALG_NULL) { 1675 /* authentication-only ESP */ 1676 espstart = ii->ipsec_in_crypto_data.cd_offset; 1677 processed_len = ii->ipsec_in_crypto_data.cd_length; 1678 } else { 1679 /* encryption present */ 1680 ivlen = assoc->ipsa_iv_len; 1681 if (assoc->ipsa_auth_alg == SADB_AALG_NONE) { 1682 /* encryption-only ESP */ 1683 espstart = ii->ipsec_in_crypto_data.cd_offset - 1684 sizeof (esph_t) - assoc->ipsa_iv_len; 1685 processed_len = ii->ipsec_in_crypto_data.cd_length + 1686 ivlen; 1687 } else { 1688 /* encryption with authentication */ 1689 espstart = ii->ipsec_in_crypto_dual_data.dd_offset1; 1690 processed_len = ii->ipsec_in_crypto_dual_data.dd_len2 + 1691 ivlen; 1692 } 1693 } 1694 1695 data_mp = ipsec_in_mp->b_cont; 1696 esph = (esph_t *)(data_mp->b_rptr + espstart); 1697 1698 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) { 1699 /* authentication passed if we reach this point */ 1700 ESP_BUMP_STAT(espstack, good_auth); 1701 data_mp->b_wptr -= assoc->ipsa_mac_len; 1702 1703 /* 1704 * Check replay window here! 1705 * For right now, assume keysock will set the replay window 1706 * size to zero for SAs that have an unspecified sender. 1707 * This may change... 1708 */ 1709 1710 if (!sadb_replay_check(assoc, esph->esph_replay)) { 1711 /* 1712 * Log the event. As of now we print out an event. 
1713 * Do not print the replay failure number, or else 1714 * syslog cannot collate the error messages. Printing 1715 * the replay number that failed opens a denial-of- 1716 * service attack. 1717 */ 1718 ipsec_assocfailure(info.mi_idnum, 0, 0, 1719 SL_ERROR | SL_WARN, 1720 "Replay failed for ESP spi 0x%x, dst %s.\n", 1721 assoc->ipsa_spi, assoc->ipsa_dstaddr, 1722 assoc->ipsa_addrfam, espstack->ipsecesp_netstack); 1723 ESP_BUMP_STAT(espstack, replay_failures); 1724 counter = DROPPER(ipss, ipds_esp_replay); 1725 goto drop_and_bail; 1726 } 1727 } 1728 1729 if (!esp_age_bytes(assoc, processed_len, B_TRUE)) { 1730 /* The ipsa has hit hard expiration, LOG and AUDIT. */ 1731 ipsec_assocfailure(info.mi_idnum, 0, 0, 1732 SL_ERROR | SL_WARN, 1733 "ESP association 0x%x, dst %s had bytes expire.\n", 1734 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1735 espstack->ipsecesp_netstack); 1736 ESP_BUMP_STAT(espstack, bytes_expired); 1737 counter = DROPPER(ipss, ipds_esp_bytes_expire); 1738 goto drop_and_bail; 1739 } 1740 1741 /* 1742 * Remove ESP header and padding from packet. I hope the compiler 1743 * spews "branch, predict taken" code for this. 1744 */ 1745 1746 if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter, 1747 espstack)) { 1748 if (is_natt) 1749 return (esp_fix_natt_checksums(data_mp, assoc)); 1750 return (IPSEC_STATUS_SUCCESS); 1751 } 1752 1753 esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n")); 1754 drop_and_bail: 1755 IP_ESP_BUMP_STAT(ipss, in_discards); 1756 /* 1757 * TODO: Extract inbound interface from the IPSEC_IN message's 1758 * ii->ipsec_in_rill_index. 1759 */ 1760 ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, counter, 1761 &espstack->esp_dropper); 1762 return (IPSEC_STATUS_FAILED); 1763 } 1764 1765 /* 1766 * Called upon failing the inbound ICV check. The message passed as 1767 * argument is freed. 
1768 */ 1769 static void 1770 esp_log_bad_auth(mblk_t *ipsec_in) 1771 { 1772 ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr; 1773 ipsa_t *assoc = ii->ipsec_in_esp_sa; 1774 netstack_t *ns = ii->ipsec_in_ns; 1775 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1776 ipsec_stack_t *ipss = ns->netstack_ipsec; 1777 1778 /* 1779 * Log the event. Don't print to the console, block 1780 * potential denial-of-service attack. 1781 */ 1782 ESP_BUMP_STAT(espstack, bad_auth); 1783 1784 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 1785 "ESP Authentication failed for spi 0x%x, dst %s.\n", 1786 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1787 espstack->ipsecesp_netstack); 1788 1789 IP_ESP_BUMP_STAT(ipss, in_discards); 1790 /* 1791 * TODO: Extract inbound interface from the IPSEC_IN 1792 * message's ii->ipsec_in_rill_index. 1793 */ 1794 ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, 1795 DROPPER(ipss, ipds_esp_bad_auth), 1796 &espstack->esp_dropper); 1797 } 1798 1799 1800 /* 1801 * Invoked for outbound packets after ESP processing. If the packet 1802 * also requires AH, performs the AH SA selection and AH processing. 1803 * Returns B_TRUE if the AH processing was not needed or if it was 1804 * performed successfully. Returns B_FALSE and consumes the passed mblk 1805 * if AH processing was required but could not be performed. 
1806 */ 1807 static boolean_t 1808 esp_do_outbound_ah(mblk_t *ipsec_mp) 1809 { 1810 ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr; 1811 ipsec_status_t ipsec_rc; 1812 ipsec_action_t *ap; 1813 1814 ap = io->ipsec_out_act; 1815 if (ap == NULL) { 1816 ipsec_policy_t *pp = io->ipsec_out_policy; 1817 ap = pp->ipsp_act; 1818 } 1819 1820 if (!ap->ipa_want_ah) 1821 return (B_TRUE); 1822 1823 ASSERT(io->ipsec_out_ah_done == B_FALSE); 1824 1825 if (io->ipsec_out_ah_sa == NULL) { 1826 if (!ipsec_outbound_sa(ipsec_mp, IPPROTO_AH)) { 1827 sadb_acquire(ipsec_mp, io, B_TRUE, B_FALSE); 1828 return (B_FALSE); 1829 } 1830 } 1831 ASSERT(io->ipsec_out_ah_sa != NULL); 1832 1833 io->ipsec_out_ah_done = B_TRUE; 1834 ipsec_rc = io->ipsec_out_ah_sa->ipsa_output_func(ipsec_mp); 1835 return (ipsec_rc == IPSEC_STATUS_SUCCESS); 1836 } 1837 1838 1839 /* 1840 * Kernel crypto framework callback invoked after completion of async 1841 * crypto requests. 1842 */ 1843 static void 1844 esp_kcf_callback(void *arg, int status) 1845 { 1846 mblk_t *ipsec_mp = (mblk_t *)arg; 1847 ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1848 boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN); 1849 netstackid_t stackid; 1850 netstack_t *ns, *ns_arg; 1851 ipsecesp_stack_t *espstack; 1852 ipsec_stack_t *ipss; 1853 ipsec_out_t *io = (ipsec_out_t *)ii; 1854 1855 ASSERT(ipsec_mp->b_cont != NULL); 1856 1857 if (is_inbound) { 1858 stackid = ii->ipsec_in_stackid; 1859 ns_arg = ii->ipsec_in_ns; 1860 } else { 1861 stackid = io->ipsec_out_stackid; 1862 ns_arg = io->ipsec_out_ns; 1863 } 1864 1865 /* 1866 * Verify that the netstack is still around; could have vanished 1867 * while kEf was doing its work. 
1868 */ 1869 ns = netstack_find_by_stackid(stackid); 1870 if (ns == NULL || ns != ns_arg) { 1871 /* Disappeared on us */ 1872 if (ns != NULL) 1873 netstack_rele(ns); 1874 freemsg(ipsec_mp); 1875 return; 1876 } 1877 1878 espstack = ns->netstack_ipsecesp; 1879 ipss = ns->netstack_ipsec; 1880 1881 if (status == CRYPTO_SUCCESS) { 1882 if (is_inbound) { 1883 if (esp_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) { 1884 netstack_rele(ns); 1885 return; 1886 } 1887 /* finish IPsec processing */ 1888 ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL); 1889 } else { 1890 /* 1891 * If a ICV was computed, it was stored by the 1892 * crypto framework at the end of the packet. 1893 */ 1894 ipha_t *ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr; 1895 1896 /* do AH processing if needed */ 1897 if (!esp_do_outbound_ah(ipsec_mp)) { 1898 netstack_rele(ns); 1899 return; 1900 } 1901 /* finish IPsec processing */ 1902 if (IPH_HDR_VERSION(ipha) == IP_VERSION) { 1903 ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL, 1904 NULL); 1905 } else { 1906 ip6_t *ip6h = (ip6_t *)ipha; 1907 ip_wput_ipsec_out_v6(NULL, ipsec_mp, ip6h, 1908 NULL, NULL); 1909 } 1910 } 1911 1912 } else if (status == CRYPTO_INVALID_MAC) { 1913 esp_log_bad_auth(ipsec_mp); 1914 1915 } else { 1916 esp1dbg(espstack, 1917 ("esp_kcf_callback: crypto failed with 0x%x\n", 1918 status)); 1919 ESP_BUMP_STAT(espstack, crypto_failures); 1920 if (is_inbound) 1921 IP_ESP_BUMP_STAT(ipss, in_discards); 1922 else 1923 ESP_BUMP_STAT(espstack, out_discards); 1924 ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL, 1925 DROPPER(ipss, ipds_esp_crypto_failed), 1926 &espstack->esp_dropper); 1927 } 1928 netstack_rele(ns); 1929 } 1930 1931 /* 1932 * Invoked on crypto framework failure during inbound and outbound processing. 
 *
 * mp		the IPSEC_IN/IPSEC_OUT message; handed to ip_drop_packet()
 * is_inbound	selects the log text and which discard counter is bumped
 * kef_rc	the crypto framework return code, used for debug output only
 * espstack	per-netstack ESP state
 */
static void
esp_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc,
    ipsecesp_stack_t *espstack)
{
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
	    is_inbound ? "inbound" : "outbound", kef_rc));
	ip_drop_packet(mp, is_inbound, NULL, NULL,
	    DROPPER(ipss, ipds_esp_crypto_failed),
	    &espstack->esp_dropper);
	ESP_BUMP_STAT(espstack, crypto_failures);
	if (is_inbound)
		IP_ESP_BUMP_STAT(ipss, in_discards);
	else
		ESP_BUMP_STAT(espstack, out_discards);
}

/*
 * Initialize a crypto_call_req_t so that esp_kcf_callback() is invoked on
 * completion.  Note that the callback argument is a variable named
 * "ipsec_mp" taken from the scope in which the macro is expanded; it is
 * not a macro parameter.
 */
#define	ESP_INIT_CALLREQ(_cr) {						\
	(_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_RESTRICTED;		\
	(_cr)->cr_callback_arg = ipsec_mp;				\
	(_cr)->cr_callback_func = esp_kcf_callback;			\
}

/*
 * Describe the ICV buffer (icvbuf, icvlen bytes) as a raw crypto_data_t.
 */
#define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {			\
	(mac)->cd_format = CRYPTO_DATA_RAW;				\
	(mac)->cd_offset = 0;						\
	(mac)->cd_length = icvlen;					\
	(mac)->cd_raw.iov_base = (char *)icvbuf;			\
	(mac)->cd_raw.iov_len = icvlen;					\
}

/*
 * Describe "len" bytes starting at offset "off" of mblk "mp".  If the first
 * mblk alone holds the whole range it is described as a raw buffer,
 * otherwise as an mblk chain.
 */
#define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) {			\
	if (MBLKL(mp) >= (len) + (off)) {				\
		(data)->cd_format = CRYPTO_DATA_RAW;			\
		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;		\
		(data)->cd_raw.iov_len = MBLKL(mp);			\
		(data)->cd_offset = off;				\
	} else {							\
		(data)->cd_format = CRYPTO_DATA_MBLK;			\
		(data)->cd_mp = mp;					\
		(data)->cd_offset = off;				\
	}								\
	(data)->cd_length = len;					\
}

/*
 * Describe two (offset, length) regions of the mblk chain "mp" for a dual
 * crypto operation; always uses the mblk format.
 */
#define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {	\
	(data)->dd_format = CRYPTO_DATA_MBLK;				\
	(data)->dd_mp = mp;						\
	(data)->dd_len1 = len1;						\
	(data)->dd_offset1 = off1;					\
	(data)->dd_len2 = len2;						\
	(data)->dd_offset2 = off2;					\
}

/*
 * Submit an inbound ESP packet to the crypto framework for ICV
 * verification and/or decryption, as dictated by the SA's algorithms.
 *
 * ipsec_mp	IPSEC_IN mblk; its b_cont is the packet
 * assoc	the inbound ESP SA
 * esph_offset	byte offset of the ESP header from the start of the packet
 *
 * Returns the result of esp_in_done() when the framework completed
 * synchronously, IPSEC_STATUS_PENDING when the request was queued (in which
 * case esp_kcf_callback() finishes the job), and IPSEC_STATUS_FAILED
 * otherwise.
 */
static ipsec_status_t
esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset)
{
	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
	boolean_t do_auth;
	uint_t auth_offset, msg_len, auth_len;
	crypto_call_req_t call_req;
	mblk_t *esp_mp;
	int kef_rc = CRYPTO_FAILED;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_encr;
	uint_t encr_offset, encr_len;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	netstack_t *ns = ii->ipsec_in_ns;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ASSERT(ii->ipsec_in_type == IPSEC_IN);

	/*
	 * In case kEF queues and calls back, keep netstackid_t for
	 * verification that the IP instance is still around in
	 * esp_kcf_callback().
	 */
	ii->ipsec_in_stackid = ns->netstack_stackid;

	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;

	/*
	 * An inbound packet is of the form:
	 * IPSEC_IN -> [IP,options,ESP,IV,data,pad,ICV]
	 * (the ICV is last: it is read from b_wptr - icv_len below).
	 */
	esp_mp = ipsec_mp->b_cont;
	msg_len = MBLKL(esp_mp);

	ESP_INIT_CALLREQ(&call_req);

	if (do_auth) {
		/* force asynchronous processing? */
		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
		    IPSEC_ALGS_EXEC_ASYNC)
			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;

		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* ICV to be verified */
		ESP_INIT_CRYPTO_MAC(&ii->ipsec_in_crypto_mac,
		    icv_len, esp_mp->b_wptr - icv_len);

		/* authentication starts at the ESP header */
		auth_offset = esph_offset;
		auth_len = msg_len - auth_offset - icv_len;
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
			    esp_mp, auth_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
			    &ii->ipsec_in_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &ii->ipsec_in_crypto_mac, &call_req);
		}
	}

	if (do_encr) {
		/* force asynchronous processing? */
		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
		    IPSEC_ALGS_EXEC_ASYNC)
			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;

		/* encryption template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);

		/* skip IV, since it is passed separately */
		encr_offset = esph_offset + sizeof (esph_t) + iv_len;
		/* runs to the end of the mblk; the dual case subtracts the ICV */
		encr_len = msg_len - encr_offset;

		if (!do_auth) {
			/* decryption only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
			    esp_mp, encr_offset, encr_len);

			/* specify IV */
			ii->ipsec_in_crypto_data.cd_miscdata =
			    (char *)esp_mp->b_rptr + sizeof (esph_t) +
			    esph_offset;

			/* call the crypto framework */
			kef_rc = crypto_decrypt(&assoc->ipsa_emech,
			    &ii->ipsec_in_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    NULL, &call_req);
		}
	}

	if (do_auth && do_encr) {
		/* dual operation */
		/* initialize input data argument */
		ESP_INIT_CRYPTO_DUAL_DATA(&ii->ipsec_in_crypto_dual_data,
		    esp_mp, auth_offset, auth_len,
		    encr_offset, encr_len - icv_len);

		/* specify IV */
		ii->ipsec_in_crypto_dual_data.dd_miscdata =
		    (char *)esp_mp->b_rptr + sizeof (esph_t) + esph_offset;

		/* call the framework */
		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
		    &assoc->ipsa_emech, &ii->ipsec_in_crypto_dual_data,
		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
		    auth_ctx_tmpl, encr_ctx_tmpl, &ii->ipsec_in_crypto_mac,
		    NULL, &call_req);
	}

	/*
	 * kef_rc is still CRYPTO_FAILED if none of the three cases above
	 * submitted a request; that and every other unlisted code fall
	 * through to esp_crypto_failed().
	 */
	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		return (esp_in_done(ipsec_mp));
	case CRYPTO_QUEUED:
		/* esp_kcf_callback() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (IPSEC_STATUS_PENDING);
	case CRYPTO_INVALID_MAC:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_log_bad_auth(ipsec_mp);
		return (IPSEC_STATUS_FAILED);
	}

	esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc, espstack);
	return (IPSEC_STATUS_FAILED);
}

static ipsec_status_t
esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf,
    uint_t payload_len)
{
	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
	uint_t auth_len;
	crypto_call_req_t call_req;
	mblk_t *esp_mp;
	int kef_rc = CRYPTO_FAILED;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_auth;
	boolean_t do_encr;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
	size_t esph_offset = (is_natt ?
UDPH_SIZE : 0); 2147 netstack_t *ns = io->ipsec_out_ns; 2148 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2149 ipsec_stack_t *ipss = ns->netstack_ipsec; 2150 2151 esp3dbg(espstack, ("esp_submit_req_outbound:%s", 2152 is_natt ? "natt" : "not natt")); 2153 2154 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2155 2156 /* 2157 * In case kEF queues and calls back, keep netstackid_t for 2158 * verification that the IP instance is still around in 2159 * esp_kcf_callback(). 2160 */ 2161 io->ipsec_out_stackid = ns->netstack_stackid; 2162 2163 do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; 2164 do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; 2165 2166 /* 2167 * Outbound IPsec packets are of the form: 2168 * IPSEC_OUT -> [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV] 2169 * unless it's NATT, then it's 2170 * IPSEC_OUT -> [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV] 2171 * Get a pointer to the mblk containing the ESP header. 2172 */ 2173 ASSERT(ipsec_mp->b_cont != NULL && ipsec_mp->b_cont->b_cont != NULL); 2174 esp_mp = ipsec_mp->b_cont->b_cont; 2175 2176 ESP_INIT_CALLREQ(&call_req); 2177 2178 if (do_auth) { 2179 /* force asynchronous processing? 
*/ 2180 if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] == 2181 IPSEC_ALGS_EXEC_ASYNC) 2182 call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE; 2183 2184 /* authentication context template */ 2185 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, 2186 auth_ctx_tmpl); 2187 2188 /* where to store the computed mac */ 2189 ESP_INIT_CRYPTO_MAC(&io->ipsec_out_crypto_mac, 2190 icv_len, icv_buf); 2191 2192 /* authentication starts at the ESP header */ 2193 auth_len = payload_len + iv_len + sizeof (esph_t); 2194 if (!do_encr) { 2195 /* authentication only */ 2196 /* initialize input data argument */ 2197 ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data, 2198 esp_mp, esph_offset, auth_len); 2199 2200 /* call the crypto framework */ 2201 kef_rc = crypto_mac(&assoc->ipsa_amech, 2202 &io->ipsec_out_crypto_data, 2203 &assoc->ipsa_kcfauthkey, auth_ctx_tmpl, 2204 &io->ipsec_out_crypto_mac, &call_req); 2205 } 2206 } 2207 2208 if (do_encr) { 2209 /* force asynchronous processing? */ 2210 if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] == 2211 IPSEC_ALGS_EXEC_ASYNC) 2212 call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE; 2213 2214 /* encryption context template */ 2215 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR, 2216 encr_ctx_tmpl); 2217 2218 if (!do_auth) { 2219 /* encryption only, skip mblk that contains ESP hdr */ 2220 /* initialize input data argument */ 2221 ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data, 2222 esp_mp->b_cont, 0, payload_len); 2223 2224 /* specify IV */ 2225 io->ipsec_out_crypto_data.cd_miscdata = 2226 (char *)esp_mp->b_rptr + sizeof (esph_t) + 2227 esph_offset; 2228 2229 /* call the crypto framework */ 2230 kef_rc = crypto_encrypt(&assoc->ipsa_emech, 2231 &io->ipsec_out_crypto_data, 2232 &assoc->ipsa_kcfencrkey, encr_ctx_tmpl, 2233 NULL, &call_req); 2234 } 2235 } 2236 2237 if (do_auth && do_encr) { 2238 /* 2239 * Encryption and authentication: 2240 * Pass the pointer to the mblk chain starting at the ESP 2241 * header to the framework. 
Skip the ESP header mblk 2242 * for encryption, which is reflected by an encryption 2243 * offset equal to the length of that mblk. Start 2244 * the authentication at the ESP header, i.e. use an 2245 * authentication offset of zero. 2246 */ 2247 ESP_INIT_CRYPTO_DUAL_DATA(&io->ipsec_out_crypto_dual_data, 2248 esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len); 2249 2250 /* specify IV */ 2251 io->ipsec_out_crypto_dual_data.dd_miscdata = 2252 (char *)esp_mp->b_rptr + sizeof (esph_t) + esph_offset; 2253 2254 /* call the framework */ 2255 kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech, 2256 &assoc->ipsa_amech, NULL, 2257 &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey, 2258 encr_ctx_tmpl, auth_ctx_tmpl, 2259 &io->ipsec_out_crypto_dual_data, 2260 &io->ipsec_out_crypto_mac, &call_req); 2261 } 2262 2263 switch (kef_rc) { 2264 case CRYPTO_SUCCESS: 2265 ESP_BUMP_STAT(espstack, crypto_sync); 2266 return (IPSEC_STATUS_SUCCESS); 2267 case CRYPTO_QUEUED: 2268 /* esp_kcf_callback() will be invoked on completion */ 2269 ESP_BUMP_STAT(espstack, crypto_async); 2270 return (IPSEC_STATUS_PENDING); 2271 } 2272 2273 esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc, espstack); 2274 return (IPSEC_STATUS_FAILED); 2275 } 2276 2277 /* 2278 * Handle outbound IPsec processing for IPv4 and IPv6 2279 * On success returns B_TRUE, on failure returns B_FALSE and frees the 2280 * mblk chain ipsec_in_mp. 
2281 */ 2282 static ipsec_status_t 2283 esp_outbound(mblk_t *mp) 2284 { 2285 mblk_t *ipsec_out_mp, *data_mp, *espmp, *tailmp; 2286 ipsec_out_t *io; 2287 ipha_t *ipha; 2288 ip6_t *ip6h; 2289 esph_t *esph; 2290 uint_t af; 2291 uint8_t *nhp; 2292 uintptr_t divpoint, datalen, adj, padlen, i, alloclen; 2293 uintptr_t esplen = sizeof (esph_t); 2294 uint8_t protocol; 2295 ipsa_t *assoc; 2296 uint_t iv_len, mac_len = 0; 2297 uchar_t *icv_buf; 2298 udpha_t *udpha; 2299 boolean_t is_natt = B_FALSE; 2300 netstack_t *ns; 2301 ipsecesp_stack_t *espstack; 2302 ipsec_stack_t *ipss; 2303 2304 ipsec_out_mp = mp; 2305 data_mp = ipsec_out_mp->b_cont; 2306 2307 io = (ipsec_out_t *)ipsec_out_mp->b_rptr; 2308 ns = io->ipsec_out_ns; 2309 espstack = ns->netstack_ipsecesp; 2310 ipss = ns->netstack_ipsec; 2311 2312 ESP_BUMP_STAT(espstack, out_requests); 2313 2314 /* 2315 * <sigh> We have to copy the message here, because TCP (for example) 2316 * keeps a dupb() of the message lying around for retransmission. 2317 * Since ESP changes the whole of the datagram, we have to create our 2318 * own copy lest we clobber TCP's data. Since we have to copy anyway, 2319 * we might as well make use of msgpullup() and get the mblk into one 2320 * contiguous piece! 2321 */ 2322 ipsec_out_mp->b_cont = msgpullup(data_mp, -1); 2323 if (ipsec_out_mp->b_cont == NULL) { 2324 esp0dbg(("esp_outbound: msgpullup() failed, " 2325 "dropping packet.\n")); 2326 ipsec_out_mp->b_cont = data_mp; 2327 /* 2328 * TODO: Find the outbound IRE for this packet and 2329 * pass it to ip_drop_packet(). 2330 */ 2331 ip_drop_packet(ipsec_out_mp, B_FALSE, NULL, NULL, 2332 DROPPER(ipss, ipds_esp_nomem), 2333 &espstack->esp_dropper); 2334 return (IPSEC_STATUS_FAILED); 2335 } else { 2336 freemsg(data_mp); 2337 data_mp = ipsec_out_mp->b_cont; 2338 } 2339 2340 /* 2341 * Reality check.... 2342 */ 2343 2344 ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). 
*/ 2345 2346 if (io->ipsec_out_v4) { 2347 af = AF_INET; 2348 divpoint = IPH_HDR_LENGTH(ipha); 2349 datalen = ntohs(ipha->ipha_length) - divpoint; 2350 nhp = (uint8_t *)&ipha->ipha_protocol; 2351 } else { 2352 ip6_pkt_t ipp; 2353 2354 af = AF_INET6; 2355 ip6h = (ip6_t *)ipha; 2356 bzero(&ipp, sizeof (ipp)); 2357 divpoint = ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL); 2358 if (ipp.ipp_dstopts != NULL && 2359 ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) { 2360 /* 2361 * Destination options are tricky. If we get in here, 2362 * then we have a terminal header following the 2363 * destination options. We need to adjust backwards 2364 * so we insert ESP BEFORE the destination options 2365 * bag. (So that the dstopts get encrypted!) 2366 * 2367 * Since this is for outbound packets only, we know 2368 * that non-terminal destination options only precede 2369 * routing headers. 2370 */ 2371 divpoint -= ipp.ipp_dstoptslen; 2372 } 2373 datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint; 2374 2375 if (ipp.ipp_rthdr != NULL) { 2376 nhp = &ipp.ipp_rthdr->ip6r_nxt; 2377 } else if (ipp.ipp_hopopts != NULL) { 2378 nhp = &ipp.ipp_hopopts->ip6h_nxt; 2379 } else { 2380 ASSERT(divpoint == sizeof (ip6_t)); 2381 /* It's probably IP + ESP. */ 2382 nhp = &ip6h->ip6_nxt; 2383 } 2384 } 2385 assoc = io->ipsec_out_esp_sa; 2386 ASSERT(assoc != NULL); 2387 2388 if (assoc->ipsa_usetime == 0) 2389 esp_set_usetime(assoc, B_FALSE); 2390 2391 if (assoc->ipsa_auth_alg != SADB_AALG_NONE) 2392 mac_len = assoc->ipsa_mac_len; 2393 2394 if (assoc->ipsa_flags & IPSA_F_NATT) { 2395 /* wedge in fake UDP */ 2396 is_natt = B_TRUE; 2397 esplen += UDPH_SIZE; 2398 } 2399 2400 /* 2401 * Set up ESP header and encryption padding for ENCR PI request. 2402 */ 2403 2404 /* Determine the padding length. Pad to 4-bytes for no-encryption. 
*/ 2405 if (assoc->ipsa_encr_alg != SADB_EALG_NULL) { 2406 iv_len = assoc->ipsa_iv_len; 2407 2408 /* 2409 * Include the two additional bytes (hence the - 2) for the 2410 * padding length and the next header. Take this into account 2411 * when calculating the actual length of the padding. 2412 */ 2413 ASSERT(ISP2(iv_len)); 2414 padlen = ((unsigned)(iv_len - datalen - 2)) & (iv_len - 1); 2415 } else { 2416 iv_len = 0; 2417 padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) & 2418 (sizeof (uint32_t) - 1); 2419 } 2420 2421 /* Allocate ESP header and IV. */ 2422 esplen += iv_len; 2423 2424 /* 2425 * Update association byte-count lifetimes. Don't forget to take 2426 * into account the padding length and next-header (hence the + 2). 2427 * 2428 * Use the amount of data fed into the "encryption algorithm". This 2429 * is the IV, the data length, the padding length, and the final two 2430 * bytes (padlen, and next-header). 2431 * 2432 */ 2433 2434 if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) { 2435 /* 2436 * TODO: Find the outbound IRE for this packet and 2437 * pass it to ip_drop_packet(). 2438 */ 2439 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2440 DROPPER(ipss, ipds_esp_bytes_expire), 2441 &espstack->esp_dropper); 2442 return (IPSEC_STATUS_FAILED); 2443 } 2444 2445 espmp = allocb(esplen, BPRI_HI); 2446 if (espmp == NULL) { 2447 ESP_BUMP_STAT(espstack, out_discards); 2448 esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n")); 2449 /* 2450 * TODO: Find the outbound IRE for this packet and 2451 * pass it to ip_drop_packet(). 
2452 */ 2453 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2454 DROPPER(ipss, ipds_esp_nomem), 2455 &espstack->esp_dropper); 2456 return (IPSEC_STATUS_FAILED); 2457 } 2458 espmp->b_wptr += esplen; 2459 esph = (esph_t *)espmp->b_rptr; 2460 2461 if (is_natt) { 2462 esp3dbg(espstack, ("esp_outbound: NATT")); 2463 2464 udpha = (udpha_t *)espmp->b_rptr; 2465 udpha->uha_src_port = htons(IPPORT_IKE_NATT); 2466 if (assoc->ipsa_remote_port != 0) 2467 udpha->uha_dst_port = assoc->ipsa_remote_port; 2468 else 2469 udpha->uha_dst_port = htons(IPPORT_IKE_NATT); 2470 /* 2471 * Set the checksum to 0, so that the ip_wput_ipsec_out() 2472 * can do the right thing. 2473 */ 2474 udpha->uha_checksum = 0; 2475 esph = (esph_t *)(udpha + 1); 2476 } 2477 2478 esph->esph_spi = assoc->ipsa_spi; 2479 2480 esph->esph_replay = htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1)); 2481 if (esph->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) { 2482 /* 2483 * XXX We have replay counter wrapping. 2484 * We probably want to nuke this SA (and its peer). 2485 */ 2486 ipsec_assocfailure(info.mi_idnum, 0, 0, 2487 SL_ERROR | SL_CONSOLE | SL_WARN, 2488 "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n", 2489 esph->esph_spi, assoc->ipsa_dstaddr, af, 2490 espstack->ipsecesp_netstack); 2491 2492 ESP_BUMP_STAT(espstack, out_discards); 2493 sadb_replay_delete(assoc); 2494 /* 2495 * TODO: Find the outbound IRE for this packet and 2496 * pass it to ip_drop_packet(). 2497 */ 2498 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2499 DROPPER(ipss, ipds_esp_replay), 2500 &espstack->esp_dropper); 2501 return (IPSEC_STATUS_FAILED); 2502 } 2503 2504 /* 2505 * Set the IV to a random quantity. We do not require the 2506 * highest quality random bits, but for best security with CBC 2507 * mode ciphers, the value must be unlikely to repeat and also 2508 * must not be known in advance to an adversary capable of 2509 * influencing the plaintext. 
2510 */ 2511 (void) random_get_pseudo_bytes((uint8_t *)(esph + 1), iv_len); 2512 2513 /* Fix the IP header. */ 2514 alloclen = padlen + 2 + mac_len; 2515 adj = alloclen + (espmp->b_wptr - espmp->b_rptr); 2516 2517 protocol = *nhp; 2518 2519 if (io->ipsec_out_v4) { 2520 ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj); 2521 if (is_natt) { 2522 *nhp = IPPROTO_UDP; 2523 udpha->uha_length = htons(ntohs(ipha->ipha_length) - 2524 IPH_HDR_LENGTH(ipha)); 2525 } else { 2526 *nhp = IPPROTO_ESP; 2527 } 2528 ipha->ipha_hdr_checksum = 0; 2529 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 2530 } else { 2531 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj); 2532 *nhp = IPPROTO_ESP; 2533 } 2534 2535 /* I've got the two ESP mblks, now insert them. */ 2536 2537 esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n")); 2538 esp2dbg(espstack, (dump_msg(data_mp))); 2539 2540 if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) { 2541 ESP_BUMP_STAT(espstack, out_discards); 2542 /* NOTE: esp_insert_esp() only fails if there's no memory. */ 2543 /* 2544 * TODO: Find the outbound IRE for this packet and 2545 * pass it to ip_drop_packet(). 2546 */ 2547 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2548 DROPPER(ipss, ipds_esp_nomem), 2549 &espstack->esp_dropper); 2550 freeb(espmp); 2551 return (IPSEC_STATUS_FAILED); 2552 } 2553 2554 /* Append padding (and leave room for ICV). */ 2555 for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont) 2556 ; 2557 if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) { 2558 tailmp->b_cont = allocb(alloclen, BPRI_HI); 2559 if (tailmp->b_cont == NULL) { 2560 ESP_BUMP_STAT(espstack, out_discards); 2561 esp0dbg(("esp_outbound: Can't allocate tailmp.\n")); 2562 /* 2563 * TODO: Find the outbound IRE for this packet and 2564 * pass it to ip_drop_packet(). 
2565 */ 2566 ip_drop_packet(mp, B_FALSE, NULL, NULL, 2567 DROPPER(ipss, ipds_esp_nomem), 2568 &espstack->esp_dropper); 2569 return (IPSEC_STATUS_FAILED); 2570 } 2571 tailmp = tailmp->b_cont; 2572 } 2573 2574 /* 2575 * If there's padding, N bytes of padding must be of the form 0x1, 2576 * 0x2, 0x3... 0xN. 2577 */ 2578 for (i = 0; i < padlen; ) { 2579 i++; 2580 *tailmp->b_wptr++ = i; 2581 } 2582 *tailmp->b_wptr++ = i; 2583 *tailmp->b_wptr++ = protocol; 2584 2585 esp2dbg(espstack, ("data_Mp before encryption:\n")); 2586 esp2dbg(espstack, (dump_msg(data_mp))); 2587 2588 /* 2589 * The packet is eligible for hardware acceleration if the 2590 * following conditions are satisfied: 2591 * 2592 * 1. the packet will not be fragmented 2593 * 2. the provider supports the algorithms specified by SA 2594 * 3. there is no pending control message being exchanged 2595 * 4. snoop is not attached 2596 * 5. the destination address is not a multicast address 2597 * 2598 * All five of these conditions are checked by IP prior to 2599 * sending the packet to ESP. 2600 * 2601 * But We, and We Alone, can, nay MUST check if the packet 2602 * is over NATT, and then disqualify it from hardware 2603 * acceleration. 2604 */ 2605 2606 if (io->ipsec_out_is_capab_ill && !(assoc->ipsa_flags & IPSA_F_NATT)) { 2607 return (esp_outbound_accelerated(ipsec_out_mp, mac_len)); 2608 } 2609 ESP_BUMP_STAT(espstack, noaccel); 2610 2611 /* 2612 * Okay. I've set up the pre-encryption ESP. Let's do it! 2613 */ 2614 2615 if (mac_len > 0) { 2616 ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim); 2617 icv_buf = tailmp->b_wptr; 2618 tailmp->b_wptr += mac_len; 2619 } else { 2620 icv_buf = NULL; 2621 } 2622 2623 return (esp_submit_req_outbound(ipsec_out_mp, assoc, icv_buf, 2624 datalen + padlen + 2)); 2625 } 2626 2627 /* 2628 * IP calls this to validate the ICMP errors that 2629 * we got from the network. 
2630 */ 2631 ipsec_status_t 2632 ipsecesp_icmp_error(mblk_t *ipsec_mp) 2633 { 2634 ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; 2635 boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN); 2636 netstack_t *ns; 2637 ipsecesp_stack_t *espstack; 2638 ipsec_stack_t *ipss; 2639 2640 if (is_inbound) { 2641 ns = ii->ipsec_in_ns; 2642 } else { 2643 ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr; 2644 2645 ns = io->ipsec_out_ns; 2646 } 2647 espstack = ns->netstack_ipsecesp; 2648 ipss = ns->netstack_ipsec; 2649 2650 /* 2651 * Unless we get an entire packet back, this function is useless. 2652 * Why? 2653 * 2654 * 1.) Partial packets are useless, because the "next header" 2655 * is at the end of the decrypted ESP packet. Without the 2656 * whole packet, this is useless. 2657 * 2658 * 2.) If we every use a stateful cipher, such as a stream or a 2659 * one-time pad, we can't do anything. 2660 * 2661 * Since the chances of us getting an entire packet back are very 2662 * very small, we discard here. 2663 */ 2664 IP_ESP_BUMP_STAT(ipss, in_discards); 2665 ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, 2666 DROPPER(ipss, ipds_esp_icmp), 2667 &espstack->esp_dropper); 2668 return (IPSEC_STATUS_FAILED); 2669 } 2670 2671 /* 2672 * ESP module read put routine. 2673 */ 2674 /* ARGSUSED */ 2675 static void 2676 ipsecesp_rput(queue_t *q, mblk_t *mp) 2677 { 2678 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 2679 2680 ASSERT(mp->b_datap->db_type != M_CTL); /* No more IRE_DB_REQ. */ 2681 2682 switch (mp->b_datap->db_type) { 2683 case M_PROTO: 2684 case M_PCPROTO: 2685 /* TPI message of some sort. */ 2686 switch (*((t_scalar_t *)mp->b_rptr)) { 2687 case T_BIND_ACK: 2688 esp3dbg(espstack, 2689 ("Thank you IP from ESP for T_BIND_ACK\n")); 2690 break; 2691 case T_ERROR_ACK: 2692 cmn_err(CE_WARN, 2693 "ipsecesp: ESP received T_ERROR_ACK from IP."); 2694 /* 2695 * Make esp_sadb.s_ip_q NULL, and in the 2696 * future, perhaps try again. 
2697 */ 2698 espstack->esp_sadb.s_ip_q = NULL; 2699 break; 2700 case T_OK_ACK: 2701 /* Probably from a (rarely sent) T_UNBIND_REQ. */ 2702 break; 2703 default: 2704 esp0dbg(("Unknown M_{,PC}PROTO message.\n")); 2705 } 2706 freemsg(mp); 2707 break; 2708 default: 2709 /* For now, passthru message. */ 2710 esp2dbg(espstack, ("ESP got unknown mblk type %d.\n", 2711 mp->b_datap->db_type)); 2712 putnext(q, mp); 2713 } 2714 } 2715 2716 /* 2717 * Construct an SADB_REGISTER message with the current algorithms. 2718 */ 2719 static boolean_t 2720 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial, 2721 ipsecesp_stack_t *espstack) 2722 { 2723 mblk_t *pfkey_msg_mp, *keysock_out_mp; 2724 sadb_msg_t *samsg; 2725 sadb_supported_t *sasupp_auth = NULL; 2726 sadb_supported_t *sasupp_encr = NULL; 2727 sadb_alg_t *saalg; 2728 uint_t allocsize = sizeof (*samsg); 2729 uint_t i, numalgs_snap; 2730 int current_aalgs; 2731 ipsec_alginfo_t **authalgs; 2732 uint_t num_aalgs; 2733 int current_ealgs; 2734 ipsec_alginfo_t **encralgs; 2735 uint_t num_ealgs; 2736 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 2737 2738 /* Allocate the KEYSOCK_OUT. */ 2739 keysock_out_mp = sadb_keysock_out(serial); 2740 if (keysock_out_mp == NULL) { 2741 esp0dbg(("esp_register_out: couldn't allocate mblk.\n")); 2742 return (B_FALSE); 2743 } 2744 2745 /* 2746 * Allocate the PF_KEY message that follows KEYSOCK_OUT. 2747 */ 2748 2749 mutex_enter(&ipss->ipsec_alg_lock); 2750 2751 /* 2752 * Fill SADB_REGISTER message's algorithm descriptors. Hold 2753 * down the lock while filling it. 2754 * 2755 * Return only valid algorithms, so the number of algorithms 2756 * to send up may be less than the number of algorithm entries 2757 * in the table. 
2758 */ 2759 authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH]; 2760 for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) 2761 if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) 2762 num_aalgs++; 2763 2764 if (num_aalgs != 0) { 2765 allocsize += (num_aalgs * sizeof (*saalg)); 2766 allocsize += sizeof (*sasupp_auth); 2767 } 2768 encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR]; 2769 for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) 2770 if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) 2771 num_ealgs++; 2772 2773 if (num_ealgs != 0) { 2774 allocsize += (num_ealgs * sizeof (*saalg)); 2775 allocsize += sizeof (*sasupp_encr); 2776 } 2777 keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI); 2778 if (keysock_out_mp->b_cont == NULL) { 2779 mutex_exit(&ipss->ipsec_alg_lock); 2780 freemsg(keysock_out_mp); 2781 return (B_FALSE); 2782 } 2783 2784 pfkey_msg_mp = keysock_out_mp->b_cont; 2785 pfkey_msg_mp->b_wptr += allocsize; 2786 if (num_aalgs != 0) { 2787 sasupp_auth = (sadb_supported_t *) 2788 (pfkey_msg_mp->b_rptr + sizeof (*samsg)); 2789 saalg = (sadb_alg_t *)(sasupp_auth + 1); 2790 2791 ASSERT(((ulong_t)saalg & 0x7) == 0); 2792 2793 numalgs_snap = 0; 2794 for (i = 0; 2795 ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); 2796 i++) { 2797 if (authalgs[i] == NULL || !ALG_VALID(authalgs[i])) 2798 continue; 2799 2800 saalg->sadb_alg_id = authalgs[i]->alg_id; 2801 saalg->sadb_alg_ivlen = 0; 2802 saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits; 2803 saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits; 2804 saalg->sadb_x_alg_defincr = authalgs[i]->alg_ef_default; 2805 saalg->sadb_x_alg_increment = 2806 authalgs[i]->alg_increment; 2807 numalgs_snap++; 2808 saalg++; 2809 } 2810 ASSERT(numalgs_snap == num_aalgs); 2811 #ifdef DEBUG 2812 /* 2813 * Reality check to make sure I snagged all of the 2814 * algorithms. 2815 */ 2816 for (; i < IPSEC_MAX_ALGS; i++) { 2817 if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) { 2818 cmn_err(CE_PANIC, "esp_register_out()! 
" 2819 "Missed aalg #%d.\n", i); 2820 } 2821 } 2822 #endif /* DEBUG */ 2823 } else { 2824 saalg = (sadb_alg_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg)); 2825 } 2826 2827 if (num_ealgs != 0) { 2828 sasupp_encr = (sadb_supported_t *)saalg; 2829 saalg = (sadb_alg_t *)(sasupp_encr + 1); 2830 2831 numalgs_snap = 0; 2832 for (i = 0; 2833 ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) { 2834 if (encralgs[i] == NULL || !ALG_VALID(encralgs[i])) 2835 continue; 2836 saalg->sadb_alg_id = encralgs[i]->alg_id; 2837 saalg->sadb_alg_ivlen = encralgs[i]->alg_datalen; 2838 saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits; 2839 saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits; 2840 saalg->sadb_x_alg_defincr = encralgs[i]->alg_ef_default; 2841 saalg->sadb_x_alg_increment = 2842 encralgs[i]->alg_increment; 2843 numalgs_snap++; 2844 saalg++; 2845 } 2846 ASSERT(numalgs_snap == num_ealgs); 2847 #ifdef DEBUG 2848 /* 2849 * Reality check to make sure I snagged all of the 2850 * algorithms. 2851 */ 2852 for (; i < IPSEC_MAX_ALGS; i++) { 2853 if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) { 2854 cmn_err(CE_PANIC, "esp_register_out()! " 2855 "Missed ealg #%d.\n", i); 2856 } 2857 } 2858 #endif /* DEBUG */ 2859 } 2860 2861 current_aalgs = num_aalgs; 2862 current_ealgs = num_ealgs; 2863 2864 mutex_exit(&ipss->ipsec_alg_lock); 2865 2866 /* Now fill the rest of the SADB_REGISTER message. */ 2867 2868 samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr; 2869 samsg->sadb_msg_version = PF_KEY_V2; 2870 samsg->sadb_msg_type = SADB_REGISTER; 2871 samsg->sadb_msg_errno = 0; 2872 samsg->sadb_msg_satype = SADB_SATYPE_ESP; 2873 samsg->sadb_msg_len = SADB_8TO64(allocsize); 2874 samsg->sadb_msg_reserved = 0; 2875 /* 2876 * Assume caller has sufficient sequence/pid number info. If it's one 2877 * from me over a new alg., I could give two hoots about sequence. 
2878 */ 2879 samsg->sadb_msg_seq = sequence; 2880 samsg->sadb_msg_pid = pid; 2881 2882 if (sasupp_auth != NULL) { 2883 sasupp_auth->sadb_supported_len = 2884 SADB_8TO64(sizeof (*sasupp_auth) + 2885 sizeof (*saalg) * current_aalgs); 2886 sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; 2887 sasupp_auth->sadb_supported_reserved = 0; 2888 } 2889 2890 if (sasupp_encr != NULL) { 2891 sasupp_encr->sadb_supported_len = 2892 SADB_8TO64(sizeof (*sasupp_encr) + 2893 sizeof (*saalg) * current_ealgs); 2894 sasupp_encr->sadb_supported_exttype = 2895 SADB_EXT_SUPPORTED_ENCRYPT; 2896 sasupp_encr->sadb_supported_reserved = 0; 2897 } 2898 2899 if (espstack->esp_pfkey_q != NULL) 2900 putnext(espstack->esp_pfkey_q, keysock_out_mp); 2901 else { 2902 freemsg(keysock_out_mp); 2903 return (B_FALSE); 2904 } 2905 2906 return (B_TRUE); 2907 } 2908 2909 /* 2910 * Invoked when the algorithm table changes. Causes SADB_REGISTER 2911 * messages continaining the current list of algorithms to be 2912 * sent up to the ESP listeners. 2913 */ 2914 void 2915 ipsecesp_algs_changed(netstack_t *ns) 2916 { 2917 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2918 2919 /* 2920 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners 2921 * everywhere. (The function itself checks for NULL esp_pfkey_q.) 2922 */ 2923 (void) esp_register_out(0, 0, 0, espstack); 2924 } 2925 2926 /* 2927 * taskq_dispatch handler. 
2928 */ 2929 static void 2930 inbound_task(void *arg) 2931 { 2932 esph_t *esph; 2933 mblk_t *mp = (mblk_t *)arg; 2934 ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; 2935 netstack_t *ns = ii->ipsec_in_ns; 2936 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 2937 int ipsec_rc; 2938 2939 esp2dbg(espstack, ("in ESP inbound_task")); 2940 ASSERT(espstack != NULL); 2941 2942 esph = ipsec_inbound_esp_sa(mp, ns); 2943 if (esph == NULL) 2944 return; 2945 ASSERT(ii->ipsec_in_esp_sa != NULL); 2946 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func(mp, esph); 2947 if (ipsec_rc != IPSEC_STATUS_SUCCESS) 2948 return; 2949 ip_fanout_proto_again(mp, NULL, NULL, NULL); 2950 } 2951 2952 /* 2953 * Now that weak-key passed, actually ADD the security association, and 2954 * send back a reply ADD message. 2955 */ 2956 static int 2957 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, 2958 int *diagnostic, ipsecesp_stack_t *espstack) 2959 { 2960 isaf_t *primary, *secondary, *inbound, *outbound; 2961 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 2962 sadb_address_t *dstext = 2963 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 2964 struct sockaddr_in *dst; 2965 struct sockaddr_in6 *dst6; 2966 boolean_t is_ipv4, clone = B_FALSE, is_inbound = B_FALSE; 2967 uint32_t *dstaddr; 2968 ipsa_t *larval = NULL; 2969 ipsacq_t *acqrec; 2970 iacqf_t *acq_bucket; 2971 mblk_t *acq_msgs = NULL; 2972 int rc; 2973 sadb_t *sp; 2974 int outhash; 2975 mblk_t *lpkt; 2976 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 2977 2978 /* 2979 * Locate the appropriate table(s). 
2980 */ 2981 2982 dst = (struct sockaddr_in *)(dstext + 1); 2983 dst6 = (struct sockaddr_in6 *)dst; 2984 is_ipv4 = (dst->sin_family == AF_INET); 2985 if (is_ipv4) { 2986 sp = &espstack->esp_sadb.s_v4; 2987 dstaddr = (uint32_t *)(&dst->sin_addr); 2988 outhash = OUTBOUND_HASH_V4(sp, *(ipaddr_t *)dstaddr); 2989 } else { 2990 sp = &espstack->esp_sadb.s_v6; 2991 dstaddr = (uint32_t *)(&dst6->sin6_addr); 2992 outhash = OUTBOUND_HASH_V6(sp, *(in6_addr_t *)dstaddr); 2993 } 2994 2995 inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi); 2996 outbound = &sp->sdb_of[outhash]; 2997 2998 switch (ksi->ks_in_dsttype) { 2999 case KS_IN_ADDR_MBCAST: 3000 clone = B_TRUE; /* All mcast SAs can be bidirectional */ 3001 /* FALLTHRU */ 3002 case KS_IN_ADDR_ME: 3003 primary = inbound; 3004 secondary = outbound; 3005 /* 3006 * If the source address is either one of mine, or unspecified 3007 * (which is best summed up by saying "not 'not mine'"), 3008 * then the association is potentially bi-directional, 3009 * in that it can be used for inbound traffic and outbound 3010 * traffic. The best example of such an SA is a multicast 3011 * SA (which allows me to receive the outbound traffic). 3012 */ 3013 if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME) 3014 clone = B_TRUE; 3015 is_inbound = B_TRUE; 3016 break; 3017 case KS_IN_ADDR_NOTME: 3018 primary = outbound; 3019 secondary = inbound; 3020 /* 3021 * If the source address literally not mine (either 3022 * unspecified or not mine), then this SA may have an 3023 * address that WILL be mine after some configuration. 3024 * We pay the price for this by making it a bi-directional 3025 * SA. 3026 */ 3027 if (ksi->ks_in_srctype != KS_IN_ADDR_ME) 3028 clone = B_TRUE; 3029 break; 3030 default: 3031 *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST; 3032 return (EINVAL); 3033 } 3034 3035 /* 3036 * Find a ACQUIRE list entry if possible. 
If we've added an SA that 3037 * suits the needs of an ACQUIRE list entry, we can eliminate the 3038 * ACQUIRE list entry and transmit the enqueued packets. Use the 3039 * high-bit of the sequence number to queue it. Key off destination 3040 * addr, and change acqrec's state. 3041 */ 3042 3043 if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) { 3044 acq_bucket = &sp->sdb_acq[outhash]; 3045 mutex_enter(&acq_bucket->iacqf_lock); 3046 for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL; 3047 acqrec = acqrec->ipsacq_next) { 3048 mutex_enter(&acqrec->ipsacq_lock); 3049 /* 3050 * Q: I only check sequence. Should I check dst? 3051 * A: Yes, check dest because those are the packets 3052 * that are queued up. 3053 */ 3054 if (acqrec->ipsacq_seq == samsg->sadb_msg_seq && 3055 IPSA_ARE_ADDR_EQUAL(dstaddr, 3056 acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) 3057 break; 3058 mutex_exit(&acqrec->ipsacq_lock); 3059 } 3060 if (acqrec != NULL) { 3061 /* 3062 * AHA! I found an ACQUIRE record for this SA. 3063 * Grab the msg list, and free the acquire record. 3064 * I already am holding the lock for this record, 3065 * so all I have to do is free it. 3066 */ 3067 acq_msgs = acqrec->ipsacq_mp; 3068 acqrec->ipsacq_mp = NULL; 3069 mutex_exit(&acqrec->ipsacq_lock); 3070 sadb_destroy_acquire(acqrec, 3071 espstack->ipsecesp_netstack); 3072 } 3073 mutex_exit(&acq_bucket->iacqf_lock); 3074 } 3075 3076 /* 3077 * Find PF_KEY message, and see if I'm an update. If so, find entry 3078 * in larval list (if there). 3079 */ 3080 3081 if (samsg->sadb_msg_type == SADB_UPDATE) { 3082 mutex_enter(&inbound->isaf_lock); 3083 larval = ipsec_getassocbyspi(inbound, assoc->sadb_sa_spi, 3084 ALL_ZEROES_PTR, dstaddr, dst->sin_family); 3085 mutex_exit(&inbound->isaf_lock); 3086 3087 if (larval == NULL) { 3088 esp0dbg(("Larval update, but larval disappeared.\n")); 3089 return (ESRCH); 3090 } /* Else sadb_common_add unlinks it for me! 
*/ 3091 } 3092 3093 lpkt = NULL; 3094 if (larval != NULL) 3095 lpkt = sadb_clear_lpkt(larval); 3096 3097 rc = sadb_common_add(espstack->esp_sadb.s_ip_q, espstack->esp_pfkey_q, 3098 mp, samsg, ksi, primary, secondary, larval, clone, is_inbound, 3099 diagnostic, espstack->ipsecesp_netstack); 3100 3101 if (rc == 0 && lpkt != NULL) { 3102 rc = !taskq_dispatch(esp_taskq, inbound_task, 3103 (void *) lpkt, TQ_NOSLEEP); 3104 } 3105 3106 if (rc != 0) { 3107 ip_drop_packet(lpkt, B_TRUE, NULL, NULL, 3108 DROPPER(ipss, ipds_sadb_inlarval_timeout), 3109 &espstack->esp_dropper); 3110 } 3111 3112 /* 3113 * How much more stack will I create with all of these 3114 * esp_outbound() calls? 3115 */ 3116 3117 while (acq_msgs != NULL) { 3118 mblk_t *mp = acq_msgs; 3119 3120 acq_msgs = acq_msgs->b_next; 3121 mp->b_next = NULL; 3122 if (rc == 0) { 3123 if (ipsec_outbound_sa(mp, IPPROTO_ESP)) { 3124 ((ipsec_out_t *)(mp->b_rptr))-> 3125 ipsec_out_esp_done = B_TRUE; 3126 if (esp_outbound(mp) == IPSEC_STATUS_SUCCESS) { 3127 ipha_t *ipha; 3128 3129 /* do AH processing if needed */ 3130 if (!esp_do_outbound_ah(mp)) 3131 continue; 3132 3133 ipha = (ipha_t *)mp->b_cont->b_rptr; 3134 3135 /* finish IPsec processing */ 3136 if (is_ipv4) { 3137 ip_wput_ipsec_out(NULL, mp, 3138 ipha, NULL, NULL); 3139 } else { 3140 ip6_t *ip6h = (ip6_t *)ipha; 3141 ip_wput_ipsec_out_v6(NULL, 3142 mp, ip6h, NULL, NULL); 3143 } 3144 } 3145 continue; 3146 } 3147 } 3148 ESP_BUMP_STAT(espstack, out_discards); 3149 ip_drop_packet(mp, B_FALSE, NULL, NULL, 3150 DROPPER(ipss, ipds_sadb_acquire_timeout), 3151 &espstack->esp_dropper); 3152 } 3153 3154 return (rc); 3155 } 3156 3157 /* 3158 * Add new ESP security association. This may become a generic AH/ESP 3159 * routine eventually. 
3160 */ 3161 static int 3162 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) 3163 { 3164 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3165 sadb_address_t *srcext = 3166 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3167 sadb_address_t *dstext = 3168 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3169 sadb_address_t *isrcext = 3170 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC]; 3171 sadb_address_t *idstext = 3172 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST]; 3173 sadb_address_t *nttext_loc = 3174 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC]; 3175 sadb_address_t *nttext_rem = 3176 (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM]; 3177 sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH]; 3178 sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT]; 3179 struct sockaddr_in *src, *dst; 3180 struct sockaddr_in *natt_loc, *natt_rem; 3181 struct sockaddr_in6 *natt_loc6, *natt_rem6; 3182 sadb_lifetime_t *soft = 3183 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT]; 3184 sadb_lifetime_t *hard = 3185 (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD]; 3186 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3187 ipsec_stack_t *ipss = ns->netstack_ipsec; 3188 3189 /* I need certain extensions present for an ADD message. 
*/ 3190 if (srcext == NULL) { 3191 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC; 3192 return (EINVAL); 3193 } 3194 if (dstext == NULL) { 3195 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3196 return (EINVAL); 3197 } 3198 if (isrcext == NULL && idstext != NULL) { 3199 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC; 3200 return (EINVAL); 3201 } 3202 if (isrcext != NULL && idstext == NULL) { 3203 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST; 3204 return (EINVAL); 3205 } 3206 if (assoc == NULL) { 3207 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3208 return (EINVAL); 3209 } 3210 if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) { 3211 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY; 3212 return (EINVAL); 3213 } 3214 3215 src = (struct sockaddr_in *)(srcext + 1); 3216 dst = (struct sockaddr_in *)(dstext + 1); 3217 natt_loc = (struct sockaddr_in *)(nttext_loc + 1); 3218 natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1); 3219 natt_rem = (struct sockaddr_in *)(nttext_rem + 1); 3220 natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1); 3221 3222 /* Sundry ADD-specific reality checks. */ 3223 /* XXX STATS : Logging/stats here? 
*/ 3224 if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) { 3225 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE; 3226 return (EINVAL); 3227 } 3228 if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) { 3229 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3230 return (EINVAL); 3231 } 3232 3233 if (assoc->sadb_sa_encrypt == SADB_EALG_NULL && 3234 assoc->sadb_sa_auth == SADB_AALG_NONE) { 3235 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3236 return (EINVAL); 3237 } 3238 3239 if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY | 3240 SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM | 3241 SADB_X_SAFLAGS_TUNNEL)) { 3242 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; 3243 return (EINVAL); 3244 } 3245 3246 if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) { 3247 return (EINVAL); 3248 } 3249 ASSERT(src->sin_family == dst->sin_family); 3250 3251 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) { 3252 if (nttext_loc == NULL) { 3253 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC; 3254 return (EINVAL); 3255 } 3256 3257 if (natt_loc->sin_family == AF_INET6 && 3258 !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) { 3259 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC; 3260 return (EINVAL); 3261 } 3262 } 3263 3264 if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) { 3265 if (nttext_rem == NULL) { 3266 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM; 3267 return (EINVAL); 3268 } 3269 if (natt_rem->sin_family == AF_INET6 && 3270 !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) { 3271 *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM; 3272 return (EINVAL); 3273 } 3274 } 3275 3276 3277 /* Stuff I don't support, for now. XXX Diagnostic? */ 3278 if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL || 3279 ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL) 3280 return (EOPNOTSUPP); 3281 3282 /* 3283 * XXX Policy : I'm not checking identities or sensitivity 3284 * labels at this time, but if I did, I'd do them here, before I sent 3285 * the weak key check up to the algorithm. 
3286 */ 3287 3288 mutex_enter(&ipss->ipsec_alg_lock); 3289 3290 /* 3291 * First locate the authentication algorithm. 3292 */ 3293 if (akey != NULL) { 3294 ipsec_alginfo_t *aalg; 3295 3296 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] 3297 [assoc->sadb_sa_auth]; 3298 if (aalg == NULL || !ALG_VALID(aalg)) { 3299 mutex_exit(&ipss->ipsec_alg_lock); 3300 esp1dbg(espstack, ("Couldn't find auth alg #%d.\n", 3301 assoc->sadb_sa_auth)); 3302 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; 3303 return (EINVAL); 3304 } 3305 3306 /* 3307 * Sanity check key sizes. 3308 * Note: It's not possible to use SADB_AALG_NONE because 3309 * this auth_alg is not defined with ALG_FLAG_VALID. If this 3310 * ever changes, the same check for SADB_AALG_NONE and 3311 * a auth_key != NULL should be made here ( see below). 3312 */ 3313 if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) { 3314 mutex_exit(&ipss->ipsec_alg_lock); 3315 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS; 3316 return (EINVAL); 3317 } 3318 ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3319 3320 /* check key and fix parity if needed */ 3321 if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE, 3322 diagnostic) != 0) { 3323 mutex_exit(&ipss->ipsec_alg_lock); 3324 return (EINVAL); 3325 } 3326 } 3327 3328 /* 3329 * Then locate the encryption algorithm. 3330 */ 3331 if (ekey != NULL) { 3332 ipsec_alginfo_t *ealg; 3333 3334 ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] 3335 [assoc->sadb_sa_encrypt]; 3336 if (ealg == NULL || !ALG_VALID(ealg)) { 3337 mutex_exit(&ipss->ipsec_alg_lock); 3338 esp1dbg(espstack, ("Couldn't find encr alg #%d.\n", 3339 assoc->sadb_sa_encrypt)); 3340 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; 3341 return (EINVAL); 3342 } 3343 3344 /* 3345 * Sanity check key sizes. If the encryption algorithm is 3346 * SADB_EALG_NULL but the encryption key is NOT 3347 * NULL then complain. 
3348 */ 3349 if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) || 3350 (!ipsec_valid_key_size(ekey->sadb_key_bits, ealg))) { 3351 mutex_exit(&ipss->ipsec_alg_lock); 3352 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS; 3353 return (EINVAL); 3354 } 3355 ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID); 3356 3357 /* check key */ 3358 if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE, 3359 diagnostic) != 0) { 3360 mutex_exit(&ipss->ipsec_alg_lock); 3361 return (EINVAL); 3362 } 3363 } 3364 mutex_exit(&ipss->ipsec_alg_lock); 3365 3366 return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, 3367 diagnostic, espstack)); 3368 } 3369 3370 /* 3371 * Update a security association. Updates come in two varieties. The first 3372 * is an update of lifetimes on a non-larval SA. The second is an update of 3373 * a larval SA, which ends up looking a lot more like an add. 3374 */ 3375 static int 3376 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3377 ipsecesp_stack_t *espstack) 3378 { 3379 sadb_address_t *dstext = 3380 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3381 struct sockaddr_in *sin; 3382 3383 if (dstext == NULL) { 3384 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST; 3385 return (EINVAL); 3386 } 3387 3388 sin = (struct sockaddr_in *)(dstext + 1); 3389 return (sadb_update_sa(mp, ksi, 3390 (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : 3391 &espstack->esp_sadb.s_v4, 3392 diagnostic, espstack->esp_pfkey_q, esp_add_sa, 3393 espstack->ipsecesp_netstack)); 3394 } 3395 3396 /* 3397 * Delete a security association. This is REALLY likely to be code common to 3398 * both AH and ESP. Find the association, then unlink it. 
3399 */ 3400 static int 3401 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, 3402 ipsecesp_stack_t *espstack) 3403 { 3404 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; 3405 sadb_address_t *dstext = 3406 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; 3407 sadb_address_t *srcext = 3408 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC]; 3409 struct sockaddr_in *sin; 3410 3411 if (assoc == NULL) { 3412 if (dstext != NULL) { 3413 sin = (struct sockaddr_in *)(dstext + 1); 3414 } else if (srcext != NULL) { 3415 sin = (struct sockaddr_in *)(srcext + 1); 3416 } else { 3417 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA; 3418 return (EINVAL); 3419 } 3420 return (sadb_purge_sa(mp, ksi, 3421 (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : 3422 &espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 3423 espstack->esp_sadb.s_ip_q)); 3424 } 3425 3426 return (sadb_del_sa(mp, ksi, &espstack->esp_sadb, diagnostic, 3427 espstack->esp_pfkey_q)); 3428 } 3429 3430 /* 3431 * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP 3432 * messages. 3433 */ 3434 static void 3435 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) 3436 { 3437 int error; 3438 sadb_msg_t *samsg; 3439 3440 /* 3441 * Dump each fanout, bailing if error is non-zero. 3442 */ 3443 3444 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi->ks_in_serial, 3445 &espstack->esp_sadb.s_v4); 3446 if (error != 0) 3447 goto bail; 3448 3449 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi->ks_in_serial, 3450 &espstack->esp_sadb.s_v6); 3451 bail: 3452 ASSERT(mp->b_cont != NULL); 3453 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3454 samsg->sadb_msg_errno = (uint8_t)error; 3455 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, 3456 (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL); 3457 } 3458 3459 /* 3460 * First-cut reality check for an inbound PF_KEY message. 
3461 */ 3462 static boolean_t 3463 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi, 3464 ipsecesp_stack_t *espstack) 3465 { 3466 int diagnostic; 3467 3468 if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) { 3469 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT; 3470 goto badmsg; 3471 } 3472 if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL || 3473 ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) { 3474 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT; 3475 goto badmsg; 3476 } 3477 return (B_FALSE); /* False ==> no failures */ 3478 3479 badmsg: 3480 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, 3481 ksi->ks_in_serial); 3482 return (B_TRUE); /* True ==> failures */ 3483 } 3484 3485 /* 3486 * ESP parsing of PF_KEY messages. Keysock did most of the really silly 3487 * error cases. What I receive is a fully-formed, syntactically legal 3488 * PF_KEY message. I then need to check semantics... 3489 * 3490 * This code may become common to AH and ESP. Stay tuned. 3491 * 3492 * I also make the assumption that db_ref's are cool. If this assumption 3493 * is wrong, this means that someone other than keysock or me has been 3494 * mucking with PF_KEY messages. 3495 */ 3496 static void 3497 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack) 3498 { 3499 mblk_t *msg = mp->b_cont; 3500 sadb_msg_t *samsg; 3501 keysock_in_t *ksi; 3502 int error; 3503 int diagnostic = SADB_X_DIAGNOSTIC_NONE; 3504 3505 ASSERT(msg != NULL); 3506 3507 samsg = (sadb_msg_t *)msg->b_rptr; 3508 ksi = (keysock_in_t *)mp->b_rptr; 3509 3510 /* 3511 * If applicable, convert unspecified AF_INET6 to unspecified 3512 * AF_INET. And do other address reality checks. 
3513 */ 3514 if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp, 3515 espstack->ipsecesp_netstack) || 3516 esp_pfkey_reality_failures(mp, ksi, espstack)) { 3517 return; 3518 } 3519 3520 switch (samsg->sadb_msg_type) { 3521 case SADB_ADD: 3522 error = esp_add_sa(mp, ksi, &diagnostic, 3523 espstack->ipsecesp_netstack); 3524 if (error != 0) { 3525 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3526 diagnostic, ksi->ks_in_serial); 3527 } 3528 /* else esp_add_sa() took care of things. */ 3529 break; 3530 case SADB_DELETE: 3531 error = esp_del_sa(mp, ksi, &diagnostic, espstack); 3532 if (error != 0) { 3533 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3534 diagnostic, ksi->ks_in_serial); 3535 } 3536 /* Else esp_del_sa() took care of things. */ 3537 break; 3538 case SADB_GET: 3539 error = sadb_get_sa(mp, ksi, &espstack->esp_sadb, &diagnostic, 3540 espstack->esp_pfkey_q); 3541 if (error != 0) { 3542 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3543 diagnostic, ksi->ks_in_serial); 3544 } 3545 /* Else sadb_get_sa() took care of things. */ 3546 break; 3547 case SADB_FLUSH: 3548 sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack); 3549 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL); 3550 break; 3551 case SADB_REGISTER: 3552 /* 3553 * Hmmm, let's do it! Check for extensions (there should 3554 * be none), extract the fields, call esp_register_out(), 3555 * then either free or report an error. 3556 * 3557 * Keysock takes care of the PF_KEY bookkeeping for this. 3558 */ 3559 if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid, 3560 ksi->ks_in_serial, espstack)) { 3561 freemsg(mp); 3562 } else { 3563 /* 3564 * Only way this path hits is if there is a memory 3565 * failure. It will not return B_FALSE because of 3566 * lack of esp_pfkey_q if I am in wput(). 
3567 */ 3568 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, 3569 diagnostic, ksi->ks_in_serial); 3570 } 3571 break; 3572 case SADB_UPDATE: 3573 /* 3574 * Find a larval, if not there, find a full one and get 3575 * strict. 3576 */ 3577 error = esp_update_sa(mp, ksi, &diagnostic, espstack); 3578 if (error != 0) { 3579 sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, 3580 diagnostic, ksi->ks_in_serial); 3581 } 3582 /* else esp_update_sa() took care of things. */ 3583 break; 3584 case SADB_GETSPI: 3585 /* 3586 * Reserve a new larval entry. 3587 */ 3588 esp_getspi(mp, ksi, espstack); 3589 break; 3590 case SADB_ACQUIRE: 3591 /* 3592 * Find larval and/or ACQUIRE record and kill it (them), I'm 3593 * most likely an error. Inbound ACQUIRE messages should only 3594 * have the base header. 3595 */ 3596 sadb_in_acquire(samsg, &espstack->esp_sadb, 3597 espstack->esp_pfkey_q, espstack->ipsecesp_netstack); 3598 freemsg(mp); 3599 break; 3600 case SADB_DUMP: 3601 /* 3602 * Dump all entries. 3603 */ 3604 esp_dump(mp, ksi, espstack); 3605 /* esp_dump will take care of the return message, etc. */ 3606 break; 3607 case SADB_EXPIRE: 3608 /* Should never reach me. */ 3609 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP, 3610 diagnostic, ksi->ks_in_serial); 3611 break; 3612 default: 3613 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, 3614 SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial); 3615 break; 3616 } 3617 } 3618 3619 /* 3620 * Handle case where PF_KEY says it can't find a keysock for one of my 3621 * ACQUIRE messages. 3622 */ 3623 static void 3624 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack) 3625 { 3626 sadb_msg_t *samsg; 3627 keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr; 3628 3629 if (mp->b_cont == NULL) { 3630 freemsg(mp); 3631 return; 3632 } 3633 samsg = (sadb_msg_t *)mp->b_cont->b_rptr; 3634 3635 /* 3636 * If keysock can't find any registered, delete the acquire record 3637 * immediately, and handle errors. 
3638 */ 3639 if (samsg->sadb_msg_type == SADB_ACQUIRE) { 3640 samsg->sadb_msg_errno = kse->ks_err_errno; 3641 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg)); 3642 /* 3643 * Use the write-side of the esp_pfkey_q, in case there is 3644 * no esp_sadb.s_ip_q. 3645 */ 3646 sadb_in_acquire(samsg, &espstack->esp_sadb, 3647 WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack); 3648 } 3649 3650 freemsg(mp); 3651 } 3652 3653 /* 3654 * ESP module write put routine. 3655 */ 3656 static void 3657 ipsecesp_wput(queue_t *q, mblk_t *mp) 3658 { 3659 ipsec_info_t *ii; 3660 struct iocblk *iocp; 3661 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 3662 3663 esp3dbg(espstack, ("In esp_wput().\n")); 3664 3665 /* NOTE: Each case must take care of freeing or passing mp. */ 3666 switch (mp->b_datap->db_type) { 3667 case M_CTL: 3668 if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) { 3669 /* Not big enough message. */ 3670 freemsg(mp); 3671 break; 3672 } 3673 ii = (ipsec_info_t *)mp->b_rptr; 3674 3675 switch (ii->ipsec_info_type) { 3676 case KEYSOCK_OUT_ERR: 3677 esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n")); 3678 esp_keysock_no_socket(mp, espstack); 3679 break; 3680 case KEYSOCK_IN: 3681 ESP_BUMP_STAT(espstack, keysock_in); 3682 esp3dbg(espstack, ("Got KEYSOCK_IN message.\n")); 3683 3684 /* Parse the message. 
*/ 3685 esp_parse_pfkey(mp, espstack); 3686 break; 3687 case KEYSOCK_HELLO: 3688 sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp, 3689 esp_ager, (void *)espstack, &espstack->esp_event, 3690 SADB_SATYPE_ESP); 3691 break; 3692 default: 3693 esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n", 3694 ii->ipsec_info_type)); 3695 freemsg(mp); 3696 break; 3697 } 3698 break; 3699 case M_IOCTL: 3700 iocp = (struct iocblk *)mp->b_rptr; 3701 switch (iocp->ioc_cmd) { 3702 case ND_SET: 3703 case ND_GET: 3704 if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) { 3705 qreply(q, mp); 3706 return; 3707 } else { 3708 iocp->ioc_error = ENOENT; 3709 } 3710 /* FALLTHRU */ 3711 default: 3712 /* We really don't support any other ioctls, do we? */ 3713 3714 /* Return EINVAL */ 3715 if (iocp->ioc_error != ENOENT) 3716 iocp->ioc_error = EINVAL; 3717 iocp->ioc_count = 0; 3718 mp->b_datap->db_type = M_IOCACK; 3719 qreply(q, mp); 3720 return; 3721 } 3722 default: 3723 esp3dbg(espstack, 3724 ("Got default message, type %d, passing to IP.\n", 3725 mp->b_datap->db_type)); 3726 putnext(q, mp); 3727 } 3728 } 3729 3730 /* 3731 * Process an outbound ESP packet that can be accelerated by a IPsec 3732 * hardware acceleration capable Provider. 3733 * The caller already inserted and initialized the ESP header. 3734 * This function allocates a tagging M_CTL, and adds room at the end 3735 * of the packet to hold the ICV if authentication is needed. 3736 * 3737 * On success returns B_TRUE, on failure returns B_FALSE and frees the 3738 * mblk chain ipsec_out. 
3739 */ 3740 static ipsec_status_t 3741 esp_outbound_accelerated(mblk_t *ipsec_out, uint_t icv_len) 3742 { 3743 ipsec_out_t *io; 3744 mblk_t *lastmp; 3745 netstack_t *ns; 3746 ipsecesp_stack_t *espstack; 3747 ipsec_stack_t *ipss; 3748 3749 io = (ipsec_out_t *)ipsec_out->b_rptr; 3750 ns = io->ipsec_out_ns; 3751 espstack = ns->netstack_ipsecesp; 3752 ipss = ns->netstack_ipsec; 3753 3754 ESP_BUMP_STAT(espstack, out_accelerated); 3755 3756 /* mark packet as being accelerated in IPSEC_OUT */ 3757 ASSERT(io->ipsec_out_accelerated == B_FALSE); 3758 io->ipsec_out_accelerated = B_TRUE; 3759 3760 /* 3761 * add room at the end of the packet for the ICV if needed 3762 */ 3763 if (icv_len > 0) { 3764 /* go to last mblk */ 3765 lastmp = ipsec_out; /* For following while loop. */ 3766 do { 3767 lastmp = lastmp->b_cont; 3768 } while (lastmp->b_cont != NULL); 3769 3770 /* if not enough available room, allocate new mblk */ 3771 if ((lastmp->b_wptr + icv_len) > lastmp->b_datap->db_lim) { 3772 lastmp->b_cont = allocb(icv_len, BPRI_HI); 3773 if (lastmp->b_cont == NULL) { 3774 ESP_BUMP_STAT(espstack, out_discards); 3775 ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL, 3776 DROPPER(ipss, ipds_esp_nomem), 3777 &espstack->esp_dropper); 3778 return (IPSEC_STATUS_FAILED); 3779 } 3780 lastmp = lastmp->b_cont; 3781 } 3782 lastmp->b_wptr += icv_len; 3783 } 3784 3785 return (IPSEC_STATUS_SUCCESS); 3786 } 3787 3788 /* 3789 * Process an inbound accelerated ESP packet. 3790 * On success returns B_TRUE, on failure returns B_FALSE and frees the 3791 * mblk chain ipsec_in. 
3792 */ 3793 static ipsec_status_t 3794 esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, 3795 ipsa_t *assoc) 3796 { 3797 ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr; 3798 mblk_t *hada_mp; 3799 uint32_t icv_len = 0; 3800 da_ipsec_t *hada; 3801 ipha_t *ipha; 3802 ip6_t *ip6h; 3803 kstat_named_t *counter; 3804 netstack_t *ns = ii->ipsec_in_ns; 3805 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 3806 ipsec_stack_t *ipss = ns->netstack_ipsec; 3807 3808 ESP_BUMP_STAT(espstack, in_accelerated); 3809 3810 hada_mp = ii->ipsec_in_da; 3811 ASSERT(hada_mp != NULL); 3812 hada = (da_ipsec_t *)hada_mp->b_rptr; 3813 3814 /* 3815 * We only support one level of decapsulation in hardware, so 3816 * nuke the pointer. 3817 */ 3818 ii->ipsec_in_da = NULL; 3819 ii->ipsec_in_accelerated = B_FALSE; 3820 3821 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) { 3822 /* 3823 * ESP with authentication. We expect the Provider to have 3824 * computed the ICV and placed it in the hardware acceleration 3825 * data attributes. 3826 * 3827 * Extract ICV length from attributes M_CTL and sanity check 3828 * its value. We allow the mblk to be smaller than da_ipsec_t 3829 * for a small ICV, as long as the entire ICV fits within the 3830 * mblk. 3831 * 3832 * Also ensures that the ICV length computed by Provider 3833 * corresponds to the ICV length of the agorithm specified by 3834 * the SA. 
3835 */ 3836 icv_len = hada->da_icv_len; 3837 if ((icv_len != assoc->ipsa_mac_len) || 3838 (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) < 3839 (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) { 3840 esp0dbg(("esp_inbound_accelerated: " 3841 "ICV len (%u) incorrect or mblk too small (%u)\n", 3842 icv_len, (uint32_t)(MBLKL(hada_mp)))); 3843 counter = DROPPER(ipss, ipds_esp_bad_auth); 3844 goto esp_in_discard; 3845 } 3846 } 3847 3848 /* get pointers to IP header */ 3849 if (isv4) { 3850 ipha = (ipha_t *)data_mp->b_rptr; 3851 } else { 3852 ip6h = (ip6_t *)data_mp->b_rptr; 3853 } 3854 3855 /* 3856 * Compare ICV in ESP packet vs ICV computed by adapter. 3857 * We also remove the ICV from the end of the packet since 3858 * it will no longer be needed. 3859 * 3860 * Assume that esp_inbound() already ensured that the pkt 3861 * was in one mblk. 3862 */ 3863 ASSERT(data_mp->b_cont == NULL); 3864 data_mp->b_wptr -= icv_len; 3865 /* adjust IP header */ 3866 if (isv4) 3867 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - icv_len); 3868 else 3869 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - icv_len); 3870 if (icv_len && bcmp(hada->da_icv, data_mp->b_wptr, icv_len)) { 3871 int af; 3872 void *addr; 3873 3874 if (isv4) { 3875 addr = &ipha->ipha_dst; 3876 af = AF_INET; 3877 } else { 3878 addr = &ip6h->ip6_dst; 3879 af = AF_INET6; 3880 } 3881 3882 /* 3883 * Log the event. Don't print to the console, block 3884 * potential denial-of-service attack. 3885 */ 3886 ESP_BUMP_STAT(espstack, bad_auth); 3887 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 3888 "ESP Authentication failed spi %x, dst_addr %s", 3889 assoc->ipsa_spi, addr, af, espstack->ipsecesp_netstack); 3890 counter = DROPPER(ipss, ipds_esp_bad_auth); 3891 goto esp_in_discard; 3892 } 3893 3894 esp3dbg(espstack, ("esp_inbound_accelerated: ESP authentication " 3895 "succeeded, checking replay\n")); 3896 3897 ipsec_in->b_cont = data_mp; 3898 3899 /* 3900 * Remove ESP header and padding from packet. 
3901 */ 3902 if (!esp_strip_header(data_mp, ii->ipsec_in_v4, assoc->ipsa_iv_len, 3903 &counter, espstack)) { 3904 esp1dbg(espstack, ("esp_inbound_accelerated: " 3905 "esp_strip_header() failed\n")); 3906 goto esp_in_discard; 3907 } 3908 3909 freeb(hada_mp); 3910 3911 /* 3912 * Account for usage.. 3913 */ 3914 if (!esp_age_bytes(assoc, msgdsize(data_mp), B_TRUE)) { 3915 /* The ipsa has hit hard expiration, LOG and AUDIT. */ 3916 ESP_BUMP_STAT(espstack, bytes_expired); 3917 IP_ESP_BUMP_STAT(ipss, in_discards); 3918 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 3919 "ESP association 0x%x, dst %s had bytes expire.\n", 3920 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 3921 espstack->ipsecesp_netstack); 3922 ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, 3923 DROPPER(ipss, ipds_esp_bytes_expire), 3924 &espstack->esp_dropper); 3925 return (IPSEC_STATUS_FAILED); 3926 } 3927 3928 /* done processing the packet */ 3929 return (IPSEC_STATUS_SUCCESS); 3930 3931 esp_in_discard: 3932 IP_ESP_BUMP_STAT(ipss, in_discards); 3933 freeb(hada_mp); 3934 3935 ipsec_in->b_cont = data_mp; /* For ip_drop_packet()'s sake... */ 3936 ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, 3937 &espstack->esp_dropper); 3938 3939 return (IPSEC_STATUS_FAILED); 3940 } 3941 3942 /* 3943 * Wrapper to allow IP to trigger an ESP association failure message 3944 * during inbound SA selection. 
3945 */ 3946 void 3947 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt, 3948 uint32_t spi, void *addr, int af, ipsecesp_stack_t *espstack) 3949 { 3950 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 3951 3952 if (espstack->ipsecesp_log_unknown_spi) { 3953 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi, 3954 addr, af, espstack->ipsecesp_netstack); 3955 } 3956 3957 ip_drop_packet(mp, B_TRUE, NULL, NULL, 3958 DROPPER(ipss, ipds_esp_no_sa), 3959 &espstack->esp_dropper); 3960 } 3961 3962 /* 3963 * Initialize the ESP input and output processing functions. 3964 */ 3965 void 3966 ipsecesp_init_funcs(ipsa_t *sa) 3967 { 3968 if (sa->ipsa_output_func == NULL) 3969 sa->ipsa_output_func = esp_outbound; 3970 if (sa->ipsa_input_func == NULL) 3971 sa->ipsa_input_func = esp_inbound; 3972 } 3973