xref: /titanic_44/usr/src/uts/common/inet/ip/ipsecesp.c (revision 39c23413b8df94a95f67b34cfd4a4dfc3fd0b48d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/stropts.h>
31 #include <sys/errno.h>
32 #include <sys/strlog.h>
33 #include <sys/tihdr.h>
34 #include <sys/socket.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/kmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/cmn_err.h>
40 #include <sys/vtrace.h>
41 #include <sys/debug.h>
42 #include <sys/atomic.h>
43 #include <sys/strsun.h>
44 #include <sys/random.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <netinet/ip6.h>
48 #include <net/pfkeyv2.h>
49 
50 #include <inet/common.h>
51 #include <inet/mi.h>
52 #include <inet/nd.h>
53 #include <inet/ip.h>
54 #include <inet/ip6.h>
55 #include <inet/sadb.h>
56 #include <inet/ipsec_info.h>
57 #include <inet/ipsec_impl.h>
58 #include <inet/ipsecesp.h>
59 #include <inet/ipdrop.h>
60 #include <inet/tcp.h>
61 #include <sys/kstat.h>
62 #include <sys/policy.h>
63 #include <sys/strsun.h>
64 #include <inet/udp_impl.h>
65 #include <sys/taskq.h>
66 
67 #include <sys/iphada.h>
68 
69 /* Packet dropper for ESP drops. */
70 static ipdropper_t esp_dropper;
71 
72 static kmutex_t ipsecesp_param_lock; /* Protects ipsecesp_param_arr[] below. */
/*
 * Table of ND variables supported by ipsecesp. These are loaded into
 * ipsecesp_g_nd by ipsecesp_ddi_init().
 * All of these are alterable, within the min/max values given, at run time.
 *
 * Each row is { min, max, current value, name }.  ipsecesp_param_set()
 * rejects any new value outside [min, max]; both it and
 * ipsecesp_param_get() take ipsecesp_param_lock around the value access.
 *
 * NOTE: the accessor macros that follow this table select rows by numeric
 * index, so a row must never be inserted, removed or reordered without
 * updating every macro below.
 */
static	ipsecespparam_t	ipsecesp_param_arr[] = {
	/* min	max			value	name */
	{ 0,	3,			0,	"ipsecesp_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
	{ 1,	10,			1,	"ipsecesp_reap_delay"},
	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecesp_replay_size"},
	{ 1,	300,			15,	"ipsecesp_acquire_timeout"},
	{ 1,	1800,			90,	"ipsecesp_larval_timeout"},
	/* Default lifetime values for ACQUIRE messages. */
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
	{ 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
	{ 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
	{ 0,	1,		0,	"ipsecesp_log_unknown_spi"},
	{ 0,	2,		1,	"ipsecesp_padding_check"},
};
/*
 * Shorthand for the current value of each tunable (and, for the age
 * interval, its maximum).  These expand to lvalues inside the table above
 * and do NOT take ipsecesp_param_lock themselves.
 */
#define	ipsecesp_debug		ipsecesp_param_arr[0].ipsecesp_param_value
#define	ipsecesp_age_interval	ipsecesp_param_arr[1].ipsecesp_param_value
#define	ipsecesp_age_int_max	ipsecesp_param_arr[1].ipsecesp_param_max
#define	ipsecesp_reap_delay	ipsecesp_param_arr[2].ipsecesp_param_value
#define	ipsecesp_replay_size	ipsecesp_param_arr[3].ipsecesp_param_value
#define	ipsecesp_acquire_timeout ipsecesp_param_arr[4].ipsecesp_param_value
#define	ipsecesp_larval_timeout ipsecesp_param_arr[5].ipsecesp_param_value
#define	ipsecesp_default_soft_bytes \
	ipsecesp_param_arr[6].ipsecesp_param_value
#define	ipsecesp_default_hard_bytes \
	ipsecesp_param_arr[7].ipsecesp_param_value
#define	ipsecesp_default_soft_addtime \
	ipsecesp_param_arr[8].ipsecesp_param_value
#define	ipsecesp_default_hard_addtime \
	ipsecesp_param_arr[9].ipsecesp_param_value
#define	ipsecesp_default_soft_usetime \
	ipsecesp_param_arr[10].ipsecesp_param_value
#define	ipsecesp_default_hard_usetime \
	ipsecesp_param_arr[11].ipsecesp_param_value
#define	ipsecesp_log_unknown_spi \
	ipsecesp_param_arr[12].ipsecesp_param_value
/* 0 disables padding checks, 1 = weak check, 2 = weak + strong check. */
#define	ipsecesp_padding_check \
	ipsecesp_param_arr[13].ipsecesp_param_value
119 
/*
 * Debug printing.  The argument is a complete, parenthesized printf()
 * argument list, e.g. esp1dbg(("x = %d\n", x)).
 *
 * esp0dbg always prints; esp1dbg/esp2dbg/esp3dbg print only when the
 * ipsecesp_debug tunable is at least 1/2/3 respectively.
 *
 * CAUTION: the gated forms expand to a bare "if (...) printf(...)" with no
 * enclosing block, so they must only be used inside braces -- an unbraced
 * "if (x) esp1dbg((...)); else ..." would bind the else to the macro's if.
 */
#define	esp0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	esp1dbg(a)	if (ipsecesp_debug != 0) printf a
#define	esp2dbg(a)	if (ipsecesp_debug > 1) printf a
#define	esp3dbg(a)	if (ipsecesp_debug > 2) printf a
125 
126 static IDP ipsecesp_g_nd;
127 
128 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
129 static int ipsecesp_close(queue_t *);
130 static void ipsecesp_rput(queue_t *, mblk_t *);
131 static void ipsecesp_wput(queue_t *, mblk_t *);
132 static void esp_send_acquire(ipsacq_t *, mblk_t *);
133 
134 static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t);
135 static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *,
136     boolean_t, ipsa_t *);
137 
138 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t);
139 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
140     kstat_named_t **);
141 static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t);
142 static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *,
143     uint_t);
144 
/*
 * STREAMS module identification.  The first field (5137) is the module id;
 * it is read back elsewhere in this file as info.mi_idnum for rate-limited
 * logging and for sadb_retimeout().  The remaining fields follow the
 * module_info(9S) layout (name, min/max packet size, high/low water marks).
 */
static struct module_info info = {
	5137, "ipsecesp", 0, INFPSZ, 65536, 1024
};

/*
 * Read-side queue initialization: ipsecesp_rput() handles messages coming
 * up the stream; the NULL that follows it means no service routine is
 * registered (presumably per qinit(9S) field order).
 */
static struct qinit rinit = {
	(pfi_t)ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/*
 * Write-side queue initialization: ipsecesp_wput() handles messages coming
 * down the stream.  Open/close and module_info are shared with the read
 * side.
 */
static struct qinit winit = {
	(pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/* Stream table exported for the module framework (not static). */
struct streamtab ipsecespinfo = {
	&rinit, &winit, NULL, NULL
};
162 
163 /*
164  * Keysock instance of ESP.  "There can be only one." :)
165  * Use casptr() on this because I don't set it until KEYSOCK_HELLO comes down.
166  * Paired up with the esp_pfkey_q is the esp_event, which will age SAs.
167  */
168 static queue_t *esp_pfkey_q;
169 static timeout_id_t esp_event;
170 static taskq_t *esp_taskq;
171 
172 /*
173  * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
174  *
175  * Question:	Do I need this, given that all instance's esps->esps_wq point
176  *		to IP?
177  *
178  * Answer:	Yes, because I need to know which queue is BOUND to
179  *		IPPROTO_ESP
180  */
181 static mblk_t *esp_ip_unbind;
182 
183 /*
184  * Stats.  This may eventually become a full-blown SNMP MIB once that spec
185  * stabilizes.
186  */
187 
188 typedef struct {
189 	kstat_named_t esp_stat_num_aalgs;
190 	kstat_named_t esp_stat_good_auth;
191 	kstat_named_t esp_stat_bad_auth;
192 	kstat_named_t esp_stat_bad_padding;
193 	kstat_named_t esp_stat_replay_failures;
194 	kstat_named_t esp_stat_replay_early_failures;
195 	kstat_named_t esp_stat_keysock_in;
196 	kstat_named_t esp_stat_out_requests;
197 	kstat_named_t esp_stat_acquire_requests;
198 	kstat_named_t esp_stat_bytes_expired;
199 	kstat_named_t esp_stat_out_discards;
200 	kstat_named_t esp_stat_in_accelerated;
201 	kstat_named_t esp_stat_out_accelerated;
202 	kstat_named_t esp_stat_noaccel;
203 	kstat_named_t esp_stat_crypto_sync;
204 	kstat_named_t esp_stat_crypto_async;
205 	kstat_named_t esp_stat_crypto_failures;
206 	kstat_named_t esp_stat_num_ealgs;
207 	kstat_named_t esp_stat_bad_decrypt;
208 } esp_kstats_t;
209 
210 uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;
211 #define	ESP_BUMP_STAT(x) (esp_kstats->esp_stat_ ## x).value.ui64++
212 #define	ESP_DEBUMP_STAT(x) (esp_kstats->esp_stat_ ## x).value.ui64--
213 
214 static kstat_t *esp_ksp;
215 static esp_kstats_t *esp_kstats;
216 
217 static int	esp_kstat_update(kstat_t *, int);
218 
219 static boolean_t
220 esp_kstat_init(void)
221 {
222 	esp_ksp = kstat_create("ipsecesp", 0, "esp_stat", "net",
223 	    KSTAT_TYPE_NAMED, sizeof (*esp_kstats) / sizeof (kstat_named_t),
224 	    KSTAT_FLAG_PERSISTENT);
225 
226 	if (esp_ksp == NULL)
227 		return (B_FALSE);
228 
229 	esp_kstats = esp_ksp->ks_data;
230 
231 	esp_ksp->ks_update = esp_kstat_update;
232 
233 #define	K64 KSTAT_DATA_UINT64
234 #define	KI(x) kstat_named_init(&(esp_kstats->esp_stat_##x), #x, K64)
235 
236 	KI(num_aalgs);
237 	KI(num_ealgs);
238 	KI(good_auth);
239 	KI(bad_auth);
240 	KI(bad_padding);
241 	KI(replay_failures);
242 	KI(replay_early_failures);
243 	KI(keysock_in);
244 	KI(out_requests);
245 	KI(acquire_requests);
246 	KI(bytes_expired);
247 	KI(out_discards);
248 	KI(in_accelerated);
249 	KI(out_accelerated);
250 	KI(noaccel);
251 	KI(crypto_sync);
252 	KI(crypto_async);
253 	KI(crypto_failures);
254 	KI(bad_decrypt);
255 
256 #undef KI
257 #undef K64
258 
259 	kstat_install(esp_ksp);
260 
261 	return (B_TRUE);
262 }
263 
264 static int
265 esp_kstat_update(kstat_t *kp, int rw)
266 {
267 	esp_kstats_t *ekp;
268 
269 	if ((kp == NULL) || (kp->ks_data == NULL))
270 		return (EIO);
271 
272 	if (rw == KSTAT_WRITE)
273 		return (EACCES);
274 
275 	ASSERT(kp == esp_ksp);
276 	ekp = (esp_kstats_t *)kp->ks_data;
277 	ASSERT(ekp == esp_kstats);
278 
279 	mutex_enter(&alg_lock);
280 	ekp->esp_stat_num_aalgs.value.ui64 = ipsec_nalgs[IPSEC_ALG_AUTH];
281 	ekp->esp_stat_num_ealgs.value.ui64 = ipsec_nalgs[IPSEC_ALG_ENCR];
282 	mutex_exit(&alg_lock);
283 
284 	return (0);
285 }
286 
287 #ifdef DEBUG
288 /*
289  * Debug routine, useful to see pre-encryption data.
290  */
291 static char *
292 dump_msg(mblk_t *mp)
293 {
294 	char tmp_str[3], tmp_line[256];
295 
296 	while (mp != NULL) {
297 		unsigned char *ptr;
298 
299 		printf("mblk address 0x%p, length %ld, db_ref %d "
300 		    "type %d, base 0x%p, lim 0x%p\n",
301 		    (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
302 		    mp->b_datap->db_ref, mp->b_datap->db_type,
303 		    (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
304 		ptr = mp->b_rptr;
305 
306 		tmp_line[0] = '\0';
307 		while (ptr < mp->b_wptr) {
308 			uint_t diff;
309 
310 			diff = (ptr - mp->b_rptr);
311 			if (!(diff & 0x1f)) {
312 				if (strlen(tmp_line) > 0) {
313 					printf("bytes: %s\n", tmp_line);
314 					tmp_line[0] = '\0';
315 				}
316 			}
317 			if (!(diff & 0x3))
318 				(void) strcat(tmp_line, " ");
319 			(void) sprintf(tmp_str, "%02x", *ptr);
320 			(void) strcat(tmp_line, tmp_str);
321 			ptr++;
322 		}
323 		if (strlen(tmp_line) > 0)
324 			printf("bytes: %s\n", tmp_line);
325 
326 		mp = mp->b_cont;
327 	}
328 
329 	return ("\n");
330 }
331 
332 #else /* DEBUG */
333 static char *
334 dump_msg(mblk_t *mp)
335 {
336 	printf("Find value of mp %p.\n", mp);
337 	return ("\n");
338 }
339 #endif /* DEBUG */
340 
341 /*
342  * Don't have to lock age_interval, as only one thread will access it at
343  * a time, because I control the one function that does with timeout().
344  */
345 /* ARGSUSED */
346 static void
347 esp_ager(void *ignoreme)
348 {
349 	hrtime_t begin = gethrtime();
350 
351 	sadb_ager(&esp_sadb.s_v4, esp_pfkey_q, esp_sadb.s_ip_q,
352 	    ipsecesp_reap_delay);
353 	sadb_ager(&esp_sadb.s_v6, esp_pfkey_q, esp_sadb.s_ip_q,
354 	    ipsecesp_reap_delay);
355 
356 	esp_event = sadb_retimeout(begin, esp_pfkey_q, esp_ager,
357 	    &(ipsecesp_age_interval), ipsecesp_age_int_max, info.mi_idnum);
358 }
359 
360 /*
361  * Get an ESP NDD parameter.
362  */
363 /* ARGSUSED */
364 static int
365 ipsecesp_param_get(q, mp, cp, cr)
366 	queue_t	*q;
367 	mblk_t	*mp;
368 	caddr_t	cp;
369 	cred_t *cr;
370 {
371 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
372 	uint_t value;
373 
374 	mutex_enter(&ipsecesp_param_lock);
375 	value = ipsecesppa->ipsecesp_param_value;
376 	mutex_exit(&ipsecesp_param_lock);
377 
378 	(void) mi_mpprintf(mp, "%u", value);
379 	return (0);
380 }
381 
382 /*
383  * This routine sets an NDD variable in a ipsecespparam_t structure.
384  */
385 /* ARGSUSED */
386 static int
387 ipsecesp_param_set(q, mp, value, cp, cr)
388 	queue_t	*q;
389 	mblk_t	*mp;
390 	char	*value;
391 	caddr_t	cp;
392 	cred_t *cr;
393 {
394 	ulong_t	new_value;
395 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
396 
397 	/*
398 	 * Fail the request if the new value does not lie within the
399 	 * required bounds.
400 	 */
401 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
402 	    new_value < ipsecesppa->ipsecesp_param_min ||
403 	    new_value > ipsecesppa->ipsecesp_param_max) {
404 		return (EINVAL);
405 	}
406 
407 	/* Set the new value */
408 	mutex_enter(&ipsecesp_param_lock);
409 	ipsecesppa->ipsecesp_param_value = new_value;
410 	mutex_exit(&ipsecesp_param_lock);
411 	return (0);
412 }
413 
414 /*
415  * Using lifetime NDD variables, fill in an extended combination's
416  * lifetime information.
417  */
418 void
419 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb)
420 {
421 	ecomb->sadb_x_ecomb_soft_bytes = ipsecesp_default_soft_bytes;
422 	ecomb->sadb_x_ecomb_hard_bytes = ipsecesp_default_hard_bytes;
423 	ecomb->sadb_x_ecomb_soft_addtime = ipsecesp_default_soft_addtime;
424 	ecomb->sadb_x_ecomb_hard_addtime = ipsecesp_default_hard_addtime;
425 	ecomb->sadb_x_ecomb_soft_usetime = ipsecesp_default_soft_usetime;
426 	ecomb->sadb_x_ecomb_hard_usetime = ipsecesp_default_hard_usetime;
427 }
428 
429 /*
430  * Initialize things for ESP at module load time.
431  */
432 boolean_t
433 ipsecesp_ddi_init(void)
434 {
435 	int count;
436 	ipsecespparam_t *espp = ipsecesp_param_arr;
437 
438 	for (count = A_CNT(ipsecesp_param_arr); count-- > 0; espp++) {
439 		if (espp->ipsecesp_param_name != NULL &&
440 		    espp->ipsecesp_param_name[0]) {
441 			if (!nd_load(&ipsecesp_g_nd, espp->ipsecesp_param_name,
442 			    ipsecesp_param_get, ipsecesp_param_set,
443 			    (caddr_t)espp)) {
444 				nd_free(&ipsecesp_g_nd);
445 				return (B_FALSE);
446 			}
447 		}
448 	}
449 
450 	if (!esp_kstat_init()) {
451 		nd_free(&ipsecesp_g_nd);
452 		return (B_FALSE);
453 	}
454 
455 	esp_sadb.s_acquire_timeout = &ipsecesp_acquire_timeout;
456 	esp_sadb.s_acqfn = esp_send_acquire;
457 	sadbp_init("ESP", &esp_sadb, SADB_SATYPE_ESP, esp_hash_size);
458 
459 	esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
460 	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
461 
462 	mutex_init(&ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);
463 
464 	ip_drop_register(&esp_dropper, "IPsec ESP");
465 
466 	return (B_TRUE);
467 }
468 
469 /*
470  * Destroy things for ESP at module unload time.
471  */
472 void
473 ipsecesp_ddi_destroy(void)
474 {
475 	esp1dbg(("In ipsecesp_ddi_destroy.\n"));
476 
477 	sadbp_destroy(&esp_sadb);
478 	ip_drop_unregister(&esp_dropper);
479 	taskq_destroy(esp_taskq);
480 	mutex_destroy(&ipsecesp_param_lock);
481 	nd_free(&ipsecesp_g_nd);
482 	kstat_delete(esp_ksp);
483 }
484 
485 /*
486  * ESP module open routine.
487  */
488 /* ARGSUSED */
489 static int
490 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
491 {
492 	if (secpolicy_net_config(credp, B_FALSE) != 0) {
493 		esp1dbg(("Non-privileged user trying to open ipsecesp.\n"));
494 		return (EPERM);
495 	}
496 
497 	if (q->q_ptr != NULL)
498 		return (0);  /* Re-open of an already open instance. */
499 
500 	if (sflag != MODOPEN)
501 		return (EINVAL);
502 
503 	/*
504 	 * ASSUMPTIONS (because I'm MT_OCEXCL):
505 	 *
506 	 *	* I'm being pushed on top of IP for all my opens (incl. #1).
507 	 *	* Only ipsecesp_open() can write into esp_sadb.s_ip_q.
508 	 *	* Because of this, I can check lazily for esp_sadb.s_ip_q.
509 	 *
510 	 *  If these assumptions are wrong, I'm in BIG trouble...
511 	 */
512 
513 	q->q_ptr = q; /* just so I know I'm open */
514 
515 	if (esp_sadb.s_ip_q == NULL) {
516 		struct T_unbind_req *tur;
517 
518 		esp_sadb.s_ip_q = WR(q);
519 		/* Allocate an unbind... */
520 		esp_ip_unbind = allocb(sizeof (struct T_unbind_req), BPRI_HI);
521 
522 		/*
523 		 * Send down T_BIND_REQ to bind IPPROTO_ESP.
524 		 * Handle the ACK here in ESP.
525 		 */
526 		qprocson(q);
527 		if (esp_ip_unbind == NULL ||
528 		    !sadb_t_bind_req(esp_sadb.s_ip_q, IPPROTO_ESP)) {
529 			if (esp_ip_unbind != NULL) {
530 				freeb(esp_ip_unbind);
531 				esp_ip_unbind = NULL;
532 			}
533 			q->q_ptr = NULL;
534 			return (ENOMEM);
535 		}
536 
537 		esp_ip_unbind->b_datap->db_type = M_PROTO;
538 		tur = (struct T_unbind_req *)esp_ip_unbind->b_rptr;
539 		tur->PRIM_type = T_UNBIND_REQ;
540 	} else {
541 		qprocson(q);
542 	}
543 
544 	/*
545 	 * For now, there's not much I can do.  I'll be getting a message
546 	 * passed down to me from keysock (in my wput), and a T_BIND_ACK
547 	 * up from IP (in my rput).
548 	 */
549 
550 	return (0);
551 }
552 
553 /*
554  * ESP module close routine.
555  */
556 static int
557 ipsecesp_close(queue_t *q)
558 {
559 	/*
560 	 * If esp_sadb.s_ip_q is attached to this instance, send a
561 	 * T_UNBIND_REQ to IP for the instance before doing
562 	 * a qprocsoff().
563 	 */
564 	if (WR(q) == esp_sadb.s_ip_q && esp_ip_unbind != NULL) {
565 		putnext(WR(q), esp_ip_unbind);
566 		esp_ip_unbind = NULL;
567 	}
568 
569 	/*
570 	 * Clean up q_ptr, if needed.
571 	 */
572 	qprocsoff(q);
573 
574 	/* Keysock queue check is safe, because of OCEXCL perimeter. */
575 
576 	if (q == esp_pfkey_q) {
577 		esp0dbg(("ipsecesp_close:  Ummm... keysock is closing ESP.\n"));
578 		esp_pfkey_q = NULL;
579 		/* Detach qtimeouts. */
580 		(void) quntimeout(q, esp_event);
581 	}
582 
583 	if (WR(q) == esp_sadb.s_ip_q) {
584 		/*
585 		 * If the esp_sadb.s_ip_q is attached to this instance, find
586 		 * another.  The OCEXCL outer perimeter helps us here.
587 		 */
588 		esp_sadb.s_ip_q = NULL;
589 
590 		/*
591 		 * Find a replacement queue for esp_sadb.s_ip_q.
592 		 */
593 		if (esp_pfkey_q != NULL && esp_pfkey_q != RD(q)) {
594 			/*
595 			 * See if we can use the pfkey_q.
596 			 */
597 			esp_sadb.s_ip_q = WR(esp_pfkey_q);
598 		}
599 
600 		if (esp_sadb.s_ip_q == NULL ||
601 		    !sadb_t_bind_req(esp_sadb.s_ip_q, IPPROTO_ESP)) {
602 			esp1dbg(("ipsecesp: Can't reassign ip_q.\n"));
603 			esp_sadb.s_ip_q = NULL;
604 		} else {
605 			esp_ip_unbind = allocb(sizeof (struct T_unbind_req),
606 			    BPRI_HI);
607 
608 			if (esp_ip_unbind != NULL) {
609 				struct T_unbind_req *tur;
610 
611 				esp_ip_unbind->b_datap->db_type = M_PROTO;
612 				tur = (struct T_unbind_req *)
613 				    esp_ip_unbind->b_rptr;
614 				tur->PRIM_type = T_UNBIND_REQ;
615 			}
616 			/* If it's NULL, I can't do much here. */
617 		}
618 	}
619 
620 	return (0);
621 }
622 
623 /*
624  * Add a number of bytes to what the SA has protected so far.  Return
625  * B_TRUE if the SA can still protect that many bytes.
626  *
627  * Caller must REFRELE the passed-in assoc.  This function must REFRELE
628  * any obtained peer SA.
629  */
630 static boolean_t
631 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
632 {
633 	ipsa_t *inassoc, *outassoc;
634 	isaf_t *bucket;
635 	boolean_t inrc, outrc, isv6;
636 	sadb_t *sp;
637 	int outhash;
638 
639 	/* No peer?  No problem! */
640 	if (!assoc->ipsa_haspeer) {
641 		return (sadb_age_bytes(esp_pfkey_q, assoc, bytes,
642 		    B_TRUE));
643 	}
644 
645 	/*
646 	 * Otherwise, we want to grab both the original assoc and its peer.
647 	 * There might be a race for this, but if it's a real race, two
648 	 * expire messages may occur.  We limit this by only sending the
649 	 * expire message on one of the peers, we'll pick the inbound
650 	 * arbitrarily.
651 	 *
652 	 * If we need tight synchronization on the peer SA, then we need to
653 	 * reconsider.
654 	 */
655 
656 	/* Use address length to select IPv6/IPv4 */
657 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
658 	sp = isv6 ? &esp_sadb.s_v6 : &esp_sadb.s_v4;
659 
660 	if (inbound) {
661 		inassoc = assoc;
662 		if (isv6) {
663 			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
664 			    &inassoc->ipsa_dstaddr));
665 		} else {
666 			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
667 				&inassoc->ipsa_dstaddr));
668 		}
669 		bucket = &sp->sdb_of[outhash];
670 		mutex_enter(&bucket->isaf_lock);
671 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
672 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
673 		    inassoc->ipsa_addrfam);
674 		mutex_exit(&bucket->isaf_lock);
675 		if (outassoc == NULL) {
676 			/* Q: Do we wish to set haspeer == B_FALSE? */
677 			esp0dbg(("esp_age_bytes: "
678 			    "can't find peer for inbound.\n"));
679 			return (sadb_age_bytes(esp_pfkey_q, inassoc,
680 			    bytes, B_TRUE));
681 		}
682 	} else {
683 		outassoc = assoc;
684 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
685 		mutex_enter(&bucket->isaf_lock);
686 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
687 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
688 		    outassoc->ipsa_addrfam);
689 		mutex_exit(&bucket->isaf_lock);
690 		if (inassoc == NULL) {
691 			/* Q: Do we wish to set haspeer == B_FALSE? */
692 			esp0dbg(("esp_age_bytes: "
693 			    "can't find peer for outbound.\n"));
694 			return (sadb_age_bytes(esp_pfkey_q, outassoc,
695 			    bytes, B_TRUE));
696 		}
697 	}
698 
699 	inrc = sadb_age_bytes(esp_pfkey_q, inassoc, bytes, B_TRUE);
700 	outrc = sadb_age_bytes(esp_pfkey_q, outassoc, bytes, B_FALSE);
701 
702 	/*
703 	 * REFRELE any peer SA.
704 	 *
705 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
706 	 * them in { }.
707 	 */
708 	if (inbound) {
709 		IPSA_REFRELE(outassoc);
710 	} else {
711 		IPSA_REFRELE(inassoc);
712 	}
713 
714 	return (inrc && outrc);
715 }
716 
717 /*
718  * Do incoming NAT-T manipulations for packet.
719  */
720 static ipsec_status_t
721 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
722 {
723 	ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
724 	tcpha_t *tcph;
725 	udpha_t *udpha;
726 	/* Initialize to our inbound cksum adjustment... */
727 	uint32_t sum = assoc->ipsa_inbound_cksum;
728 
729 	switch (ipha->ipha_protocol) {
730 	case IPPROTO_TCP:
731 		tcph = (tcpha_t *)(data_mp->b_rptr +
732 		    IPH_HDR_LENGTH(ipha));
733 
734 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
735 		sum += ~ntohs(tcph->tha_sum) & 0xFFFF;
736 		DOWN_SUM(sum);
737 		DOWN_SUM(sum);
738 		tcph->tha_sum = ~htons(sum);
739 		break;
740 	case IPPROTO_UDP:
741 		udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));
742 
743 		if (udpha->uha_checksum != 0) {
744 			/* Adujst if the inbound one was not zero. */
745 			sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
746 			DOWN_SUM(sum);
747 			DOWN_SUM(sum);
748 			udpha->uha_checksum = ~htons(sum);
749 			if (udpha->uha_checksum == 0)
750 				udpha->uha_checksum = 0xFFFF;
751 		}
752 #undef DOWN_SUM
753 		break;
754 	case IPPROTO_IP:
755 		/*
756 		 * This case is only an issue for self-encapsulated
757 		 * packets.  So for now, fall through.
758 		 */
759 		break;
760 	}
761 	return (IPSEC_STATUS_SUCCESS);
762 }
763 
764 
765 /*
766  * Strip ESP header, check padding, and fix IP header.
767  * Returns B_TRUE on success, B_FALSE if an error occured.
768  */
769 static boolean_t
770 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
771     kstat_named_t **counter)
772 {
773 	ipha_t *ipha;
774 	ip6_t *ip6h;
775 	uint_t divpoint;
776 	mblk_t *scratch;
777 	uint8_t nexthdr, padlen;
778 	uint8_t lastpad;
779 	uint8_t *lastbyte;
780 
781 	/*
782 	 * Strip ESP data and fix IP header.
783 	 *
784 	 * XXX In case the beginning of esp_inbound() changes to not do a
785 	 * pullup, this part of the code can remain unchanged.
786 	 */
787 	if (isv4) {
788 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
789 		ipha = (ipha_t *)data_mp->b_rptr;
790 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
791 		    IPH_HDR_LENGTH(ipha));
792 		divpoint = IPH_HDR_LENGTH(ipha);
793 	} else {
794 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
795 		ip6h = (ip6_t *)data_mp->b_rptr;
796 		divpoint = ip_hdr_length_v6(data_mp, ip6h);
797 	}
798 
799 	scratch = data_mp;
800 	while (scratch->b_cont != NULL)
801 		scratch = scratch->b_cont;
802 
803 	ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);
804 
805 	/*
806 	 * "Next header" and padding length are the last two bytes in the
807 	 * ESP-protected datagram, thus the explicit - 1 and - 2.
808 	 * lastpad is the last byte of the padding, which can be used for
809 	 * a quick check to see if the padding is correct.
810 	 */
811 	lastbyte = scratch->b_wptr - 1;
812 	nexthdr = *lastbyte--;
813 	padlen = *lastbyte--;
814 
815 	if (isv4) {
816 		/* Fix part of the IP header. */
817 		ipha->ipha_protocol = nexthdr;
818 		/*
819 		 * Reality check the padlen.  The explicit - 2 is for the
820 		 * padding length and the next-header bytes.
821 		 */
822 		if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
823 		    sizeof (esph_t) - ivlen) {
824 			ESP_BUMP_STAT(bad_decrypt);
825 			ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
826 			    "Corrupt ESP packet (padlen too big).\n");
827 			esp1dbg(("padlen (%d) is greater than:\n", padlen));
828 			esp1dbg(("pkt len(%d) - ip hdr - esp hdr - ivlen(%d) "
829 			    "= %d.\n", ntohs(ipha->ipha_length), ivlen,
830 			    (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
831 				2 - sizeof (esph_t) - ivlen)));
832 			*counter = &ipdrops_esp_bad_padlen;
833 			return (B_FALSE);
834 		}
835 
836 		/*
837 		 * Fix the rest of the header.  The explicit - 2 is for the
838 		 * padding length and the next-header bytes.
839 		 */
840 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
841 		    2 - sizeof (esph_t) - ivlen);
842 		ipha->ipha_hdr_checksum = 0;
843 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
844 	} else {
845 		if (ip6h->ip6_nxt == IPPROTO_ESP) {
846 			ip6h->ip6_nxt = nexthdr;
847 		} else {
848 			ip6_pkt_t ipp;
849 
850 			bzero(&ipp, sizeof (ipp));
851 			(void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL);
852 			if (ipp.ipp_dstopts != NULL) {
853 				ipp.ipp_dstopts->ip6d_nxt = nexthdr;
854 			} else if (ipp.ipp_rthdr != NULL) {
855 				ipp.ipp_rthdr->ip6r_nxt = nexthdr;
856 			} else if (ipp.ipp_hopopts != NULL) {
857 				ipp.ipp_hopopts->ip6h_nxt = nexthdr;
858 			} else {
859 				/* Panic a DEBUG kernel. */
860 				ASSERT(ipp.ipp_hopopts != NULL);
861 				/* Otherwise, pretend it's IP + ESP. */
862 				cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
863 				ip6h->ip6_nxt = nexthdr;
864 			}
865 		}
866 
867 		if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
868 		    ivlen) {
869 			ESP_BUMP_STAT(bad_decrypt);
870 			ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
871 			    "Corrupt ESP packet (v6 padlen too big).\n");
872 			esp1dbg(("padlen (%d) is greater than:\n", padlen));
873 			esp1dbg(("pkt len(%u) - ip hdr - esp hdr - ivlen(%d)"
874 			    " = %u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
875 				+ sizeof (ip6_t)), ivlen,
876 			    (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
877 				sizeof (esph_t) - ivlen)));
878 			*counter = &ipdrops_esp_bad_padlen;
879 			return (B_FALSE);
880 		}
881 
882 
883 		/*
884 		 * Fix the rest of the header.  The explicit - 2 is for the
885 		 * padding length and the next-header bytes.  IPv6 is nice,
886 		 * because there's no hdr checksum!
887 		 */
888 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
889 		    2 - sizeof (esph_t) - ivlen);
890 	}
891 
892 	if (ipsecesp_padding_check > 0 && padlen > 0) {
893 		/*
894 		 * Weak padding check: compare last-byte to length, they
895 		 * should be equal.
896 		 */
897 		lastpad = *lastbyte--;
898 
899 		if (padlen != lastpad) {
900 			ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
901 			    "Corrupt ESP packet (lastpad != padlen).\n");
902 			esp1dbg(("lastpad (%d) not equal to padlen (%d):\n",
903 				    lastpad, padlen));
904 			ESP_BUMP_STAT(bad_padding);
905 			*counter = &ipdrops_esp_bad_padding;
906 			return (B_FALSE);
907 		}
908 
909 		/*
910 		 * Strong padding check: Check all pad bytes to see that
911 		 * they're ascending.  Go backwards using a descending counter
912 		 * to verify.  padlen == 1 is checked by previous block, so
913 		 * only bother if we've more than 1 byte of padding.
914 		 * Consequently, start the check one byte before the location
915 		 * of "lastpad".
916 		 */
917 		if (ipsecesp_padding_check > 1) {
918 			/*
919 			 * This assert may have to become an if and a pullup
920 			 * if we start accepting multi-dblk mblks. For now,
921 			 * though, any packet here will have been pulled up in
922 			 * esp_inbound.
923 			 */
924 			ASSERT(MBLKL(scratch) >= lastpad + 3);
925 
926 			/*
927 			 * Use "--lastpad" because we already checked the very
928 			 * last pad byte previously.
929 			 */
930 			while (--lastpad != 0) {
931 				if (lastpad != *lastbyte) {
932 					ipsec_rl_strlog(info.mi_idnum, 0, 0,
933 					    SL_ERROR | SL_WARN, "Corrupt ESP "
934 					    "packet (bad padding).\n");
935 					esp1dbg(("padding not in correct"
936 						    " format:\n"));
937 					ESP_BUMP_STAT(bad_padding);
938 					*counter = &ipdrops_esp_bad_padding;
939 					return (B_FALSE);
940 				}
941 				lastbyte--;
942 			}
943 		}
944 	}
945 
946 	/* Trim off the padding. */
947 	ASSERT(data_mp->b_cont == NULL);
948 	data_mp->b_wptr -= (padlen + 2);
949 
950 	/*
951 	 * Remove the ESP header.
952 	 *
953 	 * The above assertions about data_mp's size will make this work.
954 	 *
955 	 * XXX  Question:  If I send up and get back a contiguous mblk,
956 	 * would it be quicker to bcopy over, or keep doing the dupb stuff?
957 	 * I go with copying for now.
958 	 */
959 
960 	if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
961 	    IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
962 		uint8_t *start = data_mp->b_rptr;
963 		uint32_t *src, *dst;
964 
965 		src = (uint32_t *)(start + divpoint);
966 		dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);
967 
968 		ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
969 		    IS_P2ALIGNED(src, sizeof (uint32_t)));
970 
971 		do {
972 			src--;
973 			dst--;
974 			*dst = *src;
975 		} while (src != (uint32_t *)start);
976 
977 		data_mp->b_rptr = (uchar_t *)dst;
978 	} else {
979 		uint8_t *start = data_mp->b_rptr;
980 		uint8_t *src, *dst;
981 
982 		src = start + divpoint;
983 		dst = src + sizeof (esph_t) + ivlen;
984 
985 		do {
986 			src--;
987 			dst--;
988 			*dst = *src;
989 		} while (src != start);
990 
991 		data_mp->b_rptr = dst;
992 	}
993 
994 	esp2dbg(("data_mp after inbound ESP adjustment:\n"));
995 	esp2dbg((dump_msg(data_mp)));
996 
997 	return (B_TRUE);
998 }
999 
1000 /*
1001  * Updating use times can be tricky business if the ipsa_haspeer flag is
1002  * set.  This function is called once in an SA's lifetime.
1003  *
1004  * Caller has to REFRELE "assoc" which is passed in.  This function has
1005  * to REFRELE any peer SA that is obtained.
1006  */
1007 static void
1008 esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
1009 {
1010 	ipsa_t *inassoc, *outassoc;
1011 	isaf_t *bucket;
1012 	sadb_t *sp;
1013 	int outhash;
1014 	boolean_t isv6;
1015 
1016 	/* No peer?  No problem! */
1017 	if (!assoc->ipsa_haspeer) {
1018 		sadb_set_usetime(assoc);
1019 		return;
1020 	}
1021 
1022 	/*
1023 	 * Otherwise, we want to grab both the original assoc and its peer.
1024 	 * There might be a race for this, but if it's a real race, the times
1025 	 * will be out-of-synch by at most a second, and since our time
1026 	 * granularity is a second, this won't be a problem.
1027 	 *
1028 	 * If we need tight synchronization on the peer SA, then we need to
1029 	 * reconsider.
1030 	 */
1031 
1032 	/* Use address length to select IPv6/IPv4 */
1033 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
1034 	sp = isv6 ? &esp_sadb.s_v6 : &esp_sadb.s_v4;
1035 
1036 	if (inbound) {
1037 		inassoc = assoc;
1038 		if (isv6) {
1039 			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
1040 			    &inassoc->ipsa_dstaddr));
1041 		} else {
1042 			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
1043 				&inassoc->ipsa_dstaddr));
1044 		}
1045 		bucket = &sp->sdb_of[outhash];
1046 		mutex_enter(&bucket->isaf_lock);
1047 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1048 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1049 		    inassoc->ipsa_addrfam);
1050 		mutex_exit(&bucket->isaf_lock);
1051 		if (outassoc == NULL) {
1052 			/* Q: Do we wish to set haspeer == B_FALSE? */
1053 			esp0dbg(("esp_set_usetime: "
1054 			    "can't find peer for inbound.\n"));
1055 			sadb_set_usetime(inassoc);
1056 			return;
1057 		}
1058 	} else {
1059 		outassoc = assoc;
1060 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1061 		mutex_enter(&bucket->isaf_lock);
1062 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1063 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1064 		    outassoc->ipsa_addrfam);
1065 		mutex_exit(&bucket->isaf_lock);
1066 		if (inassoc == NULL) {
1067 			/* Q: Do we wish to set haspeer == B_FALSE? */
1068 			esp0dbg(("esp_set_usetime: "
1069 			    "can't find peer for outbound.\n"));
1070 			sadb_set_usetime(outassoc);
1071 			return;
1072 		}
1073 	}
1074 
1075 	/* Update usetime on both. */
1076 	sadb_set_usetime(inassoc);
1077 	sadb_set_usetime(outassoc);
1078 
1079 	/*
1080 	 * REFRELE any peer SA.
1081 	 *
1082 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
1083 	 * them in { }.
1084 	 */
1085 	if (inbound) {
1086 		IPSA_REFRELE(outassoc);
1087 	} else {
1088 		IPSA_REFRELE(inassoc);
1089 	}
1090 }
1091 
1092 /*
1093  * Handle ESP inbound data for IPv4 and IPv6.
1094  * On success returns B_TRUE, on failure returns B_FALSE and frees the
1095  * mblk chain ipsec_in_mp.
1096  */
1097 ipsec_status_t
1098 esp_inbound(mblk_t *ipsec_in_mp, void *arg)
1099 {
1100 	mblk_t *data_mp = ipsec_in_mp->b_cont;
1101 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
1102 	esph_t *esph = (esph_t *)arg;
1103 	ipsa_t *ipsa = ii->ipsec_in_esp_sa;
1104 
1105 	if (ipsa->ipsa_usetime == 0)
1106 		esp_set_usetime(ipsa, B_TRUE);
1107 
1108 	/*
1109 	 * We may wish to check replay in-range-only here as an optimization.
1110 	 * Include the reality check of ipsa->ipsa_replay >
1111 	 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
1112 	 * where N == ipsa->ipsa_replay_wsize.
1113 	 *
1114 	 * Another check that may come here later is the "collision" check.
1115 	 * If legitimate packets flow quickly enough, this won't be a problem,
1116 	 * but collisions may cause authentication algorithm crunching to
1117 	 * take place when it doesn't need to.
1118 	 */
1119 	if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
1120 		ESP_BUMP_STAT(replay_early_failures);
1121 		IP_ESP_BUMP_STAT(in_discards);
1122 		/*
1123 		 * TODO: Extract inbound interface from the IPSEC_IN
1124 		 * message's ii->ipsec_in_rill_index.
1125 		 */
1126 		ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL,
1127 		    &ipdrops_esp_early_replay, &esp_dropper);
1128 		return (IPSEC_STATUS_FAILED);
1129 	}
1130 
1131 	/*
1132 	 * Has this packet already been processed by a hardware
1133 	 * IPsec accelerator?
1134 	 */
1135 	if (ii->ipsec_in_accelerated) {
1136 		ipsec_status_t rv;
1137 		esp3dbg(("esp_inbound: pkt processed by ill=%d isv6=%d\n",
1138 		    ii->ipsec_in_ill_index, !ii->ipsec_in_v4));
1139 		rv = esp_inbound_accelerated(ipsec_in_mp,
1140 		    data_mp, ii->ipsec_in_v4, ipsa);
1141 		return (rv);
1142 	}
1143 	ESP_BUMP_STAT(noaccel);
1144 
1145 	/*
1146 	 * Adjust the IP header's payload length to reflect the removal
1147 	 * of the ICV.
1148 	 */
1149 	if (!ii->ipsec_in_v4) {
1150 		ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
1151 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
1152 		    ipsa->ipsa_mac_len);
1153 	} else {
1154 		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1155 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
1156 		    ipsa->ipsa_mac_len);
1157 	}
1158 
1159 	/* submit the request to the crypto framework */
1160 	return (esp_submit_req_inbound(ipsec_in_mp, ipsa,
1161 	    (uint8_t *)esph - data_mp->b_rptr));
1162 }
1163 
1164 /*
1165  * Perform the really difficult work of inserting the proposed situation.
1166  * Called while holding the algorithm lock.
1167  */
1168 static void
1169 esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs)
1170 {
1171 	sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
1172 	ipsec_out_t *io;
1173 	ipsec_action_t *ap;
1174 	ipsec_prot_t *prot;
1175 
1176 	ASSERT(MUTEX_HELD(&alg_lock));
1177 	io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr;
1178 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
1179 
1180 	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
1181 	prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
1182 	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
1183 
1184 	prop->sadb_prop_replay = ipsecesp_replay_size;
1185 
1186 	/*
1187 	 * Based upon algorithm properties, and what-not, prioritize
1188 	 * a proposal.  If the IPSEC_OUT message has an algorithm specified,
1189 	 * use it first and foremost.
1190 	 *
1191 	 * For each action in policy list
1192 	 *   Add combination.  If I've hit limit, return.
1193 	 */
1194 
1195 	for (ap = acqrec->ipsacq_act; ap != NULL;
1196 	    ap = ap->ipa_next) {
1197 		ipsec_alginfo_t *ealg = NULL;
1198 		ipsec_alginfo_t *aalg = NULL;
1199 
1200 		if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
1201 			continue;
1202 
1203 		prot = &ap->ipa_act.ipa_apply;
1204 
1205 		if (!(prot->ipp_use_esp))
1206 			continue;
1207 
1208 		if (prot->ipp_esp_auth_alg != 0) {
1209 			aalg = ipsec_alglists[IPSEC_ALG_AUTH]
1210 			    [prot->ipp_esp_auth_alg];
1211 			if (aalg == NULL || !ALG_VALID(aalg))
1212 				continue;
1213 		}
1214 
1215 		ASSERT(prot->ipp_encr_alg > 0);
1216 		ealg = ipsec_alglists[IPSEC_ALG_ENCR][prot->ipp_encr_alg];
1217 		if (ealg == NULL || !ALG_VALID(ealg))
1218 			continue;
1219 
1220 		comb->sadb_comb_flags = 0;
1221 		comb->sadb_comb_reserved = 0;
1222 		comb->sadb_comb_encrypt = ealg->alg_id;
1223 		comb->sadb_comb_encrypt_minbits =
1224 		    MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
1225 		comb->sadb_comb_encrypt_maxbits =
1226 		    MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
1227 		if (aalg == NULL) {
1228 			comb->sadb_comb_auth = 0;
1229 			comb->sadb_comb_auth_minbits = 0;
1230 			comb->sadb_comb_auth_maxbits = 0;
1231 		} else {
1232 			comb->sadb_comb_auth = aalg->alg_id;
1233 			comb->sadb_comb_auth_minbits =
1234 			    MAX(prot->ipp_espa_minbits, aalg->alg_ef_minbits);
1235 			comb->sadb_comb_auth_maxbits =
1236 			    MIN(prot->ipp_espa_maxbits, aalg->alg_ef_maxbits);
1237 		}
1238 
1239 		/*
1240 		 * The following may be based on algorithm
1241 		 * properties, but in the meantime, we just pick
1242 		 * some good, sensible numbers.  Key mgmt. can
1243 		 * (and perhaps should) be the place to finalize
1244 		 * such decisions.
1245 		 */
1246 
1247 		/*
1248 		 * No limits on allocations, since we really don't
1249 		 * support that concept currently.
1250 		 */
1251 		comb->sadb_comb_soft_allocations = 0;
1252 		comb->sadb_comb_hard_allocations = 0;
1253 
1254 		/*
1255 		 * These may want to come from policy rule..
1256 		 */
1257 		comb->sadb_comb_soft_bytes = ipsecesp_default_soft_bytes;
1258 		comb->sadb_comb_hard_bytes = ipsecesp_default_hard_bytes;
1259 		comb->sadb_comb_soft_addtime = ipsecesp_default_soft_addtime;
1260 		comb->sadb_comb_hard_addtime = ipsecesp_default_hard_addtime;
1261 		comb->sadb_comb_soft_usetime = ipsecesp_default_soft_usetime;
1262 		comb->sadb_comb_hard_usetime = ipsecesp_default_hard_usetime;
1263 
1264 		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
1265 		if (--combs == 0)
1266 			break;	/* out of space.. */
1267 		comb++;
1268 	}
1269 }
1270 
1271 /*
1272  * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
1273  */
1274 static void
1275 esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended)
1276 {
1277 	uint_t combs;
1278 	sadb_msg_t *samsg;
1279 	sadb_prop_t *prop;
1280 	mblk_t *pfkeymp, *msgmp;
1281 
1282 	ESP_BUMP_STAT(acquire_requests);
1283 
1284 	if (esp_pfkey_q == NULL)
1285 		return;
1286 
1287 	/* Set up ACQUIRE. */
1288 	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP);
1289 	if (pfkeymp == NULL) {
1290 		esp0dbg(("sadb_setup_acquire failed.\n"));
1291 		return;
1292 	}
1293 	ASSERT(MUTEX_HELD(&alg_lock));
1294 	combs = ipsec_nalgs[IPSEC_ALG_AUTH] * ipsec_nalgs[IPSEC_ALG_ENCR];
1295 	msgmp = pfkeymp->b_cont;
1296 	samsg = (sadb_msg_t *)(msgmp->b_rptr);
1297 
1298 	/* Insert proposal here. */
1299 
1300 	prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
1301 	esp_insert_prop(prop, acqrec, combs);
1302 	samsg->sadb_msg_len += prop->sadb_prop_len;
1303 	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
1304 
1305 	mutex_exit(&alg_lock);
1306 
1307 	/*
1308 	 * Must mutex_exit() before sending PF_KEY message up, in
1309 	 * order to avoid recursive mutex_enter() if there are no registered
1310 	 * listeners.
1311 	 *
1312 	 * Once I've sent the message, I'm cool anyway.
1313 	 */
1314 	mutex_exit(&acqrec->ipsacq_lock);
1315 	if (extended != NULL) {
1316 		putnext(esp_pfkey_q, extended);
1317 	}
1318 	putnext(esp_pfkey_q, pfkeymp);
1319 }
1320 
1321 /*
1322  * Handle the SADB_GETSPI message.  Create a larval SA.
1323  */
1324 static void
1325 esp_getspi(mblk_t *mp, keysock_in_t *ksi)
1326 {
1327 	ipsa_t *newbie, *target;
1328 	isaf_t *outbound, *inbound;
1329 	int rc, diagnostic;
1330 	sadb_sa_t *assoc;
1331 	keysock_out_t *kso;
1332 	uint32_t newspi;
1333 
1334 	/*
1335 	 * Randomly generate a proposed SPI value
1336 	 */
1337 	(void) random_get_pseudo_bytes((uint8_t *)&newspi, sizeof (uint32_t));
1338 	newbie = sadb_getspi(ksi, newspi, &diagnostic);
1339 
1340 	if (newbie == NULL) {
1341 		sadb_pfkey_error(esp_pfkey_q, mp, ENOMEM, diagnostic,
1342 		    ksi->ks_in_serial);
1343 		return;
1344 	} else if (newbie == (ipsa_t *)-1) {
1345 		sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, diagnostic,
1346 		    ksi->ks_in_serial);
1347 		return;
1348 	}
1349 
1350 	/*
1351 	 * XXX - We may randomly collide.  We really should recover from this.
1352 	 *	 Unfortunately, that could require spending way-too-much-time
1353 	 *	 in here.  For now, let the user retry.
1354 	 */
1355 
1356 	if (newbie->ipsa_addrfam == AF_INET6) {
1357 		outbound = OUTBOUND_BUCKET_V6(&esp_sadb.s_v6,
1358 		    *(uint32_t *)(newbie->ipsa_dstaddr));
1359 		inbound = INBOUND_BUCKET(&esp_sadb.s_v6, newbie->ipsa_spi);
1360 	} else {
1361 		ASSERT(newbie->ipsa_addrfam == AF_INET);
1362 		outbound = OUTBOUND_BUCKET_V4(&esp_sadb.s_v4,
1363 		    *(uint32_t *)(newbie->ipsa_dstaddr));
1364 		inbound = INBOUND_BUCKET(&esp_sadb.s_v4, newbie->ipsa_spi);
1365 	}
1366 
1367 	mutex_enter(&outbound->isaf_lock);
1368 	mutex_enter(&inbound->isaf_lock);
1369 
1370 	/*
1371 	 * Check for collisions (i.e. did sadb_getspi() return with something
1372 	 * that already exists?).
1373 	 *
1374 	 * Try outbound first.  Even though SADB_GETSPI is traditionally
1375 	 * for inbound SAs, you never know what a user might do.
1376 	 */
1377 	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1378 	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1379 	if (target == NULL) {
1380 		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1381 		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1382 		    newbie->ipsa_addrfam);
1383 	}
1384 
1385 	/*
1386 	 * I don't have collisions elsewhere!
1387 	 * (Nor will I because I'm still holding inbound/outbound locks.)
1388 	 */
1389 
1390 	if (target != NULL) {
1391 		rc = EEXIST;
1392 		IPSA_REFRELE(target);
1393 	} else {
1394 		/*
1395 		 * sadb_insertassoc() also checks for collisions, so
1396 		 * if there's a colliding entry, rc will be set
1397 		 * to EEXIST.
1398 		 */
1399 		rc = sadb_insertassoc(newbie, inbound);
1400 		(void) drv_getparm(TIME, &newbie->ipsa_hardexpiretime);
1401 		newbie->ipsa_hardexpiretime += ipsecesp_larval_timeout;
1402 	}
1403 
1404 	/*
1405 	 * Can exit outbound mutex.  Hold inbound until we're done
1406 	 * with newbie.
1407 	 */
1408 	mutex_exit(&outbound->isaf_lock);
1409 
1410 	if (rc != 0) {
1411 		mutex_exit(&inbound->isaf_lock);
1412 		IPSA_REFRELE(newbie);
1413 		sadb_pfkey_error(esp_pfkey_q, mp, rc, SADB_X_DIAGNOSTIC_NONE,
1414 		    ksi->ks_in_serial);
1415 		return;
1416 	}
1417 
1418 
1419 	/* Can write here because I'm still holding the bucket lock. */
1420 	newbie->ipsa_type = SADB_SATYPE_ESP;
1421 
1422 	/*
1423 	 * Construct successful return message.  We have one thing going
1424 	 * for us in PF_KEY v2.  That's the fact that
1425 	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
1426 	 */
1427 	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
1428 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1429 	assoc->sadb_sa_spi = newbie->ipsa_spi;
1430 	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
1431 	mutex_exit(&inbound->isaf_lock);
1432 
1433 	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
1434 	kso = (keysock_out_t *)ksi;
1435 	kso->ks_out_len = sizeof (*kso);
1436 	kso->ks_out_serial = ksi->ks_in_serial;
1437 	kso->ks_out_type = KEYSOCK_OUT;
1438 
1439 	/*
1440 	 * Can safely putnext() to esp_pfkey_q, because this is a turnaround
1441 	 * from the esp_pfkey_q.
1442 	 */
1443 	putnext(esp_pfkey_q, mp);
1444 }
1445 
1446 /*
1447  * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
1448  * allocated mblk with the ESP header in between the two.
1449  */
1450 static boolean_t
1451 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint)
1452 {
1453 	mblk_t *split_mp = mp;
1454 	uint_t wheretodiv = divpoint;
1455 
1456 	while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
1457 		wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
1458 		split_mp = split_mp->b_cont;
1459 		ASSERT(split_mp != NULL);
1460 	}
1461 
1462 	if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
1463 		mblk_t *scratch;
1464 
1465 		/* "scratch" is the 2nd half, split_mp is the first. */
1466 		scratch = dupb(split_mp);
1467 		if (scratch == NULL) {
1468 			esp1dbg(("esp_insert_esp: can't allocate scratch.\n"));
1469 			return (B_FALSE);
1470 		}
1471 		/* NOTE:  dupb() doesn't set b_cont appropriately. */
1472 		scratch->b_cont = split_mp->b_cont;
1473 		scratch->b_rptr += wheretodiv;
1474 		split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
1475 		split_mp->b_cont = scratch;
1476 	}
1477 	/*
1478 	 * At this point, split_mp is exactly "wheretodiv" bytes long, and
1479 	 * holds the end of the pre-ESP part of the datagram.
1480 	 */
1481 	esp_mp->b_cont = split_mp->b_cont;
1482 	split_mp->b_cont = esp_mp;
1483 
1484 	return (B_TRUE);
1485 }
1486 
1487 /*
1488  * Finish processing of an inbound ESP packet after processing by the
1489  * crypto framework.
1490  * - Remove the ESP header.
1491  * - Send packet back to IP.
1492  * If authentication was performed on the packet, this function is called
1493  * only if the authentication succeeded.
1494  * On success returns B_TRUE, on failure returns B_FALSE and frees the
1495  * mblk chain ipsec_in_mp.
1496  */
1497 static ipsec_status_t
1498 esp_in_done(mblk_t *ipsec_in_mp)
1499 {
1500 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
1501 	mblk_t *data_mp;
1502 	ipsa_t *assoc;
1503 	uint_t espstart;
1504 	uint32_t ivlen = 0;
1505 	uint_t processed_len;
1506 	esph_t *esph;
1507 	kstat_named_t *counter;
1508 	boolean_t is_natt;
1509 
1510 	assoc = ii->ipsec_in_esp_sa;
1511 	ASSERT(assoc != NULL);
1512 
1513 	is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
1514 
1515 	/* get the pointer to the ESP header */
1516 	if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
1517 		/* authentication-only ESP */
1518 		espstart = ii->ipsec_in_crypto_data.cd_offset;
1519 		processed_len = ii->ipsec_in_crypto_data.cd_length;
1520 	} else {
1521 		/* encryption present */
1522 		ivlen = assoc->ipsa_iv_len;
1523 		if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
1524 			/* encryption-only ESP */
1525 			espstart = ii->ipsec_in_crypto_data.cd_offset -
1526 				sizeof (esph_t) - assoc->ipsa_iv_len;
1527 			processed_len = ii->ipsec_in_crypto_data.cd_length +
1528 				ivlen;
1529 		} else {
1530 			/* encryption with authentication */
1531 			espstart = ii->ipsec_in_crypto_dual_data.dd_offset1;
1532 			processed_len = ii->ipsec_in_crypto_dual_data.dd_len2 +
1533 			    ivlen;
1534 		}
1535 	}
1536 
1537 	data_mp = ipsec_in_mp->b_cont;
1538 	esph = (esph_t *)(data_mp->b_rptr + espstart);
1539 
1540 	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) {
1541 		/* authentication passed if we reach this point */
1542 		ESP_BUMP_STAT(good_auth);
1543 		data_mp->b_wptr -= assoc->ipsa_mac_len;
1544 
1545 		/*
1546 		 * Check replay window here!
1547 		 * For right now, assume keysock will set the replay window
1548 		 * size to zero for SAs that have an unspecified sender.
1549 		 * This may change...
1550 		 */
1551 
1552 		if (!sadb_replay_check(assoc, esph->esph_replay)) {
1553 			/*
1554 			 * Log the event. As of now we print out an event.
1555 			 * Do not print the replay failure number, or else
1556 			 * syslog cannot collate the error messages.  Printing
1557 			 * the replay number that failed opens a denial-of-
1558 			 * service attack.
1559 			 */
1560 			ipsec_assocfailure(info.mi_idnum, 0, 0,
1561 			    SL_ERROR | SL_WARN,
1562 			    "Replay failed for ESP spi 0x%x, dst %s.\n",
1563 			    assoc->ipsa_spi, assoc->ipsa_dstaddr,
1564 			    assoc->ipsa_addrfam);
1565 			ESP_BUMP_STAT(replay_failures);
1566 			counter = &ipdrops_esp_replay;
1567 			goto drop_and_bail;
1568 		}
1569 	}
1570 
1571 	if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
1572 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
1573 		ipsec_assocfailure(info.mi_idnum, 0, 0,
1574 		    SL_ERROR | SL_WARN,
1575 		    "ESP association 0x%x, dst %s had bytes expire.\n",
1576 		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam);
1577 		ESP_BUMP_STAT(bytes_expired);
1578 		counter = &ipdrops_esp_bytes_expire;
1579 		goto drop_and_bail;
1580 	}
1581 
1582 	/*
1583 	 * Remove ESP header and padding from packet.  I hope the compiler
1584 	 * spews "branch, predict taken" code for this.
1585 	 */
1586 
1587 	if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter)) {
1588 		if (is_natt)
1589 			return (esp_fix_natt_checksums(data_mp, assoc));
1590 		return (IPSEC_STATUS_SUCCESS);
1591 	}
1592 
1593 	esp1dbg(("esp_in_done: esp_strip_header() failed\n"));
1594 drop_and_bail:
1595 	IP_ESP_BUMP_STAT(in_discards);
1596 	/*
1597 	 * TODO: Extract inbound interface from the IPSEC_IN message's
1598 	 * ii->ipsec_in_rill_index.
1599 	 */
1600 	ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, counter, &esp_dropper);
1601 	return (IPSEC_STATUS_FAILED);
1602 }
1603 
1604 /*
1605  * Called upon failing the inbound ICV check. The message passed as
1606  * argument is freed.
1607  */
1608 static void
1609 esp_log_bad_auth(mblk_t *ipsec_in)
1610 {
1611 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr;
1612 	ipsa_t *assoc = ii->ipsec_in_esp_sa;
1613 
1614 	/*
1615 	 * Log the event. Don't print to the console, block
1616 	 * potential denial-of-service attack.
1617 	 */
1618 	ESP_BUMP_STAT(bad_auth);
1619 
1620 	ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
1621 	    "ESP Authentication failed for spi 0x%x, dst %s.\n",
1622 	    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam);
1623 
1624 	IP_ESP_BUMP_STAT(in_discards);
1625 	/*
1626 	 * TODO: Extract inbound interface from the IPSEC_IN
1627 	 * message's ii->ipsec_in_rill_index.
1628 	 */
1629 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, &ipdrops_esp_bad_auth,
1630 	    &esp_dropper);
1631 }
1632 
1633 
1634 /*
1635  * Invoked for outbound packets after ESP processing. If the packet
1636  * also requires AH, performs the AH SA selection and AH processing.
1637  * Returns B_TRUE if the AH processing was not needed or if it was
1638  * performed successfully. Returns B_FALSE and consumes the passed mblk
1639  * if AH processing was required but could not be performed.
1640  */
1641 static boolean_t
1642 esp_do_outbound_ah(mblk_t *ipsec_mp)
1643 {
1644 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
1645 	ipsec_status_t ipsec_rc;
1646 	ipsec_action_t *ap;
1647 
1648 	ap = io->ipsec_out_act;
1649 	if (ap == NULL) {
1650 		ipsec_policy_t *pp = io->ipsec_out_policy;
1651 		ap = pp->ipsp_act;
1652 	}
1653 
1654 	if (!ap->ipa_want_ah)
1655 		return (B_TRUE);
1656 
1657 	ASSERT(io->ipsec_out_ah_done == B_FALSE);
1658 
1659 	if (io->ipsec_out_ah_sa == NULL) {
1660 		if (!ipsec_outbound_sa(ipsec_mp, IPPROTO_AH)) {
1661 			sadb_acquire(ipsec_mp, io, B_TRUE, B_FALSE);
1662 			return (B_FALSE);
1663 		}
1664 	}
1665 	ASSERT(io->ipsec_out_ah_sa != NULL);
1666 
1667 	io->ipsec_out_ah_done = B_TRUE;
1668 	ipsec_rc = io->ipsec_out_ah_sa->ipsa_output_func(ipsec_mp);
1669 	return (ipsec_rc == IPSEC_STATUS_SUCCESS);
1670 }
1671 
1672 
1673 /*
1674  * Kernel crypto framework callback invoked after completion of async
1675  * crypto requests.
1676  */
1677 static void
1678 esp_kcf_callback(void *arg, int status)
1679 {
1680 	mblk_t *ipsec_mp = (mblk_t *)arg;
1681 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
1682 	boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN);
1683 
1684 	ASSERT(ipsec_mp->b_cont != NULL);
1685 
1686 	if (status == CRYPTO_SUCCESS) {
1687 		if (is_inbound) {
1688 			if (esp_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS)
1689 				return;
1690 
1691 			/* finish IPsec processing */
1692 			ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL);
1693 		} else {
1694 			/*
1695 			 * If a ICV was computed, it was stored by the
1696 			 * crypto framework at the end of the packet.
1697 			 */
1698 			ipha_t *ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr;
1699 
1700 			/* do AH processing if needed */
1701 			if (!esp_do_outbound_ah(ipsec_mp))
1702 				return;
1703 
1704 			/* finish IPsec processing */
1705 			if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
1706 				ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL,
1707 				    NULL);
1708 			} else {
1709 				ip6_t *ip6h = (ip6_t *)ipha;
1710 				ip_wput_ipsec_out_v6(NULL, ipsec_mp, ip6h,
1711 				    NULL, NULL);
1712 			}
1713 		}
1714 
1715 	} else if (status == CRYPTO_INVALID_MAC) {
1716 		esp_log_bad_auth(ipsec_mp);
1717 
1718 	} else {
1719 		esp1dbg(("esp_kcf_callback: crypto failed with 0x%x\n",
1720 		    status));
1721 		ESP_BUMP_STAT(crypto_failures);
1722 		if (is_inbound)
1723 			IP_ESP_BUMP_STAT(in_discards);
1724 		else
1725 			ESP_BUMP_STAT(out_discards);
1726 		ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL,
1727 		    &ipdrops_esp_crypto_failed, &esp_dropper);
1728 	}
1729 }
1730 
1731 /*
1732  * Invoked on crypto framework failure during inbound and outbound processing.
1733  */
1734 static void
1735 esp_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc)
1736 {
1737 	esp1dbg(("crypto failed for %s ESP with 0x%x\n",
1738 	    is_inbound ? "inbound" : "outbound", kef_rc));
1739 	ip_drop_packet(mp, is_inbound, NULL, NULL, &ipdrops_esp_crypto_failed,
1740 	    &esp_dropper);
1741 	ESP_BUMP_STAT(crypto_failures);
1742 	if (is_inbound)
1743 		IP_ESP_BUMP_STAT(in_discards);
1744 	else
1745 		ESP_BUMP_STAT(out_discards);
1746 }
1747 
/*
 * Initialize the crypto call request "_cr" so that completion is reported
 * through esp_kcf_callback().  The callback argument is the variable named
 * "ipsec_mp" in the scope where the macro is expanded.
 */
#define	ESP_INIT_CALLREQ(_cr) {						\
	(_cr)->cr_callback_func = esp_kcf_callback;			\
	(_cr)->cr_callback_arg = ipsec_mp;				\
	(_cr)->cr_flag = CRYPTO_SKIP_REQID | CRYPTO_RESTRICTED;		\
}
1753 
/*
 * Describe an ICV buffer of "icvlen" bytes at "icvbuf" as a raw
 * crypto data item in "mac", starting at offset 0.
 */
#define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {			\
	(mac)->cd_format = CRYPTO_DATA_RAW;				\
	(mac)->cd_raw.iov_base = (char *)icvbuf;			\
	(mac)->cd_raw.iov_len = (icvlen);				\
	(mac)->cd_offset = 0;						\
	(mac)->cd_length = (icvlen);					\
}
1761 
/*
 * Describe "len" bytes starting "off" bytes into "mp" as the crypto data
 * item "data".  If the first mblk alone holds off + len bytes it is passed
 * as a raw buffer, otherwise the mblk itself is passed (CRYPTO_DATA_MBLK).
 */
#define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) {			\
	(data)->cd_offset = (off);					\
	(data)->cd_length = (len);					\
	if (MBLKL(mp) < (len) + (off)) {				\
		(data)->cd_format = CRYPTO_DATA_MBLK;			\
		(data)->cd_mp = (mp);					\
	} else {							\
		(data)->cd_format = CRYPTO_DATA_RAW;			\
		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;		\
		(data)->cd_raw.iov_len = MBLKL(mp);			\
	}								\
}
1775 
/*
 * Describe two regions of the mblk "mp" -- (off1, len1) and (off2, len2) --
 * as the dual crypto data item "data".  Always uses CRYPTO_DATA_MBLK.
 */
#define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {	\
	(data)->dd_format = CRYPTO_DATA_MBLK;				\
	(data)->dd_mp = (mp);						\
	(data)->dd_offset1 = (off1);					\
	(data)->dd_len1 = (len1);					\
	(data)->dd_offset2 = (off2);					\
	(data)->dd_len2 = (len2);					\
}
1784 
1785 static ipsec_status_t
1786 esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset)
1787 {
1788 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
1789 	boolean_t do_auth;
1790 	uint_t auth_offset, msg_len, auth_len;
1791 	crypto_call_req_t call_req;
1792 	mblk_t *esp_mp;
1793 	int kef_rc = CRYPTO_FAILED;
1794 	uint_t icv_len = assoc->ipsa_mac_len;
1795 	crypto_ctx_template_t auth_ctx_tmpl;
1796 	boolean_t do_encr;
1797 	uint_t encr_offset, encr_len;
1798 	uint_t iv_len = assoc->ipsa_iv_len;
1799 	crypto_ctx_template_t encr_ctx_tmpl;
1800 
1801 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
1802 
1803 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
1804 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
1805 
1806 	/*
1807 	 * An inbound packet is of the form:
1808 	 * IPSEC_IN -> [IP,options,ESP,IV,data,ICV,pad]
1809 	 */
1810 	esp_mp = ipsec_mp->b_cont;
1811 	msg_len = MBLKL(esp_mp);
1812 
1813 	ESP_INIT_CALLREQ(&call_req);
1814 
1815 	if (do_auth) {
1816 		/* force asynchronous processing? */
1817 		if (ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
1818 		    IPSEC_ALGS_EXEC_ASYNC)
1819 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
1820 
1821 		/* authentication context template */
1822 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
1823 		    auth_ctx_tmpl);
1824 
1825 		/* ICV to be verified */
1826 		ESP_INIT_CRYPTO_MAC(&ii->ipsec_in_crypto_mac,
1827 		    icv_len, esp_mp->b_wptr - icv_len);
1828 
1829 		/* authentication starts at the ESP header */
1830 		auth_offset = esph_offset;
1831 		auth_len = msg_len - auth_offset - icv_len;
1832 		if (!do_encr) {
1833 			/* authentication only */
1834 			/* initialize input data argument */
1835 			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
1836 			    esp_mp, auth_offset, auth_len);
1837 
1838 			/* call the crypto framework */
1839 			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
1840 			    &ii->ipsec_in_crypto_data,
1841 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
1842 			    &ii->ipsec_in_crypto_mac, &call_req);
1843 		}
1844 	}
1845 
1846 	if (do_encr) {
1847 		/* force asynchronous processing? */
1848 		if (ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
1849 		    IPSEC_ALGS_EXEC_ASYNC)
1850 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
1851 
1852 		/* encryption template */
1853 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
1854 		    encr_ctx_tmpl);
1855 
1856 		/* skip IV, since it is passed separately */
1857 		encr_offset = esph_offset + sizeof (esph_t) + iv_len;
1858 		encr_len = msg_len - encr_offset;
1859 
1860 		if (!do_auth) {
1861 			/* decryption only */
1862 			/* initialize input data argument */
1863 			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
1864 			    esp_mp, encr_offset, encr_len);
1865 
1866 			/* specify IV */
1867 			ii->ipsec_in_crypto_data.cd_miscdata =
1868 			    (char *)esp_mp->b_rptr + sizeof (esph_t) +
1869 			    esph_offset;
1870 
1871 			/* call the crypto framework */
1872 			kef_rc = crypto_decrypt(&assoc->ipsa_emech,
1873 			    &ii->ipsec_in_crypto_data,
1874 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
1875 			    NULL, &call_req);
1876 		}
1877 	}
1878 
1879 	if (do_auth && do_encr) {
1880 		/* dual operation */
1881 		/* initialize input data argument */
1882 		ESP_INIT_CRYPTO_DUAL_DATA(&ii->ipsec_in_crypto_dual_data,
1883 		    esp_mp, auth_offset, auth_len,
1884 		    encr_offset, encr_len - icv_len);
1885 
1886 		/* specify IV */
1887 		ii->ipsec_in_crypto_dual_data.dd_miscdata =
1888 		    (char *)esp_mp->b_rptr + sizeof (esph_t) + esph_offset;
1889 
1890 		/* call the framework */
1891 		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
1892 		    &assoc->ipsa_emech, &ii->ipsec_in_crypto_dual_data,
1893 		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
1894 		    auth_ctx_tmpl, encr_ctx_tmpl, &ii->ipsec_in_crypto_mac,
1895 		    NULL, &call_req);
1896 	}
1897 
1898 	switch (kef_rc) {
1899 	case CRYPTO_SUCCESS:
1900 		ESP_BUMP_STAT(crypto_sync);
1901 		return (esp_in_done(ipsec_mp));
1902 	case CRYPTO_QUEUED:
1903 		/* esp_kcf_callback() will be invoked on completion */
1904 		ESP_BUMP_STAT(crypto_async);
1905 		return (IPSEC_STATUS_PENDING);
1906 	case CRYPTO_INVALID_MAC:
1907 		ESP_BUMP_STAT(crypto_sync);
1908 		esp_log_bad_auth(ipsec_mp);
1909 		return (IPSEC_STATUS_FAILED);
1910 	}
1911 
1912 	esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc);
1913 	return (IPSEC_STATUS_FAILED);
1914 }
1915 
1916 static ipsec_status_t
1917 esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf,
1918     uint_t payload_len)
1919 {
1920 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
1921 	uint_t auth_len;
1922 	crypto_call_req_t call_req;
1923 	mblk_t *esp_mp;
1924 	int kef_rc = CRYPTO_FAILED;
1925 	uint_t icv_len = assoc->ipsa_mac_len;
1926 	crypto_ctx_template_t auth_ctx_tmpl;
1927 	boolean_t do_auth;
1928 	boolean_t do_encr;
1929 	uint_t iv_len = assoc->ipsa_iv_len;
1930 	crypto_ctx_template_t encr_ctx_tmpl;
1931 	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
1932 	size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
1933 
1934 	esp3dbg(("esp_submit_req_outbound:%s", is_natt ? "natt" : "not natt"));
1935 
1936 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
1937 
1938 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
1939 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
1940 
1941 	/*
1942 	 * Outbound IPsec packets are of the form:
1943 	 * IPSEC_OUT -> [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
1944 	 * unless it's NATT, then it's
1945 	 * IPSEC_OUT -> [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
1946 	 * Get a pointer to the mblk containing the ESP header.
1947 	 */
1948 	ASSERT(ipsec_mp->b_cont != NULL && ipsec_mp->b_cont->b_cont != NULL);
1949 	esp_mp = ipsec_mp->b_cont->b_cont;
1950 
1951 	ESP_INIT_CALLREQ(&call_req);
1952 
1953 	if (do_auth) {
1954 		/* force asynchronous processing? */
1955 		if (ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
1956 		    IPSEC_ALGS_EXEC_ASYNC)
1957 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
1958 
1959 		/* authentication context template */
1960 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
1961 		    auth_ctx_tmpl);
1962 
1963 		/* where to store the computed mac */
1964 		ESP_INIT_CRYPTO_MAC(&io->ipsec_out_crypto_mac,
1965 		    icv_len, icv_buf);
1966 
1967 		/* authentication starts at the ESP header */
1968 		auth_len = payload_len + iv_len + sizeof (esph_t);
1969 		if (!do_encr) {
1970 			/* authentication only */
1971 			/* initialize input data argument */
1972 			ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data,
1973 			    esp_mp, esph_offset, auth_len);
1974 
1975 			/* call the crypto framework */
1976 			kef_rc = crypto_mac(&assoc->ipsa_amech,
1977 			    &io->ipsec_out_crypto_data,
1978 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
1979 			    &io->ipsec_out_crypto_mac, &call_req);
1980 		}
1981 	}
1982 
1983 	if (do_encr) {
1984 		/* force asynchronous processing? */
1985 		if (ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
1986 		    IPSEC_ALGS_EXEC_ASYNC)
1987 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
1988 
1989 		/* encryption context template */
1990 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
1991 		    encr_ctx_tmpl);
1992 
1993 		if (!do_auth) {
1994 			/* encryption only, skip mblk that contains ESP hdr */
1995 			/* initialize input data argument */
1996 			ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data,
1997 			    esp_mp->b_cont, 0, payload_len);
1998 
1999 			/* specify IV */
2000 			io->ipsec_out_crypto_data.cd_miscdata =
2001 			    (char *)esp_mp->b_rptr + sizeof (esph_t) +
2002 			    esph_offset;
2003 
2004 			/* call the crypto framework */
2005 			kef_rc = crypto_encrypt(&assoc->ipsa_emech,
2006 			    &io->ipsec_out_crypto_data,
2007 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
2008 			    NULL, &call_req);
2009 		}
2010 	}
2011 
2012 	if (do_auth && do_encr) {
2013 		/*
2014 		 * Encryption and authentication:
2015 		 * Pass the pointer to the mblk chain starting at the ESP
2016 		 * header to the framework. Skip the ESP header mblk
2017 		 * for encryption, which is reflected by an encryption
2018 		 * offset equal to the length of that mblk. Start
2019 		 * the authentication at the ESP header, i.e. use an
2020 		 * authentication offset of zero.
2021 		 */
2022 		ESP_INIT_CRYPTO_DUAL_DATA(&io->ipsec_out_crypto_dual_data,
2023 		    esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);
2024 
2025 		/* specify IV */
2026 		io->ipsec_out_crypto_dual_data.dd_miscdata =
2027 		    (char *)esp_mp->b_rptr + sizeof (esph_t) + esph_offset;
2028 
2029 		/* call the framework */
2030 		kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
2031 		    &assoc->ipsa_amech, NULL,
2032 		    &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
2033 		    encr_ctx_tmpl, auth_ctx_tmpl,
2034 		    &io->ipsec_out_crypto_dual_data,
2035 		    &io->ipsec_out_crypto_mac, &call_req);
2036 	}
2037 
2038 	switch (kef_rc) {
2039 	case CRYPTO_SUCCESS:
2040 		ESP_BUMP_STAT(crypto_sync);
2041 		return (IPSEC_STATUS_SUCCESS);
2042 	case CRYPTO_QUEUED:
2043 		/* esp_kcf_callback() will be invoked on completion */
2044 		ESP_BUMP_STAT(crypto_async);
2045 		return (IPSEC_STATUS_PENDING);
2046 	}
2047 
2048 	esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc);
2049 	return (IPSEC_STATUS_FAILED);
2050 }
2051 
2052 /*
2053  * Handle outbound IPsec processing for IPv4 and IPv6
2054  * On success returns B_TRUE, on failure returns B_FALSE and frees the
2055  * mblk chain ipsec_in_mp.
2056  */
2057 static ipsec_status_t
2058 esp_outbound(mblk_t *mp)
2059 {
2060 	mblk_t *ipsec_out_mp, *data_mp, *espmp, *tailmp;
2061 	ipsec_out_t *io;
2062 	ipha_t *ipha;
2063 	ip6_t *ip6h;
2064 	esph_t *esph;
2065 	uint_t af;
2066 	uint8_t *nhp;
2067 	uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
2068 	uintptr_t esplen = sizeof (esph_t);
2069 	uint8_t protocol;
2070 	ipsa_t *assoc;
2071 	uint_t iv_len, mac_len = 0;
2072 	uchar_t *icv_buf;
2073 	udpha_t *udpha;
2074 	boolean_t is_natt = B_FALSE;
2075 
2076 	ESP_BUMP_STAT(out_requests);
2077 
2078 	ipsec_out_mp = mp;
2079 	data_mp = ipsec_out_mp->b_cont;
2080 
2081 	/*
2082 	 * <sigh> We have to copy the message here, because TCP (for example)
2083 	 * keeps a dupb() of the message lying around for retransmission.
2084 	 * Since ESP changes the whole of the datagram, we have to create our
2085 	 * own copy lest we clobber TCP's data.  Since we have to copy anyway,
2086 	 * we might as well make use of msgpullup() and get the mblk into one
2087 	 * contiguous piece!
2088 	 */
2089 	ipsec_out_mp->b_cont = msgpullup(data_mp, -1);
2090 	if (ipsec_out_mp->b_cont == NULL) {
2091 		esp0dbg(("esp_outbound: msgpullup() failed, "
2092 		    "dropping packet.\n"));
2093 		ipsec_out_mp->b_cont = data_mp;
2094 		/*
2095 		 * TODO:  Find the outbound IRE for this packet and
2096 		 * pass it to ip_drop_packet().
2097 		 */
2098 		ip_drop_packet(ipsec_out_mp, B_FALSE, NULL, NULL,
2099 		    &ipdrops_esp_nomem, &esp_dropper);
2100 		return (IPSEC_STATUS_FAILED);
2101 	} else {
2102 		freemsg(data_mp);
2103 		data_mp = ipsec_out_mp->b_cont;
2104 	}
2105 
2106 	io = (ipsec_out_t *)ipsec_out_mp->b_rptr;
2107 
2108 	/*
2109 	 * Reality check....
2110 	 */
2111 
2112 	ipha = (ipha_t *)data_mp->b_rptr;  /* So we can call esp_acquire(). */
2113 
2114 	if (io->ipsec_out_v4) {
2115 		af = AF_INET;
2116 		divpoint = IPH_HDR_LENGTH(ipha);
2117 		datalen = ntohs(ipha->ipha_length) - divpoint;
2118 		nhp = (uint8_t *)&ipha->ipha_protocol;
2119 	} else {
2120 		ip6_pkt_t ipp;
2121 
2122 		af = AF_INET6;
2123 		ip6h = (ip6_t *)ipha;
2124 		bzero(&ipp, sizeof (ipp));
2125 		divpoint = ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL);
2126 		if (ipp.ipp_dstopts != NULL &&
2127 		    ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
2128 			/*
2129 			 * Destination options are tricky.  If we get in here,
2130 			 * then we have a terminal header following the
2131 			 * destination options.  We need to adjust backwards
2132 			 * so we insert ESP BEFORE the destination options
2133 			 * bag.  (So that the dstopts get encrypted!)
2134 			 *
2135 			 * Since this is for outbound packets only, we know
2136 			 * that non-terminal destination options only precede
2137 			 * routing headers.
2138 			 */
2139 			divpoint -= ipp.ipp_dstoptslen;
2140 		}
2141 		datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
2142 
2143 		if (ipp.ipp_rthdr != NULL) {
2144 			nhp = &ipp.ipp_rthdr->ip6r_nxt;
2145 		} else if (ipp.ipp_hopopts != NULL) {
2146 			nhp = &ipp.ipp_hopopts->ip6h_nxt;
2147 		} else {
2148 			ASSERT(divpoint == sizeof (ip6_t));
2149 			/* It's probably IP + ESP. */
2150 			nhp = &ip6h->ip6_nxt;
2151 		}
2152 	}
2153 	assoc = io->ipsec_out_esp_sa;
2154 	ASSERT(assoc != NULL);
2155 
2156 	if (assoc->ipsa_usetime == 0)
2157 		esp_set_usetime(assoc, B_FALSE);
2158 
2159 	if (assoc->ipsa_auth_alg != SADB_AALG_NONE)
2160 		mac_len = assoc->ipsa_mac_len;
2161 
2162 	if (assoc->ipsa_flags & IPSA_F_NATT) {
2163 		/* wedge in fake UDP */
2164 		is_natt = B_TRUE;
2165 		esplen += UDPH_SIZE;
2166 	}
2167 
2168 	/*
2169 	 * Set up ESP header and encryption padding for ENCR PI request.
2170 	 */
2171 
2172 	/* Determine the padding length.  Pad to 4-bytes for no-encryption. */
2173 	if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
2174 		iv_len = assoc->ipsa_iv_len;
2175 
2176 		/*
2177 		 * Include the two additional bytes (hence the - 2) for the
2178 		 * padding length and the next header.  Take this into account
2179 		 * when calculating the actual length of the padding.
2180 		 */
2181 		ASSERT(ISP2(iv_len));
2182 		padlen = ((unsigned)(iv_len - datalen - 2)) & (iv_len - 1);
2183 	} else {
2184 		iv_len = 0;
2185 		padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
2186 		    (sizeof (uint32_t) - 1);
2187 	}
2188 
2189 	/* Allocate ESP header and IV. */
2190 	esplen += iv_len;
2191 
2192 	/*
2193 	 * Update association byte-count lifetimes.  Don't forget to take
2194 	 * into account the padding length and next-header (hence the + 2).
2195 	 *
2196 	 * Use the amount of data fed into the "encryption algorithm".  This
2197 	 * is the IV, the data length, the padding length, and the final two
2198 	 * bytes (padlen, and next-header).
2199 	 *
2200 	 */
2201 
2202 	if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
2203 		/*
2204 		 * TODO:  Find the outbound IRE for this packet and
2205 		 * pass it to ip_drop_packet().
2206 		 */
2207 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
2208 		    &ipdrops_esp_bytes_expire, &esp_dropper);
2209 		return (IPSEC_STATUS_FAILED);
2210 	}
2211 
2212 	espmp = allocb(esplen, BPRI_HI);
2213 	if (espmp == NULL) {
2214 		ESP_BUMP_STAT(out_discards);
2215 		esp1dbg(("esp_outbound: can't allocate espmp.\n"));
2216 		/*
2217 		 * TODO:  Find the outbound IRE for this packet and
2218 		 * pass it to ip_drop_packet().
2219 		 */
2220 		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_nomem,
2221 		    &esp_dropper);
2222 		return (IPSEC_STATUS_FAILED);
2223 	}
2224 	espmp->b_wptr += esplen;
2225 	esph = (esph_t *)espmp->b_rptr;
2226 
2227 	if (is_natt) {
2228 		esp3dbg(("esp_outbound: NATT"));
2229 
2230 		udpha = (udpha_t *)espmp->b_rptr;
2231 		udpha->uha_src_port = htons(IPPORT_IKE_NATT);
2232 		if (assoc->ipsa_remote_port != 0)
2233 			udpha->uha_dst_port = assoc->ipsa_remote_port;
2234 		else
2235 			udpha->uha_dst_port = htons(IPPORT_IKE_NATT);
2236 		/*
2237 		 * Set the checksum to 0, so that the ip_wput_ipsec_out()
2238 		 * can do the right thing.
2239 		 */
2240 		udpha->uha_checksum = 0;
2241 		esph = (esph_t *)(udpha + 1);
2242 	}
2243 
2244 	esph->esph_spi = assoc->ipsa_spi;
2245 
2246 	esph->esph_replay = htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1));
2247 	if (esph->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2248 		/*
2249 		 * XXX We have replay counter wrapping.
2250 		 * We probably want to nuke this SA (and its peer).
2251 		 */
2252 		ipsec_assocfailure(info.mi_idnum, 0, 0,
2253 		    SL_ERROR | SL_CONSOLE | SL_WARN,
2254 		    "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
2255 		    esph->esph_spi, assoc->ipsa_dstaddr, af);
2256 
2257 		ESP_BUMP_STAT(out_discards);
2258 		sadb_replay_delete(assoc);
2259 		/*
2260 		 * TODO:  Find the outbound IRE for this packet and
2261 		 * pass it to ip_drop_packet().
2262 		 */
2263 		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_replay,
2264 		    &esp_dropper);
2265 		return (IPSEC_STATUS_FAILED);
2266 	}
2267 
2268 	/*
2269 	 * Set the IV to a random quantity.  We do not require the
2270 	 * highest quality random bits, but for best security with CBC
2271 	 * mode ciphers, the value must be unlikely to repeat and also
2272 	 * must not be known in advance to an adversary capable of
2273 	 * influencing the plaintext.
2274 	 */
2275 	(void) random_get_pseudo_bytes((uint8_t *)(esph + 1), iv_len);
2276 
2277 	/* Fix the IP header. */
2278 	alloclen = padlen + 2 + mac_len;
2279 	adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
2280 
2281 	protocol = *nhp;
2282 
2283 	if (io->ipsec_out_v4) {
2284 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
2285 		if (is_natt) {
2286 			*nhp = IPPROTO_UDP;
2287 			udpha->uha_length = htons(ntohs(ipha->ipha_length) -
2288 			    IPH_HDR_LENGTH(ipha));
2289 		} else {
2290 			*nhp = IPPROTO_ESP;
2291 		}
2292 		ipha->ipha_hdr_checksum = 0;
2293 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2294 	} else {
2295 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
2296 		*nhp = IPPROTO_ESP;
2297 	}
2298 
2299 	/* I've got the two ESP mblks, now insert them. */
2300 
2301 	esp2dbg(("data_mp before outbound ESP adjustment:\n"));
2302 	esp2dbg((dump_msg(data_mp)));
2303 
2304 	if (!esp_insert_esp(data_mp, espmp, divpoint)) {
2305 		ESP_BUMP_STAT(out_discards);
2306 		/* NOTE:  esp_insert_esp() only fails if there's no memory. */
2307 		/*
2308 		 * TODO:  Find the outbound IRE for this packet and
2309 		 * pass it to ip_drop_packet().
2310 		 */
2311 		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_nomem,
2312 		    &esp_dropper);
2313 		freeb(espmp);
2314 		return (IPSEC_STATUS_FAILED);
2315 	}
2316 
2317 	/* Append padding (and leave room for ICV). */
2318 	for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
2319 		;
2320 	if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
2321 		tailmp->b_cont = allocb(alloclen, BPRI_HI);
2322 		if (tailmp->b_cont == NULL) {
2323 			ESP_BUMP_STAT(out_discards);
2324 			esp0dbg(("esp_outbound:  Can't allocate tailmp.\n"));
2325 			/*
2326 			 * TODO:  Find the outbound IRE for this packet and
2327 			 * pass it to ip_drop_packet().
2328 			 */
2329 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
2330 			    &ipdrops_esp_nomem, &esp_dropper);
2331 			return (IPSEC_STATUS_FAILED);
2332 		}
2333 		tailmp = tailmp->b_cont;
2334 	}
2335 
2336 	/*
2337 	 * If there's padding, N bytes of padding must be of the form 0x1,
2338 	 * 0x2, 0x3... 0xN.
2339 	 */
2340 	for (i = 0; i < padlen; ) {
2341 		i++;
2342 		*tailmp->b_wptr++ = i;
2343 	}
2344 	*tailmp->b_wptr++ = i;
2345 	*tailmp->b_wptr++ = protocol;
2346 
2347 	esp2dbg(("data_Mp before encryption:\n"));
2348 	esp2dbg((dump_msg(data_mp)));
2349 
2350 	/*
2351 	 * The packet is eligible for hardware acceleration if the
2352 	 * following conditions are satisfied:
2353 	 *
2354 	 * 1. the packet will not be fragmented
2355 	 * 2. the provider supports the algorithms specified by SA
2356 	 * 3. there is no pending control message being exchanged
2357 	 * 4. snoop is not attached
2358 	 * 5. the destination address is not a multicast address
2359 	 *
2360 	 * All five of these conditions are checked by IP prior to
2361 	 * sending the packet to ESP.
2362 	 *
2363 	 * But We, and We Alone, can, nay MUST check if the packet
2364 	 * is over NATT, and then disqualify it from hardware
2365 	 * acceleration.
2366 	 */
2367 
2368 	if (io->ipsec_out_is_capab_ill && !(assoc->ipsa_flags & IPSA_F_NATT)) {
2369 		return (esp_outbound_accelerated(ipsec_out_mp, mac_len));
2370 	}
2371 	ESP_BUMP_STAT(noaccel);
2372 
2373 	/*
2374 	 * Okay.  I've set up the pre-encryption ESP.  Let's do it!
2375 	 */
2376 
2377 	if (mac_len > 0) {
2378 		ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
2379 		icv_buf = tailmp->b_wptr;
2380 		tailmp->b_wptr += mac_len;
2381 	} else {
2382 		icv_buf = NULL;
2383 	}
2384 
2385 	return (esp_submit_req_outbound(ipsec_out_mp, assoc, icv_buf,
2386 	    datalen + padlen + 2));
2387 }
2388 
2389 /*
2390  * IP calls this to validate the ICMP errors that
2391  * we got from the network.
2392  */
2393 ipsec_status_t
2394 ipsecesp_icmp_error(mblk_t *ipsec_mp)
2395 {
2396 	/*
2397 	 * Unless we get an entire packet back, this function is useless.
2398 	 * Why?
2399 	 *
2400 	 * 1.)	Partial packets are useless, because the "next header"
2401 	 *	is at the end of the decrypted ESP packet.  Without the
2402 	 *	whole packet, this is useless.
2403 	 *
2404 	 * 2.)	If we every use a stateful cipher, such as a stream or a
2405 	 *	one-time pad, we can't do anything.
2406 	 *
2407 	 * Since the chances of us getting an entire packet back are very
2408 	 * very small, we discard here.
2409 	 */
2410 	IP_ESP_BUMP_STAT(in_discards);
2411 	ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, &ipdrops_esp_icmp,
2412 	    &esp_dropper);
2413 	return (IPSEC_STATUS_FAILED);
2414 }
2415 
2416 /*
2417  * ESP module read put routine.
2418  */
2419 /* ARGSUSED */
2420 static void
2421 ipsecesp_rput(queue_t *q, mblk_t *mp)
2422 {
2423 	ASSERT(mp->b_datap->db_type != M_CTL);	/* No more IRE_DB_REQ. */
2424 	switch (mp->b_datap->db_type) {
2425 	case M_PROTO:
2426 	case M_PCPROTO:
2427 		/* TPI message of some sort. */
2428 		switch (*((t_scalar_t *)mp->b_rptr)) {
2429 		case T_BIND_ACK:
2430 			esp3dbg(("Thank you IP from ESP for T_BIND_ACK\n"));
2431 			break;
2432 		case T_ERROR_ACK:
2433 			cmn_err(CE_WARN,
2434 			    "ipsecesp:  ESP received T_ERROR_ACK from IP.");
2435 			/*
2436 			 * Make esp_sadb.s_ip_q NULL, and in the
2437 			 * future, perhaps try again.
2438 			 */
2439 			esp_sadb.s_ip_q = NULL;
2440 			break;
2441 		case T_OK_ACK:
2442 			/* Probably from a (rarely sent) T_UNBIND_REQ. */
2443 			break;
2444 		default:
2445 			esp0dbg(("Unknown M_{,PC}PROTO message.\n"));
2446 		}
2447 		freemsg(mp);
2448 		break;
2449 	default:
2450 		/* For now, passthru message. */
2451 		esp2dbg(("ESP got unknown mblk type %d.\n",
2452 		    mp->b_datap->db_type));
2453 		putnext(q, mp);
2454 	}
2455 }
2456 
2457 /*
2458  * Construct an SADB_REGISTER message with the current algorithms.
2459  */
2460 static boolean_t
2461 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial)
2462 {
2463 	mblk_t *pfkey_msg_mp, *keysock_out_mp;
2464 	sadb_msg_t *samsg;
2465 	sadb_supported_t *sasupp_auth = NULL;
2466 	sadb_supported_t *sasupp_encr = NULL;
2467 	sadb_alg_t *saalg;
2468 	uint_t allocsize = sizeof (*samsg);
2469 	uint_t i, numalgs_snap;
2470 	int current_aalgs;
2471 	ipsec_alginfo_t **authalgs;
2472 	uint_t num_aalgs;
2473 	int current_ealgs;
2474 	ipsec_alginfo_t **encralgs;
2475 	uint_t num_ealgs;
2476 
2477 	/* Allocate the KEYSOCK_OUT. */
2478 	keysock_out_mp = sadb_keysock_out(serial);
2479 	if (keysock_out_mp == NULL) {
2480 		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
2481 		return (B_FALSE);
2482 	}
2483 
2484 	/*
2485 	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
2486 	 */
2487 
2488 	mutex_enter(&alg_lock);
2489 
2490 	/*
2491 	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
2492 	 * down the lock while filling it.
2493 	 *
2494 	 * Return only valid algorithms, so the number of algorithms
2495 	 * to send up may be less than the number of algorithm entries
2496 	 * in the table.
2497 	 */
2498 	authalgs = ipsec_alglists[IPSEC_ALG_AUTH];
2499 	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
2500 		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
2501 			num_aalgs++;
2502 
2503 	if (num_aalgs != 0) {
2504 		allocsize += (num_aalgs * sizeof (*saalg));
2505 		allocsize += sizeof (*sasupp_auth);
2506 	}
2507 	encralgs = ipsec_alglists[IPSEC_ALG_ENCR];
2508 	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
2509 		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
2510 			num_ealgs++;
2511 
2512 	if (num_ealgs != 0) {
2513 		allocsize += (num_ealgs * sizeof (*saalg));
2514 		allocsize += sizeof (*sasupp_encr);
2515 	}
2516 	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
2517 	if (keysock_out_mp->b_cont == NULL) {
2518 		mutex_exit(&alg_lock);
2519 		freemsg(keysock_out_mp);
2520 		return (B_FALSE);
2521 	}
2522 
2523 	pfkey_msg_mp = keysock_out_mp->b_cont;
2524 	pfkey_msg_mp->b_wptr += allocsize;
2525 	if (num_aalgs != 0) {
2526 		sasupp_auth = (sadb_supported_t *)
2527 		    (pfkey_msg_mp->b_rptr + sizeof (*samsg));
2528 		saalg = (sadb_alg_t *)(sasupp_auth + 1);
2529 
2530 		ASSERT(((ulong_t)saalg & 0x7) == 0);
2531 
2532 		numalgs_snap = 0;
2533 		for (i = 0;
2534 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); i++) {
2535 			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
2536 				continue;
2537 
2538 			saalg->sadb_alg_id = authalgs[i]->alg_id;
2539 			saalg->sadb_alg_ivlen = 0;
2540 			saalg->sadb_alg_minbits	= authalgs[i]->alg_ef_minbits;
2541 			saalg->sadb_alg_maxbits	= authalgs[i]->alg_ef_maxbits;
2542 			saalg->sadb_x_alg_defincr = authalgs[i]->alg_ef_default;
2543 			saalg->sadb_x_alg_increment =
2544 			    authalgs[i]->alg_increment;
2545 			numalgs_snap++;
2546 			saalg++;
2547 		}
2548 		ASSERT(numalgs_snap == num_aalgs);
2549 #ifdef DEBUG
2550 		/*
2551 		 * Reality check to make sure I snagged all of the
2552 		 * algorithms.
2553 		 */
2554 		for (; i < IPSEC_MAX_ALGS; i++) {
2555 			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
2556 				cmn_err(CE_PANIC, "esp_register_out()! "
2557 				    "Missed aalg #%d.\n", i);
2558 			}
2559 		}
2560 #endif /* DEBUG */
2561 	} else {
2562 		saalg = (sadb_alg_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
2563 	}
2564 
2565 	if (num_ealgs != 0) {
2566 		sasupp_encr = (sadb_supported_t *)saalg;
2567 		saalg = (sadb_alg_t *)(sasupp_encr + 1);
2568 
2569 		numalgs_snap = 0;
2570 		for (i = 0;
2571 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
2572 			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
2573 				continue;
2574 			saalg->sadb_alg_id = encralgs[i]->alg_id;
2575 			saalg->sadb_alg_ivlen = encralgs[i]->alg_datalen;
2576 			saalg->sadb_alg_minbits	= encralgs[i]->alg_ef_minbits;
2577 			saalg->sadb_alg_maxbits	= encralgs[i]->alg_ef_maxbits;
2578 			saalg->sadb_x_alg_defincr = encralgs[i]->alg_ef_default;
2579 			saalg->sadb_x_alg_increment =
2580 			    encralgs[i]->alg_increment;
2581 			numalgs_snap++;
2582 			saalg++;
2583 		}
2584 		ASSERT(numalgs_snap == num_ealgs);
2585 #ifdef DEBUG
2586 		/*
2587 		 * Reality check to make sure I snagged all of the
2588 		 * algorithms.
2589 		 */
2590 		for (; i < IPSEC_MAX_ALGS; i++) {
2591 			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
2592 				cmn_err(CE_PANIC, "esp_register_out()! "
2593 				    "Missed ealg #%d.\n", i);
2594 			}
2595 		}
2596 #endif /* DEBUG */
2597 	}
2598 
2599 	current_aalgs = num_aalgs;
2600 	current_ealgs = num_ealgs;
2601 
2602 	mutex_exit(&alg_lock);
2603 
2604 	/* Now fill the rest of the SADB_REGISTER message. */
2605 
2606 	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
2607 	samsg->sadb_msg_version = PF_KEY_V2;
2608 	samsg->sadb_msg_type = SADB_REGISTER;
2609 	samsg->sadb_msg_errno = 0;
2610 	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
2611 	samsg->sadb_msg_len = SADB_8TO64(allocsize);
2612 	samsg->sadb_msg_reserved = 0;
2613 	/*
2614 	 * Assume caller has sufficient sequence/pid number info.  If it's one
2615 	 * from me over a new alg., I could give two hoots about sequence.
2616 	 */
2617 	samsg->sadb_msg_seq = sequence;
2618 	samsg->sadb_msg_pid = pid;
2619 
2620 	if (sasupp_auth != NULL) {
2621 		sasupp_auth->sadb_supported_len =
2622 		    SADB_8TO64(sizeof (*sasupp_auth) +
2623 			sizeof (*saalg) * current_aalgs);
2624 		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
2625 		sasupp_auth->sadb_supported_reserved = 0;
2626 	}
2627 
2628 	if (sasupp_encr != NULL) {
2629 		sasupp_encr->sadb_supported_len =
2630 		    SADB_8TO64(sizeof (*sasupp_encr) +
2631 			sizeof (*saalg) * current_ealgs);
2632 		sasupp_encr->sadb_supported_exttype =
2633 		    SADB_EXT_SUPPORTED_ENCRYPT;
2634 		sasupp_encr->sadb_supported_reserved = 0;
2635 	}
2636 
2637 	if (esp_pfkey_q != NULL)
2638 		putnext(esp_pfkey_q, keysock_out_mp);
2639 	else {
2640 		freemsg(keysock_out_mp);
2641 		return (B_FALSE);
2642 	}
2643 
2644 	return (B_TRUE);
2645 }
2646 
2647 /*
2648  * Invoked when the algorithm table changes. Causes SADB_REGISTER
2649  * messages continaining the current list of algorithms to be
2650  * sent up to the ESP listeners.
2651  */
2652 void
2653 ipsecesp_algs_changed(void)
2654 {
2655 	/*
2656 	 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
2657 	 * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
2658 	 */
2659 	(void) esp_register_out(0, 0, 0);
2660 }
2661 
2662 /*
2663  * taskq_dispatch handler.
2664  */
2665 static void
2666 inbound_task(void *arg)
2667 {
2668 	esph_t *esph;
2669 	mblk_t *mp = (mblk_t *)arg;
2670 	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
2671 	int ipsec_rc;
2672 
2673 	esp2dbg(("in ESP inbound_task"));
2674 
2675 	esph = ipsec_inbound_esp_sa(mp);
2676 	if (esph == NULL)
2677 		return;
2678 	ASSERT(ii->ipsec_in_esp_sa != NULL);
2679 	ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func(mp, esph);
2680 	if (ipsec_rc != IPSEC_STATUS_SUCCESS)
2681 		return;
2682 	ip_fanout_proto_again(mp, NULL, NULL, NULL);
2683 }
2684 
2685 /*
2686  * Now that weak-key passed, actually ADD the security association, and
2687  * send back a reply ADD message.
2688  */
2689 static int
2690 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
2691     int *diagnostic)
2692 {
2693 	isaf_t *primary, *secondary, *inbound, *outbound;
2694 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2695 	sadb_address_t *dstext =
2696 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2697 	struct sockaddr_in *dst;
2698 	struct sockaddr_in6 *dst6;
2699 	boolean_t is_ipv4, clone = B_FALSE, is_inbound = B_FALSE;
2700 	uint32_t *dstaddr;
2701 	ipsa_t *larval = NULL;
2702 	ipsacq_t *acqrec;
2703 	iacqf_t *acq_bucket;
2704 	mblk_t *acq_msgs = NULL;
2705 	int rc;
2706 	sadb_t *sp;
2707 	int outhash;
2708 	mblk_t *lpkt;
2709 
2710 	/*
2711 	 * Locate the appropriate table(s).
2712 	 */
2713 
2714 	dst = (struct sockaddr_in *)(dstext + 1);
2715 	dst6 = (struct sockaddr_in6 *)dst;
2716 	is_ipv4 = (dst->sin_family == AF_INET);
2717 	if (is_ipv4) {
2718 		sp = &esp_sadb.s_v4;
2719 		dstaddr = (uint32_t *)(&dst->sin_addr);
2720 		outhash = OUTBOUND_HASH_V4(sp, *(ipaddr_t *)dstaddr);
2721 	} else {
2722 		sp = &esp_sadb.s_v6;
2723 		dstaddr = (uint32_t *)(&dst6->sin6_addr);
2724 		outhash = OUTBOUND_HASH_V6(sp, *(in6_addr_t *)dstaddr);
2725 	}
2726 
2727 	inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2728 	outbound = &sp->sdb_of[outhash];
2729 
2730 	switch (ksi->ks_in_dsttype) {
2731 	case KS_IN_ADDR_MBCAST:
2732 		clone = B_TRUE;	/* All mcast SAs can be bidirectional */
2733 		/* FALLTHRU */
2734 	case KS_IN_ADDR_ME:
2735 		primary = inbound;
2736 		secondary = outbound;
2737 		/*
2738 		 * If the source address is either one of mine, or unspecified
2739 		 * (which is best summed up by saying "not 'not mine'"),
2740 		 * then the association is potentially bi-directional,
2741 		 * in that it can be used for inbound traffic and outbound
2742 		 * traffic.  The best example of such an SA is a multicast
2743 		 * SA (which allows me to receive the outbound traffic).
2744 		 */
2745 		if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
2746 			clone = B_TRUE;
2747 		is_inbound = B_TRUE;
2748 		break;
2749 	case KS_IN_ADDR_NOTME:
2750 		primary = outbound;
2751 		secondary = inbound;
2752 		/*
2753 		 * If the source address literally not mine (either
2754 		 * unspecified or not mine), then this SA may have an
2755 		 * address that WILL be mine after some configuration.
2756 		 * We pay the price for this by making it a bi-directional
2757 		 * SA.
2758 		 */
2759 		if (ksi->ks_in_srctype != KS_IN_ADDR_ME)
2760 			clone = B_TRUE;
2761 		break;
2762 	default:
2763 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
2764 		return (EINVAL);
2765 	}
2766 
2767 	/*
2768 	 * Find a ACQUIRE list entry if possible.  If we've added an SA that
2769 	 * suits the needs of an ACQUIRE list entry, we can eliminate the
2770 	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
2771 	 * high-bit of the sequence number to queue it.  Key off destination
2772 	 * addr, and change acqrec's state.
2773 	 */
2774 
2775 	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
2776 		acq_bucket = &sp->sdb_acq[outhash];
2777 		mutex_enter(&acq_bucket->iacqf_lock);
2778 		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
2779 		    acqrec = acqrec->ipsacq_next) {
2780 			mutex_enter(&acqrec->ipsacq_lock);
2781 			/*
2782 			 * Q:  I only check sequence.  Should I check dst?
2783 			 * A: Yes, check dest because those are the packets
2784 			 *    that are queued up.
2785 			 */
2786 			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
2787 			    IPSA_ARE_ADDR_EQUAL(dstaddr,
2788 				acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
2789 				break;
2790 			mutex_exit(&acqrec->ipsacq_lock);
2791 		}
2792 		if (acqrec != NULL) {
2793 			/*
2794 			 * AHA!  I found an ACQUIRE record for this SA.
2795 			 * Grab the msg list, and free the acquire record.
2796 			 * I already am holding the lock for this record,
2797 			 * so all I have to do is free it.
2798 			 */
2799 			acq_msgs = acqrec->ipsacq_mp;
2800 			acqrec->ipsacq_mp = NULL;
2801 			mutex_exit(&acqrec->ipsacq_lock);
2802 			sadb_destroy_acquire(acqrec);
2803 		}
2804 		mutex_exit(&acq_bucket->iacqf_lock);
2805 	}
2806 
2807 	/*
2808 	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
2809 	 * in larval list (if there).
2810 	 */
2811 
2812 	if (samsg->sadb_msg_type == SADB_UPDATE) {
2813 		mutex_enter(&inbound->isaf_lock);
2814 		larval = ipsec_getassocbyspi(inbound, assoc->sadb_sa_spi,
2815 		    ALL_ZEROES_PTR, dstaddr, dst->sin_family);
2816 		mutex_exit(&inbound->isaf_lock);
2817 
2818 		if (larval == NULL) {
2819 			esp0dbg(("Larval update, but larval disappeared.\n"));
2820 			return (ESRCH);
2821 		} /* Else sadb_common_add unlinks it for me! */
2822 	}
2823 
2824 	lpkt = NULL;
2825 	if (larval != NULL)
2826 		lpkt = sadb_clear_lpkt(larval);
2827 
2828 	rc = sadb_common_add(esp_sadb.s_ip_q, esp_pfkey_q, mp, samsg, ksi,
2829 	    primary, secondary, larval, clone, is_inbound, diagnostic);
2830 
2831 	if (rc == 0 && lpkt != NULL) {
2832 		rc = !taskq_dispatch(esp_taskq, inbound_task,
2833 			    (void *) lpkt, TQ_NOSLEEP);
2834 	}
2835 
2836 	if (rc != 0) {
2837 		ip_drop_packet(lpkt, B_TRUE, NULL, NULL,
2838 		    &ipdrops_sadb_inlarval_timeout, &esp_dropper);
2839 	}
2840 
2841 	/*
2842 	 * How much more stack will I create with all of these
2843 	 * esp_outbound() calls?
2844 	 */
2845 
2846 	while (acq_msgs != NULL) {
2847 		mblk_t *mp = acq_msgs;
2848 
2849 		acq_msgs = acq_msgs->b_next;
2850 		mp->b_next = NULL;
2851 		if (rc == 0) {
2852 			if (ipsec_outbound_sa(mp, IPPROTO_ESP)) {
2853 				((ipsec_out_t *)(mp->b_rptr))->
2854 				    ipsec_out_esp_done = B_TRUE;
2855 				if (esp_outbound(mp) == IPSEC_STATUS_SUCCESS) {
2856 					ipha_t *ipha;
2857 
2858 					/* do AH processing if needed */
2859 					if (!esp_do_outbound_ah(mp))
2860 						continue;
2861 
2862 					ipha = (ipha_t *)mp->b_cont->b_rptr;
2863 
2864 					/* finish IPsec processing */
2865 					if (is_ipv4) {
2866 						ip_wput_ipsec_out(NULL, mp,
2867 						    ipha, NULL, NULL);
2868 					} else {
2869 						ip6_t *ip6h = (ip6_t *)ipha;
2870 						ip_wput_ipsec_out_v6(NULL,
2871 						    mp, ip6h, NULL, NULL);
2872 					}
2873 				}
2874 				continue;
2875 			}
2876 		}
2877 		ESP_BUMP_STAT(out_discards);
2878 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
2879 		    &ipdrops_sadb_acquire_timeout, &esp_dropper);
2880 	}
2881 
2882 	return (rc);
2883 }
2884 
2885 /*
2886  * Add new ESP security association.  This may become a generic AH/ESP
2887  * routine eventually.
2888  */
2889 static int
2890 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic)
2891 {
2892 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2893 	sadb_address_t *srcext =
2894 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2895 	sadb_address_t *dstext =
2896 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2897 	sadb_address_t *isrcext =
2898 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2899 	sadb_address_t *idstext =
2900 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2901 	sadb_address_t *nttext_loc =
2902 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
2903 	sadb_address_t *nttext_rem =
2904 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
2905 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2906 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2907 	struct sockaddr_in *src, *dst;
2908 	struct sockaddr_in *natt_loc, *natt_rem;
2909 	struct sockaddr_in6 *natt_loc6, *natt_rem6;
2910 
2911 	sadb_lifetime_t *soft =
2912 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2913 	sadb_lifetime_t *hard =
2914 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2915 
2916 	/* I need certain extensions present for an ADD message. */
2917 	if (srcext == NULL) {
2918 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2919 		return (EINVAL);
2920 	}
2921 	if (dstext == NULL) {
2922 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2923 		return (EINVAL);
2924 	}
2925 	if (isrcext == NULL && idstext != NULL) {
2926 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
2927 		return (EINVAL);
2928 	}
2929 	if (isrcext != NULL && idstext == NULL) {
2930 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
2931 		return (EINVAL);
2932 	}
2933 	if (assoc == NULL) {
2934 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2935 		return (EINVAL);
2936 	}
2937 	if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
2938 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
2939 		return (EINVAL);
2940 	}
2941 
2942 	src = (struct sockaddr_in *)(srcext + 1);
2943 	dst = (struct sockaddr_in *)(dstext + 1);
2944 	natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
2945 	natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
2946 	natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
2947 	natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);
2948 
2949 	/* Sundry ADD-specific reality checks. */
2950 	/* XXX STATS :  Logging/stats here? */
2951 	if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
2952 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
2953 		return (EINVAL);
2954 	}
2955 	if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
2956 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
2957 		return (EINVAL);
2958 	}
2959 
2960 	if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
2961 	    assoc->sadb_sa_auth == SADB_AALG_NONE) {
2962 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
2963 		return (EINVAL);
2964 	}
2965 
2966 	if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY |
2967 	    SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM |
2968 		SADB_X_SAFLAGS_TUNNEL)) {
2969 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
2970 		return (EINVAL);
2971 	}
2972 
2973 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) {
2974 		return (EINVAL);
2975 	}
2976 	ASSERT(src->sin_family == dst->sin_family);
2977 
2978 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
2979 		if (nttext_loc == NULL) {
2980 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
2981 			return (EINVAL);
2982 		}
2983 
2984 		if (natt_loc->sin_family == AF_INET6 &&
2985 		    !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
2986 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
2987 			return (EINVAL);
2988 		}
2989 	}
2990 
2991 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
2992 		if (nttext_rem == NULL) {
2993 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
2994 			return (EINVAL);
2995 		}
2996 		if (natt_rem->sin_family == AF_INET6 &&
2997 		    !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
2998 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
2999 			return (EINVAL);
3000 		}
3001 	}
3002 
3003 
3004 	/* Stuff I don't support, for now.  XXX Diagnostic? */
3005 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL ||
3006 	    ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
3007 		return (EOPNOTSUPP);
3008 
3009 	/*
3010 	 * XXX Policy :  I'm not checking identities or sensitivity
3011 	 * labels at this time, but if I did, I'd do them here, before I sent
3012 	 * the weak key check up to the algorithm.
3013 	 */
3014 
3015 	mutex_enter(&alg_lock);
3016 
3017 	/*
3018 	 * First locate the authentication algorithm.
3019 	 */
3020 	if (akey != NULL) {
3021 		ipsec_alginfo_t *aalg;
3022 
3023 		aalg = ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth];
3024 		if (aalg == NULL || !ALG_VALID(aalg)) {
3025 			mutex_exit(&alg_lock);
3026 			esp1dbg(("Couldn't find auth alg #%d.\n",
3027 			    assoc->sadb_sa_auth));
3028 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3029 			return (EINVAL);
3030 		}
3031 
3032 		/*
3033 		 * Sanity check key sizes.
3034 		 * Note: It's not possible to use SADB_AALG_NONE because
3035 		 * this auth_alg is not defined with ALG_FLAG_VALID. If this
3036 		 * ever changes, the same check for SADB_AALG_NONE and
3037 		 * a auth_key != NULL should be made here ( see below).
3038 		 */
3039 		if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
3040 			mutex_exit(&alg_lock);
3041 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
3042 			return (EINVAL);
3043 		}
3044 		ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3045 
3046 		/* check key and fix parity if needed */
3047 		if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
3048 		    diagnostic) != 0) {
3049 			mutex_exit(&alg_lock);
3050 			return (EINVAL);
3051 		}
3052 	}
3053 
3054 	/*
3055 	 * Then locate the encryption algorithm.
3056 	 */
3057 	if (ekey != NULL) {
3058 		ipsec_alginfo_t *ealg;
3059 
3060 		ealg = ipsec_alglists[IPSEC_ALG_ENCR][assoc->sadb_sa_encrypt];
3061 		if (ealg == NULL || !ALG_VALID(ealg)) {
3062 			mutex_exit(&alg_lock);
3063 			esp1dbg(("Couldn't find encr alg #%d.\n",
3064 			    assoc->sadb_sa_encrypt));
3065 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3066 			return (EINVAL);
3067 		}
3068 
3069 		/*
3070 		 * Sanity check key sizes. If the encryption algorithm is
3071 		 * SADB_EALG_NULL but the encryption key is NOT
3072 		 * NULL then complain.
3073 		 */
3074 		if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
3075 		    (!ipsec_valid_key_size(ekey->sadb_key_bits, ealg))) {
3076 			mutex_exit(&alg_lock);
3077 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
3078 			return (EINVAL);
3079 		}
3080 		ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3081 
3082 		/* check key */
3083 		if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
3084 		    diagnostic) != 0) {
3085 			mutex_exit(&alg_lock);
3086 			return (EINVAL);
3087 		}
3088 	}
3089 	mutex_exit(&alg_lock);
3090 
3091 	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
3092 		    diagnostic));
3093 }
3094 
3095 /*
3096  * Update a security association.  Updates come in two varieties.  The first
3097  * is an update of lifetimes on a non-larval SA.  The second is an update of
3098  * a larval SA, which ends up looking a lot more like an add.
3099  */
3100 static int
3101 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic)
3102 {
3103 	sadb_address_t *dstext =
3104 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3105 	struct sockaddr_in *sin;
3106 
3107 	if (dstext == NULL) {
3108 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3109 		return (EINVAL);
3110 	}
3111 
3112 	sin = (struct sockaddr_in *)(dstext + 1);
3113 	return (sadb_update_sa(mp, ksi,
3114 	    (sin->sin_family == AF_INET6) ? &esp_sadb.s_v6 : &esp_sadb.s_v4,
3115 	    diagnostic, esp_pfkey_q, esp_add_sa));
3116 }
3117 
3118 /*
3119  * Delete a security association.  This is REALLY likely to be code common to
3120  * both AH and ESP.  Find the association, then unlink it.
3121  */
3122 static int
3123 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic)
3124 {
3125 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3126 	sadb_address_t *dstext =
3127 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3128 	sadb_address_t *srcext =
3129 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3130 	struct sockaddr_in *sin;
3131 
3132 	if (assoc == NULL) {
3133 		if (dstext != NULL) {
3134 			sin = (struct sockaddr_in *)(dstext + 1);
3135 		} else if (srcext != NULL) {
3136 			sin = (struct sockaddr_in *)(srcext + 1);
3137 		} else {
3138 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3139 			return (EINVAL);
3140 		}
3141 		return (sadb_purge_sa(mp, ksi,
3142 			    (sin->sin_family == AF_INET6) ? &esp_sadb.s_v6 :
3143 			    &esp_sadb.s_v4, esp_pfkey_q, esp_sadb.s_ip_q));
3144 	}
3145 
3146 	return (sadb_del_sa(mp, ksi, &esp_sadb, diagnostic, esp_pfkey_q));
3147 }
3148 
3149 /*
3150  * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
3151  * messages.
3152  */
3153 static void
3154 esp_dump(mblk_t *mp, keysock_in_t *ksi)
3155 {
3156 	int error;
3157 	sadb_msg_t *samsg;
3158 
3159 	/*
3160 	 * Dump each fanout, bailing if error is non-zero.
3161 	 */
3162 
3163 	error = sadb_dump(esp_pfkey_q, mp, ksi->ks_in_serial, &esp_sadb.s_v4);
3164 	if (error != 0)
3165 		goto bail;
3166 
3167 	error = sadb_dump(esp_pfkey_q, mp, ksi->ks_in_serial, &esp_sadb.s_v6);
3168 bail:
3169 	ASSERT(mp->b_cont != NULL);
3170 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3171 	samsg->sadb_msg_errno = (uint8_t)error;
3172 	sadb_pfkey_echo(esp_pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
3173 	    NULL);
3174 }
3175 
3176 /*
3177  * First-cut reality check for an inbound PF_KEY message.
3178  */
3179 static boolean_t
3180 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi)
3181 {
3182 	int diagnostic;
3183 
3184 	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
3185 		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
3186 		goto badmsg;
3187 	}
3188 	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
3189 	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
3190 		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
3191 		goto badmsg;
3192 	}
3193 	return (B_FALSE);	/* False ==> no failures */
3194 
3195 badmsg:
3196 	sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, diagnostic,
3197 	    ksi->ks_in_serial);
3198 	return (B_TRUE);	/* True ==> failures */
3199 }
3200 
3201 /*
3202  * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
3203  * error cases.  What I receive is a fully-formed, syntactically legal
3204  * PF_KEY message.  I then need to check semantics...
3205  *
3206  * This code may become common to AH and ESP.  Stay tuned.
3207  *
3208  * I also make the assumption that db_ref's are cool.  If this assumption
3209  * is wrong, this means that someone other than keysock or me has been
3210  * mucking with PF_KEY messages.
3211  */
3212 static void
3213 esp_parse_pfkey(mblk_t *mp)
3214 {
3215 	mblk_t *msg = mp->b_cont;
3216 	sadb_msg_t *samsg;
3217 	keysock_in_t *ksi;
3218 	int error;
3219 	int diagnostic = SADB_X_DIAGNOSTIC_NONE;
3220 
3221 	ASSERT(msg != NULL);
3222 	samsg = (sadb_msg_t *)msg->b_rptr;
3223 	ksi = (keysock_in_t *)mp->b_rptr;
3224 
3225 	/*
3226 	 * If applicable, convert unspecified AF_INET6 to unspecified
3227 	 * AF_INET.  And do other address reality checks.
3228 	 */
3229 	if (!sadb_addrfix(ksi, esp_pfkey_q, mp) ||
3230 	    esp_pfkey_reality_failures(mp, ksi)) {
3231 		return;
3232 	}
3233 
3234 	switch (samsg->sadb_msg_type) {
3235 	case SADB_ADD:
3236 		error = esp_add_sa(mp, ksi, &diagnostic);
3237 		if (error != 0) {
3238 			sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic,
3239 			    ksi->ks_in_serial);
3240 		}
3241 		/* else esp_add_sa() took care of things. */
3242 		break;
3243 	case SADB_DELETE:
3244 		error = esp_del_sa(mp, ksi, &diagnostic);
3245 		if (error != 0) {
3246 			sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic,
3247 			    ksi->ks_in_serial);
3248 		}
3249 		/* Else esp_del_sa() took care of things. */
3250 		break;
3251 	case SADB_GET:
3252 		error = sadb_get_sa(mp, ksi, &esp_sadb, &diagnostic,
3253 		    esp_pfkey_q);
3254 		if (error != 0) {
3255 			sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic,
3256 			    ksi->ks_in_serial);
3257 		}
3258 		/* Else sadb_get_sa() took care of things. */
3259 		break;
3260 	case SADB_FLUSH:
3261 		sadbp_flush(&esp_sadb);
3262 		sadb_pfkey_echo(esp_pfkey_q, mp, samsg, ksi, NULL);
3263 		break;
3264 	case SADB_REGISTER:
3265 		/*
3266 		 * Hmmm, let's do it!  Check for extensions (there should
3267 		 * be none), extract the fields, call esp_register_out(),
3268 		 * then either free or report an error.
3269 		 *
3270 		 * Keysock takes care of the PF_KEY bookkeeping for this.
3271 		 */
3272 		if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
3273 		    ksi->ks_in_serial)) {
3274 			freemsg(mp);
3275 		} else {
3276 			/*
3277 			 * Only way this path hits is if there is a memory
3278 			 * failure.  It will not return B_FALSE because of
3279 			 * lack of esp_pfkey_q if I am in wput().
3280 			 */
3281 			sadb_pfkey_error(esp_pfkey_q, mp, ENOMEM, diagnostic,
3282 			    ksi->ks_in_serial);
3283 		}
3284 		break;
3285 	case SADB_UPDATE:
3286 		/*
3287 		 * Find a larval, if not there, find a full one and get
3288 		 * strict.
3289 		 */
3290 		error = esp_update_sa(mp, ksi, &diagnostic);
3291 		if (error != 0) {
3292 			sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic,
3293 			    ksi->ks_in_serial);
3294 		}
3295 		/* else esp_update_sa() took care of things. */
3296 		break;
3297 	case SADB_GETSPI:
3298 		/*
3299 		 * Reserve a new larval entry.
3300 		 */
3301 		esp_getspi(mp, ksi);
3302 		break;
3303 	case SADB_ACQUIRE:
3304 		/*
3305 		 * Find larval and/or ACQUIRE record and kill it (them), I'm
3306 		 * most likely an error.  Inbound ACQUIRE messages should only
3307 		 * have the base header.
3308 		 */
3309 		sadb_in_acquire(samsg, &esp_sadb, esp_pfkey_q);
3310 		freemsg(mp);
3311 		break;
3312 	case SADB_DUMP:
3313 		/*
3314 		 * Dump all entries.
3315 		 */
3316 		esp_dump(mp, ksi);
3317 		/* esp_dump will take care of the return message, etc. */
3318 		break;
3319 	case SADB_EXPIRE:
3320 		/* Should never reach me. */
3321 		sadb_pfkey_error(esp_pfkey_q, mp, EOPNOTSUPP, diagnostic,
3322 		    ksi->ks_in_serial);
3323 		break;
3324 	default:
3325 		sadb_pfkey_error(esp_pfkey_q, mp, EINVAL,
3326 		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
3327 		break;
3328 	}
3329 }
3330 
3331 /*
3332  * Handle case where PF_KEY says it can't find a keysock for one of my
3333  * ACQUIRE messages.
3334  */
3335 static void
3336 esp_keysock_no_socket(mblk_t *mp)
3337 {
3338 	sadb_msg_t *samsg;
3339 	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
3340 
3341 	if (mp->b_cont == NULL) {
3342 		freemsg(mp);
3343 		return;
3344 	}
3345 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3346 
3347 	/*
3348 	 * If keysock can't find any registered, delete the acquire record
3349 	 * immediately, and handle errors.
3350 	 */
3351 	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
3352 		samsg->sadb_msg_errno = kse->ks_err_errno;
3353 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
3354 		/*
3355 		 * Use the write-side of the esp_pfkey_q, in case there is
3356 		 * no esp_sadb.s_ip_q.
3357 		 */
3358 		sadb_in_acquire(samsg, &esp_sadb, WR(esp_pfkey_q));
3359 	}
3360 
3361 	freemsg(mp);
3362 }
3363 
3364 /*
3365  * ESP module write put routine.
3366  */
3367 static void
3368 ipsecesp_wput(queue_t *q, mblk_t *mp)
3369 {
3370 	ipsec_info_t *ii;
3371 	struct iocblk *iocp;
3372 
3373 	esp3dbg(("In esp_wput().\n"));
3374 
3375 	/* NOTE: Each case must take care of freeing or passing mp. */
3376 	switch (mp->b_datap->db_type) {
3377 	case M_CTL:
3378 		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
3379 			/* Not big enough message. */
3380 			freemsg(mp);
3381 			break;
3382 		}
3383 		ii = (ipsec_info_t *)mp->b_rptr;
3384 
3385 		switch (ii->ipsec_info_type) {
3386 		case KEYSOCK_OUT_ERR:
3387 			esp1dbg(("Got KEYSOCK_OUT_ERR message.\n"));
3388 			esp_keysock_no_socket(mp);
3389 			break;
3390 		case KEYSOCK_IN:
3391 			ESP_BUMP_STAT(keysock_in);
3392 			esp3dbg(("Got KEYSOCK_IN message.\n"));
3393 
3394 			/* Parse the message. */
3395 			esp_parse_pfkey(mp);
3396 			break;
3397 		case KEYSOCK_HELLO:
3398 			sadb_keysock_hello(&esp_pfkey_q, q, mp,
3399 			    esp_ager, &esp_event, SADB_SATYPE_ESP);
3400 			break;
3401 		default:
3402 			esp2dbg(("Got M_CTL from above of 0x%x.\n",
3403 			    ii->ipsec_info_type));
3404 			freemsg(mp);
3405 			break;
3406 		}
3407 		break;
3408 	case M_IOCTL:
3409 		iocp = (struct iocblk *)mp->b_rptr;
3410 		switch (iocp->ioc_cmd) {
3411 		case ND_SET:
3412 		case ND_GET:
3413 			if (nd_getset(q, ipsecesp_g_nd, mp)) {
3414 				qreply(q, mp);
3415 				return;
3416 			} else {
3417 				iocp->ioc_error = ENOENT;
3418 			}
3419 			/* FALLTHRU */
3420 		default:
3421 			/* We really don't support any other ioctls, do we? */
3422 
3423 			/* Return EINVAL */
3424 			if (iocp->ioc_error != ENOENT)
3425 				iocp->ioc_error = EINVAL;
3426 			iocp->ioc_count = 0;
3427 			mp->b_datap->db_type = M_IOCACK;
3428 			qreply(q, mp);
3429 			return;
3430 		}
3431 	default:
3432 		esp3dbg(("Got default message, type %d, passing to IP.\n",
3433 		    mp->b_datap->db_type));
3434 		putnext(q, mp);
3435 	}
3436 }
3437 
3438 /*
3439  * Process an outbound ESP packet that can be accelerated by a IPsec
3440  * hardware acceleration capable Provider.
3441  * The caller already inserted and initialized the ESP header.
3442  * This function allocates a tagging M_CTL, and adds room at the end
3443  * of the packet to hold the ICV if authentication is needed.
3444  *
3445  * On success returns B_TRUE, on failure returns B_FALSE and frees the
3446  * mblk chain ipsec_out.
3447  */
3448 static ipsec_status_t
3449 esp_outbound_accelerated(mblk_t *ipsec_out, uint_t icv_len)
3450 {
3451 	ipsec_out_t *io;
3452 	mblk_t *lastmp;
3453 
3454 	ESP_BUMP_STAT(out_accelerated);
3455 
3456 	io = (ipsec_out_t *)ipsec_out->b_rptr;
3457 
3458 	/* mark packet as being accelerated in IPSEC_OUT */
3459 	ASSERT(io->ipsec_out_accelerated == B_FALSE);
3460 	io->ipsec_out_accelerated = B_TRUE;
3461 
3462 	/*
3463 	 * add room at the end of the packet for the ICV if needed
3464 	 */
3465 	if (icv_len > 0) {
3466 		/* go to last mblk */
3467 		lastmp = ipsec_out;	/* For following while loop. */
3468 		do {
3469 			lastmp = lastmp->b_cont;
3470 		} while (lastmp->b_cont != NULL);
3471 
3472 		/* if not enough available room, allocate new mblk */
3473 		if ((lastmp->b_wptr + icv_len) > lastmp->b_datap->db_lim) {
3474 			lastmp->b_cont = allocb(icv_len, BPRI_HI);
3475 			if (lastmp->b_cont == NULL) {
3476 				ESP_BUMP_STAT(out_discards);
3477 				ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL,
3478 				    &ipdrops_esp_nomem, &esp_dropper);
3479 				return (IPSEC_STATUS_FAILED);
3480 			}
3481 			lastmp = lastmp->b_cont;
3482 		}
3483 		lastmp->b_wptr += icv_len;
3484 	}
3485 
3486 	return (IPSEC_STATUS_SUCCESS);
3487 }
3488 
3489 /*
3490  * Process an inbound accelerated ESP packet.
3491  * On success returns B_TRUE, on failure returns B_FALSE and frees the
3492  * mblk chain ipsec_in.
3493  */
3494 static ipsec_status_t
3495 esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4,
3496     ipsa_t *assoc)
3497 {
3498 	ipsec_in_t *ii;
3499 	mblk_t *hada_mp;
3500 	uint32_t icv_len = 0;
3501 	da_ipsec_t *hada;
3502 	ipha_t *ipha;
3503 	ip6_t *ip6h;
3504 	kstat_named_t *counter;
3505 
3506 	ESP_BUMP_STAT(in_accelerated);
3507 
3508 	ii = (ipsec_in_t *)ipsec_in->b_rptr;
3509 	hada_mp = ii->ipsec_in_da;
3510 	ASSERT(hada_mp != NULL);
3511 	hada = (da_ipsec_t *)hada_mp->b_rptr;
3512 
3513 	/*
3514 	 * We only support one level of decapsulation in hardware, so
3515 	 * nuke the pointer.
3516 	 */
3517 	ii->ipsec_in_da = NULL;
3518 	ii->ipsec_in_accelerated = B_FALSE;
3519 
3520 	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) {
3521 		/*
3522 		 * ESP with authentication. We expect the Provider to have
3523 		 * computed the ICV and placed it in the hardware acceleration
3524 		 * data attributes.
3525 		 *
3526 		 * Extract ICV length from attributes M_CTL and sanity check
3527 		 * its value. We allow the mblk to be smaller than da_ipsec_t
3528 		 * for a small ICV, as long as the entire ICV fits within the
3529 		 * mblk.
3530 		 *
3531 		 * Also ensures that the ICV length computed by Provider
3532 		 * corresponds to the ICV length of the agorithm specified by
3533 		 * the SA.
3534 		 */
3535 		icv_len = hada->da_icv_len;
3536 		if ((icv_len != assoc->ipsa_mac_len) ||
3537 		    (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) <
3538 			(sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) {
3539 			esp0dbg(("esp_inbound_accelerated: "
3540 			    "ICV len (%u) incorrect or mblk too small (%u)\n",
3541 			    icv_len, (uint32_t)(MBLKL(hada_mp))));
3542 			counter = &ipdrops_esp_bad_auth;
3543 			goto esp_in_discard;
3544 		}
3545 	}
3546 
3547 	/* get pointers to IP header */
3548 	if (isv4) {
3549 		ipha = (ipha_t *)data_mp->b_rptr;
3550 	} else {
3551 		ip6h = (ip6_t *)data_mp->b_rptr;
3552 	}
3553 
3554 	/*
3555 	 * Compare ICV in ESP packet vs ICV computed by adapter.
3556 	 * We also remove the ICV from the end of the packet since
3557 	 * it will no longer be needed.
3558 	 *
3559 	 * Assume that esp_inbound() already ensured that the pkt
3560 	 * was in one mblk.
3561 	 */
3562 	ASSERT(data_mp->b_cont == NULL);
3563 	data_mp->b_wptr -= icv_len;
3564 	/* adjust IP header */
3565 	if (isv4)
3566 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - icv_len);
3567 	else
3568 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - icv_len);
3569 	if (icv_len && bcmp(hada->da_icv, data_mp->b_wptr, icv_len)) {
3570 		int af;
3571 		void *addr;
3572 
3573 		if (isv4) {
3574 			addr = &ipha->ipha_dst;
3575 			af = AF_INET;
3576 		} else {
3577 			addr = &ip6h->ip6_dst;
3578 			af = AF_INET6;
3579 		}
3580 
3581 		/*
3582 		 * Log the event. Don't print to the console, block
3583 		 * potential denial-of-service attack.
3584 		 */
3585 		ESP_BUMP_STAT(bad_auth);
3586 		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3587 		    "ESP Authentication failed spi %x, dst_addr %s",
3588 		    assoc->ipsa_spi, addr, af);
3589 		counter = &ipdrops_esp_bad_auth;
3590 		goto esp_in_discard;
3591 	}
3592 
3593 	esp3dbg(("esp_inbound_accelerated: ESP authentication succeeded, "
3594 	    "checking replay\n"));
3595 
3596 	ipsec_in->b_cont = data_mp;
3597 
3598 	/*
3599 	 * Remove ESP header and padding from packet.
3600 	 */
3601 	if (!esp_strip_header(data_mp, ii->ipsec_in_v4, assoc->ipsa_iv_len,
3602 		&counter)) {
3603 		esp1dbg(("esp_inbound_accelerated: "
3604 		    "esp_strip_header() failed\n"));
3605 		goto esp_in_discard;
3606 	}
3607 
3608 	freeb(hada_mp);
3609 
3610 	/*
3611 	 * Account for usage..
3612 	 */
3613 	if (!esp_age_bytes(assoc, msgdsize(data_mp), B_TRUE)) {
3614 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
3615 		ESP_BUMP_STAT(bytes_expired);
3616 		IP_ESP_BUMP_STAT(in_discards);
3617 		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3618 		    "ESP association 0x%x, dst %s had bytes expire.\n",
3619 		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam);
3620 		ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
3621 		    &ipdrops_esp_bytes_expire, &esp_dropper);
3622 		return (IPSEC_STATUS_FAILED);
3623 	}
3624 
3625 	/* done processing the packet */
3626 	return (IPSEC_STATUS_SUCCESS);
3627 
3628 esp_in_discard:
3629 	IP_ESP_BUMP_STAT(in_discards);
3630 	freeb(hada_mp);
3631 
3632 	ipsec_in->b_cont = data_mp;	/* For ip_drop_packet()'s sake... */
3633 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, &esp_dropper);
3634 
3635 	return (IPSEC_STATUS_FAILED);
3636 }
3637 
3638 /*
3639  * Wrapper to allow IP to trigger an ESP association failure message
3640  * during inbound SA selection.
3641  */
3642 void
3643 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
3644     uint32_t spi, void *addr, int af)
3645 {
3646 	if (ipsecesp_log_unknown_spi) {
3647 		ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
3648 		    addr, af);
3649 	}
3650 
3651 	ip_drop_packet(mp, B_TRUE, NULL, NULL, &ipdrops_esp_no_sa,
3652 	    &esp_dropper);
3653 }
3654 
3655 /*
3656  * Initialize the ESP input and output processing functions.
3657  */
3658 void
3659 ipsecesp_init_funcs(ipsa_t *sa)
3660 {
3661 	if (sa->ipsa_output_func == NULL)
3662 		sa->ipsa_output_func = esp_outbound;
3663 	if (sa->ipsa_input_func == NULL)
3664 		sa->ipsa_input_func = esp_inbound;
3665 }
3666