xref: /titanic_50/usr/src/uts/common/inet/ip/ipsecesp.c (revision 81fd181a33bee65d5be7a49c6093bb13b382b172)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/errno.h>
30 #include <sys/strlog.h>
31 #include <sys/tihdr.h>
32 #include <sys/socket.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/kmem.h>
36 #include <sys/zone.h>
37 #include <sys/sysmacros.h>
38 #include <sys/cmn_err.h>
39 #include <sys/vtrace.h>
40 #include <sys/debug.h>
41 #include <sys/atomic.h>
42 #include <sys/strsun.h>
43 #include <sys/random.h>
44 #include <netinet/in.h>
45 #include <net/if.h>
46 #include <netinet/ip6.h>
47 #include <net/pfkeyv2.h>
48 #include <net/pfpolicy.h>
49 
50 #include <inet/common.h>
51 #include <inet/mi.h>
52 #include <inet/nd.h>
53 #include <inet/ip.h>
54 #include <inet/ip_impl.h>
55 #include <inet/ip6.h>
56 #include <inet/sadb.h>
57 #include <inet/ipsec_info.h>
58 #include <inet/ipsec_impl.h>
59 #include <inet/ipsecesp.h>
60 #include <inet/ipdrop.h>
61 #include <inet/tcp.h>
62 #include <sys/kstat.h>
63 #include <sys/policy.h>
64 #include <sys/strsun.h>
65 #include <sys/strsubr.h>
66 #include <inet/udp_impl.h>
67 #include <sys/taskq.h>
68 #include <sys/note.h>
69 
70 #include <sys/iphada.h>
71 
72 #include <sys/tsol/tnet.h>
73 
74 /*
75  * Table of ND variables supported by ipsecesp. These are loaded into
76  * ipsecesp_g_nd in ipsecesp_init_nd.
77  * All of these are alterable, within the min/max values given, at run time.
78  */
79 static	ipsecespparam_t	lcl_param_arr[] = {
80 	/* min	max			value	name */
81 	{ 0,	3,			0,	"ipsecesp_debug"},
82 	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
83 	{ 1,	10,			1,	"ipsecesp_reap_delay"},
84 	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecesp_replay_size"},
85 	{ 1,	300,			15,	"ipsecesp_acquire_timeout"},
86 	{ 1,	1800,			90,	"ipsecesp_larval_timeout"},
87 	/* Default lifetime values for ACQUIRE messages. */
88 	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
89 	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
90 	{ 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
91 	{ 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
92 	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
93 	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
94 	{ 0,	1,		0,	"ipsecesp_log_unknown_spi"},
95 	{ 0,	2,		1,	"ipsecesp_padding_check"},
96 	{ 0,	600,		20,	"ipsecesp_nat_keepalive_interval"},
97 };
/*
 * Accessors for the per-stack tunables.  Each one names a member of an
 * ipsecesp_params[] entry and is written after a stack pointer, as in
 * espstack->ipsecesp_debug.  The indices must stay in step with the
 * entry order of lcl_param_arr[].
 */
#define	ipsecesp_debug			\
	ipsecesp_params[0].ipsecesp_param_value
#define	ipsecesp_age_interval		\
	ipsecesp_params[1].ipsecesp_param_value
#define	ipsecesp_age_int_max		\
	ipsecesp_params[1].ipsecesp_param_max
#define	ipsecesp_reap_delay		\
	ipsecesp_params[2].ipsecesp_param_value
#define	ipsecesp_replay_size		\
	ipsecesp_params[3].ipsecesp_param_value
#define	ipsecesp_acquire_timeout	\
	ipsecesp_params[4].ipsecesp_param_value
#define	ipsecesp_larval_timeout		\
	ipsecesp_params[5].ipsecesp_param_value

/* Default lifetimes. */
#define	ipsecesp_default_soft_bytes	\
	ipsecesp_params[6].ipsecesp_param_value
#define	ipsecesp_default_hard_bytes	\
	ipsecesp_params[7].ipsecesp_param_value
#define	ipsecesp_default_soft_addtime	\
	ipsecesp_params[8].ipsecesp_param_value
#define	ipsecesp_default_hard_addtime	\
	ipsecesp_params[9].ipsecesp_param_value
#define	ipsecesp_default_soft_usetime	\
	ipsecesp_params[10].ipsecesp_param_value
#define	ipsecesp_default_hard_usetime	\
	ipsecesp_params[11].ipsecesp_param_value

#define	ipsecesp_log_unknown_spi	\
	ipsecesp_params[12].ipsecesp_param_value
#define	ipsecesp_padding_check		\
	ipsecesp_params[13].ipsecesp_param_value
/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */
124 
/*
 * Debug output helpers.  The "a" argument is a complete, parenthesized
 * printf() argument list, e.g. esp1dbg(espstack, ("value %d\n", v));
 *
 * esp0dbg() always prints.  esp1dbg(), esp2dbg() and esp3dbg() print only
 * when the stack's ipsecesp_debug tunable is at least 1, 2 or 3.
 *
 * The conditional forms are wrapped in do { } while (0) so that each use
 * is a single statement: an "else" following an unbraced
 * "if (x) espNdbg(...);" binds to the caller's "if", not to the test
 * hidden inside the macro.  The espstack argument is parenthesized so any
 * pointer expression may be passed.
 */
#define	esp0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	esp1dbg(espstack, a)						\
do {									\
	if ((espstack)->ipsecesp_debug != 0)				\
		printf a;						\
_NOTE(CONSTCOND)							\
} while (0)

#define	esp2dbg(espstack, a)						\
do {									\
	if ((espstack)->ipsecesp_debug > 1)				\
		printf a;						\
_NOTE(CONSTCOND)							\
} while (0)

#define	esp3dbg(espstack, a)						\
do {									\
	if ((espstack)->ipsecesp_debug > 2)				\
		printf a;						\
_NOTE(CONSTCOND)							\
} while (0)
130 
131 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
132 static int ipsecesp_close(queue_t *);
133 static void ipsecesp_rput(queue_t *, mblk_t *);
134 static void ipsecesp_wput(queue_t *, mblk_t *);
135 static void	*ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
136 static void	ipsecesp_stack_fini(netstackid_t stackid, void *arg);
137 static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);
138 
139 static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
140 static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t);
141 static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *,
142     boolean_t, ipsa_t *);
143 
144 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
145     ipsecesp_stack_t *, mblk_t *);
146 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
147     kstat_named_t **, ipsecesp_stack_t *);
148 static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t);
149 static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *,
150     uint_t);
151 extern void (*cl_inet_getspi)(netstackid_t, uint8_t, uint8_t *, size_t,
152     void *);
153 
154 /* Setable in /etc/system */
155 uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;
156 
157 static struct module_info info = {
158 	5137, "ipsecesp", 0, INFPSZ, 65536, 1024
159 };
160 
161 static struct qinit rinit = {
162 	(pfi_t)ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
163 	NULL
164 };
165 
166 static struct qinit winit = {
167 	(pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
168 	NULL
169 };
170 
171 struct streamtab ipsecespinfo = {
172 	&rinit, &winit, NULL, NULL
173 };
174 
175 static taskq_t *esp_taskq;
176 
177 /*
178  * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
179  *
180  * Question:	Do I need this, given that all instance's esps->esps_wq point
181  *		to IP?
182  *
183  * Answer:	Yes, because I need to know which queue is BOUND to
184  *		IPPROTO_ESP
185  */
186 
187 /*
188  * Stats.  This may eventually become a full-blown SNMP MIB once that spec
189  * stabilizes.
190  */
191 
192 typedef struct esp_kstats_s {
193 	kstat_named_t esp_stat_num_aalgs;
194 	kstat_named_t esp_stat_good_auth;
195 	kstat_named_t esp_stat_bad_auth;
196 	kstat_named_t esp_stat_bad_padding;
197 	kstat_named_t esp_stat_replay_failures;
198 	kstat_named_t esp_stat_replay_early_failures;
199 	kstat_named_t esp_stat_keysock_in;
200 	kstat_named_t esp_stat_out_requests;
201 	kstat_named_t esp_stat_acquire_requests;
202 	kstat_named_t esp_stat_bytes_expired;
203 	kstat_named_t esp_stat_out_discards;
204 	kstat_named_t esp_stat_in_accelerated;
205 	kstat_named_t esp_stat_out_accelerated;
206 	kstat_named_t esp_stat_noaccel;
207 	kstat_named_t esp_stat_crypto_sync;
208 	kstat_named_t esp_stat_crypto_async;
209 	kstat_named_t esp_stat_crypto_failures;
210 	kstat_named_t esp_stat_num_ealgs;
211 	kstat_named_t esp_stat_bad_decrypt;
212 	kstat_named_t esp_stat_sa_port_renumbers;
213 } esp_kstats_t;
214 
/*
 * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if
 * kstat_create_netstack for espstack->esp_ksp succeeds, but when it
 * fails, it will be NULL.  Creation is attempted for every stack
 * instance, so it *could* fail; hence ESP_BUMP_STAT and ESP_DEBUMP_STAT
 * check for NULL and do nothing when there is no kstat to update.
 *
 * "espstack" is any expression yielding a stack pointer (it is
 * parenthesized, but evaluated twice, so it must not have side effects).
 * "x" is the counter name without the "esp_stat_" prefix.
 */
#define	ESP_BUMP_STAT(espstack, x)					\
do {									\
	if ((espstack)->esp_kstats != NULL)				\
		((espstack)->esp_kstats->esp_stat_ ## x).value.ui64++;	\
_NOTE(CONSTCOND)							\
} while (0)

#define	ESP_DEBUMP_STAT(espstack, x)					\
do {									\
	if ((espstack)->esp_kstats != NULL)				\
		((espstack)->esp_kstats->esp_stat_ ## x).value.ui64--;	\
_NOTE(CONSTCOND)							\
} while (0)
235 
236 static int	esp_kstat_update(kstat_t *, int);
237 
238 static boolean_t
239 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
240 {
241 	espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
242 	    "net", KSTAT_TYPE_NAMED,
243 	    sizeof (esp_kstats_t) / sizeof (kstat_named_t),
244 	    KSTAT_FLAG_PERSISTENT, stackid);
245 
246 	if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
247 		return (B_FALSE);
248 
249 	espstack->esp_kstats = espstack->esp_ksp->ks_data;
250 
251 	espstack->esp_ksp->ks_update = esp_kstat_update;
252 	espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid;
253 
254 #define	K64 KSTAT_DATA_UINT64
255 #define	KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64)
256 
257 	KI(num_aalgs);
258 	KI(num_ealgs);
259 	KI(good_auth);
260 	KI(bad_auth);
261 	KI(bad_padding);
262 	KI(replay_failures);
263 	KI(replay_early_failures);
264 	KI(keysock_in);
265 	KI(out_requests);
266 	KI(acquire_requests);
267 	KI(bytes_expired);
268 	KI(out_discards);
269 	KI(in_accelerated);
270 	KI(out_accelerated);
271 	KI(noaccel);
272 	KI(crypto_sync);
273 	KI(crypto_async);
274 	KI(crypto_failures);
275 	KI(bad_decrypt);
276 	KI(sa_port_renumbers);
277 
278 #undef KI
279 #undef K64
280 
281 	kstat_install(espstack->esp_ksp);
282 
283 	return (B_TRUE);
284 }
285 
286 static int
287 esp_kstat_update(kstat_t *kp, int rw)
288 {
289 	esp_kstats_t *ekp;
290 	netstackid_t	stackid = (zoneid_t)(uintptr_t)kp->ks_private;
291 	netstack_t	*ns;
292 	ipsec_stack_t	*ipss;
293 
294 	if ((kp == NULL) || (kp->ks_data == NULL))
295 		return (EIO);
296 
297 	if (rw == KSTAT_WRITE)
298 		return (EACCES);
299 
300 	ns = netstack_find_by_stackid(stackid);
301 	if (ns == NULL)
302 		return (-1);
303 	ipss = ns->netstack_ipsec;
304 	if (ipss == NULL) {
305 		netstack_rele(ns);
306 		return (-1);
307 	}
308 	ekp = (esp_kstats_t *)kp->ks_data;
309 
310 	mutex_enter(&ipss->ipsec_alg_lock);
311 	ekp->esp_stat_num_aalgs.value.ui64 =
312 	    ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
313 	ekp->esp_stat_num_ealgs.value.ui64 =
314 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
315 	mutex_exit(&ipss->ipsec_alg_lock);
316 
317 	netstack_rele(ns);
318 	return (0);
319 }
320 
321 #ifdef DEBUG
322 /*
323  * Debug routine, useful to see pre-encryption data.
324  */
325 static char *
326 dump_msg(mblk_t *mp)
327 {
328 	char tmp_str[3], tmp_line[256];
329 
330 	while (mp != NULL) {
331 		unsigned char *ptr;
332 
333 		printf("mblk address 0x%p, length %ld, db_ref %d "
334 		    "type %d, base 0x%p, lim 0x%p\n",
335 		    (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
336 		    mp->b_datap->db_ref, mp->b_datap->db_type,
337 		    (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
338 		ptr = mp->b_rptr;
339 
340 		tmp_line[0] = '\0';
341 		while (ptr < mp->b_wptr) {
342 			uint_t diff;
343 
344 			diff = (ptr - mp->b_rptr);
345 			if (!(diff & 0x1f)) {
346 				if (strlen(tmp_line) > 0) {
347 					printf("bytes: %s\n", tmp_line);
348 					tmp_line[0] = '\0';
349 				}
350 			}
351 			if (!(diff & 0x3))
352 				(void) strcat(tmp_line, " ");
353 			(void) sprintf(tmp_str, "%02x", *ptr);
354 			(void) strcat(tmp_line, tmp_str);
355 			ptr++;
356 		}
357 		if (strlen(tmp_line) > 0)
358 			printf("bytes: %s\n", tmp_line);
359 
360 		mp = mp->b_cont;
361 	}
362 
363 	return ("\n");
364 }
365 
366 #else /* DEBUG */
367 static char *
368 dump_msg(mblk_t *mp)
369 {
370 	printf("Find value of mp %p.\n", mp);
371 	return ("\n");
372 }
373 #endif /* DEBUG */
374 
375 /*
376  * Don't have to lock age_interval, as only one thread will access it at
377  * a time, because I control the one function that does with timeout().
378  */
379 static void
380 esp_ager(void *arg)
381 {
382 	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
383 	netstack_t	*ns = espstack->ipsecesp_netstack;
384 	hrtime_t begin = gethrtime();
385 
386 	sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q,
387 	    espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns);
388 	sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q,
389 	    espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns);
390 
391 	espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q,
392 	    esp_ager, espstack,
393 	    &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max,
394 	    info.mi_idnum);
395 }
396 
397 /*
398  * Get an ESP NDD parameter.
399  */
400 /* ARGSUSED */
401 static int
402 ipsecesp_param_get(q, mp, cp, cr)
403 	queue_t	*q;
404 	mblk_t	*mp;
405 	caddr_t	cp;
406 	cred_t *cr;
407 {
408 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
409 	uint_t value;
410 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
411 
412 	mutex_enter(&espstack->ipsecesp_param_lock);
413 	value = ipsecesppa->ipsecesp_param_value;
414 	mutex_exit(&espstack->ipsecesp_param_lock);
415 
416 	(void) mi_mpprintf(mp, "%u", value);
417 	return (0);
418 }
419 
420 /*
421  * This routine sets an NDD variable in a ipsecespparam_t structure.
422  */
423 /* ARGSUSED */
424 static int
425 ipsecesp_param_set(q, mp, value, cp, cr)
426 	queue_t	*q;
427 	mblk_t	*mp;
428 	char	*value;
429 	caddr_t	cp;
430 	cred_t *cr;
431 {
432 	ulong_t	new_value;
433 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
434 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
435 
436 	/*
437 	 * Fail the request if the new value does not lie within the
438 	 * required bounds.
439 	 */
440 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
441 	    new_value < ipsecesppa->ipsecesp_param_min ||
442 	    new_value > ipsecesppa->ipsecesp_param_max) {
443 		return (EINVAL);
444 	}
445 
446 	/* Set the new value */
447 	mutex_enter(&espstack->ipsecesp_param_lock);
448 	ipsecesppa->ipsecesp_param_value = new_value;
449 	mutex_exit(&espstack->ipsecesp_param_lock);
450 	return (0);
451 }
452 
453 /*
454  * Using lifetime NDD variables, fill in an extended combination's
455  * lifetime information.
456  */
457 void
458 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
459 {
460 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
461 
462 	ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes;
463 	ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes;
464 	ecomb->sadb_x_ecomb_soft_addtime =
465 	    espstack->ipsecesp_default_soft_addtime;
466 	ecomb->sadb_x_ecomb_hard_addtime =
467 	    espstack->ipsecesp_default_hard_addtime;
468 	ecomb->sadb_x_ecomb_soft_usetime =
469 	    espstack->ipsecesp_default_soft_usetime;
470 	ecomb->sadb_x_ecomb_hard_usetime =
471 	    espstack->ipsecesp_default_hard_usetime;
472 }
473 
474 /*
475  * Initialize things for ESP at module load time.
476  */
477 boolean_t
478 ipsecesp_ddi_init(void)
479 {
480 	esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
481 	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
482 
483 	/*
484 	 * We want to be informed each time a stack is created or
485 	 * destroyed in the kernel, so we can maintain the
486 	 * set of ipsecesp_stack_t's.
487 	 */
488 	netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL,
489 	    ipsecesp_stack_fini);
490 
491 	return (B_TRUE);
492 }
493 
494 /*
495  * Walk through the param array specified registering each element with the
496  * named dispatch handler.
497  */
498 static boolean_t
499 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
500 {
501 	for (; cnt-- > 0; espp++) {
502 		if (espp->ipsecesp_param_name != NULL &&
503 		    espp->ipsecesp_param_name[0]) {
504 			if (!nd_load(ndp,
505 			    espp->ipsecesp_param_name,
506 			    ipsecesp_param_get, ipsecesp_param_set,
507 			    (caddr_t)espp)) {
508 				nd_free(ndp);
509 				return (B_FALSE);
510 			}
511 		}
512 	}
513 	return (B_TRUE);
514 }
515 /*
516  * Initialize things for ESP for each stack instance
517  */
518 static void *
519 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
520 {
521 	ipsecesp_stack_t	*espstack;
522 	ipsecespparam_t		*espp;
523 
524 	espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
525 	    KM_SLEEP);
526 	espstack->ipsecesp_netstack = ns;
527 
528 	espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
529 	espstack->ipsecesp_params = espp;
530 	bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));
531 
532 	(void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
533 	    A_CNT(lcl_param_arr));
534 
535 	(void) esp_kstat_init(espstack, stackid);
536 
537 	espstack->esp_sadb.s_acquire_timeout =
538 	    &espstack->ipsecesp_acquire_timeout;
539 	espstack->esp_sadb.s_acqfn = esp_send_acquire;
540 	sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
541 	    espstack->ipsecesp_netstack);
542 
543 	mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);
544 
545 	ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
546 	return (espstack);
547 }
548 
549 /*
550  * Destroy things for ESP at module unload time.
551  */
552 void
553 ipsecesp_ddi_destroy(void)
554 {
555 	netstack_unregister(NS_IPSECESP);
556 	taskq_destroy(esp_taskq);
557 }
558 
559 /*
560  * Destroy things for ESP for one stack instance
561  */
562 static void
563 ipsecesp_stack_fini(netstackid_t stackid, void *arg)
564 {
565 	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
566 
567 	if (espstack->esp_pfkey_q != NULL) {
568 		(void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
569 	}
570 	espstack->esp_sadb.s_acqfn = NULL;
571 	espstack->esp_sadb.s_acquire_timeout = NULL;
572 	sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
573 	ip_drop_unregister(&espstack->esp_dropper);
574 	mutex_destroy(&espstack->ipsecesp_param_lock);
575 	nd_free(&espstack->ipsecesp_g_nd);
576 
577 	kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
578 	espstack->ipsecesp_params = NULL;
579 	kstat_delete_netstack(espstack->esp_ksp, stackid);
580 	espstack->esp_ksp = NULL;
581 	espstack->esp_kstats = NULL;
582 	kmem_free(espstack, sizeof (*espstack));
583 }
584 
585 /*
586  * ESP module open routine.
587  */
588 /* ARGSUSED */
589 static int
590 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
591 {
592 	netstack_t		*ns;
593 	ipsecesp_stack_t	*espstack;
594 
595 	if (secpolicy_ip_config(credp, B_FALSE) != 0)
596 		return (EPERM);
597 
598 	if (q->q_ptr != NULL)
599 		return (0);  /* Re-open of an already open instance. */
600 
601 	if (sflag != MODOPEN)
602 		return (EINVAL);
603 
604 	ns = netstack_find_by_cred(credp);
605 	ASSERT(ns != NULL);
606 	espstack = ns->netstack_ipsecesp;
607 	ASSERT(espstack != NULL);
608 
609 	/*
610 	 * ASSUMPTIONS (because I'm MT_OCEXCL):
611 	 *
612 	 *	* I'm being pushed on top of IP for all my opens (incl. #1).
613 	 *	* Only ipsecesp_open() can write into esp_sadb.s_ip_q.
614 	 *	* Because of this, I can check lazily for esp_sadb.s_ip_q.
615 	 *
616 	 *  If these assumptions are wrong, I'm in BIG trouble...
617 	 */
618 
619 	q->q_ptr = espstack;
620 	WR(q)->q_ptr = q->q_ptr;
621 
622 	if (espstack->esp_sadb.s_ip_q == NULL) {
623 		struct T_unbind_req *tur;
624 
625 		espstack->esp_sadb.s_ip_q = WR(q);
626 		/* Allocate an unbind... */
627 		espstack->esp_ip_unbind = allocb(sizeof (struct T_unbind_req),
628 		    BPRI_HI);
629 
630 		/*
631 		 * Send down T_BIND_REQ to bind IPPROTO_ESP.
632 		 * Handle the ACK here in ESP.
633 		 */
634 		qprocson(q);
635 		if (espstack->esp_ip_unbind == NULL ||
636 		    !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) {
637 			if (espstack->esp_ip_unbind != NULL) {
638 				freeb(espstack->esp_ip_unbind);
639 				espstack->esp_ip_unbind = NULL;
640 			}
641 			q->q_ptr = NULL;
642 			netstack_rele(espstack->ipsecesp_netstack);
643 			return (ENOMEM);
644 		}
645 
646 		espstack->esp_ip_unbind->b_datap->db_type = M_PROTO;
647 		tur = (struct T_unbind_req *)espstack->esp_ip_unbind->b_rptr;
648 		tur->PRIM_type = T_UNBIND_REQ;
649 	} else {
650 		qprocson(q);
651 	}
652 
653 	/*
654 	 * For now, there's not much I can do.  I'll be getting a message
655 	 * passed down to me from keysock (in my wput), and a T_BIND_ACK
656 	 * up from IP (in my rput).
657 	 */
658 
659 	return (0);
660 }
661 
662 /*
663  * ESP module close routine.
664  */
665 static int
666 ipsecesp_close(queue_t *q)
667 {
668 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
669 
670 	/*
671 	 * If esp_sadb.s_ip_q is attached to this instance, send a
672 	 * T_UNBIND_REQ to IP for the instance before doing
673 	 * a qprocsoff().
674 	 */
675 	if (WR(q) == espstack->esp_sadb.s_ip_q &&
676 	    espstack->esp_ip_unbind != NULL) {
677 		putnext(WR(q), espstack->esp_ip_unbind);
678 		espstack->esp_ip_unbind = NULL;
679 	}
680 
681 	/*
682 	 * Clean up q_ptr, if needed.
683 	 */
684 	qprocsoff(q);
685 
686 	/* Keysock queue check is safe, because of OCEXCL perimeter. */
687 
688 	if (q == espstack->esp_pfkey_q) {
689 		esp1dbg(espstack,
690 		    ("ipsecesp_close:  Ummm... keysock is closing ESP.\n"));
691 		espstack->esp_pfkey_q = NULL;
692 		/* Detach qtimeouts. */
693 		(void) quntimeout(q, espstack->esp_event);
694 	}
695 
696 	if (WR(q) == espstack->esp_sadb.s_ip_q) {
697 		/*
698 		 * If the esp_sadb.s_ip_q is attached to this instance, find
699 		 * another.  The OCEXCL outer perimeter helps us here.
700 		 */
701 		espstack->esp_sadb.s_ip_q = NULL;
702 
703 		/*
704 		 * Find a replacement queue for esp_sadb.s_ip_q.
705 		 */
706 		if (espstack->esp_pfkey_q != NULL &&
707 		    espstack->esp_pfkey_q != RD(q)) {
708 			/*
709 			 * See if we can use the pfkey_q.
710 			 */
711 			espstack->esp_sadb.s_ip_q = WR(espstack->esp_pfkey_q);
712 		}
713 
714 		if (espstack->esp_sadb.s_ip_q == NULL ||
715 		    !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) {
716 			esp1dbg(espstack, ("ipsecesp: Can't reassign ip_q.\n"));
717 			espstack->esp_sadb.s_ip_q = NULL;
718 		} else {
719 			espstack->esp_ip_unbind =
720 			    allocb(sizeof (struct T_unbind_req), BPRI_HI);
721 
722 			if (espstack->esp_ip_unbind != NULL) {
723 				struct T_unbind_req *tur;
724 
725 				espstack->esp_ip_unbind->b_datap->db_type =
726 				    M_PROTO;
727 				tur = (struct T_unbind_req *)
728 				    espstack->esp_ip_unbind->b_rptr;
729 				tur->PRIM_type = T_UNBIND_REQ;
730 			}
731 			/* If it's NULL, I can't do much here. */
732 		}
733 	}
734 
735 	netstack_rele(espstack->ipsecesp_netstack);
736 	return (0);
737 }
738 
739 /*
740  * Add a number of bytes to what the SA has protected so far.  Return
741  * B_TRUE if the SA can still protect that many bytes.
742  *
743  * Caller must REFRELE the passed-in assoc.  This function must REFRELE
744  * any obtained peer SA.
745  */
746 static boolean_t
747 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
748 {
749 	ipsa_t *inassoc, *outassoc;
750 	isaf_t *bucket;
751 	boolean_t inrc, outrc, isv6;
752 	sadb_t *sp;
753 	int outhash;
754 	netstack_t		*ns = assoc->ipsa_netstack;
755 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
756 
757 	/* No peer?  No problem! */
758 	if (!assoc->ipsa_haspeer) {
759 		return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
760 		    B_TRUE));
761 	}
762 
763 	/*
764 	 * Otherwise, we want to grab both the original assoc and its peer.
765 	 * There might be a race for this, but if it's a real race, two
766 	 * expire messages may occur.  We limit this by only sending the
767 	 * expire message on one of the peers, we'll pick the inbound
768 	 * arbitrarily.
769 	 *
770 	 * If we need tight synchronization on the peer SA, then we need to
771 	 * reconsider.
772 	 */
773 
774 	/* Use address length to select IPv6/IPv4 */
775 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
776 	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
777 
778 	if (inbound) {
779 		inassoc = assoc;
780 		if (isv6) {
781 			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
782 			    &inassoc->ipsa_dstaddr));
783 		} else {
784 			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
785 			    &inassoc->ipsa_dstaddr));
786 		}
787 		bucket = &sp->sdb_of[outhash];
788 		mutex_enter(&bucket->isaf_lock);
789 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
790 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
791 		    inassoc->ipsa_addrfam);
792 		mutex_exit(&bucket->isaf_lock);
793 		if (outassoc == NULL) {
794 			/* Q: Do we wish to set haspeer == B_FALSE? */
795 			esp0dbg(("esp_age_bytes: "
796 			    "can't find peer for inbound.\n"));
797 			return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
798 			    bytes, B_TRUE));
799 		}
800 	} else {
801 		outassoc = assoc;
802 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
803 		mutex_enter(&bucket->isaf_lock);
804 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
805 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
806 		    outassoc->ipsa_addrfam);
807 		mutex_exit(&bucket->isaf_lock);
808 		if (inassoc == NULL) {
809 			/* Q: Do we wish to set haspeer == B_FALSE? */
810 			esp0dbg(("esp_age_bytes: "
811 			    "can't find peer for outbound.\n"));
812 			return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
813 			    bytes, B_TRUE));
814 		}
815 	}
816 
817 	inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
818 	outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);
819 
820 	/*
821 	 * REFRELE any peer SA.
822 	 *
823 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
824 	 * them in { }.
825 	 */
826 	if (inbound) {
827 		IPSA_REFRELE(outassoc);
828 	} else {
829 		IPSA_REFRELE(inassoc);
830 	}
831 
832 	return (inrc && outrc);
833 }
834 
835 /*
836  * Do incoming NAT-T manipulations for packet.
837  */
838 static ipsec_status_t
839 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
840 {
841 	ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
842 	tcpha_t *tcph;
843 	udpha_t *udpha;
844 	/* Initialize to our inbound cksum adjustment... */
845 	uint32_t sum = assoc->ipsa_inbound_cksum;
846 
847 	switch (ipha->ipha_protocol) {
848 	case IPPROTO_TCP:
849 		tcph = (tcpha_t *)(data_mp->b_rptr +
850 		    IPH_HDR_LENGTH(ipha));
851 
852 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
853 		sum += ~ntohs(tcph->tha_sum) & 0xFFFF;
854 		DOWN_SUM(sum);
855 		DOWN_SUM(sum);
856 		tcph->tha_sum = ~htons(sum);
857 		break;
858 	case IPPROTO_UDP:
859 		udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));
860 
861 		if (udpha->uha_checksum != 0) {
862 			/* Adujst if the inbound one was not zero. */
863 			sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
864 			DOWN_SUM(sum);
865 			DOWN_SUM(sum);
866 			udpha->uha_checksum = ~htons(sum);
867 			if (udpha->uha_checksum == 0)
868 				udpha->uha_checksum = 0xFFFF;
869 		}
870 #undef DOWN_SUM
871 		break;
872 	case IPPROTO_IP:
873 		/*
874 		 * This case is only an issue for self-encapsulated
875 		 * packets.  So for now, fall through.
876 		 */
877 		break;
878 	}
879 	return (IPSEC_STATUS_SUCCESS);
880 }
881 
882 
883 /*
884  * Strip ESP header, check padding, and fix IP header.
885  * Returns B_TRUE on success, B_FALSE if an error occurred.
886  */
887 static boolean_t
888 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
889     kstat_named_t **counter, ipsecesp_stack_t *espstack)
890 {
891 	ipha_t *ipha;
892 	ip6_t *ip6h;
893 	uint_t divpoint;
894 	mblk_t *scratch;
895 	uint8_t nexthdr, padlen;
896 	uint8_t lastpad;
897 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
898 	uint8_t *lastbyte;
899 
900 	/*
901 	 * Strip ESP data and fix IP header.
902 	 *
903 	 * XXX In case the beginning of esp_inbound() changes to not do a
904 	 * pullup, this part of the code can remain unchanged.
905 	 */
906 	if (isv4) {
907 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
908 		ipha = (ipha_t *)data_mp->b_rptr;
909 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
910 		    IPH_HDR_LENGTH(ipha));
911 		divpoint = IPH_HDR_LENGTH(ipha);
912 	} else {
913 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
914 		ip6h = (ip6_t *)data_mp->b_rptr;
915 		divpoint = ip_hdr_length_v6(data_mp, ip6h);
916 	}
917 
918 	scratch = data_mp;
919 	while (scratch->b_cont != NULL)
920 		scratch = scratch->b_cont;
921 
922 	ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);
923 
924 	/*
925 	 * "Next header" and padding length are the last two bytes in the
926 	 * ESP-protected datagram, thus the explicit - 1 and - 2.
927 	 * lastpad is the last byte of the padding, which can be used for
928 	 * a quick check to see if the padding is correct.
929 	 */
930 	lastbyte = scratch->b_wptr - 1;
931 	nexthdr = *lastbyte--;
932 	padlen = *lastbyte--;
933 
934 	if (isv4) {
935 		/* Fix part of the IP header. */
936 		ipha->ipha_protocol = nexthdr;
937 		/*
938 		 * Reality check the padlen.  The explicit - 2 is for the
939 		 * padding length and the next-header bytes.
940 		 */
941 		if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
942 		    sizeof (esph_t) - ivlen) {
943 			ESP_BUMP_STAT(espstack, bad_decrypt);
944 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
945 			    info.mi_idnum, 0, 0,
946 			    SL_ERROR | SL_WARN,
947 			    "Corrupt ESP packet (padlen too big).\n");
948 			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
949 			    padlen));
950 			esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
951 			    "hdr - ivlen(%d) = %d.\n",
952 			    ntohs(ipha->ipha_length), ivlen,
953 			    (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
954 			    2 - sizeof (esph_t) - ivlen)));
955 			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
956 			return (B_FALSE);
957 		}
958 
959 		/*
960 		 * Fix the rest of the header.  The explicit - 2 is for the
961 		 * padding length and the next-header bytes.
962 		 */
963 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
964 		    2 - sizeof (esph_t) - ivlen);
965 		ipha->ipha_hdr_checksum = 0;
966 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
967 	} else {
968 		if (ip6h->ip6_nxt == IPPROTO_ESP) {
969 			ip6h->ip6_nxt = nexthdr;
970 		} else {
971 			ip6_pkt_t ipp;
972 
973 			bzero(&ipp, sizeof (ipp));
974 			(void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL);
975 			if (ipp.ipp_dstopts != NULL) {
976 				ipp.ipp_dstopts->ip6d_nxt = nexthdr;
977 			} else if (ipp.ipp_rthdr != NULL) {
978 				ipp.ipp_rthdr->ip6r_nxt = nexthdr;
979 			} else if (ipp.ipp_hopopts != NULL) {
980 				ipp.ipp_hopopts->ip6h_nxt = nexthdr;
981 			} else {
982 				/* Panic a DEBUG kernel. */
983 				ASSERT(ipp.ipp_hopopts != NULL);
984 				/* Otherwise, pretend it's IP + ESP. */
985 				cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
986 				ip6h->ip6_nxt = nexthdr;
987 			}
988 		}
989 
990 		if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
991 		    ivlen) {
992 			ESP_BUMP_STAT(espstack, bad_decrypt);
993 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
994 			    info.mi_idnum, 0, 0,
995 			    SL_ERROR | SL_WARN,
996 			    "Corrupt ESP packet (v6 padlen too big).\n");
997 			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
998 			    padlen));
999 			esp1dbg(espstack,
1000 			    ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
1001 			    "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
1002 			    + sizeof (ip6_t)), ivlen,
1003 			    (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
1004 			    sizeof (esph_t) - ivlen)));
1005 			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
1006 			return (B_FALSE);
1007 		}
1008 
1009 
1010 		/*
1011 		 * Fix the rest of the header.  The explicit - 2 is for the
1012 		 * padding length and the next-header bytes.  IPv6 is nice,
1013 		 * because there's no hdr checksum!
1014 		 */
1015 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
1016 		    2 - sizeof (esph_t) - ivlen);
1017 	}
1018 
1019 	if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
1020 		/*
1021 		 * Weak padding check: compare last-byte to length, they
1022 		 * should be equal.
1023 		 */
1024 		lastpad = *lastbyte--;
1025 
1026 		if (padlen != lastpad) {
1027 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
1028 			    info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
1029 			    "Corrupt ESP packet (lastpad != padlen).\n");
1030 			esp1dbg(espstack,
1031 			    ("lastpad (%d) not equal to padlen (%d):\n",
1032 			    lastpad, padlen));
1033 			ESP_BUMP_STAT(espstack, bad_padding);
1034 			*counter = DROPPER(ipss, ipds_esp_bad_padding);
1035 			return (B_FALSE);
1036 		}
1037 
1038 		/*
1039 		 * Strong padding check: Check all pad bytes to see that
1040 		 * they're ascending.  Go backwards using a descending counter
1041 		 * to verify.  padlen == 1 is checked by previous block, so
1042 		 * only bother if we've more than 1 byte of padding.
1043 		 * Consequently, start the check one byte before the location
1044 		 * of "lastpad".
1045 		 */
1046 		if (espstack->ipsecesp_padding_check > 1) {
1047 			/*
1048 			 * This assert may have to become an if and a pullup
1049 			 * if we start accepting multi-dblk mblks. For now,
1050 			 * though, any packet here will have been pulled up in
1051 			 * esp_inbound.
1052 			 */
1053 			ASSERT(MBLKL(scratch) >= lastpad + 3);
1054 
1055 			/*
1056 			 * Use "--lastpad" because we already checked the very
1057 			 * last pad byte previously.
1058 			 */
1059 			while (--lastpad != 0) {
1060 				if (lastpad != *lastbyte) {
1061 					ipsec_rl_strlog(
1062 					    espstack->ipsecesp_netstack,
1063 					    info.mi_idnum, 0, 0,
1064 					    SL_ERROR | SL_WARN, "Corrupt ESP "
1065 					    "packet (bad padding).\n");
1066 					esp1dbg(espstack,
1067 					    ("padding not in correct"
1068 					    " format:\n"));
1069 					ESP_BUMP_STAT(espstack, bad_padding);
1070 					*counter = DROPPER(ipss,
1071 					    ipds_esp_bad_padding);
1072 					return (B_FALSE);
1073 				}
1074 				lastbyte--;
1075 			}
1076 		}
1077 	}
1078 
1079 	/* Trim off the padding. */
1080 	ASSERT(data_mp->b_cont == NULL);
1081 	data_mp->b_wptr -= (padlen + 2);
1082 
1083 	/*
1084 	 * Remove the ESP header.
1085 	 *
1086 	 * The above assertions about data_mp's size will make this work.
1087 	 *
1088 	 * XXX  Question:  If I send up and get back a contiguous mblk,
1089 	 * would it be quicker to bcopy over, or keep doing the dupb stuff?
1090 	 * I go with copying for now.
1091 	 */
1092 
1093 	if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
1094 	    IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
1095 		uint8_t *start = data_mp->b_rptr;
1096 		uint32_t *src, *dst;
1097 
1098 		src = (uint32_t *)(start + divpoint);
1099 		dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);
1100 
1101 		ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
1102 		    IS_P2ALIGNED(src, sizeof (uint32_t)));
1103 
1104 		do {
1105 			src--;
1106 			dst--;
1107 			*dst = *src;
1108 		} while (src != (uint32_t *)start);
1109 
1110 		data_mp->b_rptr = (uchar_t *)dst;
1111 	} else {
1112 		uint8_t *start = data_mp->b_rptr;
1113 		uint8_t *src, *dst;
1114 
1115 		src = start + divpoint;
1116 		dst = src + sizeof (esph_t) + ivlen;
1117 
1118 		do {
1119 			src--;
1120 			dst--;
1121 			*dst = *src;
1122 		} while (src != start);
1123 
1124 		data_mp->b_rptr = dst;
1125 	}
1126 
1127 	esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
1128 	esp2dbg(espstack, (dump_msg(data_mp)));
1129 
1130 	return (B_TRUE);
1131 }
1132 
1133 /*
1134  * Updating use times can be tricky business if the ipsa_haspeer flag is
1135  * set.  This function is called once in an SA's lifetime.
1136  *
1137  * Caller has to REFRELE "assoc" which is passed in.  This function has
1138  * to REFRELE any peer SA that is obtained.
1139  */
1140 static void
1141 esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
1142 {
1143 	ipsa_t *inassoc, *outassoc;
1144 	isaf_t *bucket;
1145 	sadb_t *sp;
1146 	int outhash;
1147 	boolean_t isv6;
1148 	netstack_t		*ns = assoc->ipsa_netstack;
1149 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
1150 
1151 	/* No peer?  No problem! */
1152 	if (!assoc->ipsa_haspeer) {
1153 		sadb_set_usetime(assoc);
1154 		return;
1155 	}
1156 
1157 	/*
1158 	 * Otherwise, we want to grab both the original assoc and its peer.
1159 	 * There might be a race for this, but if it's a real race, the times
1160 	 * will be out-of-synch by at most a second, and since our time
1161 	 * granularity is a second, this won't be a problem.
1162 	 *
1163 	 * If we need tight synchronization on the peer SA, then we need to
1164 	 * reconsider.
1165 	 */
1166 
1167 	/* Use address length to select IPv6/IPv4 */
1168 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
1169 	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
1170 
1171 	if (inbound) {
1172 		inassoc = assoc;
1173 		if (isv6) {
1174 			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
1175 			    &inassoc->ipsa_dstaddr));
1176 		} else {
1177 			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
1178 			    &inassoc->ipsa_dstaddr));
1179 		}
1180 		bucket = &sp->sdb_of[outhash];
1181 		mutex_enter(&bucket->isaf_lock);
1182 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1183 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1184 		    inassoc->ipsa_addrfam);
1185 		mutex_exit(&bucket->isaf_lock);
1186 		if (outassoc == NULL) {
1187 			/* Q: Do we wish to set haspeer == B_FALSE? */
1188 			esp0dbg(("esp_set_usetime: "
1189 			    "can't find peer for inbound.\n"));
1190 			sadb_set_usetime(inassoc);
1191 			return;
1192 		}
1193 	} else {
1194 		outassoc = assoc;
1195 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1196 		mutex_enter(&bucket->isaf_lock);
1197 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1198 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1199 		    outassoc->ipsa_addrfam);
1200 		mutex_exit(&bucket->isaf_lock);
1201 		if (inassoc == NULL) {
1202 			/* Q: Do we wish to set haspeer == B_FALSE? */
1203 			esp0dbg(("esp_set_usetime: "
1204 			    "can't find peer for outbound.\n"));
1205 			sadb_set_usetime(outassoc);
1206 			return;
1207 		}
1208 	}
1209 
1210 	/* Update usetime on both. */
1211 	sadb_set_usetime(inassoc);
1212 	sadb_set_usetime(outassoc);
1213 
1214 	/*
1215 	 * REFRELE any peer SA.
1216 	 *
1217 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
1218 	 * them in { }.
1219 	 */
1220 	if (inbound) {
1221 		IPSA_REFRELE(outassoc);
1222 	} else {
1223 		IPSA_REFRELE(inassoc);
1224 	}
1225 }
1226 
1227 /*
1228  * Handle ESP inbound data for IPv4 and IPv6.
1229  * On success returns B_TRUE, on failure returns B_FALSE and frees the
1230  * mblk chain ipsec_in_mp.
1231  */
1232 ipsec_status_t
1233 esp_inbound(mblk_t *ipsec_in_mp, void *arg)
1234 {
1235 	mblk_t *data_mp = ipsec_in_mp->b_cont;
1236 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
1237 	esph_t *esph = (esph_t *)arg;
1238 	ipsa_t *ipsa = ii->ipsec_in_esp_sa;
1239 	netstack_t	*ns = ii->ipsec_in_ns;
1240 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1241 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1242 
1243 	/*
1244 	 * We may wish to check replay in-range-only here as an optimization.
1245 	 * Include the reality check of ipsa->ipsa_replay >
1246 	 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
1247 	 * where N == ipsa->ipsa_replay_wsize.
1248 	 *
1249 	 * Another check that may come here later is the "collision" check.
1250 	 * If legitimate packets flow quickly enough, this won't be a problem,
1251 	 * but collisions may cause authentication algorithm crunching to
1252 	 * take place when it doesn't need to.
1253 	 */
1254 	if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
1255 		ESP_BUMP_STAT(espstack, replay_early_failures);
1256 		IP_ESP_BUMP_STAT(ipss, in_discards);
1257 		/*
1258 		 * TODO: Extract inbound interface from the IPSEC_IN
1259 		 * message's ii->ipsec_in_rill_index.
1260 		 */
1261 		ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL,
1262 		    DROPPER(ipss, ipds_esp_early_replay),
1263 		    &espstack->esp_dropper);
1264 		return (IPSEC_STATUS_FAILED);
1265 	}
1266 
1267 	/*
1268 	 * Has this packet already been processed by a hardware
1269 	 * IPsec accelerator?
1270 	 */
1271 	if (ii->ipsec_in_accelerated) {
1272 		ipsec_status_t rv;
1273 		esp3dbg(espstack,
1274 		    ("esp_inbound: pkt processed by ill=%d isv6=%d\n",
1275 		    ii->ipsec_in_ill_index, !ii->ipsec_in_v4));
1276 		rv = esp_inbound_accelerated(ipsec_in_mp,
1277 		    data_mp, ii->ipsec_in_v4, ipsa);
1278 		return (rv);
1279 	}
1280 	ESP_BUMP_STAT(espstack, noaccel);
1281 
1282 	/*
1283 	 * Adjust the IP header's payload length to reflect the removal
1284 	 * of the ICV.
1285 	 */
1286 	if (!ii->ipsec_in_v4) {
1287 		ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
1288 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
1289 		    ipsa->ipsa_mac_len);
1290 	} else {
1291 		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1292 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
1293 		    ipsa->ipsa_mac_len);
1294 	}
1295 
1296 	/* submit the request to the crypto framework */
1297 	return (esp_submit_req_inbound(ipsec_in_mp, ipsa,
1298 	    (uint8_t *)esph - data_mp->b_rptr));
1299 }
1300 
1301 /*
1302  * Perform the really difficult work of inserting the proposed situation.
1303  * Called while holding the algorithm lock.
1304  */
1305 static void
1306 esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs)
1307 {
1308 	sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
1309 	ipsec_out_t *io;
1310 	ipsec_action_t *ap;
1311 	ipsec_prot_t *prot;
1312 	netstack_t *ns;
1313 	ipsecesp_stack_t *espstack;
1314 	ipsec_stack_t *ipss;
1315 
1316 	io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr;
1317 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
1318 	ns = io->ipsec_out_ns;
1319 	espstack = ns->netstack_ipsecesp;
1320 	ipss = ns->netstack_ipsec;
1321 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
1322 
1323 	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
1324 	prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
1325 	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
1326 
1327 	prop->sadb_prop_replay = espstack->ipsecesp_replay_size;
1328 
1329 	/*
1330 	 * Based upon algorithm properties, and what-not, prioritize
1331 	 * a proposal.  If the IPSEC_OUT message has an algorithm specified,
1332 	 * use it first and foremost.
1333 	 *
1334 	 * For each action in policy list
1335 	 *   Add combination.  If I've hit limit, return.
1336 	 */
1337 
1338 	for (ap = acqrec->ipsacq_act; ap != NULL;
1339 	    ap = ap->ipa_next) {
1340 		ipsec_alginfo_t *ealg = NULL;
1341 		ipsec_alginfo_t *aalg = NULL;
1342 
1343 		if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
1344 			continue;
1345 
1346 		prot = &ap->ipa_act.ipa_apply;
1347 
1348 		if (!(prot->ipp_use_esp))
1349 			continue;
1350 
1351 		if (prot->ipp_esp_auth_alg != 0) {
1352 			aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
1353 			    [prot->ipp_esp_auth_alg];
1354 			if (aalg == NULL || !ALG_VALID(aalg))
1355 				continue;
1356 		}
1357 
1358 		ASSERT(prot->ipp_encr_alg > 0);
1359 		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
1360 		    [prot->ipp_encr_alg];
1361 		if (ealg == NULL || !ALG_VALID(ealg))
1362 			continue;
1363 
1364 		comb->sadb_comb_flags = 0;
1365 		comb->sadb_comb_reserved = 0;
1366 		comb->sadb_comb_encrypt = ealg->alg_id;
1367 		comb->sadb_comb_encrypt_minbits =
1368 		    MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
1369 		comb->sadb_comb_encrypt_maxbits =
1370 		    MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
1371 
1372 		if (aalg == NULL) {
1373 			comb->sadb_comb_auth = 0;
1374 			comb->sadb_comb_auth_minbits = 0;
1375 			comb->sadb_comb_auth_maxbits = 0;
1376 		} else {
1377 			comb->sadb_comb_auth = aalg->alg_id;
1378 			comb->sadb_comb_auth_minbits =
1379 			    MAX(prot->ipp_espa_minbits, aalg->alg_ef_minbits);
1380 			comb->sadb_comb_auth_maxbits =
1381 			    MIN(prot->ipp_espa_maxbits, aalg->alg_ef_maxbits);
1382 		}
1383 
1384 		/*
1385 		 * The following may be based on algorithm
1386 		 * properties, but in the meantime, we just pick
1387 		 * some good, sensible numbers.  Key mgmt. can
1388 		 * (and perhaps should) be the place to finalize
1389 		 * such decisions.
1390 		 */
1391 
1392 		/*
1393 		 * No limits on allocations, since we really don't
1394 		 * support that concept currently.
1395 		 */
1396 		comb->sadb_comb_soft_allocations = 0;
1397 		comb->sadb_comb_hard_allocations = 0;
1398 
1399 		/*
1400 		 * These may want to come from policy rule..
1401 		 */
1402 		comb->sadb_comb_soft_bytes =
1403 		    espstack->ipsecesp_default_soft_bytes;
1404 		comb->sadb_comb_hard_bytes =
1405 		    espstack->ipsecesp_default_hard_bytes;
1406 		comb->sadb_comb_soft_addtime =
1407 		    espstack->ipsecesp_default_soft_addtime;
1408 		comb->sadb_comb_hard_addtime =
1409 		    espstack->ipsecesp_default_hard_addtime;
1410 		comb->sadb_comb_soft_usetime =
1411 		    espstack->ipsecesp_default_soft_usetime;
1412 		comb->sadb_comb_hard_usetime =
1413 		    espstack->ipsecesp_default_hard_usetime;
1414 
1415 		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
1416 		if (--combs == 0)
1417 			break;	/* out of space.. */
1418 		comb++;
1419 	}
1420 }
1421 
1422 /*
1423  * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
1424  */
1425 static void
1426 esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns)
1427 {
1428 	uint_t combs;
1429 	sadb_msg_t *samsg;
1430 	sadb_prop_t *prop;
1431 	mblk_t *pfkeymp, *msgmp;
1432 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1433 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1434 
1435 	ESP_BUMP_STAT(espstack, acquire_requests);
1436 
1437 	if (espstack->esp_pfkey_q == NULL) {
1438 		mutex_exit(&acqrec->ipsacq_lock);
1439 		return;
1440 	}
1441 
1442 	/* Set up ACQUIRE. */
1443 	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP,
1444 	    ns->netstack_ipsec);
1445 	if (pfkeymp == NULL) {
1446 		esp0dbg(("sadb_setup_acquire failed.\n"));
1447 		mutex_exit(&acqrec->ipsacq_lock);
1448 		return;
1449 	}
1450 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
1451 	combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
1452 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
1453 	msgmp = pfkeymp->b_cont;
1454 	samsg = (sadb_msg_t *)(msgmp->b_rptr);
1455 
1456 	/* Insert proposal here. */
1457 
1458 	prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
1459 	esp_insert_prop(prop, acqrec, combs);
1460 	samsg->sadb_msg_len += prop->sadb_prop_len;
1461 	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
1462 
1463 	mutex_exit(&ipss->ipsec_alg_lock);
1464 
1465 	/*
1466 	 * Must mutex_exit() before sending PF_KEY message up, in
1467 	 * order to avoid recursive mutex_enter() if there are no registered
1468 	 * listeners.
1469 	 *
1470 	 * Once I've sent the message, I'm cool anyway.
1471 	 */
1472 	mutex_exit(&acqrec->ipsacq_lock);
1473 	if (extended != NULL) {
1474 		putnext(espstack->esp_pfkey_q, extended);
1475 	}
1476 	putnext(espstack->esp_pfkey_q, pfkeymp);
1477 }
1478 
1479 /* XXX refactor me */
1480 /*
1481  * Handle the SADB_GETSPI message.  Create a larval SA.
1482  */
1483 static void
1484 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
1485 {
1486 	ipsa_t *newbie, *target;
1487 	isaf_t *outbound, *inbound;
1488 	int rc, diagnostic;
1489 	sadb_sa_t *assoc;
1490 	keysock_out_t *kso;
1491 	uint32_t newspi;
1492 
1493 	/*
1494 	 * Randomly generate a proposed SPI value
1495 	 */
1496 	if (cl_inet_getspi != NULL) {
1497 		cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
1498 		    IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
1499 	} else {
1500 		(void) random_get_pseudo_bytes((uint8_t *)&newspi,
1501 		    sizeof (uint32_t));
1502 	}
1503 	newbie = sadb_getspi(ksi, newspi, &diagnostic,
1504 	    espstack->ipsecesp_netstack, IPPROTO_ESP);
1505 
1506 	if (newbie == NULL) {
1507 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
1508 		    ksi->ks_in_serial);
1509 		return;
1510 	} else if (newbie == (ipsa_t *)-1) {
1511 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
1512 		    ksi->ks_in_serial);
1513 		return;
1514 	}
1515 
1516 	/*
1517 	 * XXX - We may randomly collide.  We really should recover from this.
1518 	 *	 Unfortunately, that could require spending way-too-much-time
1519 	 *	 in here.  For now, let the user retry.
1520 	 */
1521 
1522 	if (newbie->ipsa_addrfam == AF_INET6) {
1523 		outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
1524 		    *(uint32_t *)(newbie->ipsa_dstaddr));
1525 		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
1526 		    newbie->ipsa_spi);
1527 	} else {
1528 		ASSERT(newbie->ipsa_addrfam == AF_INET);
1529 		outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
1530 		    *(uint32_t *)(newbie->ipsa_dstaddr));
1531 		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
1532 		    newbie->ipsa_spi);
1533 	}
1534 
1535 	mutex_enter(&outbound->isaf_lock);
1536 	mutex_enter(&inbound->isaf_lock);
1537 
1538 	/*
1539 	 * Check for collisions (i.e. did sadb_getspi() return with something
1540 	 * that already exists?).
1541 	 *
1542 	 * Try outbound first.  Even though SADB_GETSPI is traditionally
1543 	 * for inbound SAs, you never know what a user might do.
1544 	 */
1545 	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1546 	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1547 	if (target == NULL) {
1548 		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1549 		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1550 		    newbie->ipsa_addrfam);
1551 	}
1552 
1553 	/*
1554 	 * I don't have collisions elsewhere!
1555 	 * (Nor will I because I'm still holding inbound/outbound locks.)
1556 	 */
1557 
1558 	if (target != NULL) {
1559 		rc = EEXIST;
1560 		IPSA_REFRELE(target);
1561 	} else {
1562 		/*
1563 		 * sadb_insertassoc() also checks for collisions, so
1564 		 * if there's a colliding entry, rc will be set
1565 		 * to EEXIST.
1566 		 */
1567 		rc = sadb_insertassoc(newbie, inbound);
1568 		newbie->ipsa_hardexpiretime = gethrestime_sec();
1569 		newbie->ipsa_hardexpiretime +=
1570 		    espstack->ipsecesp_larval_timeout;
1571 	}
1572 
1573 	/*
1574 	 * Can exit outbound mutex.  Hold inbound until we're done
1575 	 * with newbie.
1576 	 */
1577 	mutex_exit(&outbound->isaf_lock);
1578 
1579 	if (rc != 0) {
1580 		mutex_exit(&inbound->isaf_lock);
1581 		IPSA_REFRELE(newbie);
1582 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
1583 		    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1584 		return;
1585 	}
1586 
1587 
1588 	/* Can write here because I'm still holding the bucket lock. */
1589 	newbie->ipsa_type = SADB_SATYPE_ESP;
1590 
1591 	/*
1592 	 * Construct successful return message. We have one thing going
1593 	 * for us in PF_KEY v2.  That's the fact that
1594 	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
1595 	 */
1596 	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
1597 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1598 	assoc->sadb_sa_spi = newbie->ipsa_spi;
1599 	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
1600 	mutex_exit(&inbound->isaf_lock);
1601 
1602 	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
1603 	kso = (keysock_out_t *)ksi;
1604 	kso->ks_out_len = sizeof (*kso);
1605 	kso->ks_out_serial = ksi->ks_in_serial;
1606 	kso->ks_out_type = KEYSOCK_OUT;
1607 
1608 	/*
1609 	 * Can safely putnext() to esp_pfkey_q, because this is a turnaround
1610 	 * from the esp_pfkey_q.
1611 	 */
1612 	putnext(espstack->esp_pfkey_q, mp);
1613 }
1614 
1615 /*
1616  * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
1617  * allocated mblk with the ESP header in between the two.
1618  */
1619 static boolean_t
1620 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
1621     ipsecesp_stack_t *espstack)
1622 {
1623 	mblk_t *split_mp = mp;
1624 	uint_t wheretodiv = divpoint;
1625 
1626 	while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
1627 		wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
1628 		split_mp = split_mp->b_cont;
1629 		ASSERT(split_mp != NULL);
1630 	}
1631 
1632 	if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
1633 		mblk_t *scratch;
1634 
1635 		/* "scratch" is the 2nd half, split_mp is the first. */
1636 		scratch = dupb(split_mp);
1637 		if (scratch == NULL) {
1638 			esp1dbg(espstack,
1639 			    ("esp_insert_esp: can't allocate scratch.\n"));
1640 			return (B_FALSE);
1641 		}
1642 		/* NOTE:  dupb() doesn't set b_cont appropriately. */
1643 		scratch->b_cont = split_mp->b_cont;
1644 		scratch->b_rptr += wheretodiv;
1645 		split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
1646 		split_mp->b_cont = scratch;
1647 	}
1648 	/*
1649 	 * At this point, split_mp is exactly "wheretodiv" bytes long, and
1650 	 * holds the end of the pre-ESP part of the datagram.
1651 	 */
1652 	esp_mp->b_cont = split_mp->b_cont;
1653 	split_mp->b_cont = esp_mp;
1654 
1655 	return (B_TRUE);
1656 }
1657 
1658 /*
1659  * Section 7 of RFC 3947 says:
1660  *
1661  * 7.  Recovering from the Expiring NAT Mappings
1662  *
1663  *    There are cases where NAT box decides to remove mappings that are still
1664  *    alive (for example, when the keepalive interval is too long, or when the
1665  *    NAT box is rebooted).  To recover from this, ends that are NOT behind
1666  *    NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
1667  *    the other end to determine which IP and port addresses should be used.
1668  *    The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
1669  *    DoS attack possibility because the IP address or port of the other host
1670  *    will not change (it is not behind NAT).
1671  *
1672  *    Keepalives cannot be used for these purposes, as they are not
1673  *    authenticated, but any IKE authenticated IKE packet or ESP packet can be
1674  *    used to detect whether the IP address or the port has changed.
1675  *
1676  * The following function will check an SA and its explicitly-set pair to see
1677  * if the NAT-T remote port matches the received packet (which must have
1678  * passed ESP authentication, see esp_in_done() for the caller context).  If
1679  * there is a mismatch, the SAs are updated.  It is not important if we race
1680  * with a transmitting thread, as if there is a transmitting thread, it will
1681  * merely emit a packet that will most-likely be dropped.
1682  *
1683  * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
1684  * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
1685  */
1686 #ifdef _LITTLE_ENDIAN
1687 #define	FIRST_16(x) ((x) & 0xFFFF)
1688 #define	NEXT_16(x) (((x) >> 16) & 0xFFFF)
1689 #else
1690 #define	FIRST_16(x) (((x) >> 16) & 0xFFFF)
1691 #define	NEXT_16(x) ((x) & 0xFFFF)
1692 #endif
1693 static void
1694 esp_port_freshness(uint32_t ports, ipsa_t *assoc)
1695 {
1696 	uint16_t remote = FIRST_16(ports);
1697 	uint16_t local = NEXT_16(ports);
1698 	ipsa_t *outbound_peer;
1699 	isaf_t *bucket;
1700 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
1701 
1702 	/* We found a conn_t, therefore local != 0. */
1703 	ASSERT(local != 0);
1704 	/* Assume an IPv4 SA. */
1705 	ASSERT(assoc->ipsa_addrfam == AF_INET);
1706 
1707 	/*
1708 	 * On-the-wire rport == 0 means something's very wrong.
1709 	 * An unpaired SA is also useless to us.
1710 	 * If we are behind the NAT, don't bother.
1711 	 * A zero local NAT port defaults to 4500, so check that too.
1712 	 * And, of course, if the ports already match, we don't need to
1713 	 * bother.
1714 	 */
1715 	if (remote == 0 || assoc->ipsa_otherspi == 0 ||
1716 	    (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
1717 	    (assoc->ipsa_remote_nat_port == 0 &&
1718 	    remote == htons(IPPORT_IKE_NATT)) ||
1719 	    remote == assoc->ipsa_remote_nat_port)
1720 		return;
1721 
1722 	/* Try and snag the peer.   NOTE:  Assume IPv4 for now. */
1723 	bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
1724 	    assoc->ipsa_srcaddr[0]);
1725 	mutex_enter(&bucket->isaf_lock);
1726 	outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
1727 	    assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
1728 	mutex_exit(&bucket->isaf_lock);
1729 
1730 	/* We probably lost a race to a deleting or expiring thread. */
1731 	if (outbound_peer == NULL)
1732 		return;
1733 
1734 	/*
1735 	 * Hold the mutexes for both SAs so we don't race another inbound
1736 	 * thread.  A lock-entry order shouldn't matter, since all other
1737 	 * per-ipsa locks are individually held-then-released.
1738 	 *
1739 	 * Luckily, this has nothing to do with the remote-NAT address,
1740 	 * so we don't have to re-scribble the cached-checksum differential.
1741 	 */
1742 	mutex_enter(&outbound_peer->ipsa_lock);
1743 	mutex_enter(&assoc->ipsa_lock);
1744 	outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
1745 	    remote;
1746 	mutex_exit(&assoc->ipsa_lock);
1747 	mutex_exit(&outbound_peer->ipsa_lock);
1748 	IPSA_REFRELE(outbound_peer);
1749 	ESP_BUMP_STAT(espstack, sa_port_renumbers);
1750 }
1751 /*
1752  * Finish processing of an inbound ESP packet after processing by the
1753  * crypto framework.
1754  * - Remove the ESP header.
1755  * - Send packet back to IP.
1756  * If authentication was performed on the packet, this function is called
1757  * only if the authentication succeeded.
1758  * On success returns B_TRUE, on failure returns B_FALSE and frees the
1759  * mblk chain ipsec_in_mp.
1760  */
1761 static ipsec_status_t
1762 esp_in_done(mblk_t *ipsec_in_mp)
1763 {
1764 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
1765 	mblk_t *data_mp;
1766 	ipsa_t *assoc;
1767 	uint_t espstart;
1768 	uint32_t ivlen = 0;
1769 	uint_t processed_len;
1770 	esph_t *esph;
1771 	kstat_named_t *counter;
1772 	boolean_t is_natt;
1773 	netstack_t	*ns = ii->ipsec_in_ns;
1774 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1775 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1776 
1777 	assoc = ii->ipsec_in_esp_sa;
1778 	ASSERT(assoc != NULL);
1779 
1780 	is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
1781 
1782 	/* get the pointer to the ESP header */
1783 	if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
1784 		/* authentication-only ESP */
1785 		espstart = ii->ipsec_in_crypto_data.cd_offset;
1786 		processed_len = ii->ipsec_in_crypto_data.cd_length;
1787 	} else {
1788 		/* encryption present */
1789 		ivlen = assoc->ipsa_iv_len;
1790 		if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
1791 			/* encryption-only ESP */
1792 			espstart = ii->ipsec_in_crypto_data.cd_offset -
1793 			    sizeof (esph_t) - assoc->ipsa_iv_len;
1794 			processed_len = ii->ipsec_in_crypto_data.cd_length +
1795 			    ivlen;
1796 		} else {
1797 			/* encryption with authentication */
1798 			espstart = ii->ipsec_in_crypto_dual_data.dd_offset1;
1799 			processed_len = ii->ipsec_in_crypto_dual_data.dd_len2 +
1800 			    ivlen;
1801 		}
1802 	}
1803 
1804 	data_mp = ipsec_in_mp->b_cont;
1805 	esph = (esph_t *)(data_mp->b_rptr + espstart);
1806 
1807 	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE ||
1808 	    (assoc->ipsa_flags & IPSA_F_COMBINED)) {
1809 		/*
1810 		 * Authentication passed if we reach this point.
1811 		 * Packets with authentication will have the ICV
1812 		 * after the crypto data. Adjust b_wptr before
1813 		 * making padlen checks.
1814 		 */
1815 		ESP_BUMP_STAT(espstack, good_auth);
1816 		data_mp->b_wptr -= assoc->ipsa_mac_len;
1817 
1818 		/*
1819 		 * Check replay window here!
1820 		 * For right now, assume keysock will set the replay window
1821 		 * size to zero for SAs that have an unspecified sender.
1822 		 * This may change...
1823 		 */
1824 
1825 		if (!sadb_replay_check(assoc, esph->esph_replay)) {
1826 			/*
1827 			 * Log the event. As of now we print out an event.
1828 			 * Do not print the replay failure number, or else
1829 			 * syslog cannot collate the error messages.  Printing
1830 			 * the replay number that failed opens a denial-of-
1831 			 * service attack.
1832 			 */
1833 			ipsec_assocfailure(info.mi_idnum, 0, 0,
1834 			    SL_ERROR | SL_WARN,
1835 			    "Replay failed for ESP spi 0x%x, dst %s.\n",
1836 			    assoc->ipsa_spi, assoc->ipsa_dstaddr,
1837 			    assoc->ipsa_addrfam, espstack->ipsecesp_netstack);
1838 			ESP_BUMP_STAT(espstack, replay_failures);
1839 			counter = DROPPER(ipss, ipds_esp_replay);
1840 			goto drop_and_bail;
1841 		}
1842 
1843 		if (is_natt)
1844 			esp_port_freshness(ii->ipsec_in_esp_udp_ports, assoc);
1845 	}
1846 
1847 	esp_set_usetime(assoc, B_TRUE);
1848 
1849 	if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
1850 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
1851 		ipsec_assocfailure(info.mi_idnum, 0, 0,
1852 		    SL_ERROR | SL_WARN,
1853 		    "ESP association 0x%x, dst %s had bytes expire.\n",
1854 		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1855 		    espstack->ipsecesp_netstack);
1856 		ESP_BUMP_STAT(espstack, bytes_expired);
1857 		counter = DROPPER(ipss, ipds_esp_bytes_expire);
1858 		goto drop_and_bail;
1859 	}
1860 
1861 	/*
1862 	 * Remove ESP header and padding from packet.  I hope the compiler
1863 	 * spews "branch, predict taken" code for this.
1864 	 */
1865 
1866 	if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter,
1867 	    espstack)) {
1868 
1869 		if (is_system_labeled()) {
1870 			cred_t *cr = assoc->ipsa_cred;
1871 
1872 			if (cr != NULL) {
1873 				mblk_setcred(data_mp, cr, NOPID);
1874 			}
1875 
1876 		}
1877 		if (is_natt)
1878 			return (esp_fix_natt_checksums(data_mp, assoc));
1879 
1880 		ASSERT(!is_system_labeled() || (DB_CRED(data_mp) != NULL));
1881 
1882 		if (assoc->ipsa_state == IPSA_STATE_IDLE) {
1883 			/*
1884 			 * Cluster buffering case.  Tell caller that we're
1885 			 * handling the packet.
1886 			 */
1887 			sadb_buf_pkt(assoc, ipsec_in_mp, ns);
1888 			return (IPSEC_STATUS_PENDING);
1889 		}
1890 
1891 		return (IPSEC_STATUS_SUCCESS);
1892 	}
1893 
1894 	esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n"));
1895 drop_and_bail:
1896 	IP_ESP_BUMP_STAT(ipss, in_discards);
1897 	/*
1898 	 * TODO: Extract inbound interface from the IPSEC_IN message's
1899 	 * ii->ipsec_in_rill_index.
1900 	 */
1901 	ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, counter,
1902 	    &espstack->esp_dropper);
1903 	return (IPSEC_STATUS_FAILED);
1904 }
1905 
1906 /*
1907  * Called upon failing the inbound ICV check. The message passed as
1908  * argument is freed.
1909  */
1910 static void
1911 esp_log_bad_auth(mblk_t *ipsec_in)
1912 {
1913 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr;
1914 	ipsa_t *assoc = ii->ipsec_in_esp_sa;
1915 	netstack_t	*ns = ii->ipsec_in_ns;
1916 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1917 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1918 
1919 	/*
1920 	 * Log the event. Don't print to the console, block
1921 	 * potential denial-of-service attack.
1922 	 */
1923 	ESP_BUMP_STAT(espstack, bad_auth);
1924 
1925 	ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
1926 	    "ESP Authentication failed for spi 0x%x, dst %s.\n",
1927 	    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1928 	    espstack->ipsecesp_netstack);
1929 
1930 	IP_ESP_BUMP_STAT(ipss, in_discards);
1931 	/*
1932 	 * TODO: Extract inbound interface from the IPSEC_IN
1933 	 * message's ii->ipsec_in_rill_index.
1934 	 */
1935 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
1936 	    DROPPER(ipss, ipds_esp_bad_auth),
1937 	    &espstack->esp_dropper);
1938 }
1939 
1940 
1941 /*
1942  * Invoked for outbound packets after ESP processing. If the packet
1943  * also requires AH, performs the AH SA selection and AH processing.
1944  * Returns B_TRUE if the AH processing was not needed or if it was
1945  * performed successfully. Returns B_FALSE and consumes the passed mblk
1946  * if AH processing was required but could not be performed.
1947  */
1948 static boolean_t
1949 esp_do_outbound_ah(mblk_t *ipsec_mp)
1950 {
1951 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
1952 	ipsec_status_t ipsec_rc;
1953 	ipsec_action_t *ap;
1954 
1955 	ap = io->ipsec_out_act;
1956 	if (ap == NULL) {
1957 		ipsec_policy_t *pp = io->ipsec_out_policy;
1958 		ap = pp->ipsp_act;
1959 	}
1960 
1961 	if (!ap->ipa_want_ah)
1962 		return (B_TRUE);
1963 
1964 	ASSERT(io->ipsec_out_ah_done == B_FALSE);
1965 
1966 	if (io->ipsec_out_ah_sa == NULL) {
1967 		if (!ipsec_outbound_sa(ipsec_mp, IPPROTO_AH)) {
1968 			sadb_acquire(ipsec_mp, io, B_TRUE, B_FALSE);
1969 			return (B_FALSE);
1970 		}
1971 	}
1972 	ASSERT(io->ipsec_out_ah_sa != NULL);
1973 
1974 	io->ipsec_out_ah_done = B_TRUE;
1975 	ipsec_rc = io->ipsec_out_ah_sa->ipsa_output_func(ipsec_mp);
1976 	return (ipsec_rc == IPSEC_STATUS_SUCCESS);
1977 }
1978 
1979 
1980 /*
1981  * Kernel crypto framework callback invoked after completion of async
1982  * crypto requests.
1983  */
1984 static void
1985 esp_kcf_callback(void *arg, int status)
1986 {
1987 	mblk_t *ipsec_mp = (mblk_t *)arg;
1988 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
1989 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
1990 	boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN);
1991 	netstackid_t	stackid;
1992 	netstack_t	*ns, *ns_arg;
1993 	ipsecesp_stack_t *espstack;
1994 	ipsec_stack_t	*ipss;
1995 
1996 	ASSERT(ipsec_mp->b_cont != NULL);
1997 
1998 	if (is_inbound) {
1999 		stackid = ii->ipsec_in_stackid;
2000 		ns_arg = ii->ipsec_in_ns;
2001 	} else {
2002 		stackid = io->ipsec_out_stackid;
2003 		ns_arg = io->ipsec_out_ns;
2004 	}
2005 
2006 	/*
2007 	 * Verify that the netstack is still around; could have vanished
2008 	 * while kEf was doing its work.
2009 	 */
2010 	ns = netstack_find_by_stackid(stackid);
2011 	if (ns == NULL || ns != ns_arg) {
2012 		/* Disappeared on us */
2013 		if (ns != NULL)
2014 			netstack_rele(ns);
2015 		freemsg(ipsec_mp);
2016 		return;
2017 	}
2018 
2019 	espstack = ns->netstack_ipsecesp;
2020 	ipss = ns->netstack_ipsec;
2021 
2022 	if (status == CRYPTO_SUCCESS) {
2023 		if (is_inbound) {
2024 			if (esp_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) {
2025 				netstack_rele(ns);
2026 				return;
2027 			}
2028 			/* finish IPsec processing */
2029 			ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL);
2030 		} else {
2031 			/*
2032 			 * If a ICV was computed, it was stored by the
2033 			 * crypto framework at the end of the packet.
2034 			 */
2035 			ipha_t *ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr;
2036 
2037 			esp_set_usetime(io->ipsec_out_esp_sa, B_FALSE);
2038 			/* NAT-T packet. */
2039 			if (ipha->ipha_protocol == IPPROTO_UDP)
2040 				esp_prepare_udp(ns, ipsec_mp->b_cont, ipha);
2041 
2042 			/* do AH processing if needed */
2043 			if (!esp_do_outbound_ah(ipsec_mp)) {
2044 				netstack_rele(ns);
2045 				return;
2046 			}
2047 			/* finish IPsec processing */
2048 			if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
2049 				ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL,
2050 				    NULL);
2051 			} else {
2052 				ip6_t *ip6h = (ip6_t *)ipha;
2053 				ip_wput_ipsec_out_v6(NULL, ipsec_mp, ip6h,
2054 				    NULL, NULL);
2055 			}
2056 		}
2057 
2058 	} else if (status == CRYPTO_INVALID_MAC) {
2059 		esp_log_bad_auth(ipsec_mp);
2060 
2061 	} else {
2062 		esp1dbg(espstack,
2063 		    ("esp_kcf_callback: crypto failed with 0x%x\n",
2064 		    status));
2065 		ESP_BUMP_STAT(espstack, crypto_failures);
2066 		if (is_inbound)
2067 			IP_ESP_BUMP_STAT(ipss, in_discards);
2068 		else
2069 			ESP_BUMP_STAT(espstack, out_discards);
2070 		ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL,
2071 		    DROPPER(ipss, ipds_esp_crypto_failed),
2072 		    &espstack->esp_dropper);
2073 	}
2074 	netstack_rele(ns);
2075 }
2076 
2077 /*
2078  * Invoked on crypto framework failure during inbound and outbound processing.
2079  */
2080 static void
2081 esp_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc,
2082     ipsecesp_stack_t *espstack)
2083 {
2084 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
2085 
2086 	esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
2087 	    is_inbound ? "inbound" : "outbound", kef_rc));
2088 	ip_drop_packet(mp, is_inbound, NULL, NULL,
2089 	    DROPPER(ipss, ipds_esp_crypto_failed),
2090 	    &espstack->esp_dropper);
2091 	ESP_BUMP_STAT(espstack, crypto_failures);
2092 	if (is_inbound)
2093 		IP_ESP_BUMP_STAT(ipss, in_discards);
2094 	else
2095 		ESP_BUMP_STAT(espstack, out_discards);
2096 }
2097 
/*
 * Prepare a crypto_call_req_t so that esp_kcf_callback() is invoked on
 * completion of a queued request.  NOTE: the expansion references a
 * variable named ipsec_mp, which must be in scope at the point of use; it
 * becomes the callback argument.
 */
#define	ESP_INIT_CALLREQ(_cr) {						\
	(_cr)->cr_callback_func = esp_kcf_callback;			\
	(_cr)->cr_callback_arg = ipsec_mp;				\
	(_cr)->cr_flag = CRYPTO_SKIP_REQID | CRYPTO_RESTRICTED;		\
}
2103 
/*
 * Describe a flat ICV buffer of icvlen bytes starting at icvbuf as a raw
 * crypto_data_t (offset 0, length icvlen).
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments are taken as a whole; in particular the (char *) cast applies
 * to the complete icvbuf expression rather than to its first operand.
 */
#define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {			\
	(mac)->cd_format = CRYPTO_DATA_RAW;				\
	(mac)->cd_offset = 0;						\
	(mac)->cd_length = (icvlen);					\
	(mac)->cd_raw.iov_base = (char *)(icvbuf);			\
	(mac)->cd_raw.iov_len = (icvlen);				\
}
2111 
/*
 * Describe len bytes starting off bytes into mp as a crypto_data_t.
 * When the first mblk alone holds at least off + len bytes, the cheaper
 * raw (flat buffer) format is used; otherwise the whole mblk chain is
 * passed.  Note that mp is evaluated more than once.
 */
#define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) {			\
	if (MBLKL(mp) < (len) + (off)) {				\
		(data)->cd_format = CRYPTO_DATA_MBLK;			\
		(data)->cd_mp = (mp);					\
	} else {							\
		(data)->cd_format = CRYPTO_DATA_RAW;			\
		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;		\
		(data)->cd_raw.iov_len = MBLKL(mp);			\
	}								\
	(data)->cd_offset = (off);					\
	(data)->cd_length = (len);					\
}
2125 
/*
 * Describe two regions (offset/length pairs 1 and 2) of the mblk chain mp
 * in a dual-data descriptor, for the combined MAC + cipher operations.
 * The mblk format is always used.
 */
#define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {	\
	(data)->dd_format = CRYPTO_DATA_MBLK;				\
	(data)->dd_mp = (mp);						\
	(data)->dd_offset1 = (off1);					\
	(data)->dd_len1 = (len1);					\
	(data)->dd_offset2 = (off2);					\
	(data)->dd_len2 = (len2);					\
}
2134 
2135 static ipsec_status_t
2136 esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset)
2137 {
2138 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
2139 	boolean_t do_auth;
2140 	uint_t auth_offset, msg_len, auth_len;
2141 	crypto_call_req_t call_req;
2142 	mblk_t *esp_mp;
2143 	esph_t *esph_ptr;
2144 	int kef_rc = CRYPTO_FAILED;
2145 	uint_t icv_len = assoc->ipsa_mac_len;
2146 	crypto_ctx_template_t auth_ctx_tmpl;
2147 	boolean_t do_encr;
2148 	uint_t encr_offset, encr_len;
2149 	uint_t iv_len = assoc->ipsa_iv_len;
2150 	crypto_ctx_template_t encr_ctx_tmpl;
2151 	netstack_t	*ns = ii->ipsec_in_ns;
2152 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2153 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2154 	uchar_t *iv_ptr;
2155 
2156 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
2157 
2158 	/*
2159 	 * In case kEF queues and calls back, keep netstackid_t for
2160 	 * verification that the IP instance is still around in
2161 	 * esp_kcf_callback().
2162 	 */
2163 	ASSERT(ii->ipsec_in_stackid == ns->netstack_stackid);
2164 
2165 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
2166 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
2167 
2168 	/*
2169 	 * An inbound packet is of the form:
2170 	 * IPSEC_IN -> [IP,options,ESP,IV,data,ICV,pad]
2171 	 */
2172 	esp_mp = ipsec_mp->b_cont;
2173 	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
2174 	iv_ptr = (uchar_t *)(esph_ptr + 1);
2175 	/* Packet length starting at IP header ending after ESP ICV. */
2176 	msg_len = MBLKL(esp_mp);
2177 
2178 	encr_offset = esph_offset + sizeof (esph_t) + iv_len;
2179 	encr_len = msg_len - encr_offset;
2180 
2181 	ESP_INIT_CALLREQ(&call_req);
2182 
2183 	/*
2184 	 * Counter mode algs need a nonce. This is setup in sadb_common_add().
2185 	 * If for some reason we are using a SA which does not have a nonce
2186 	 * then we must fail here.
2187 	 */
2188 	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
2189 	    (assoc->ipsa_nonce == NULL)) {
2190 		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
2191 		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2192 		return (IPSEC_STATUS_FAILED);
2193 	}
2194 
2195 	if (do_auth) {
2196 		/* force asynchronous processing? */
2197 		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
2198 		    IPSEC_ALGS_EXEC_ASYNC)
2199 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
2200 
2201 		/* authentication context template */
2202 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
2203 		    auth_ctx_tmpl);
2204 
2205 		/* ICV to be verified */
2206 		ESP_INIT_CRYPTO_MAC(&ii->ipsec_in_crypto_mac,
2207 		    icv_len, esp_mp->b_wptr - icv_len);
2208 
2209 		/* authentication starts at the ESP header */
2210 		auth_offset = esph_offset;
2211 		auth_len = msg_len - auth_offset - icv_len;
2212 		if (!do_encr) {
2213 			/* authentication only */
2214 			/* initialize input data argument */
2215 			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
2216 			    esp_mp, auth_offset, auth_len);
2217 
2218 			/* call the crypto framework */
2219 			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
2220 			    &ii->ipsec_in_crypto_data,
2221 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
2222 			    &ii->ipsec_in_crypto_mac, &call_req);
2223 		}
2224 	}
2225 
2226 	if (do_encr) {
2227 		/* force asynchronous processing? */
2228 		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
2229 		    IPSEC_ALGS_EXEC_ASYNC)
2230 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
2231 
2232 		/* encryption template */
2233 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
2234 		    encr_ctx_tmpl);
2235 
2236 		/* Call the nonce update function. Also passes in IV */
2237 		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
2238 		    iv_ptr, &ii->ipsec_in_cmm, &ii->ipsec_in_crypto_data);
2239 
2240 		if (!do_auth) {
2241 			/* decryption only */
2242 			/* initialize input data argument */
2243 			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
2244 			    esp_mp, encr_offset, encr_len);
2245 
2246 			/* call the crypto framework */
2247 			kef_rc = crypto_decrypt((crypto_mechanism_t *)
2248 			    &ii->ipsec_in_cmm, &ii->ipsec_in_crypto_data,
2249 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
2250 			    NULL, &call_req);
2251 		}
2252 	}
2253 
2254 	if (do_auth && do_encr) {
2255 		/* dual operation */
2256 		/* initialize input data argument */
2257 		ESP_INIT_CRYPTO_DUAL_DATA(&ii->ipsec_in_crypto_dual_data,
2258 		    esp_mp, auth_offset, auth_len,
2259 		    encr_offset, encr_len - icv_len);
2260 
2261 		/* specify IV */
2262 		ii->ipsec_in_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
2263 
2264 		/* call the framework */
2265 		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
2266 		    &assoc->ipsa_emech, &ii->ipsec_in_crypto_dual_data,
2267 		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
2268 		    auth_ctx_tmpl, encr_ctx_tmpl, &ii->ipsec_in_crypto_mac,
2269 		    NULL, &call_req);
2270 	}
2271 
2272 	switch (kef_rc) {
2273 	case CRYPTO_SUCCESS:
2274 		ESP_BUMP_STAT(espstack, crypto_sync);
2275 		return (esp_in_done(ipsec_mp));
2276 	case CRYPTO_QUEUED:
2277 		/* esp_kcf_callback() will be invoked on completion */
2278 		ESP_BUMP_STAT(espstack, crypto_async);
2279 		return (IPSEC_STATUS_PENDING);
2280 	case CRYPTO_INVALID_MAC:
2281 		ESP_BUMP_STAT(espstack, crypto_sync);
2282 		esp_log_bad_auth(ipsec_mp);
2283 		return (IPSEC_STATUS_FAILED);
2284 	}
2285 
2286 	esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc, espstack);
2287 	return (IPSEC_STATUS_FAILED);
2288 }
2289 
2290 /*
2291  * Compute the IP and UDP checksums -- common code for both keepalives and
2292  * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
2293  * uses mblk-insertion to insert the UDP header.
2294  * TODO - If there is an easy way to prep a packet for HW checksums, make
2295  * it happen here.
2296  */
2297 static void
2298 esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
2299 {
2300 	int offset;
2301 	uint32_t cksum;
2302 	uint16_t *arr;
2303 	mblk_t *udpmp = mp;
2304 	uint_t hlen = IPH_HDR_LENGTH(ipha);
2305 
2306 	ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2307 
2308 	ipha->ipha_hdr_checksum = 0;
2309 	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
2310 
2311 	if (ns->netstack_udp->us_do_checksum) {
2312 		ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
2313 		/* arr points to the IP header. */
2314 		arr = (uint16_t *)ipha;
2315 		IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
2316 		IP_STAT_UPDATE(ns->netstack_ip, ip_udp_out_sw_cksum_bytes,
2317 		    ntohs(htons(ipha->ipha_length) - hlen));
2318 		/* arr[6-9] are the IP addresses. */
2319 		cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
2320 		    ntohs(htons(ipha->ipha_length) - hlen);
2321 		cksum = IP_CSUM(mp, hlen, cksum);
2322 		offset = hlen + UDP_CHECKSUM_OFFSET;
2323 		while (offset >= MBLKL(udpmp)) {
2324 			offset -= MBLKL(udpmp);
2325 			udpmp = udpmp->b_cont;
2326 		}
2327 		/* arr points to the UDP header's checksum field. */
2328 		arr = (uint16_t *)(udpmp->b_rptr + offset);
2329 		*arr = cksum;
2330 	}
2331 }
2332 
2333 /*
2334  * taskq handler so we can send the NAT-T keepalive on a separate thread.
2335  */
2336 static void
2337 actually_send_keepalive(void *arg)
2338 {
2339 	mblk_t *ipsec_mp = (mblk_t *)arg;
2340 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
2341 	ipha_t *ipha;
2342 	netstack_t *ns;
2343 
2344 	ASSERT(DB_TYPE(ipsec_mp) == M_CTL);
2345 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
2346 	ASSERT(ipsec_mp->b_cont != NULL);
2347 	ASSERT(DB_TYPE(ipsec_mp->b_cont) == M_DATA);
2348 
2349 	ns = netstack_find_by_stackid(io->ipsec_out_stackid);
2350 	if (ns == NULL || ns != io->ipsec_out_ns) {
2351 		/* Just freemsg(). */
2352 		if (ns != NULL)
2353 			netstack_rele(ns);
2354 		freemsg(ipsec_mp);
2355 		return;
2356 	}
2357 
2358 	ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr;
2359 	ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL, NULL);
2360 	netstack_rele(ns);
2361 }
2362 
2363 /*
2364  * Send a one-byte UDP NAT-T keepalive.  Construct an IPSEC_OUT too that'll
2365  * get fed into esp_send_udp/ip_wput_ipsec_out.
2366  */
2367 void
2368 ipsecesp_send_keepalive(ipsa_t *assoc)
2369 {
2370 	mblk_t *mp = NULL, *ipsec_mp = NULL;
2371 	ipha_t *ipha;
2372 	udpha_t *udpha;
2373 	ipsec_out_t *io;
2374 
2375 	ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));
2376 
2377 	mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
2378 	if (mp == NULL)
2379 		return;
2380 	ipha = (ipha_t *)mp->b_rptr;
2381 	ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
2382 	ipha->ipha_type_of_service = 0;
2383 	ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
2384 	/* Use the low-16 of the SPI so we have some clue where it came from. */
2385 	ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
2386 	ipha->ipha_fragment_offset_and_flags = 0;  /* Too small to fragment! */
2387 	ipha->ipha_ttl = 0xFF;
2388 	ipha->ipha_protocol = IPPROTO_UDP;
2389 	ipha->ipha_hdr_checksum = 0;
2390 	ipha->ipha_src = assoc->ipsa_srcaddr[0];
2391 	ipha->ipha_dst = assoc->ipsa_dstaddr[0];
2392 	udpha = (udpha_t *)(ipha + 1);
2393 	udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2394 	    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2395 	udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2396 	    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2397 	udpha->uha_length = htons(sizeof (udpha_t) + 1);
2398 	udpha->uha_checksum = 0;
2399 	mp->b_wptr = (uint8_t *)(udpha + 1);
2400 	*(mp->b_wptr++) = 0xFF;
2401 
2402 	ipsec_mp = ipsec_alloc_ipsec_out(assoc->ipsa_netstack);
2403 	if (ipsec_mp == NULL) {
2404 		freeb(mp);
2405 		return;
2406 	}
2407 	ipsec_mp->b_cont = mp;
2408 	io = (ipsec_out_t *)ipsec_mp->b_rptr;
2409 	io->ipsec_out_zoneid =
2410 	    netstackid_to_zoneid(assoc->ipsa_netstack->netstack_stackid);
2411 	io->ipsec_out_stackid = assoc->ipsa_netstack->netstack_stackid;
2412 
2413 	esp_prepare_udp(assoc->ipsa_netstack, mp, ipha);
2414 	/*
2415 	 * We're holding an isaf_t bucket lock, so pawn off the actual
2416 	 * packet transmission to another thread.  Just in case syncq
2417 	 * processing causes a same-bucket packet to be processed.
2418 	 */
2419 	if (taskq_dispatch(esp_taskq, actually_send_keepalive, ipsec_mp,
2420 	    TQ_NOSLEEP) == 0) {
2421 		/* Assume no memory if taskq_dispatch() fails. */
2422 		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
2423 		    DROPPER(assoc->ipsa_netstack->netstack_ipsec,
2424 		    ipds_esp_nomem),
2425 		    &assoc->ipsa_netstack->netstack_ipsecesp->esp_dropper);
2426 	}
2427 }
2428 
2429 static ipsec_status_t
2430 esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf,
2431     uint_t payload_len)
2432 {
2433 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
2434 	uint_t auth_len;
2435 	crypto_call_req_t call_req;
2436 	mblk_t *esp_mp, *data_mp, *ip_mp;
2437 	esph_t *esph_ptr;
2438 	int kef_rc = CRYPTO_FAILED;
2439 	uint_t icv_len = assoc->ipsa_mac_len;
2440 	crypto_ctx_template_t auth_ctx_tmpl;
2441 	boolean_t do_auth;
2442 	boolean_t do_encr;
2443 	uint_t iv_len = assoc->ipsa_iv_len;
2444 	crypto_ctx_template_t encr_ctx_tmpl;
2445 	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
2446 	size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
2447 	netstack_t	*ns = io->ipsec_out_ns;
2448 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2449 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2450 	uchar_t *iv_ptr;
2451 	crypto_data_t *cd_ptr = NULL;
2452 
2453 	esp3dbg(espstack, ("esp_submit_req_outbound:%s",
2454 	    is_natt ? "natt" : "not natt"));
2455 
2456 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
2457 
2458 	/*
2459 	 * In case kEF queues and calls back, keep netstackid_t for
2460 	 * verification that the IP instance is still around in
2461 	 * esp_kcf_callback().
2462 	 */
2463 	io->ipsec_out_stackid = ns->netstack_stackid;
2464 
2465 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
2466 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
2467 
2468 	/*
2469 	 * Outbound IPsec packets are of the form:
2470 	 * IPSEC_OUT -> [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
2471 	 * unless it's NATT, then it's
2472 	 * IPSEC_OUT -> [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
2473 	 * Get a pointer to the mblk containing the ESP header.
2474 	 */
2475 	ip_mp = ipsec_mp->b_cont;
2476 	esp_mp = ipsec_mp->b_cont->b_cont;
2477 	ASSERT(ip_mp != NULL && esp_mp != NULL);
2478 	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
2479 	iv_ptr = (uchar_t *)(esph_ptr + 1);
2480 	data_mp = ipsec_mp->b_cont->b_cont->b_cont;
2481 
2482 	/*
2483 	 * Combined mode algs need a nonce. This is setup in sadb_common_add().
2484 	 * If for some reason we are using a SA which does not have a nonce
2485 	 * then we must fail here.
2486 	 */
2487 	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
2488 	    (assoc->ipsa_nonce == NULL)) {
2489 		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
2490 		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2491 		return (IPSEC_STATUS_FAILED);
2492 	}
2493 
2494 	ESP_INIT_CALLREQ(&call_req);
2495 
2496 	if (do_auth) {
2497 		/* force asynchronous processing? */
2498 		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
2499 		    IPSEC_ALGS_EXEC_ASYNC)
2500 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
2501 
2502 		/* authentication context template */
2503 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
2504 		    auth_ctx_tmpl);
2505 
2506 		/* where to store the computed mac */
2507 		ESP_INIT_CRYPTO_MAC(&io->ipsec_out_crypto_mac,
2508 		    icv_len, icv_buf);
2509 
2510 		/* authentication starts at the ESP header */
2511 		auth_len = payload_len + iv_len + sizeof (esph_t);
2512 		if (!do_encr) {
2513 			/* authentication only */
2514 			/* initialize input data argument */
2515 			ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data,
2516 			    esp_mp, esph_offset, auth_len);
2517 
2518 			/* call the crypto framework */
2519 			kef_rc = crypto_mac(&assoc->ipsa_amech,
2520 			    &io->ipsec_out_crypto_data,
2521 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
2522 			    &io->ipsec_out_crypto_mac, &call_req);
2523 		}
2524 	}
2525 
2526 	if (do_encr) {
2527 		/* force asynchronous processing? */
2528 		if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
2529 		    IPSEC_ALGS_EXEC_ASYNC)
2530 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
2531 
2532 		/* encryption context template */
2533 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
2534 		    encr_ctx_tmpl);
2535 		/* Call the nonce update function. */
2536 		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
2537 		    iv_ptr, &io->ipsec_out_cmm, &io->ipsec_out_crypto_data);
2538 
2539 		if (!do_auth) {
2540 			/* encryption only, skip mblk that contains ESP hdr */
2541 			/* initialize input data argument */
2542 			ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data,
2543 			    data_mp, 0, payload_len);
2544 
2545 			/*
2546 			 * For combined mode ciphers, the ciphertext is the same
2547 			 * size as the clear text, the ICV should follow the
2548 			 * ciphertext. To convince the kcf to allow in-line
2549 			 * encryption, with an ICV, use ipsec_out_crypto_mac
2550 			 * to point to the same buffer as the data. The calling
2551 			 * function need to ensure the buffer is large enough to
2552 			 * include the ICV.
2553 			 *
2554 			 * The IV is already written to the packet buffer, the
2555 			 * nonce setup function copied it to the params struct
2556 			 * for the cipher to use.
2557 			 */
2558 			if (assoc->ipsa_flags & IPSA_F_COMBINED) {
2559 				bcopy(&io->ipsec_out_crypto_data,
2560 				    &io->ipsec_out_crypto_mac,
2561 				    sizeof (crypto_data_t));
2562 				io->ipsec_out_crypto_mac.cd_length =
2563 				    payload_len + icv_len;
2564 				cd_ptr = &io->ipsec_out_crypto_mac;
2565 			}
2566 
2567 			/* call the crypto framework */
2568 			kef_rc = crypto_encrypt((crypto_mechanism_t *)
2569 			    &io->ipsec_out_cmm,
2570 			    &io->ipsec_out_crypto_data,
2571 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
2572 			    cd_ptr, &call_req);
2573 
2574 		}
2575 	}
2576 
2577 	if (do_auth && do_encr) {
2578 		/*
2579 		 * Encryption and authentication:
2580 		 * Pass the pointer to the mblk chain starting at the ESP
2581 		 * header to the framework. Skip the ESP header mblk
2582 		 * for encryption, which is reflected by an encryption
2583 		 * offset equal to the length of that mblk. Start
2584 		 * the authentication at the ESP header, i.e. use an
2585 		 * authentication offset of zero.
2586 		 */
2587 		ESP_INIT_CRYPTO_DUAL_DATA(&io->ipsec_out_crypto_dual_data,
2588 		    esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);
2589 
2590 		/* specify IV */
2591 		io->ipsec_out_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
2592 
2593 		/* call the framework */
2594 		kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
2595 		    &assoc->ipsa_amech, NULL,
2596 		    &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
2597 		    encr_ctx_tmpl, auth_ctx_tmpl,
2598 		    &io->ipsec_out_crypto_dual_data,
2599 		    &io->ipsec_out_crypto_mac, &call_req);
2600 	}
2601 
2602 	switch (kef_rc) {
2603 	case CRYPTO_SUCCESS:
2604 		ESP_BUMP_STAT(espstack, crypto_sync);
2605 		esp_set_usetime(assoc, B_FALSE);
2606 		if (is_natt)
2607 			esp_prepare_udp(ns, ipsec_mp->b_cont,
2608 			    (ipha_t *)ipsec_mp->b_cont->b_rptr);
2609 		return (IPSEC_STATUS_SUCCESS);
2610 	case CRYPTO_QUEUED:
2611 		/* esp_kcf_callback() will be invoked on completion */
2612 		ESP_BUMP_STAT(espstack, crypto_async);
2613 		return (IPSEC_STATUS_PENDING);
2614 	}
2615 
2616 	esp_crypto_failed(ipsec_mp, B_FALSE, kef_rc, espstack);
2617 	return (IPSEC_STATUS_FAILED);
2618 }
2619 
/*
 * Handle outbound IPsec processing for IPv4 and IPv6.
 * Returns an ipsec_status_t.  When IPSEC_STATUS_FAILED is returned, the
 * mblk chain mp has already been handed to ip_drop_packet().
 */
2625 static ipsec_status_t
2626 esp_outbound(mblk_t *mp)
2627 {
2628 	mblk_t *ipsec_out_mp, *data_mp, *espmp, *tailmp;
2629 	ipsec_out_t *io;
2630 	ipha_t *ipha;
2631 	ip6_t *ip6h;
2632 	esph_t *esph_ptr, *iv_ptr;
2633 	uint_t af;
2634 	uint8_t *nhp;
2635 	uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
2636 	uintptr_t esplen = sizeof (esph_t);
2637 	uint8_t protocol;
2638 	ipsa_t *assoc;
2639 	uint_t iv_len, block_size, mac_len = 0;
2640 	uchar_t *icv_buf;
2641 	udpha_t *udpha;
2642 	boolean_t is_natt = B_FALSE;
2643 	netstack_t	*ns;
2644 	ipsecesp_stack_t *espstack;
2645 	ipsec_stack_t	*ipss;
2646 
2647 	ipsec_out_mp = mp;
2648 	data_mp = ipsec_out_mp->b_cont;
2649 
2650 	io = (ipsec_out_t *)ipsec_out_mp->b_rptr;
2651 	ns = io->ipsec_out_ns;
2652 	espstack = ns->netstack_ipsecesp;
2653 	ipss = ns->netstack_ipsec;
2654 
2655 	ESP_BUMP_STAT(espstack, out_requests);
2656 
2657 	/*
2658 	 * <sigh> We have to copy the message here, because TCP (for example)
2659 	 * keeps a dupb() of the message lying around for retransmission.
2660 	 * Since ESP changes the whole of the datagram, we have to create our
2661 	 * own copy lest we clobber TCP's data.  Since we have to copy anyway,
2662 	 * we might as well make use of msgpullup() and get the mblk into one
2663 	 * contiguous piece!
2664 	 */
2665 	ipsec_out_mp->b_cont = msgpullup(data_mp, -1);
2666 	if (ipsec_out_mp->b_cont == NULL) {
2667 		esp0dbg(("esp_outbound: msgpullup() failed, "
2668 		    "dropping packet.\n"));
2669 		ipsec_out_mp->b_cont = data_mp;
2670 		/*
2671 		 * TODO:  Find the outbound IRE for this packet and
2672 		 * pass it to ip_drop_packet().
2673 		 */
2674 		ip_drop_packet(ipsec_out_mp, B_FALSE, NULL, NULL,
2675 		    DROPPER(ipss, ipds_esp_nomem),
2676 		    &espstack->esp_dropper);
2677 		return (IPSEC_STATUS_FAILED);
2678 	} else {
2679 		freemsg(data_mp);
2680 		data_mp = ipsec_out_mp->b_cont;
2681 	}
2682 
2683 	assoc = io->ipsec_out_esp_sa;
2684 	ASSERT(assoc != NULL);
2685 
2686 	/*
2687 	 * Get the outer IP header in shape to escape this system..
2688 	 */
2689 	if (is_system_labeled() && (assoc->ipsa_ocred != NULL)) {
2690 		int whack;
2691 
2692 		mblk_setcred(data_mp, assoc->ipsa_ocred, NOPID);
2693 		if (io->ipsec_out_v4)
2694 			whack = sadb_whack_label(&data_mp, assoc);
2695 		else
2696 			whack = sadb_whack_label_v6(&data_mp, assoc);
2697 		if (whack != 0) {
2698 			ip_drop_packet(ipsec_out_mp, B_FALSE, NULL,
2699 			    NULL, DROPPER(ipss, ipds_esp_nomem),
2700 			    &espstack->esp_dropper);
2701 			return (IPSEC_STATUS_FAILED);
2702 		}
2703 		ipsec_out_mp->b_cont = data_mp;
2704 	}
2705 
2706 
2707 	/*
2708 	 * Reality check....
2709 	 */
2710 	ipha = (ipha_t *)data_mp->b_rptr;  /* So we can call esp_acquire(). */
2711 
2712 	if (io->ipsec_out_v4) {
2713 		af = AF_INET;
2714 		divpoint = IPH_HDR_LENGTH(ipha);
2715 		datalen = ntohs(ipha->ipha_length) - divpoint;
2716 		nhp = (uint8_t *)&ipha->ipha_protocol;
2717 	} else {
2718 		ip6_pkt_t ipp;
2719 
2720 		af = AF_INET6;
2721 		ip6h = (ip6_t *)ipha;
2722 		bzero(&ipp, sizeof (ipp));
2723 		divpoint = ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL);
2724 		if (ipp.ipp_dstopts != NULL &&
2725 		    ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
2726 			/*
2727 			 * Destination options are tricky.  If we get in here,
2728 			 * then we have a terminal header following the
2729 			 * destination options.  We need to adjust backwards
2730 			 * so we insert ESP BEFORE the destination options
2731 			 * bag.  (So that the dstopts get encrypted!)
2732 			 *
2733 			 * Since this is for outbound packets only, we know
2734 			 * that non-terminal destination options only precede
2735 			 * routing headers.
2736 			 */
2737 			divpoint -= ipp.ipp_dstoptslen;
2738 		}
2739 		datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
2740 
2741 		if (ipp.ipp_rthdr != NULL) {
2742 			nhp = &ipp.ipp_rthdr->ip6r_nxt;
2743 		} else if (ipp.ipp_hopopts != NULL) {
2744 			nhp = &ipp.ipp_hopopts->ip6h_nxt;
2745 		} else {
2746 			ASSERT(divpoint == sizeof (ip6_t));
2747 			/* It's probably IP + ESP. */
2748 			nhp = &ip6h->ip6_nxt;
2749 		}
2750 	}
2751 
2752 	mac_len = assoc->ipsa_mac_len;
2753 
2754 	if (assoc->ipsa_flags & IPSA_F_NATT) {
2755 		/* wedge in UDP header */
2756 		is_natt = B_TRUE;
2757 		esplen += UDPH_SIZE;
2758 	}
2759 
2760 	/*
2761 	 * Set up ESP header and encryption padding for ENCR PI request.
2762 	 */
2763 
2764 	/* Determine the padding length.  Pad to 4-bytes for no-encryption. */
2765 	if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
2766 		iv_len = assoc->ipsa_iv_len;
2767 		block_size = assoc->ipsa_datalen;
2768 
2769 		/*
2770 		 * Pad the data to the length of the cipher block size.
2771 		 * Include the two additional bytes (hence the - 2) for the
2772 		 * padding length and the next header.  Take this into account
2773 		 * when calculating the actual length of the padding.
2774 		 */
2775 		ASSERT(ISP2(iv_len));
2776 		padlen = ((unsigned)(block_size - datalen - 2)) &
2777 		    (block_size - 1);
2778 	} else {
2779 		iv_len = 0;
2780 		padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
2781 		    (sizeof (uint32_t) - 1);
2782 	}
2783 
2784 	/* Allocate ESP header and IV. */
2785 	esplen += iv_len;
2786 
2787 	/*
2788 	 * Update association byte-count lifetimes.  Don't forget to take
2789 	 * into account the padding length and next-header (hence the + 2).
2790 	 *
2791 	 * Use the amount of data fed into the "encryption algorithm".  This
2792 	 * is the IV, the data length, the padding length, and the final two
2793 	 * bytes (padlen, and next-header).
2794 	 *
2795 	 */
2796 
2797 	if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
2798 		/*
2799 		 * TODO:  Find the outbound IRE for this packet and
2800 		 * pass it to ip_drop_packet().
2801 		 */
2802 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
2803 		    DROPPER(ipss, ipds_esp_bytes_expire),
2804 		    &espstack->esp_dropper);
2805 		return (IPSEC_STATUS_FAILED);
2806 	}
2807 
2808 	espmp = allocb(esplen, BPRI_HI);
2809 	if (espmp == NULL) {
2810 		ESP_BUMP_STAT(espstack, out_discards);
2811 		esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n"));
2812 		/*
2813 		 * TODO:  Find the outbound IRE for this packet and
2814 		 * pass it to ip_drop_packet().
2815 		 */
2816 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
2817 		    DROPPER(ipss, ipds_esp_nomem),
2818 		    &espstack->esp_dropper);
2819 		return (IPSEC_STATUS_FAILED);
2820 	}
2821 	espmp->b_wptr += esplen;
2822 	esph_ptr = (esph_t *)espmp->b_rptr;
2823 
2824 	if (is_natt) {
2825 		esp3dbg(espstack, ("esp_outbound: NATT"));
2826 
2827 		udpha = (udpha_t *)espmp->b_rptr;
2828 		udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2829 		    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2830 		udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2831 		    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2832 		/*
2833 		 * Set the checksum to 0, so that the esp_prepare_udp() call
2834 		 * can do the right thing.
2835 		 */
2836 		udpha->uha_checksum = 0;
2837 		esph_ptr = (esph_t *)(udpha + 1);
2838 	}
2839 
2840 	esph_ptr->esph_spi = assoc->ipsa_spi;
2841 
2842 	esph_ptr->esph_replay = htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1));
2843 	if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2844 		/*
2845 		 * XXX We have replay counter wrapping.
2846 		 * We probably want to nuke this SA (and its peer).
2847 		 */
2848 		ipsec_assocfailure(info.mi_idnum, 0, 0,
2849 		    SL_ERROR | SL_CONSOLE | SL_WARN,
2850 		    "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
2851 		    esph_ptr->esph_spi, assoc->ipsa_dstaddr, af,
2852 		    espstack->ipsecesp_netstack);
2853 
2854 		ESP_BUMP_STAT(espstack, out_discards);
2855 		sadb_replay_delete(assoc);
2856 		/*
2857 		 * TODO:  Find the outbound IRE for this packet and
2858 		 * pass it to ip_drop_packet().
2859 		 */
2860 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
2861 		    DROPPER(ipss, ipds_esp_replay),
2862 		    &espstack->esp_dropper);
2863 		return (IPSEC_STATUS_FAILED);
2864 	}
2865 
2866 	iv_ptr = (esph_ptr + 1);
2867 	/*
2868 	 * iv_ptr points to the mblk which will contain the IV once we have
2869 	 * written it there. This mblk will be part of a mblk chain that
2870 	 * will make up the packet.
2871 	 *
2872 	 * For counter mode algorithms, the IV is a 64 bit quantity, it
2873 	 * must NEVER repeat in the lifetime of the SA, otherwise an
2874 	 * attacker who had recorded enough packets might be able to
2875 	 * determine some clear text.
2876 	 *
2877 	 * To ensure this does not happen, the IV is stored in the SA and
2878 	 * incremented for each packet, the IV is then copied into the
2879 	 * "packet" for transmission to the receiving system. The IV will
2880 	 * also be copied into the nonce, when the packet is encrypted.
2881 	 *
2882 	 * CBC mode algorithms use a random IV for each packet. We do not
2883 	 * require the highest quality random bits, but for best security
2884 	 * with CBC mode ciphers, the value must be unlikely to repeat and
2885 	 * must not be known in advance to an adversary capable of influencing
2886 	 * the clear text.
2887 	 */
2888 	if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc,
2889 	    espstack)) {
2890 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
2891 		    DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper);
2892 		return (IPSEC_STATUS_FAILED);
2893 	}
2894 
2895 	/* Fix the IP header. */
2896 	alloclen = padlen + 2 + mac_len;
2897 	adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
2898 
2899 	protocol = *nhp;
2900 
2901 	if (io->ipsec_out_v4) {
2902 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
2903 		if (is_natt) {
2904 			*nhp = IPPROTO_UDP;
2905 			udpha->uha_length = htons(ntohs(ipha->ipha_length) -
2906 			    IPH_HDR_LENGTH(ipha));
2907 		} else {
2908 			*nhp = IPPROTO_ESP;
2909 		}
2910 		ipha->ipha_hdr_checksum = 0;
2911 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2912 	} else {
2913 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
2914 		*nhp = IPPROTO_ESP;
2915 	}
2916 
2917 	/* I've got the two ESP mblks, now insert them. */
2918 
2919 	esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n"));
2920 	esp2dbg(espstack, (dump_msg(data_mp)));
2921 
2922 	if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) {
2923 		ESP_BUMP_STAT(espstack, out_discards);
2924 		/* NOTE:  esp_insert_esp() only fails if there's no memory. */
2925 		/*
2926 		 * TODO:  Find the outbound IRE for this packet and
2927 		 * pass it to ip_drop_packet().
2928 		 */
2929 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
2930 		    DROPPER(ipss, ipds_esp_nomem),
2931 		    &espstack->esp_dropper);
2932 		freeb(espmp);
2933 		return (IPSEC_STATUS_FAILED);
2934 	}
2935 
2936 	/* Append padding (and leave room for ICV). */
2937 	for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
2938 		;
2939 	if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
2940 		tailmp->b_cont = allocb(alloclen, BPRI_HI);
2941 		if (tailmp->b_cont == NULL) {
2942 			ESP_BUMP_STAT(espstack, out_discards);
2943 			esp0dbg(("esp_outbound:  Can't allocate tailmp.\n"));
2944 			/*
2945 			 * TODO:  Find the outbound IRE for this packet and
2946 			 * pass it to ip_drop_packet().
2947 			 */
2948 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
2949 			    DROPPER(ipss, ipds_esp_nomem),
2950 			    &espstack->esp_dropper);
2951 			return (IPSEC_STATUS_FAILED);
2952 		}
2953 		tailmp = tailmp->b_cont;
2954 	}
2955 
2956 	/*
2957 	 * If there's padding, N bytes of padding must be of the form 0x1,
2958 	 * 0x2, 0x3... 0xN.
2959 	 */
2960 	for (i = 0; i < padlen; ) {
2961 		i++;
2962 		*tailmp->b_wptr++ = i;
2963 	}
2964 	*tailmp->b_wptr++ = i;
2965 	*tailmp->b_wptr++ = protocol;
2966 
2967 	esp2dbg(espstack, ("data_Mp before encryption:\n"));
2968 	esp2dbg(espstack, (dump_msg(data_mp)));
2969 
2970 	/*
2971 	 * The packet is eligible for hardware acceleration if the
2972 	 * following conditions are satisfied:
2973 	 *
2974 	 * 1. the packet will not be fragmented
2975 	 * 2. the provider supports the algorithms specified by SA
2976 	 * 3. there is no pending control message being exchanged
2977 	 * 4. snoop is not attached
2978 	 * 5. the destination address is not a multicast address
2979 	 *
2980 	 * All five of these conditions are checked by IP prior to
2981 	 * sending the packet to ESP.
2982 	 *
2983 	 * But We, and We Alone, can, nay MUST check if the packet
2984 	 * is over NATT, and then disqualify it from hardware
2985 	 * acceleration.
2986 	 */
2987 
2988 	if (io->ipsec_out_is_capab_ill && !(assoc->ipsa_flags & IPSA_F_NATT)) {
2989 		return (esp_outbound_accelerated(ipsec_out_mp, mac_len));
2990 	}
2991 	ESP_BUMP_STAT(espstack, noaccel);
2992 
2993 	/*
2994 	 * Okay.  I've set up the pre-encryption ESP.  Let's do it!
2995 	 */
2996 
2997 	if (mac_len > 0) {
2998 		ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
2999 		icv_buf = tailmp->b_wptr;
3000 		tailmp->b_wptr += mac_len;
3001 	} else {
3002 		icv_buf = NULL;
3003 	}
3004 
3005 	return (esp_submit_req_outbound(ipsec_out_mp, assoc, icv_buf,
3006 	    datalen + padlen + 2));
3007 }
3008 
3009 /*
3010  * IP calls this to validate the ICMP errors that
3011  * we got from the network.
3012  */
3013 ipsec_status_t
3014 ipsecesp_icmp_error(mblk_t *ipsec_mp)
3015 {
3016 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
3017 	boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN);
3018 	netstack_t	*ns;
3019 	ipsecesp_stack_t *espstack;
3020 	ipsec_stack_t	*ipss;
3021 
3022 	if (is_inbound) {
3023 		ns = ii->ipsec_in_ns;
3024 	} else {
3025 		ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
3026 
3027 		ns = io->ipsec_out_ns;
3028 	}
3029 	espstack = ns->netstack_ipsecesp;
3030 	ipss = ns->netstack_ipsec;
3031 
3032 	/*
3033 	 * Unless we get an entire packet back, this function is useless.
3034 	 * Why?
3035 	 *
3036 	 * 1.)	Partial packets are useless, because the "next header"
3037 	 *	is at the end of the decrypted ESP packet.  Without the
3038 	 *	whole packet, this is useless.
3039 	 *
3040 	 * 2.)	If we every use a stateful cipher, such as a stream or a
3041 	 *	one-time pad, we can't do anything.
3042 	 *
3043 	 * Since the chances of us getting an entire packet back are very
3044 	 * very small, we discard here.
3045 	 */
3046 	IP_ESP_BUMP_STAT(ipss, in_discards);
3047 	ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
3048 	    DROPPER(ipss, ipds_esp_icmp),
3049 	    &espstack->esp_dropper);
3050 	return (IPSEC_STATUS_FAILED);
3051 }
3052 
3053 /*
3054  * ESP module read put routine.
3055  */
3056 /* ARGSUSED */
3057 static void
3058 ipsecesp_rput(queue_t *q, mblk_t *mp)
3059 {
3060 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
3061 
3062 	ASSERT(mp->b_datap->db_type != M_CTL);	/* No more IRE_DB_REQ. */
3063 
3064 	switch (mp->b_datap->db_type) {
3065 	case M_PROTO:
3066 	case M_PCPROTO:
3067 		/* TPI message of some sort. */
3068 		switch (*((t_scalar_t *)mp->b_rptr)) {
3069 		case T_BIND_ACK:
3070 			esp3dbg(espstack,
3071 			    ("Thank you IP from ESP for T_BIND_ACK\n"));
3072 			break;
3073 		case T_ERROR_ACK:
3074 			cmn_err(CE_WARN,
3075 			    "ipsecesp:  ESP received T_ERROR_ACK from IP.");
3076 			/*
3077 			 * Make esp_sadb.s_ip_q NULL, and in the
3078 			 * future, perhaps try again.
3079 			 */
3080 			espstack->esp_sadb.s_ip_q = NULL;
3081 			break;
3082 		case T_OK_ACK:
3083 			/* Probably from a (rarely sent) T_UNBIND_REQ. */
3084 			break;
3085 		default:
3086 			esp0dbg(("Unknown M_{,PC}PROTO message.\n"));
3087 		}
3088 		freemsg(mp);
3089 		break;
3090 	default:
3091 		/* For now, passthru message. */
3092 		esp2dbg(espstack, ("ESP got unknown mblk type %d.\n",
3093 		    mp->b_datap->db_type));
3094 		putnext(q, mp);
3095 	}
3096 }
3097 
3098 /*
3099  * Construct an SADB_REGISTER message with the current algorithms.
3100  * This function gets called when 'ipsecalgs -s' is run or when
3101  * in.iked (or other KMD) starts.
3102  */
3103 static boolean_t
3104 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
3105     ipsecesp_stack_t *espstack, mblk_t *in_mp)
3106 {
3107 	mblk_t *pfkey_msg_mp, *keysock_out_mp;
3108 	sadb_msg_t *samsg;
3109 	sadb_supported_t *sasupp_auth = NULL;
3110 	sadb_supported_t *sasupp_encr = NULL;
3111 	sadb_alg_t *saalg;
3112 	uint_t allocsize = sizeof (*samsg);
3113 	uint_t i, numalgs_snap;
3114 	int current_aalgs;
3115 	ipsec_alginfo_t **authalgs;
3116 	uint_t num_aalgs;
3117 	int current_ealgs;
3118 	ipsec_alginfo_t **encralgs;
3119 	uint_t num_ealgs;
3120 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
3121 	sadb_sens_t *sens;
3122 	size_t sens_len = 0;
3123 	sadb_ext_t *nextext;
3124 	cred_t *sens_cr = NULL;
3125 
3126 	/* Allocate the KEYSOCK_OUT. */
3127 	keysock_out_mp = sadb_keysock_out(serial);
3128 	if (keysock_out_mp == NULL) {
3129 		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
3130 		return (B_FALSE);
3131 	}
3132 
3133 	if (is_system_labeled() && (in_mp != NULL)) {
3134 		sens_cr = msg_getcred(in_mp, NULL);
3135 
3136 		if (sens_cr != NULL) {
3137 			sens_len = sadb_sens_len_from_cred(sens_cr);
3138 			allocsize += sens_len;
3139 		}
3140 	}
3141 
3142 	/*
3143 	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
3144 	 */
3145 
3146 	mutex_enter(&ipss->ipsec_alg_lock);
3147 	/*
3148 	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
3149 	 * down the lock while filling it.
3150 	 *
3151 	 * Return only valid algorithms, so the number of algorithms
3152 	 * to send up may be less than the number of algorithm entries
3153 	 * in the table.
3154 	 */
3155 	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
3156 	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
3157 		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
3158 			num_aalgs++;
3159 
3160 	if (num_aalgs != 0) {
3161 		allocsize += (num_aalgs * sizeof (*saalg));
3162 		allocsize += sizeof (*sasupp_auth);
3163 	}
3164 	encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
3165 	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
3166 		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
3167 			num_ealgs++;
3168 
3169 	if (num_ealgs != 0) {
3170 		allocsize += (num_ealgs * sizeof (*saalg));
3171 		allocsize += sizeof (*sasupp_encr);
3172 	}
3173 	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
3174 	if (keysock_out_mp->b_cont == NULL) {
3175 		mutex_exit(&ipss->ipsec_alg_lock);
3176 		freemsg(keysock_out_mp);
3177 		return (B_FALSE);
3178 	}
3179 	pfkey_msg_mp = keysock_out_mp->b_cont;
3180 	pfkey_msg_mp->b_wptr += allocsize;
3181 
3182 	nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
3183 
3184 	if (num_aalgs != 0) {
3185 		sasupp_auth = (sadb_supported_t *)nextext;
3186 		saalg = (sadb_alg_t *)(sasupp_auth + 1);
3187 
3188 		ASSERT(((ulong_t)saalg & 0x7) == 0);
3189 
3190 		numalgs_snap = 0;
3191 		for (i = 0;
3192 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
3193 		    i++) {
3194 			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
3195 				continue;
3196 
3197 			saalg->sadb_alg_id = authalgs[i]->alg_id;
3198 			saalg->sadb_alg_ivlen = 0;
3199 			saalg->sadb_alg_minbits	= authalgs[i]->alg_ef_minbits;
3200 			saalg->sadb_alg_maxbits	= authalgs[i]->alg_ef_maxbits;
3201 			saalg->sadb_x_alg_increment =
3202 			    authalgs[i]->alg_increment;
3203 			saalg->sadb_x_alg_saltbits = SADB_8TO1(
3204 			    authalgs[i]->alg_saltlen);
3205 			numalgs_snap++;
3206 			saalg++;
3207 		}
3208 		ASSERT(numalgs_snap == num_aalgs);
3209 #ifdef DEBUG
3210 		/*
3211 		 * Reality check to make sure I snagged all of the
3212 		 * algorithms.
3213 		 */
3214 		for (; i < IPSEC_MAX_ALGS; i++) {
3215 			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
3216 				cmn_err(CE_PANIC, "esp_register_out()! "
3217 				    "Missed aalg #%d.\n", i);
3218 			}
3219 		}
3220 #endif /* DEBUG */
3221 		nextext = (sadb_ext_t *)saalg;
3222 	}
3223 
3224 	if (num_ealgs != 0) {
3225 		sasupp_encr = (sadb_supported_t *)nextext;
3226 		saalg = (sadb_alg_t *)(sasupp_encr + 1);
3227 
3228 		numalgs_snap = 0;
3229 		for (i = 0;
3230 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
3231 			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
3232 				continue;
3233 			saalg->sadb_alg_id = encralgs[i]->alg_id;
3234 			saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
3235 			saalg->sadb_alg_minbits	= encralgs[i]->alg_ef_minbits;
3236 			saalg->sadb_alg_maxbits	= encralgs[i]->alg_ef_maxbits;
3237 			/*
3238 			 * We could advertise the ICV length, except there
3239 			 * is not a value in sadb_x_algb to do this.
3240 			 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
3241 			 */
3242 			saalg->sadb_x_alg_increment =
3243 			    encralgs[i]->alg_increment;
3244 			saalg->sadb_x_alg_saltbits =
3245 			    SADB_8TO1(encralgs[i]->alg_saltlen);
3246 
3247 			numalgs_snap++;
3248 			saalg++;
3249 		}
3250 		ASSERT(numalgs_snap == num_ealgs);
3251 #ifdef DEBUG
3252 		/*
3253 		 * Reality check to make sure I snagged all of the
3254 		 * algorithms.
3255 		 */
3256 		for (; i < IPSEC_MAX_ALGS; i++) {
3257 			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
3258 				cmn_err(CE_PANIC, "esp_register_out()! "
3259 				    "Missed ealg #%d.\n", i);
3260 			}
3261 		}
3262 #endif /* DEBUG */
3263 		nextext = (sadb_ext_t *)saalg;
3264 	}
3265 
3266 	current_aalgs = num_aalgs;
3267 	current_ealgs = num_ealgs;
3268 
3269 	mutex_exit(&ipss->ipsec_alg_lock);
3270 
3271 	if (sens_cr != NULL) {
3272 		sens = (sadb_sens_t *)nextext;
3273 		sadb_sens_from_cred(sens, SADB_EXT_SENSITIVITY,
3274 		    sens_cr, sens_len);
3275 
3276 		nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
3277 	}
3278 
3279 	/* Now fill the rest of the SADB_REGISTER message. */
3280 
3281 	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
3282 	samsg->sadb_msg_version = PF_KEY_V2;
3283 	samsg->sadb_msg_type = SADB_REGISTER;
3284 	samsg->sadb_msg_errno = 0;
3285 	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
3286 	samsg->sadb_msg_len = SADB_8TO64(allocsize);
3287 	samsg->sadb_msg_reserved = 0;
3288 	/*
3289 	 * Assume caller has sufficient sequence/pid number info.  If it's one
3290 	 * from me over a new alg., I could give two hoots about sequence.
3291 	 */
3292 	samsg->sadb_msg_seq = sequence;
3293 	samsg->sadb_msg_pid = pid;
3294 
3295 	if (sasupp_auth != NULL) {
3296 		sasupp_auth->sadb_supported_len = SADB_8TO64(
3297 		    sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
3298 		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
3299 		sasupp_auth->sadb_supported_reserved = 0;
3300 	}
3301 
3302 	if (sasupp_encr != NULL) {
3303 		sasupp_encr->sadb_supported_len = SADB_8TO64(
3304 		    sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
3305 		sasupp_encr->sadb_supported_exttype =
3306 		    SADB_EXT_SUPPORTED_ENCRYPT;
3307 		sasupp_encr->sadb_supported_reserved = 0;
3308 	}
3309 
3310 	if (espstack->esp_pfkey_q != NULL)
3311 		putnext(espstack->esp_pfkey_q, keysock_out_mp);
3312 	else {
3313 		freemsg(keysock_out_mp);
3314 		return (B_FALSE);
3315 	}
3316 
3317 	return (B_TRUE);
3318 }
3319 
3320 /*
3321  * Invoked when the algorithm table changes. Causes SADB_REGISTER
3322  * messages continaining the current list of algorithms to be
3323  * sent up to the ESP listeners.
3324  */
3325 void
3326 ipsecesp_algs_changed(netstack_t *ns)
3327 {
3328 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
3329 
3330 	/*
3331 	 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
3332 	 * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
3333 	 */
3334 	(void) esp_register_out(0, 0, 0, espstack, NULL);
3335 }
3336 
3337 /*
3338  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
3339  * and put() it into AH and STREAMS again.
3340  */
3341 static void
3342 inbound_task(void *arg)
3343 {
3344 	esph_t *esph;
3345 	mblk_t *mp = (mblk_t *)arg;
3346 	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
3347 	netstack_t *ns;
3348 	ipsecesp_stack_t *espstack;
3349 	int ipsec_rc;
3350 
3351 	ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
3352 	if (ns == NULL || ns != ii->ipsec_in_ns) {
3353 		/* Just freemsg(). */
3354 		if (ns != NULL)
3355 			netstack_rele(ns);
3356 		freemsg(mp);
3357 		return;
3358 	}
3359 
3360 	espstack = ns->netstack_ipsecesp;
3361 
3362 	esp2dbg(espstack, ("in ESP inbound_task"));
3363 	ASSERT(espstack != NULL);
3364 
3365 	esph = ipsec_inbound_esp_sa(mp, ns);
3366 	if (esph != NULL) {
3367 		ASSERT(ii->ipsec_in_esp_sa != NULL);
3368 		ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func(mp, esph);
3369 		if (ipsec_rc == IPSEC_STATUS_SUCCESS)
3370 			ip_fanout_proto_again(mp, NULL, NULL, NULL);
3371 	}
3372 	netstack_rele(ns);
3373 }
3374 
3375 /*
3376  * Now that weak-key passed, actually ADD the security association, and
3377  * send back a reply ADD message.
3378  */
3379 static int
3380 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
3381     int *diagnostic, ipsecesp_stack_t *espstack)
3382 {
3383 	isaf_t *primary = NULL, *secondary;
3384 	boolean_t clone = B_FALSE, is_inbound = B_FALSE;
3385 	ipsa_t *larval = NULL;
3386 	ipsacq_t *acqrec;
3387 	iacqf_t *acq_bucket;
3388 	mblk_t *acq_msgs = NULL;
3389 	int rc;
3390 	mblk_t *lpkt;
3391 	int error;
3392 	ipsa_query_t sq;
3393 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
3394 
3395 	/*
3396 	 * Locate the appropriate table(s).
3397 	 */
3398 	sq.spp = &espstack->esp_sadb;	/* XXX */
3399 	error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
3400 	    IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
3401 	    &sq, diagnostic);
3402 	if (error)
3403 		return (error);
3404 
3405 	/*
3406 	 * Use the direction flags provided by the KMD to determine
3407 	 * if the inbound or outbound table should be the primary
3408 	 * for this SA. If these flags were absent then make this
3409 	 * decision based on the addresses.
3410 	 */
3411 	if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
3412 		primary = sq.inbound;
3413 		secondary = sq.outbound;
3414 		is_inbound = B_TRUE;
3415 		if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
3416 			clone = B_TRUE;
3417 	} else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
3418 		primary = sq.outbound;
3419 		secondary = sq.inbound;
3420 	}
3421 
3422 	if (primary == NULL) {
3423 		/*
3424 		 * The KMD did not set a direction flag, determine which
3425 		 * table to insert the SA into based on addresses.
3426 		 */
3427 		switch (ksi->ks_in_dsttype) {
3428 		case KS_IN_ADDR_MBCAST:
3429 			clone = B_TRUE;	/* All mcast SAs can be bidirectional */
3430 			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
3431 			/* FALLTHRU */
3432 		/*
3433 		 * If the source address is either one of mine, or unspecified
3434 		 * (which is best summed up by saying "not 'not mine'"),
3435 		 * then the association is potentially bi-directional,
3436 		 * in that it can be used for inbound traffic and outbound
3437 		 * traffic.  The best example of such an SA is a multicast
3438 		 * SA (which allows me to receive the outbound traffic).
3439 		 */
3440 		case KS_IN_ADDR_ME:
3441 			sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
3442 			primary = sq.inbound;
3443 			secondary = sq.outbound;
3444 			if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
3445 				clone = B_TRUE;
3446 			is_inbound = B_TRUE;
3447 			break;
3448 		/*
3449 		 * If the source address literally not mine (either
3450 		 * unspecified or not mine), then this SA may have an
3451 		 * address that WILL be mine after some configuration.
3452 		 * We pay the price for this by making it a bi-directional
3453 		 * SA.
3454 		 */
3455 		case KS_IN_ADDR_NOTME:
3456 			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
3457 			primary = sq.outbound;
3458 			secondary = sq.inbound;
3459 			if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
3460 				sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
3461 				clone = B_TRUE;
3462 			}
3463 			break;
3464 		default:
3465 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
3466 			return (EINVAL);
3467 		}
3468 	}
3469 
3470 	/*
3471 	 * Find a ACQUIRE list entry if possible.  If we've added an SA that
3472 	 * suits the needs of an ACQUIRE list entry, we can eliminate the
3473 	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
3474 	 * high-bit of the sequence number to queue it.  Key off destination
3475 	 * addr, and change acqrec's state.
3476 	 */
3477 
3478 	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
3479 		acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
3480 		mutex_enter(&acq_bucket->iacqf_lock);
3481 		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
3482 		    acqrec = acqrec->ipsacq_next) {
3483 			mutex_enter(&acqrec->ipsacq_lock);
3484 			/*
3485 			 * Q:  I only check sequence.  Should I check dst?
3486 			 * A: Yes, check dest because those are the packets
3487 			 *    that are queued up.
3488 			 */
3489 			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
3490 			    IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
3491 			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
3492 				break;
3493 			mutex_exit(&acqrec->ipsacq_lock);
3494 		}
3495 		if (acqrec != NULL) {
3496 			/*
3497 			 * AHA!  I found an ACQUIRE record for this SA.
3498 			 * Grab the msg list, and free the acquire record.
3499 			 * I already am holding the lock for this record,
3500 			 * so all I have to do is free it.
3501 			 */
3502 			acq_msgs = acqrec->ipsacq_mp;
3503 			acqrec->ipsacq_mp = NULL;
3504 			mutex_exit(&acqrec->ipsacq_lock);
3505 			sadb_destroy_acquire(acqrec,
3506 			    espstack->ipsecesp_netstack);
3507 		}
3508 		mutex_exit(&acq_bucket->iacqf_lock);
3509 	}
3510 
3511 	/*
3512 	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
3513 	 * in larval list (if there).
3514 	 */
3515 	if (samsg->sadb_msg_type == SADB_UPDATE) {
3516 		mutex_enter(&sq.inbound->isaf_lock);
3517 		larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
3518 		    ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
3519 		mutex_exit(&sq.inbound->isaf_lock);
3520 
3521 		if ((larval == NULL) ||
3522 		    (larval->ipsa_state != IPSA_STATE_LARVAL)) {
3523 			*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
3524 			if (larval != NULL) {
3525 				IPSA_REFRELE(larval);
3526 			}
3527 			esp0dbg(("Larval update, but larval disappeared.\n"));
3528 			return (ESRCH);
3529 		} /* Else sadb_common_add unlinks it for me! */
3530 	}
3531 
3532 	lpkt = NULL;
3533 	if (larval != NULL)
3534 		lpkt = sadb_clear_lpkt(larval);
3535 
3536 	rc = sadb_common_add(espstack->esp_sadb.s_ip_q, espstack->esp_pfkey_q,
3537 	    mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
3538 	    diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);
3539 
3540 	if (rc == 0 && lpkt != NULL)
3541 		rc = !taskq_dispatch(esp_taskq, inbound_task, lpkt, TQ_NOSLEEP);
3542 
3543 	if (rc != 0) {
3544 		ip_drop_packet(lpkt, B_TRUE, NULL, NULL,
3545 		    DROPPER(ipss, ipds_sadb_inlarval_timeout),
3546 		    &espstack->esp_dropper);
3547 	}
3548 
3549 	/*
3550 	 * How much more stack will I create with all of these
3551 	 * esp_outbound() calls?
3552 	 */
3553 
3554 	while (acq_msgs != NULL) {
3555 		mblk_t *mp = acq_msgs;
3556 
3557 		acq_msgs = acq_msgs->b_next;
3558 		mp->b_next = NULL;
3559 		if (rc == 0) {
3560 			if (ipsec_outbound_sa(mp, IPPROTO_ESP)) {
3561 				((ipsec_out_t *)(mp->b_rptr))->
3562 				    ipsec_out_esp_done = B_TRUE;
3563 				if (esp_outbound(mp) == IPSEC_STATUS_SUCCESS) {
3564 					ipha_t *ipha;
3565 
3566 					/* do AH processing if needed */
3567 					if (!esp_do_outbound_ah(mp))
3568 						continue;
3569 
3570 					ipha = (ipha_t *)mp->b_cont->b_rptr;
3571 
3572 					/* finish IPsec processing */
3573 					if (IPH_HDR_VERSION(ipha) ==
3574 					    IP_VERSION) {
3575 						ip_wput_ipsec_out(NULL, mp,
3576 						    ipha, NULL, NULL);
3577 					} else {
3578 						ip6_t *ip6h = (ip6_t *)ipha;
3579 						ip_wput_ipsec_out_v6(NULL,
3580 						    mp, ip6h, NULL, NULL);
3581 					}
3582 				}
3583 				continue;
3584 			}
3585 		}
3586 		ESP_BUMP_STAT(espstack, out_discards);
3587 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
3588 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
3589 		    &espstack->esp_dropper);
3590 	}
3591 
3592 	return (rc);
3593 }
3594 
3595 /*
3596  * Add new ESP security association.  This may become a generic AH/ESP
3597  * routine eventually.
3598  */
3599 static int
3600 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
3601 {
3602 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3603 	sadb_address_t *srcext =
3604 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3605 	sadb_address_t *dstext =
3606 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3607 	sadb_address_t *isrcext =
3608 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3609 	sadb_address_t *idstext =
3610 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3611 	sadb_address_t *nttext_loc =
3612 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
3613 	sadb_address_t *nttext_rem =
3614 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
3615 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3616 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3617 	struct sockaddr_in *src, *dst;
3618 	struct sockaddr_in *natt_loc, *natt_rem;
3619 	struct sockaddr_in6 *natt_loc6, *natt_rem6;
3620 	sadb_lifetime_t *soft =
3621 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3622 	sadb_lifetime_t *hard =
3623 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3624 	sadb_lifetime_t *idle =
3625 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3626 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3627 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3628 
3629 
3630 
3631 	/* I need certain extensions present for an ADD message. */
3632 	if (srcext == NULL) {
3633 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3634 		return (EINVAL);
3635 	}
3636 	if (dstext == NULL) {
3637 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3638 		return (EINVAL);
3639 	}
3640 	if (isrcext == NULL && idstext != NULL) {
3641 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3642 		return (EINVAL);
3643 	}
3644 	if (isrcext != NULL && idstext == NULL) {
3645 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
3646 		return (EINVAL);
3647 	}
3648 	if (assoc == NULL) {
3649 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3650 		return (EINVAL);
3651 	}
3652 	if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
3653 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
3654 		return (EINVAL);
3655 	}
3656 
3657 	src = (struct sockaddr_in *)(srcext + 1);
3658 	dst = (struct sockaddr_in *)(dstext + 1);
3659 	natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
3660 	natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
3661 	natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
3662 	natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);
3663 
3664 	/* Sundry ADD-specific reality checks. */
3665 	/* XXX STATS :  Logging/stats here? */
3666 
3667 	if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
3668 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3669 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
3670 		return (EINVAL);
3671 	}
3672 	if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
3673 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3674 		return (EINVAL);
3675 	}
3676 
3677 	if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
3678 	    assoc->sadb_sa_auth == SADB_AALG_NONE) {
3679 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3680 		return (EINVAL);
3681 	}
3682 
3683 	if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
3684 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3685 		return (EINVAL);
3686 	}
3687 
3688 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
3689 		return (EINVAL);
3690 	}
3691 	ASSERT(src->sin_family == dst->sin_family);
3692 
3693 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
3694 		if (nttext_loc == NULL) {
3695 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3696 			return (EINVAL);
3697 		}
3698 
3699 		if (natt_loc->sin_family == AF_INET6 &&
3700 		    !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
3701 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
3702 			return (EINVAL);
3703 		}
3704 	}
3705 
3706 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
3707 		if (nttext_rem == NULL) {
3708 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3709 			return (EINVAL);
3710 		}
3711 		if (natt_rem->sin_family == AF_INET6 &&
3712 		    !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
3713 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
3714 			return (EINVAL);
3715 		}
3716 	}
3717 
3718 
3719 	/* Stuff I don't support, for now.  XXX Diagnostic? */
3720 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
3721 		return (EOPNOTSUPP);
3722 
3723 	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
3724 		return (EINVAL);
3725 
3726 	/*
3727 	 * XXX Policy :  I'm not checking identities at this time,
3728 	 * but if I did, I'd do them here, before I sent
3729 	 * the weak key check up to the algorithm.
3730 	 */
3731 
3732 	mutex_enter(&ipss->ipsec_alg_lock);
3733 
3734 	/*
3735 	 * First locate the authentication algorithm.
3736 	 */
3737 	if (akey != NULL) {
3738 		ipsec_alginfo_t *aalg;
3739 
3740 		aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3741 		    [assoc->sadb_sa_auth];
3742 		if (aalg == NULL || !ALG_VALID(aalg)) {
3743 			mutex_exit(&ipss->ipsec_alg_lock);
3744 			esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
3745 			    assoc->sadb_sa_auth));
3746 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3747 			return (EINVAL);
3748 		}
3749 
3750 		/*
3751 		 * Sanity check key sizes.
3752 		 * Note: It's not possible to use SADB_AALG_NONE because
3753 		 * this auth_alg is not defined with ALG_FLAG_VALID. If this
3754 		 * ever changes, the same check for SADB_AALG_NONE and
3755 		 * a auth_key != NULL should be made here ( see below).
3756 		 */
3757 		if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
3758 			mutex_exit(&ipss->ipsec_alg_lock);
3759 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
3760 			return (EINVAL);
3761 		}
3762 		ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3763 
3764 		/* check key and fix parity if needed */
3765 		if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
3766 		    diagnostic) != 0) {
3767 			mutex_exit(&ipss->ipsec_alg_lock);
3768 			return (EINVAL);
3769 		}
3770 	}
3771 
3772 	/*
3773 	 * Then locate the encryption algorithm.
3774 	 */
3775 	if (ekey != NULL) {
3776 		uint_t keybits;
3777 		ipsec_alginfo_t *ealg;
3778 
3779 		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3780 		    [assoc->sadb_sa_encrypt];
3781 		if (ealg == NULL || !ALG_VALID(ealg)) {
3782 			mutex_exit(&ipss->ipsec_alg_lock);
3783 			esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
3784 			    assoc->sadb_sa_encrypt));
3785 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3786 			return (EINVAL);
3787 		}
3788 
3789 		/*
3790 		 * Sanity check key sizes. If the encryption algorithm is
3791 		 * SADB_EALG_NULL but the encryption key is NOT
3792 		 * NULL then complain.
3793 		 *
3794 		 * The keying material includes salt bits if required by
3795 		 * algorithm and optionally the Initial IV, check the
3796 		 * length of whats left.
3797 		 */
3798 		keybits = ekey->sadb_key_bits;
3799 		keybits -= ekey->sadb_key_reserved;
3800 		keybits -= SADB_8TO1(ealg->alg_saltlen);
3801 		if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
3802 		    (!ipsec_valid_key_size(keybits, ealg))) {
3803 			mutex_exit(&ipss->ipsec_alg_lock);
3804 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
3805 			return (EINVAL);
3806 		}
3807 		ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3808 
3809 		/* check key */
3810 		if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
3811 		    diagnostic) != 0) {
3812 			mutex_exit(&ipss->ipsec_alg_lock);
3813 			return (EINVAL);
3814 		}
3815 	}
3816 	mutex_exit(&ipss->ipsec_alg_lock);
3817 
3818 	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
3819 	    diagnostic, espstack));
3820 }
3821 
3822 /*
3823  * Update a security association.  Updates come in two varieties.  The first
3824  * is an update of lifetimes on a non-larval SA.  The second is an update of
3825  * a larval SA, which ends up looking a lot more like an add.
3826  */
3827 static int
3828 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3829     ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3830 {
3831 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3832 	mblk_t    *buf_pkt;
3833 	int rcode;
3834 
3835 	sadb_address_t *dstext =
3836 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3837 
3838 	if (dstext == NULL) {
3839 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3840 		return (EINVAL);
3841 	}
3842 
3843 	rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
3844 	    diagnostic, espstack->esp_pfkey_q, esp_add_sa,
3845 	    espstack->ipsecesp_netstack, sadb_msg_type);
3846 
3847 	if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
3848 	    (rcode != 0)) {
3849 		return (rcode);
3850 	}
3851 
3852 	HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
3853 	    espstack->esp_dropper, buf_pkt);
3854 
3855 	return (rcode);
3856 }
3857 
3858 /* XXX refactor me */
3859 /*
3860  * Delete a security association.  This is REALLY likely to be code common to
3861  * both AH and ESP.  Find the association, then unlink it.
3862  */
3863 static int
3864 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3865     ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3866 {
3867 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3868 	sadb_address_t *dstext =
3869 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3870 	sadb_address_t *srcext =
3871 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3872 	struct sockaddr_in *sin;
3873 
3874 	if (assoc == NULL) {
3875 		if (dstext != NULL) {
3876 			sin = (struct sockaddr_in *)(dstext + 1);
3877 		} else if (srcext != NULL) {
3878 			sin = (struct sockaddr_in *)(srcext + 1);
3879 		} else {
3880 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3881 			return (EINVAL);
3882 		}
3883 		return (sadb_purge_sa(mp, ksi,
3884 		    (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
3885 		    &espstack->esp_sadb.s_v4, diagnostic,
3886 		    espstack->esp_pfkey_q, espstack->esp_sadb.s_ip_q));
3887 	}
3888 
3889 	return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
3890 	    espstack->esp_pfkey_q, sadb_msg_type));
3891 }
3892 
3893 /* XXX refactor me */
3894 /*
3895  * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
3896  * messages.
3897  */
3898 static void
3899 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
3900 {
3901 	int error;
3902 	sadb_msg_t *samsg;
3903 
3904 	/*
3905 	 * Dump each fanout, bailing if error is non-zero.
3906 	 */
3907 
3908 	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3909 	    &espstack->esp_sadb.s_v4);
3910 	if (error != 0)
3911 		goto bail;
3912 
3913 	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3914 	    &espstack->esp_sadb.s_v6);
3915 bail:
3916 	ASSERT(mp->b_cont != NULL);
3917 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3918 	samsg->sadb_msg_errno = (uint8_t)error;
3919 	sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
3920 	    (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
3921 }
3922 
3923 /*
3924  * First-cut reality check for an inbound PF_KEY message.
3925  */
3926 static boolean_t
3927 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
3928     ipsecesp_stack_t *espstack)
3929 {
3930 	int diagnostic;
3931 
3932 	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
3933 		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
3934 		goto badmsg;
3935 	}
3936 	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
3937 	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
3938 		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
3939 		goto badmsg;
3940 	}
3941 	return (B_FALSE);	/* False ==> no failures */
3942 
3943 badmsg:
3944 	sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
3945 	    ksi->ks_in_serial);
3946 	return (B_TRUE);	/* True ==> failures */
3947 }
3948 
3949 /*
3950  * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
3951  * error cases.  What I receive is a fully-formed, syntactically legal
3952  * PF_KEY message.  I then need to check semantics...
3953  *
3954  * This code may become common to AH and ESP.  Stay tuned.
3955  *
3956  * I also make the assumption that db_ref's are cool.  If this assumption
3957  * is wrong, this means that someone other than keysock or me has been
3958  * mucking with PF_KEY messages.
3959  */
3960 static void
3961 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
3962 {
3963 	mblk_t *msg = mp->b_cont;
3964 	sadb_msg_t *samsg;
3965 	keysock_in_t *ksi;
3966 	int error;
3967 	int diagnostic = SADB_X_DIAGNOSTIC_NONE;
3968 
3969 	ASSERT(msg != NULL);
3970 
3971 	samsg = (sadb_msg_t *)msg->b_rptr;
3972 	ksi = (keysock_in_t *)mp->b_rptr;
3973 
3974 	/*
3975 	 * If applicable, convert unspecified AF_INET6 to unspecified
3976 	 * AF_INET.  And do other address reality checks.
3977 	 */
3978 	if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
3979 	    espstack->ipsecesp_netstack) ||
3980 	    esp_pfkey_reality_failures(mp, ksi, espstack)) {
3981 		return;
3982 	}
3983 
3984 	switch (samsg->sadb_msg_type) {
3985 	case SADB_ADD:
3986 		error = esp_add_sa(mp, ksi, &diagnostic,
3987 		    espstack->ipsecesp_netstack);
3988 		if (error != 0) {
3989 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3990 			    diagnostic, ksi->ks_in_serial);
3991 		}
3992 		/* else esp_add_sa() took care of things. */
3993 		break;
3994 	case SADB_DELETE:
3995 	case SADB_X_DELPAIR:
3996 	case SADB_X_DELPAIR_STATE:
3997 		error = esp_del_sa(mp, ksi, &diagnostic, espstack,
3998 		    samsg->sadb_msg_type);
3999 		if (error != 0) {
4000 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
4001 			    diagnostic, ksi->ks_in_serial);
4002 		}
4003 		/* Else esp_del_sa() took care of things. */
4004 		break;
4005 	case SADB_GET:
4006 		error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
4007 		    &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
4008 		if (error != 0) {
4009 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
4010 			    diagnostic, ksi->ks_in_serial);
4011 		}
4012 		/* Else sadb_get_sa() took care of things. */
4013 		break;
4014 	case SADB_FLUSH:
4015 		sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
4016 		sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
4017 		break;
4018 	case SADB_REGISTER:
4019 		/*
4020 		 * Hmmm, let's do it!  Check for extensions (there should
4021 		 * be none), extract the fields, call esp_register_out(),
4022 		 * then either free or report an error.
4023 		 *
4024 		 * Keysock takes care of the PF_KEY bookkeeping for this.
4025 		 */
4026 		if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
4027 		    ksi->ks_in_serial, espstack, mp)) {
4028 			freemsg(mp);
4029 		} else {
4030 			/*
4031 			 * Only way this path hits is if there is a memory
4032 			 * failure.  It will not return B_FALSE because of
4033 			 * lack of esp_pfkey_q if I am in wput().
4034 			 */
4035 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
4036 			    diagnostic, ksi->ks_in_serial);
4037 		}
4038 		break;
4039 	case SADB_UPDATE:
4040 	case SADB_X_UPDATEPAIR:
4041 		/*
4042 		 * Find a larval, if not there, find a full one and get
4043 		 * strict.
4044 		 */
4045 		error = esp_update_sa(mp, ksi, &diagnostic, espstack,
4046 		    samsg->sadb_msg_type);
4047 		if (error != 0) {
4048 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
4049 			    diagnostic, ksi->ks_in_serial);
4050 		}
4051 		/* else esp_update_sa() took care of things. */
4052 		break;
4053 	case SADB_GETSPI:
4054 		/*
4055 		 * Reserve a new larval entry.
4056 		 */
4057 		esp_getspi(mp, ksi, espstack);
4058 		break;
4059 	case SADB_ACQUIRE:
4060 		/*
4061 		 * Find larval and/or ACQUIRE record and kill it (them), I'm
4062 		 * most likely an error.  Inbound ACQUIRE messages should only
4063 		 * have the base header.
4064 		 */
4065 		sadb_in_acquire(samsg, &espstack->esp_sadb,
4066 		    espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
4067 		freemsg(mp);
4068 		break;
4069 	case SADB_DUMP:
4070 		/*
4071 		 * Dump all entries.
4072 		 */
4073 		esp_dump(mp, ksi, espstack);
4074 		/* esp_dump will take care of the return message, etc. */
4075 		break;
4076 	case SADB_EXPIRE:
4077 		/* Should never reach me. */
4078 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
4079 		    diagnostic, ksi->ks_in_serial);
4080 		break;
4081 	default:
4082 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
4083 		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
4084 		break;
4085 	}
4086 }
4087 
4088 /*
4089  * Handle case where PF_KEY says it can't find a keysock for one of my
4090  * ACQUIRE messages.
4091  */
4092 static void
4093 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
4094 {
4095 	sadb_msg_t *samsg;
4096 	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
4097 
4098 	if (mp->b_cont == NULL) {
4099 		freemsg(mp);
4100 		return;
4101 	}
4102 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
4103 
4104 	/*
4105 	 * If keysock can't find any registered, delete the acquire record
4106 	 * immediately, and handle errors.
4107 	 */
4108 	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
4109 		samsg->sadb_msg_errno = kse->ks_err_errno;
4110 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
4111 		/*
4112 		 * Use the write-side of the esp_pfkey_q, in case there is
4113 		 * no esp_sadb.s_ip_q.
4114 		 */
4115 		sadb_in_acquire(samsg, &espstack->esp_sadb,
4116 		    WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
4117 	}
4118 
4119 	freemsg(mp);
4120 }
4121 
4122 /*
4123  * ESP module write put routine.
4124  */
4125 static void
4126 ipsecesp_wput(queue_t *q, mblk_t *mp)
4127 {
4128 	ipsec_info_t *ii;
4129 	struct iocblk *iocp;
4130 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
4131 
4132 	esp3dbg(espstack, ("In esp_wput().\n"));
4133 
4134 	/* NOTE: Each case must take care of freeing or passing mp. */
4135 	switch (mp->b_datap->db_type) {
4136 	case M_CTL:
4137 		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
4138 			/* Not big enough message. */
4139 			freemsg(mp);
4140 			break;
4141 		}
4142 		ii = (ipsec_info_t *)mp->b_rptr;
4143 
4144 		switch (ii->ipsec_info_type) {
4145 		case KEYSOCK_OUT_ERR:
4146 			esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
4147 			esp_keysock_no_socket(mp, espstack);
4148 			break;
4149 		case KEYSOCK_IN:
4150 			ESP_BUMP_STAT(espstack, keysock_in);
4151 			esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));
4152 
4153 			/* Parse the message. */
4154 			esp_parse_pfkey(mp, espstack);
4155 			break;
4156 		case KEYSOCK_HELLO:
4157 			sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
4158 			    esp_ager, (void *)espstack, &espstack->esp_event,
4159 			    SADB_SATYPE_ESP);
4160 			break;
4161 		default:
4162 			esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
4163 			    ii->ipsec_info_type));
4164 			freemsg(mp);
4165 			break;
4166 		}
4167 		break;
4168 	case M_IOCTL:
4169 		iocp = (struct iocblk *)mp->b_rptr;
4170 		switch (iocp->ioc_cmd) {
4171 		case ND_SET:
4172 		case ND_GET:
4173 			if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
4174 				qreply(q, mp);
4175 				return;
4176 			} else {
4177 				iocp->ioc_error = ENOENT;
4178 			}
4179 			/* FALLTHRU */
4180 		default:
4181 			/* We really don't support any other ioctls, do we? */
4182 
4183 			/* Return EINVAL */
4184 			if (iocp->ioc_error != ENOENT)
4185 				iocp->ioc_error = EINVAL;
4186 			iocp->ioc_count = 0;
4187 			mp->b_datap->db_type = M_IOCACK;
4188 			qreply(q, mp);
4189 			return;
4190 		}
4191 	default:
4192 		esp3dbg(espstack,
4193 		    ("Got default message, type %d, passing to IP.\n",
4194 		    mp->b_datap->db_type));
4195 		putnext(q, mp);
4196 	}
4197 }
4198 
4199 /*
4200  * Process an outbound ESP packet that can be accelerated by a IPsec
4201  * hardware acceleration capable Provider.
4202  * The caller already inserted and initialized the ESP header.
4203  * This function allocates a tagging M_CTL, and adds room at the end
4204  * of the packet to hold the ICV if authentication is needed.
4205  *
4206  * On success returns B_TRUE, on failure returns B_FALSE and frees the
4207  * mblk chain ipsec_out.
4208  */
4209 static ipsec_status_t
4210 esp_outbound_accelerated(mblk_t *ipsec_out, uint_t icv_len)
4211 {
4212 	ipsec_out_t *io;
4213 	mblk_t *lastmp;
4214 	netstack_t	*ns;
4215 	ipsecesp_stack_t *espstack;
4216 	ipsec_stack_t	*ipss;
4217 
4218 	io = (ipsec_out_t *)ipsec_out->b_rptr;
4219 	ns = io->ipsec_out_ns;
4220 	espstack = ns->netstack_ipsecesp;
4221 	ipss = ns->netstack_ipsec;
4222 
4223 	ESP_BUMP_STAT(espstack, out_accelerated);
4224 
4225 	/* mark packet as being accelerated in IPSEC_OUT */
4226 	ASSERT(io->ipsec_out_accelerated == B_FALSE);
4227 	io->ipsec_out_accelerated = B_TRUE;
4228 
4229 	/*
4230 	 * add room at the end of the packet for the ICV if needed
4231 	 */
4232 	if (icv_len > 0) {
4233 		/* go to last mblk */
4234 		lastmp = ipsec_out;	/* For following while loop. */
4235 		do {
4236 			lastmp = lastmp->b_cont;
4237 		} while (lastmp->b_cont != NULL);
4238 
4239 		/* if not enough available room, allocate new mblk */
4240 		if ((lastmp->b_wptr + icv_len) > lastmp->b_datap->db_lim) {
4241 			lastmp->b_cont = allocb(icv_len, BPRI_HI);
4242 			if (lastmp->b_cont == NULL) {
4243 				ESP_BUMP_STAT(espstack, out_discards);
4244 				ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL,
4245 				    DROPPER(ipss, ipds_esp_nomem),
4246 				    &espstack->esp_dropper);
4247 				return (IPSEC_STATUS_FAILED);
4248 			}
4249 			lastmp = lastmp->b_cont;
4250 		}
4251 		lastmp->b_wptr += icv_len;
4252 	}
4253 
4254 	return (IPSEC_STATUS_SUCCESS);
4255 }
4256 
4257 /*
4258  * Process an inbound accelerated ESP packet.
4259  * On success returns B_TRUE, on failure returns B_FALSE and frees the
4260  * mblk chain ipsec_in.
4261  */
4262 static ipsec_status_t
4263 esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4,
4264     ipsa_t *assoc)
4265 {
4266 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr;
4267 	mblk_t *hada_mp;
4268 	uint32_t icv_len = 0;
4269 	da_ipsec_t *hada;
4270 	ipha_t *ipha;
4271 	ip6_t *ip6h;
4272 	kstat_named_t *counter;
4273 	netstack_t	*ns = ii->ipsec_in_ns;
4274 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4275 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4276 
4277 	ESP_BUMP_STAT(espstack, in_accelerated);
4278 
4279 	hada_mp = ii->ipsec_in_da;
4280 	ASSERT(hada_mp != NULL);
4281 	hada = (da_ipsec_t *)hada_mp->b_rptr;
4282 
4283 	/*
4284 	 * We only support one level of decapsulation in hardware, so
4285 	 * nuke the pointer.
4286 	 */
4287 	ii->ipsec_in_da = NULL;
4288 	ii->ipsec_in_accelerated = B_FALSE;
4289 
4290 	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) {
4291 		/*
4292 		 * ESP with authentication. We expect the Provider to have
4293 		 * computed the ICV and placed it in the hardware acceleration
4294 		 * data attributes.
4295 		 *
4296 		 * Extract ICV length from attributes M_CTL and sanity check
4297 		 * its value. We allow the mblk to be smaller than da_ipsec_t
4298 		 * for a small ICV, as long as the entire ICV fits within the
4299 		 * mblk.
4300 		 *
4301 		 * Also ensures that the ICV length computed by Provider
4302 		 * corresponds to the ICV length of the agorithm specified by
4303 		 * the SA.
4304 		 */
4305 		icv_len = hada->da_icv_len;
4306 		if ((icv_len != assoc->ipsa_mac_len) ||
4307 		    (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) <
4308 		    (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) {
4309 			esp0dbg(("esp_inbound_accelerated: "
4310 			    "ICV len (%u) incorrect or mblk too small (%u)\n",
4311 			    icv_len, (uint32_t)(MBLKL(hada_mp))));
4312 			counter = DROPPER(ipss, ipds_esp_bad_auth);
4313 			goto esp_in_discard;
4314 		}
4315 	}
4316 
4317 	/* get pointers to IP header */
4318 	if (isv4) {
4319 		ipha = (ipha_t *)data_mp->b_rptr;
4320 	} else {
4321 		ip6h = (ip6_t *)data_mp->b_rptr;
4322 	}
4323 
4324 	/*
4325 	 * Compare ICV in ESP packet vs ICV computed by adapter.
4326 	 * We also remove the ICV from the end of the packet since
4327 	 * it will no longer be needed.
4328 	 *
4329 	 * Assume that esp_inbound() already ensured that the pkt
4330 	 * was in one mblk.
4331 	 */
4332 	ASSERT(data_mp->b_cont == NULL);
4333 	data_mp->b_wptr -= icv_len;
4334 	/* adjust IP header */
4335 	if (isv4)
4336 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - icv_len);
4337 	else
4338 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - icv_len);
4339 	if (icv_len && bcmp(hada->da_icv, data_mp->b_wptr, icv_len)) {
4340 		int af;
4341 		void *addr;
4342 
4343 		if (isv4) {
4344 			addr = &ipha->ipha_dst;
4345 			af = AF_INET;
4346 		} else {
4347 			addr = &ip6h->ip6_dst;
4348 			af = AF_INET6;
4349 		}
4350 
4351 		/*
4352 		 * Log the event. Don't print to the console, block
4353 		 * potential denial-of-service attack.
4354 		 */
4355 		ESP_BUMP_STAT(espstack, bad_auth);
4356 		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
4357 		    "ESP Authentication failed spi %x, dst_addr %s",
4358 		    assoc->ipsa_spi, addr, af, espstack->ipsecesp_netstack);
4359 		counter = DROPPER(ipss, ipds_esp_bad_auth);
4360 		goto esp_in_discard;
4361 	}
4362 
4363 	esp3dbg(espstack, ("esp_inbound_accelerated: ESP authentication "
4364 	    "succeeded, checking replay\n"));
4365 
4366 	ipsec_in->b_cont = data_mp;
4367 
4368 	/*
4369 	 * Remove ESP header and padding from packet.
4370 	 */
4371 	if (!esp_strip_header(data_mp, ii->ipsec_in_v4, assoc->ipsa_iv_len,
4372 	    &counter, espstack)) {
4373 		esp1dbg(espstack, ("esp_inbound_accelerated: "
4374 		    "esp_strip_header() failed\n"));
4375 		goto esp_in_discard;
4376 	}
4377 
4378 	freeb(hada_mp);
4379 
4380 	if (is_system_labeled() && (assoc->ipsa_cred != NULL))
4381 		mblk_setcred(data_mp, assoc->ipsa_cred, NOPID);
4382 
4383 	/*
4384 	 * Account for usage..
4385 	 */
4386 	if (!esp_age_bytes(assoc, msgdsize(data_mp), B_TRUE)) {
4387 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
4388 		ESP_BUMP_STAT(espstack, bytes_expired);
4389 		IP_ESP_BUMP_STAT(ipss, in_discards);
4390 		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
4391 		    "ESP association 0x%x, dst %s had bytes expire.\n",
4392 		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
4393 		    espstack->ipsecesp_netstack);
4394 		ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
4395 		    DROPPER(ipss, ipds_esp_bytes_expire),
4396 		    &espstack->esp_dropper);
4397 		return (IPSEC_STATUS_FAILED);
4398 	}
4399 
4400 	/* done processing the packet */
4401 	return (IPSEC_STATUS_SUCCESS);
4402 
4403 esp_in_discard:
4404 	IP_ESP_BUMP_STAT(ipss, in_discards);
4405 	freeb(hada_mp);
4406 
4407 	ipsec_in->b_cont = data_mp;	/* For ip_drop_packet()'s sake... */
4408 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter,
4409 	    &espstack->esp_dropper);
4410 
4411 	return (IPSEC_STATUS_FAILED);
4412 }
4413 
4414 /*
4415  * Wrapper to allow IP to trigger an ESP association failure message
4416  * during inbound SA selection.
4417  */
4418 void
4419 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
4420     uint32_t spi, void *addr, int af, ipsecesp_stack_t *espstack)
4421 {
4422 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
4423 
4424 	if (espstack->ipsecesp_log_unknown_spi) {
4425 		ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
4426 		    addr, af, espstack->ipsecesp_netstack);
4427 	}
4428 
4429 	ip_drop_packet(mp, B_TRUE, NULL, NULL,
4430 	    DROPPER(ipss, ipds_esp_no_sa),
4431 	    &espstack->esp_dropper);
4432 }
4433 
4434 /*
4435  * Initialize the ESP input and output processing functions.
4436  */
4437 void
4438 ipsecesp_init_funcs(ipsa_t *sa)
4439 {
4440 	if (sa->ipsa_output_func == NULL)
4441 		sa->ipsa_output_func = esp_outbound;
4442 	if (sa->ipsa_input_func == NULL)
4443 		sa->ipsa_input_func = esp_inbound;
4444 }
4445