xref: /titanic_50/usr/src/uts/common/inet/ip/ipsecesp.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/stropts.h>
32 #include <sys/errno.h>
33 #include <sys/strlog.h>
34 #include <sys/tihdr.h>
35 #include <sys/socket.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/kmem.h>
39 #include <sys/sysmacros.h>
40 #include <sys/cmn_err.h>
41 #include <sys/vtrace.h>
42 #include <sys/debug.h>
43 #include <sys/atomic.h>
44 #include <sys/strsun.h>
45 #include <sys/random.h>
46 #include <netinet/in.h>
47 #include <net/if.h>
48 #include <netinet/ip6.h>
49 #include <net/pfkeyv2.h>
50 
51 #include <inet/common.h>
52 #include <inet/mi.h>
53 #include <inet/nd.h>
54 #include <inet/ip.h>
55 #include <inet/ip6.h>
56 #include <inet/sadb.h>
57 #include <inet/ipsec_info.h>
58 #include <inet/ipsec_impl.h>
59 #include <inet/ipsecesp.h>
60 #include <inet/ipdrop.h>
61 #include <inet/tcp.h>
62 #include <sys/kstat.h>
63 #include <sys/policy.h>
64 #include <sys/strsun.h>
65 #include <inet/udp_impl.h>
66 #include <sys/taskq.h>
67 
68 /* EXPORT DELETE START */
69 #include <sys/iphada.h>
70 /* EXPORT DELETE END */
71 
72 /* Packet dropper for ESP drops. */
73 static ipdropper_t esp_dropper;
74 
75 static kmutex_t ipsecesp_param_lock; /* Protects ipsecesp_param_arr[] below. */
/*
 * Table of ND variables supported by ipsecesp.  ipsecesp_ddi_init() loads
 * every named entry into ipsecesp_g_nd.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * Each row is { min, max, current value, name }.  The accessor macros that
 * follow the table select rows by array index, so a row's position must
 * stay in sync with the index used by its macro when rows are added,
 * removed or reordered.
 */
static	ipsecespparam_t	ipsecesp_param_arr[] = {
	/* min	max			value	name */
	{ 0,	3,			0,	"ipsecesp_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
	{ 1,	10,			1,	"ipsecesp_reap_delay"},
	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecesp_replay_size"},
	{ 1,	300,			15,	"ipsecesp_acquire_timeout"},
	{ 1,	1800,			90,	"ipsecesp_larval_timeout"},
	/* Default lifetime values for ACQUIRE messages. */
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
	{ 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
	{ 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
	{ 0,	1,		0,	"ipsecesp_log_unknown_spi"},
	{ 0,	2,		1,	"ipsecesp_padding_check"},
};
/*
 * Shorthand for the current value (or, for ipsecesp_age_int_max, the upper
 * bound) of individual tunables.  These expand to plain lvalues; no locking
 * is performed by the macros themselves.
 */
#define	ipsecesp_debug		ipsecesp_param_arr[0].ipsecesp_param_value
#define	ipsecesp_age_interval	ipsecesp_param_arr[1].ipsecesp_param_value
#define	ipsecesp_age_int_max	ipsecesp_param_arr[1].ipsecesp_param_max
#define	ipsecesp_reap_delay	ipsecesp_param_arr[2].ipsecesp_param_value
#define	ipsecesp_replay_size	ipsecesp_param_arr[3].ipsecesp_param_value
#define	ipsecesp_acquire_timeout ipsecesp_param_arr[4].ipsecesp_param_value
#define	ipsecesp_larval_timeout ipsecesp_param_arr[5].ipsecesp_param_value
#define	ipsecesp_default_soft_bytes \
	ipsecesp_param_arr[6].ipsecesp_param_value
#define	ipsecesp_default_hard_bytes \
	ipsecesp_param_arr[7].ipsecesp_param_value
#define	ipsecesp_default_soft_addtime \
	ipsecesp_param_arr[8].ipsecesp_param_value
#define	ipsecesp_default_hard_addtime \
	ipsecesp_param_arr[9].ipsecesp_param_value
#define	ipsecesp_default_soft_usetime \
	ipsecesp_param_arr[10].ipsecesp_param_value
#define	ipsecesp_default_hard_usetime \
	ipsecesp_param_arr[11].ipsecesp_param_value
#define	ipsecesp_log_unknown_spi \
	ipsecesp_param_arr[12].ipsecesp_param_value
/* 0 disables the padding checks in esp_strip_header(); 1 and 2 add more. */
#define	ipsecesp_padding_check \
	ipsecesp_param_arr[13].ipsecesp_param_value
122 
/*
 * Debug printing helpers.  The argument is a complete, parenthesized printf
 * argument list, e.g. esp1dbg(("value is %d\n", v)).  esp0dbg always prints;
 * esp1dbg, esp2dbg and esp3dbg print only when the ipsecesp_debug tunable is
 * at least 1, 2 or 3 respectively.
 *
 * NOTE(review): the conditional forms expand to a bare "if" statement, so
 * using one as the unbraced body of an if that has an else would bind that
 * else to the macro's if.  Always use them inside braces.
 */
#define	esp0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	esp1dbg(a)	if (ipsecesp_debug != 0) printf a
#define	esp2dbg(a)	if (ipsecesp_debug > 1) printf a
#define	esp3dbg(a)	if (ipsecesp_debug > 2) printf a
128 
129 static IDP ipsecesp_g_nd;
130 
131 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
132 static int ipsecesp_close(queue_t *);
133 static void ipsecesp_rput(queue_t *, mblk_t *);
134 static void ipsecesp_wput(queue_t *, mblk_t *);
135 static void esp_send_acquire(ipsacq_t *, mblk_t *);
136 
137 /* EXPORT DELETE START */
138 static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t);
139 static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *,
140     boolean_t, ipsa_t *);
141 /* EXPORT DELETE END */
142 
143 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t);
144 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
145     kstat_named_t **);
146 static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t);
147 static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *,
148     uint_t);
149 
/*
 * STREAMS module description shared by both queues.  The initializer is
 * positional; per module_info(9S) the fields are module id, module name,
 * min/max packet size and high/low water marks (confirm against
 * <sys/stream.h>).  info.mi_idnum is also used elsewhere in this file as
 * the id for rate-limited logging and for sadb_retimeout().
 */
static struct module_info info = {
	5137, "ipsecesp", 0, INFPSZ, 65536, 1024
};

/*
 * Read-side queue: put routine ipsecesp_rput, no service routine, shared
 * open/close entry points.
 */
static struct qinit rinit = {
	(pfi_t)ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/*
 * Write-side queue: put routine ipsecesp_wput, no service routine, shared
 * open/close entry points.
 */
static struct qinit winit = {
	(pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/* Exported streamtab tying the read and write qinit structures together. */
struct streamtab ipsecespinfo = {
	&rinit, &winit, NULL, NULL
};
167 
168 /*
169  * Keysock instance of ESP.  "There can be only one." :)
170  * Use casptr() on this because I don't set it until KEYSOCK_HELLO comes down.
171  * Paired up with the esp_pfkey_q is the esp_event, which will age SAs.
172  */
173 static queue_t *esp_pfkey_q;
174 static timeout_id_t esp_event;
175 static taskq_t *esp_taskq;
176 
177 /*
178  * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
179  *
180  * Question:	Do I need this, given that all instance's esps->esps_wq point
181  *		to IP?
182  *
183  * Answer:	Yes, because I need to know which queue is BOUND to
184  *		IPPROTO_ESP
185  */
186 static mblk_t *esp_ip_unbind;
187 
/*
 * Stats.  This may eventually become a full-blown SNMP MIB once that spec
 * stabilizes.
 *
 * One kstat_named_t per counter.  esp_kstat_init() sizes the kstat as
 * sizeof (esp_kstats_t) / sizeof (kstat_named_t), so this structure must
 * contain nothing but kstat_named_t members, and every member added here
 * needs a matching kstat_named_init() call there.
 */

typedef struct {
	kstat_named_t esp_stat_num_aalgs;
	kstat_named_t esp_stat_good_auth;
	kstat_named_t esp_stat_bad_auth;
	kstat_named_t esp_stat_bad_padding;
	kstat_named_t esp_stat_replay_failures;
	kstat_named_t esp_stat_replay_early_failures;
	kstat_named_t esp_stat_keysock_in;
	kstat_named_t esp_stat_out_requests;
	kstat_named_t esp_stat_acquire_requests;
	kstat_named_t esp_stat_bytes_expired;
	kstat_named_t esp_stat_out_discards;
	kstat_named_t esp_stat_in_accelerated;
	kstat_named_t esp_stat_out_accelerated;
	kstat_named_t esp_stat_noaccel;
	kstat_named_t esp_stat_crypto_sync;
	kstat_named_t esp_stat_crypto_async;
	kstat_named_t esp_stat_crypto_failures;
/* EXPORT DELETE START */
	kstat_named_t esp_stat_num_ealgs;
	kstat_named_t esp_stat_bad_decrypt;
/* EXPORT DELETE END */
} esp_kstats_t;
216 
/*
 * Increment / decrement one counter in esp_kstats.  "x" is the member name
 * without its esp_stat_ prefix, e.g. ESP_BUMP_STAT(bad_padding).  The update
 * is a plain ++ / -- on value.ui64; the macros themselves take no lock and
 * use no atomic operation.
 */
#define	ESP_BUMP_STAT(x) (esp_kstats->esp_stat_ ## x).value.ui64++
#define	ESP_DEBUMP_STAT(x) (esp_kstats->esp_stat_ ## x).value.ui64--
219 
220 static kstat_t *esp_ksp;
221 static esp_kstats_t *esp_kstats;
222 
223 static int	esp_kstat_update(kstat_t *, int);
224 
225 static boolean_t
226 esp_kstat_init(void)
227 {
228 	esp_ksp = kstat_create("ipsecesp", 0, "esp_stat", "net",
229 	    KSTAT_TYPE_NAMED, sizeof (*esp_kstats) / sizeof (kstat_named_t),
230 	    KSTAT_FLAG_PERSISTENT);
231 
232 	if (esp_ksp == NULL)
233 		return (B_FALSE);
234 
235 	esp_kstats = esp_ksp->ks_data;
236 
237 	esp_ksp->ks_update = esp_kstat_update;
238 
239 #define	K64 KSTAT_DATA_UINT64
240 #define	KI(x) kstat_named_init(&(esp_kstats->esp_stat_##x), #x, K64)
241 
242 	KI(num_aalgs);
243 /* EXPORT DELETE START */
244 	KI(num_ealgs);
245 /* EXPORT DELETE END */
246 	KI(good_auth);
247 	KI(bad_auth);
248 	KI(bad_padding);
249 	KI(replay_failures);
250 	KI(replay_early_failures);
251 	KI(keysock_in);
252 	KI(out_requests);
253 	KI(acquire_requests);
254 	KI(bytes_expired);
255 	KI(out_discards);
256 	KI(in_accelerated);
257 	KI(out_accelerated);
258 	KI(noaccel);
259 	KI(crypto_sync);
260 	KI(crypto_async);
261 	KI(crypto_failures);
262 /* EXPORT DELETE START */
263 	KI(bad_decrypt);
264 /* EXPORT DELETE END */
265 
266 #undef KI
267 #undef K64
268 
269 	kstat_install(esp_ksp);
270 
271 	return (B_TRUE);
272 }
273 
274 static int
275 esp_kstat_update(kstat_t *kp, int rw)
276 {
277 	esp_kstats_t *ekp;
278 
279 	if ((kp == NULL) || (kp->ks_data == NULL))
280 		return (EIO);
281 
282 	if (rw == KSTAT_WRITE)
283 		return (EACCES);
284 
285 	ASSERT(kp == esp_ksp);
286 	ekp = (esp_kstats_t *)kp->ks_data;
287 	ASSERT(ekp == esp_kstats);
288 
289 	mutex_enter(&alg_lock);
290 	ekp->esp_stat_num_aalgs.value.ui64 = ipsec_nalgs[IPSEC_ALG_AUTH];
291 /* EXPORT DELETE START */
292 	ekp->esp_stat_num_ealgs.value.ui64 = ipsec_nalgs[IPSEC_ALG_ENCR];
293 /* EXPORT DELETE END */
294 	mutex_exit(&alg_lock);
295 
296 	return (0);
297 }
298 
299 #ifdef DEBUG
300 /*
301  * Debug routine, useful to see pre-encryption data.
302  */
303 static char *
304 dump_msg(mblk_t *mp)
305 {
306 	char tmp_str[3], tmp_line[256];
307 
308 	while (mp != NULL) {
309 		unsigned char *ptr;
310 
311 		printf("mblk address 0x%p, length %ld, db_ref %d "
312 		    "type %d, base 0x%p, lim 0x%p\n",
313 		    (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
314 		    mp->b_datap->db_ref, mp->b_datap->db_type,
315 		    (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
316 		ptr = mp->b_rptr;
317 
318 		tmp_line[0] = '\0';
319 		while (ptr < mp->b_wptr) {
320 			uint_t diff;
321 
322 			diff = (ptr - mp->b_rptr);
323 			if (!(diff & 0x1f)) {
324 				if (strlen(tmp_line) > 0) {
325 					printf("bytes: %s\n", tmp_line);
326 					tmp_line[0] = '\0';
327 				}
328 			}
329 			if (!(diff & 0x3))
330 				(void) strcat(tmp_line, " ");
331 			(void) sprintf(tmp_str, "%02x", *ptr);
332 			(void) strcat(tmp_line, tmp_str);
333 			ptr++;
334 		}
335 		if (strlen(tmp_line) > 0)
336 			printf("bytes: %s\n", tmp_line);
337 
338 		mp = mp->b_cont;
339 	}
340 
341 	return ("\n");
342 }
343 
344 #else /* DEBUG */
345 static char *
346 dump_msg(mblk_t *mp)
347 {
348 	printf("Find value of mp %p.\n", mp);
349 	return ("\n");
350 }
351 #endif /* DEBUG */
352 
353 /*
354  * Don't have to lock age_interval, as only one thread will access it at
355  * a time, because I control the one function that does with timeout().
356  */
357 /* ARGSUSED */
358 static void
359 esp_ager(void *ignoreme)
360 {
361 	hrtime_t begin = gethrtime();
362 
363 	sadb_ager(&esp_sadb.s_v4, esp_pfkey_q, esp_sadb.s_ip_q,
364 	    ipsecesp_reap_delay);
365 	sadb_ager(&esp_sadb.s_v6, esp_pfkey_q, esp_sadb.s_ip_q,
366 	    ipsecesp_reap_delay);
367 
368 	esp_event = sadb_retimeout(begin, esp_pfkey_q, esp_ager,
369 	    &(ipsecesp_age_interval), ipsecesp_age_int_max, info.mi_idnum);
370 }
371 
372 /*
373  * Get an ESP NDD parameter.
374  */
375 /* ARGSUSED */
376 static int
377 ipsecesp_param_get(q, mp, cp, cr)
378 	queue_t	*q;
379 	mblk_t	*mp;
380 	caddr_t	cp;
381 	cred_t *cr;
382 {
383 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
384 	uint_t value;
385 
386 	mutex_enter(&ipsecesp_param_lock);
387 	value = ipsecesppa->ipsecesp_param_value;
388 	mutex_exit(&ipsecesp_param_lock);
389 
390 	(void) mi_mpprintf(mp, "%u", value);
391 	return (0);
392 }
393 
394 /*
395  * This routine sets an NDD variable in a ipsecespparam_t structure.
396  */
397 /* ARGSUSED */
398 static int
399 ipsecesp_param_set(q, mp, value, cp, cr)
400 	queue_t	*q;
401 	mblk_t	*mp;
402 	char	*value;
403 	caddr_t	cp;
404 	cred_t *cr;
405 {
406 	ulong_t	new_value;
407 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
408 
409 	/*
410 	 * Fail the request if the new value does not lie within the
411 	 * required bounds.
412 	 */
413 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
414 	    new_value < ipsecesppa->ipsecesp_param_min ||
415 	    new_value > ipsecesppa->ipsecesp_param_max) {
416 		return (EINVAL);
417 	}
418 
419 	/* Set the new value */
420 	mutex_enter(&ipsecesp_param_lock);
421 	ipsecesppa->ipsecesp_param_value = new_value;
422 	mutex_exit(&ipsecesp_param_lock);
423 	return (0);
424 }
425 
426 /*
427  * Using lifetime NDD variables, fill in an extended combination's
428  * lifetime information.
429  */
430 void
431 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb)
432 {
433 	ecomb->sadb_x_ecomb_soft_bytes = ipsecesp_default_soft_bytes;
434 	ecomb->sadb_x_ecomb_hard_bytes = ipsecesp_default_hard_bytes;
435 	ecomb->sadb_x_ecomb_soft_addtime = ipsecesp_default_soft_addtime;
436 	ecomb->sadb_x_ecomb_hard_addtime = ipsecesp_default_hard_addtime;
437 	ecomb->sadb_x_ecomb_soft_usetime = ipsecesp_default_soft_usetime;
438 	ecomb->sadb_x_ecomb_hard_usetime = ipsecesp_default_hard_usetime;
439 }
440 
441 /*
442  * Initialize things for ESP at module load time.
443  */
444 boolean_t
445 ipsecesp_ddi_init(void)
446 {
447 	int count;
448 	ipsecespparam_t *espp = ipsecesp_param_arr;
449 
450 	for (count = A_CNT(ipsecesp_param_arr); count-- > 0; espp++) {
451 		if (espp->ipsecesp_param_name != NULL &&
452 		    espp->ipsecesp_param_name[0]) {
453 			if (!nd_load(&ipsecesp_g_nd, espp->ipsecesp_param_name,
454 			    ipsecesp_param_get, ipsecesp_param_set,
455 			    (caddr_t)espp)) {
456 				nd_free(&ipsecesp_g_nd);
457 				return (B_FALSE);
458 			}
459 		}
460 	}
461 
462 	if (!esp_kstat_init()) {
463 		nd_free(&ipsecesp_g_nd);
464 		return (B_FALSE);
465 	}
466 
467 	esp_sadb.s_acquire_timeout = &ipsecesp_acquire_timeout;
468 	esp_sadb.s_acqfn = esp_send_acquire;
469 	sadbp_init(&esp_sadb, SADB_SATYPE_ESP);
470 
471 	esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
472 	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
473 
474 	mutex_init(&ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);
475 
476 	ip_drop_register(&esp_dropper, "IPsec ESP");
477 
478 	return (B_TRUE);
479 }
480 
481 /*
482  * Destroy things for ESP at module unload time.
483  */
484 void
485 ipsecesp_ddi_destroy(void)
486 {
487 	esp1dbg(("In ipsecesp_ddi_destroy.\n"));
488 
489 	sadbp_destroy(&esp_sadb);
490 	ip_drop_unregister(&esp_dropper);
491 	taskq_destroy(esp_taskq);
492 	mutex_destroy(&ipsecesp_param_lock);
493 	nd_free(&ipsecesp_g_nd);
494 	kstat_delete(esp_ksp);
495 }
496 
497 /*
498  * ESP module open routine.
499  */
500 /* ARGSUSED */
501 static int
502 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
503 {
504 	if (secpolicy_net_config(credp, B_FALSE) != 0) {
505 		esp1dbg(("Non-privileged user trying to open ipsecesp.\n"));
506 		return (EPERM);
507 	}
508 
509 	if (q->q_ptr != NULL)
510 		return (0);  /* Re-open of an already open instance. */
511 
512 	if (sflag != MODOPEN)
513 		return (EINVAL);
514 
515 	/*
516 	 * ASSUMPTIONS (because I'm MT_OCEXCL):
517 	 *
518 	 *	* I'm being pushed on top of IP for all my opens (incl. #1).
519 	 *	* Only ipsecesp_open() can write into esp_sadb.s_ip_q.
520 	 *	* Because of this, I can check lazily for esp_sadb.s_ip_q.
521 	 *
522 	 *  If these assumptions are wrong, I'm in BIG trouble...
523 	 */
524 
525 	q->q_ptr = q; /* just so I know I'm open */
526 
527 	if (esp_sadb.s_ip_q == NULL) {
528 		struct T_unbind_req *tur;
529 
530 		esp_sadb.s_ip_q = WR(q);
531 		/* Allocate an unbind... */
532 		esp_ip_unbind = allocb(sizeof (struct T_unbind_req), BPRI_HI);
533 
534 		/*
535 		 * Send down T_BIND_REQ to bind IPPROTO_ESP.
536 		 * Handle the ACK here in ESP.
537 		 */
538 		qprocson(q);
539 		if (esp_ip_unbind == NULL ||
540 		    !sadb_t_bind_req(esp_sadb.s_ip_q, IPPROTO_ESP)) {
541 			if (esp_ip_unbind != NULL) {
542 				freeb(esp_ip_unbind);
543 				esp_ip_unbind = NULL;
544 			}
545 			q->q_ptr = NULL;
546 			return (ENOMEM);
547 		}
548 
549 		esp_ip_unbind->b_datap->db_type = M_PROTO;
550 		tur = (struct T_unbind_req *)esp_ip_unbind->b_rptr;
551 		tur->PRIM_type = T_UNBIND_REQ;
552 	} else {
553 		qprocson(q);
554 	}
555 
556 	/*
557 	 * For now, there's not much I can do.  I'll be getting a message
558 	 * passed down to me from keysock (in my wput), and a T_BIND_ACK
559 	 * up from IP (in my rput).
560 	 */
561 
562 	return (0);
563 }
564 
565 /*
566  * ESP module close routine.
567  */
568 static int
569 ipsecesp_close(queue_t *q)
570 {
571 	/*
572 	 * If esp_sadb.s_ip_q is attached to this instance, send a
573 	 * T_UNBIND_REQ to IP for the instance before doing
574 	 * a qprocsoff().
575 	 */
576 	if (WR(q) == esp_sadb.s_ip_q && esp_ip_unbind != NULL) {
577 		putnext(WR(q), esp_ip_unbind);
578 		esp_ip_unbind = NULL;
579 	}
580 
581 	/*
582 	 * Clean up q_ptr, if needed.
583 	 */
584 	qprocsoff(q);
585 
586 	/* Keysock queue check is safe, because of OCEXCL perimeter. */
587 
588 	if (q == esp_pfkey_q) {
589 		esp0dbg(("ipsecesp_close:  Ummm... keysock is closing ESP.\n"));
590 		esp_pfkey_q = NULL;
591 		/* Detach qtimeouts. */
592 		(void) quntimeout(q, esp_event);
593 	}
594 
595 	if (WR(q) == esp_sadb.s_ip_q) {
596 		/*
597 		 * If the esp_sadb.s_ip_q is attached to this instance, find
598 		 * another.  The OCEXCL outer perimeter helps us here.
599 		 */
600 		esp_sadb.s_ip_q = NULL;
601 
602 		/*
603 		 * Find a replacement queue for esp_sadb.s_ip_q.
604 		 */
605 		if (esp_pfkey_q != NULL && esp_pfkey_q != RD(q)) {
606 			/*
607 			 * See if we can use the pfkey_q.
608 			 */
609 			esp_sadb.s_ip_q = WR(esp_pfkey_q);
610 		}
611 
612 		if (esp_sadb.s_ip_q == NULL ||
613 		    !sadb_t_bind_req(esp_sadb.s_ip_q, IPPROTO_ESP)) {
614 			esp1dbg(("ipsecesp: Can't reassign ip_q.\n"));
615 			esp_sadb.s_ip_q = NULL;
616 		} else {
617 			esp_ip_unbind = allocb(sizeof (struct T_unbind_req),
618 			    BPRI_HI);
619 
620 			if (esp_ip_unbind != NULL) {
621 				struct T_unbind_req *tur;
622 
623 				esp_ip_unbind->b_datap->db_type = M_PROTO;
624 				tur = (struct T_unbind_req *)
625 				    esp_ip_unbind->b_rptr;
626 				tur->PRIM_type = T_UNBIND_REQ;
627 			}
628 			/* If it's NULL, I can't do much here. */
629 		}
630 	}
631 
632 	return (0);
633 }
634 
635 /*
636  * Add a number of bytes to what the SA has protected so far.  Return
637  * B_TRUE if the SA can still protect that many bytes.
638  *
639  * Caller must REFRELE the passed-in assoc.  This function must REFRELE
640  * any obtained peer SA.
641  */
642 static boolean_t
643 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
644 {
645 	ipsa_t *inassoc, *outassoc;
646 	isaf_t *bucket;
647 	boolean_t inrc, outrc, isv6;
648 	sadb_t *sp;
649 	int outhash;
650 
651 	/* No peer?  No problem! */
652 	if (!assoc->ipsa_haspeer) {
653 		return (sadb_age_bytes(esp_pfkey_q, assoc, bytes,
654 		    B_TRUE));
655 	}
656 
657 	/*
658 	 * Otherwise, we want to grab both the original assoc and its peer.
659 	 * There might be a race for this, but if it's a real race, two
660 	 * expire messages may occur.  We limit this by only sending the
661 	 * expire message on one of the peers, we'll pick the inbound
662 	 * arbitrarily.
663 	 *
664 	 * If we need tight synchronization on the peer SA, then we need to
665 	 * reconsider.
666 	 */
667 
668 	/* Use address length to select IPv6/IPv4 */
669 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
670 	sp = isv6 ? &esp_sadb.s_v6 : &esp_sadb.s_v4;
671 
672 	if (inbound) {
673 		inassoc = assoc;
674 		if (isv6) {
675 			outhash = OUTBOUND_HASH_V6(*((in6_addr_t *)
676 			    &inassoc->ipsa_dstaddr));
677 		} else {
678 			outhash = OUTBOUND_HASH_V4(*((ipaddr_t *)
679 				&inassoc->ipsa_dstaddr));
680 		}
681 		bucket = &sp->sdb_of[outhash];
682 		mutex_enter(&bucket->isaf_lock);
683 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
684 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
685 		    inassoc->ipsa_addrfam);
686 		mutex_exit(&bucket->isaf_lock);
687 		if (outassoc == NULL) {
688 			/* Q: Do we wish to set haspeer == B_FALSE? */
689 			esp0dbg(("esp_age_bytes: "
690 			    "can't find peer for inbound.\n"));
691 			return (sadb_age_bytes(esp_pfkey_q, inassoc,
692 			    bytes, B_TRUE));
693 		}
694 	} else {
695 		outassoc = assoc;
696 		bucket = &sp->sdb_if[INBOUND_HASH(outassoc->ipsa_spi)];
697 		mutex_enter(&bucket->isaf_lock);
698 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
699 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
700 		    outassoc->ipsa_addrfam);
701 		mutex_exit(&bucket->isaf_lock);
702 		if (inassoc == NULL) {
703 			/* Q: Do we wish to set haspeer == B_FALSE? */
704 			esp0dbg(("esp_age_bytes: "
705 			    "can't find peer for outbound.\n"));
706 			return (sadb_age_bytes(esp_pfkey_q, outassoc,
707 			    bytes, B_TRUE));
708 		}
709 	}
710 
711 	inrc = sadb_age_bytes(esp_pfkey_q, inassoc, bytes, B_TRUE);
712 	outrc = sadb_age_bytes(esp_pfkey_q, outassoc, bytes, B_FALSE);
713 
714 	/*
715 	 * REFRELE any peer SA.
716 	 *
717 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
718 	 * them in { }.
719 	 */
720 	if (inbound) {
721 		IPSA_REFRELE(outassoc);
722 	} else {
723 		IPSA_REFRELE(inassoc);
724 	}
725 
726 	return (inrc && outrc);
727 }
728 
729 /*
730  * Do incoming NAT-T manipulations for packet.
731  */
732 static ipsec_status_t
733 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
734 {
735 	ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
736 	tcpha_t *tcph;
737 	udpha_t *udpha;
738 	/* Initialize to our inbound cksum adjustment... */
739 	uint32_t sum = assoc->ipsa_inbound_cksum;
740 
741 	switch (ipha->ipha_protocol) {
742 	case IPPROTO_TCP:
743 		tcph = (tcpha_t *)(data_mp->b_rptr +
744 		    IPH_HDR_LENGTH(ipha));
745 
746 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
747 		sum += ~ntohs(tcph->tha_sum) & 0xFFFF;
748 		DOWN_SUM(sum);
749 		DOWN_SUM(sum);
750 		tcph->tha_sum = ~htons(sum);
751 		break;
752 	case IPPROTO_UDP:
753 		udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));
754 
755 		if (udpha->uha_checksum != 0) {
756 			/* Adujst if the inbound one was not zero. */
757 			sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
758 			DOWN_SUM(sum);
759 			DOWN_SUM(sum);
760 			udpha->uha_checksum = ~htons(sum);
761 			if (udpha->uha_checksum == 0)
762 				udpha->uha_checksum = 0xFFFF;
763 		}
764 #undef DOWN_SUM
765 		break;
766 	case IPPROTO_IP:
767 		/*
768 		 * This case is only an issue for self-encapsulated
769 		 * packets.  So for now, fall through.
770 		 */
771 		break;
772 	}
773 	return (IPSEC_STATUS_SUCCESS);
774 }
775 
776 
777 /*
778  * Strip ESP header and fix IP header
779  * Returns B_TRUE on success, B_FALSE if an error occured.
780  */
781 static boolean_t
782 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
783     kstat_named_t **counter)
784 {
785 	ipha_t *ipha;
786 	ip6_t *ip6h;
787 	uint_t divpoint;
788 	mblk_t *scratch;
789 	uint8_t nexthdr, padlen;
790 	uint8_t lastpad;
791 
792 	/*
793 	 * Strip ESP data and fix IP header.
794 	 *
795 	 * XXX In case the beginning of esp_inbound() changes to not do a
796 	 * pullup, this part of the code can remain unchanged.
797 	 */
798 	if (isv4) {
799 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
800 		ipha = (ipha_t *)data_mp->b_rptr;
801 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
802 		    IPH_HDR_LENGTH(ipha));
803 		divpoint = IPH_HDR_LENGTH(ipha);
804 	} else {
805 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
806 		ip6h = (ip6_t *)data_mp->b_rptr;
807 		divpoint = ip_hdr_length_v6(data_mp, ip6h);
808 	}
809 
810 	scratch = data_mp;
811 	while (scratch->b_cont != NULL)
812 		scratch = scratch->b_cont;
813 
814 	ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);
815 
816 	/*
817 	 * "Next header" and padding length are the last two bytes in the
818 	 * ESP-protected datagram, thus the explicit - 1 and - 2.
819 	 * lastpad is the last byte of the padding, which can be used for
820 	 * a quick check to see if the padding is correct.
821 	 */
822 	nexthdr = *(scratch->b_wptr - 1);
823 	padlen = *(scratch->b_wptr - 2);
824 	lastpad = *(scratch->b_wptr - 3);
825 
826 	if (isv4) {
827 		/* Fix part of the IP header. */
828 		ipha->ipha_protocol = nexthdr;
829 		/*
830 		 * Reality check the padlen.  The explicit - 2 is for the
831 		 * padding length and the next-header bytes.
832 		 */
833 		if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
834 		    sizeof (esph_t) - ivlen) {
835 /* EXPORT DELETE START */
836 			ESP_BUMP_STAT(bad_decrypt);
837 /* EXPORT DELETE END */
838 			ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
839 			    "Possibly corrupt ESP packet.");
840 			esp1dbg(("padlen (%d) is greater than:\n", padlen));
841 			esp1dbg(("pkt len(%d) - ip hdr - esp hdr - ivlen(%d) "
842 			    "= %d.\n", ntohs(ipha->ipha_length), ivlen,
843 			    (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
844 				2 - sizeof (esph_t) - ivlen)));
845 			*counter = &ipdrops_esp_bad_padlen;
846 			return (B_FALSE);
847 		}
848 
849 		/*
850 		 * Fix the rest of the header.  The explicit - 2 is for the
851 		 * padding length and the next-header bytes.
852 		 */
853 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
854 		    2 - sizeof (esph_t) - ivlen);
855 		ipha->ipha_hdr_checksum = 0;
856 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
857 	} else {
858 		if (ip6h->ip6_nxt == IPPROTO_ESP) {
859 			ip6h->ip6_nxt = nexthdr;
860 		} else {
861 			ip6_pkt_t ipp;
862 
863 			bzero(&ipp, sizeof (ipp));
864 			(void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL);
865 			if (ipp.ipp_dstopts != NULL) {
866 				ipp.ipp_dstopts->ip6d_nxt = nexthdr;
867 			} else if (ipp.ipp_rthdr != NULL) {
868 				ipp.ipp_rthdr->ip6r_nxt = nexthdr;
869 			} else if (ipp.ipp_hopopts != NULL) {
870 				ipp.ipp_hopopts->ip6h_nxt = nexthdr;
871 			} else {
872 				/* Panic a DEBUG kernel. */
873 				ASSERT(ipp.ipp_hopopts != NULL);
874 				/* Otherwise, pretend it's IP + ESP. */
875 				cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
876 				ip6h->ip6_nxt = nexthdr;
877 			}
878 		}
879 
880 		if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
881 		    ivlen) {
882 /* EXPORT DELETE START */
883 			ESP_BUMP_STAT(bad_decrypt);
884 /* EXPORT DELETE END */
885 			ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
886 			    "Possibly corrupt ESP packet.");
887 			esp1dbg(("padlen (%d) is greater than:\n", padlen));
888 			esp1dbg(("pkt len(%u) - ip hdr - esp hdr - ivlen(%d)"
889 			    " = %u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
890 				+ sizeof (ip6_t)), ivlen,
891 			    (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
892 				sizeof (esph_t) - ivlen)));
893 			*counter = &ipdrops_esp_bad_padlen;
894 			return (B_FALSE);
895 		}
896 
897 
898 		/*
899 		 * Fix the rest of the header.  The explicit - 2 is for the
900 		 * padding length and the next-header bytes.  IPv6 is nice,
901 		 * because there's no hdr checksum!
902 		 */
903 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
904 		    2 - sizeof (esph_t) - ivlen);
905 	}
906 
907 	if (ipsecesp_padding_check > 0 &&
908 		padlen != lastpad && padlen != 0) {
909 		ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
910 		    "Possibly corrupt ESP packet.");
911 		esp1dbg(("lastpad (%d) not equal to padlen (%d):\n",
912 		    lastpad, padlen));
913 		ESP_BUMP_STAT(bad_padding);
914 		*counter = &ipdrops_esp_bad_padding;
915 		return (B_FALSE);
916 	}
917 
918 	if (ipsecesp_padding_check > 1) {
919 		uint8_t *last = (uint8_t *)(scratch->b_wptr - 3);
920 		uint8_t lastval = *last;
921 
922 		/*
923 		 * this assert may have to become an if
924 		 * and a pullup if we start accepting
925 		 * multi-dblk mblks. Any packet here will
926 		 * have been pulled up in esp_inbound.
927 		 */
928 		ASSERT(MBLKL(scratch) >= lastval + 3);
929 
930 		while (lastval != 0) {
931 			if (lastval != *last) {
932 				ipsec_rl_strlog(info.mi_idnum, 0, 0,
933 				    SL_ERROR | SL_WARN,
934 				    "Possibly corrupt ESP packet.");
935 				esp1dbg(("padding not in correct"
936 				    " format:\n"));
937 				ESP_BUMP_STAT(bad_padding);
938 				*counter = &ipdrops_esp_bad_padding;
939 				return (B_FALSE);
940 			}
941 			lastval--; last--;
942 		}
943 	}
944 
945 	/* Trim off the padding. */
946 	ASSERT(data_mp->b_cont == NULL);
947 	data_mp->b_wptr -= (padlen + 2);
948 
949 	/*
950 	 * Remove the ESP header.
951 	 *
952 	 * The above assertions about data_mp's size will make this work.
953 	 *
954 	 * XXX  Question:  If I send up and get back a contiguous mblk,
955 	 * would it be quicker to bcopy over, or keep doing the dupb stuff?
956 	 * I go with copying for now.
957 	 */
958 
959 	if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
960 	    IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
961 		uint8_t *start = data_mp->b_rptr;
962 		uint32_t *src, *dst;
963 
964 		src = (uint32_t *)(start + divpoint);
965 		dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);
966 
967 		ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
968 		    IS_P2ALIGNED(src, sizeof (uint32_t)));
969 
970 		do {
971 			src--;
972 			dst--;
973 			*dst = *src;
974 		} while (src != (uint32_t *)start);
975 
976 		data_mp->b_rptr = (uchar_t *)dst;
977 	} else {
978 		uint8_t *start = data_mp->b_rptr;
979 		uint8_t *src, *dst;
980 
981 		src = start + divpoint;
982 		dst = src + sizeof (esph_t) + ivlen;
983 
984 		do {
985 			src--;
986 			dst--;
987 			*dst = *src;
988 		} while (src != start);
989 
990 		data_mp->b_rptr = dst;
991 	}
992 
993 	esp2dbg(("data_mp after inbound ESP adjustment:\n"));
994 	esp2dbg((dump_msg(data_mp)));
995 
996 	return (B_TRUE);
997 }
998 
999 /*
1000  * Updating use times can be tricky business if the ipsa_haspeer flag is
1001  * set.  This function is called once in an SA's lifetime.
1002  *
1003  * Caller has to REFRELE "assoc" which is passed in.  This function has
1004  * to REFRELE any peer SA that is obtained.
1005  */
1006 static void
1007 esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
1008 {
1009 	ipsa_t *inassoc, *outassoc;
1010 	isaf_t *bucket;
1011 	sadb_t *sp;
1012 	int outhash;
1013 	boolean_t isv6;
1014 
1015 	/* No peer?  No problem! */
1016 	if (!assoc->ipsa_haspeer) {
1017 		sadb_set_usetime(assoc);
1018 		return;
1019 	}
1020 
1021 	/*
1022 	 * Otherwise, we want to grab both the original assoc and its peer.
1023 	 * There might be a race for this, but if it's a real race, the times
1024 	 * will be out-of-synch by at most a second, and since our time
1025 	 * granularity is a second, this won't be a problem.
1026 	 *
1027 	 * If we need tight synchronization on the peer SA, then we need to
1028 	 * reconsider.
1029 	 */
1030 
1031 	/* Use address length to select IPv6/IPv4 */
1032 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
1033 	sp = isv6 ? &esp_sadb.s_v6 : &esp_sadb.s_v4;
1034 
1035 	if (inbound) {
1036 		inassoc = assoc;
1037 		if (isv6) {
1038 			outhash = OUTBOUND_HASH_V6(*((in6_addr_t *)
1039 			    &inassoc->ipsa_dstaddr));
1040 		} else {
1041 			outhash = OUTBOUND_HASH_V4(*((ipaddr_t *)
1042 				&inassoc->ipsa_dstaddr));
1043 		}
1044 		bucket = &sp->sdb_of[outhash];
1045 		mutex_enter(&bucket->isaf_lock);
1046 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1047 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1048 		    inassoc->ipsa_addrfam);
1049 		mutex_exit(&bucket->isaf_lock);
1050 		if (outassoc == NULL) {
1051 			/* Q: Do we wish to set haspeer == B_FALSE? */
1052 			esp0dbg(("esp_set_usetime: "
1053 			    "can't find peer for inbound.\n"));
1054 			sadb_set_usetime(inassoc);
1055 			return;
1056 		}
1057 	} else {
1058 		outassoc = assoc;
1059 		bucket = &sp->sdb_if[INBOUND_HASH(outassoc->ipsa_spi)];
1060 		mutex_enter(&bucket->isaf_lock);
1061 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1062 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1063 		    outassoc->ipsa_addrfam);
1064 		mutex_exit(&bucket->isaf_lock);
1065 		if (inassoc == NULL) {
1066 			/* Q: Do we wish to set haspeer == B_FALSE? */
1067 			esp0dbg(("esp_set_usetime: "
1068 			    "can't find peer for outbound.\n"));
1069 			sadb_set_usetime(outassoc);
1070 			return;
1071 		}
1072 	}
1073 
1074 	/* Update usetime on both. */
1075 	sadb_set_usetime(inassoc);
1076 	sadb_set_usetime(outassoc);
1077 
1078 	/*
1079 	 * REFRELE any peer SA.
1080 	 *
1081 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
1082 	 * them in { }.
1083 	 */
1084 	if (inbound) {
1085 		IPSA_REFRELE(outassoc);
1086 	} else {
1087 		IPSA_REFRELE(inassoc);
1088 	}
1089 }
1090 
1091 /*
1092  * Handle ESP inbound data for IPv4 and IPv6.
1093  * On success returns B_TRUE, on failure returns B_FALSE and frees the
1094  * mblk chain ipsec_in_mp.
1095  */
1096 ipsec_status_t
1097 esp_inbound(mblk_t *ipsec_in_mp, void *arg)
1098 {
1099 	mblk_t *data_mp = ipsec_in_mp->b_cont;
1100 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
1101 	esph_t *esph = (esph_t *)arg;
1102 	ipsa_t *ipsa = ii->ipsec_in_esp_sa;
1103 
1104 	if (ipsa->ipsa_usetime == 0)
1105 		esp_set_usetime(ipsa, B_TRUE);
1106 
1107 	/*
1108 	 * We may wish to check replay in-range-only here as an optimization.
1109 	 * Include the reality check of ipsa->ipsa_replay >
1110 	 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
1111 	 * where N == ipsa->ipsa_replay_wsize.
1112 	 *
1113 	 * Another check that may come here later is the "collision" check.
1114 	 * If legitimate packets flow quickly enough, this won't be a problem,
1115 	 * but collisions may cause authentication algorithm crunching to
1116 	 * take place when it doesn't need to.
1117 	 */
1118 	if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
1119 		ESP_BUMP_STAT(replay_early_failures);
1120 		IP_ESP_BUMP_STAT(in_discards);
1121 		/*
1122 		 * TODO: Extract inbound interface from the IPSEC_IN
1123 		 * message's ii->ipsec_in_rill_index.
1124 		 */
1125 		ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL,
1126 		    &ipdrops_esp_early_replay, &esp_dropper);
1127 		return (IPSEC_STATUS_FAILED);
1128 	}
1129 
1130 /* EXPORT DELETE START */
1131 	/*
1132 	 * Has this packet already been processed by a hardware
1133 	 * IPsec accelerator?
1134 	 */
1135 	if (ii->ipsec_in_accelerated) {
1136 		ipsec_status_t rv;
1137 		esp3dbg(("esp_inbound: pkt processed by ill=%d isv6=%d\n",
1138 		    ii->ipsec_in_ill_index, !ii->ipsec_in_v4));
1139 		rv = esp_inbound_accelerated(ipsec_in_mp,
1140 		    data_mp, ii->ipsec_in_v4, ipsa);
1141 		return (rv);
1142 	}
1143 	ESP_BUMP_STAT(noaccel);
1144 /* EXPORT DELETE END */
1145 
1146 	/*
1147 	 * Adjust the IP header's payload length to reflect the removal
1148 	 * of the ICV.
1149 	 */
1150 	if (!ii->ipsec_in_v4) {
1151 		ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
1152 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
1153 		    ipsa->ipsa_mac_len);
1154 	} else {
1155 		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1156 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
1157 		    ipsa->ipsa_mac_len);
1158 	}
1159 
1160 	/* submit the request to the crypto framework */
1161 	return (esp_submit_req_inbound(ipsec_in_mp, ipsa,
1162 	    (uint8_t *)esph - data_mp->b_rptr));
1163 }
1164 
1165 /*
1166  * Perform the really difficult work of inserting the proposed situation.
1167  * Called while holding the algorithm lock.
1168  */
1169 static void
1170 esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs)
1171 {
1172 	sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
1173 	ipsec_out_t *io;
1174 	ipsec_action_t *ap;
1175 	ipsec_prot_t *prot;
1176 
1177 	ASSERT(MUTEX_HELD(&alg_lock));
1178 	io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr;
1179 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
1180 
1181 	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
1182 	prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
1183 	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
1184 
1185 	prop->sadb_prop_replay = ipsecesp_replay_size;
1186 
1187 	/*
1188 	 * Based upon algorithm properties, and what-not, prioritize
1189 	 * a proposal.  If the IPSEC_OUT message has an algorithm specified,
1190 	 * use it first and foremost.
1191 	 *
1192 	 * For each action in policy list
1193 	 *   Add combination.  If I've hit limit, return.
1194 	 */
1195 
1196 	for (ap = acqrec->ipsacq_act; ap != NULL;
1197 	    ap = ap->ipa_next) {
1198 /* EXPORT DELETE START */
1199 		ipsec_alginfo_t *ealg = NULL;
1200 /* EXPORT DELETE END */
1201 		ipsec_alginfo_t *aalg = NULL;
1202 
1203 		if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
1204 			continue;
1205 
1206 		prot = &ap->ipa_act.ipa_apply;
1207 
1208 		if (!(prot->ipp_use_esp))
1209 			continue;
1210 
1211 		if (prot->ipp_esp_auth_alg != 0) {
1212 			aalg = ipsec_alglists[IPSEC_ALG_AUTH]
1213 			    [prot->ipp_esp_auth_alg];
1214 			if (aalg == NULL || !ALG_VALID(aalg))
1215 				continue;
1216 		}
1217 
1218 /* EXPORT DELETE START */
1219 		ASSERT(prot->ipp_encr_alg > 0);
1220 		ealg = ipsec_alglists[IPSEC_ALG_ENCR][prot->ipp_encr_alg];
1221 		if (ealg == NULL || !ALG_VALID(ealg))
1222 			continue;
1223 /* EXPORT DELETE END */
1224 
1225 		comb->sadb_comb_flags = 0;
1226 		comb->sadb_comb_reserved = 0;
1227 /* EXPORT DELETE START */
1228 		comb->sadb_comb_encrypt = ealg->alg_id;
1229 		comb->sadb_comb_encrypt_minbits = prot->ipp_espe_minbits;
1230 		comb->sadb_comb_encrypt_maxbits = prot->ipp_espe_maxbits;
1231 /* EXPORT DELETE END */
1232 		if (aalg == NULL) {
1233 			comb->sadb_comb_auth = 0;
1234 			comb->sadb_comb_auth_minbits = 0;
1235 			comb->sadb_comb_auth_maxbits = 0;
1236 		} else {
1237 			comb->sadb_comb_auth = aalg->alg_id;
1238 			comb->sadb_comb_auth_minbits = prot->ipp_espa_minbits;
1239 			comb->sadb_comb_auth_maxbits = prot->ipp_espa_maxbits;
1240 		}
1241 
1242 		/*
1243 		 * The following may be based on algorithm
1244 		 * properties, but in the meantime, we just pick
1245 		 * some good, sensible numbers.  Key mgmt. can
1246 		 * (and perhaps should) be the place to finalize
1247 		 * such decisions.
1248 		 */
1249 
1250 		/*
1251 		 * No limits on allocations, since we really don't
1252 		 * support that concept currently.
1253 		 */
1254 		comb->sadb_comb_soft_allocations = 0;
1255 		comb->sadb_comb_hard_allocations = 0;
1256 
1257 		/*
1258 		 * These may want to come from policy rule..
1259 		 */
1260 		comb->sadb_comb_soft_bytes = ipsecesp_default_soft_bytes;
1261 		comb->sadb_comb_hard_bytes = ipsecesp_default_hard_bytes;
1262 		comb->sadb_comb_soft_addtime = ipsecesp_default_soft_addtime;
1263 		comb->sadb_comb_hard_addtime = ipsecesp_default_hard_addtime;
1264 		comb->sadb_comb_soft_usetime = ipsecesp_default_soft_usetime;
1265 		comb->sadb_comb_hard_usetime = ipsecesp_default_hard_usetime;
1266 
1267 		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
1268 		if (--combs == 0)
1269 			break;	/* out of space.. */
1270 		comb++;
1271 	}
1272 }
1273 
1274 /*
1275  * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
1276  */
1277 static void
1278 esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended)
1279 {
1280 	mblk_t *pfkeymp, *msgmp;
1281 	uint_t allocsize, combs;
1282 	sadb_msg_t *samsg;
1283 	sadb_prop_t *prop;
1284 	uint8_t *cur, *end;
1285 
1286 	ESP_BUMP_STAT(acquire_requests);
1287 
1288 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
1289 
1290 	pfkeymp = sadb_keysock_out(0);
1291 	if (pfkeymp == NULL) {
1292 		esp0dbg(("esp_send_acquire: 1st allocb() failed.\n"));
1293 		/* Just bail. */
1294 		goto done;
1295 	}
1296 
1297 	/*
1298 	 * First, allocate a basic ACQUIRE message.  Beyond that,
1299 	 * you need to extract certificate info from
1300 	 */
1301 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
1302 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
1303 
1304 	switch (acqrec->ipsacq_addrfam) {
1305 	case AF_INET:
1306 		allocsize += 2 * sizeof (struct sockaddr_in);
1307 		break;
1308 	case AF_INET6:
1309 		allocsize += 2 * sizeof (struct sockaddr_in6);
1310 		break;
1311 	}
1312 
1313 	mutex_enter(&alg_lock);
1314 
1315 /* EXPORT DELETE START */
1316 #if 0
1317 /* EXPORT DELETE END */
1318 	combs = ipsec_nalgs[IPSEC_ALG_AUTH];
1319 /* EXPORT DELETE START */
1320 #else
1321 	combs = ipsec_nalgs[IPSEC_ALG_AUTH] * ipsec_nalgs[IPSEC_ALG_ENCR];
1322 #endif
1323 /* EXPORT DELETE END */
1324 
1325 	allocsize += combs * sizeof (sadb_comb_t);
1326 
1327 	/*
1328 	 * XXX If there are:
1329 	 *	certificate IDs
1330 	 *	proxy address
1331 	 *	<Others>
1332 	 * add additional allocation size.
1333 	 */
1334 
1335 	msgmp = allocb(allocsize, BPRI_HI);
1336 	if (msgmp == NULL) {
1337 		esp0dbg(("esp_send_acquire: 2nd allocb() failed.\n"));
1338 		/* Just bail. */
1339 		freemsg(pfkeymp);
1340 		pfkeymp = NULL;
1341 		goto done;
1342 	}
1343 
1344 	cur = msgmp->b_rptr;
1345 	end = cur + allocsize;
1346 	samsg = (sadb_msg_t *)cur;
1347 	pfkeymp->b_cont = msgmp;
1348 
1349 	/* Set up ACQUIRE. */
1350 	cur = sadb_setup_acquire(cur, end, acqrec);
1351 	if (cur == NULL) {
1352 		esp0dbg(("sadb_setup_acquire failed.\n"));
1353 		/* Just bail. */
1354 		freemsg(pfkeymp);
1355 		pfkeymp = NULL;
1356 		goto done;
1357 	}
1358 	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
1359 
1360 	/* XXX Insert proxy address information here. */
1361 
1362 	/* XXX Insert identity information here. */
1363 
1364 	/* XXXMLS Insert sensitivity information here. */
1365 
1366 	/* Insert proposal here. */
1367 
1368 	prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
1369 	esp_insert_prop(prop, acqrec, combs);
1370 	samsg->sadb_msg_len += prop->sadb_prop_len;
1371 	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
1372 
1373 done:
1374 	mutex_exit(&alg_lock);
1375 
1376 	/*
1377 	 * Must mutex_exit() before sending PF_KEY message up, in
1378 	 * order to avoid recursive mutex_enter() if there are no registered
1379 	 * listeners.
1380 	 *
1381 	 * Once I've sent the message, I'm cool anyway.
1382 	 */
1383 	mutex_exit(&acqrec->ipsacq_lock);
1384 	if (esp_pfkey_q != NULL && pfkeymp != NULL) {
1385 		if (extended != NULL) {
1386 			putnext(esp_pfkey_q, extended);
1387 		}
1388 		putnext(esp_pfkey_q, pfkeymp);
1389 		return;
1390 	}
1391 	/* XXX freemsg() works for extended == NULL. */
1392 	freemsg(extended);
1393 	freemsg(pfkeymp);
1394 }
1395 
1396 /*
1397  * Handle the SADB_GETSPI message.  Create a larval SA.
1398  */
1399 static void
1400 esp_getspi(mblk_t *mp, keysock_in_t *ksi)
1401 {
1402 	ipsa_t *newbie, *target;
1403 	isaf_t *outbound, *inbound;
1404 	int rc, diagnostic;
1405 	sadb_sa_t *assoc;
1406 	keysock_out_t *kso;
1407 	uint32_t newspi;
1408 
1409 	/*
1410 	 * Randomly generate a proposed SPI value
1411 	 */
1412 	(void) random_get_pseudo_bytes((uint8_t *)&newspi, sizeof (uint32_t));
1413 	newbie = sadb_getspi(ksi, newspi, &diagnostic);
1414 
1415 	if (newbie == NULL) {
1416 		sadb_pfkey_error(esp_pfkey_q, mp, ENOMEM, diagnostic,
1417 		    ksi->ks_in_serial);
1418 		return;
1419 	} else if (newbie == (ipsa_t *)-1) {
1420 		sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, diagnostic,
1421 		    ksi->ks_in_serial);
1422 		return;
1423 	}
1424 
1425 	/*
1426 	 * XXX - We may randomly collide.  We really should recover from this.
1427 	 *	 Unfortunately, that could require spending way-too-much-time
1428 	 *	 in here.  For now, let the user retry.
1429 	 */
1430 
1431 	if (newbie->ipsa_addrfam == AF_INET6) {
1432 		outbound = &esp_sadb.s_v6.sdb_of[
1433 		    OUTBOUND_HASH_V6(*(uint32_t *)(newbie->ipsa_dstaddr))];
1434 		inbound = &esp_sadb.s_v6.sdb_if[INBOUND_HASH(newbie->ipsa_spi)];
1435 	} else {
1436 		ASSERT(newbie->ipsa_addrfam == AF_INET);
1437 		outbound = &esp_sadb.s_v4.sdb_of[
1438 		    OUTBOUND_HASH_V4(*(uint32_t *)(newbie->ipsa_dstaddr))];
1439 		inbound = &esp_sadb.s_v4.sdb_if[INBOUND_HASH(newbie->ipsa_spi)];
1440 	}
1441 
1442 	mutex_enter(&outbound->isaf_lock);
1443 	mutex_enter(&inbound->isaf_lock);
1444 
1445 	/*
1446 	 * Check for collisions (i.e. did sadb_getspi() return with something
1447 	 * that already exists?).
1448 	 *
1449 	 * Try outbound first.  Even though SADB_GETSPI is traditionally
1450 	 * for inbound SAs, you never know what a user might do.
1451 	 */
1452 	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1453 	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1454 	if (target == NULL) {
1455 		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1456 		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1457 		    newbie->ipsa_addrfam);
1458 	}
1459 
1460 	/*
1461 	 * I don't have collisions elsewhere!
1462 	 * (Nor will I because I'm still holding inbound/outbound locks.)
1463 	 */
1464 
1465 	if (target != NULL) {
1466 		rc = EEXIST;
1467 		IPSA_REFRELE(target);
1468 	} else {
1469 		/*
1470 		 * sadb_insertassoc() also checks for collisions, so
1471 		 * if there's a colliding entry, rc will be set
1472 		 * to EEXIST.
1473 		 */
1474 		rc = sadb_insertassoc(newbie, inbound);
1475 		(void) drv_getparm(TIME, &newbie->ipsa_hardexpiretime);
1476 		newbie->ipsa_hardexpiretime += ipsecesp_larval_timeout;
1477 	}
1478 
1479 	/*
1480 	 * Can exit outbound mutex.  Hold inbound until we're done
1481 	 * with newbie.
1482 	 */
1483 	mutex_exit(&outbound->isaf_lock);
1484 
1485 	if (rc != 0) {
1486 		mutex_exit(&inbound->isaf_lock);
1487 		IPSA_REFRELE(newbie);
1488 		sadb_pfkey_error(esp_pfkey_q, mp, rc, SADB_X_DIAGNOSTIC_NONE,
1489 		    ksi->ks_in_serial);
1490 		return;
1491 	}
1492 
1493 
1494 	/* Can write here because I'm still holding the bucket lock. */
1495 	newbie->ipsa_type = SADB_SATYPE_ESP;
1496 
1497 	/*
1498 	 * Construct successful return message.  We have one thing going
1499 	 * for us in PF_KEY v2.  That's the fact that
1500 	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
1501 	 */
1502 	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
1503 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1504 	assoc->sadb_sa_spi = newbie->ipsa_spi;
1505 	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
1506 	mutex_exit(&inbound->isaf_lock);
1507 
1508 	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
1509 	kso = (keysock_out_t *)ksi;
1510 	kso->ks_out_len = sizeof (*kso);
1511 	kso->ks_out_serial = ksi->ks_in_serial;
1512 	kso->ks_out_type = KEYSOCK_OUT;
1513 
1514 	/*
1515 	 * Can safely putnext() to esp_pfkey_q, because this is a turnaround
1516 	 * from the esp_pfkey_q.
1517 	 */
1518 	putnext(esp_pfkey_q, mp);
1519 }
1520 
1521 /*
1522  * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
1523  * allocated mblk with the ESP header in between the two.
1524  */
1525 static boolean_t
1526 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint)
1527 {
1528 	mblk_t *split_mp = mp;
1529 	uint_t wheretodiv = divpoint;
1530 
1531 	while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
1532 		wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
1533 		split_mp = split_mp->b_cont;
1534 		ASSERT(split_mp != NULL);
1535 	}
1536 
1537 	if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
1538 		mblk_t *scratch;
1539 
1540 		/* "scratch" is the 2nd half, split_mp is the first. */
1541 		scratch = dupb(split_mp);
1542 		if (scratch == NULL) {
1543 			esp1dbg(("esp_insert_esp: can't allocate scratch.\n"));
1544 			return (B_FALSE);
1545 		}
1546 		/* NOTE:  dupb() doesn't set b_cont appropriately. */
1547 		scratch->b_cont = split_mp->b_cont;
1548 		scratch->b_rptr += wheretodiv;
1549 		split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
1550 		split_mp->b_cont = scratch;
1551 	}
1552 	/*
1553 	 * At this point, split_mp is exactly "wheretodiv" bytes long, and
1554 	 * holds the end of the pre-ESP part of the datagram.
1555 	 */
1556 	esp_mp->b_cont = split_mp->b_cont;
1557 	split_mp->b_cont = esp_mp;
1558 
1559 	return (B_TRUE);
1560 }
1561 
1562 /*
1563  * Finish processing of an inbound ESP packet after processing by the
1564  * crypto framework.
1565  * - Remove the ESP header.
1566  * - Send packet back to IP.
1567  * If authentication was performed on the packet, this function is called
1568  * only if the authentication succeeded.
1569  * On success returns B_TRUE, on failure returns B_FALSE and frees the
1570  * mblk chain ipsec_in_mp.
1571  */
1572 static ipsec_status_t
1573 esp_in_done(mblk_t *ipsec_in_mp)
1574 {
1575 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
1576 	mblk_t *data_mp;
1577 	ipsa_t *assoc;
1578 	uint_t espstart;
1579 	uint32_t ivlen = 0;
1580 	uint_t processed_len;
1581 	esph_t *esph;
1582 	kstat_named_t *counter;
1583 	boolean_t is_natt;
1584 
1585 	assoc = ii->ipsec_in_esp_sa;
1586 	ASSERT(assoc != NULL);
1587 
1588 	is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
1589 
1590 	/* get the pointer to the ESP header */
1591 /* EXPORT DELETE START */
1592 	if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
1593 		/* authentication-only ESP */
1594 /* EXPORT DELETE END */
1595 		espstart = ii->ipsec_in_crypto_data.cd_offset;
1596 		processed_len = ii->ipsec_in_crypto_data.cd_length;
1597 /* EXPORT DELETE START */
1598 	} else {
1599 		/* encryption present */
1600 		ivlen = assoc->ipsa_iv_len;
1601 		if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
1602 			/* encryption-only ESP */
1603 			espstart = ii->ipsec_in_crypto_data.cd_offset -
1604 				sizeof (esph_t) - assoc->ipsa_iv_len;
1605 			processed_len = ii->ipsec_in_crypto_data.cd_length +
1606 				ivlen;
1607 		} else {
1608 			/* encryption with authentication */
1609 			espstart = ii->ipsec_in_crypto_dual_data.dd_offset1;
1610 			processed_len = ii->ipsec_in_crypto_dual_data.dd_len2 +
1611 			    ivlen;
1612 		}
1613 	}
1614 /* EXPORT DELETE END */
1615 
1616 	data_mp = ipsec_in_mp->b_cont;
1617 	esph = (esph_t *)(data_mp->b_rptr + espstart);
1618 
1619 	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) {
1620 		/* authentication passed if we reach this point */
1621 		ESP_BUMP_STAT(good_auth);
1622 		data_mp->b_wptr -= assoc->ipsa_mac_len;
1623 
1624 		/*
1625 		 * Check replay window here!
1626 		 * For right now, assume keysock will set the replay window
1627 		 * size to zero for SAs that have an unspecified sender.
1628 		 * This may change...
1629 		 */
1630 
1631 		if (!sadb_replay_check(assoc, esph->esph_replay)) {
1632 			/*
1633 			 * Log the event. As of now we print out an event.
1634 			 * Do not print the replay failure number, or else
1635 			 * syslog cannot collate the error messages.  Printing
1636 			 * the replay number that failed opens a denial-of-
1637 			 * service attack.
1638 			 */
1639 			ipsec_assocfailure(info.mi_idnum, 0, 0,
1640 			    SL_ERROR | SL_WARN,
1641 			    "Replay failed for ESP spi 0x%x, dst %s.\n",
1642 			    assoc->ipsa_spi, assoc->ipsa_dstaddr,
1643 			    assoc->ipsa_addrfam);
1644 			ESP_BUMP_STAT(replay_failures);
1645 			counter = &ipdrops_esp_replay;
1646 			goto drop_and_bail;
1647 		}
1648 	}
1649 
1650 	if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
1651 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
1652 		ipsec_assocfailure(info.mi_idnum, 0, 0,
1653 		    SL_ERROR | SL_WARN,
1654 		    "ESP association 0x%x, dst %s had bytes expire.\n",
1655 		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam);
1656 		ESP_BUMP_STAT(bytes_expired);
1657 		counter = &ipdrops_esp_bytes_expire;
1658 		goto drop_and_bail;
1659 	}
1660 
1661 	/*
1662 	 * Remove ESP header and padding from packet.  I hope the compiler
1663 	 * spews "branch, predict taken" code for this.
1664 	 */
1665 
1666 	if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter)) {
1667 		if (is_natt)
1668 			return (esp_fix_natt_checksums(data_mp, assoc));
1669 		return (IPSEC_STATUS_SUCCESS);
1670 	}
1671 
1672 	esp1dbg(("esp_in_done: esp_strip_header() failed\n"));
1673 drop_and_bail:
1674 	IP_ESP_BUMP_STAT(in_discards);
1675 	/*
1676 	 * TODO: Extract inbound interface from the IPSEC_IN message's
1677 	 * ii->ipsec_in_rill_index.
1678 	 */
1679 	ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, counter, &esp_dropper);
1680 	return (IPSEC_STATUS_FAILED);
1681 }
1682 
1683 /*
1684  * Called upon failing the inbound ICV check. The message passed as
1685  * argument is freed.
1686  */
1687 static void
1688 esp_log_bad_auth(mblk_t *ipsec_in)
1689 {
1690 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr;
1691 	ipsa_t *assoc = ii->ipsec_in_esp_sa;
1692 
1693 	/*
1694 	 * Log the event. Don't print to the console, block
1695 	 * potential denial-of-service attack.
1696 	 */
1697 	ESP_BUMP_STAT(bad_auth);
1698 
1699 	ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
1700 	    "ESP Authentication failed for spi 0x%x, dst %s.\n",
1701 	    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam);
1702 
1703 	IP_ESP_BUMP_STAT(in_discards);
1704 	/*
1705 	 * TODO: Extract inbound interface from the IPSEC_IN
1706 	 * message's ii->ipsec_in_rill_index.
1707 	 */
1708 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, &ipdrops_esp_bad_auth,
1709 	    &esp_dropper);
1710 }
1711 
1712 
1713 /*
1714  * Invoked for outbound packets after ESP processing. If the packet
1715  * also requires AH, performs the AH SA selection and AH processing.
1716  * Returns B_TRUE if the AH processing was not needed or if it was
1717  * performed successfully. Returns B_FALSE and consumes the passed mblk
1718  * if AH processing was required but could not be performed.
1719  */
1720 static boolean_t
1721 esp_do_outbound_ah(mblk_t *ipsec_mp)
1722 {
1723 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
1724 	ipsec_status_t ipsec_rc;
1725 	ipsec_action_t *ap;
1726 
1727 	ap = io->ipsec_out_act;
1728 	if (ap == NULL) {
1729 		ipsec_policy_t *pp = io->ipsec_out_policy;
1730 		ap = pp->ipsp_act;
1731 	}
1732 
1733 	if (!ap->ipa_want_ah)
1734 		return (B_TRUE);
1735 
1736 	ASSERT(io->ipsec_out_ah_done == B_FALSE);
1737 
1738 	if (io->ipsec_out_ah_sa == NULL) {
1739 		if (!ipsec_outbound_sa(ipsec_mp, IPPROTO_AH)) {
1740 			sadb_acquire(ipsec_mp, io, B_TRUE, B_FALSE);
1741 			return (B_FALSE);
1742 		}
1743 	}
1744 	ASSERT(io->ipsec_out_ah_sa != NULL);
1745 
1746 	io->ipsec_out_ah_done = B_TRUE;
1747 	ipsec_rc = io->ipsec_out_ah_sa->ipsa_output_func(ipsec_mp);
1748 	return (ipsec_rc == IPSEC_STATUS_SUCCESS);
1749 }
1750 
1751 
1752 /*
1753  * Kernel crypto framework callback invoked after completion of async
1754  * crypto requests.
1755  */
1756 static void
1757 esp_kcf_callback(void *arg, int status)
1758 {
1759 	mblk_t *ipsec_mp = (mblk_t *)arg;
1760 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
1761 	boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN);
1762 
1763 	ASSERT(ipsec_mp->b_cont != NULL);
1764 
1765 	if (status == CRYPTO_SUCCESS) {
1766 		if (is_inbound) {
1767 			if (esp_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS)
1768 				return;
1769 
1770 			/* finish IPsec processing */
1771 			ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL);
1772 		} else {
1773 			/*
1774 			 * If a ICV was computed, it was stored by the
1775 			 * crypto framework at the end of the packet.
1776 			 */
1777 			ipha_t *ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr;
1778 
1779 			/* do AH processing if needed */
1780 			if (!esp_do_outbound_ah(ipsec_mp))
1781 				return;
1782 
1783 			/* finish IPsec processing */
1784 			if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
1785 				ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL,
1786 				    NULL);
1787 			} else {
1788 				ip6_t *ip6h = (ip6_t *)ipha;
1789 				ip_wput_ipsec_out_v6(NULL, ipsec_mp, ip6h,
1790 				    NULL, NULL);
1791 			}
1792 		}
1793 
1794 	} else if (status == CRYPTO_INVALID_MAC) {
1795 		esp_log_bad_auth(ipsec_mp);
1796 
1797 	} else {
1798 		esp1dbg(("esp_kcf_callback: crypto failed with 0x%x\n",
1799 		    status));
1800 		ESP_BUMP_STAT(crypto_failures);
1801 		if (is_inbound)
1802 			IP_ESP_BUMP_STAT(in_discards);
1803 		else
1804 			ESP_BUMP_STAT(out_discards);
1805 		ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL,
1806 		    &ipdrops_esp_crypto_failed, &esp_dropper);
1807 	}
1808 }
1809 
1810 /*
1811  * Invoked on crypto framework failure during inbound and outbound processing.
1812  */
1813 static void
1814 esp_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc)
1815 {
1816 	esp1dbg(("crypto failed for %s ESP with 0x%x\n",
1817 	    is_inbound ? "inbound" : "outbound", kef_rc));
1818 	ip_drop_packet(mp, is_inbound, NULL, NULL, &ipdrops_esp_crypto_failed,
1819 	    &esp_dropper);
1820 	ESP_BUMP_STAT(crypto_failures);
1821 	if (is_inbound)
1822 		IP_ESP_BUMP_STAT(in_discards);
1823 	else
1824 		ESP_BUMP_STAT(out_discards);
1825 }
1826 
/*
 * Helpers that fill in crypto framework request structures.  Each one
 * expands to a brace-enclosed block of assignments and is meant to be
 * used as a statement of its own.
 *
 * Every macro argument is parenthesized in the expansion so that an
 * expression argument (e.g. "ptr + n" for icvbuf) is taken as a whole
 * rather than having a cast bind only to its first operand.
 */

/*
 * Initialize the call request *_cr for an asynchronous-capable request
 * completed by esp_kcf_callback().  NOTE: this refers to a variable named
 * "ipsec_mp", which must be in scope wherever the macro is used; it is
 * the argument the callback will receive.
 */
#define	ESP_INIT_CALLREQ(_cr) {						\
	(_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_RESTRICTED;		\
	(_cr)->cr_callback_arg = ipsec_mp;				\
	(_cr)->cr_callback_func = esp_kcf_callback;			\
}

/*
 * Describe the ICV buffer: icvlen raw bytes starting at icvbuf.
 * icvlen is evaluated twice.
 */
#define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {			\
	(mac)->cd_format = CRYPTO_DATA_RAW;				\
	(mac)->cd_offset = 0;						\
	(mac)->cd_length = (icvlen);					\
	(mac)->cd_raw.iov_base = (char *)(icvbuf);			\
	(mac)->cd_raw.iov_len = (icvlen);				\
}

/*
 * Describe "len" bytes of data starting "off" bytes into mp.  When the
 * whole range lies within the first mblk, the raw format is used;
 * otherwise the mblk chain is passed.  mp, off and len are evaluated more
 * than once.
 */
#define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) {			\
	if (MBLKL(mp) >= (len) + (off)) {				\
		(data)->cd_format = CRYPTO_DATA_RAW;			\
		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;		\
		(data)->cd_raw.iov_len = MBLKL(mp);			\
		(data)->cd_offset = (off);				\
	} else {							\
		(data)->cd_format = CRYPTO_DATA_MBLK;			\
		(data)->cd_mp = (mp);					\
		(data)->cd_offset = (off);				\
	}								\
	(data)->cd_length = (len);					\
}

/* EXPORT DELETE START */
/*
 * Describe the two (offset, length) regions of mp used by a dual
 * MAC/cipher operation; the mblk format is always used.
 */
#define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {	\
	(data)->dd_format = CRYPTO_DATA_MBLK;				\
	(data)->dd_mp = (mp);						\
	(data)->dd_len1 = (len1);					\
	(data)->dd_offset1 = (off1);					\
	(data)->dd_len2 = (len2);					\
	(data)->dd_offset2 = (off2);					\
}
/* EXPORT DELETE END */
1865 
1866 static ipsec_status_t
1867 esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset)
1868 {
1869 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
1870 	boolean_t do_auth;
1871 	uint_t auth_offset, msg_len, auth_len;
1872 	crypto_call_req_t call_req;
1873 	mblk_t *esp_mp;
1874 	int kef_rc = CRYPTO_FAILED;
1875 	uint_t icv_len = assoc->ipsa_mac_len;
1876 	crypto_ctx_template_t auth_ctx_tmpl;
1877 /* EXPORT DELETE START */
1878 	boolean_t do_encr;
1879 	uint_t encr_offset, encr_len;
1880 	uint_t iv_len = assoc->ipsa_iv_len;
1881 	crypto_ctx_template_t encr_ctx_tmpl;
1882 /* EXPORT DELETE END */
1883 
1884 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
1885 
1886 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
1887 /* EXPORT DELETE START */
1888 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
1889 
1890 	/*
1891 	 * An inbound packet is of the form:
1892 	 * IPSEC_IN -> [IP,options,ESP,IV,data,ICV,pad]
1893 	 */
1894 /* EXPORT DELETE END */
1895 	esp_mp = ipsec_mp->b_cont;
1896 	msg_len = MBLKL(esp_mp);
1897 
1898 	ESP_INIT_CALLREQ(&call_req);
1899 
1900 	if (do_auth) {
1901 		/* force asynchronous processing? */
1902 		if (ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
1903 		    IPSEC_ALGS_EXEC_ASYNC)
1904 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
1905 
1906 		/* authentication context template */
1907 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
1908 		    auth_ctx_tmpl);
1909 
1910 		/* ICV to be verified */
1911 		ESP_INIT_CRYPTO_MAC(&ii->ipsec_in_crypto_mac,
1912 		    icv_len, esp_mp->b_wptr - icv_len);
1913 
1914 		/* authentication starts at the ESP header */
1915 		auth_offset = esph_offset;
1916 		auth_len = msg_len - auth_offset - icv_len;
1917 /* EXPORT DELETE START */
1918 		if (!do_encr) {
1919 			/* authentication only */
1920 /* EXPORT DELETE END */
1921 			/* initialize input data argument */
1922 			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
1923 			    esp_mp, auth_offset, auth_len);
1924 
1925 			/* call the crypto framework */
1926 			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
1927 			    &ii->ipsec_in_crypto_data,
1928 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
1929 			    &ii->ipsec_in_crypto_mac, &call_req);
1930 /* EXPORT DELETE START */
1931 		}
1932 /* EXPORT DELETE END */
1933 	}
1934 
1935 /* EXPORT DELETE START */
1936 	if (do_encr) {
1937 		/* force asynchronous processing? */
1938 		if (ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
1939 		    IPSEC_ALGS_EXEC_ASYNC)
1940 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
1941 
1942 		/* encryption template */
1943 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
1944 		    encr_ctx_tmpl);
1945 
1946 		/* skip IV, since it is passed separately */
1947 		encr_offset = esph_offset + sizeof (esph_t) + iv_len;
1948 		encr_len = msg_len - encr_offset;
1949 
1950 		if (!do_auth) {
1951 			/* decryption only */
1952 			/* initialize input data argument */
1953 			ESP_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data,
1954 			    esp_mp, encr_offset, encr_len);
1955 
1956 			/* specify IV */
1957 			ii->ipsec_in_crypto_data.cd_miscdata =
1958 			    (char *)esp_mp->b_rptr + sizeof (esph_t) +
1959 			    esph_offset;
1960 
1961 			/* call the crypto framework */
1962 			kef_rc = crypto_decrypt(&assoc->ipsa_emech,
1963 			    &ii->ipsec_in_crypto_data,
1964 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
1965 			    NULL, &call_req);
1966 		}
1967 	}
1968 
1969 	if (do_auth && do_encr) {
1970 		/* dual operation */
1971 		/* initialize input data argument */
1972 		ESP_INIT_CRYPTO_DUAL_DATA(&ii->ipsec_in_crypto_dual_data,
1973 		    esp_mp, auth_offset, auth_len,
1974 		    encr_offset, encr_len - icv_len);
1975 
1976 		/* specify IV */
1977 		ii->ipsec_in_crypto_dual_data.dd_miscdata =
1978 		    (char *)esp_mp->b_rptr + sizeof (esph_t) + esph_offset;
1979 
1980 		/* call the framework */
1981 		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
1982 		    &assoc->ipsa_emech, &ii->ipsec_in_crypto_dual_data,
1983 		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
1984 		    auth_ctx_tmpl, encr_ctx_tmpl, &ii->ipsec_in_crypto_mac,
1985 		    NULL, &call_req);
1986 	}
1987 /* EXPORT DELETE END */
1988 
1989 	switch (kef_rc) {
1990 	case CRYPTO_SUCCESS:
1991 		ESP_BUMP_STAT(crypto_sync);
1992 		return (esp_in_done(ipsec_mp));
1993 	case CRYPTO_QUEUED:
1994 		/* esp_kcf_callback() will be invoked on completion */
1995 		ESP_BUMP_STAT(crypto_async);
1996 		return (IPSEC_STATUS_PENDING);
1997 	case CRYPTO_INVALID_MAC:
1998 		ESP_BUMP_STAT(crypto_sync);
1999 		esp_log_bad_auth(ipsec_mp);
2000 		return (IPSEC_STATUS_FAILED);
2001 	}
2002 
2003 	esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc);
2004 	return (IPSEC_STATUS_FAILED);
2005 }
2006 
2007 static ipsec_status_t
2008 esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf,
2009     uint_t payload_len)
2010 {
2011 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
2012 	uint_t auth_len;
2013 	crypto_call_req_t call_req;
2014 	mblk_t *esp_mp;
2015 	int kef_rc = CRYPTO_FAILED;
2016 	uint_t icv_len = assoc->ipsa_mac_len;
2017 	crypto_ctx_template_t auth_ctx_tmpl;
2018 	boolean_t do_auth;
2019 /* EXPORT DELETE START */
2020 	boolean_t do_encr;
2021 	uint_t iv_len = assoc->ipsa_iv_len;
2022 	crypto_ctx_template_t encr_ctx_tmpl;
2023 /* EXPORT DELETE END */
2024 	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
2025 	size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
2026 
2027 	esp3dbg(("esp_submit_req_outbound:%s", is_natt ? "natt" : "not natt"));
2028 
2029 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
2030 
2031 /* EXPORT DELETE START */
2032 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
2033 /* EXPORT DELETE END */
2034 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
2035 
2036 	/*
2037 	 *	EXPORT DELETE START
2038 	 * Outbound IPsec packets are of the form:
2039 	 * IPSEC_OUT -> [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
2040 	 * unless it's NATT, then it's
2041 	 * IPSEC_OUT -> [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
2042 	 *	EXPORT DELETE END
2043 	 * Get a pointer to the mblk containing the ESP header.
2044 	 */
2045 	ASSERT(ipsec_mp->b_cont != NULL && ipsec_mp->b_cont->b_cont != NULL);
2046 	esp_mp = ipsec_mp->b_cont->b_cont;
2047 
2048 	ESP_INIT_CALLREQ(&call_req);
2049 
2050 	if (do_auth) {
2051 		/* force asynchronous processing? */
2052 		if (ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
2053 		    IPSEC_ALGS_EXEC_ASYNC)
2054 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
2055 
2056 		/* authentication context template */
2057 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
2058 		    auth_ctx_tmpl);
2059 
2060 		/* where to store the computed mac */
2061 		ESP_INIT_CRYPTO_MAC(&io->ipsec_out_crypto_mac,
2062 		    icv_len, icv_buf);
2063 
2064 		/* authentication starts at the ESP header */
2065 		auth_len = payload_len +
2066 /* EXPORT DELETE START */
2067 		    iv_len +
2068 /* EXPORT DELETE END */
2069 		    sizeof (esph_t);
2070 /* EXPORT DELETE START */
2071 		if (!do_encr) {
2072 			/* authentication only */
2073 /* EXPORT DELETE END */
2074 			/* initialize input data argument */
2075 			ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data,
2076 			    esp_mp, esph_offset, auth_len);
2077 
2078 			/* call the crypto framework */
2079 			kef_rc = crypto_mac(&assoc->ipsa_amech,
2080 			    &io->ipsec_out_crypto_data,
2081 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
2082 			    &io->ipsec_out_crypto_mac, &call_req);
2083 /* EXPORT DELETE START */
2084 		}
2085 /* EXPORT DELETE END */
2086 	}
2087 
2088 /* EXPORT DELETE START */
2089 	if (do_encr) {
2090 		/* force asynchronous processing? */
2091 		if (ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
2092 		    IPSEC_ALGS_EXEC_ASYNC)
2093 			call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE;
2094 
2095 		/* encryption context template */
2096 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
2097 		    encr_ctx_tmpl);
2098 
2099 		if (!do_auth) {
2100 			/* encryption only, skip mblk that contains ESP hdr */
2101 			/* initialize input data argument */
2102 			ESP_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data,
2103 			    esp_mp->b_cont, 0, payload_len);
2104 
2105 			/* specify IV */
2106 			io->ipsec_out_crypto_data.cd_miscdata =
2107 			    (char *)esp_mp->b_rptr + sizeof (esph_t) +
2108 			    esph_offset;
2109 
2110 			/* call the crypto framework */
2111 			kef_rc = crypto_encrypt(&assoc->ipsa_emech,
2112 			    &io->ipsec_out_crypto_data,
2113 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
2114 			    NULL, &call_req);
2115 		}
2116 	}
2117 
2118 	if (do_auth && do_encr) {
2119 		/*
2120 		 * Encryption and authentication:
2121 		 * Pass the pointer to the mblk chain starting at the ESP
2122 		 * header to the framework. Skip the ESP header mblk
2123 		 * for encryption, which is reflected by an encryption
2124 		 * offset equal to the length of that mblk. Start
2125 		 * the authentication at the ESP header, i.e. use an
2126 		 * authentication offset of zero.
2127 		 */
2128 		ESP_INIT_CRYPTO_DUAL_DATA(&io->ipsec_out_crypto_dual_data,
2129 		    esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);
2130 
2131 		/* specify IV */
2132 		io->ipsec_out_crypto_dual_data.dd_miscdata =
2133 		    (char *)esp_mp->b_rptr + sizeof (esph_t) + esph_offset;
2134 
2135 		/* call the framework */
2136 		kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
2137 		    &assoc->ipsa_amech, NULL,
2138 		    &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
2139 		    encr_ctx_tmpl, auth_ctx_tmpl,
2140 		    &io->ipsec_out_crypto_dual_data,
2141 		    &io->ipsec_out_crypto_mac, &call_req);
2142 	}
2143 /* EXPORT DELETE END */
2144 
2145 	switch (kef_rc) {
2146 	case CRYPTO_SUCCESS:
2147 		ESP_BUMP_STAT(crypto_sync);
2148 		return (IPSEC_STATUS_SUCCESS);
2149 	case CRYPTO_QUEUED:
2150 		/* esp_kcf_callback() will be invoked on completion */
2151 		ESP_BUMP_STAT(crypto_async);
2152 		return (IPSEC_STATUS_PENDING);
2153 	}
2154 
2155 	esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc);
2156 	return (IPSEC_STATUS_FAILED);
2157 }
2158 
2159 /*
2160  * Handle outbound IPsec processing for IPv4 and IPv6
2161  * On success returns B_TRUE, on failure returns B_FALSE and frees the
2162  * mblk chain ipsec_in_mp.
2163  */
2164 static ipsec_status_t
2165 esp_outbound(mblk_t *mp)
2166 {
2167 	mblk_t *ipsec_out_mp, *data_mp, *espmp, *tailmp;
2168 	ipsec_out_t *io;
2169 	ipha_t *ipha;
2170 	ip6_t *ip6h;
2171 	esph_t *esph;
2172 	uint_t af;
2173 	uint8_t *nhp;
2174 	uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
2175 	uintptr_t esplen = sizeof (esph_t);
2176 	uint8_t protocol;
2177 	ipsa_t *assoc;
2178 	uint_t iv_len = 0, mac_len = 0;
2179 	uchar_t *icv_buf;
2180 	udpha_t *udpha;
2181 	boolean_t is_natt = B_FALSE;
2182 
2183 	ESP_BUMP_STAT(out_requests);
2184 
2185 	ipsec_out_mp = mp;
2186 	data_mp = ipsec_out_mp->b_cont;
2187 
2188 	/*
2189 	 * <sigh> We have to copy the message here, because TCP (for example)
2190 	 * keeps a dupb() of the message lying around for retransmission.
2191 	 * Since ESP changes the whole of the datagram, we have to create our
2192 	 * own copy lest we clobber TCP's data.  Since we have to copy anyway,
2193 	 * we might as well make use of msgpullup() and get the mblk into one
2194 	 * contiguous piece!
2195 	 */
2196 	ipsec_out_mp->b_cont = msgpullup(data_mp, -1);
2197 	if (ipsec_out_mp->b_cont == NULL) {
2198 		esp0dbg(("esp_outbound: msgpullup() failed, "
2199 		    "dropping packet.\n"));
2200 		ipsec_out_mp->b_cont = data_mp;
2201 		/*
2202 		 * TODO:  Find the outbound IRE for this packet and
2203 		 * pass it to ip_drop_packet().
2204 		 */
2205 		ip_drop_packet(ipsec_out_mp, B_FALSE, NULL, NULL,
2206 		    &ipdrops_esp_nomem, &esp_dropper);
2207 		return (IPSEC_STATUS_FAILED);
2208 	} else {
2209 		freemsg(data_mp);
2210 		data_mp = ipsec_out_mp->b_cont;
2211 	}
2212 
2213 	io = (ipsec_out_t *)ipsec_out_mp->b_rptr;
2214 
2215 	/*
2216 	 * Reality check....
2217 	 */
2218 
2219 	ipha = (ipha_t *)data_mp->b_rptr;  /* So we can call esp_acquire(). */
2220 
2221 	if (io->ipsec_out_v4) {
2222 		af = AF_INET;
2223 		divpoint = IPH_HDR_LENGTH(ipha);
2224 		datalen = ntohs(ipha->ipha_length) - divpoint;
2225 		nhp = (uint8_t *)&ipha->ipha_protocol;
2226 	} else {
2227 		ip6_pkt_t ipp;
2228 
2229 		af = AF_INET6;
2230 		ip6h = (ip6_t *)ipha;
2231 		bzero(&ipp, sizeof (ipp));
2232 		divpoint = ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL);
2233 		if (ipp.ipp_dstopts != NULL &&
2234 		    ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
2235 			/*
2236 			 * Destination options are tricky.  If we get in here,
2237 			 * then we have a terminal header following the
2238 			 * destination options.  We need to adjust backwards
2239 			 * so we insert ESP BEFORE the destination options
2240 			 * bag.  (So that the dstopts get encrypted!)
2241 			 *
2242 			 * Since this is for outbound packets only, we know
2243 			 * that non-terminal destination options only precede
2244 			 * routing headers.
2245 			 */
2246 			divpoint -= ipp.ipp_dstoptslen;
2247 		}
2248 		datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
2249 
2250 		if (ipp.ipp_rthdr != NULL) {
2251 			nhp = &ipp.ipp_rthdr->ip6r_nxt;
2252 		} else if (ipp.ipp_hopopts != NULL) {
2253 			nhp = &ipp.ipp_hopopts->ip6h_nxt;
2254 		} else {
2255 			ASSERT(divpoint == sizeof (ip6_t));
2256 			/* It's probably IP + ESP. */
2257 			nhp = &ip6h->ip6_nxt;
2258 		}
2259 	}
2260 	assoc = io->ipsec_out_esp_sa;
2261 	ASSERT(assoc != NULL);
2262 
2263 	if (assoc->ipsa_usetime == 0)
2264 		esp_set_usetime(assoc, B_FALSE);
2265 
2266 	if (assoc->ipsa_auth_alg != SADB_AALG_NONE)
2267 		mac_len = assoc->ipsa_mac_len;
2268 
2269 	if (assoc->ipsa_flags & IPSA_F_NATT) {
2270 		/* wedge in fake UDP */
2271 		is_natt = B_TRUE;
2272 		esplen += UDPH_SIZE;
2273 	}
2274 
2275 /* EXPORT DELETE START */
2276 	if (assoc->ipsa_encr_alg != SADB_EALG_NULL)
2277 		iv_len = assoc->ipsa_iv_len;
2278 /* EXPORT DELETE END */
2279 
2280 /* EXPORT DELETE START */
2281 	/*
2282 	 * Set up ESP header and encryption padding for ENCR PI request.
2283 	 */
2284 /* EXPORT DELETE END */
2285 
2286 	/*
2287 	 * Determine the padding length.   Pad to 4-bytes.
2288 	 *
2289 	 * Include the two additional bytes (hence the - 2) for the padding
2290 	 * length and the next header.  Take this into account when
2291 	 * calculating the actual length of the padding.
2292 	 */
2293 
2294 /* EXPORT DELETE START */
2295 	if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
2296 		padlen = ((unsigned)(iv_len - datalen - 2)) % iv_len;
2297 	} else {
2298 /* EXPORT DELETE END */
2299 		padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) %
2300 		    sizeof (uint32_t);
2301 /* EXPORT DELETE START */
2302 	}
2303 
2304 	/* Allocate ESP header and IV. */
2305 	esplen += iv_len;
2306 /* EXPORT DELETE END */
2307 
2308 	/*
2309 	 * Update association byte-count lifetimes.  Don't forget to take
2310 	 * into account the padding length and next-header (hence the + 2).
2311 	 *	EXPORT DELETE START
2312 	 * Use the amount of data fed into the "encryption algorithm".  This
2313 	 * is the IV, the data length, the padding length, and the final two
2314 	 * bytes (padlen, and next-header).
2315 	 *
2316 	 *	EXPORT DELETE END
2317 	 */
2318 
2319 	if (!esp_age_bytes(assoc, datalen + padlen +
2320 /* EXPORT DELETE START */
2321 	    iv_len +
2322 /* EXPORT DELETE END */
2323 	    2, B_FALSE)) {
2324 		/*
2325 		 * TODO:  Find the outbound IRE for this packet and
2326 		 * pass it to ip_drop_packet().
2327 		 */
2328 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
2329 		    &ipdrops_esp_bytes_expire, &esp_dropper);
2330 		return (IPSEC_STATUS_FAILED);
2331 	}
2332 
2333 	espmp = allocb(esplen, BPRI_HI);
2334 	if (espmp == NULL) {
2335 		ESP_BUMP_STAT(out_discards);
2336 		esp1dbg(("esp_outbound: can't allocate espmp.\n"));
2337 		/*
2338 		 * TODO:  Find the outbound IRE for this packet and
2339 		 * pass it to ip_drop_packet().
2340 		 */
2341 		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_nomem,
2342 		    &esp_dropper);
2343 		return (IPSEC_STATUS_FAILED);
2344 	}
2345 	espmp->b_wptr += esplen;
2346 	esph = (esph_t *)espmp->b_rptr;
2347 
2348 	if (is_natt) {
2349 		esp3dbg(("esp_outbound: NATT"));
2350 
2351 		udpha = (udpha_t *)espmp->b_rptr;
2352 		udpha->uha_src_port = htons(IPPORT_IKE_NATT);
2353 		if (assoc->ipsa_remote_port != 0)
2354 			udpha->uha_dst_port = assoc->ipsa_remote_port;
2355 		else
2356 			udpha->uha_dst_port = htons(IPPORT_IKE_NATT);
2357 		/*
2358 		 * Set the checksum to 0, so that the ip_wput_ipsec_out()
2359 		 * can do the right thing.
2360 		 */
2361 		udpha->uha_checksum = 0;
2362 		esph = (esph_t *)(udpha + 1);
2363 	}
2364 
2365 	esph->esph_spi = assoc->ipsa_spi;
2366 
2367 	esph->esph_replay = htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1));
2368 	if (esph->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2369 		/*
2370 		 * XXX We have replay counter wrapping.
2371 		 * We probably want to nuke this SA (and its peer).
2372 		 */
2373 		ipsec_assocfailure(info.mi_idnum, 0, 0,
2374 		    SL_ERROR | SL_CONSOLE | SL_WARN,
2375 		    "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
2376 		    esph->esph_spi, assoc->ipsa_dstaddr, af);
2377 
2378 		ESP_BUMP_STAT(out_discards);
2379 		sadb_replay_delete(assoc);
2380 		/*
2381 		 * TODO:  Find the outbound IRE for this packet and
2382 		 * pass it to ip_drop_packet().
2383 		 */
2384 		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_replay,
2385 		    &esp_dropper);
2386 		return (IPSEC_STATUS_FAILED);
2387 	}
2388 
2389 /* EXPORT DELETE START */
2390 	/*
2391 	 * Set the IV to a random quantity.  We do not require the
2392 	 * highest quality random bits, but for best security with CBC
2393 	 * mode ciphers, the value must be unlikely to repeat and also
2394 	 * must not be known in advance to an adversary capable of
2395 	 * influencing the plaintext.
2396 	 */
2397 	(void) random_get_pseudo_bytes((uint8_t *)(esph + 1), iv_len);
2398 /* EXPORT DELETE END */
2399 
2400 	/* Fix the IP header. */
2401 	alloclen = padlen + 2 + mac_len;
2402 	adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
2403 
2404 	protocol = *nhp;
2405 
2406 	if (io->ipsec_out_v4) {
2407 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
2408 		if (is_natt) {
2409 			*nhp = IPPROTO_UDP;
2410 			udpha->uha_length = htons(ntohs(ipha->ipha_length) -
2411 			    IPH_HDR_LENGTH(ipha));
2412 		} else {
2413 			*nhp = IPPROTO_ESP;
2414 		}
2415 		ipha->ipha_hdr_checksum = 0;
2416 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2417 	} else {
2418 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
2419 		*nhp = IPPROTO_ESP;
2420 	}
2421 
2422 	/* I've got the two ESP mblks, now insert them. */
2423 
2424 	esp2dbg(("data_mp before outbound ESP adjustment:\n"));
2425 	esp2dbg((dump_msg(data_mp)));
2426 
2427 	if (!esp_insert_esp(data_mp, espmp, divpoint)) {
2428 		ESP_BUMP_STAT(out_discards);
2429 		/* NOTE:  esp_insert_esp() only fails if there's no memory. */
2430 		/*
2431 		 * TODO:  Find the outbound IRE for this packet and
2432 		 * pass it to ip_drop_packet().
2433 		 */
2434 		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_nomem,
2435 		    &esp_dropper);
2436 		freeb(espmp);
2437 		return (IPSEC_STATUS_FAILED);
2438 	}
2439 
2440 	/* Append padding (and leave room for ICV). */
2441 	for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
2442 		;
2443 	if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
2444 		tailmp->b_cont = allocb(alloclen, BPRI_HI);
2445 		if (tailmp->b_cont == NULL) {
2446 			ESP_BUMP_STAT(out_discards);
2447 			esp0dbg(("esp_outbound:  Can't allocate tailmp.\n"));
2448 			/*
2449 			 * TODO:  Find the outbound IRE for this packet and
2450 			 * pass it to ip_drop_packet().
2451 			 */
2452 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
2453 			    &ipdrops_esp_nomem, &esp_dropper);
2454 			return (IPSEC_STATUS_FAILED);
2455 		}
2456 		tailmp = tailmp->b_cont;
2457 	}
2458 
2459 	/*
2460 	 * If there's padding, N bytes of padding must be of the form 0x1,
2461 	 * 0x2, 0x3... 0xN.
2462 	 */
2463 	for (i = 0; i < padlen; ) {
2464 		i++;
2465 		*tailmp->b_wptr++ = i;
2466 	}
2467 	*tailmp->b_wptr++ = i;
2468 	*tailmp->b_wptr++ = protocol;
2469 
2470 /* EXPORT DELETE START */
2471 	esp2dbg(("data_Mp before encryption:\n"));
2472 	esp2dbg((dump_msg(data_mp)));
2473 
2474 	/*
2475 	 * The packet is eligible for hardware acceleration if the
2476 	 * following conditions are satisfied:
2477 	 *
2478 	 * 1. the packet will not be fragmented
2479 	 * 2. the provider supports the algorithms specified by SA
2480 	 * 3. there is no pending control message being exchanged
2481 	 * 4. snoop is not attached
2482 	 * 5. the destination address is not a multicast address
2483 	 *
2484 	 * All five of these conditions are checked by IP prior to
2485 	 * sending the packet to ESP.
2486 	 *
2487 	 * But We, and We Alone, can, nay MUST check if the packet
2488 	 * is over NATT, and then disqualify it from hardware
2489 	 * acceleration.
2490 	 */
2491 
2492 	if (io->ipsec_out_is_capab_ill && !(assoc->ipsa_flags & IPSA_F_NATT)) {
2493 		return (esp_outbound_accelerated(ipsec_out_mp, mac_len));
2494 	}
2495 	ESP_BUMP_STAT(noaccel);
2496 
2497 	/*
2498 	 * Okay.  I've set up the pre-encryption ESP.  Let's do it!
2499 	 */
2500 /* EXPORT DELETE END */
2501 
2502 	if (mac_len > 0) {
2503 		ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
2504 		icv_buf = tailmp->b_wptr;
2505 		tailmp->b_wptr += mac_len;
2506 	} else {
2507 		icv_buf = NULL;
2508 	}
2509 
2510 	return (esp_submit_req_outbound(ipsec_out_mp, assoc, icv_buf,
2511 	    datalen + padlen + 2));
2512 }
2513 
2514 /*
2515  * IP calls this to validate the ICMP errors that
2516  * we got from the network.
2517  */
2518 ipsec_status_t
2519 ipsecesp_icmp_error(mblk_t *ipsec_mp)
2520 {
2521 	/*
2522 	 * Unless we get an entire packet back, this function is useless.
2523 	 * Why?
2524 	 *
2525 	 * 1.)	Partial packets are useless, because the "next header"
2526 	 *	is at the end of the decrypted ESP packet.  Without the
2527 	 *	whole packet, this is useless.
2528 	 *
2529 	 * 2.)	If we every use a stateful cipher, such as a stream or a
2530 	 *	one-time pad, we can't do anything.
2531 	 *
2532 	 * Since the chances of us getting an entire packet back are very
2533 	 * very small, we discard here.
2534 	 */
2535 	IP_ESP_BUMP_STAT(in_discards);
2536 	ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, &ipdrops_esp_icmp,
2537 	    &esp_dropper);
2538 	return (IPSEC_STATUS_FAILED);
2539 }
2540 
2541 /*
2542  * ESP module read put routine.
2543  */
2544 /* ARGSUSED */
2545 static void
2546 ipsecesp_rput(queue_t *q, mblk_t *mp)
2547 {
2548 	keysock_in_t *ksi;
2549 	int *addrtype;
2550 	ire_t *ire;
2551 	mblk_t *ire_mp, *last_mp;
2552 
2553 	switch (mp->b_datap->db_type) {
2554 	case M_CTL:
2555 		/*
2556 		 * IPsec request of some variety from IP.  IPSEC_{IN,OUT}
2557 		 * are the common cases, but even ICMP error messages from IP
2558 		 * may rise up here.
2559 		 *
2560 		 * Ummmm, actually, this can also be the reflected KEYSOCK_IN
2561 		 * message, with an IRE_DB_TYPE hung off at the end.
2562 		 */
2563 		switch (((ipsec_info_t *)(mp->b_rptr))->ipsec_info_type) {
2564 		case KEYSOCK_IN:
2565 			last_mp = mp;
2566 			while (last_mp->b_cont != NULL &&
2567 			    last_mp->b_cont->b_datap->db_type != IRE_DB_TYPE)
2568 				last_mp = last_mp->b_cont;
2569 
2570 			if (last_mp->b_cont == NULL) {
2571 				freemsg(mp);
2572 				break;	/* Out of switch. */
2573 			}
2574 
2575 			ire_mp = last_mp->b_cont;
2576 			last_mp->b_cont = NULL;
2577 
2578 			ksi = (keysock_in_t *)mp->b_rptr;
2579 
2580 			if (ksi->ks_in_srctype == KS_IN_ADDR_UNKNOWN)
2581 				addrtype = &ksi->ks_in_srctype;
2582 			else if (ksi->ks_in_dsttype == KS_IN_ADDR_UNKNOWN)
2583 				addrtype = &ksi->ks_in_dsttype;
2584 			else if (ksi->ks_in_proxytype == KS_IN_ADDR_UNKNOWN)
2585 				addrtype = &ksi->ks_in_proxytype;
2586 
2587 			ire = (ire_t *)ire_mp->b_rptr;
2588 
2589 			*addrtype = sadb_addrset(ire);
2590 
2591 			freemsg(ire_mp);
2592 			if (esp_pfkey_q != NULL) {
2593 				/*
2594 				 * Decrement counter to make up for
2595 				 * auto-increment in ipsecesp_wput().
2596 				 * I'm running all MT-hot through here, so
2597 				 * don't worry about perimeters and lateral
2598 				 * puts.
2599 				 */
2600 				ESP_DEBUMP_STAT(keysock_in);
2601 				ipsecesp_wput(WR(esp_pfkey_q), mp);
2602 			} else {
2603 				freemsg(mp);
2604 			}
2605 			break;
2606 		default:
2607 			freemsg(mp);
2608 			break;
2609 		}
2610 		break;
2611 	case M_PROTO:
2612 	case M_PCPROTO:
2613 		/* TPI message of some sort. */
2614 		switch (*((t_scalar_t *)mp->b_rptr)) {
2615 		case T_BIND_ACK:
2616 			esp3dbg(("Thank you IP from ESP for T_BIND_ACK\n"));
2617 			break;
2618 		case T_ERROR_ACK:
2619 			cmn_err(CE_WARN,
2620 			    "ipsecesp:  ESP received T_ERROR_ACK from IP.");
2621 			/*
2622 			 * Make esp_sadb.s_ip_q NULL, and in the
2623 			 * future, perhaps try again.
2624 			 */
2625 			esp_sadb.s_ip_q = NULL;
2626 			break;
2627 		case T_OK_ACK:
2628 			/* Probably from a (rarely sent) T_UNBIND_REQ. */
2629 			break;
2630 		default:
2631 			esp0dbg(("Unknown M_{,PC}PROTO message.\n"));
2632 		}
2633 		freemsg(mp);
2634 		break;
2635 	default:
2636 		/* For now, passthru message. */
2637 		esp2dbg(("ESP got unknown mblk type %d.\n",
2638 		    mp->b_datap->db_type));
2639 		putnext(q, mp);
2640 	}
2641 }
2642 
2643 /*
2644  * Construct an SADB_REGISTER message with the current algorithms.
2645  */
2646 static boolean_t
2647 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial)
2648 {
2649 	mblk_t *pfkey_msg_mp, *keysock_out_mp;
2650 	sadb_msg_t *samsg;
2651 	sadb_supported_t *sasupp_auth = NULL;
2652 /* EXPORT DELETE START */
2653 	sadb_supported_t *sasupp_encr = NULL;
2654 /* EXPORT DELETE END */
2655 	sadb_alg_t *saalg;
2656 	uint_t allocsize = sizeof (*samsg);
2657 	uint_t i, numalgs_snap;
2658 	int current_aalgs;
2659 	ipsec_alginfo_t **authalgs;
2660 	uint_t num_aalgs;
2661 /* EXPORT DELETE START */
2662 	int current_ealgs;
2663 	ipsec_alginfo_t **encralgs;
2664 	uint_t num_ealgs;
2665 /* EXPORT DELETE END */
2666 
2667 	/* Allocate the KEYSOCK_OUT. */
2668 	keysock_out_mp = sadb_keysock_out(serial);
2669 	if (keysock_out_mp == NULL) {
2670 		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
2671 		return (B_FALSE);
2672 	}
2673 
2674 	/*
2675 	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
2676 	 */
2677 
2678 	mutex_enter(&alg_lock);
2679 
2680 	/*
2681 	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
2682 	 * down the lock while filling it.
2683 	 *
2684 	 * Return only valid algorithms, so the number of algorithms
2685 	 * to send up may be less than the number of algorithm entries
2686 	 * in the table.
2687 	 */
2688 	authalgs = ipsec_alglists[IPSEC_ALG_AUTH];
2689 	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
2690 		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
2691 			num_aalgs++;
2692 
2693 	if (num_aalgs != 0) {
2694 		allocsize += (num_aalgs * sizeof (*saalg));
2695 		allocsize += sizeof (*sasupp_auth);
2696 	}
2697 /* EXPORT DELETE START */
2698 	encralgs = ipsec_alglists[IPSEC_ALG_ENCR];
2699 	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
2700 		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
2701 			num_ealgs++;
2702 
2703 	if (num_ealgs != 0) {
2704 		allocsize += (num_ealgs * sizeof (*saalg));
2705 		allocsize += sizeof (*sasupp_encr);
2706 	}
2707 /* EXPORT DELETE END */
2708 	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
2709 	if (keysock_out_mp->b_cont == NULL) {
2710 		mutex_exit(&alg_lock);
2711 		freemsg(keysock_out_mp);
2712 		return (B_FALSE);
2713 	}
2714 
2715 	pfkey_msg_mp = keysock_out_mp->b_cont;
2716 	pfkey_msg_mp->b_wptr += allocsize;
2717 	if (num_aalgs != 0) {
2718 		sasupp_auth = (sadb_supported_t *)
2719 		    (pfkey_msg_mp->b_rptr + sizeof (*samsg));
2720 		saalg = (sadb_alg_t *)(sasupp_auth + 1);
2721 
2722 		ASSERT(((ulong_t)saalg & 0x7) == 0);
2723 
2724 		numalgs_snap = 0;
2725 		for (i = 0;
2726 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); i++) {
2727 			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
2728 				continue;
2729 
2730 			saalg->sadb_alg_id = authalgs[i]->alg_id;
2731 			saalg->sadb_alg_ivlen = 0;
2732 			saalg->sadb_alg_minbits	= authalgs[i]->alg_ef_minbits;
2733 			saalg->sadb_alg_maxbits	= authalgs[i]->alg_ef_maxbits;
2734 			saalg->sadb_x_alg_defincr = authalgs[i]->alg_ef_default;
2735 			saalg->sadb_x_alg_increment =
2736 			    authalgs[i]->alg_increment;
2737 			numalgs_snap++;
2738 			saalg++;
2739 		}
2740 		ASSERT(numalgs_snap == num_aalgs);
2741 #ifdef DEBUG
2742 		/*
2743 		 * Reality check to make sure I snagged all of the
2744 		 * algorithms.
2745 		 */
2746 		for (; i < IPSEC_MAX_ALGS; i++) {
2747 			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
2748 				cmn_err(CE_PANIC, "esp_register_out()! "
2749 				    "Missed aalg #%d.\n", i);
2750 			}
2751 		}
2752 #endif /* DEBUG */
2753 	} else {
2754 		saalg = (sadb_alg_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
2755 	}
2756 
2757 /* EXPORT DELETE START */
2758 	if (num_ealgs != 0) {
2759 		sasupp_encr = (sadb_supported_t *)saalg;
2760 		saalg = (sadb_alg_t *)(sasupp_encr + 1);
2761 
2762 		numalgs_snap = 0;
2763 		for (i = 0;
2764 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
2765 			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
2766 				continue;
2767 			saalg->sadb_alg_id = encralgs[i]->alg_id;
2768 			saalg->sadb_alg_ivlen = encralgs[i]->alg_datalen;
2769 			saalg->sadb_alg_minbits	= encralgs[i]->alg_ef_minbits;
2770 			saalg->sadb_alg_maxbits	= encralgs[i]->alg_ef_maxbits;
2771 			saalg->sadb_x_alg_defincr = encralgs[i]->alg_ef_default;
2772 			saalg->sadb_x_alg_increment =
2773 			    encralgs[i]->alg_increment;
2774 			numalgs_snap++;
2775 			saalg++;
2776 		}
2777 		ASSERT(numalgs_snap == num_ealgs);
2778 #ifdef DEBUG
2779 		/*
2780 		 * Reality check to make sure I snagged all of the
2781 		 * algorithms.
2782 		 */
2783 		for (; i < IPSEC_MAX_ALGS; i++) {
2784 			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
2785 				cmn_err(CE_PANIC, "esp_register_out()! "
2786 				    "Missed ealg #%d.\n", i);
2787 			}
2788 		}
2789 #endif /* DEBUG */
2790 	}
2791 /* EXPORT DELETE END */
2792 
2793 	current_aalgs = num_aalgs;
2794 /* EXPORT DELETE START */
2795 	current_ealgs = num_ealgs;
2796 /* EXPORT DELETE END */
2797 
2798 	mutex_exit(&alg_lock);
2799 
2800 	/* Now fill the rest of the SADB_REGISTER message. */
2801 
2802 	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
2803 	samsg->sadb_msg_version = PF_KEY_V2;
2804 	samsg->sadb_msg_type = SADB_REGISTER;
2805 	samsg->sadb_msg_errno = 0;
2806 	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
2807 	samsg->sadb_msg_len = SADB_8TO64(allocsize);
2808 	samsg->sadb_msg_reserved = 0;
2809 	/*
2810 	 * Assume caller has sufficient sequence/pid number info.  If it's one
2811 	 * from me over a new alg., I could give two hoots about sequence.
2812 	 */
2813 	samsg->sadb_msg_seq = sequence;
2814 	samsg->sadb_msg_pid = pid;
2815 
2816 	if (sasupp_auth != NULL) {
2817 		sasupp_auth->sadb_supported_len =
2818 		    SADB_8TO64(sizeof (*sasupp_auth) +
2819 			sizeof (*saalg) * current_aalgs);
2820 		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
2821 		sasupp_auth->sadb_supported_reserved = 0;
2822 	}
2823 
2824 /* EXPORT DELETE START */
2825 	if (sasupp_encr != NULL) {
2826 		sasupp_encr->sadb_supported_len =
2827 		    SADB_8TO64(sizeof (*sasupp_encr) +
2828 			sizeof (*saalg) * current_ealgs);
2829 		sasupp_encr->sadb_supported_exttype =
2830 		    SADB_EXT_SUPPORTED_ENCRYPT;
2831 		sasupp_encr->sadb_supported_reserved = 0;
2832 	}
2833 /* EXPORT DELETE END */
2834 
2835 	if (esp_pfkey_q != NULL)
2836 		putnext(esp_pfkey_q, keysock_out_mp);
2837 	else {
2838 		freemsg(keysock_out_mp);
2839 		return (B_FALSE);
2840 	}
2841 
2842 	return (B_TRUE);
2843 }
2844 
/*
 * Invoked when the algorithm table changes.  Causes an SADB_REGISTER
 * message containing the current list of algorithms to be sent up to
 * the ESP listeners.
 */
void
ipsecesp_algs_changed(void)
{
	/*
	 * Sequence, pid and serial are all passed as zero.  The result is
	 * deliberately ignored; esp_register_out() itself copes with a
	 * NULL esp_pfkey_q.
	 */
	(void) esp_register_out(0, 0, 0);
}
2859 
2860 /*
2861  * taskq_dispatch handler.
2862  */
2863 static void
2864 inbound_task(void *arg)
2865 {
2866 	esph_t *esph;
2867 	mblk_t *mp = (mblk_t *)arg;
2868 	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
2869 	int ipsec_rc;
2870 
2871 	esp2dbg(("in ESP inbound_task"));
2872 
2873 	esph = ipsec_inbound_esp_sa(mp);
2874 	if (esph == NULL)
2875 		return;
2876 	ASSERT(ii->ipsec_in_esp_sa != NULL);
2877 	ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func(mp, esph);
2878 	if (ipsec_rc != IPSEC_STATUS_SUCCESS)
2879 		return;
2880 	ip_fanout_proto_again(mp, NULL, NULL, NULL);
2881 }
2882 
2883 /*
2884  * Now that weak-key passed, actually ADD the security association, and
2885  * send back a reply ADD message.
2886  */
2887 static int
2888 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi)
2889 {
2890 	isaf_t *primary, *secondary, *inbound;
2891 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2892 	sadb_address_t *dstext =
2893 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2894 	struct sockaddr_in *dst;
2895 	struct sockaddr_in6 *dst6;
2896 	boolean_t is_ipv4, clone = B_FALSE, is_inbound = B_FALSE;
2897 	uint32_t *dstaddr;
2898 	ipsa_t *larval = NULL;
2899 	ipsacq_t *acqrec;
2900 	iacqf_t *acq_bucket;
2901 	mblk_t *acq_msgs = NULL;
2902 	int rc;
2903 	sadb_t *sp;
2904 	int outhash;
2905 	mblk_t *lpkt;
2906 
2907 /* EXPORT DELETE START */
2908 #if 0
2909 	/*
2910 	 * Gross hack for export control.  Since esp_encr_keycheck
2911 	 * is gone, I have to somehow enforce that exportable ESP source
2912 	 * can't have encryption.
2913 	 */
2914 /* EXPORT DELETE END */
2915 	if (assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
2916 		samsg->sadb_x_msg_diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
2917 		return (EINVAL);
2918 	}
2919 /* EXPORT DELETE START */
2920 #endif
2921 /* EXPORT DELETE END */
2922 
2923 	/*
2924 	 * Locate the appropriate table(s).
2925 	 */
2926 
2927 	dst = (struct sockaddr_in *)(dstext + 1);
2928 	dst6 = (struct sockaddr_in6 *)dst;
2929 	is_ipv4 = (dst->sin_family == AF_INET);
2930 	if (is_ipv4) {
2931 		sp = &esp_sadb.s_v4;
2932 		dstaddr = (uint32_t *)(&dst->sin_addr);
2933 		outhash = OUTBOUND_HASH_V4(*(ipaddr_t *)dstaddr);
2934 	} else {
2935 		sp = &esp_sadb.s_v6;
2936 		dstaddr = (uint32_t *)(&dst6->sin6_addr);
2937 		outhash = OUTBOUND_HASH_V6(*(in6_addr_t *)dstaddr);
2938 	}
2939 	inbound = &sp->sdb_if[INBOUND_HASH(assoc->sadb_sa_spi)];
2940 	switch (ksi->ks_in_dsttype) {
2941 	case KS_IN_ADDR_MBCAST:
2942 		clone = B_TRUE;	/* All mcast SAs can be bidirectional */
2943 		/* FALLTHRU */
2944 	case KS_IN_ADDR_ME:
2945 		primary = inbound;
2946 		secondary = &sp->sdb_of[outhash];
2947 		/*
2948 		 * If the source address is either one of mine, or unspecified
2949 		 * (which is best summed up by saying "not 'not mine'"),
2950 		 * then the association is potentially bi-directional,
2951 		 * in that it can be used for inbound traffic and outbound
2952 		 * traffic.  The best example of such an SA is a multicast
2953 		 * SA (which allows me to receive the outbound traffic).
2954 		 */
2955 		if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
2956 			clone = B_TRUE;
2957 		is_inbound = B_TRUE;
2958 		break;
2959 	case KS_IN_ADDR_NOTME:
2960 		primary = &sp->sdb_of[outhash];
2961 		secondary = inbound;
2962 		/*
2963 		 * If the source address is literally not mine (either
2964 		 * unspecified or not mine), then this SA may have an
2965 		 * address that WILL be mine after some configuration.
2966 		 * We pay the price for this by making it a bi-directional
2967 		 * SA.
2968 		 */
2969 		if (ksi->ks_in_srctype != KS_IN_ADDR_ME)
2970 			clone = B_TRUE;
2971 		break;
2972 	default:
2973 		samsg->sadb_x_msg_diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
2974 		return (EINVAL);
2975 	}
2976 
2977 	/*
2978 	 * Find an ACQUIRE list entry if possible.  If we've added an SA that
2979 	 * suits the needs of an ACQUIRE list entry, we can eliminate the
2980 	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
2981 	 * high-bit of the sequence number to queue it.  Key off destination
2982 	 * addr, and change acqrec's state.
2983 	 */
2984 
2985 	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
2986 		acq_bucket = &sp->sdb_acq[outhash];
2987 		mutex_enter(&acq_bucket->iacqf_lock);
2988 		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
2989 		    acqrec = acqrec->ipsacq_next) {
2990 			mutex_enter(&acqrec->ipsacq_lock);
2991 			/*
2992 			 * Q:  I only check sequence.  Should I check dst?
2993 			 * A: Yes, check dest because those are the packets
2994 			 *    that are queued up.
2995 			 */
2996 			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
2997 			    IPSA_ARE_ADDR_EQUAL(dstaddr,
2998 				acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
2999 				break;
3000 			mutex_exit(&acqrec->ipsacq_lock);
3001 		}
3002 		if (acqrec != NULL) {
3003 			/*
3004 			 * AHA!  I found an ACQUIRE record for this SA.
3005 			 * Grab the msg list, and free the acquire record.
3006 			 * I already am holding the lock for this record,
3007 			 * so all I have to do is free it.
3008 			 */
3009 			acq_msgs = acqrec->ipsacq_mp;
3010 			acqrec->ipsacq_mp = NULL;
3011 			mutex_exit(&acqrec->ipsacq_lock);
3012 			sadb_destroy_acquire(acqrec);
3013 		}
3014 		mutex_exit(&acq_bucket->iacqf_lock);
3015 	}
3016 
3017 	/*
3018 	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
3019 	 * in larval list (if there).
3020 	 */
3021 
3022 	if (samsg->sadb_msg_type == SADB_UPDATE) {
3023 		mutex_enter(&inbound->isaf_lock);
3024 		larval = ipsec_getassocbyspi(inbound, assoc->sadb_sa_spi,
3025 		    ALL_ZEROES_PTR, dstaddr, dst->sin_family);
3026 		mutex_exit(&inbound->isaf_lock);
3027 
3028 		if (larval == NULL) {
3029 			esp0dbg(("Larval update, but larval disappeared.\n"));
3030 			return (ESRCH);
3031 		} /* Else sadb_common_add unlinks it for me! */
3032 	}
3033 
3034 	lpkt = NULL;
3035 	if (larval != NULL)
3036 		lpkt = sadb_clear_lpkt(larval);
3037 
3038 	rc = sadb_common_add(esp_sadb.s_ip_q, esp_pfkey_q, mp, samsg, ksi,
3039 	    primary, secondary, larval, clone, is_inbound);
3040 
3041 	if (rc == 0 && lpkt != NULL) {
3042 		rc = !taskq_dispatch(esp_taskq, inbound_task,
3043 			    (void *) lpkt, TQ_NOSLEEP);
3044 	}
3045 
3046 	if (rc != 0) {
3047 		ip_drop_packet(lpkt, B_TRUE, NULL, NULL,
3048 		    &ipdrops_sadb_inlarval_timeout, &esp_dropper);
3049 	}
3050 
3051 	/*
3052 	 * How much more stack will I create with all of these
3053 	 * esp_outbound() calls?
3054 	 */
3055 
3056 	while (acq_msgs != NULL) {
3057 		mblk_t *mp = acq_msgs;
3058 
3059 		acq_msgs = acq_msgs->b_next;
3060 		mp->b_next = NULL;
3061 		if (rc == 0) {
3062 			if (ipsec_outbound_sa(mp, IPPROTO_ESP)) {
3063 				((ipsec_out_t *)(mp->b_rptr))->
3064 				    ipsec_out_esp_done = B_TRUE;
3065 				if (esp_outbound(mp) == IPSEC_STATUS_SUCCESS) {
3066 					ipha_t *ipha = (ipha_t *)
3067 					    mp->b_cont->b_rptr;
3068 
3069 					/* do AH processing if needed */
3070 					if (!esp_do_outbound_ah(mp))
3071 						continue;
3072 
3073 					/* finish IPsec processing */
3074 					if (is_ipv4) {
3075 						ip_wput_ipsec_out(NULL, mp,
3076 						    ipha, NULL, NULL);
3077 					} else {
3078 						ip6_t *ip6h = (ip6_t *)ipha;
3079 						ip_wput_ipsec_out_v6(NULL,
3080 						    mp, ip6h, NULL, NULL);
3081 					}
3082 				}
3083 				continue;
3084 			}
3085 		}
3086 		ESP_BUMP_STAT(out_discards);
3087 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
3088 		    &ipdrops_sadb_acquire_timeout, &esp_dropper);
3089 	}
3090 
3091 	return (rc);
3092 }
3093 
3094 /*
3095  * Add new ESP security association.  This may become a generic AH/ESP
3096  * routine eventually.
3097  */
3098 static int
3099 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic)
3100 {
3101 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3102 	sadb_address_t *srcext =
3103 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3104 	sadb_address_t *dstext =
3105 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3106 	sadb_address_t *nttext_loc =
3107 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
3108 	sadb_address_t *nttext_rem =
3109 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
3110 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3111 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3112 	struct sockaddr_in *src, *dst;
3113 	struct sockaddr_in *natt_loc, *natt_rem;
3114 	struct sockaddr_in6 *natt_loc6, *natt_rem6;
3115 
3116 	sadb_lifetime_t *soft =
3117 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3118 	sadb_lifetime_t *hard =
3119 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3120 
3121 	/* I need certain extensions present for an ADD message. */
3122 	if (srcext == NULL) {
3123 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3124 		return (EINVAL);
3125 	}
3126 	if (dstext == NULL) {
3127 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3128 		return (EINVAL);
3129 	}
3130 	if (assoc == NULL) {
3131 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3132 		return (EINVAL);
3133 	}
3134 	if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
3135 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
3136 		return (EINVAL);
3137 	}
3138 
3139 	src = (struct sockaddr_in *)(srcext + 1);
3140 	dst = (struct sockaddr_in *)(dstext + 1);
3141 	natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
3142 	natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
3143 	natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
3144 	natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);
3145 
3146 	/* Sundry ADD-specific reality checks. */
3147 	/* XXX STATS :  Logging/stats here? */
3148 	if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
3149 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
3150 		return (EINVAL);
3151 	}
3152 	if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
3153 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3154 		return (EINVAL);
3155 	}
3156 
3157 	if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
3158 	    assoc->sadb_sa_auth == SADB_AALG_NONE) {
3159 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3160 		return (EINVAL);
3161 	}
3162 
3163 	if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY |
3164 	    SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM)) {
3165 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3166 		return (EINVAL);
3167 	}
3168 
3169 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) {
3170 		return (EINVAL);
3171 	}
3172 	if (src->sin_family != dst->sin_family) {
3173 		*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
3174 		return (EINVAL);
3175 	}
3176 
3177 
3178 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
3179 		if (nttext_loc == NULL) {
3180 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3181 			return (EINVAL);
3182 		}
3183 
3184 		if (natt_loc->sin_family == AF_INET6 &&
3185 		    !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
3186 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
3187 			return (EINVAL);
3188 		}
3189 	}
3190 
3191 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
3192 		if (nttext_rem == NULL) {
3193 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3194 			return (EINVAL);
3195 		}
3196 		if (natt_rem->sin_family == AF_INET6 &&
3197 		    !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
3198 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
3199 			return (EINVAL);
3200 		}
3201 	}
3202 
3203 
3204 	/* Stuff I don't support, for now.  XXX Diagnostic? */
3205 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL ||
3206 	    ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
3207 		return (EOPNOTSUPP);
3208 
3209 	/*
3210 	 * XXX Policy :  I'm not checking identities or sensitivity
3211 	 * labels at this time, but if I did, I'd do them here, before I sent
3212 	 * the weak key check up to the algorithm.
3213 	 */
3214 
3215 	mutex_enter(&alg_lock);
3216 
3217 	/*
3218 	 * First locate the authentication algorithm.
3219 	 */
3220 	if (akey != NULL) {
3221 		ipsec_alginfo_t *aalg;
3222 
3223 		aalg = ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth];
3224 		if (aalg == NULL || !ALG_VALID(aalg)) {
3225 			mutex_exit(&alg_lock);
3226 			esp1dbg(("Couldn't find auth alg #%d.\n",
3227 			    assoc->sadb_sa_auth));
3228 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3229 			return (EINVAL);
3230 		}
3231 		ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3232 
3233 		/* sanity check key sizes */
3234 		if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
3235 			mutex_exit(&alg_lock);
3236 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
3237 			return (EINVAL);
3238 		}
3239 
3240 		/* check key and fix parity if needed */
3241 		if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
3242 		    diagnostic) != 0) {
3243 			mutex_exit(&alg_lock);
3244 			return (EINVAL);
3245 		}
3246 	}
3247 
3248 /* EXPORT DELETE START */
3249 	/*
3250 	 * Then locate the encryption algorithm.
3251 	 */
3252 	if (ekey != NULL) {
3253 		ipsec_alginfo_t *ealg;
3254 
3255 		ealg = ipsec_alglists[IPSEC_ALG_ENCR][assoc->sadb_sa_encrypt];
3256 		if (ealg == NULL || !ALG_VALID(ealg)) {
3257 			mutex_exit(&alg_lock);
3258 			esp1dbg(("Couldn't find encr alg #%d.\n",
3259 			    assoc->sadb_sa_encrypt));
3260 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3261 			return (EINVAL);
3262 		}
3263 		ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3264 
3265 		/* sanity check key sizes */
3266 		if (!ipsec_valid_key_size(ekey->sadb_key_bits, ealg)) {
3267 			mutex_exit(&alg_lock);
3268 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
3269 			return (EINVAL);
3270 		}
3271 
3272 		/* check key */
3273 		if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
3274 		    diagnostic) != 0) {
3275 			mutex_exit(&alg_lock);
3276 			return (EINVAL);
3277 		}
3278 	}
3279 /* EXPORT DELETE END */
3280 	mutex_exit(&alg_lock);
3281 
3282 	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi));
3283 }
3284 
3285 /*
3286  * Update a security association.  Updates come in two varieties.  The first
3287  * is an update of lifetimes on a non-larval SA.  The second is an update of
3288  * a larval SA, which ends up looking a lot more like an add.
3289  */
3290 static int
3291 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic)
3292 {
3293 	sadb_address_t *dstext =
3294 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3295 	struct sockaddr_in *sin;
3296 
3297 	if (dstext == NULL) {
3298 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3299 		return (EINVAL);
3300 	}
3301 
3302 	sin = (struct sockaddr_in *)(dstext + 1);
3303 	return (sadb_update_sa(mp, ksi,
3304 	    (sin->sin_family == AF_INET6) ? &esp_sadb.s_v6 : &esp_sadb.s_v4,
3305 	    diagnostic, esp_pfkey_q, esp_add_sa));
3306 }
3307 
3308 /*
3309  * Delete a security association.  This is REALLY likely to be code common to
3310  * both AH and ESP.  Find the association, then unlink it.
3311  */
3312 static int
3313 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic)
3314 {
3315 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3316 	sadb_address_t *dstext =
3317 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3318 	sadb_address_t *srcext =
3319 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3320 	struct sockaddr_in *sin;
3321 
3322 	if (assoc == NULL) {
3323 		if (dstext != NULL) {
3324 			sin = (struct sockaddr_in *)(dstext + 1);
3325 		} else if (srcext != NULL) {
3326 			sin = (struct sockaddr_in *)(srcext + 1);
3327 		} else {
3328 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3329 			return (EINVAL);
3330 		}
3331 		return sadb_purge_sa(mp, ksi,
3332 		    (sin->sin_family == AF_INET6) ? &esp_sadb.s_v6 :
3333 		    &esp_sadb.s_v4,
3334 		    diagnostic, esp_pfkey_q, esp_sadb.s_ip_q);
3335 	}
3336 
3337 	return (sadb_del_sa(mp, ksi, &esp_sadb, diagnostic, esp_pfkey_q));
3338 }
3339 
3340 /*
3341  * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
3342  * messages.
3343  */
3344 static void
3345 esp_dump(mblk_t *mp, keysock_in_t *ksi)
3346 {
3347 	int error;
3348 	sadb_msg_t *samsg;
3349 
3350 	/*
3351 	 * Dump each fanout, bailing if error is non-zero.
3352 	 */
3353 
3354 	error = sadb_dump(esp_pfkey_q, mp, ksi->ks_in_serial, &esp_sadb.s_v4);
3355 	if (error != 0)
3356 		goto bail;
3357 
3358 	error = sadb_dump(esp_pfkey_q, mp, ksi->ks_in_serial, &esp_sadb.s_v6);
3359 bail:
3360 	ASSERT(mp->b_cont != NULL);
3361 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3362 	samsg->sadb_msg_errno = (uint8_t)error;
3363 	sadb_pfkey_echo(esp_pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
3364 	    NULL);
3365 }
3366 
3367 /*
3368  * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
3369  * error cases.  What I receive is a fully-formed, syntactically legal
3370  * PF_KEY message.  I then need to check semantics...
3371  *
3372  * This code may become common to AH and ESP.  Stay tuned.
3373  *
3374  * I also make the assumption that db_ref's are cool.  If this assumption
3375  * is wrong, this means that someone other than keysock or me has been
3376  * mucking with PF_KEY messages.
3377  */
3378 static void
3379 esp_parse_pfkey(mblk_t *mp)
3380 {
3381 	mblk_t *msg = mp->b_cont;
3382 	sadb_msg_t *samsg;
3383 	keysock_in_t *ksi;
3384 	int error;
3385 	int diagnostic = SADB_X_DIAGNOSTIC_NONE;
3386 
3387 	ASSERT(msg != NULL);
3388 	samsg = (sadb_msg_t *)msg->b_rptr;
3389 	ksi = (keysock_in_t *)mp->b_rptr;
3390 
3391 	/*
3392 	 * If applicable, convert unspecified AF_INET6 to unspecified
3393 	 * AF_INET.
3394 	 */
3395 	sadb_srcaddrfix(ksi);
3396 
3397 	switch (samsg->sadb_msg_type) {
3398 	case SADB_ADD:
3399 		error = esp_add_sa(mp, ksi, &diagnostic);
3400 		if (error != 0) {
3401 			sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic,
3402 			    ksi->ks_in_serial);
3403 		}
3404 		/* else esp_add_sa() took care of things. */
3405 		break;
3406 	case SADB_DELETE:
3407 		error = esp_del_sa(mp, ksi, &diagnostic);
3408 		if (error != 0) {
3409 			sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic,
3410 			    ksi->ks_in_serial);
3411 		}
3412 		/* Else esp_del_sa() took care of things. */
3413 		break;
3414 	case SADB_GET:
3415 		error = sadb_get_sa(mp, ksi, &esp_sadb, &diagnostic,
3416 		    esp_pfkey_q);
3417 		if (error != 0) {
3418 			sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic,
3419 			    ksi->ks_in_serial);
3420 		}
3421 		/* Else sadb_get_sa() took care of things. */
3422 		break;
3423 	case SADB_FLUSH:
3424 		sadbp_flush(&esp_sadb);
3425 		sadb_pfkey_echo(esp_pfkey_q, mp, samsg, ksi, NULL);
3426 		break;
3427 	case SADB_REGISTER:
3428 		/*
3429 		 * Hmmm, let's do it!  Check for extensions (there should
3430 		 * be none), extract the fields, call esp_register_out(),
3431 		 * then either free or report an error.
3432 		 *
3433 		 * Keysock takes care of the PF_KEY bookkeeping for this.
3434 		 */
3435 		if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
3436 		    ksi->ks_in_serial)) {
3437 			freemsg(mp);
3438 		} else {
3439 			/*
3440 			 * Only way this path hits is if there is a memory
3441 			 * failure.  It will not return B_FALSE because of
3442 			 * lack of esp_pfkey_q if I am in wput().
3443 			 */
3444 			sadb_pfkey_error(esp_pfkey_q, mp, ENOMEM, diagnostic,
3445 			    ksi->ks_in_serial);
3446 		}
3447 		break;
3448 	case SADB_UPDATE:
3449 		/*
3450 		 * Find a larval, if not there, find a full one and get
3451 		 * strict.
3452 		 */
3453 		error = esp_update_sa(mp, ksi, &diagnostic);
3454 		if (error != 0) {
3455 			sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic,
3456 			    ksi->ks_in_serial);
3457 		}
3458 		/* else esp_update_sa() took care of things. */
3459 		break;
3460 	case SADB_GETSPI:
3461 		/*
3462 		 * Reserve a new larval entry.
3463 		 */
3464 		esp_getspi(mp, ksi);
3465 		break;
3466 	case SADB_ACQUIRE:
3467 		/*
3468 		 * Find larval and/or ACQUIRE record and kill it (them), I'm
3469 		 * most likely an error.  Inbound ACQUIRE messages should only
3470 		 * have the base header.
3471 		 */
3472 		sadb_in_acquire(samsg, &esp_sadb, esp_pfkey_q);
3473 		freemsg(mp);
3474 		break;
3475 	case SADB_DUMP:
3476 		/*
3477 		 * Dump all entries.
3478 		 */
3479 		esp_dump(mp, ksi);
3480 		/* esp_dump will take care of the return message, etc. */
3481 		break;
3482 	case SADB_EXPIRE:
3483 		/* Should never reach me. */
3484 		sadb_pfkey_error(esp_pfkey_q, mp, EOPNOTSUPP, diagnostic,
3485 		    ksi->ks_in_serial);
3486 		break;
3487 	default:
3488 		sadb_pfkey_error(esp_pfkey_q, mp, EINVAL,
3489 		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
3490 		break;
3491 	}
3492 }
3493 
3494 /*
3495  * Handle case where PF_KEY says it can't find a keysock for one of my
3496  * ACQUIRE messages.
3497  */
3498 static void
3499 esp_keysock_no_socket(mblk_t *mp)
3500 {
3501 	sadb_msg_t *samsg;
3502 	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
3503 
3504 	if (mp->b_cont == NULL) {
3505 		freemsg(mp);
3506 		return;
3507 	}
3508 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3509 
3510 	/*
3511 	 * If keysock can't find any registered, delete the acquire record
3512 	 * immediately, and handle errors.
3513 	 */
3514 	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
3515 		samsg->sadb_msg_errno = kse->ks_err_errno;
3516 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
3517 		/*
3518 		 * Use the write-side of the esp_pfkey_q, in case there is
3519 		 * no esp_sadb.s_ip_q.
3520 		 */
3521 		sadb_in_acquire(samsg, &esp_sadb, WR(esp_pfkey_q));
3522 	}
3523 
3524 	freemsg(mp);
3525 }
3526 
3527 /*
3528  * First-cut reality check for an inbound PF_KEY message.
3529  */
3530 static boolean_t
3531 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi)
3532 {
3533 	int diagnostic;
3534 
3535 	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
3536 		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
3537 		goto badmsg;
3538 	}
3539 	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
3540 	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
3541 		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
3542 		goto badmsg;
3543 	}
3544 	if (ksi->ks_in_srctype == KS_IN_ADDR_MBCAST) {
3545 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
3546 		goto badmsg;
3547 	}
3548 	if (ksi->ks_in_dsttype == KS_IN_ADDR_UNSPEC) {
3549 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
3550 		goto badmsg;
3551 	}
3552 
3553 	return (B_FALSE);	/* False ==> no failures */
3554 
3555 badmsg:
3556 	sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, diagnostic,
3557 	    ksi->ks_in_serial);
3558 	return (B_TRUE);	/* True ==> failures */
3559 }
3560 
3561 /*
3562  * ESP module write put routine.
3563  */
3564 static void
3565 ipsecesp_wput(queue_t *q, mblk_t *mp)
3566 {
3567 	ipsec_info_t *ii;
3568 	keysock_in_t *ksi;
3569 	int rc;
3570 	struct iocblk *iocp;
3571 
3572 	esp3dbg(("In esp_wput().\n"));
3573 
3574 	/* NOTE: Each case must take care of freeing or passing mp. */
3575 	switch (mp->b_datap->db_type) {
3576 	case M_CTL:
3577 		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
3578 			/* Not big enough message. */
3579 			freemsg(mp);
3580 			break;
3581 		}
3582 		ii = (ipsec_info_t *)mp->b_rptr;
3583 
3584 		switch (ii->ipsec_info_type) {
3585 		case KEYSOCK_OUT_ERR:
3586 			esp1dbg(("Got KEYSOCK_OUT_ERR message.\n"));
3587 			esp_keysock_no_socket(mp);
3588 			break;
3589 		case KEYSOCK_IN:
3590 			ESP_BUMP_STAT(keysock_in);
3591 			esp3dbg(("Got KEYSOCK_IN message.\n"));
3592 			ksi = (keysock_in_t *)ii;
3593 			/*
3594 			 * Some common reality checks.
3595 			 */
3596 
3597 			if (esp_pfkey_reality_failures(mp, ksi))
3598 				return;
3599 
3600 			/*
3601 			 * Use 'q' instead of esp_sadb.s_ip_q, since
3602 			 * it's the write side already, and it'll go
3603 			 * down to IP.  Use esp_pfkey_q because we
3604 			 * wouldn't get here if that weren't set, and
3605 			 * the RD(q) has been done already.
3606 			 */
3607 			if (ksi->ks_in_srctype == KS_IN_ADDR_UNKNOWN) {
3608 				rc = sadb_addrcheck(q, esp_pfkey_q, mp,
3609 				    ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
3610 				    ksi->ks_in_serial);
3611 				if (rc == KS_IN_ADDR_UNKNOWN)
3612 					return;
3613 				else
3614 					ksi->ks_in_srctype = rc;
3615 			}
3616 			if (ksi->ks_in_dsttype == KS_IN_ADDR_UNKNOWN) {
3617 				rc = sadb_addrcheck(q, esp_pfkey_q, mp,
3618 				    ksi->ks_in_extv[SADB_EXT_ADDRESS_DST],
3619 				    ksi->ks_in_serial);
3620 				if (rc == KS_IN_ADDR_UNKNOWN)
3621 					return;
3622 				else
3623 					ksi->ks_in_dsttype = rc;
3624 			}
3625 			/*
3626 			 * XXX Proxy may be a different address family.
3627 			 */
3628 			if (ksi->ks_in_proxytype == KS_IN_ADDR_UNKNOWN) {
3629 				rc = sadb_addrcheck(q, esp_pfkey_q, mp,
3630 				    ksi->ks_in_extv[SADB_EXT_ADDRESS_PROXY],
3631 				    ksi->ks_in_serial);
3632 				if (rc == KS_IN_ADDR_UNKNOWN)
3633 					return;
3634 				else
3635 					ksi->ks_in_proxytype = rc;
3636 			}
3637 			esp_parse_pfkey(mp);
3638 			break;
3639 		case KEYSOCK_HELLO:
3640 			sadb_keysock_hello(&esp_pfkey_q, q, mp,
3641 			    esp_ager, &esp_event, SADB_SATYPE_ESP);
3642 			break;
3643 		default:
3644 			esp2dbg(("Got M_CTL from above of 0x%x.\n",
3645 			    ii->ipsec_info_type));
3646 			freemsg(mp);
3647 			break;
3648 		}
3649 		break;
3650 	case M_IOCTL:
3651 		iocp = (struct iocblk *)mp->b_rptr;
3652 		switch (iocp->ioc_cmd) {
3653 		case ND_SET:
3654 		case ND_GET:
3655 			if (nd_getset(q, ipsecesp_g_nd, mp)) {
3656 				qreply(q, mp);
3657 				return;
3658 			} else {
3659 				iocp->ioc_error = ENOENT;
3660 			}
3661 			/* FALLTHRU */
3662 		default:
3663 			/* We really don't support any other ioctls, do we? */
3664 
3665 			/* Return EINVAL */
3666 			if (iocp->ioc_error != ENOENT)
3667 				iocp->ioc_error = EINVAL;
3668 			iocp->ioc_count = 0;
3669 			mp->b_datap->db_type = M_IOCACK;
3670 			qreply(q, mp);
3671 			return;
3672 		}
3673 	default:
3674 		esp3dbg(("Got default message, type %d, passing to IP.\n",
3675 		    mp->b_datap->db_type));
3676 		putnext(q, mp);
3677 	}
3678 }
3679 
3680 /* EXPORT DELETE START */
3681 /*
3682  * Process an outbound ESP packet that can be accelerated by a IPsec
3683  * hardware acceleration capable Provider.
3684  * The caller already inserted and initialized the ESP header.
3685  * This function allocates a tagging M_CTL, and adds room at the end
3686  * of the packet to hold the ICV if authentication is needed.
3687  *
3688  * On success returns B_TRUE, on failure returns B_FALSE and frees the
3689  * mblk chain ipsec_out.
3690  */
3691 static ipsec_status_t
3692 esp_outbound_accelerated(mblk_t *ipsec_out, uint_t icv_len)
3693 {
3694 	ipsec_out_t *io;
3695 	mblk_t *lastmp;
3696 
3697 	ESP_BUMP_STAT(out_accelerated);
3698 
3699 	io = (ipsec_out_t *)ipsec_out->b_rptr;
3700 
3701 	/* mark packet as being accelerated in IPSEC_OUT */
3702 	ASSERT(io->ipsec_out_accelerated == B_FALSE);
3703 	io->ipsec_out_accelerated = B_TRUE;
3704 
3705 	/*
3706 	 * add room at the end of the packet for the ICV if needed
3707 	 */
3708 	if (icv_len > 0) {
3709 		/* go to last mblk */
3710 		lastmp = ipsec_out;	/* For following while loop. */
3711 		do {
3712 			lastmp = lastmp->b_cont;
3713 		} while (lastmp->b_cont != NULL);
3714 
3715 		/* if not enough available room, allocate new mblk */
3716 		if ((lastmp->b_wptr + icv_len) > lastmp->b_datap->db_lim) {
3717 			lastmp->b_cont = allocb(icv_len, BPRI_HI);
3718 			if (lastmp->b_cont == NULL) {
3719 				ESP_BUMP_STAT(out_discards);
3720 				ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL,
3721 				    &ipdrops_esp_nomem, &esp_dropper);
3722 				return (IPSEC_STATUS_FAILED);
3723 			}
3724 			lastmp = lastmp->b_cont;
3725 		}
3726 		lastmp->b_wptr += icv_len;
3727 	}
3728 
3729 	return (IPSEC_STATUS_SUCCESS);
3730 }
3731 
3732 /*
3733  * Process an inbound accelerated ESP packet.
3734  * On success returns B_TRUE, on failure returns B_FALSE and frees the
3735  * mblk chain ipsec_in.
3736  */
3737 static ipsec_status_t
3738 esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4,
3739     ipsa_t *assoc)
3740 {
3741 	ipsec_in_t *ii;
3742 	mblk_t *hada_mp;
3743 	uint32_t icv_len = 0;
3744 	da_ipsec_t *hada;
3745 	ipha_t *ipha;
3746 	ip6_t *ip6h;
3747 	kstat_named_t *counter;
3748 
3749 	ESP_BUMP_STAT(in_accelerated);
3750 
3751 	ii = (ipsec_in_t *)ipsec_in->b_rptr;
3752 	hada_mp = ii->ipsec_in_da;
3753 	ASSERT(hada_mp != NULL);
3754 	hada = (da_ipsec_t *)hada_mp->b_rptr;
3755 
3756 	/*
3757 	 * We only support one level of decapsulation in hardware, so
3758 	 * nuke the pointer.
3759 	 */
3760 	ii->ipsec_in_da = NULL;
3761 	ii->ipsec_in_accelerated = B_FALSE;
3762 
3763 	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) {
3764 		/*
3765 		 * ESP with authentication. We expect the Provider to have
3766 		 * computed the ICV and placed it in the hardware acceleration
3767 		 * data attributes.
3768 		 *
3769 		 * Extract ICV length from attributes M_CTL and sanity check
3770 		 * its value. We allow the mblk to be smaller than da_ipsec_t
3771 		 * for a small ICV, as long as the entire ICV fits within the
3772 		 * mblk.
3773 		 *
3774 		 * Also ensures that the ICV length computed by Provider
3775 		 * corresponds to the ICV length of the agorithm specified by
3776 		 * the SA.
3777 		 */
3778 		icv_len = hada->da_icv_len;
3779 		if ((icv_len != assoc->ipsa_mac_len) ||
3780 		    (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) <
3781 			(sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) {
3782 			esp0dbg(("esp_inbound_accelerated: "
3783 			    "ICV len (%u) incorrect or mblk too small (%u)\n",
3784 			    icv_len, (uint32_t)(MBLKL(hada_mp))));
3785 			counter = &ipdrops_esp_bad_auth;
3786 			goto esp_in_discard;
3787 		}
3788 	}
3789 
3790 	/* get pointers to IP header */
3791 	if (isv4) {
3792 		ipha = (ipha_t *)data_mp->b_rptr;
3793 	} else {
3794 		ip6h = (ip6_t *)data_mp->b_rptr;
3795 	}
3796 
3797 	/*
3798 	 * Compare ICV in ESP packet vs ICV computed by adapter.
3799 	 * We also remove the ICV from the end of the packet since
3800 	 * it will no longer be needed.
3801 	 *
3802 	 * Assume that esp_inbound() already ensured that the pkt
3803 	 * was in one mblk.
3804 	 */
3805 	ASSERT(data_mp->b_cont == NULL);
3806 	data_mp->b_wptr -= icv_len;
3807 	/* adjust IP header */
3808 	if (isv4)
3809 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - icv_len);
3810 	else
3811 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - icv_len);
3812 	if (icv_len && bcmp(hada->da_icv, data_mp->b_wptr, icv_len)) {
3813 		int af;
3814 		void *addr;
3815 
3816 		if (isv4) {
3817 			addr = &ipha->ipha_dst;
3818 			af = AF_INET;
3819 		} else {
3820 			addr = &ip6h->ip6_dst;
3821 			af = AF_INET6;
3822 		}
3823 
3824 		/*
3825 		 * Log the event. Don't print to the console, block
3826 		 * potential denial-of-service attack.
3827 		 */
3828 		ESP_BUMP_STAT(bad_auth);
3829 		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3830 		    "ESP Authentication failed spi %x, dst_addr %s",
3831 		    assoc->ipsa_spi, addr, af);
3832 		counter = &ipdrops_esp_bad_auth;
3833 		goto esp_in_discard;
3834 	}
3835 
3836 	esp3dbg(("esp_inbound_accelerated: ESP authentication succeeded, "
3837 	    "checking replay\n"));
3838 
3839 	ipsec_in->b_cont = data_mp;
3840 
3841 	/*
3842 	 * Remove ESP header and padding from packet.
3843 	 */
3844 	if (!esp_strip_header(data_mp, ii->ipsec_in_v4, assoc->ipsa_iv_len,
3845 		&counter)) {
3846 		esp1dbg(("esp_inbound_accelerated: "
3847 		    "esp_strip_header() failed\n"));
3848 		goto esp_in_discard;
3849 	}
3850 
3851 	freeb(hada_mp);
3852 
3853 	/*
3854 	 * Account for usage..
3855 	 */
3856 	if (!esp_age_bytes(assoc, msgdsize(data_mp), B_TRUE)) {
3857 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
3858 		ESP_BUMP_STAT(bytes_expired);
3859 		IP_ESP_BUMP_STAT(in_discards);
3860 		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3861 		    "ESP association 0x%x, dst %s had bytes expire.\n",
3862 		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam);
3863 		ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
3864 		    &ipdrops_esp_bytes_expire, &esp_dropper);
3865 		return (IPSEC_STATUS_FAILED);
3866 	}
3867 
3868 	/* done processing the packet */
3869 	return (IPSEC_STATUS_SUCCESS);
3870 
3871 esp_in_discard:
3872 	IP_ESP_BUMP_STAT(in_discards);
3873 	freeb(hada_mp);
3874 
3875 	ipsec_in->b_cont = data_mp;	/* For ip_drop_packet()'s sake... */
3876 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, &esp_dropper);
3877 
3878 	return (IPSEC_STATUS_FAILED);
3879 }
3880 /* EXPORT DELETE END */
3881 
3882 /*
3883  * Wrapper to allow IP to trigger an ESP association failure message
3884  * during inbound SA selection.
3885  */
3886 void
3887 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
3888     uint32_t spi, void *addr, int af)
3889 {
3890 	if (ipsecesp_log_unknown_spi) {
3891 		ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
3892 		    addr, af);
3893 	}
3894 
3895 	ip_drop_packet(mp, B_TRUE, NULL, NULL, &ipdrops_esp_no_sa,
3896 	    &esp_dropper);
3897 }
3898 
3899 /*
3900  * Initialize the ESP input and output processing functions.
3901  */
3902 void
3903 ipsecesp_init_funcs(ipsa_t *sa)
3904 {
3905 	if (sa->ipsa_output_func == NULL)
3906 		sa->ipsa_output_func = esp_outbound;
3907 	if (sa->ipsa_input_func == NULL)
3908 		sa->ipsa_input_func = esp_inbound;
3909 }
3910