xref: /titanic_41/usr/src/uts/common/inet/ip/ipsecah.c (revision 864221ad7169608e293fbeaa9df563afc9f345a0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/stropts.h>
31 #include <sys/errno.h>
32 #include <sys/strlog.h>
33 #include <sys/tihdr.h>
34 #include <sys/socket.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/kmem.h>
38 #include <sys/zone.h>
39 #include <sys/sysmacros.h>
40 #include <sys/cmn_err.h>
41 #include <sys/vtrace.h>
42 #include <sys/debug.h>
43 #include <sys/atomic.h>
44 #include <sys/strsun.h>
45 #include <sys/random.h>
46 #include <netinet/in.h>
47 #include <net/if.h>
48 #include <netinet/ip6.h>
49 #include <netinet/icmp6.h>
50 #include <net/pfkeyv2.h>
51 
52 #include <inet/common.h>
53 #include <inet/mi.h>
54 #include <inet/ip.h>
55 #include <inet/ip6.h>
56 #include <inet/nd.h>
57 #include <inet/ipsec_info.h>
58 #include <inet/ipsec_impl.h>
59 #include <inet/sadb.h>
60 #include <inet/ipsecah.h>
61 #include <inet/ipsec_impl.h>
62 #include <inet/ipdrop.h>
63 #include <sys/taskq.h>
64 #include <sys/policy.h>
65 #include <sys/iphada.h>
66 #include <sys/strsun.h>
67 
68 #include <sys/crypto/common.h>
69 #include <sys/crypto/api.h>
70 #include <sys/kstat.h>
71 #include <sys/strsubr.h>
72 
73 /*
74  * Table of ND variables supported by ipsecah. These are loaded into
75  * ipsecah_g_nd in ipsecah_init_nd.
76  * All of these are alterable, within the min/max values given, at run time.
77  */
78 static	ipsecahparam_t	lcl_param_arr[] = {
79 	/* min	max			value	name */
80 	{ 0,	3,			0,	"ipsecah_debug"},
81 	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT,	"ipsecah_age_interval"},
82 	{ 1,	10,			1,	"ipsecah_reap_delay"},
83 	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecah_replay_size"},
84 	{ 1,	300,			15,	"ipsecah_acquire_timeout"},
85 	{ 1,	1800,			90,	"ipsecah_larval_timeout"},
86 	/* Default lifetime values for ACQUIRE messages. */
87 	{ 0,	0xffffffffU,		0,	"ipsecah_default_soft_bytes"},
88 	{ 0,	0xffffffffU,		0,	"ipsecah_default_hard_bytes"},
89 	{ 0,	0xffffffffU,		24000,	"ipsecah_default_soft_addtime"},
90 	{ 0,	0xffffffffU,		28800,	"ipsecah_default_hard_addtime"},
91 	{ 0,	0xffffffffU,		0,	"ipsecah_default_soft_usetime"},
92 	{ 0,	0xffffffffU,		0,	"ipsecah_default_hard_usetime"},
93 	{ 0,	1,			0,	"ipsecah_log_unknown_spi"},
94 };
95 #define	ipsecah_debug			ipsecah_params[0].ipsecah_param_value
96 #define	ipsecah_age_interval		ipsecah_params[1].ipsecah_param_value
97 #define	ipsecah_age_int_max		ipsecah_params[1].ipsecah_param_max
98 #define	ipsecah_reap_delay		ipsecah_params[2].ipsecah_param_value
99 #define	ipsecah_replay_size		ipsecah_params[3].ipsecah_param_value
100 #define	ipsecah_acquire_timeout		ipsecah_params[4].ipsecah_param_value
101 #define	ipsecah_larval_timeout		ipsecah_params[5].ipsecah_param_value
102 #define	ipsecah_default_soft_bytes	ipsecah_params[6].ipsecah_param_value
103 #define	ipsecah_default_hard_bytes	ipsecah_params[7].ipsecah_param_value
104 #define	ipsecah_default_soft_addtime	ipsecah_params[8].ipsecah_param_value
105 #define	ipsecah_default_hard_addtime	ipsecah_params[9].ipsecah_param_value
106 #define	ipsecah_default_soft_usetime	ipsecah_params[10].ipsecah_param_value
107 #define	ipsecah_default_hard_usetime	ipsecah_params[11].ipsecah_param_value
108 #define	ipsecah_log_unknown_spi		ipsecah_params[12].ipsecah_param_value
109 
/*
 * Debug printing helpers.  ah0dbg() always prints; ah1dbg(), ah2dbg() and
 * ah3dbg() print only when the stack's ipsecah_debug tunable is at least
 * 1, 2 or 3 respectively.  The "a" argument is a complete, parenthesized
 * printf() argument list, e.g. ah1dbg(ahstack, ("value %d\n", v));
 *
 * The conditional forms are wrapped in do { } while (0) so that each use
 * expands to exactly one statement.  A bare "if" here would silently
 * capture the "else" of an enclosing unbraced if/else at the call site.
 * The stack argument is parenthesized so any pointer expression works.
 */
#define	ah0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	ah1dbg(ahstack, a)	\
	do { if ((ahstack)->ipsecah_debug != 0) printf a; } while (0)
#define	ah2dbg(ahstack, a)	\
	do { if ((ahstack)->ipsecah_debug > 1) printf a; } while (0)
#define	ah3dbg(ahstack, a)	\
	do { if ((ahstack)->ipsecah_debug > 2) printf a; } while (0)
115 
/*
 * XXX Known limitation.  The amount of padding ought to be computed at
 * run time from the ICV size and the IP version, so that the complete AH
 * header is a multiple of 32 bits for IPv4 and of 64 bits for IPv6.  With
 * 96-bit ICVs the fixed values below work; any other ICV size requires
 * the code to be fixed.
 */
#define	IPV4_PADDING_ALIGN	4	/* Multiple of 32 bits */
#define	IPV6_PADDING_ALIGN	4	/* Multiple of 32 bits */
125 
/*
 * Size of the data in a message.  A message made of a single mblk is
 * measured directly with MBLKL(); msgdsize() is only called when there is
 * a continuation block to walk.
 */
#define	AH_MSGSIZE(mp) ((mp)->b_cont == NULL ? MBLKL(mp) : msgdsize(mp))
131 
132 
133 static ipsec_status_t ah_auth_out_done(mblk_t *);
134 static ipsec_status_t ah_auth_in_done(mblk_t *);
135 static mblk_t *ah_process_ip_options_v4(mblk_t *, ipsa_t *, int *, uint_t,
136     boolean_t, ipsecah_stack_t *);
137 static mblk_t *ah_process_ip_options_v6(mblk_t *, ipsa_t *, int *, uint_t,
138     boolean_t, ipsecah_stack_t *);
139 static void ah_getspi(mblk_t *, keysock_in_t *, ipsecah_stack_t *);
140 static ipsec_status_t ah_inbound_accelerated(mblk_t *, boolean_t, ipsa_t *,
141     uint32_t);
142 static ipsec_status_t ah_outbound_accelerated_v4(mblk_t *, ipsa_t *);
143 static ipsec_status_t ah_outbound_accelerated_v6(mblk_t *, ipsa_t *);
144 static ipsec_status_t ah_outbound(mblk_t *);
145 
146 static int ipsecah_open(queue_t *, dev_t *, int, int, cred_t *);
147 static int ipsecah_close(queue_t *);
148 static void ipsecah_rput(queue_t *, mblk_t *);
149 static void ipsecah_wput(queue_t *, mblk_t *);
150 static void ah_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);
151 static boolean_t ah_register_out(uint32_t, uint32_t, uint_t, ipsecah_stack_t *);
152 static void	*ipsecah_stack_init(netstackid_t stackid, netstack_t *ns);
153 static void	ipsecah_stack_fini(netstackid_t stackid, void *arg);
154 
155 /* Setable in /etc/system */
156 uint32_t ah_hash_size = IPSEC_DEFAULT_HASH_SIZE;
157 
158 static taskq_t *ah_taskq;
159 
160 static struct module_info info = {
161 	5136, "ipsecah", 0, INFPSZ, 65536, 1024
162 };
163 
164 static struct qinit rinit = {
165 	(pfi_t)ipsecah_rput, NULL, ipsecah_open, ipsecah_close, NULL, &info,
166 	NULL
167 };
168 
169 static struct qinit winit = {
170 	(pfi_t)ipsecah_wput, NULL, ipsecah_open, ipsecah_close, NULL, &info,
171 	NULL
172 };
173 
174 struct streamtab ipsecahinfo = {
175 	&rinit, &winit, NULL, NULL
176 };
177 
178 static int ah_kstat_update(kstat_t *, int);
179 
180 uint64_t ipsacq_maxpackets = IPSACQ_MAXPACKETS;
181 
182 static boolean_t
183 ah_kstat_init(ipsecah_stack_t *ahstack, netstackid_t stackid)
184 {
185 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
186 
187 	ahstack->ah_ksp = kstat_create_netstack("ipsecah", 0, "ah_stat", "net",
188 	    KSTAT_TYPE_NAMED, sizeof (ah_kstats_t) / sizeof (kstat_named_t),
189 	    KSTAT_FLAG_PERSISTENT, stackid);
190 
191 	if (ahstack->ah_ksp == NULL || ahstack->ah_ksp->ks_data == NULL)
192 		return (B_FALSE);
193 
194 	ahstack->ah_kstats = ahstack->ah_ksp->ks_data;
195 
196 	ahstack->ah_ksp->ks_update = ah_kstat_update;
197 	ahstack->ah_ksp->ks_private = (void *)(uintptr_t)stackid;
198 
199 #define	K64 KSTAT_DATA_UINT64
200 #define	KI(x) kstat_named_init(&(ahstack->ah_kstats->ah_stat_##x), #x, K64)
201 
202 	KI(num_aalgs);
203 	KI(good_auth);
204 	KI(bad_auth);
205 	KI(replay_failures);
206 	KI(replay_early_failures);
207 	KI(keysock_in);
208 	KI(out_requests);
209 	KI(acquire_requests);
210 	KI(bytes_expired);
211 	KI(out_discards);
212 	KI(in_accelerated);
213 	KI(out_accelerated);
214 	KI(noaccel);
215 	KI(crypto_sync);
216 	KI(crypto_async);
217 	KI(crypto_failures);
218 
219 #undef KI
220 #undef K64
221 
222 	kstat_install(ahstack->ah_ksp);
223 	IP_ACQUIRE_STAT(ipss, maxpackets, ipsacq_maxpackets);
224 	return (B_TRUE);
225 }
226 
227 static int
228 ah_kstat_update(kstat_t *kp, int rw)
229 {
230 	ah_kstats_t	*ekp;
231 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
232 	netstack_t	*ns;
233 	ipsec_stack_t	*ipss;
234 
235 	if ((kp == NULL) || (kp->ks_data == NULL))
236 		return (EIO);
237 
238 	if (rw == KSTAT_WRITE)
239 		return (EACCES);
240 
241 	ns = netstack_find_by_stackid(stackid);
242 	if (ns == NULL)
243 		return (-1);
244 	ipss = ns->netstack_ipsec;
245 	if (ipss == NULL) {
246 		netstack_rele(ns);
247 		return (-1);
248 	}
249 	ekp = (ah_kstats_t *)kp->ks_data;
250 
251 	mutex_enter(&ipss->ipsec_alg_lock);
252 	ekp->ah_stat_num_aalgs.value.ui64 = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
253 	mutex_exit(&ipss->ipsec_alg_lock);
254 
255 	netstack_rele(ns);
256 	return (0);
257 }
258 
259 /*
260  * Don't have to lock ipsec_age_interval, as only one thread will access it at
261  * a time, because I control the one function that does a qtimeout() on
262  * ah_pfkey_q.
263  */
264 static void
265 ah_ager(void *arg)
266 {
267 	ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;
268 	netstack_t	*ns = ahstack->ipsecah_netstack;
269 	hrtime_t begin = gethrtime();
270 
271 	sadb_ager(&ahstack->ah_sadb.s_v4, ahstack->ah_pfkey_q,
272 	    ahstack->ah_sadb.s_ip_q, ahstack->ipsecah_reap_delay, ns);
273 	sadb_ager(&ahstack->ah_sadb.s_v6, ahstack->ah_pfkey_q,
274 	    ahstack->ah_sadb.s_ip_q, ahstack->ipsecah_reap_delay, ns);
275 
276 	ahstack->ah_event = sadb_retimeout(begin, ahstack->ah_pfkey_q,
277 	    ah_ager, ahstack,
278 	    &ahstack->ipsecah_age_interval, ahstack->ipsecah_age_int_max,
279 	    info.mi_idnum);
280 }
281 
282 /*
283  * Get an AH NDD parameter.
284  */
285 /* ARGSUSED */
286 static int
287 ipsecah_param_get(q, mp, cp, cr)
288 	queue_t	*q;
289 	mblk_t	*mp;
290 	caddr_t	cp;
291 	cred_t *cr;
292 {
293 	ipsecahparam_t	*ipsecahpa = (ipsecahparam_t *)cp;
294 	uint_t value;
295 	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
296 
297 	mutex_enter(&ahstack->ipsecah_param_lock);
298 	value = ipsecahpa->ipsecah_param_value;
299 	mutex_exit(&ahstack->ipsecah_param_lock);
300 
301 	(void) mi_mpprintf(mp, "%u", value);
302 	return (0);
303 }
304 
305 /*
306  * This routine sets an NDD variable in a ipsecahparam_t structure.
307  */
308 /* ARGSUSED */
309 static int
310 ipsecah_param_set(q, mp, value, cp, cr)
311 	queue_t	*q;
312 	mblk_t	*mp;
313 	char	*value;
314 	caddr_t	cp;
315 	cred_t *cr;
316 {
317 	ulong_t	new_value;
318 	ipsecahparam_t	*ipsecahpa = (ipsecahparam_t *)cp;
319 	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
320 
321 	/*
322 	 * Fail the request if the new value does not lie within the
323 	 * required bounds.
324 	 */
325 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
326 	    new_value < ipsecahpa->ipsecah_param_min ||
327 	    new_value > ipsecahpa->ipsecah_param_max) {
328 		    return (EINVAL);
329 	}
330 
331 	/* Set the new value */
332 	mutex_enter(&ahstack->ipsecah_param_lock);
333 	ipsecahpa->ipsecah_param_value = new_value;
334 	mutex_exit(&ahstack->ipsecah_param_lock);
335 	return (0);
336 }
337 
338 /*
339  * Using lifetime NDD variables, fill in an extended combination's
340  * lifetime information.
341  */
342 void
343 ipsecah_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
344 {
345 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
346 
347 	ecomb->sadb_x_ecomb_soft_bytes = ahstack->ipsecah_default_soft_bytes;
348 	ecomb->sadb_x_ecomb_hard_bytes = ahstack->ipsecah_default_hard_bytes;
349 	ecomb->sadb_x_ecomb_soft_addtime =
350 	    ahstack->ipsecah_default_soft_addtime;
351 	ecomb->sadb_x_ecomb_hard_addtime =
352 	    ahstack->ipsecah_default_hard_addtime;
353 	ecomb->sadb_x_ecomb_soft_usetime =
354 	    ahstack->ipsecah_default_soft_usetime;
355 	ecomb->sadb_x_ecomb_hard_usetime =
356 	    ahstack->ipsecah_default_hard_usetime;
357 }
358 
359 /*
360  * Initialize things for AH at module load time.
361  */
362 boolean_t
363 ipsecah_ddi_init(void)
364 {
365 	ah_taskq = taskq_create("ah_taskq", 1, minclsyspri,
366 	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
367 
368 	/*
369 	 * We want to be informed each time a stack is created or
370 	 * destroyed in the kernel, so we can maintain the
371 	 * set of ipsecah_stack_t's.
372 	 */
373 	netstack_register(NS_IPSECAH, ipsecah_stack_init, NULL,
374 	    ipsecah_stack_fini);
375 
376 	return (B_TRUE);
377 }
378 
379 /*
380  * Walk through the param array specified registering each element with the
381  * named dispatch handler.
382  */
383 static boolean_t
384 ipsecah_param_register(IDP *ndp, ipsecahparam_t *ahp, int cnt)
385 {
386 	for (; cnt-- > 0; ahp++) {
387 		if (ahp->ipsecah_param_name != NULL &&
388 		    ahp->ipsecah_param_name[0]) {
389 			if (!nd_load(ndp,
390 			    ahp->ipsecah_param_name,
391 			    ipsecah_param_get, ipsecah_param_set,
392 			    (caddr_t)ahp)) {
393 				nd_free(ndp);
394 				return (B_FALSE);
395 			}
396 		}
397 	}
398 	return (B_TRUE);
399 }
400 
401 /*
402  * Initialize things for AH for each stack instance
403  */
404 static void *
405 ipsecah_stack_init(netstackid_t stackid, netstack_t *ns)
406 {
407 	ipsecah_stack_t	*ahstack;
408 	ipsecahparam_t	*ahp;
409 
410 	ahstack = (ipsecah_stack_t *)kmem_zalloc(sizeof (*ahstack), KM_SLEEP);
411 	ahstack->ipsecah_netstack = ns;
412 
413 	ahp = (ipsecahparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
414 	ahstack->ipsecah_params = ahp;
415 	bcopy(lcl_param_arr, ahp, sizeof (lcl_param_arr));
416 
417 	(void) ipsecah_param_register(&ahstack->ipsecah_g_nd, ahp,
418 	    A_CNT(lcl_param_arr));
419 
420 	(void) ah_kstat_init(ahstack, stackid);
421 
422 	ahstack->ah_sadb.s_acquire_timeout = &ahstack->ipsecah_acquire_timeout;
423 	ahstack->ah_sadb.s_acqfn = ah_send_acquire;
424 	sadbp_init("AH", &ahstack->ah_sadb, SADB_SATYPE_AH, ah_hash_size,
425 	    ahstack->ipsecah_netstack);
426 
427 	mutex_init(&ahstack->ipsecah_param_lock, NULL, MUTEX_DEFAULT, 0);
428 
429 	ip_drop_register(&ahstack->ah_dropper, "IPsec AH");
430 	return (ahstack);
431 }
432 
433 /*
434  * Destroy things for AH at module unload time.
435  */
436 void
437 ipsecah_ddi_destroy(void)
438 {
439 	netstack_unregister(NS_IPSECAH);
440 	taskq_destroy(ah_taskq);
441 }
442 
443 /*
444  * Destroy things for AH for one stack... Never called?
445  */
446 static void
447 ipsecah_stack_fini(netstackid_t stackid, void *arg)
448 {
449 	ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;
450 
451 	if (ahstack->ah_pfkey_q != NULL) {
452 		(void) quntimeout(ahstack->ah_pfkey_q, ahstack->ah_event);
453 	}
454 	ahstack->ah_sadb.s_acqfn = NULL;
455 	ahstack->ah_sadb.s_acquire_timeout = NULL;
456 	sadbp_destroy(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
457 	ip_drop_unregister(&ahstack->ah_dropper);
458 	mutex_destroy(&ahstack->ipsecah_param_lock);
459 	nd_free(&ahstack->ipsecah_g_nd);
460 
461 	kmem_free(ahstack->ipsecah_params, sizeof (lcl_param_arr));
462 	ahstack->ipsecah_params = NULL;
463 	kstat_delete_netstack(ahstack->ah_ksp, stackid);
464 	ahstack->ah_ksp = NULL;
465 	ahstack->ah_kstats = NULL;
466 
467 	kmem_free(ahstack, sizeof (*ahstack));
468 }
469 
470 /*
471  * AH module open routine. The module should be opened by keysock.
472  */
473 /* ARGSUSED */
474 static int
475 ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
476 {
477 	netstack_t	*ns;
478 	ipsecah_stack_t	*ahstack;
479 
480 	if (secpolicy_ip_config(credp, B_FALSE) != 0) {
481 		ah0dbg(("Non-privileged user trying to open ipsecah.\n"));
482 		return (EPERM);
483 	}
484 
485 	if (q->q_ptr != NULL)
486 		return (0);  /* Re-open of an already open instance. */
487 
488 	if (sflag != MODOPEN)
489 		return (EINVAL);
490 
491 	ns = netstack_find_by_cred(credp);
492 	ASSERT(ns != NULL);
493 	ahstack = ns->netstack_ipsecah;
494 	ASSERT(ahstack != NULL);
495 
496 	/*
497 	 * ASSUMPTIONS (because I'm MT_OCEXCL):
498 	 *
499 	 *	* I'm being pushed on top of IP for all my opens (incl. #1).
500 	 *	* Only ipsecah_open() can write into ah_sadb.s_ip_q.
501 	 *	* Because of this, I can check lazily for ah_sadb.s_ip_q.
502 	 *
503 	 *  If these assumptions are wrong, I'm in BIG trouble...
504 	 */
505 
506 	q->q_ptr = ahstack;
507 	WR(q)->q_ptr = q->q_ptr;
508 
509 	if (ahstack->ah_sadb.s_ip_q == NULL) {
510 		struct T_unbind_req *tur;
511 
512 		ahstack->ah_sadb.s_ip_q = WR(q);
513 		/* Allocate an unbind... */
514 		ahstack->ah_ip_unbind = allocb(sizeof (struct T_unbind_req),
515 		    BPRI_HI);
516 
517 		/*
518 		 * Send down T_BIND_REQ to bind IPPROTO_AH.
519 		 * Handle the ACK here in AH.
520 		 */
521 		qprocson(q);
522 		if (ahstack->ah_ip_unbind == NULL ||
523 		    !sadb_t_bind_req(ahstack->ah_sadb.s_ip_q, IPPROTO_AH)) {
524 			if (ahstack->ah_ip_unbind != NULL) {
525 				freeb(ahstack->ah_ip_unbind);
526 				ahstack->ah_ip_unbind = NULL;
527 			}
528 			q->q_ptr = NULL;
529 			qprocsoff(q);
530 			netstack_rele(ahstack->ipsecah_netstack);
531 			return (ENOMEM);
532 		}
533 
534 		ahstack->ah_ip_unbind->b_datap->db_type = M_PROTO;
535 		tur = (struct T_unbind_req *)ahstack->ah_ip_unbind->b_rptr;
536 		tur->PRIM_type = T_UNBIND_REQ;
537 	} else {
538 		qprocson(q);
539 	}
540 
541 	/*
542 	 * For now, there's not much I can do.  I'll be getting a message
543 	 * passed down to me from keysock (in my wput), and a T_BIND_ACK
544 	 * up from IP (in my rput).
545 	 */
546 
547 	return (0);
548 }
549 
550 /*
551  * AH module close routine.
552  */
553 static int
554 ipsecah_close(queue_t *q)
555 {
556 	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
557 
558 	/*
559 	 * If ah_sadb.s_ip_q is attached to this instance, send a
560 	 * T_UNBIND_REQ to IP for the instance before doing
561 	 * a qprocsoff().
562 	 */
563 	if (WR(q) == ahstack->ah_sadb.s_ip_q &&
564 	    ahstack->ah_ip_unbind != NULL) {
565 		putnext(WR(q), ahstack->ah_ip_unbind);
566 		ahstack->ah_ip_unbind = NULL;
567 	}
568 
569 	/*
570 	 * Clean up q_ptr, if needed.
571 	 */
572 	qprocsoff(q);
573 
574 	/* Keysock queue check is safe, because of OCEXCL perimeter. */
575 
576 	if (q == ahstack->ah_pfkey_q) {
577 		ah1dbg(ahstack,
578 		    ("ipsecah_close:  Ummm... keysock is closing AH.\n"));
579 		ahstack->ah_pfkey_q = NULL;
580 		/* Detach qtimeouts. */
581 		(void) quntimeout(q, ahstack->ah_event);
582 	}
583 
584 	if (WR(q) == ahstack->ah_sadb.s_ip_q) {
585 		/*
586 		 * If the ah_sadb.s_ip_q is attached to this instance, find
587 		 * another.  The OCEXCL outer perimeter helps us here.
588 		 */
589 
590 		ahstack->ah_sadb.s_ip_q = NULL;
591 
592 		/*
593 		 * Find a replacement queue for ah_sadb.s_ip_q.
594 		 */
595 		if (ahstack->ah_pfkey_q != NULL &&
596 		    ahstack->ah_pfkey_q != RD(q)) {
597 			/*
598 			 * See if we can use the pfkey_q.
599 			 */
600 			ahstack->ah_sadb.s_ip_q = WR(ahstack->ah_pfkey_q);
601 		}
602 
603 		if (ahstack->ah_sadb.s_ip_q == NULL ||
604 		    !sadb_t_bind_req(ahstack->ah_sadb.s_ip_q, IPPROTO_AH)) {
605 			ah1dbg(ahstack,
606 			    ("ipsecah: Can't reassign ah_sadb.s_ip_q.\n"));
607 			ahstack->ah_sadb.s_ip_q = NULL;
608 		} else {
609 			ahstack->ah_ip_unbind =
610 			    allocb(sizeof (struct T_unbind_req), BPRI_HI);
611 
612 			if (ahstack->ah_ip_unbind != NULL) {
613 				struct T_unbind_req *tur;
614 
615 				ahstack->ah_ip_unbind->b_datap->db_type =
616 				    M_PROTO;
617 				tur = (struct T_unbind_req *)
618 				    ahstack->ah_ip_unbind->b_rptr;
619 				tur->PRIM_type = T_UNBIND_REQ;
620 			}
621 			/* If it's NULL, I can't do much here. */
622 		}
623 	}
624 
625 	netstack_rele(ahstack->ipsecah_netstack);
626 	return (0);
627 }
628 
629 /*
630  * AH module read put routine.
631  */
632 /* ARGSUSED */
633 static void
634 ipsecah_rput(queue_t *q, mblk_t *mp)
635 {
636 	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
637 
638 	ASSERT(mp->b_datap->db_type != M_CTL);	/* No more IRE_DB_REQ. */
639 
640 	switch (mp->b_datap->db_type) {
641 	case M_PROTO:
642 	case M_PCPROTO:
643 		/* TPI message of some sort. */
644 		switch (*((t_scalar_t *)mp->b_rptr)) {
645 		case T_BIND_ACK:
646 			/* We expect this. */
647 			ah3dbg(ahstack,
648 			    ("Thank you IP from AH for T_BIND_ACK\n"));
649 			break;
650 		case T_ERROR_ACK:
651 			cmn_err(CE_WARN,
652 			    "ipsecah:  AH received T_ERROR_ACK from IP.");
653 			break;
654 		case T_OK_ACK:
655 			/* Probably from a (rarely sent) T_UNBIND_REQ. */
656 			break;
657 		default:
658 			ah1dbg(ahstack, ("Unknown M_{,PC}PROTO message.\n"));
659 		}
660 		freemsg(mp);
661 		break;
662 	default:
663 		/* For now, passthru message. */
664 		ah2dbg(ahstack, ("AH got unknown mblk type %d.\n",
665 		    mp->b_datap->db_type));
666 		putnext(q, mp);
667 	}
668 }
669 
670 /*
671  * Construct an SADB_REGISTER message with the current algorithms.
672  */
673 static boolean_t
674 ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
675     ipsecah_stack_t *ahstack)
676 {
677 	mblk_t *mp;
678 	boolean_t rc = B_TRUE;
679 	sadb_msg_t *samsg;
680 	sadb_supported_t *sasupp;
681 	sadb_alg_t *saalg;
682 	uint_t allocsize = sizeof (*samsg);
683 	uint_t i, numalgs_snap;
684 	ipsec_alginfo_t **authalgs;
685 	uint_t num_aalgs;
686 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
687 
688 	/* Allocate the KEYSOCK_OUT. */
689 	mp = sadb_keysock_out(serial);
690 	if (mp == NULL) {
691 		ah0dbg(("ah_register_out: couldn't allocate mblk.\n"));
692 		return (B_FALSE);
693 	}
694 
695 	/*
696 	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
697 	 * The alg reader lock needs to be held while allocating
698 	 * the variable part (i.e. the algorithms) of the message.
699 	 */
700 
701 	mutex_enter(&ipss->ipsec_alg_lock);
702 
703 	/*
704 	 * Return only valid algorithms, so the number of algorithms
705 	 * to send up may be less than the number of algorithm entries
706 	 * in the table.
707 	 */
708 	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
709 	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
710 		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
711 			num_aalgs++;
712 
713 	/*
714 	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
715 	 * down the lock while filling it.
716 	 */
717 	if (num_aalgs != 0) {
718 		allocsize += (num_aalgs * sizeof (*saalg));
719 		allocsize += sizeof (*sasupp);
720 	}
721 	mp->b_cont = allocb(allocsize, BPRI_HI);
722 	if (mp->b_cont == NULL) {
723 		mutex_exit(&ipss->ipsec_alg_lock);
724 		freemsg(mp);
725 		return (B_FALSE);
726 	}
727 
728 	mp->b_cont->b_wptr += allocsize;
729 	if (num_aalgs != 0) {
730 
731 		saalg = (sadb_alg_t *)(mp->b_cont->b_rptr + sizeof (*samsg) +
732 		    sizeof (*sasupp));
733 		ASSERT(((ulong_t)saalg & 0x7) == 0);
734 
735 		numalgs_snap = 0;
736 		for (i = 0;
737 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
738 		    i++) {
739 			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
740 				continue;
741 
742 			saalg->sadb_alg_id = authalgs[i]->alg_id;
743 			saalg->sadb_alg_ivlen = 0;
744 			saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
745 			saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
746 			saalg->sadb_x_alg_increment =
747 			    authalgs[i]->alg_increment;
748 			saalg->sadb_x_alg_defincr =
749 			    authalgs[i]->alg_ef_default;
750 			numalgs_snap++;
751 			saalg++;
752 		}
753 		ASSERT(numalgs_snap == num_aalgs);
754 #ifdef DEBUG
755 		/*
756 		 * Reality check to make sure I snagged all of the
757 		 * algorithms.
758 		 */
759 		for (; i < IPSEC_MAX_ALGS; i++)
760 			if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
761 				cmn_err(CE_PANIC,
762 				    "ah_register_out()!  Missed #%d.\n", i);
763 #endif /* DEBUG */
764 	}
765 
766 	mutex_exit(&ipss->ipsec_alg_lock);
767 
768 	/* Now fill the restof the SADB_REGISTER message. */
769 
770 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
771 	samsg->sadb_msg_version = PF_KEY_V2;
772 	samsg->sadb_msg_type = SADB_REGISTER;
773 	samsg->sadb_msg_errno = 0;
774 	samsg->sadb_msg_satype = SADB_SATYPE_AH;
775 	samsg->sadb_msg_len = SADB_8TO64(allocsize);
776 	samsg->sadb_msg_reserved = 0;
777 	/*
778 	 * Assume caller has sufficient sequence/pid number info.  If it's one
779 	 * from me over a new alg., I could give two hoots about sequence.
780 	 */
781 	samsg->sadb_msg_seq = sequence;
782 	samsg->sadb_msg_pid = pid;
783 
784 	if (allocsize > sizeof (*samsg)) {
785 		sasupp = (sadb_supported_t *)(samsg + 1);
786 		sasupp->sadb_supported_len =
787 		    SADB_8TO64(allocsize - sizeof (sadb_msg_t));
788 		sasupp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
789 		sasupp->sadb_supported_reserved = 0;
790 	}
791 
792 	if (ahstack->ah_pfkey_q != NULL)
793 		putnext(ahstack->ah_pfkey_q, mp);
794 	else {
795 		rc = B_FALSE;
796 		freemsg(mp);
797 	}
798 
799 	return (rc);
800 }
801 
802 /*
803  * Invoked when the algorithm table changes. Causes SADB_REGISTER
804  * messages continaining the current list of algorithms to be
805  * sent up to the AH listeners.
806  */
807 void
808 ipsecah_algs_changed(netstack_t *ns)
809 {
810 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
811 
812 	/*
813 	 * Time to send a PF_KEY SADB_REGISTER message to AH listeners
814 	 * everywhere.  (The function itself checks for NULL ah_pfkey_q.)
815 	 */
816 	(void) ah_register_out(0, 0, 0, ahstack);
817 }
818 
819 /*
820  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
821  * and put() it into AH and STREAMS again.
822  */
823 static void
824 inbound_task(void *arg)
825 {
826 	ah_t *ah;
827 	mblk_t *mp = (mblk_t *)arg;
828 	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
829 	int ipsec_rc;
830 	netstack_t	*ns = ii->ipsec_in_ns;
831 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
832 
833 	ah2dbg(ahstack, ("in AH inbound_task"));
834 
835 	ASSERT(ahstack != NULL);
836 	ah = ipsec_inbound_ah_sa(mp, ns);
837 	if (ah == NULL)
838 		return;
839 	ASSERT(ii->ipsec_in_ah_sa != NULL);
840 	ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(mp, ah);
841 	if (ipsec_rc != IPSEC_STATUS_SUCCESS)
842 		return;
843 	ip_fanout_proto_again(mp, NULL, NULL, NULL);
844 }
845 
846 
847 /*
848  * Now that weak-key passed, actually ADD the security association, and
849  * send back a reply ADD message.
850  */
851 static int
852 ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
853     int *diagnostic, ipsecah_stack_t *ahstack)
854 {
855 	isaf_t *primary, *secondary, *inbound, *outbound;
856 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
857 	sadb_address_t *dstext =
858 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
859 	struct sockaddr_in *dst;
860 	struct sockaddr_in6 *dst6;
861 	boolean_t is_ipv4, clone = B_FALSE, is_inbound = B_FALSE;
862 	uint32_t *dstaddr;
863 	ipsa_t *larval;
864 	ipsacq_t *acqrec;
865 	iacqf_t *acq_bucket;
866 	mblk_t *acq_msgs = NULL;
867 	mblk_t *lpkt;
868 	int rc;
869 	sadb_t *sp;
870 	int outhash;
871 	netstack_t	*ns = ahstack->ipsecah_netstack;
872 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
873 
874 	/*
875 	 * Locate the appropriate table(s).
876 	 */
877 
878 	dst = (struct sockaddr_in *)(dstext + 1);
879 	dst6 = (struct sockaddr_in6 *)dst;
880 	is_ipv4 = (dst->sin_family == AF_INET);
881 	if (is_ipv4) {
882 		sp = &ahstack->ah_sadb.s_v4;
883 		dstaddr = (uint32_t *)(&dst->sin_addr);
884 		outhash = OUTBOUND_HASH_V4(sp, *(ipaddr_t *)dstaddr);
885 	} else {
886 		ASSERT(dst->sin_family == AF_INET6);
887 		sp = &ahstack->ah_sadb.s_v6;
888 		dstaddr = (uint32_t *)(&dst6->sin6_addr);
889 		outhash = OUTBOUND_HASH_V6(sp, *(in6_addr_t *)dstaddr);
890 	}
891 
892 	inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
893 	outbound = &sp->sdb_of[outhash];
894 
895 	switch (ksi->ks_in_dsttype) {
896 	case KS_IN_ADDR_MBCAST:
897 		clone = B_TRUE;	/* All mcast SAs can be bidirectional */
898 		/* FALLTHRU */
899 	case KS_IN_ADDR_ME:
900 		primary = inbound;
901 		secondary = outbound;
902 		/*
903 		 * If the source address is either one of mine, or unspecified
904 		 * (which is best summed up by saying "not 'not mine'"),
905 		 * then the association is potentially bi-directional,
906 		 * in that it can be used for inbound traffic and outbound
907 		 * traffic.  The best example of such and SA is a multicast
908 		 * SA (which allows me to receive the outbound traffic).
909 		 */
910 		if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
911 			clone = B_TRUE;
912 		is_inbound = B_TRUE;
913 		break;
914 	case KS_IN_ADDR_NOTME:
915 		primary = outbound;
916 		secondary = inbound;
917 		/*
918 		 * If the source address literally not mine (either
919 		 * unspecified or not mine), then this SA may have an
920 		 * address that WILL be mine after some configuration.
921 		 * We pay the price for this by making it a bi-directional
922 		 * SA.
923 		 */
924 		if (ksi->ks_in_srctype != KS_IN_ADDR_ME)
925 			clone = B_TRUE;
926 		break;
927 	default:
928 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
929 		return (EINVAL);
930 	}
931 
932 	/*
933 	 * Find a ACQUIRE list entry if possible.  If we've added an SA that
934 	 * suits the needs of an ACQUIRE list entry, we can eliminate the
935 	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
936 	 * high-bit of the sequence number to queue it.  Key off destination
937 	 * addr, and change acqrec's state.
938 	 */
939 
940 	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
941 		acq_bucket = &sp->sdb_acq[outhash];
942 		mutex_enter(&acq_bucket->iacqf_lock);
943 		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
944 		    acqrec = acqrec->ipsacq_next) {
945 			mutex_enter(&acqrec->ipsacq_lock);
946 			/*
947 			 * Q:  I only check sequence.  Should I check dst?
948 			 * A: Yes, check dest because those are the packets
949 			 *    that are queued up.
950 			 */
951 			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
952 			    IPSA_ARE_ADDR_EQUAL(dstaddr,
953 			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
954 				break;
955 			mutex_exit(&acqrec->ipsacq_lock);
956 		}
957 		if (acqrec != NULL) {
958 			/*
959 			 * AHA!  I found an ACQUIRE record for this SA.
960 			 * Grab the msg list, and free the acquire record.
961 			 * I already am holding the lock for this record,
962 			 * so all I have to do is free it.
963 			 */
964 			acq_msgs = acqrec->ipsacq_mp;
965 			acqrec->ipsacq_mp = NULL;
966 			mutex_exit(&acqrec->ipsacq_lock);
967 			sadb_destroy_acquire(acqrec, ns);
968 		}
969 		mutex_exit(&acq_bucket->iacqf_lock);
970 	}
971 
972 	/*
973 	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
974 	 * in larval list (if there).
975 	 */
976 
977 	larval = NULL;
978 
979 	if (samsg->sadb_msg_type == SADB_UPDATE) {
980 		mutex_enter(&inbound->isaf_lock);
981 		larval = ipsec_getassocbyspi(inbound, assoc->sadb_sa_spi,
982 		    ALL_ZEROES_PTR, dstaddr, dst->sin_family);
983 		mutex_exit(&inbound->isaf_lock);
984 
985 		if ((larval == NULL) ||
986 		    (larval->ipsa_state != IPSA_STATE_LARVAL)) {
987 			ah0dbg(("Larval update, but larval disappeared.\n"));
988 			return (ESRCH);
989 		} /* Else sadb_common_add unlinks it for me! */
990 	}
991 
992 	lpkt = NULL;
993 	if (larval != NULL)
994 		lpkt = sadb_clear_lpkt(larval);
995 
996 	rc = sadb_common_add(ahstack->ah_sadb.s_ip_q, ahstack->ah_pfkey_q, mp,
997 	    samsg, ksi, primary, secondary, larval, clone, is_inbound,
998 	    diagnostic, ns);
999 
1000 	/*
1001 	 * How much more stack will I create with all of these
1002 	 * ah_inbound_* and ah_outbound_*() calls?
1003 	 */
1004 
1005 
1006 	if (rc == 0 && lpkt != NULL)
1007 		rc = !taskq_dispatch(ah_taskq, inbound_task,
1008 		    (void *) lpkt, TQ_NOSLEEP);
1009 
1010 	if (rc != 0) {
1011 		ip_drop_packet(lpkt, B_TRUE, NULL, NULL,
1012 		    DROPPER(ipss, ipds_sadb_inlarval_timeout),
1013 		    &ahstack->ah_dropper);
1014 	}
1015 
1016 	while (acq_msgs != NULL) {
1017 		mblk_t *mp = acq_msgs;
1018 
1019 		acq_msgs = acq_msgs->b_next;
1020 		mp->b_next = NULL;
1021 		if (rc == 0) {
1022 			ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr;
1023 
1024 			ASSERT(ahstack->ah_sadb.s_ip_q != NULL);
1025 			if (ipsec_outbound_sa(mp, IPPROTO_AH)) {
1026 				io->ipsec_out_ah_done = B_TRUE;
1027 				if (ah_outbound(mp) == IPSEC_STATUS_SUCCESS) {
1028 					ipha_t *ipha = (ipha_t *)
1029 					    mp->b_cont->b_rptr;
1030 					if (is_ipv4) {
1031 						ip_wput_ipsec_out(NULL, mp,
1032 						    ipha, NULL, NULL);
1033 					} else {
1034 						ip6_t *ip6h = (ip6_t *)ipha;
1035 						ip_wput_ipsec_out_v6(NULL,
1036 						    mp, ip6h, NULL, NULL);
1037 					}
1038 				}
1039 				continue;
1040 			}
1041 		}
1042 		AH_BUMP_STAT(ahstack, out_discards);
1043 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
1044 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
1045 		    &ahstack->ah_dropper);
1046 	}
1047 
1048 	return (rc);
1049 }
1050 
1051 /*
1052  * Add new AH security association.  This may become a generic AH/ESP
1053  * routine eventually.
1054  */
1055 static int
1056 ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
1057 {
1058 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1059 	sadb_address_t *srcext =
1060 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
1061 	sadb_address_t *dstext =
1062 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1063 	sadb_address_t *isrcext =
1064 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
1065 	sadb_address_t *idstext =
1066 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
1067 	sadb_key_t *key = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
1068 	struct sockaddr_in *src, *dst;
1069 	/* We don't need sockaddr_in6 for now. */
1070 	sadb_lifetime_t *soft =
1071 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
1072 	sadb_lifetime_t *hard =
1073 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
1074 	ipsec_alginfo_t *aalg;
1075 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1076 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1077 
1078 	/* I need certain extensions present for an ADD message. */
1079 	if (srcext == NULL) {
1080 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
1081 		return (EINVAL);
1082 	}
1083 	if (dstext == NULL) {
1084 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
1085 		return (EINVAL);
1086 	}
1087 	if (isrcext == NULL && idstext != NULL) {
1088 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
1089 		return (EINVAL);
1090 	}
1091 	if (isrcext != NULL && idstext == NULL) {
1092 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
1093 		return (EINVAL);
1094 	}
1095 	if (assoc == NULL) {
1096 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
1097 		return (EINVAL);
1098 	}
1099 	if (key == NULL) {
1100 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_AKEY;
1101 		return (EINVAL);
1102 	}
1103 
1104 	src = (struct sockaddr_in *)(srcext + 1);
1105 	dst = (struct sockaddr_in *)(dstext + 1);
1106 
1107 	/* Sundry ADD-specific reality checks. */
1108 	/* XXX STATS : Logging/stats here? */
1109 
1110 	if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
1111 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
1112 		return (EINVAL);
1113 	}
1114 	if (assoc->sadb_sa_encrypt != SADB_EALG_NONE) {
1115 		*diagnostic = SADB_X_DIAGNOSTIC_ENCR_NOTSUPP;
1116 		return (EINVAL);
1117 	}
1118 	if (assoc->sadb_sa_flags &
1119 	    ~(SADB_SAFLAGS_NOREPLAY | SADB_X_SAFLAGS_TUNNEL)) {
1120 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
1121 		return (EINVAL);
1122 	}
1123 
1124 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0)
1125 		return (EINVAL);
1126 
1127 	ASSERT(src->sin_family == dst->sin_family);
1128 
1129 	/* Stuff I don't support, for now.  XXX Diagnostic? */
1130 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL ||
1131 	    ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
1132 		return (EOPNOTSUPP);
1133 
1134 	/*
1135 	 * XXX Policy : I'm not checking identities or sensitivity
1136 	 * labels at this time, but if I did, I'd do them here, before I sent
1137 	 * the weak key check up to the algorithm.
1138 	 */
1139 
1140 	/* verify that there is a mapping for the specified algorithm */
1141 	mutex_enter(&ipss->ipsec_alg_lock);
1142 	aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth];
1143 	if (aalg == NULL || !ALG_VALID(aalg)) {
1144 		mutex_exit(&ipss->ipsec_alg_lock);
1145 		ah1dbg(ahstack, ("Couldn't find auth alg #%d.\n",
1146 		    assoc->sadb_sa_auth));
1147 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
1148 		return (EINVAL);
1149 	}
1150 	ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
1151 
1152 	/* sanity check key sizes */
1153 	if (!ipsec_valid_key_size(key->sadb_key_bits, aalg)) {
1154 		mutex_exit(&ipss->ipsec_alg_lock);
1155 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
1156 		return (EINVAL);
1157 	}
1158 
1159 	/* check key and fix parity if needed */
1160 	if (ipsec_check_key(aalg->alg_mech_type, key, B_TRUE,
1161 	    diagnostic) != 0) {
1162 		mutex_exit(&ipss->ipsec_alg_lock);
1163 		return (EINVAL);
1164 	}
1165 
1166 	mutex_exit(&ipss->ipsec_alg_lock);
1167 
1168 	return (ah_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
1169 	    diagnostic, ahstack));
1170 }
1171 
1172 /*
1173  * Update a security association.  Updates come in two varieties.  The first
1174  * is an update of lifetimes on a non-larval SA.  The second is an update of
1175  * a larval SA, which ends up looking a lot more like an add.
1176  */
1177 static int
1178 ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
1179     ipsecah_stack_t *ahstack)
1180 {
1181 	sadb_address_t *dstext =
1182 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1183 	struct sockaddr_in *sin;
1184 
1185 	if (dstext == NULL) {
1186 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
1187 		return (EINVAL);
1188 	}
1189 	sin = (struct sockaddr_in *)(dstext + 1);
1190 	return (sadb_update_sa(mp, ksi,
1191 	    (sin->sin_family == AF_INET6) ? &ahstack->ah_sadb.s_v6 :
1192 	    &ahstack->ah_sadb.s_v4, diagnostic, ahstack->ah_pfkey_q, ah_add_sa,
1193 	    ahstack->ipsecah_netstack));
1194 }
1195 
1196 /*
1197  * Delete a security association.  This is REALLY likely to be code common to
1198  * both AH and ESP.  Find the association, then unlink it.
1199  */
1200 static int
1201 ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
1202     ipsecah_stack_t *ahstack)
1203 {
1204 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1205 	sadb_address_t *dstext =
1206 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1207 	sadb_address_t *srcext =
1208 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
1209 	struct sockaddr_in *sin;
1210 
1211 	if (assoc == NULL) {
1212 		if (dstext != NULL)
1213 			sin = (struct sockaddr_in *)(dstext + 1);
1214 		else if (srcext != NULL)
1215 			sin = (struct sockaddr_in *)(srcext + 1);
1216 		else {
1217 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
1218 			return (EINVAL);
1219 		}
1220 		return (sadb_purge_sa(mp, ksi,
1221 		    (sin->sin_family == AF_INET6) ? &ahstack->ah_sadb.s_v6 :
1222 		    &ahstack->ah_sadb.s_v4,
1223 		    ahstack->ah_pfkey_q, ahstack->ah_sadb.s_ip_q));
1224 	}
1225 
1226 	return (sadb_del_sa(mp, ksi, &ahstack->ah_sadb, diagnostic,
1227 	    ahstack->ah_pfkey_q));
1228 }
1229 
1230 /*
1231  * Convert the entire contents of all of AH's SA tables into PF_KEY SADB_DUMP
1232  * messages.
1233  */
1234 static void
1235 ah_dump(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack)
1236 {
1237 	int error;
1238 	sadb_msg_t *samsg;
1239 
1240 	/*
1241 	 * Dump each fanout, bailing if error is non-zero.
1242 	 */
1243 
1244 	error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi->ks_in_serial,
1245 	    &ahstack->ah_sadb.s_v4);
1246 	if (error != 0)
1247 		goto bail;
1248 
1249 	error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi->ks_in_serial,
1250 	    &ahstack->ah_sadb.s_v6);
1251 bail:
1252 	ASSERT(mp->b_cont != NULL);
1253 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
1254 	samsg->sadb_msg_errno = (uint8_t)error;
1255 	sadb_pfkey_echo(ahstack->ah_pfkey_q, mp,
1256 	    (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
1257 }
1258 
1259 /*
1260  * First-cut reality check for an inbound PF_KEY message.
1261  */
1262 static boolean_t
1263 ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
1264     ipsecah_stack_t *ahstack)
1265 {
1266 	int diagnostic;
1267 
1268 	if (mp->b_cont == NULL) {
1269 		freemsg(mp);
1270 		return (B_TRUE);
1271 	}
1272 
1273 	if (ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1274 		diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
1275 		goto badmsg;
1276 	}
1277 	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
1278 		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
1279 		goto badmsg;
1280 	}
1281 	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
1282 	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
1283 		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
1284 		goto badmsg;
1285 	}
1286 	return (B_FALSE);	/* False ==> no failures */
1287 
1288 badmsg:
1289 	sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL,
1290 	    diagnostic, ksi->ks_in_serial);
1291 	return (B_TRUE);	/* True ==> failures */
1292 }
1293 
1294 /*
1295  * AH parsing of PF_KEY messages.  Keysock did most of the really silly
1296  * error cases.  What I receive is a fully-formed, syntactically legal
1297  * PF_KEY message.  I then need to check semantics...
1298  *
1299  * This code may become common to AH and ESP.  Stay tuned.
1300  *
1301  * I also make the assumption that db_ref's are cool.  If this assumption
1302  * is wrong, this means that someone other than keysock or me has been
1303  * mucking with PF_KEY messages.
1304  */
1305 static void
1306 ah_parse_pfkey(mblk_t *mp, ipsecah_stack_t *ahstack)
1307 {
1308 	mblk_t *msg = mp->b_cont;
1309 	sadb_msg_t *samsg;
1310 	keysock_in_t *ksi;
1311 	int error;
1312 	int diagnostic = SADB_X_DIAGNOSTIC_NONE;
1313 
1314 	ASSERT(msg != NULL);
1315 
1316 	samsg = (sadb_msg_t *)msg->b_rptr;
1317 	ksi = (keysock_in_t *)mp->b_rptr;
1318 
1319 	/*
1320 	 * If applicable, convert unspecified AF_INET6 to unspecified
1321 	 * AF_INET.
1322 	 */
1323 	if (!sadb_addrfix(ksi, ahstack->ah_pfkey_q, mp,
1324 	    ahstack->ipsecah_netstack) ||
1325 	    ah_pfkey_reality_failures(mp, ksi, ahstack)) {
1326 		return;
1327 	}
1328 
1329 	switch (samsg->sadb_msg_type) {
1330 	case SADB_ADD:
1331 		error = ah_add_sa(mp, ksi, &diagnostic,
1332 		    ahstack->ipsecah_netstack);
1333 		if (error != 0) {
1334 			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1335 			    diagnostic, ksi->ks_in_serial);
1336 		}
1337 		/* else ah_add_sa() took care of things. */
1338 		break;
1339 	case SADB_DELETE:
1340 		error = ah_del_sa(mp, ksi, &diagnostic, ahstack);
1341 		if (error != 0) {
1342 			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1343 			    diagnostic, ksi->ks_in_serial);
1344 		}
1345 		/* Else ah_del_sa() took care of things. */
1346 		break;
1347 	case SADB_GET:
1348 		error = sadb_get_sa(mp, ksi, &ahstack->ah_sadb, &diagnostic,
1349 		    ahstack->ah_pfkey_q);
1350 		if (error != 0) {
1351 			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1352 			    diagnostic, ksi->ks_in_serial);
1353 		}
1354 		/* Else sadb_get_sa() took care of things. */
1355 		break;
1356 	case SADB_FLUSH:
1357 		sadbp_flush(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
1358 		sadb_pfkey_echo(ahstack->ah_pfkey_q, mp, samsg, ksi, NULL);
1359 		break;
1360 	case SADB_REGISTER:
1361 		/*
1362 		 * Hmmm, let's do it!  Check for extensions (there should
1363 		 * be none), extract the fields, call ah_register_out(),
1364 		 * then either free or report an error.
1365 		 *
1366 		 * Keysock takes care of the PF_KEY bookkeeping for this.
1367 		 */
1368 		if (ah_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
1369 		    ksi->ks_in_serial, ahstack)) {
1370 			freemsg(mp);
1371 		} else {
1372 			/*
1373 			 * Only way this path hits is if there is a memory
1374 			 * failure.  It will not return B_FALSE because of
1375 			 * lack of ah_pfkey_q if I am in wput().
1376 			 */
1377 			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM,
1378 			    diagnostic, ksi->ks_in_serial);
1379 		}
1380 		break;
1381 	case SADB_UPDATE:
1382 		/*
1383 		 * Find a larval, if not there, find a full one and get
1384 		 * strict.
1385 		 */
1386 		error = ah_update_sa(mp, ksi, &diagnostic, ahstack);
1387 		if (error != 0) {
1388 			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1389 			    diagnostic, ksi->ks_in_serial);
1390 		}
1391 		/* else ah_update_sa() took care of things. */
1392 		break;
1393 	case SADB_GETSPI:
1394 		/*
1395 		 * Reserve a new larval entry.
1396 		 */
1397 		ah_getspi(mp, ksi, ahstack);
1398 		break;
1399 	case SADB_ACQUIRE:
1400 		/*
1401 		 * Find larval and/or ACQUIRE record and kill it (them), I'm
1402 		 * most likely an error.  Inbound ACQUIRE messages should only
1403 		 * have the base header.
1404 		 */
1405 		sadb_in_acquire(samsg, &ahstack->ah_sadb, ahstack->ah_pfkey_q,
1406 		    ahstack->ipsecah_netstack);
1407 		freemsg(mp);
1408 		break;
1409 	case SADB_DUMP:
1410 		/*
1411 		 * Dump all entries.
1412 		 */
1413 		ah_dump(mp, ksi, ahstack);
1414 		/* ah_dump will take care of the return message, etc. */
1415 		break;
1416 	case SADB_EXPIRE:
1417 		/* Should never reach me. */
1418 		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EOPNOTSUPP,
1419 		    diagnostic, ksi->ks_in_serial);
1420 		break;
1421 	default:
1422 		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL,
1423 		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
1424 		break;
1425 	}
1426 }
1427 
1428 /*
1429  * Handle case where PF_KEY says it can't find a keysock for one of my
1430  * ACQUIRE messages.
1431  */
1432 static void
1433 ah_keysock_no_socket(mblk_t *mp, ipsecah_stack_t *ahstack)
1434 {
1435 	sadb_msg_t *samsg;
1436 	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
1437 
1438 	if (mp->b_cont == NULL) {
1439 		freemsg(mp);
1440 		return;
1441 	}
1442 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
1443 
1444 	/*
1445 	 * If keysock can't find any registered, delete the acquire record
1446 	 * immediately, and handle errors.
1447 	 */
1448 	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
1449 		samsg->sadb_msg_errno = kse->ks_err_errno;
1450 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1451 		/*
1452 		 * Use the write-side of the ah_pfkey_q, in case there is
1453 		 * no ahstack->ah_sadb.s_ip_q.
1454 		 */
1455 		sadb_in_acquire(samsg, &ahstack->ah_sadb,
1456 		    WR(ahstack->ah_pfkey_q), ahstack->ipsecah_netstack);
1457 	}
1458 
1459 	freemsg(mp);
1460 }
1461 
1462 /*
1463  * AH module write put routine.
1464  */
1465 static void
1466 ipsecah_wput(queue_t *q, mblk_t *mp)
1467 {
1468 	ipsec_info_t *ii;
1469 	struct iocblk *iocp;
1470 	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
1471 
1472 	ah3dbg(ahstack, ("In ah_wput().\n"));
1473 
1474 	/* NOTE:  Each case must take care of freeing or passing mp. */
1475 	switch (mp->b_datap->db_type) {
1476 	case M_CTL:
1477 		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
1478 			/* Not big enough message. */
1479 			freemsg(mp);
1480 			break;
1481 		}
1482 		ii = (ipsec_info_t *)mp->b_rptr;
1483 
1484 		switch (ii->ipsec_info_type) {
1485 		case KEYSOCK_OUT_ERR:
1486 			ah1dbg(ahstack, ("Got KEYSOCK_OUT_ERR message.\n"));
1487 			ah_keysock_no_socket(mp, ahstack);
1488 			break;
1489 		case KEYSOCK_IN:
1490 			AH_BUMP_STAT(ahstack, keysock_in);
1491 			ah3dbg(ahstack, ("Got KEYSOCK_IN message.\n"));
1492 
1493 			/* Parse the message. */
1494 			ah_parse_pfkey(mp, ahstack);
1495 			break;
1496 		case KEYSOCK_HELLO:
1497 			sadb_keysock_hello(&ahstack->ah_pfkey_q, q, mp,
1498 			    ah_ager, (void *)ahstack, &ahstack->ah_event,
1499 			    SADB_SATYPE_AH);
1500 			break;
1501 		default:
1502 			ah1dbg(ahstack, ("Got M_CTL from above of 0x%x.\n",
1503 			    ii->ipsec_info_type));
1504 			freemsg(mp);
1505 			break;
1506 		}
1507 		break;
1508 	case M_IOCTL:
1509 		iocp = (struct iocblk *)mp->b_rptr;
1510 		switch (iocp->ioc_cmd) {
1511 		case ND_SET:
1512 		case ND_GET:
1513 			if (nd_getset(q, ahstack->ipsecah_g_nd, mp)) {
1514 				qreply(q, mp);
1515 				return;
1516 			} else {
1517 				iocp->ioc_error = ENOENT;
1518 			}
1519 			/* FALLTHRU */
1520 		default:
1521 			/* We really don't support any other ioctls, do we? */
1522 
1523 			/* Return EINVAL */
1524 			if (iocp->ioc_error != ENOENT)
1525 				iocp->ioc_error = EINVAL;
1526 			iocp->ioc_count = 0;
1527 			mp->b_datap->db_type = M_IOCACK;
1528 			qreply(q, mp);
1529 			return;
1530 		}
1531 	default:
1532 		ah3dbg(ahstack,
1533 		    ("Got default message, type %d, passing to IP.\n",
1534 		    mp->b_datap->db_type));
1535 		putnext(q, mp);
1536 	}
1537 }
1538 
1539 /*
1540  * Updating use times can be tricky business if the ipsa_haspeer flag is
1541  * set.  This function is called once in an SA's lifetime.
1542  *
1543  * Caller has to REFRELE "assoc" which is passed in.  This function has
1544  * to REFRELE any peer SA that is obtained.
1545  */
1546 static void
1547 ah_set_usetime(ipsa_t *assoc, boolean_t inbound)
1548 {
1549 	ipsa_t *inassoc, *outassoc;
1550 	isaf_t *bucket;
1551 	sadb_t *sp;
1552 	int outhash;
1553 	boolean_t isv6;
1554 	netstack_t	*ns = assoc->ipsa_netstack;
1555 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1556 
1557 	/* No peer?  No problem! */
1558 	if (!assoc->ipsa_haspeer) {
1559 		sadb_set_usetime(assoc);
1560 		return;
1561 	}
1562 
1563 	/*
1564 	 * Otherwise, we want to grab both the original assoc and its peer.
1565 	 * There might be a race for this, but if it's a real race, the times
1566 	 * will be out-of-synch by at most a second, and since our time
1567 	 * granularity is a second, this won't be a problem.
1568 	 *
1569 	 * If we need tight synchronization on the peer SA, then we need to
1570 	 * reconsider.
1571 	 */
1572 
1573 	/* Use address family to select IPv6/IPv4 */
1574 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
1575 	if (isv6) {
1576 		sp = &ahstack->ah_sadb.s_v6;
1577 	} else {
1578 		sp = &ahstack->ah_sadb.s_v4;
1579 		ASSERT(assoc->ipsa_addrfam == AF_INET);
1580 	}
1581 	if (inbound) {
1582 		inassoc = assoc;
1583 		if (isv6)
1584 			outhash = OUTBOUND_HASH_V6(sp,
1585 			    *((in6_addr_t *)&inassoc->ipsa_dstaddr));
1586 		else
1587 			outhash = OUTBOUND_HASH_V4(sp,
1588 			    *((ipaddr_t *)&inassoc->ipsa_dstaddr));
1589 		bucket = &sp->sdb_of[outhash];
1590 
1591 		mutex_enter(&bucket->isaf_lock);
1592 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1593 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1594 		    inassoc->ipsa_addrfam);
1595 		mutex_exit(&bucket->isaf_lock);
1596 		if (outassoc == NULL) {
1597 			/* Q: Do we wish to set haspeer == B_FALSE? */
1598 			ah0dbg(("ah_set_usetime: "
1599 			    "can't find peer for inbound.\n"));
1600 			sadb_set_usetime(inassoc);
1601 			return;
1602 		}
1603 	} else {
1604 		outassoc = assoc;
1605 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1606 		mutex_enter(&bucket->isaf_lock);
1607 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1608 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1609 		    outassoc->ipsa_addrfam);
1610 		mutex_exit(&bucket->isaf_lock);
1611 		if (inassoc == NULL) {
1612 			/* Q: Do we wish to set haspeer == B_FALSE? */
1613 			ah0dbg(("ah_set_usetime: "
1614 			    "can't find peer for outbound.\n"));
1615 			sadb_set_usetime(outassoc);
1616 			return;
1617 		}
1618 	}
1619 
1620 	/* Update usetime on both. */
1621 	sadb_set_usetime(inassoc);
1622 	sadb_set_usetime(outassoc);
1623 
1624 	/*
1625 	 * REFRELE any peer SA.
1626 	 *
1627 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
1628 	 * them in { }.
1629 	 */
1630 	if (inbound) {
1631 		IPSA_REFRELE(outassoc);
1632 	} else {
1633 		IPSA_REFRELE(inassoc);
1634 	}
1635 }
1636 
1637 /*
1638  * Add a number of bytes to what the SA has protected so far.  Return
1639  * B_TRUE if the SA can still protect that many bytes.
1640  *
1641  * Caller must REFRELE the passed-in assoc.  This function must REFRELE
1642  * any obtained peer SA.
1643  */
1644 static boolean_t
1645 ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
1646 {
1647 	ipsa_t *inassoc, *outassoc;
1648 	isaf_t *bucket;
1649 	boolean_t inrc, outrc, isv6;
1650 	sadb_t *sp;
1651 	int outhash;
1652 	netstack_t	*ns = assoc->ipsa_netstack;
1653 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1654 
1655 	/* No peer?  No problem! */
1656 	if (!assoc->ipsa_haspeer) {
1657 		return (sadb_age_bytes(ahstack->ah_pfkey_q, assoc, bytes,
1658 		    B_TRUE));
1659 	}
1660 
1661 	/*
1662 	 * Otherwise, we want to grab both the original assoc and its peer.
1663 	 * There might be a race for this, but if it's a real race, two
1664 	 * expire messages may occur.  We limit this by only sending the
1665 	 * expire message on one of the peers, we'll pick the inbound
1666 	 * arbitrarily.
1667 	 *
1668 	 * If we need tight synchronization on the peer SA, then we need to
1669 	 * reconsider.
1670 	 */
1671 
1672 	/* Pick v4/v6 bucket based on addrfam. */
1673 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
1674 	if (isv6) {
1675 		sp = &ahstack->ah_sadb.s_v6;
1676 	} else {
1677 		sp = &ahstack->ah_sadb.s_v4;
1678 		ASSERT(assoc->ipsa_addrfam == AF_INET);
1679 	}
1680 	if (inbound) {
1681 		inassoc = assoc;
1682 		if (isv6)
1683 			outhash = OUTBOUND_HASH_V6(sp,
1684 			    *((in6_addr_t *)&inassoc->ipsa_dstaddr));
1685 		else
1686 			outhash = OUTBOUND_HASH_V4(sp,
1687 			    *((ipaddr_t *)&inassoc->ipsa_dstaddr));
1688 		bucket = &sp->sdb_of[outhash];
1689 		mutex_enter(&bucket->isaf_lock);
1690 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1691 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1692 		    inassoc->ipsa_addrfam);
1693 		mutex_exit(&bucket->isaf_lock);
1694 		if (outassoc == NULL) {
1695 			/* Q: Do we wish to set haspeer == B_FALSE? */
1696 			ah0dbg(("ah_age_bytes: "
1697 			    "can't find peer for inbound.\n"));
1698 			return (sadb_age_bytes(ahstack->ah_pfkey_q, inassoc,
1699 			    bytes, B_TRUE));
1700 		}
1701 	} else {
1702 		outassoc = assoc;
1703 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1704 		mutex_enter(&bucket->isaf_lock);
1705 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1706 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1707 		    outassoc->ipsa_addrfam);
1708 		mutex_exit(&bucket->isaf_lock);
1709 		if (inassoc == NULL) {
1710 			/* Q: Do we wish to set haspeer == B_FALSE? */
1711 			ah0dbg(("ah_age_bytes: "
1712 			    "can't find peer for outbound.\n"));
1713 			return (sadb_age_bytes(ahstack->ah_pfkey_q, outassoc,
1714 			    bytes, B_TRUE));
1715 		}
1716 	}
1717 
1718 	inrc = sadb_age_bytes(ahstack->ah_pfkey_q, inassoc, bytes, B_TRUE);
1719 	outrc = sadb_age_bytes(ahstack->ah_pfkey_q, outassoc, bytes, B_FALSE);
1720 
1721 	/*
1722 	 * REFRELE any peer SA.
1723 	 *
1724 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
1725 	 * them in { }.
1726 	 */
1727 	if (inbound) {
1728 		IPSA_REFRELE(outassoc);
1729 	} else {
1730 		IPSA_REFRELE(inassoc);
1731 	}
1732 
1733 	return (inrc && outrc);
1734 }
1735 
1736 /*
1737  * Perform the really difficult work of inserting the proposed situation.
1738  * Called while holding the algorithm lock.
1739  */
1740 static void
1741 ah_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs)
1742 {
1743 	sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
1744 	ipsec_out_t *io;
1745 	ipsec_action_t *ap;
1746 	ipsec_prot_t *prot;
1747 	ipsecah_stack_t	*ahstack;
1748 	netstack_t	*ns;
1749 	ipsec_stack_t	*ipss;
1750 
1751 	io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr;
1752 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
1753 
1754 	ns = io->ipsec_out_ns;
1755 	ipss = ns->netstack_ipsec;
1756 	ahstack = ns->netstack_ipsecah;
1757 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
1758 
1759 	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
1760 	prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
1761 	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
1762 
1763 	prop->sadb_prop_replay = ahstack->ipsecah_replay_size;
1764 
1765 	/*
1766 	 * Based upon algorithm properties, and what-not, prioritize a
1767 	 * proposal, based on the ordering of the ah algorithms in the
1768 	 * alternatives presented in the policy rule passed down
1769 	 * through the ipsec_out_t and attached to the acquire record.
1770 	 */
1771 
1772 	for (ap = acqrec->ipsacq_act; ap != NULL;
1773 	    ap = ap->ipa_next) {
1774 		ipsec_alginfo_t *aalg;
1775 
1776 		if ((ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY) ||
1777 		    (!ap->ipa_act.ipa_apply.ipp_use_ah))
1778 			continue;
1779 
1780 		prot = &ap->ipa_act.ipa_apply;
1781 
1782 		ASSERT(prot->ipp_auth_alg > 0);
1783 
1784 		aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
1785 		    [prot->ipp_auth_alg];
1786 		if (aalg == NULL || !ALG_VALID(aalg))
1787 			continue;
1788 
1789 		/* XXX check aalg for duplicates??.. */
1790 
1791 		comb->sadb_comb_flags = 0;
1792 		comb->sadb_comb_reserved = 0;
1793 		comb->sadb_comb_encrypt = 0;
1794 		comb->sadb_comb_encrypt_minbits = 0;
1795 		comb->sadb_comb_encrypt_maxbits = 0;
1796 
1797 		comb->sadb_comb_auth = aalg->alg_id;
1798 		comb->sadb_comb_auth_minbits =
1799 		    MAX(prot->ipp_ah_minbits, aalg->alg_ef_minbits);
1800 		comb->sadb_comb_auth_maxbits =
1801 		    MIN(prot->ipp_ah_maxbits, aalg->alg_ef_maxbits);
1802 
1803 		/*
1804 		 * The following may be based on algorithm
1805 		 * properties, but in the meantime, we just pick
1806 		 * some good, sensible numbers.  Key mgmt. can
1807 		 * (and perhaps should) be the place to finalize
1808 		 * such decisions.
1809 		 */
1810 
1811 		/*
1812 		 * No limits on allocations, since we really don't
1813 		 * support that concept currently.
1814 		 */
1815 		comb->sadb_comb_soft_allocations = 0;
1816 		comb->sadb_comb_hard_allocations = 0;
1817 
1818 		/*
1819 		 * These may want to come from policy rule..
1820 		 */
1821 		comb->sadb_comb_soft_bytes =
1822 		    ahstack->ipsecah_default_soft_bytes;
1823 		comb->sadb_comb_hard_bytes =
1824 		    ahstack->ipsecah_default_hard_bytes;
1825 		comb->sadb_comb_soft_addtime =
1826 		    ahstack->ipsecah_default_soft_addtime;
1827 		comb->sadb_comb_hard_addtime =
1828 		    ahstack->ipsecah_default_hard_addtime;
1829 		comb->sadb_comb_soft_usetime =
1830 		    ahstack->ipsecah_default_soft_usetime;
1831 		comb->sadb_comb_hard_usetime =
1832 		    ahstack->ipsecah_default_hard_usetime;
1833 
1834 		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
1835 		if (--combs == 0)
1836 			return;	/* out of space.. */
1837 		comb++;
1838 	}
1839 }
1840 
1841 /*
1842  * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
1843  */
1844 static void
1845 ah_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns)
1846 {
1847 	uint_t combs;
1848 	sadb_msg_t *samsg;
1849 	sadb_prop_t *prop;
1850 	mblk_t *pfkeymp, *msgmp;
1851 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1852 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1853 
1854 	AH_BUMP_STAT(ahstack, acquire_requests);
1855 
1856 	if (ahstack->ah_pfkey_q == NULL)
1857 		return;
1858 
1859 	/* Set up ACQUIRE. */
1860 	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_AH,
1861 	    ns->netstack_ipsec);
1862 	if (pfkeymp == NULL) {
1863 		ah0dbg(("sadb_setup_acquire failed.\n"));
1864 		return;
1865 	}
1866 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
1867 	combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
1868 	msgmp = pfkeymp->b_cont;
1869 	samsg = (sadb_msg_t *)(msgmp->b_rptr);
1870 
1871 	/* Insert proposal here. */
1872 
1873 	prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
1874 	ah_insert_prop(prop, acqrec, combs);
1875 	samsg->sadb_msg_len += prop->sadb_prop_len;
1876 	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
1877 
1878 	mutex_exit(&ipss->ipsec_alg_lock);
1879 
1880 	/*
1881 	 * Must mutex_exit() before sending PF_KEY message up, in
1882 	 * order to avoid recursive mutex_enter() if there are no registered
1883 	 * listeners.
1884 	 *
1885 	 * Once I've sent the message, I'm cool anyway.
1886 	 */
1887 	mutex_exit(&acqrec->ipsacq_lock);
1888 	if (extended != NULL) {
1889 		putnext(ahstack->ah_pfkey_q, extended);
1890 	}
1891 	putnext(ahstack->ah_pfkey_q, pfkeymp);
1892 }
1893 
1894 /*
1895  * Handle the SADB_GETSPI message.  Create a larval SA.
1896  */
1897 static void
1898 ah_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack)
1899 {
1900 	ipsa_t *newbie, *target;
1901 	isaf_t *outbound, *inbound;
1902 	int rc, diagnostic;
1903 	sadb_sa_t *assoc;
1904 	keysock_out_t *kso;
1905 	uint32_t newspi;
1906 
1907 	/*
1908 	 * Randomly generate a proposed SPI value.
1909 	 */
1910 	(void) random_get_pseudo_bytes((uint8_t *)&newspi, sizeof (uint32_t));
1911 	newbie = sadb_getspi(ksi, newspi, &diagnostic,
1912 	    ahstack->ipsecah_netstack);
1913 
1914 	if (newbie == NULL) {
1915 		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM, diagnostic,
1916 		    ksi->ks_in_serial);
1917 		return;
1918 	} else if (newbie == (ipsa_t *)-1) {
1919 		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL, diagnostic,
1920 		    ksi->ks_in_serial);
1921 		return;
1922 	}
1923 
1924 	/*
1925 	 * XXX - We may randomly collide.  We really should recover from this.
1926 	 *	 Unfortunately, that could require spending way-too-much-time
1927 	 *	 in here.  For now, let the user retry.
1928 	 */
1929 
1930 	if (newbie->ipsa_addrfam == AF_INET6) {
1931 		outbound = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6,
1932 		    *(uint32_t *)(newbie->ipsa_dstaddr));
1933 		inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v6,
1934 		    newbie->ipsa_spi);
1935 	} else {
1936 		outbound = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4,
1937 		    *(uint32_t *)(newbie->ipsa_dstaddr));
1938 		inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v4,
1939 		    newbie->ipsa_spi);
1940 	}
1941 
1942 	mutex_enter(&outbound->isaf_lock);
1943 	mutex_enter(&inbound->isaf_lock);
1944 
1945 	/*
1946 	 * Check for collisions (i.e. did sadb_getspi() return with something
1947 	 * that already exists?).
1948 	 *
1949 	 * Try outbound first.  Even though SADB_GETSPI is traditionally
1950 	 * for inbound SAs, you never know what a user might do.
1951 	 */
1952 	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1953 	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1954 	if (target == NULL) {
1955 		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1956 		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1957 		    newbie->ipsa_addrfam);
1958 	}
1959 
1960 	/*
1961 	 * I don't have collisions elsewhere!
1962 	 * (Nor will I because I'm still holding inbound/outbound locks.)
1963 	 */
1964 
1965 	if (target != NULL) {
1966 		rc = EEXIST;
1967 		IPSA_REFRELE(target);
1968 	} else {
1969 		/*
1970 		 * sadb_insertassoc() also checks for collisions, so
1971 		 * if there's a colliding larval entry, rc will be set
1972 		 * to EEXIST.
1973 		 */
1974 		rc = sadb_insertassoc(newbie, inbound);
1975 		newbie->ipsa_hardexpiretime = gethrestime_sec();
1976 		newbie->ipsa_hardexpiretime += ahstack->ipsecah_larval_timeout;
1977 	}
1978 
1979 	/*
1980 	 * Can exit outbound mutex.  Hold inbound until we're done with
1981 	 * newbie.
1982 	 */
1983 	mutex_exit(&outbound->isaf_lock);
1984 
1985 	if (rc != 0) {
1986 		mutex_exit(&inbound->isaf_lock);
1987 		IPSA_REFRELE(newbie);
1988 		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, rc,
1989 		    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1990 		return;
1991 	}
1992 
1993 	/* Can write here because I'm still holding the bucket lock. */
1994 	newbie->ipsa_type = SADB_SATYPE_AH;
1995 
1996 	/*
1997 	 * Construct successful return message.  We have one thing going
1998 	 * for us in PF_KEY v2.  That's the fact that
1999 	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
2000 	 */
2001 	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
2002 	assoc->sadb_sa_exttype = SADB_EXT_SA;
2003 	assoc->sadb_sa_spi = newbie->ipsa_spi;
2004 	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
2005 	mutex_exit(&inbound->isaf_lock);
2006 
2007 	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
2008 	kso = (keysock_out_t *)ksi;
2009 	kso->ks_out_len = sizeof (*kso);
2010 	kso->ks_out_serial = ksi->ks_in_serial;
2011 	kso->ks_out_type = KEYSOCK_OUT;
2012 
2013 	/*
2014 	 * Can safely putnext() to ah_pfkey_q, because this is a turnaround
2015 	 * from the ah_pfkey_q.
2016 	 */
2017 	putnext(ahstack->ah_pfkey_q, mp);
2018 }
2019 
2020 /*
2021  * IPv6 sends up the ICMP errors for validation and the removal of the AH
2022  * header.
2023  */
2024 static ipsec_status_t
2025 ah_icmp_error_v6(mblk_t *ipsec_mp, ipsecah_stack_t *ahstack)
2026 {
2027 	mblk_t *mp;
2028 	ip6_t *ip6h, *oip6h;
2029 	uint16_t hdr_length, ah_length;
2030 	uint8_t *nexthdrp;
2031 	ah_t *ah;
2032 	icmp6_t *icmp6;
2033 	isaf_t *isaf;
2034 	ipsa_t *assoc;
2035 	uint8_t *post_ah_ptr;
2036 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2037 
2038 	mp = ipsec_mp->b_cont;
2039 	ASSERT(mp->b_datap->db_type == M_CTL);
2040 
2041 	/*
2042 	 * Change the type to M_DATA till we finish pullups.
2043 	 */
2044 	mp->b_datap->db_type = M_DATA;
2045 
2046 	/*
2047 	 * Eat the cost of a pullupmsg() for now.  It makes the rest of this
2048 	 * code far less convoluted.
2049 	 */
2050 	if (!pullupmsg(mp, -1) ||
2051 	    !ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, &hdr_length,
2052 	    &nexthdrp) ||
2053 	    mp->b_rptr + hdr_length + sizeof (icmp6_t) + sizeof (ip6_t) +
2054 	    sizeof (ah_t) > mp->b_wptr) {
2055 		IP_AH_BUMP_STAT(ipss, in_discards);
2056 		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
2057 		    DROPPER(ipss, ipds_ah_nomem),
2058 		    &ahstack->ah_dropper);
2059 		return (IPSEC_STATUS_FAILED);
2060 	}
2061 
2062 	oip6h = (ip6_t *)mp->b_rptr;
2063 	icmp6 = (icmp6_t *)((uint8_t *)oip6h + hdr_length);
2064 	ip6h = (ip6_t *)(icmp6 + 1);
2065 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
2066 		IP_AH_BUMP_STAT(ipss, in_discards);
2067 		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
2068 		    DROPPER(ipss, ipds_ah_bad_v6_hdrs),
2069 		    &ahstack->ah_dropper);
2070 		return (IPSEC_STATUS_FAILED);
2071 	}
2072 	ah = (ah_t *)((uint8_t *)ip6h + hdr_length);
2073 
2074 	isaf = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6, ip6h->ip6_dst);
2075 	mutex_enter(&isaf->isaf_lock);
2076 	assoc = ipsec_getassocbyspi(isaf, ah->ah_spi,
2077 	    (uint32_t *)&ip6h->ip6_src, (uint32_t *)&ip6h->ip6_dst, AF_INET6);
2078 	mutex_exit(&isaf->isaf_lock);
2079 
2080 	if (assoc == NULL) {
2081 		IP_AH_BUMP_STAT(ipss, lookup_failure);
2082 		IP_AH_BUMP_STAT(ipss, in_discards);
2083 		if (ahstack->ipsecah_log_unknown_spi) {
2084 			ipsec_assocfailure(info.mi_idnum, 0, 0,
2085 			    SL_CONSOLE | SL_WARN | SL_ERROR,
2086 			    "Bad ICMP message - No association for the "
2087 			    "attached AH header whose spi is 0x%x, "
2088 			    "sender is 0x%x\n",
2089 			    ah->ah_spi, &oip6h->ip6_src, AF_INET6,
2090 			    ahstack->ipsecah_netstack);
2091 		}
2092 		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
2093 		    DROPPER(ipss, ipds_ah_no_sa),
2094 		    &ahstack->ah_dropper);
2095 		return (IPSEC_STATUS_FAILED);
2096 	}
2097 
2098 	IPSA_REFRELE(assoc);
2099 
2100 	/*
2101 	 * There seems to be a valid association. If there is enough of AH
2102 	 * header remove it, otherwise bail.  One could check whether it has
2103 	 * complete AH header plus 8 bytes but it does not make sense if an
2104 	 * icmp error is returned for ICMP messages e.g ICMP time exceeded,
2105 	 * that are being sent up. Let the caller figure out.
2106 	 *
2107 	 * NOTE: ah_length is the number of 32 bit words minus 2.
2108 	 */
2109 	ah_length = (ah->ah_length << 2) + 8;
2110 	post_ah_ptr = (uint8_t *)ah + ah_length;
2111 
2112 	if (post_ah_ptr > mp->b_wptr) {
2113 		IP_AH_BUMP_STAT(ipss, in_discards);
2114 		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
2115 		    DROPPER(ipss, ipds_ah_bad_length),
2116 		    &ahstack->ah_dropper);
2117 		return (IPSEC_STATUS_FAILED);
2118 	}
2119 
2120 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - ah_length);
2121 	*nexthdrp = ah->ah_nexthdr;
2122 	ovbcopy(post_ah_ptr, ah,
2123 	    (size_t)((uintptr_t)mp->b_wptr - (uintptr_t)post_ah_ptr));
2124 	mp->b_wptr -= ah_length;
2125 	/* Rewhack to be an ICMP error. */
2126 	mp->b_datap->db_type = M_CTL;
2127 
2128 	return (IPSEC_STATUS_SUCCESS);
2129 }
2130 
2131 /*
2132  * IP sends up the ICMP errors for validation and the removal of
2133  * the AH header.
2134  */
2135 static ipsec_status_t
2136 ah_icmp_error_v4(mblk_t *ipsec_mp, ipsecah_stack_t *ahstack)
2137 {
2138 	mblk_t *mp;
2139 	mblk_t *mp1;
2140 	icmph_t *icmph;
2141 	int iph_hdr_length;
2142 	int hdr_length;
2143 	isaf_t *hptr;
2144 	ipsa_t *assoc;
2145 	int ah_length;
2146 	ipha_t *ipha;
2147 	ipha_t *oipha;
2148 	ah_t *ah;
2149 	uint32_t length;
2150 	int alloc_size;
2151 	uint8_t nexthdr;
2152 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2153 
2154 	mp = ipsec_mp->b_cont;
2155 	ASSERT(mp->b_datap->db_type == M_CTL);
2156 
2157 	/*
2158 	 * Change the type to M_DATA till we finish pullups.
2159 	 */
2160 	mp->b_datap->db_type = M_DATA;
2161 
2162 	oipha = ipha = (ipha_t *)mp->b_rptr;
2163 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
2164 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2165 
2166 	ipha = (ipha_t *)&icmph[1];
2167 	hdr_length = IPH_HDR_LENGTH(ipha);
2168 
2169 	/*
2170 	 * See if we have enough to locate the SPI
2171 	 */
2172 	if ((uchar_t *)ipha + hdr_length + 8 > mp->b_wptr) {
2173 		if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + 8 -
2174 		    mp->b_rptr)) {
2175 			ipsec_rl_strlog(ahstack->ipsecah_netstack,
2176 			    info.mi_idnum, 0, 0,
2177 			    SL_WARN | SL_ERROR,
2178 			    "ICMP error: Small AH header\n");
2179 			IP_AH_BUMP_STAT(ipss, in_discards);
2180 			ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
2181 			    DROPPER(ipss, ipds_ah_bad_length),
2182 			    &ahstack->ah_dropper);
2183 			return (IPSEC_STATUS_FAILED);
2184 		}
2185 		icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2186 		ipha = (ipha_t *)&icmph[1];
2187 	}
2188 
2189 	ah = (ah_t *)((uint8_t *)ipha + hdr_length);
2190 	nexthdr = ah->ah_nexthdr;
2191 
2192 	hptr = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4, ipha->ipha_dst);
2193 	mutex_enter(&hptr->isaf_lock);
2194 	assoc = ipsec_getassocbyspi(hptr, ah->ah_spi,
2195 	    (uint32_t *)&ipha->ipha_src, (uint32_t *)&ipha->ipha_dst, AF_INET);
2196 	mutex_exit(&hptr->isaf_lock);
2197 
2198 	if (assoc == NULL) {
2199 		IP_AH_BUMP_STAT(ipss, lookup_failure);
2200 		IP_AH_BUMP_STAT(ipss, in_discards);
2201 		if (ahstack->ipsecah_log_unknown_spi) {
2202 			ipsec_assocfailure(info.mi_idnum, 0, 0,
2203 			    SL_CONSOLE | SL_WARN | SL_ERROR,
2204 			    "Bad ICMP message - No association for the "
2205 			    "attached AH header whose spi is 0x%x, "
2206 			    "sender is 0x%x\n",
2207 			    ah->ah_spi, &oipha->ipha_src, AF_INET,
2208 			    ahstack->ipsecah_netstack);
2209 		}
2210 		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
2211 		    DROPPER(ipss, ipds_ah_no_sa),
2212 		    &ahstack->ah_dropper);
2213 		return (IPSEC_STATUS_FAILED);
2214 	}
2215 
2216 	IPSA_REFRELE(assoc);
2217 	/*
2218 	 * There seems to be a valid association. If there
2219 	 * is enough of AH header remove it, otherwise remove
2220 	 * as much as possible and send it back. One could check
2221 	 * whether it has complete AH header plus 8 bytes but it
2222 	 * does not make sense if an icmp error is returned for
2223 	 * ICMP messages e.g ICMP time exceeded, that are being
2224 	 * sent up. Let the caller figure out.
2225 	 *
2226 	 * NOTE: ah_length is the number of 32 bit words minus 2.
2227 	 */
2228 	ah_length = (ah->ah_length << 2) + 8;
2229 
2230 	if ((uchar_t *)ipha + hdr_length + ah_length > mp->b_wptr) {
2231 		if (mp->b_cont == NULL) {
2232 			/*
2233 			 * There is nothing to pullup. Just remove as
2234 			 * much as possible. This is a common case for
2235 			 * IPV4.
2236 			 */
2237 			ah_length = (mp->b_wptr - ((uchar_t *)ipha +
2238 			    hdr_length));
2239 			goto done;
2240 		}
2241 		/* Pullup the full ah header */
2242 		if (!pullupmsg(mp, (uchar_t *)ah + ah_length - mp->b_rptr)) {
2243 			/*
2244 			 * pullupmsg could have failed if there was not
2245 			 * enough to pullup or memory allocation failed.
2246 			 * We tried hard, give up now.
2247 			 */
2248 			IP_AH_BUMP_STAT(ipss, in_discards);
2249 			ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
2250 			    DROPPER(ipss, ipds_ah_nomem),
2251 			    &ahstack->ah_dropper);
2252 			return (IPSEC_STATUS_FAILED);
2253 		}
2254 		icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2255 		ipha = (ipha_t *)&icmph[1];
2256 	}
2257 done:
2258 	/*
2259 	 * Remove the AH header and change the protocol.
2260 	 * Don't update the spi fields in the ipsec_in
2261 	 * message as we are called just to validate the
2262 	 * message attached to the ICMP message.
2263 	 *
2264 	 * If we never pulled up since all of the message
2265 	 * is in one single mblk, we can't remove the AH header
2266 	 * by just setting the b_wptr to the beginning of the
2267 	 * AH header. We need to allocate a mblk that can hold
2268 	 * up until the inner IP header and copy them.
2269 	 */
2270 	alloc_size = iph_hdr_length + sizeof (icmph_t) + hdr_length;
2271 
2272 	if ((mp1 = allocb(alloc_size, BPRI_LO)) == NULL) {
2273 		IP_AH_BUMP_STAT(ipss, in_discards);
2274 		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
2275 		    DROPPER(ipss, ipds_ah_nomem),
2276 		    &ahstack->ah_dropper);
2277 		return (IPSEC_STATUS_FAILED);
2278 	}
2279 	/* ICMP errors are M_CTL messages */
2280 	mp1->b_datap->db_type = M_CTL;
2281 	ipsec_mp->b_cont = mp1;
2282 	bcopy(mp->b_rptr, mp1->b_rptr, alloc_size);
2283 	mp1->b_wptr += alloc_size;
2284 
2285 	/*
2286 	 * Skip whatever we have copied and as much of AH header
2287 	 * possible. If we still have something left in the original
2288 	 * message, tag on.
2289 	 */
2290 	mp->b_rptr = (uchar_t *)ipha + hdr_length + ah_length;
2291 
2292 	if (mp->b_rptr != mp->b_wptr) {
2293 		mp1->b_cont = mp;
2294 	} else {
2295 		if (mp->b_cont != NULL)
2296 			mp1->b_cont = mp->b_cont;
2297 		freeb(mp);
2298 	}
2299 
2300 	ipha = (ipha_t *)(mp1->b_rptr + iph_hdr_length + sizeof (icmph_t));
2301 	ipha->ipha_protocol = nexthdr;
2302 	length = ntohs(ipha->ipha_length);
2303 	length -= ah_length;
2304 	ipha->ipha_length = htons((uint16_t)length);
2305 	ipha->ipha_hdr_checksum = 0;
2306 	ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2307 
2308 	return (IPSEC_STATUS_SUCCESS);
2309 }
2310 
2311 /*
2312  * IP calls this to validate the ICMP errors that
2313  * we got from the network.
2314  */
2315 ipsec_status_t
2316 ipsecah_icmp_error(mblk_t *mp)
2317 {
2318 	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
2319 	netstack_t	*ns = ii->ipsec_in_ns;
2320 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
2321 
2322 	if (ii->ipsec_in_v4)
2323 		return (ah_icmp_error_v4(mp, ahstack));
2324 	else
2325 		return (ah_icmp_error_v6(mp, ahstack));
2326 }
2327 
2328 static int
2329 ah_fix_tlv_options_v6(uint8_t *oi_opt, uint8_t *pi_opt, uint_t ehdrlen,
2330     uint8_t hdr_type, boolean_t copy_always)
2331 {
2332 	uint8_t opt_type;
2333 	uint_t optlen;
2334 
2335 	ASSERT(hdr_type == IPPROTO_DSTOPTS || hdr_type == IPPROTO_HOPOPTS);
2336 
2337 	/*
2338 	 * Copy the next header and hdr ext. len of the HOP-by-HOP
2339 	 * and Destination option.
2340 	 */
2341 	*pi_opt++ = *oi_opt++;
2342 	*pi_opt++ = *oi_opt++;
2343 	ehdrlen -= 2;
2344 
2345 	/*
2346 	 * Now handle all the TLV encoded options.
2347 	 */
2348 	while (ehdrlen != 0) {
2349 		opt_type = *oi_opt;
2350 
2351 		if (opt_type == IP6OPT_PAD1) {
2352 			optlen = 1;
2353 		} else {
2354 			if (ehdrlen < 2)
2355 				goto bad_opt;
2356 			optlen = 2 + oi_opt[1];
2357 			if (optlen > ehdrlen)
2358 				goto bad_opt;
2359 		}
2360 		if (copy_always || !(opt_type & IP6OPT_MUTABLE)) {
2361 			bcopy(oi_opt, pi_opt, optlen);
2362 		} else {
2363 			if (optlen == 1) {
2364 				*pi_opt = 0;
2365 			} else {
2366 				/*
2367 				 * Copy the type and data length fields.
2368 				 * Zero the option data by skipping
2369 				 * option type and option data len
2370 				 * fields.
2371 				 */
2372 				*pi_opt = *oi_opt;
2373 				*(pi_opt + 1) = *(oi_opt + 1);
2374 				bzero(pi_opt + 2, optlen - 2);
2375 			}
2376 		}
2377 		ehdrlen -= optlen;
2378 		oi_opt += optlen;
2379 		pi_opt += optlen;
2380 	}
2381 	return (0);
2382 bad_opt:
2383 	return (-1);
2384 }
2385 
2386 /*
2387  * Construct a pseudo header for AH, processing all the options.
2388  *
2389  * oip6h is the IPv6 header of the incoming or outgoing packet.
2390  * ip6h is the pointer to the pseudo headers IPV6 header. All
2391  * the space needed for the options have been allocated including
2392  * the AH header.
2393  *
2394  * If copy_always is set, all the options that appear before AH are copied
2395  * blindly without checking for IP6OPT_MUTABLE. This is used by
2396  * ah_auth_out_done().  Please refer to that function for details.
2397  *
2398  * NOTE :
2399  *
2400  * *  AH header is never copied in this function even if copy_always
2401  *    is set. It just returns the ah_offset - offset of the AH header
2402  *    and the caller needs to do the copying. This is done so that we
2403  *    don't have pass extra arguments e.g. SA etc. and also,
2404  *    it is not needed when ah_auth_out_done is calling this function.
2405  */
2406 static uint_t
2407 ah_fix_phdr_v6(ip6_t *ip6h, ip6_t *oip6h, boolean_t outbound,
2408     boolean_t copy_always)
2409 {
2410 	uint8_t	*oi_opt;
2411 	uint8_t	*pi_opt;
2412 	uint8_t nexthdr;
2413 	uint8_t *prev_nexthdr;
2414 	ip6_hbh_t *hbhhdr;
2415 	ip6_dest_t *dsthdr = NULL;
2416 	ip6_rthdr0_t *rthdr;
2417 	int ehdrlen;
2418 	ah_t *ah;
2419 	int ret;
2420 
2421 	/*
2422 	 * In the outbound case for source route, ULP has already moved
2423 	 * the first hop, which is now in ip6_dst. We need to re-arrange
2424 	 * the header to make it look like how it would appear in the
2425 	 * receiver i.e
2426 	 *
2427 	 * Because of ip_massage_options_v6 the header looks like
2428 	 * this :
2429 	 *
2430 	 * ip6_src = S, ip6_dst = I1. followed by I2,I3,D.
2431 	 *
2432 	 * When it reaches the receiver, it would look like
2433 	 *
2434 	 * ip6_src = S, ip6_dst = D. followed by I1,I2,I3.
2435 	 *
2436 	 * NOTE : We assume that there are no problems with the options
2437 	 * as IP should have already checked this.
2438 	 */
2439 
2440 	oi_opt = (uchar_t *)&oip6h[1];
2441 	pi_opt = (uchar_t *)&ip6h[1];
2442 
2443 	/*
2444 	 * We set the prev_nexthdr properly in the pseudo header.
2445 	 * After we finish authentication and come back from the
2446 	 * algorithm module, pseudo header will become the real
2447 	 * IP header.
2448 	 */
2449 	prev_nexthdr = (uint8_t *)&ip6h->ip6_nxt;
2450 	nexthdr = oip6h->ip6_nxt;
2451 	/* Assume IP has already stripped it */
2452 	ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW);
2453 	ah = NULL;
2454 	dsthdr = NULL;
2455 	for (;;) {
2456 		switch (nexthdr) {
2457 		case IPPROTO_HOPOPTS:
2458 			hbhhdr = (ip6_hbh_t *)oi_opt;
2459 			nexthdr = hbhhdr->ip6h_nxt;
2460 			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
2461 			ret = ah_fix_tlv_options_v6(oi_opt, pi_opt, ehdrlen,
2462 			    IPPROTO_HOPOPTS, copy_always);
2463 			/*
2464 			 * Return a zero offset indicating error if there
2465 			 * was error.
2466 			 */
2467 			if (ret == -1)
2468 				return (0);
2469 			hbhhdr = (ip6_hbh_t *)pi_opt;
2470 			prev_nexthdr = (uint8_t *)&hbhhdr->ip6h_nxt;
2471 			break;
2472 		case IPPROTO_ROUTING:
2473 			rthdr = (ip6_rthdr0_t *)oi_opt;
2474 			nexthdr = rthdr->ip6r0_nxt;
2475 			ehdrlen = 8 * (rthdr->ip6r0_len + 1);
2476 			if (!copy_always && outbound) {
2477 				int i, left;
2478 				ip6_rthdr0_t *prthdr;
2479 				in6_addr_t *ap, *pap;
2480 
2481 				left = rthdr->ip6r0_segleft;
2482 				prthdr = (ip6_rthdr0_t *)pi_opt;
2483 				pap = (in6_addr_t *)(prthdr + 1);
2484 				ap = (in6_addr_t *)(rthdr + 1);
2485 				/*
2486 				 * First eight bytes except seg_left
2487 				 * does not change en route.
2488 				 */
2489 				bcopy(oi_opt, pi_opt, 8);
2490 				prthdr->ip6r0_segleft = 0;
2491 				/*
2492 				 * First address has been moved to
2493 				 * the destination address of the
2494 				 * ip header by ip_massage_options_v6.
2495 				 * And the real destination address is
2496 				 * in the last address part of the
2497 				 * option.
2498 				 */
2499 				*pap = oip6h->ip6_dst;
2500 				for (i = 1; i < left - 1; i++)
2501 					pap[i] = ap[i - 1];
2502 				ip6h->ip6_dst = *(ap + left - 1);
2503 			} else {
2504 				bcopy(oi_opt, pi_opt, ehdrlen);
2505 			}
2506 			rthdr = (ip6_rthdr0_t *)pi_opt;
2507 			prev_nexthdr = (uint8_t *)&rthdr->ip6r0_nxt;
2508 			break;
2509 		case IPPROTO_DSTOPTS:
2510 			/*
2511 			 * Destination options are tricky.  If there is
2512 			 * a terminal (e.g. non-IPv6-extension) header
2513 			 * following the destination options, don't
2514 			 * reset prev_nexthdr or advance the AH insertion
2515 			 * point and just treat this as a terminal header.
2516 			 *
2517 			 * If this is an inbound packet, just deal with
2518 			 * it as is.
2519 			 */
2520 			dsthdr = (ip6_dest_t *)oi_opt;
2521 			/*
2522 			 * XXX I hope common-subexpression elimination
2523 			 * saves us the double-evaluate.
2524 			 */
2525 			if (outbound && dsthdr->ip6d_nxt != IPPROTO_ROUTING &&
2526 			    dsthdr->ip6d_nxt != IPPROTO_HOPOPTS)
2527 				goto terminal_hdr;
2528 			nexthdr = dsthdr->ip6d_nxt;
2529 			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
2530 			ret = ah_fix_tlv_options_v6(oi_opt, pi_opt, ehdrlen,
2531 			    IPPROTO_DSTOPTS, copy_always);
2532 			/*
2533 			 * Return a zero offset indicating error if there
2534 			 * was error.
2535 			 */
2536 			if (ret == -1)
2537 				return (0);
2538 			break;
2539 		case IPPROTO_AH:
2540 			/*
2541 			 * Be conservative in what you send.  We shouldn't
2542 			 * see two same-scoped AH's in one packet.
2543 			 * (Inner-IP-scoped AH will be hit by terminal
2544 			 * header of IP or IPv6.)
2545 			 */
2546 			ASSERT(!outbound);
2547 			return ((uint_t)(pi_opt - (uint8_t *)ip6h));
2548 		default:
2549 			ASSERT(outbound);
2550 terminal_hdr:
2551 			*prev_nexthdr = IPPROTO_AH;
2552 			ah = (ah_t *)pi_opt;
2553 			ah->ah_nexthdr = nexthdr;
2554 			return ((uint_t)(pi_opt - (uint8_t *)ip6h));
2555 		}
2556 		pi_opt += ehdrlen;
2557 		oi_opt += ehdrlen;
2558 	}
2559 	/* NOTREACHED */
2560 }
2561 
2562 static boolean_t
2563 ah_finish_up(ah_t *phdr_ah, ah_t *inbound_ah, ipsa_t *assoc,
2564     int ah_data_sz, int ah_align_sz, ipsecah_stack_t *ahstack)
2565 {
2566 	int i;
2567 
2568 	/*
2569 	 * Padding :
2570 	 *
2571 	 * 1) Authentication data may have to be padded
2572 	 * before ICV calculation if ICV is not a multiple
2573 	 * of 64 bits. This padding is arbitrary and transmitted
2574 	 * with the packet at the end of the authentication data.
2575 	 * Payload length should include the padding bytes.
2576 	 *
2577 	 * 2) Explicit padding of the whole datagram may be
2578 	 * required by the algorithm which need not be
2579 	 * transmitted. It is assumed that this will be taken
2580 	 * care by the algorithm module.
2581 	 */
2582 	bzero(phdr_ah + 1, ah_data_sz);	/* Zero out ICV for pseudo-hdr. */
2583 
2584 	if (inbound_ah == NULL) {
2585 		/* Outbound AH datagram. */
2586 
2587 		phdr_ah->ah_length = (ah_align_sz >> 2) + 1;
2588 		phdr_ah->ah_reserved = 0;
2589 		phdr_ah->ah_spi = assoc->ipsa_spi;
2590 
2591 		phdr_ah->ah_replay =
2592 		    htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1));
2593 		if (phdr_ah->ah_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2594 			/*
2595 			 * XXX We have replay counter wrapping.  We probably
2596 			 * want to nuke this SA (and its peer).
2597 			 */
2598 			ipsec_assocfailure(info.mi_idnum, 0, 0,
2599 			    SL_ERROR | SL_CONSOLE | SL_WARN,
2600 			    "Outbound AH SA (0x%x), dst %s has wrapped "
2601 			    "sequence.\n", phdr_ah->ah_spi,
2602 			    assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
2603 			    ahstack->ipsecah_netstack);
2604 
2605 			sadb_replay_delete(assoc);
2606 			/* Caller will free phdr_mp and return NULL. */
2607 			return (B_FALSE);
2608 		}
2609 
2610 		if (ah_data_sz != ah_align_sz) {
2611 			uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) +
2612 			    ah_data_sz);
2613 
2614 			for (i = 0; i < (ah_align_sz - ah_data_sz); i++) {
2615 				pad[i] = (uchar_t)i;	/* Fill the padding */
2616 			}
2617 		}
2618 	} else {
2619 		/* Inbound AH datagram. */
2620 		phdr_ah->ah_nexthdr = inbound_ah->ah_nexthdr;
2621 		phdr_ah->ah_length = inbound_ah->ah_length;
2622 		phdr_ah->ah_reserved = 0;
2623 		ASSERT(inbound_ah->ah_spi == assoc->ipsa_spi);
2624 		phdr_ah->ah_spi = inbound_ah->ah_spi;
2625 		phdr_ah->ah_replay = inbound_ah->ah_replay;
2626 
2627 		if (ah_data_sz != ah_align_sz) {
2628 			uchar_t *opad = ((uchar_t *)inbound_ah +
2629 			    sizeof (ah_t) + ah_data_sz);
2630 			uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) +
2631 			    ah_data_sz);
2632 
2633 			for (i = 0; i < (ah_align_sz - ah_data_sz); i++) {
2634 				pad[i] = opad[i];	/* Copy the padding */
2635 			}
2636 		}
2637 	}
2638 
2639 	return (B_TRUE);
2640 }
2641 
2642 /*
2643  * Called upon failing the inbound ICV check. The message passed as
2644  * argument is freed.
2645  */
2646 static void
2647 ah_log_bad_auth(mblk_t *ipsec_in)
2648 {
2649 	mblk_t *mp = ipsec_in->b_cont->b_cont;
2650 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr;
2651 	boolean_t isv4 = ii->ipsec_in_v4;
2652 	ipsa_t *assoc = ii->ipsec_in_ah_sa;
2653 	int af;
2654 	void *addr;
2655 	netstack_t	*ns = ii->ipsec_in_ns;
2656 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
2657 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2658 
2659 	mp->b_rptr -= ii->ipsec_in_skip_len;
2660 
2661 	if (isv4) {
2662 		ipha_t *ipha = (ipha_t *)mp->b_rptr;
2663 		addr = &ipha->ipha_dst;
2664 		af = AF_INET;
2665 	} else {
2666 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2667 		addr = &ip6h->ip6_dst;
2668 		af = AF_INET6;
2669 	}
2670 
2671 	/*
2672 	 * Log the event. Don't print to the console, block
2673 	 * potential denial-of-service attack.
2674 	 */
2675 	AH_BUMP_STAT(ahstack, bad_auth);
2676 
2677 	ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
2678 	    "AH Authentication failed spi %x, dst_addr %s",
2679 	    assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
2680 
2681 	IP_AH_BUMP_STAT(ipss, in_discards);
2682 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
2683 	    DROPPER(ipss, ipds_ah_bad_auth),
2684 	    &ahstack->ah_dropper);
2685 }
2686 
2687 /*
2688  * Kernel crypto framework callback invoked after completion of async
2689  * crypto requests.
2690  */
2691 static void
2692 ah_kcf_callback(void *arg, int status)
2693 {
2694 	mblk_t *ipsec_mp = (mblk_t *)arg;
2695 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
2696 	boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN);
2697 	netstackid_t	stackid;
2698 	netstack_t	*ns, *ns_arg;
2699 	ipsec_stack_t	*ipss;
2700 	ipsecah_stack_t	*ahstack;
2701 	ipsec_out_t	*io = (ipsec_out_t *)ii;
2702 
2703 	ASSERT(ipsec_mp->b_cont != NULL);
2704 
2705 	if (is_inbound) {
2706 		stackid = ii->ipsec_in_stackid;
2707 		ns_arg = ii->ipsec_in_ns;
2708 	} else {
2709 		stackid = io->ipsec_out_stackid;
2710 		ns_arg = io->ipsec_out_ns;
2711 	}
2712 	/*
2713 	 * Verify that the netstack is still around; could have vanished
2714 	 * while kEf was doing its work.
2715 	 */
2716 	ns = netstack_find_by_stackid(stackid);
2717 	if (ns == NULL || ns != ns_arg) {
2718 		/* Disappeared on us */
2719 		if (ns != NULL)
2720 			netstack_rele(ns);
2721 		freemsg(ipsec_mp);
2722 		return;
2723 	}
2724 
2725 	ahstack = ns->netstack_ipsecah;
2726 	ipss = ns->netstack_ipsec;
2727 
2728 	if (status == CRYPTO_SUCCESS) {
2729 		if (is_inbound) {
2730 			if (ah_auth_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) {
2731 				netstack_rele(ns);
2732 				return;
2733 			}
2734 			/* finish IPsec processing */
2735 			ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL);
2736 		} else {
2737 			ipha_t *ipha;
2738 
2739 			if (ah_auth_out_done(ipsec_mp) !=
2740 			    IPSEC_STATUS_SUCCESS) {
2741 				netstack_rele(ns);
2742 				return;
2743 			}
2744 
2745 			/* finish IPsec processing */
2746 			ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr;
2747 			if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
2748 				ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL,
2749 				    NULL);
2750 			} else {
2751 				ip6_t *ip6h = (ip6_t *)ipha;
2752 				ip_wput_ipsec_out_v6(NULL, ipsec_mp, ip6h,
2753 				    NULL, NULL);
2754 			}
2755 		}
2756 
2757 	} else if (status == CRYPTO_INVALID_MAC) {
2758 		ah_log_bad_auth(ipsec_mp);
2759 	} else {
2760 		ah1dbg(ahstack, ("ah_kcf_callback: crypto failed with 0x%x\n",
2761 		    status));
2762 		AH_BUMP_STAT(ahstack, crypto_failures);
2763 		if (is_inbound)
2764 			IP_AH_BUMP_STAT(ipss, in_discards);
2765 		else
2766 			AH_BUMP_STAT(ahstack, out_discards);
2767 		ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL,
2768 		    DROPPER(ipss, ipds_ah_crypto_failed),
2769 		    &ahstack->ah_dropper);
2770 	}
2771 	netstack_rele(ns);
2772 }
2773 
2774 /*
2775  * Invoked on kernel crypto failure during inbound and outbound processing.
2776  */
2777 static void
2778 ah_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc,
2779     ipsecah_stack_t *ahstack)
2780 {
2781 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2782 
2783 	ah1dbg(ahstack, ("crypto failed for %s AH with 0x%x\n",
2784 	    is_inbound ? "inbound" : "outbound", kef_rc));
2785 	ip_drop_packet(mp, is_inbound, NULL, NULL,
2786 	    DROPPER(ipss, ipds_ah_crypto_failed),
2787 	    &ahstack->ah_dropper);
2788 	AH_BUMP_STAT(ahstack, crypto_failures);
2789 	if (is_inbound)
2790 		IP_AH_BUMP_STAT(ipss, in_discards);
2791 	else
2792 		AH_BUMP_STAT(ahstack, out_discards);
2793 }
2794 
/*
 * Helper macros for the ah_submit_req_{inbound,outbound}() functions.
 */

/*
 * Prepare a crypto call request whose completion callback is
 * ah_kcf_callback().  CRYPTO_ALWAYS_QUEUE is added when the stack's
 * authentication algorithms are set to asynchronous execution.
 *
 * NOTE: the callback argument is the variable named ipsec_mp in the
 * calling function; it is not a macro parameter.
 */
#define	AH_INIT_CALLREQ(_req, _stack) {					\
	(_req)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_RESTRICTED;		\
	if ((_stack)->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==		\
	    IPSEC_ALGS_EXEC_ASYNC)					\
		(_req)->cr_flag |= CRYPTO_ALWAYS_QUEUE;			\
	(_req)->cr_callback_arg = ipsec_mp;				\
	(_req)->cr_callback_func = ah_kcf_callback;			\
}

/*
 * Describe the input of the MAC operation: _len bytes taken from the
 * start of the mblk chain _mp.
 */
#define	AH_INIT_CRYPTO_DATA(_cd, _len, _mp) {				\
	(_cd)->cd_format = CRYPTO_DATA_MBLK;				\
	(_cd)->cd_mp = (_mp);						\
	(_cd)->cd_offset = 0;						\
	(_cd)->cd_length = (_len);					\
}

/*
 * Describe the ICV buffer of the MAC operation: a raw buffer of _len
 * bytes located at _buf.
 */
#define	AH_INIT_CRYPTO_MAC(_cd, _len, _buf) {				\
	(_cd)->cd_format = CRYPTO_DATA_RAW;				\
	(_cd)->cd_offset = 0;						\
	(_cd)->cd_length = (_len);					\
	(_cd)->cd_raw.iov_base = (_buf);				\
	(_cd)->cd_raw.iov_len = (_len);					\
}
2822 
2823 /*
2824  * Submit an inbound packet for processing by the crypto framework.
2825  */
2826 static ipsec_status_t
2827 ah_submit_req_inbound(mblk_t *ipsec_mp, size_t skip_len, uint32_t ah_offset,
2828     ipsa_t *assoc)
2829 {
2830 	int kef_rc;
2831 	mblk_t *phdr_mp;
2832 	crypto_call_req_t call_req;
2833 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
2834 	uint_t icv_len = assoc->ipsa_mac_len;
2835 	crypto_ctx_template_t ctx_tmpl;
2836 	netstack_t	*ns = ii->ipsec_in_ns;
2837 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
2838 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2839 
2840 	phdr_mp = ipsec_mp->b_cont;
2841 	ASSERT(phdr_mp != NULL);
2842 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
2843 
2844 	/*
2845 	 * In case kEF queues and calls back, keep netstackid_t for
2846 	 * verification that the IP instance is still around in
2847 	 * ah_kcf_callback().
2848 	 */
2849 	ii->ipsec_in_stackid = ns->netstack_stackid;
2850 
2851 	/* init arguments for the crypto framework */
2852 	AH_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data, AH_MSGSIZE(phdr_mp),
2853 	    phdr_mp);
2854 
2855 	AH_INIT_CRYPTO_MAC(&ii->ipsec_in_crypto_mac, icv_len,
2856 	    (char *)phdr_mp->b_cont->b_rptr - skip_len + ah_offset +
2857 	    sizeof (ah_t));
2858 
2859 	AH_INIT_CALLREQ(&call_req, ipss);
2860 
2861 	ii->ipsec_in_skip_len = skip_len;
2862 
2863 	IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, ctx_tmpl);
2864 
2865 	/* call KEF to do the MAC operation */
2866 	kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
2867 	    &ii->ipsec_in_crypto_data, &assoc->ipsa_kcfauthkey, ctx_tmpl,
2868 	    &ii->ipsec_in_crypto_mac, &call_req);
2869 
2870 	switch (kef_rc) {
2871 	case CRYPTO_SUCCESS:
2872 		AH_BUMP_STAT(ahstack, crypto_sync);
2873 		return (ah_auth_in_done(ipsec_mp));
2874 	case CRYPTO_QUEUED:
2875 		/* ah_kcf_callback() will be invoked on completion */
2876 		AH_BUMP_STAT(ahstack, crypto_async);
2877 		return (IPSEC_STATUS_PENDING);
2878 	case CRYPTO_INVALID_MAC:
2879 		AH_BUMP_STAT(ahstack, crypto_sync);
2880 		ah_log_bad_auth(ipsec_mp);
2881 		return (IPSEC_STATUS_FAILED);
2882 	}
2883 
2884 	ah_crypto_failed(ipsec_mp, B_TRUE, kef_rc, ahstack);
2885 	return (IPSEC_STATUS_FAILED);
2886 }
2887 
2888 /*
2889  * Submit an outbound packet for processing by the crypto framework.
2890  */
2891 static ipsec_status_t
2892 ah_submit_req_outbound(mblk_t *ipsec_mp, size_t skip_len, ipsa_t *assoc)
2893 {
2894 	int kef_rc;
2895 	mblk_t *phdr_mp;
2896 	crypto_call_req_t call_req;
2897 	ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr;
2898 	uint_t icv_len = assoc->ipsa_mac_len;
2899 	netstack_t	*ns = io->ipsec_out_ns;
2900 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
2901 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2902 
2903 	phdr_mp = ipsec_mp->b_cont;
2904 	ASSERT(phdr_mp != NULL);
2905 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
2906 
2907 	/*
2908 	 * In case kEF queues and calls back, keep netstackid_t for
2909 	 * verification that the IP instance is still around in
2910 	 * ah_kcf_callback().
2911 	 */
2912 	io->ipsec_out_stackid = ns->netstack_stackid;
2913 
2914 	/* init arguments for the crypto framework */
2915 	AH_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data, AH_MSGSIZE(phdr_mp),
2916 	    phdr_mp);
2917 
2918 	AH_INIT_CRYPTO_MAC(&io->ipsec_out_crypto_mac, icv_len,
2919 	    (char *)phdr_mp->b_wptr);
2920 
2921 	AH_INIT_CALLREQ(&call_req, ipss);
2922 
2923 	io->ipsec_out_skip_len = skip_len;
2924 
2925 	ASSERT(io->ipsec_out_ah_sa != NULL);
2926 
2927 	/* call KEF to do the MAC operation */
2928 	kef_rc = crypto_mac(&assoc->ipsa_amech, &io->ipsec_out_crypto_data,
2929 	    &assoc->ipsa_kcfauthkey, assoc->ipsa_authtmpl,
2930 	    &io->ipsec_out_crypto_mac, &call_req);
2931 
2932 	switch (kef_rc) {
2933 	case CRYPTO_SUCCESS:
2934 		AH_BUMP_STAT(ahstack, crypto_sync);
2935 		return (ah_auth_out_done(ipsec_mp));
2936 	case CRYPTO_QUEUED:
2937 		/* ah_kcf_callback() will be invoked on completion */
2938 		AH_BUMP_STAT(ahstack, crypto_async);
2939 		return (IPSEC_STATUS_PENDING);
2940 	}
2941 
2942 	ah_crypto_failed(ipsec_mp, B_FALSE, kef_rc, ahstack);
2943 	return (IPSEC_STATUS_FAILED);
2944 }
2945 
2946 /*
2947  * This function constructs a pseudo header by looking at the IP header
2948  * and options if any. This is called for both outbound and inbound,
2949  * before computing the ICV.
2950  */
2951 static mblk_t *
2952 ah_process_ip_options_v6(mblk_t *mp, ipsa_t *assoc, int *length_to_skip,
2953     uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack)
2954 {
2955 	ip6_t	*ip6h;
2956 	ip6_t	*oip6h;
2957 	mblk_t 	*phdr_mp;
2958 	int option_length;
2959 	uint_t	ah_align_sz;
2960 	uint_t ah_offset;
2961 	int hdr_size;
2962 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2963 
2964 	/*
2965 	 * Allocate space for the authentication data also. It is
2966 	 * useful both during the ICV calculation where we need to
2967 	 * feed in zeroes and while sending the datagram back to IP
2968 	 * where we will be using the same space.
2969 	 *
2970 	 * We need to allocate space for padding bytes if it is not
2971 	 * a multiple of IPV6_PADDING_ALIGN.
2972 	 *
2973 	 * In addition, we allocate space for the ICV computed by
2974 	 * the kernel crypto framework, saving us a separate kmem
2975 	 * allocation down the road.
2976 	 */
2977 
2978 	ah_align_sz = P2ALIGN(ah_data_sz + IPV6_PADDING_ALIGN - 1,
2979 	    IPV6_PADDING_ALIGN);
2980 
2981 	ASSERT(ah_align_sz >= ah_data_sz);
2982 
2983 	hdr_size = ipsec_ah_get_hdr_size_v6(mp, B_FALSE);
2984 	option_length = hdr_size - IPV6_HDR_LEN;
2985 
2986 	/* This was not included in ipsec_ah_get_hdr_size_v6() */
2987 	hdr_size += (sizeof (ah_t) + ah_align_sz);
2988 
2989 	if (!outbound && (MBLKL(mp) < hdr_size)) {
2990 		/*
2991 		 * We have post-AH header options in a separate mblk,
2992 		 * a pullup is required.
2993 		 */
2994 		if (!pullupmsg(mp, hdr_size))
2995 			return (NULL);
2996 	}
2997 
2998 	if ((phdr_mp = allocb_cred(hdr_size + ah_data_sz,
2999 	    DB_CRED(mp))) == NULL) {
3000 		return (NULL);
3001 	}
3002 
3003 	oip6h = (ip6_t *)mp->b_rptr;
3004 
3005 	/*
3006 	 * Form the basic IP header first. Zero out the header
3007 	 * so that the mutable fields are zeroed out.
3008 	 */
3009 	ip6h = (ip6_t *)phdr_mp->b_rptr;
3010 	bzero(ip6h, sizeof (ip6_t));
3011 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
3012 
3013 	if (outbound) {
3014 		/*
3015 		 * Include the size of AH and authentication data.
3016 		 * This is how our recipient would compute the
3017 		 * authentication data. Look at what we do in the
3018 		 * inbound case below.
3019 		 */
3020 		ip6h->ip6_plen = htons(ntohs(oip6h->ip6_plen) +
3021 		    sizeof (ah_t) + ah_align_sz);
3022 	} else {
3023 		ip6h->ip6_plen = oip6h->ip6_plen;
3024 	}
3025 
3026 	ip6h->ip6_src = oip6h->ip6_src;
3027 	ip6h->ip6_dst = oip6h->ip6_dst;
3028 
3029 	*length_to_skip = IPV6_HDR_LEN;
3030 	if (option_length == 0) {
3031 		/* Form the AH header */
3032 		ip6h->ip6_nxt = IPPROTO_AH;
3033 		((ah_t *)(ip6h + 1))->ah_nexthdr = oip6h->ip6_nxt;
3034 		ah_offset = *length_to_skip;
3035 	} else {
3036 		ip6h->ip6_nxt = oip6h->ip6_nxt;
3037 		/* option_length does not include the AH header's size */
3038 		*length_to_skip += option_length;
3039 
3040 		ah_offset = ah_fix_phdr_v6(ip6h, oip6h, outbound, B_FALSE);
3041 		if (ah_offset == 0) {
3042 			ip_drop_packet(phdr_mp, !outbound, NULL, NULL,
3043 			    DROPPER(ipss, ipds_ah_bad_v6_hdrs),
3044 			    &ahstack->ah_dropper);
3045 			return (NULL);
3046 		}
3047 	}
3048 
3049 	if (!ah_finish_up(((ah_t *)((uint8_t *)ip6h + ah_offset)),
3050 	    (outbound ? NULL : ((ah_t *)((uint8_t *)oip6h + ah_offset))),
3051 	    assoc, ah_data_sz, ah_align_sz, ahstack)) {
3052 		freeb(phdr_mp);
3053 		/*
3054 		 * Returning NULL will tell the caller to
3055 		 * IPSA_REFELE(), free the memory, etc.
3056 		 */
3057 		return (NULL);
3058 	}
3059 
3060 	phdr_mp->b_wptr = ((uint8_t *)ip6h + ah_offset + sizeof (ah_t) +
3061 	    ah_align_sz);
3062 	if (!outbound)
3063 		*length_to_skip += sizeof (ah_t) + ah_align_sz;
3064 	return (phdr_mp);
3065 }
3066 
3067 /*
3068  * This function constructs a pseudo header by looking at the IP header
3069  * and options if any. This is called for both outbound and inbound,
3070  * before computing the ICV.
3071  */
3072 static mblk_t *
3073 ah_process_ip_options_v4(mblk_t *mp, ipsa_t *assoc, int *length_to_skip,
3074     uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack)
3075 {
3076 	ipoptp_t opts;
3077 	uint32_t option_length;
3078 	ipha_t	*ipha;
3079 	ipha_t	*oipha;
3080 	mblk_t 	*phdr_mp;
3081 	int	 size;
3082 	uchar_t	*optptr;
3083 	uint8_t optval;
3084 	uint8_t optlen;
3085 	ipaddr_t dst;
3086 	uint32_t v_hlen_tos_len;
3087 	int ip_hdr_length;
3088 	uint_t	ah_align_sz;
3089 	uint32_t off;
3090 
3091 #ifdef	_BIG_ENDIAN
3092 #define	V_HLEN	(v_hlen_tos_len >> 24)
3093 #else
3094 #define	V_HLEN	(v_hlen_tos_len & 0xFF)
3095 #endif
3096 
3097 	oipha = (ipha_t *)mp->b_rptr;
3098 	v_hlen_tos_len = ((uint32_t *)oipha)[0];
3099 
3100 	/*
3101 	 * Allocate space for the authentication data also. It is
3102 	 * useful both during the ICV calculation where we need to
3103 	 * feed in zeroes and while sending the datagram back to IP
3104 	 * where we will be using the same space.
3105 	 *
3106 	 * We need to allocate space for padding bytes if it is not
3107 	 * a multiple of IPV4_PADDING_ALIGN.
3108 	 *
3109 	 * In addition, we allocate space for the ICV computed by
3110 	 * the kernel crypto framework, saving us a separate kmem
3111 	 * allocation down the road.
3112 	 */
3113 
3114 	ah_align_sz = P2ALIGN(ah_data_sz + IPV4_PADDING_ALIGN - 1,
3115 	    IPV4_PADDING_ALIGN);
3116 
3117 	ASSERT(ah_align_sz >= ah_data_sz);
3118 
3119 	size = IP_SIMPLE_HDR_LENGTH + sizeof (ah_t) + ah_align_sz +
3120 	    ah_data_sz;
3121 
3122 	if (V_HLEN != IP_SIMPLE_HDR_VERSION) {
3123 		option_length = oipha->ipha_version_and_hdr_length -
3124 		    (uint8_t)((IP_VERSION << 4) +
3125 		    IP_SIMPLE_HDR_LENGTH_IN_WORDS);
3126 		option_length <<= 2;
3127 		size += option_length;
3128 	}
3129 
3130 	if ((phdr_mp = allocb_cred(size, DB_CRED(mp))) == NULL) {
3131 		return (NULL);
3132 	}
3133 
3134 	/*
3135 	 * Form the basic IP header first.
3136 	 */
3137 	ipha = (ipha_t *)phdr_mp->b_rptr;
3138 	ipha->ipha_version_and_hdr_length = oipha->ipha_version_and_hdr_length;
3139 	ipha->ipha_type_of_service = 0;
3140 
3141 	if (outbound) {
3142 		/*
3143 		 * Include the size of AH and authentication data.
3144 		 * This is how our recipient would compute the
3145 		 * authentication data. Look at what we do in the
3146 		 * inbound case below.
3147 		 */
3148 		ipha->ipha_length = ntohs(htons(oipha->ipha_length) +
3149 		    sizeof (ah_t) + ah_align_sz);
3150 	} else {
3151 		ipha->ipha_length = oipha->ipha_length;
3152 	}
3153 
3154 	ipha->ipha_ident = oipha->ipha_ident;
3155 	ipha->ipha_fragment_offset_and_flags = 0;
3156 	ipha->ipha_ttl = 0;
3157 	ipha->ipha_protocol = IPPROTO_AH;
3158 	ipha->ipha_hdr_checksum = 0;
3159 	ipha->ipha_src = oipha->ipha_src;
3160 	ipha->ipha_dst = dst = oipha->ipha_dst;
3161 
3162 	/*
3163 	 * If there is no option to process return now.
3164 	 */
3165 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
3166 
3167 	if (V_HLEN == IP_SIMPLE_HDR_VERSION) {
3168 		/* Form the AH header */
3169 		goto ah_hdr;
3170 	}
3171 
3172 	ip_hdr_length += option_length;
3173 
3174 	/*
3175 	 * We have options. In the outbound case for source route,
3176 	 * ULP has already moved the first hop, which is now in
3177 	 * ipha_dst. We need the final destination for the calculation
3178 	 * of authentication data. And also make sure that mutable
3179 	 * and experimental fields are zeroed out in the IP options.
3180 	 */
3181 
3182 	bcopy(&oipha[1], &ipha[1], option_length);
3183 
3184 	for (optval = ipoptp_first(&opts, ipha);
3185 	    optval != IPOPT_EOL;
3186 	    optval = ipoptp_next(&opts)) {
3187 		optptr = opts.ipoptp_cur;
3188 		optlen = opts.ipoptp_len;
3189 		switch (optval) {
3190 		case IPOPT_EXTSEC:
3191 		case IPOPT_COMSEC:
3192 		case IPOPT_RA:
3193 		case IPOPT_SDMDD:
3194 		case IPOPT_SECURITY:
3195 			/*
3196 			 * These options are Immutable, leave them as-is.
3197 			 * Note that IPOPT_NOP is also Immutable, but it
3198 			 * was skipped by ipoptp_next() and thus remains
3199 			 * intact in the header.
3200 			 */
3201 			break;
3202 		case IPOPT_SSRR:
3203 		case IPOPT_LSRR:
3204 			if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0)
3205 				goto bad_ipv4opt;
3206 			/*
3207 			 * These two are mutable and will be zeroed, but
3208 			 * first get the final destination.
3209 			 */
3210 			off = optptr[IPOPT_OFFSET];
3211 			/*
3212 			 * If one of the conditions is true, it means
3213 			 * end of options and dst already has the right
3214 			 * value. So, just fall through.
3215 			 */
3216 			if (!(optlen < IP_ADDR_LEN || off > optlen - 3)) {
3217 				off = optlen - IP_ADDR_LEN;
3218 				bcopy(&optptr[off], &dst, IP_ADDR_LEN);
3219 			}
3220 			/* FALLTHRU */
3221 		case IPOPT_RR:
3222 		case IPOPT_TS:
3223 		case IPOPT_SATID:
3224 		default:
3225 			/*
3226 			 * optlen should include from the beginning of an
3227 			 * option.
3228 			 * NOTE : Stream Identifier Option (SID): RFC 791
3229 			 * shows the bit pattern of optlen as 2 and documents
3230 			 * the length as 4. We assume it to be 2 here.
3231 			 */
3232 			bzero(optptr, optlen);
3233 			break;
3234 		}
3235 	}
3236 
3237 	if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0) {
3238 bad_ipv4opt:
3239 		ah1dbg(ahstack, ("AH : bad IPv4 option"));
3240 		freeb(phdr_mp);
3241 		return (NULL);
3242 	}
3243 
3244 	/*
3245 	 * Don't change ipha_dst for an inbound datagram as it points
3246 	 * to the right value. Only for the outbound with LSRR/SSRR,
3247 	 * because of ip_massage_options called by the ULP, ipha_dst
3248 	 * points to the first hop and we need to use the final
3249 	 * destination for computing the ICV.
3250 	 */
3251 
3252 	if (outbound)
3253 		ipha->ipha_dst = dst;
3254 ah_hdr:
3255 	((ah_t *)((uint8_t *)ipha + ip_hdr_length))->ah_nexthdr =
3256 	    oipha->ipha_protocol;
3257 	if (!ah_finish_up(((ah_t *)((uint8_t *)ipha + ip_hdr_length)),
3258 	    (outbound ? NULL : ((ah_t *)((uint8_t *)oipha + ip_hdr_length))),
3259 	    assoc, ah_data_sz, ah_align_sz, ahstack)) {
3260 		freeb(phdr_mp);
3261 		/*
3262 		 * Returning NULL will tell the caller to IPSA_REFELE(), free
3263 		 * the memory, etc.
3264 		 */
3265 		return (NULL);
3266 	}
3267 
3268 	phdr_mp->b_wptr = ((uchar_t *)ipha + ip_hdr_length +
3269 	    sizeof (ah_t) + ah_align_sz);
3270 
3271 	ASSERT(phdr_mp->b_wptr <= phdr_mp->b_datap->db_lim);
3272 	if (outbound)
3273 		*length_to_skip = ip_hdr_length;
3274 	else
3275 		*length_to_skip = ip_hdr_length + sizeof (ah_t) + ah_align_sz;
3276 	return (phdr_mp);
3277 }
3278 
3279 /*
3280  * Authenticate an outbound datagram. This function is called
3281  * whenever IP sends an outbound datagram that needs authentication.
3282  */
3283 static ipsec_status_t
3284 ah_outbound(mblk_t *ipsec_out)
3285 {
3286 	mblk_t *mp;
3287 	mblk_t *phdr_mp;
3288 	ipsec_out_t *oi;
3289 	ipsa_t *assoc;
3290 	int length_to_skip;
3291 	uint_t ah_align_sz;
3292 	uint_t age_bytes;
3293 	netstack_t	*ns;
3294 	ipsec_stack_t	*ipss;
3295 	ipsecah_stack_t	*ahstack;
3296 
3297 	/*
3298 	 * Construct the chain of mblks
3299 	 *
3300 	 * IPSEC_OUT->PSEUDO_HDR->DATA
3301 	 *
3302 	 * one by one.
3303 	 */
3304 
3305 	ASSERT(ipsec_out->b_datap->db_type == M_CTL);
3306 
3307 	ASSERT(MBLKL(ipsec_out) >= sizeof (ipsec_info_t));
3308 
3309 	mp = ipsec_out->b_cont;
3310 	oi = (ipsec_out_t *)ipsec_out->b_rptr;
3311 	ns = oi->ipsec_out_ns;
3312 	ipss = ns->netstack_ipsec;
3313 	ahstack = ns->netstack_ipsecah;
3314 
3315 	AH_BUMP_STAT(ahstack, out_requests);
3316 
3317 	ASSERT(mp->b_datap->db_type == M_DATA);
3318 
3319 	assoc = oi->ipsec_out_ah_sa;
3320 	ASSERT(assoc != NULL);
3321 
3322 	/*
3323 	 * Age SA according to number of bytes that will be sent after
3324 	 * adding the AH header, ICV, and padding to the packet.
3325 	 */
3326 
3327 	if (oi->ipsec_out_v4) {
3328 		ipha_t *ipha = (ipha_t *)mp->b_rptr;
3329 		ah_align_sz = P2ALIGN(assoc->ipsa_mac_len +
3330 		    IPV4_PADDING_ALIGN - 1, IPV4_PADDING_ALIGN);
3331 		age_bytes = ntohs(ipha->ipha_length) + sizeof (ah_t) +
3332 		    ah_align_sz;
3333 	} else {
3334 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
3335 		ah_align_sz = P2ALIGN(assoc->ipsa_mac_len +
3336 		    IPV6_PADDING_ALIGN - 1, IPV6_PADDING_ALIGN);
3337 		age_bytes = sizeof (ip6_t) + ntohs(ip6h->ip6_plen) +
3338 		    sizeof (ah_t) + ah_align_sz;
3339 	}
3340 
3341 	if (!ah_age_bytes(assoc, age_bytes, B_FALSE)) {
3342 		/* rig things as if ipsec_getassocbyconn() failed */
3343 		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3344 		    "AH association 0x%x, dst %s had bytes expire.\n",
3345 		    ntohl(assoc->ipsa_spi), assoc->ipsa_dstaddr, AF_INET,
3346 		    ahstack->ipsecah_netstack);
3347 		freemsg(ipsec_out);
3348 		return (IPSEC_STATUS_FAILED);
3349 	}
3350 
3351 	if (oi->ipsec_out_is_capab_ill) {
3352 		ah3dbg(ahstack, ("ah_outbound: pkt can be accelerated\n"));
3353 		if (oi->ipsec_out_v4)
3354 			return (ah_outbound_accelerated_v4(ipsec_out, assoc));
3355 		else
3356 			return (ah_outbound_accelerated_v6(ipsec_out, assoc));
3357 	}
3358 	AH_BUMP_STAT(ahstack, noaccel);
3359 
3360 	/*
3361 	 * Insert pseudo header:
3362 	 * IPSEC_INFO -> [IP, ULP] => IPSEC_INFO -> [IP, AH, ICV] -> ULP
3363 	 */
3364 
3365 	if (oi->ipsec_out_v4) {
3366 		phdr_mp = ah_process_ip_options_v4(mp, assoc, &length_to_skip,
3367 		    assoc->ipsa_mac_len, B_TRUE, ahstack);
3368 	} else {
3369 		phdr_mp = ah_process_ip_options_v6(mp, assoc, &length_to_skip,
3370 		    assoc->ipsa_mac_len, B_TRUE, ahstack);
3371 	}
3372 
3373 	if (phdr_mp == NULL) {
3374 		AH_BUMP_STAT(ahstack, out_discards);
3375 		ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL,
3376 		    DROPPER(ipss, ipds_ah_bad_v4_opts),
3377 		    &ahstack->ah_dropper);
3378 		return (IPSEC_STATUS_FAILED);
3379 	}
3380 
3381 	ipsec_out->b_cont = phdr_mp;
3382 	phdr_mp->b_cont = mp;
3383 	mp->b_rptr += length_to_skip;
3384 
3385 	/*
3386 	 * At this point ipsec_out points to the IPSEC_OUT, new_mp
3387 	 * points to an mblk containing the pseudo header (IP header,
3388 	 * AH header, and ICV with mutable fields zero'ed out).
3389 	 * mp points to the mblk containing the ULP data. The original
3390 	 * IP header is kept before the ULP data in mp.
3391 	 */
3392 
3393 	/* submit MAC request to KCF */
3394 	return (ah_submit_req_outbound(ipsec_out, length_to_skip, assoc));
3395 }
3396 
3397 static ipsec_status_t
3398 ah_inbound(mblk_t *ipsec_in_mp, void *arg)
3399 {
3400 	mblk_t *data_mp = ipsec_in_mp->b_cont;
3401 	ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr;
3402 	ah_t *ah = (ah_t *)arg;
3403 	ipsa_t *assoc = ii->ipsec_in_ah_sa;
3404 	int length_to_skip;
3405 	int ah_length;
3406 	mblk_t *phdr_mp;
3407 	uint32_t ah_offset;
3408 	netstack_t	*ns = ii->ipsec_in_ns;
3409 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
3410 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3411 
3412 	ASSERT(assoc != NULL);
3413 
3414 	/*
3415 	 * We may wish to check replay in-range-only here as an optimization.
3416 	 * Include the reality check of ipsa->ipsa_replay >
3417 	 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
3418 	 * where N == ipsa->ipsa_replay_wsize.
3419 	 *
3420 	 * Another check that may come here later is the "collision" check.
3421 	 * If legitimate packets flow quickly enough, this won't be a problem,
3422 	 * but collisions may cause authentication algorithm crunching to
3423 	 * take place when it doesn't need to.
3424 	 */
3425 	if (!sadb_replay_peek(assoc, ah->ah_replay)) {
3426 		AH_BUMP_STAT(ahstack, replay_early_failures);
3427 		IP_AH_BUMP_STAT(ipss, in_discards);
3428 		ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL,
3429 		    DROPPER(ipss, ipds_ah_early_replay),
3430 		    &ahstack->ah_dropper);
3431 		return (IPSEC_STATUS_FAILED);
3432 	}
3433 
3434 	/*
3435 	 * The offset of the AH header can be computed from its pointer
3436 	 * within the data mblk, which was pulled up until the AH header
3437 	 * by ipsec_inbound_ah_sa() during SA selection.
3438 	 */
3439 	ah_offset = (uchar_t *)ah - data_mp->b_rptr;
3440 
3441 	/*
3442 	 * Has this packet already been processed by a hardware
3443 	 * IPsec accelerator?
3444 	 */
3445 	if (ii->ipsec_in_accelerated) {
3446 		ah3dbg(ahstack,
3447 		    ("ah_inbound_v6: pkt processed by ill=%d isv6=%d\n",
3448 		    ii->ipsec_in_ill_index, !ii->ipsec_in_v4));
3449 		return (ah_inbound_accelerated(ipsec_in_mp, ii->ipsec_in_v4,
3450 		    assoc, ah_offset));
3451 	}
3452 	AH_BUMP_STAT(ahstack, noaccel);
3453 
3454 	/*
3455 	 * We need to pullup until the ICV before we call
3456 	 * ah_process_ip_options_v6.
3457 	 */
3458 	ah_length = (ah->ah_length << 2) + 8;
3459 
3460 	/*
3461 	 * NOTE : If we want to use any field of IP/AH header, you need
3462 	 * to re-assign following the pullup.
3463 	 */
3464 	if (((uchar_t *)ah + ah_length) > data_mp->b_wptr) {
3465 		if (!pullupmsg(data_mp, (uchar_t *)ah + ah_length -
3466 		    data_mp->b_rptr)) {
3467 			(void) ipsec_rl_strlog(ns, info.mi_idnum, 0, 0,
3468 			    SL_WARN | SL_ERROR,
3469 			    "ah_inbound: Small AH header\n");
3470 			IP_AH_BUMP_STAT(ipss, in_discards);
3471 			ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL,
3472 			    DROPPER(ipss, ipds_ah_nomem),
3473 			    &ahstack->ah_dropper);
3474 			return (IPSEC_STATUS_FAILED);
3475 		}
3476 	}
3477 
3478 	/*
3479 	 * Insert pseudo header:
3480 	 * IPSEC_INFO -> [IP, ULP] => IPSEC_INFO -> [IP, AH, ICV] -> ULP
3481 	 */
3482 	if (ii->ipsec_in_v4) {
3483 		phdr_mp = ah_process_ip_options_v4(data_mp, assoc,
3484 		    &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack);
3485 	} else {
3486 		phdr_mp = ah_process_ip_options_v6(data_mp, assoc,
3487 		    &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack);
3488 	}
3489 
3490 	if (phdr_mp == NULL) {
3491 		IP_AH_BUMP_STAT(ipss, in_discards);
3492 		ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL,
3493 		    (ii->ipsec_in_v4 ?
3494 		    DROPPER(ipss, ipds_ah_bad_v4_opts) :
3495 		    DROPPER(ipss, ipds_ah_bad_v6_hdrs)),
3496 		    &ahstack->ah_dropper);
3497 		return (IPSEC_STATUS_FAILED);
3498 	}
3499 
3500 	ipsec_in_mp->b_cont = phdr_mp;
3501 	phdr_mp->b_cont = data_mp;
3502 	data_mp->b_rptr += length_to_skip;
3503 
3504 	/* submit request to KCF */
3505 	return (ah_submit_req_inbound(ipsec_in_mp, length_to_skip, ah_offset,
3506 	    assoc));
3507 }
3508 
3509 /*
3510  * ah_inbound_accelerated:
3511  * Called from ah_inbound() to process IPsec packets that have been
3512  * accelerated by hardware.
3513  *
3514  * Basically does what ah_auth_in_done() with some changes since
3515  * no pseudo-headers are involved, i.e. the passed message is a
3516  * IPSEC_INFO->DATA.
3517  *
3518  * It is assumed that only packets that have been successfully
3519  * processed by the adapter come here.
3520  *
3521  * 1. get algorithm structure corresponding to association
3522  * 2. calculate pointers to authentication header and ICV
3523  * 3. compare ICV in AH header with ICV in data attributes
3524  *    3.1 if different:
3525  *	  3.1.1 generate error
3526  *        3.1.2 discard message
3527  *    3.2 if ICV matches:
3528  *	  3.2.1 check replay
3529  *        3.2.2 remove AH header
3530  *        3.2.3 age SA byte
3531  *        3.2.4 send to IP
3532  */
3533 ipsec_status_t
3534 ah_inbound_accelerated(mblk_t *ipsec_in, boolean_t isv4, ipsa_t *assoc,
3535     uint32_t ah_offset)
3536 {
3537 	mblk_t *mp;
3538 	ipha_t *ipha;
3539 	ah_t *ah;
3540 	ipsec_in_t *ii;
3541 	uint32_t icv_len;
3542 	uint32_t align_len;
3543 	uint32_t age_bytes;
3544 	ip6_t *ip6h;
3545 	uint8_t *in_icv;
3546 	mblk_t *hada_mp;
3547 	uint32_t next_hdr;
3548 	da_ipsec_t *hada;
3549 	kstat_named_t *counter;
3550 	ipsecah_stack_t	*ahstack;
3551 	netstack_t	*ns;
3552 	ipsec_stack_t	*ipss;
3553 
3554 	ii = (ipsec_in_t *)ipsec_in->b_rptr;
3555 	ns = ii->ipsec_in_ns;
3556 	ahstack = ns->netstack_ipsecah;
3557 	ipss = ns->netstack_ipsec;
3558 
3559 	mp = ipsec_in->b_cont;
3560 	hada_mp = ii->ipsec_in_da;
3561 	ASSERT(hada_mp != NULL);
3562 	hada = (da_ipsec_t *)hada_mp->b_rptr;
3563 
3564 	AH_BUMP_STAT(ahstack, in_accelerated);
3565 
3566 	/*
3567 	 * We only support one level of decapsulation in hardware, so
3568 	 * nuke the pointer.
3569 	 */
3570 	ii->ipsec_in_da = NULL;
3571 	ii->ipsec_in_accelerated = B_FALSE;
3572 
3573 	/*
3574 	 * Extract ICV length from attributes M_CTL and sanity check
3575 	 * its value. We allow the mblk to be smaller than da_ipsec_t
3576 	 * for a small ICV, as long as the entire ICV fits within the mblk.
3577 	 * Also ensures that the ICV length computed by Provider
3578 	 * corresponds to the ICV length of the algorithm specified by the SA.
3579 	 */
3580 	icv_len = hada->da_icv_len;
3581 	if ((icv_len != assoc->ipsa_mac_len) ||
3582 	    (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) <
3583 	    (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) {
3584 		ah0dbg(("ah_inbound_accelerated: "
3585 		    "ICV len (%u) incorrect or mblk too small (%u)\n",
3586 		    icv_len, (uint32_t)(MBLKL(hada_mp))));
3587 		counter = DROPPER(ipss, ipds_ah_bad_length);
3588 		goto ah_in_discard;
3589 	}
3590 	ASSERT(icv_len != 0);
3591 
3592 	/* compute the padded AH ICV len */
3593 	if (isv4) {
3594 		ipha = (ipha_t *)mp->b_rptr;
3595 		align_len = (icv_len + IPV4_PADDING_ALIGN - 1) &
3596 		    -IPV4_PADDING_ALIGN;
3597 	} else {
3598 		ip6h = (ip6_t *)mp->b_rptr;
3599 		align_len = (icv_len + IPV6_PADDING_ALIGN - 1) &
3600 		    -IPV6_PADDING_ALIGN;
3601 	}
3602 
3603 	ah = (ah_t *)(mp->b_rptr + ah_offset);
3604 	in_icv = (uint8_t *)ah + sizeof (ah_t);
3605 
3606 	/* compare ICV in AH header vs ICV computed by adapter */
3607 	if (bcmp(hada->da_icv, in_icv, icv_len)) {
3608 		int af;
3609 		void *addr;
3610 
3611 		if (isv4) {
3612 			addr = &ipha->ipha_dst;
3613 			af = AF_INET;
3614 		} else {
3615 			addr = &ip6h->ip6_dst;
3616 			af = AF_INET6;
3617 		}
3618 
3619 		/*
3620 		 * Log the event. Don't print to the console, block
3621 		 * potential denial-of-service attack.
3622 		 */
3623 		AH_BUMP_STAT(ahstack, bad_auth);
3624 		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3625 		    "AH Authentication failed spi %x, dst_addr %s",
3626 		    assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
3627 		counter = DROPPER(ipss, ipds_ah_bad_auth);
3628 		goto ah_in_discard;
3629 	}
3630 
3631 	ah3dbg(ahstack, ("AH succeeded, checking replay\n"));
3632 	AH_BUMP_STAT(ahstack, good_auth);
3633 
3634 	if (!sadb_replay_check(assoc, ah->ah_replay)) {
3635 		int af;
3636 		void *addr;
3637 
3638 		if (isv4) {
3639 			addr = &ipha->ipha_dst;
3640 			af = AF_INET;
3641 		} else {
3642 			addr = &ip6h->ip6_dst;
3643 			af = AF_INET6;
3644 		}
3645 
3646 		/*
3647 		 * Log the event. As of now we print out an event.
3648 		 * Do not print the replay failure number, or else
3649 		 * syslog cannot collate the error messages.  Printing
3650 		 * the replay number that failed (or printing to the
3651 		 * console) opens a denial-of-service attack.
3652 		 */
3653 		AH_BUMP_STAT(ahstack, replay_failures);
3654 		ipsec_assocfailure(info.mi_idnum, 0, 0,
3655 		    SL_ERROR | SL_WARN,
3656 		    "Replay failed for AH spi %x, dst_addr %s",
3657 		    assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
3658 		counter = DROPPER(ipss, ipds_ah_replay);
3659 		goto ah_in_discard;
3660 	}
3661 
3662 	/*
3663 	 * Remove AH header. We do this by copying everything before
3664 	 * the AH header onto the AH header+ICV.
3665 	 */
3666 	/* overwrite AH with what was preceeding it (IP header) */
3667 	next_hdr = ah->ah_nexthdr;
3668 	ovbcopy(mp->b_rptr, mp->b_rptr + sizeof (ah_t) + align_len,
3669 	    ah_offset);
3670 	mp->b_rptr += sizeof (ah_t) + align_len;
3671 	if (isv4) {
3672 		/* adjust IP header next protocol */
3673 		ipha = (ipha_t *)mp->b_rptr;
3674 		ipha->ipha_protocol = next_hdr;
3675 
3676 		age_bytes = ipha->ipha_length;
3677 
3678 		/* adjust length in IP header */
3679 		ipha->ipha_length -= (sizeof (ah_t) + align_len);
3680 
3681 		/* recalculate checksum */
3682 		ipha->ipha_hdr_checksum = 0;
3683 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
3684 	} else {
3685 		/* adjust IP header next protocol */
3686 		ip6h = (ip6_t *)mp->b_rptr;
3687 		ip6h->ip6_nxt = next_hdr;
3688 
3689 		age_bytes = sizeof (ip6_t) + ntohs(ip6h->ip6_plen) +
3690 		    sizeof (ah_t);
3691 
3692 		/* adjust length in IP header */
3693 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
3694 		    (sizeof (ah_t) + align_len));
3695 	}
3696 
3697 	/* age SA */
3698 	if (!ah_age_bytes(assoc, age_bytes, B_TRUE)) {
3699 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
3700 		ipsec_assocfailure(info.mi_idnum, 0, 0,
3701 		    SL_ERROR | SL_WARN,
3702 		    "AH Association 0x%x, dst %s had bytes expire.\n",
3703 		    assoc->ipsa_spi, assoc->ipsa_dstaddr,
3704 		    AF_INET, ahstack->ipsecah_netstack);
3705 		AH_BUMP_STAT(ahstack, bytes_expired);
3706 		counter = DROPPER(ipss, ipds_ah_bytes_expire);
3707 		goto ah_in_discard;
3708 	}
3709 
3710 	freeb(hada_mp);
3711 	return (IPSEC_STATUS_SUCCESS);
3712 
3713 ah_in_discard:
3714 	IP_AH_BUMP_STAT(ipss, in_discards);
3715 	freeb(hada_mp);
3716 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter,
3717 	    &ahstack->ah_dropper);
3718 	return (IPSEC_STATUS_FAILED);
3719 }
3720 
3721 /*
3722  * ah_outbound_accelerated_v4:
3723  * Called from ah_outbound_v4() and once it is determined that the
3724  * packet is elligible for hardware acceleration.
3725  *
3726  * We proceed as follows:
3727  * 1. allocate and initialize attributes mblk
3728  * 2. mark IPSEC_OUT to indicate that pkt is accelerated
3729  * 3. insert AH header
3730  */
3731 static ipsec_status_t
3732 ah_outbound_accelerated_v4(mblk_t *ipsec_mp, ipsa_t *assoc)
3733 {
3734 	mblk_t *mp, *new_mp;
3735 	ipsec_out_t *oi;
3736 	uint_t ah_data_sz;	/* ICV length, algorithm dependent */
3737 	uint_t ah_align_sz;	/* ICV length + padding */
3738 	uint32_t v_hlen_tos_len; /* from original IP header */
3739 	ipha_t	*oipha;		/* original IP header */
3740 	ipha_t	*nipha;		/* new IP header */
3741 	uint_t option_length = 0;
3742 	uint_t new_hdr_len;	/* new header length */
3743 	uint_t iphdr_length;
3744 	ah_t *ah_hdr;		/* ptr to AH header */
3745 	netstack_t	*ns;
3746 	ipsec_stack_t	*ipss;
3747 	ipsecah_stack_t	*ahstack;
3748 
3749 	oi = (ipsec_out_t *)ipsec_mp->b_rptr;
3750 	ns = oi->ipsec_out_ns;
3751 	ipss = ns->netstack_ipsec;
3752 	ahstack = ns->netstack_ipsecah;
3753 
3754 	mp = ipsec_mp->b_cont;
3755 
3756 	AH_BUMP_STAT(ahstack, out_accelerated);
3757 
3758 	oipha = (ipha_t *)mp->b_rptr;
3759 	v_hlen_tos_len = ((uint32_t *)oipha)[0];
3760 
3761 	/* mark packet as being accelerated in IPSEC_OUT */
3762 	ASSERT(oi->ipsec_out_accelerated == B_FALSE);
3763 	oi->ipsec_out_accelerated = B_TRUE;
3764 
3765 	/* calculate authentication data length, i.e. ICV + padding */
3766 	ah_data_sz = assoc->ipsa_mac_len;
3767 	ah_align_sz = (ah_data_sz + IPV4_PADDING_ALIGN - 1) &
3768 	    -IPV4_PADDING_ALIGN;
3769 
3770 	/*
3771 	 * Insert pseudo header:
3772 	 * IPSEC_INFO -> [IP, ULP] => IPSEC_INFO -> [IP, AH, ICV] -> ULP
3773 	 */
3774 
3775 	/* IP + AH + authentication + padding data length */
3776 	new_hdr_len = IP_SIMPLE_HDR_LENGTH + sizeof (ah_t) + ah_align_sz;
3777 	if (V_HLEN != IP_SIMPLE_HDR_VERSION) {
3778 		option_length = oipha->ipha_version_and_hdr_length -
3779 		    (uint8_t)((IP_VERSION << 4) +
3780 		    IP_SIMPLE_HDR_LENGTH_IN_WORDS);
3781 		option_length <<= 2;
3782 		new_hdr_len += option_length;
3783 	}
3784 
3785 	/* allocate pseudo-header mblk */
3786 	if ((new_mp = allocb(new_hdr_len, BPRI_HI)) == NULL) {
3787 		/* IPsec kstats: bump bean counter here */
3788 		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
3789 		    DROPPER(ipss, ipds_ah_nomem),
3790 		    &ahstack->ah_dropper);
3791 		return (IPSEC_STATUS_FAILED);
3792 	}
3793 
3794 	new_mp->b_cont = mp;
3795 	ipsec_mp->b_cont = new_mp;
3796 	new_mp->b_wptr += new_hdr_len;
3797 
3798 	/* copy original IP header to new header */
3799 	bcopy(mp->b_rptr, new_mp->b_rptr, IP_SIMPLE_HDR_LENGTH +
3800 	    option_length);
3801 
3802 	/* update IP header */
3803 	nipha = (ipha_t *)new_mp->b_rptr;
3804 	nipha->ipha_protocol = IPPROTO_AH;
3805 	iphdr_length = ntohs(nipha->ipha_length);
3806 	iphdr_length += sizeof (ah_t) + ah_align_sz;
3807 	nipha->ipha_length = htons(iphdr_length);
3808 	nipha->ipha_hdr_checksum = 0;
3809 	nipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(nipha);
3810 
3811 	/* skip original IP header in mp */
3812 	mp->b_rptr += IP_SIMPLE_HDR_LENGTH + option_length;
3813 
3814 	/* initialize AH header */
3815 	ah_hdr = (ah_t *)(new_mp->b_rptr + IP_SIMPLE_HDR_LENGTH +
3816 	    option_length);
3817 	ah_hdr->ah_nexthdr = oipha->ipha_protocol;
3818 	if (!ah_finish_up(ah_hdr, NULL, assoc, ah_data_sz, ah_align_sz,
3819 	    ahstack)) {
3820 		/* Only way this fails is if outbound replay counter wraps. */
3821 		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
3822 		    DROPPER(ipss, ipds_ah_replay),
3823 		    &ahstack->ah_dropper);
3824 		return (IPSEC_STATUS_FAILED);
3825 	}
3826 
3827 	return (IPSEC_STATUS_SUCCESS);
3828 }
3829 
3830 /*
3831  * ah_outbound_accelerated_v6:
3832  *
3833  * Called from ah_outbound_v6() once it is determined that the packet
3834  * is eligible for hardware acceleration.
3835  *
3836  * We proceed as follows:
3837  * 1. allocate and initialize attributes mblk
3838  * 2. mark IPSEC_OUT to indicate that pkt is accelerated
3839  * 3. insert AH header
3840  */
3841 static ipsec_status_t
3842 ah_outbound_accelerated_v6(mblk_t *ipsec_mp, ipsa_t *assoc)
3843 {
3844 	mblk_t *mp, *phdr_mp;
3845 	ipsec_out_t *oi;
3846 	uint_t ah_data_sz;	/* ICV length, algorithm dependent */
3847 	uint_t ah_align_sz;	/* ICV length + padding */
3848 	ip6_t	*oip6h;		/* original IP header */
3849 	ip6_t	*ip6h;		/* new IP header */
3850 	uint_t option_length = 0;
3851 	uint_t hdr_size;
3852 	uint_t ah_offset;
3853 	ah_t *ah_hdr;		/* ptr to AH header */
3854 	netstack_t	*ns;
3855 	ipsec_stack_t	*ipss;
3856 	ipsecah_stack_t	*ahstack;
3857 
3858 	oi = (ipsec_out_t *)ipsec_mp->b_rptr;
3859 	ns = oi->ipsec_out_ns;
3860 	ipss = ns->netstack_ipsec;
3861 	ahstack = ns->netstack_ipsecah;
3862 
3863 	mp = ipsec_mp->b_cont;
3864 
3865 	AH_BUMP_STAT(ahstack, out_accelerated);
3866 
3867 	oip6h = (ip6_t *)mp->b_rptr;
3868 
3869 	/* mark packet as being accelerated in IPSEC_OUT */
3870 	ASSERT(oi->ipsec_out_accelerated == B_FALSE);
3871 	oi->ipsec_out_accelerated = B_TRUE;
3872 
3873 	/* calculate authentication data length, i.e. ICV + padding */
3874 	ah_data_sz = assoc->ipsa_mac_len;
3875 	ah_align_sz = (ah_data_sz + IPV4_PADDING_ALIGN - 1) &
3876 	    -IPV4_PADDING_ALIGN;
3877 
3878 	ASSERT(ah_align_sz >= ah_data_sz);
3879 
3880 	hdr_size = ipsec_ah_get_hdr_size_v6(mp, B_FALSE);
3881 	option_length = hdr_size - IPV6_HDR_LEN;
3882 
3883 	/* This was not included in ipsec_ah_get_hdr_size_v6() */
3884 	hdr_size += (sizeof (ah_t) + ah_align_sz);
3885 
3886 	if ((phdr_mp = allocb(hdr_size, BPRI_HI)) == NULL) {
3887 		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
3888 		    DROPPER(ipss, ipds_ah_nomem),
3889 		    &ahstack->ah_dropper);
3890 		return (IPSEC_STATUS_FAILED);
3891 	}
3892 	phdr_mp->b_wptr += hdr_size;
3893 
3894 	/*
3895 	 * Form the basic IP header first.  We always assign every bit
3896 	 * of the v6 basic header, so a separate bzero is unneeded.
3897 	 */
3898 	ip6h = (ip6_t *)phdr_mp->b_rptr;
3899 	ip6h->ip6_vcf = oip6h->ip6_vcf;
3900 	ip6h->ip6_hlim = oip6h->ip6_hlim;
3901 	ip6h->ip6_src = oip6h->ip6_src;
3902 	ip6h->ip6_dst = oip6h->ip6_dst;
3903 	/*
3904 	 * Include the size of AH and authentication data.
3905 	 * This is how our recipient would compute the
3906 	 * authentication data. Look at what we do in the
3907 	 * inbound case below.
3908 	 */
3909 	ip6h->ip6_plen = htons(ntohs(oip6h->ip6_plen) + sizeof (ah_t) +
3910 	    ah_align_sz);
3911 
3912 	/*
3913 	 * Insert pseudo header:
3914 	 * IPSEC_INFO -> [IP6, LLH, ULP] =>
3915 	 *	IPSEC_INFO -> [IP, LLH, AH, ICV] -> ULP
3916 	 */
3917 
3918 	if (option_length == 0) {
3919 		/* Form the AH header */
3920 		ip6h->ip6_nxt = IPPROTO_AH;
3921 		((ah_t *)(ip6h + 1))->ah_nexthdr = oip6h->ip6_nxt;
3922 		ah_offset = IPV6_HDR_LEN;
3923 	} else {
3924 		ip6h->ip6_nxt = oip6h->ip6_nxt;
3925 		/* option_length does not include the AH header's size */
3926 		ah_offset = ah_fix_phdr_v6(ip6h, oip6h, B_TRUE, B_FALSE);
3927 		if (ah_offset == 0) {
3928 			freemsg(phdr_mp);
3929 			ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
3930 			    DROPPER(ipss, ipds_ah_bad_v6_hdrs),
3931 			    &ahstack->ah_dropper);
3932 			return (IPSEC_STATUS_FAILED);
3933 		}
3934 	}
3935 
3936 	phdr_mp->b_cont = mp;
3937 	ipsec_mp->b_cont = phdr_mp;
3938 
3939 	/* skip original IP header in mp */
3940 	mp->b_rptr += IPV6_HDR_LEN + option_length;
3941 
3942 	/* initialize AH header */
3943 	ah_hdr = (ah_t *)(phdr_mp->b_rptr + IPV6_HDR_LEN + option_length);
3944 	ah_hdr->ah_nexthdr = oip6h->ip6_nxt;
3945 
3946 	if (!ah_finish_up(((ah_t *)((uint8_t *)ip6h + ah_offset)), NULL,
3947 	    assoc, ah_data_sz, ah_align_sz, ahstack)) {
3948 		/* Only way this fails is if outbound replay counter wraps. */
3949 		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
3950 		    DROPPER(ipss, ipds_ah_replay),
3951 		    &ahstack->ah_dropper);
3952 		return (IPSEC_STATUS_FAILED);
3953 	}
3954 
3955 	return (IPSEC_STATUS_SUCCESS);
3956 }
3957 
3958 /*
3959  * Invoked after processing of an inbound packet by the
3960  * kernel crypto framework. Called by ah_submit_req() for a sync request,
3961  * or by the kcf callback for an async request.
3962  * Returns IPSEC_STATUS_SUCCESS on success, IPSEC_STATUS_FAILED on failure.
3963  * On failure, the mblk chain ipsec_in is freed by this function.
      *
      * ipsec_in is the IPSEC_IN metadata mblk (its b_rptr is read as an
      * ipsec_in_t).  Its b_cont is the pseudo-header mblk and that mblk's
      * b_cont is the packet itself.  On success the AH header has been
      * removed from the packet, the pseudo-header mblk has been freed with
      * freeb(), and the packet is linked directly to ipsec_in->b_cont.
      * Every failure is handed to ip_drop_packet() together with a drop
      * counter that identifies the cause.
3964  */
3965 static ipsec_status_t
3966 ah_auth_in_done(mblk_t *ipsec_in)
3967 {
3968 	mblk_t *phdr_mp;
3969 	ipha_t *ipha;
3970 	uint_t ah_offset = 0;
3971 	mblk_t *mp;
3972 	int align_len, newpos;
3973 	ah_t *ah;
3974 	uint32_t length;
3975 	uint32_t *dest32;
3976 	uint8_t *dest;
3977 	ipsec_in_t *ii;
3978 	boolean_t isv4;
3979 	ip6_t *ip6h;
3980 	uint_t icv_len;
3981 	ipsa_t *assoc;
3982 	kstat_named_t *counter;
3983 	netstack_t	*ns;
3984 	ipsecah_stack_t	*ahstack;
3985 	ipsec_stack_t	*ipss;
3986 
	/* The per-stack AH and IPsec state is reached through the metadata. */
3987 	ii = (ipsec_in_t *)ipsec_in->b_rptr;
3988 	ns = ii->ipsec_in_ns;
3989 	ahstack = ns->netstack_ipsecah;
3990 	ipss = ns->netstack_ipsec;
3991 
3992 	isv4 = ii->ipsec_in_v4;
3993 	assoc = ii->ipsec_in_ah_sa;
	/* ICV length as recorded in the crypto MAC descriptor. */
3994 	icv_len = (uint_t)ii->ipsec_in_crypto_mac.cd_raw.iov_len;
3995 
	/*
	 * Both the pseudo-header mblk and the data mblk must be present;
	 * a missing one is accounted against the ipds_ah_nomem counter.
	 */
3996 	phdr_mp = ipsec_in->b_cont;
3997 	if (phdr_mp == NULL) {
3998 		ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
3999 		    DROPPER(ipss, ipds_ah_nomem),
4000 		    &ahstack->ah_dropper);
4001 		return (IPSEC_STATUS_FAILED);
4002 	}
4003 
4004 	mp = phdr_mp->b_cont;
4005 	if (mp == NULL) {
4006 		ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL,
4007 		    DROPPER(ipss, ipds_ah_nomem),
4008 		    &ahstack->ah_dropper);
4009 		return (IPSEC_STATUS_FAILED);
4010 	}
	/*
	 * Back b_rptr up by the recorded skip length so that it points at
	 * the IP header again (it is read as an ipha_t/ip6_t below).
	 * Presumably b_rptr was advanced by this amount before the crypto
	 * request was submitted -- TODO confirm against the submit path.
	 */
4011 	mp->b_rptr -= ii->ipsec_in_skip_len;
4012 
4013 	ah_set_usetime(assoc, B_TRUE);
4014 
	/*
	 * ah_offset is the byte offset of the AH header from the start of
	 * the IP header; align_len is the ICV length rounded up to the
	 * address family's padding alignment.
	 */
4015 	if (isv4) {
4016 		ipha = (ipha_t *)mp->b_rptr;
		/*
		 * Removing the version nibble leaves the header length in
		 * 32-bit words; the shift converts it to bytes.
		 */
4017 		ah_offset = ipha->ipha_version_and_hdr_length -
4018 		    (uint8_t)((IP_VERSION << 4));
4019 		ah_offset <<= 2;
4020 		align_len = P2ALIGN(icv_len + IPV4_PADDING_ALIGN - 1,
4021 		    IPV4_PADDING_ALIGN);
4022 	} else {
4023 		ip6h = (ip6_t *)mp->b_rptr;
4024 		ah_offset = ipsec_ah_get_hdr_size_v6(mp, B_TRUE);
4025 		ASSERT((mp->b_wptr - mp->b_rptr) >= ah_offset);
4026 		align_len = P2ALIGN(icv_len + IPV6_PADDING_ALIGN - 1,
4027 		    IPV6_PADDING_ALIGN);
4028 	}
4029 
4030 	ah = (ah_t *)(mp->b_rptr + ah_offset);
	/* Bytes occupied by AH: fixed header plus padded ICV. */
4031 	newpos = sizeof (ah_t) + align_len;
4032 
4033 	/*
4034 	 * We get here only when authentication passed.
4035 	 */
4036 
4037 	ah3dbg(ahstack, ("AH succeeded, checking replay\n"));
4038 	AH_BUMP_STAT(ahstack, good_auth);
4039 
4040 	if (!sadb_replay_check(assoc, ah->ah_replay)) {
4041 		int af;
4042 		void *addr;
4043 
		/* Pick the destination address to report in the log. */
4044 		if (isv4) {
4045 			addr = &ipha->ipha_dst;
4046 			af = AF_INET;
4047 		} else {
4048 			addr = &ip6h->ip6_dst;
4049 			af = AF_INET6;
4050 		}
4051 
4052 		/*
4053 		 * Log the event. As of now we print out an event.
4054 		 * Do not print the replay failure number, or else
4055 		 * syslog cannot collate the error messages.  Printing
4056 		 * the replay number that failed (or printing to the
4057 		 * console) opens a denial-of-service attack.
4058 		 */
4059 		AH_BUMP_STAT(ahstack, replay_failures);
4060 		ipsec_assocfailure(info.mi_idnum, 0, 0,
4061 		    SL_ERROR | SL_WARN,
4062 		    "Replay failed for AH spi %x, dst_addr %s",
4063 		    assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
4064 		counter = DROPPER(ipss, ipds_ah_replay);
4065 		goto ah_in_discard;
4066 	}
4067 
4068 	/*
4069 	 * We need to remove the AH header from the original
4070 	 * datagram. Best way to do this is to move the pre-AH headers
4071 	 * forward in the (relatively simple) IPv4 case.  In IPv6, it's
4072 	 * a bit more complicated because of IPv6's next-header chaining,
4073 	 * but it's doable.
4074 	 */
4075 	if (isv4) {
4076 		/*
4077 		 * Assign the right protocol, adjust the length as we
4078 		 * are removing the AH header and adjust the checksum to
4079 		 * account for the protocol and length.
4080 		 */
4081 		length = ntohs(ipha->ipha_length);
		/* Byte aging is charged with the length before AH removal. */
4082 		if (!ah_age_bytes(assoc, length, B_TRUE)) {
4083 			/* The ipsa has hit hard expiration, LOG and AUDIT. */
4084 			ipsec_assocfailure(info.mi_idnum, 0, 0,
4085 			    SL_ERROR | SL_WARN,
4086 			    "AH Association 0x%x, dst %s had bytes expire.\n",
4087 			    assoc->ipsa_spi, assoc->ipsa_dstaddr,
4088 			    AF_INET, ahstack->ipsecah_netstack);
4089 			AH_BUMP_STAT(ahstack, bytes_expired);
4090 			counter = DROPPER(ipss, ipds_ah_bytes_expire);
4091 			goto ah_in_discard;
4092 		}
4093 		ipha->ipha_protocol = ah->ah_nexthdr;
4094 		length -= newpos;
4095 
4096 		ipha->ipha_length = htons((uint16_t)length);
		/* The checksum field is zeroed before it is recomputed. */
4097 		ipha->ipha_hdr_checksum = 0;
4098 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
4099 	} else {
4100 		uchar_t *whereptr;
4101 		int hdrlen;
4102 		uint8_t *nexthdr;
4103 		ip6_hbh_t *hbhhdr;
4104 		ip6_dest_t *dsthdr;
4105 		ip6_rthdr0_t *rthdr;
4106 
4107 		/*
4108 		 * Make phdr_mp hold until the AH header and make
4109 		 * mp hold everything past AH header.
4110 		 */
4111 		length = ntohs(ip6h->ip6_plen);
		/* ip6_plen excludes the fixed header, so add it for aging. */
4112 		if (!ah_age_bytes(assoc, length + sizeof (ip6_t), B_TRUE)) {
4113 			/* The ipsa has hit hard expiration, LOG and AUDIT. */
4114 			ipsec_assocfailure(info.mi_idnum, 0, 0,
4115 			    SL_ERROR | SL_WARN,
4116 			    "AH Association 0x%x, dst %s had bytes "
4117 			    "expire.\n", assoc->ipsa_spi, &ip6h->ip6_dst,
4118 			    AF_INET6, ahstack->ipsecah_netstack);
4119 			AH_BUMP_STAT(ahstack, bytes_expired);
4120 			counter = DROPPER(ipss, ipds_ah_bytes_expire);
4121 			goto ah_in_discard;
4122 		}
4123 
4124 		/*
4125 		 * Update the next header field of the header preceding
4126 		 * AH with the next header field of AH. Start with the
4127 		 * IPv6 header and proceed with the extension headers
4128 		 * until we find what we're looking for.
4129 		 */
4130 		nexthdr = &ip6h->ip6_nxt;
4131 		whereptr =  (uchar_t *)ip6h;
4132 		hdrlen = sizeof (ip6_t);
4133 
		/*
		 * NOTE(review): the switch below has no default case.  A
		 * next-header value other than the three handled would
		 * leave nexthdr and hdrlen unchanged while whereptr keeps
		 * advancing, so this loop depends on the header chain
		 * having been validated earlier -- TODO confirm.
		 */
4134 		while (*nexthdr != IPPROTO_AH) {
4135 			whereptr += hdrlen;
4136 			/* Assume IP has already stripped it */
4137 			ASSERT(*nexthdr != IPPROTO_FRAGMENT &&
4138 			    *nexthdr != IPPROTO_RAW);
4139 			switch (*nexthdr) {
4140 			case IPPROTO_HOPOPTS:
4141 				hbhhdr = (ip6_hbh_t *)whereptr;
4142 				nexthdr = &hbhhdr->ip6h_nxt;
4143 				hdrlen = 8 * (hbhhdr->ip6h_len + 1);
4144 				break;
4145 			case IPPROTO_DSTOPTS:
4146 				dsthdr = (ip6_dest_t *)whereptr;
4147 				nexthdr = &dsthdr->ip6d_nxt;
4148 				hdrlen = 8 * (dsthdr->ip6d_len + 1);
4149 				break;
4150 			case IPPROTO_ROUTING:
4151 				rthdr = (ip6_rthdr0_t *)whereptr;
4152 				nexthdr = &rthdr->ip6r0_nxt;
4153 				hdrlen = 8 * (rthdr->ip6r0_len + 1);
4154 				break;
4155 			}
4156 		}
		/* nexthdr now points at the field that named AH. */
4157 		*nexthdr = ah->ah_nexthdr;
4158 		length -= newpos;
4159 		ip6h->ip6_plen = htons((uint16_t)length);
4160 	}
4161 
4162 	/* Now that we've fixed the IP header, move it forward. */
	/*
	 * Advancing b_rptr by newpos drops the first newpos bytes of the
	 * mblk; the ah_offset bytes of pre-AH headers are then copied
	 * newpos bytes toward the payload so that they end where the AH
	 * header used to end.  The copy runs from the last element
	 * backwards, which keeps it correct when source and destination
	 * overlap.  Words are copied when the new b_rptr is 32-bit aligned,
	 * single bytes otherwise.
	 * NOTE(review): the word loop steps back (newpos >> 2) words and so
	 * assumes newpos and ah_offset are multiples of four -- TODO confirm.
	 */
4163 	mp->b_rptr += newpos;
4164 	if (IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
4165 		dest32 = (uint32_t *)(mp->b_rptr + ah_offset);
4166 		while (--dest32 >= (uint32_t *)mp->b_rptr)
4167 			*dest32 = *(dest32 - (newpos >> 2));
4168 	} else {
4169 		dest = mp->b_rptr + ah_offset;
4170 		while (--dest >= mp->b_rptr)
4171 			*dest = *(dest - newpos);
4172 	}
	/* Free the pseudo-header mblk and link the packet directly. */
4173 	freeb(phdr_mp);
4174 	ipsec_in->b_cont = mp;
4175 	return (IPSEC_STATUS_SUCCESS);
4176 
	/* Shared drop path; every goto sets 'counter' before jumping here. */
4177 ah_in_discard:
4178 	IP_AH_BUMP_STAT(ipss, in_discards);
4179 	ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter,
4180 	    &ahstack->ah_dropper);
4181 	return (IPSEC_STATUS_FAILED);
4182 }
4183 
4184 /*
4185  * Invoked after processing of an outbound packet by the
4186  * kernel crypto framework, either by ah_submit_req() for a request
4187  * executed synchronously, or by the KEF callback for a request
4188  * executed asynchronously.
      *
      * ipsec_out is the IPSEC_OUT metadata mblk (its b_rptr is read as an
      * ipsec_out_t).  Its b_cont is the pseudo-header mblk and that mblk's
      * b_cont is the original packet.  The function turns the pseudo-header
      * mblk into the real IP + AH header by copying in the original IP
      * header and the ICV, then advances the original packet past its own
      * IP header.
      *
      * Returns IPSEC_STATUS_SUCCESS, or IPSEC_STATUS_FAILED after passing
      * ipsec_out to ip_drop_packet() when either the pseudo-header or the
      * data mblk is missing.
4189  */
4190 static ipsec_status_t
4191 ah_auth_out_done(mblk_t *ipsec_out)
4192 {
4193 	mblk_t *phdr_mp;
4194 	mblk_t *mp;
4195 	int align_len;
4196 	uint32_t hdrs_length;
4197 	uchar_t *ptr;
4198 	uint32_t length;
4199 	boolean_t isv4;
4200 	ipsec_out_t *io;
4201 	size_t icv_len;
4202 	netstack_t	*ns;
4203 	ipsec_stack_t	*ipss;
4204 	ipsecah_stack_t	*ahstack;
4205 
	/* The per-stack AH and IPsec state is reached through the metadata. */
4206 	io = (ipsec_out_t *)ipsec_out->b_rptr;
4207 	ns = io->ipsec_out_ns;
4208 	ipss = ns->netstack_ipsec;
4209 	ahstack = ns->netstack_ipsecah;
4210 
4211 	isv4 = io->ipsec_out_v4;
	/* ICV length as recorded in the crypto MAC descriptor. */
4212 	icv_len = io->ipsec_out_crypto_mac.cd_raw.iov_len;
4213 
	/*
	 * Both the pseudo-header mblk and the data mblk must be present;
	 * a missing one is accounted against the ipds_ah_nomem counter.
	 */
4214 	phdr_mp = ipsec_out->b_cont;
4215 	if (phdr_mp == NULL) {
4216 		ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL,
4217 		    DROPPER(ipss, ipds_ah_nomem),
4218 		    &ahstack->ah_dropper);
4219 		return (IPSEC_STATUS_FAILED);
4220 	}
4221 
4222 	mp = phdr_mp->b_cont;
4223 	if (mp == NULL) {
4224 		ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL,
4225 		    DROPPER(ipss, ipds_ah_nomem),
4226 		    &ahstack->ah_dropper);
4227 		return (IPSEC_STATUS_FAILED);
4228 	}
	/*
	 * Back b_rptr up by the recorded skip length so that it points at
	 * the original IP header again (it is read as an ipha_t/ip6_t
	 * below).  Presumably b_rptr was advanced by this amount before the
	 * crypto request was submitted -- TODO confirm.
	 */
4229 	mp->b_rptr -= io->ipsec_out_skip_len;
4230 
4231 	ASSERT(io->ipsec_out_ah_sa != NULL);
4232 	ah_set_usetime(io->ipsec_out_ah_sa, B_FALSE);
4233 
4234 	if (isv4) {
4235 		ipha_t *ipha;
4236 		ipha_t *nipha;
4237 
4238 		ipha = (ipha_t *)mp->b_rptr;
		/*
		 * Removing the version nibble leaves the header length in
		 * 32-bit words; the shift converts it to bytes.
		 */
4239 		hdrs_length = ipha->ipha_version_and_hdr_length -
4240 		    (uint8_t)((IP_VERSION << 4));
4241 		hdrs_length <<= 2;
		/* ICV length rounded up to the IPv4 padding alignment. */
4242 		align_len = P2ALIGN(icv_len + IPV4_PADDING_ALIGN - 1,
4243 		    IPV4_PADDING_ALIGN);
4244 		/*
4245 		 * phdr_mp must have the right amount of space for the
4246 		 * combined IP and AH header. Copy the IP header, then
4247 		 * copy icv_len bytes (the ICV) from phdr_mp->b_wptr to
4248 		 * just past the AH header. Note that the AH header was
4249 		 * already formed before the ICV calculation and hence
4250 		 * you don't have to copy it here.
4251 		 */
4252 		bcopy(mp->b_rptr, phdr_mp->b_rptr, hdrs_length);
4253 
4254 		ptr = phdr_mp->b_rptr + hdrs_length + sizeof (ah_t);
4255 		bcopy(phdr_mp->b_wptr, ptr, icv_len);
4256 
4257 		/*
4258 		 * Compute the new header checksum as we are assigning
4259 		 * IPPROTO_AH and adjusting the length here.
4260 		 */
4261 		nipha = (ipha_t *)phdr_mp->b_rptr;
4262 
4263 		nipha->ipha_protocol = IPPROTO_AH;
4264 		length = ntohs(nipha->ipha_length);
4265 		length += (sizeof (ah_t) + align_len);
4266 		nipha->ipha_length = htons((uint16_t)length);
		/* The checksum field is zeroed before it is recomputed. */
4267 		nipha->ipha_hdr_checksum = 0;
4268 		nipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(nipha);
4269 	} else {
4270 		ip6_t *ip6h;
4271 		ip6_t *nip6h;
4272 		uint_t ah_offset;
4273 
4274 		ip6h = (ip6_t *)mp->b_rptr;
4275 		nip6h = (ip6_t *)phdr_mp->b_rptr;
		/* ICV length rounded up to the IPv6 padding alignment. */
4276 		align_len = P2ALIGN(icv_len + IPV6_PADDING_ALIGN - 1,
4277 		    IPV6_PADDING_ALIGN);
4278 		/*
4279 		 * phdr_mp must have the right amount of space for the
4280 		 * combined IP and AH header. Copy the IP header with
4281 		 * options into the pseudo header. When we constructed
4282 		 * a pseudo header, we did not copy some of the mutable
4283 		 * fields. We do it now by calling ah_fix_phdr_v6()
4284 		 * with the last argument B_TRUE. It returns the
4285 		 * ah_offset into the pseudo header.
4286 		 */
4287 
4288 		bcopy(ip6h, nip6h, IPV6_HDR_LEN);
4289 		ah_offset = ah_fix_phdr_v6(nip6h, ip6h, B_TRUE, B_TRUE);
4290 		ASSERT(ah_offset != 0);
4291 		/*
4292 		 * phdr_mp can hold exactly the whole IP header with options
4293 		 * plus the AH header also. Thus subtracting the AH header's
4294 		 * size should give exactly how much of the original header
4295 		 * should be skipped.
		 * NOTE(review): icv_len is subtracted here, whereas the
		 * payload length below grows by align_len; the two agree
		 * only when the ICV needs no padding -- TODO confirm
		 * against how phdr_mp was sized.
4296 		 */
4297 		hdrs_length = (phdr_mp->b_wptr - phdr_mp->b_rptr) -
4298 		    sizeof (ah_t) - icv_len;
		/* Place the ICV immediately after the fixed AH header. */
4299 		bcopy(phdr_mp->b_wptr, ((uint8_t *)nip6h + ah_offset +
4300 		    sizeof (ah_t)), icv_len);
4301 		length = ntohs(nip6h->ip6_plen);
4302 		length += (sizeof (ah_t) + align_len);
4303 		nip6h->ip6_plen = htons((uint16_t)length);
4304 	}
4305 
4306 	/* Skip the original IP header */
4307 	mp->b_rptr += hdrs_length;
	/* If nothing is left in mp, unlink it from the chain and free it. */
4308 	if (mp->b_rptr == mp->b_wptr) {
4309 		phdr_mp->b_cont = mp->b_cont;
4310 		freeb(mp);
4311 	}
4312 
4313 	return (IPSEC_STATUS_SUCCESS);
4314 }
4314 
4315 /*
4316  * Wrapper to allow IP to trigger an AH association failure message
4317  * during SA inbound selection.
4318  */
4319 void
4320 ipsecah_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
4321     uint32_t spi, void *addr, int af, ipsecah_stack_t *ahstack)
4322 {
4323 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
4324 
4325 	if (ahstack->ipsecah_log_unknown_spi) {
4326 		ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
4327 		    addr, af, ahstack->ipsecah_netstack);
4328 	}
4329 
4330 	ip_drop_packet(mp, B_TRUE, NULL, NULL,
4331 	    DROPPER(ipss, ipds_ah_no_sa),
4332 	    &ahstack->ah_dropper);
4333 }
4334 
4335 /*
4336  * Initialize the AH input and output processing functions.
4337  */
4338 void
4339 ipsecah_init_funcs(ipsa_t *sa)
4340 {
4341 	if (sa->ipsa_output_func == NULL)
4342 		sa->ipsa_output_func = ah_outbound;
4343 	if (sa->ipsa_input_func == NULL)
4344 		sa->ipsa_input_func = ah_inbound;
4345 }
4346