xref: /illumos-gate/usr/src/uts/common/inet/ip/sadb.c (revision 1128e05efc1f8d851258698732d30c54ae0fcb69)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/errno.h>
30 #include <sys/ddi.h>
31 #include <sys/debug.h>
32 #include <sys/cmn_err.h>
33 #include <sys/stream.h>
34 #include <sys/strlog.h>
35 #include <sys/kmem.h>
36 #include <sys/sunddi.h>
37 #include <sys/tihdr.h>
38 #include <sys/atomic.h>
39 #include <sys/socket.h>
40 #include <sys/sysmacros.h>
41 #include <sys/crypto/common.h>
42 #include <sys/crypto/api.h>
43 #include <sys/zone.h>
44 #include <netinet/in.h>
45 #include <net/if.h>
46 #include <net/pfkeyv2.h>
47 #include <inet/common.h>
48 #include <netinet/ip6.h>
49 #include <inet/ip.h>
50 #include <inet/ip_ire.h>
51 #include <inet/ip6.h>
52 #include <inet/ipsec_info.h>
53 #include <inet/tcp.h>
54 #include <inet/sadb.h>
55 #include <inet/ipsec_impl.h>
56 #include <inet/ipsecah.h>
57 #include <inet/ipsecesp.h>
58 #include <sys/random.h>
59 #include <sys/dlpi.h>
60 #include <sys/iphada.h>
61 #include <inet/ip_if.h>
62 #include <inet/ipdrop.h>
63 #include <inet/ipclassifier.h>
64 #include <inet/sctp_ip.h>
65 #include <inet/tun.h>
66 
67 /*
68  * This source file contains Security Association Database (SADB) common
69  * routines.  They are linked in with the AH module.  Since AH has no chance
70  * of falling under export control, it was safe to link it in there.
71  */
72 
73 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
74     ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
75 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
76 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
77 static void sadb_drain_torchq(queue_t *, mblk_t *);
78 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
79 			    netstack_t *);
80 static void sadb_destroy(sadb_t *, netstack_t *);
81 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
82 
83 static time_t sadb_add_time(time_t, uint64_t);
84 static void lifetime_fuzz(ipsa_t *);
85 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
86 static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
87 
88 extern void (*cl_inet_getspi)(uint8_t protocol, uint8_t *ptr, size_t len);
89 extern int (*cl_inet_checkspi)(uint8_t protocol, uint32_t spi);
90 extern void (*cl_inet_deletespi)(uint8_t protocol, uint32_t spi);
91 
92 /*
93  * ipsacq_maxpackets is defined here to make it tunable
94  * from /etc/system.
95  */
96 extern uint64_t ipsacq_maxpackets;
97 
/*
 * If the SA's add-based lifetime field ipsa_<delta> is non-zero, set the
 * expiration field ipsa_<exp> to ipsa_addtime plus that lifetime, using the
 * saturating sadb_add_time().  A zero lifetime leaves ipsa_<exp> untouched.
 * Note that "sa" is evaluated more than once.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0)					\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
}
104 
/*
 * If the SA's use-based lifetime field ipsa_<delta> is non-zero, compute
 * ipsa_usetime plus that lifetime (saturating, via sadb_add_time()) and
 * store it in ipsa_<exp> when ipsa_<exp> is still zero or is later than the
 * computed time.  In other words, ipsa_<exp> only ever moves earlier once it
 * has been set.  Note that "sa" is evaluated more than once.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
		    (sa)->ipsa_ ## delta);				\
		if ((sa)->ipsa_ ## exp == 0 ||				\
		    tmp < (sa)->ipsa_ ## exp)				\
			(sa)->ipsa_ ## exp = tmp;			\
	}								\
}
116 
117 
118 /* wrap the macro so we can pass it as a function pointer */
119 void
120 sadb_sa_refrele(void *target)
121 {
122 	IPSA_REFRELE(((ipsa_t *)target));
123 }
124 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * Saturating addition of a lifetime to a base time.
 *
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * We presume that ILP32 systems will be past end of support life when
 * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 *
 * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 * will hopefully have figured out clever ways to avoid the use of
 * fixed-sized integers in computation.
 *
 * base:  the starting time.
 * delta: the number of seconds to add to it.
 *
 * Returns base + delta, or TIME_MAX when that sum cannot be represented
 * in a time_t.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.  After this, delta always fits in a time_t.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = (uint64_t)TIME_MAX;

	/*
	 * Check for overflow BEFORE adding instead of adding and then
	 * looking for a wrapped result: converting an out-of-range
	 * unsigned sum back to a signed time_t is implementation-defined.
	 * Only a positive base can push the sum past TIME_MAX, and for a
	 * positive base "TIME_MAX - base" cannot itself overflow.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
175 
176 /*
177  * Callers of this function have already created a working security
178  * association, and have found the appropriate table & hash chain.  All this
179  * function does is check duplicates, and insert the SA.  The caller needs to
180  * hold the hash bucket lock and increment the refcnt before insertion.
181  *
182  * Return 0 if success, EEXIST if collision.
183  */
184 #define	SA_UNIQUE_MATCH(sa1, sa2) \
185 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
186 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
187 
188 int
189 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
190 {
191 	ipsa_t **ptpn = NULL;
192 	ipsa_t *walker;
193 	boolean_t unspecsrc;
194 
195 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
196 
197 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
198 
199 	walker = bucket->isaf_ipsa;
200 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
201 
202 	/*
203 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
204 	 * of the list unless there's an unspecified source address, then
205 	 * insert it after the last SA with a specified source address.
206 	 *
207 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
208 	 * checking for collisions.
209 	 */
210 
211 	while (walker != NULL) {
212 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
213 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
214 			if (walker->ipsa_spi == ipsa->ipsa_spi)
215 				return (EEXIST);
216 
217 			mutex_enter(&walker->ipsa_lock);
218 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
219 			    (walker->ipsa_flags & IPSA_F_USED) &&
220 			    SA_UNIQUE_MATCH(walker, ipsa)) {
221 				walker->ipsa_flags |= IPSA_F_CINVALID;
222 			}
223 			mutex_exit(&walker->ipsa_lock);
224 		}
225 
226 		if (ptpn == NULL && unspecsrc) {
227 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
228 			    walker->ipsa_addrfam))
229 				ptpn = walker->ipsa_ptpn;
230 			else if (walker->ipsa_next == NULL)
231 				ptpn = &walker->ipsa_next;
232 		}
233 
234 		walker = walker->ipsa_next;
235 	}
236 
237 	if (ptpn == NULL)
238 		ptpn = &bucket->isaf_ipsa;
239 	ipsa->ipsa_next = *ptpn;
240 	ipsa->ipsa_ptpn = ptpn;
241 	if (ipsa->ipsa_next != NULL)
242 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
243 	*ptpn = ipsa;
244 	ipsa->ipsa_linklock = &bucket->isaf_lock;
245 
246 	return (0);
247 }
248 #undef SA_UNIQUE_MATCH
249 
250 /*
251  * Free a security association.  Its reference count is 0, which means
252  * I must free it.  The SA must be unlocked and must not be linked into
253  * any fanout list.
254  */
255 static void
256 sadb_freeassoc(ipsa_t *ipsa)
257 {
258 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
259 
260 	ASSERT(ipss != NULL);
261 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
262 	ASSERT(ipsa->ipsa_refcnt == 0);
263 	ASSERT(ipsa->ipsa_next == NULL);
264 	ASSERT(ipsa->ipsa_ptpn == NULL);
265 
266 	ip_drop_packet(sadb_clear_lpkt(ipsa), B_TRUE, NULL, NULL,
267 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
268 	    &ipss->ipsec_sadb_dropper);
269 
270 	mutex_enter(&ipsa->ipsa_lock);
271 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
272 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
273 	mutex_exit(&ipsa->ipsa_lock);
274 
275 	/* bzero() these fields for paranoia's sake. */
276 	if (ipsa->ipsa_authkey != NULL) {
277 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
278 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
279 	}
280 	if (ipsa->ipsa_encrkey != NULL) {
281 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
282 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
283 	}
284 	if (ipsa->ipsa_src_cid != NULL) {
285 		IPSID_REFRELE(ipsa->ipsa_src_cid);
286 	}
287 	if (ipsa->ipsa_dst_cid != NULL) {
288 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
289 	}
290 	if (ipsa->ipsa_integ != NULL)
291 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
292 	if (ipsa->ipsa_sens != NULL)
293 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
294 
295 	mutex_destroy(&ipsa->ipsa_lock);
296 	kmem_free(ipsa, sizeof (*ipsa));
297 }
298 
299 /*
300  * Unlink a security association from a hash bucket.  Assume the hash bucket
301  * lock is held, but the association's lock is not.
302  *
303  * Note that we do not bump the bucket's generation number here because
304  * we might not be making a visible change to the set of visible SA's.
305  * All callers MUST bump the bucket's generation number before they unlock
306  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
307  * was present in the bucket at the time it was locked.
308  */
309 void
310 sadb_unlinkassoc(ipsa_t *ipsa)
311 {
312 	ASSERT(ipsa->ipsa_linklock != NULL);
313 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
314 
315 	/* These fields are protected by the link lock. */
316 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
317 	if (ipsa->ipsa_next != NULL) {
318 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
319 		ipsa->ipsa_next = NULL;
320 	}
321 
322 	ipsa->ipsa_ptpn = NULL;
323 
324 	/* This may destroy the SA. */
325 	IPSA_REFRELE(ipsa);
326 }
327 
328 void
329 sadb_delete_cluster(ipsa_t *assoc)
330 {
331 	uint8_t protocol;
332 
333 	if (cl_inet_deletespi &&
334 	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
335 	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
336 		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
337 		    IPPROTO_AH : IPPROTO_ESP;
338 		cl_inet_deletespi(protocol, assoc->ipsa_spi);
339 	}
340 }
341 
342 /*
343  * Create a larval security association with the specified SPI.	 All other
344  * fields are zeroed.
345  */
346 static ipsa_t *
347 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
348     netstack_t *ns)
349 {
350 	ipsa_t *newbie;
351 
352 	/*
353 	 * Allocate...
354 	 */
355 
356 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
357 	if (newbie == NULL) {
358 		/* Can't make new larval SA. */
359 		return (NULL);
360 	}
361 
362 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
363 	newbie->ipsa_spi = spi;
364 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
365 
366 	/*
367 	 * Copy addresses...
368 	 */
369 
370 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
371 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
372 
373 	newbie->ipsa_addrfam = addrfam;
374 
375 	/*
376 	 * Set common initialization values, including refcnt.
377 	 */
378 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
379 	newbie->ipsa_state = IPSA_STATE_LARVAL;
380 	newbie->ipsa_refcnt = 1;
381 	newbie->ipsa_freefunc = sadb_freeassoc;
382 
383 	/*
384 	 * There aren't a lot of other common initialization values, as
385 	 * they are copied in from the PF_KEY message.
386 	 */
387 
388 	return (newbie);
389 }
390 
391 /*
392  * Call me to initialize a security association fanout.
393  */
394 static int
395 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
396 {
397 	isaf_t *table;
398 	int i;
399 
400 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
401 	*tablep = table;
402 
403 	if (table == NULL)
404 		return (ENOMEM);
405 
406 	for (i = 0; i < size; i++) {
407 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
408 		table[i].isaf_ipsa = NULL;
409 		table[i].isaf_gen = 0;
410 	}
411 
412 	return (0);
413 }
414 
415 /*
416  * Call me to initialize an acquire fanout
417  */
418 static int
419 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
420 {
421 	iacqf_t *table;
422 	int i;
423 
424 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
425 	*tablep = table;
426 
427 	if (table == NULL)
428 		return (ENOMEM);
429 
430 	for (i = 0; i < size; i++) {
431 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
432 		table[i].iacqf_ipsacq = NULL;
433 	}
434 
435 	return (0);
436 }
437 
438 /*
439  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
440  * caller must clean up partial allocations.
441  */
442 static int
443 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
444 {
445 	ASSERT(sp->sdb_of == NULL);
446 	ASSERT(sp->sdb_if == NULL);
447 	ASSERT(sp->sdb_acq == NULL);
448 
449 	sp->sdb_hashsize = size;
450 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
451 		return (ENOMEM);
452 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
453 		return (ENOMEM);
454 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
455 		return (ENOMEM);
456 
457 	return (0);
458 }
459 
460 /*
461  * Call me to initialize an SADB instance; fall back to default size on failure.
462  */
463 static void
464 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
465     netstack_t *ns)
466 {
467 	ASSERT(sp->sdb_of == NULL);
468 	ASSERT(sp->sdb_if == NULL);
469 	ASSERT(sp->sdb_acq == NULL);
470 
471 	if (size < IPSEC_DEFAULT_HASH_SIZE)
472 		size = IPSEC_DEFAULT_HASH_SIZE;
473 
474 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
475 
476 		cmn_err(CE_WARN,
477 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
478 		    size, ver, name);
479 
480 		sadb_destroy(sp, ns);
481 		size = IPSEC_DEFAULT_HASH_SIZE;
482 		cmn_err(CE_WARN, "Falling back to %d entries", size);
483 		(void) sadb_init_trial(sp, size, KM_SLEEP);
484 	}
485 }
486 
487 
488 /*
489  * Initialize an SADB-pair.
490  */
491 void
492 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
493 {
494 	sadb_init(name, &sp->s_v4, size, 4, ns);
495 	sadb_init(name, &sp->s_v6, size, 6, ns);
496 
497 	sp->s_satype = type;
498 
499 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
500 	if (type == SADB_SATYPE_AH) {
501 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
502 
503 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
504 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
505 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
506 	} else {
507 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
508 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
509 	}
510 }
511 
512 /*
513  * Deliver a single SADB_DUMP message representing a single SA.  This is
514  * called many times by sadb_dump().
515  *
516  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
517  * the caller should take that as a hint that dupb() on the "original answer"
518  * failed, and that perhaps the caller should try again with a copyb()ed
519  * "original answer".
520  */
521 static int
522 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
523     sadb_msg_t *samsg)
524 {
525 	mblk_t *answer;
526 
527 	answer = dupb(original_answer);
528 	if (answer == NULL)
529 		return (ENOBUFS);
530 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
531 	if (answer->b_cont == NULL) {
532 		freeb(answer);
533 		return (ENOMEM);
534 	}
535 
536 	/* Just do a putnext, and let keysock deal with flow control. */
537 	putnext(pfkey_q, answer);
538 	return (0);
539 }
540 
541 /*
542  * Common function to allocate and prepare a keysock_out_t M_CTL message.
543  */
544 mblk_t *
545 sadb_keysock_out(minor_t serial)
546 {
547 	mblk_t *mp;
548 	keysock_out_t *kso;
549 
550 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
551 	if (mp != NULL) {
552 		mp->b_datap->db_type = M_CTL;
553 		mp->b_wptr += sizeof (ipsec_info_t);
554 		kso = (keysock_out_t *)mp->b_rptr;
555 		kso->ks_out_type = KEYSOCK_OUT;
556 		kso->ks_out_len = sizeof (*kso);
557 		kso->ks_out_serial = serial;
558 	}
559 
560 	return (mp);
561 }
562 
563 /*
564  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
565  * to keysock.
566  */
567 static int
568 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
569     int num_entries, boolean_t do_peers, time_t active_time)
570 {
571 	int i, error = 0;
572 	mblk_t *original_answer;
573 	ipsa_t *walker;
574 	sadb_msg_t *samsg;
575 	time_t	current;
576 
577 	/*
578 	 * For each IPSA hash bucket do:
579 	 *	- Hold the mutex
580 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
581 	 */
582 	ASSERT(mp->b_cont != NULL);
583 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
584 
585 	original_answer = sadb_keysock_out(serial);
586 	if (original_answer == NULL)
587 		return (ENOMEM);
588 
589 	current = gethrestime_sec();
590 	for (i = 0; i < num_entries; i++) {
591 		mutex_enter(&fanout[i].isaf_lock);
592 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
593 		    walker = walker->ipsa_next) {
594 			if (!do_peers && walker->ipsa_haspeer)
595 				continue;
596 			if ((active_time != 0) &&
597 			    ((current - walker->ipsa_lastuse) > active_time))
598 				continue;
599 			error = sadb_dump_deliver(pfkey_q, original_answer,
600 			    walker, samsg);
601 			if (error == ENOBUFS) {
602 				mblk_t *new_original_answer;
603 
604 				/* Ran out of dupb's.  Try a copyb. */
605 				new_original_answer = copyb(original_answer);
606 				if (new_original_answer == NULL) {
607 					error = ENOMEM;
608 				} else {
609 					freeb(original_answer);
610 					original_answer = new_original_answer;
611 					error = sadb_dump_deliver(pfkey_q,
612 					    original_answer, walker, samsg);
613 				}
614 			}
615 			if (error != 0)
616 				break;	/* out of for loop. */
617 		}
618 		mutex_exit(&fanout[i].isaf_lock);
619 		if (error != 0)
620 			break;	/* out of for loop. */
621 	}
622 
623 	freeb(original_answer);
624 	return (error);
625 }
626 
627 /*
628  * Dump an entire SADB; outbound first, then inbound.
629  */
630 
631 int
632 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
633 {
634 	int error;
635 	time_t	active_time = 0;
636 	sadb_x_edump_t	*edump =
637 	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
638 
639 	if (edump != NULL) {
640 		active_time = edump->sadb_x_edump_timeout;
641 	}
642 
643 	/* Dump outbound */
644 	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
645 	    sp->sdb_hashsize, B_TRUE, active_time);
646 	if (error)
647 		return (error);
648 
649 	/* Dump inbound */
650 	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
651 	    sp->sdb_hashsize, B_FALSE, active_time);
652 }
653 
654 /*
655  * Generic sadb table walker.
656  *
657  * Call "walkfn" for each SA in each bucket in "table"; pass the
658  * bucket, the entry and "cookie" to the callback function.
659  * Take care to ensure that walkfn can delete the SA without screwing
660  * up our traverse.
661  *
662  * The bucket is locked for the duration of the callback, both so that the
663  * callback can just call sadb_unlinkassoc() when it wants to delete something,
664  * and so that no new entries are added while we're walking the list.
665  */
666 static void
667 sadb_walker(isaf_t *table, uint_t numentries,
668     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
669     void *cookie)
670 {
671 	int i;
672 	for (i = 0; i < numentries; i++) {
673 		ipsa_t *entry, *next;
674 
675 		mutex_enter(&table[i].isaf_lock);
676 
677 		for (entry = table[i].isaf_ipsa; entry != NULL;
678 		    entry = next) {
679 			next = entry->ipsa_next;
680 			(*walkfn)(&table[i], entry, cookie);
681 		}
682 		mutex_exit(&table[i].isaf_lock);
683 	}
684 }
685 
686 /*
687  * From the given SA, construct a dl_ct_ipsec_key and
688  * a dl_ct_ipsec structures to be sent to the adapter as part
689  * of a DL_CONTROL_REQ.
690  *
691  * ct_sa must point to the storage allocated for the key
692  * structure and must be followed by storage allocated
693  * for the SA information that must be sent to the driver
694  * as part of the DL_CONTROL_REQ request.
695  *
696  * The is_inbound boolean indicates whether the specified
697  * SA is part of an inbound SA table.
698  *
699  * Returns B_TRUE if the corresponding SA must be passed to
700  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
701  */
702 static boolean_t
703 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
704 {
705 	dl_ct_ipsec_key_t *keyp;
706 	dl_ct_ipsec_t *sap;
707 	void *ct_sa = mp->b_wptr;
708 
709 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
710 
711 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
712 	sap = (dl_ct_ipsec_t *)(keyp + 1);
713 
714 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
715 	    "is_inbound = %d\n", is_inbound));
716 
717 	/* initialize flag */
718 	sap->sadb_sa_flags = 0;
719 	if (is_inbound) {
720 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
721 		/*
722 		 * If an inbound SA has a peer, then mark it has being
723 		 * an outbound SA as well.
724 		 */
725 		if (sa->ipsa_haspeer)
726 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
727 	} else {
728 		/*
729 		 * If an outbound SA has a peer, then don't send it,
730 		 * since we will send the copy from the inbound table.
731 		 */
732 		if (sa->ipsa_haspeer) {
733 			freemsg(mp);
734 			return (B_FALSE);
735 		}
736 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
737 	}
738 
739 	keyp->dl_key_spi = sa->ipsa_spi;
740 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
741 	    DL_CTL_IPSEC_ADDR_LEN);
742 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
743 
744 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
745 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
746 
747 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
748 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
749 	bcopy(sa->ipsa_authkey,
750 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
751 
752 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
753 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
754 	bcopy(sa->ipsa_encrkey,
755 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
756 
757 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
758 	return (B_TRUE);
759 }
760 
761 /*
762  * Called from AH or ESP to format a message which will be used to inform
763  * IPsec-acceleration-capable ills of a SADB change.
764  * (It is not possible to send the message to IP directly from this function
765  * since the SA, if any, is locked during the call).
766  *
767  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
768  * sa_type: identifies whether the operation applies to AH or ESP
769  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
770  * sa: Pointer to an SA.  Must be non-NULL and locked
771  *	for ADD, DELETE, GET, and UPDATE operations.
772  * This function returns an mblk chain that must be passed to IP
773  * for forwarding to the IPsec capable providers.
774  */
775 mblk_t *
776 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
777     boolean_t is_inbound)
778 {
779 	mblk_t *mp;
780 	dl_control_req_t *ctrl;
781 	boolean_t need_key = B_FALSE;
782 	mblk_t *ctl_mp = NULL;
783 	ipsec_ctl_t *ctl;
784 
785 	/*
786 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
787 	 * 2 if a key is needed for the operation
788 	 *    2.1 initialize key
789 	 *    2.2 if a full SA is needed for the operation
790 	 *	2.2.1 initialize full SA info
791 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
792 	 * to send the resulting message to IPsec capable ills.
793 	 */
794 
795 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
796 
797 	/*
798 	 * Allocate DL_CONTROL_REQ M_PROTO
799 	 * We allocate room for the SA even if it's not needed
800 	 * by some of the operations (for example flush)
801 	 */
802 	mp = allocb(sizeof (dl_control_req_t) +
803 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
804 	if (mp == NULL)
805 		return (NULL);
806 	mp->b_datap->db_type = M_PROTO;
807 
808 	/* initialize dl_control_req_t */
809 	ctrl = (dl_control_req_t *)mp->b_wptr;
810 	ctrl->dl_primitive = DL_CONTROL_REQ;
811 	ctrl->dl_operation = dl_operation;
812 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
813 	    DL_CT_IPSEC_ESP;
814 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
815 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
816 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
817 	    sizeof (dl_ct_ipsec_key_t);
818 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
819 	mp->b_wptr += sizeof (dl_control_req_t);
820 
821 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
822 		ASSERT(sa != NULL);
823 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
824 
825 		need_key = B_TRUE;
826 
827 		/*
828 		 * Initialize key and SA data. Note that for some
829 		 * operations the SA data is ignored by the provider
830 		 * (delete, etc.)
831 		 */
832 		if (!sadb_req_from_sa(sa, mp, is_inbound))
833 			return (NULL);
834 	}
835 
836 	/* construct control message */
837 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
838 	if (ctl_mp == NULL) {
839 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
840 		freemsg(mp);
841 		return (NULL);
842 	}
843 
844 	ctl_mp->b_datap->db_type = M_CTL;
845 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
846 	ctl_mp->b_cont = mp;
847 
848 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
849 	ctl->ipsec_ctl_type = IPSEC_CTL;
850 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
851 	ctl->ipsec_ctl_sa_type = sa_type;
852 
853 	if (need_key) {
854 		/*
855 		 * Keep an additional reference on SA, since it will be
856 		 * needed by IP to send control messages corresponding
857 		 * to that SA from its perimeter. IP will do a
858 		 * IPSA_REFRELE when done with the request.
859 		 */
860 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
861 		IPSA_REFHOLD(sa);
862 		ctl->ipsec_ctl_sa = sa;
863 	} else
864 		ctl->ipsec_ctl_sa = NULL;
865 
866 	return (ctl_mp);
867 }
868 
869 
/*
 * Called by sadb_ill_download() to dump the entries for a specific
 * fanout table.  For each SA entry in the table passed as argument
 * that ipsec_capab_match() accepts for this ill, use mp as a template,
 * construct a full DL_CONTROL message, and call ill_dlpi_send(),
 * provided by IP, to send the resulting message to the ill.
 *
 * ill:         the interface the SAs are being downloaded to.
 * mp:          template M_PROTO message; it is copied, never consumed.
 * fanout:      array of num_entries hash buckets to walk.
 * is_inbound:  whether fanout is an inbound SA table; passed through to
 *              sadb_req_from_sa().
 *
 * Nothing is returned.  If copying the template fails, the messages already
 * built for the current bucket are still sent and the walk then stops.
 */
static void
sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
    boolean_t is_inbound)
{
	ipsa_t *walker;
	mblk_t *nmp, *salist;
	int i, error = 0;
	ip_stack_t	*ipst = ill->ill_ipst;
	netstack_t	*ns = ipst->ips_netstack;

	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
	    (void *)fanout, num_entries));
	/*
	 * For each IPSA hash bucket do:
	 *	- Hold the mutex
	 *	- Walk each entry, building a corresponding request for it
	 *	  and queueing the request on a per-bucket list.
	 *	- Drop the mutex, then send the queued requests to IP.
	 */
	ASSERT(mp->b_datap->db_type == M_PROTO);

	for (i = 0; i < num_entries; i++) {
		mutex_enter(&fanout[i].isaf_lock);
		/* Requests built for this bucket, chained through b_next. */
		salist = NULL;

		for (walker = fanout[i].isaf_ipsa; walker != NULL;
		    walker = walker->ipsa_next) {
			IPSECHW_DEBUG(IPSECHW_SADB,
			    ("sadb_ill_df: sending SA to ill via IP \n"));
			/*
			 * Duplicate the template mp passed and
			 * complete DL_CONTROL_REQ data.
			 * To be more memory efficient, we could use
			 * dupb() for the M_CTL and copyb() for the M_PROTO
			 * as the M_CTL, since the M_CTL is the same for
			 * every SA entry passed down to IP for the same ill.
			 *
			 * Note that copymsg/copyb ensure that the new mblk
			 * is at least as large as the source mblk even if it's
			 * not using all its storage -- therefore, nmp
			 * has trailing space for sadb_req_from_sa to add
			 * the SA-specific bits.
			 */
			/* sadb_req_from_sa() requires the SA lock be held. */
			mutex_enter(&walker->ipsa_lock);
			if (ipsec_capab_match(ill,
			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
			    walker, ns)) {
				nmp = copymsg(mp);
				if (nmp == NULL) {
					IPSECHW_DEBUG(IPSECHW_SADB,
					    ("sadb_ill_df: alloc error\n"));
					error = ENOMEM;
					mutex_exit(&walker->ipsa_lock);
					break;
				}
				/*
				 * On B_FALSE, sadb_req_from_sa() has freed
				 * nmp itself, so there is nothing to queue.
				 */
				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
					nmp->b_next = salist;
					salist = nmp;
				}
			}
			mutex_exit(&walker->ipsa_lock);
		}
		mutex_exit(&fanout[i].isaf_lock);
		/*
		 * The bucket lock has been dropped; now hand the queued
		 * requests to IP one at a time.  This also runs after an
		 * allocation error, so nothing already built is leaked.
		 */
		while (salist != NULL) {
			nmp = salist;
			salist = nmp->b_next;
			nmp->b_next = NULL;
			ill_dlpi_send(ill, nmp);
		}
		if (error != 0)
			break;	/* out of for loop. */
	}
}
949 
950 /*
951  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
952  * the type specified by sa_type to the specified ill.
953  *
954  * We call for each fanout table defined by the SADB (one per
955  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
956  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
957  * message to the ill.
958  */
959 void
960 sadb_ill_download(ill_t *ill, uint_t sa_type)
961 {
962 	mblk_t *protomp;	/* prototype message */
963 	dl_control_req_t *ctrl;
964 	sadbp_t *spp;
965 	sadb_t *sp;
966 	int dlt;
967 	ip_stack_t	*ipst = ill->ill_ipst;
968 	netstack_t	*ns = ipst->ips_netstack;
969 
970 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
971 
972 	/*
973 	 * Allocate and initialize prototype answer. A duplicate for
974 	 * each SA is sent down to the interface.
975 	 */
976 
977 	/* DL_CONTROL_REQ M_PROTO mblk_t */
978 	protomp = allocb(sizeof (dl_control_req_t) +
979 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
980 	if (protomp == NULL)
981 		return;
982 	protomp->b_datap->db_type = M_PROTO;
983 
984 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
985 	if (sa_type == SADB_SATYPE_ESP) {
986 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
987 
988 		spp = &espstack->esp_sadb;
989 	} else {
990 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
991 
992 		spp = &ahstack->ah_sadb;
993 	}
994 
995 	ctrl = (dl_control_req_t *)protomp->b_wptr;
996 	ctrl->dl_primitive = DL_CONTROL_REQ;
997 	ctrl->dl_operation = DL_CO_SET;
998 	ctrl->dl_type = dlt;
999 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
1000 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
1001 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
1002 	    sizeof (dl_ct_ipsec_key_t);
1003 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
1004 	protomp->b_wptr += sizeof (dl_control_req_t);
1005 
1006 	/*
1007 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
1008 	 * and dl_ct_ipsec_t
1009 	 */
1010 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
1011 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1012 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1013 	freemsg(protomp);
1014 }
1015 
1016 /*
1017  * Call me to free up a security association fanout.  Use the forever
1018  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
1019  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
1020  * when a module is unloaded).
1021  */
1022 static void
1023 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
1024     boolean_t inbound)
1025 {
1026 	int i;
1027 	isaf_t *table = *tablep;
1028 	uint8_t protocol;
1029 
1030 	if (table == NULL)
1031 		return;
1032 
1033 	for (i = 0; i < numentries; i++) {
1034 		mutex_enter(&table[i].isaf_lock);
1035 		while (table[i].isaf_ipsa != NULL) {
1036 			if (inbound && cl_inet_deletespi &&
1037 			    (table[i].isaf_ipsa->ipsa_state !=
1038 			    IPSA_STATE_ACTIVE_ELSEWHERE) &&
1039 			    (table[i].isaf_ipsa->ipsa_state !=
1040 			    IPSA_STATE_IDLE)) {
1041 				protocol = (table[i].isaf_ipsa->ipsa_type ==
1042 				    SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
1043 				cl_inet_deletespi(protocol,
1044 				    table[i].isaf_ipsa->ipsa_spi);
1045 			}
1046 			sadb_unlinkassoc(table[i].isaf_ipsa);
1047 		}
1048 		table[i].isaf_gen++;
1049 		mutex_exit(&table[i].isaf_lock);
1050 		if (forever)
1051 			mutex_destroy(&(table[i].isaf_lock));
1052 	}
1053 
1054 	if (forever) {
1055 		*tablep = NULL;
1056 		kmem_free(table, numentries * sizeof (*table));
1057 	}
1058 }
1059 
1060 /*
1061  * Entry points to sadb_destroyer().
1062  */
1063 static void
1064 sadb_flush(sadb_t *sp, netstack_t *ns)
1065 {
1066 	/*
1067 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1068 	 * enforcement, there would be a subtlety where I could add on the
1069 	 * heels of a flush.  With keysock's enforcement, however, this
1070 	 * makes ESP's job easy.
1071 	 */
1072 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
1073 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
1074 
1075 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1076 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
1077 }
1078 
1079 static void
1080 sadb_destroy(sadb_t *sp, netstack_t *ns)
1081 {
1082 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
1083 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
1084 
1085 	/* For each acquire, destroy it, including the bucket mutex. */
1086 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1087 
1088 	ASSERT(sp->sdb_of == NULL);
1089 	ASSERT(sp->sdb_if == NULL);
1090 	ASSERT(sp->sdb_acq == NULL);
1091 }
1092 
1093 static void
1094 sadb_send_flush_req(sadbp_t *spp)
1095 {
1096 	mblk_t *ctl_mp;
1097 
1098 	/*
1099 	 * we've been unplumbed, or never were plumbed; don't go there.
1100 	 */
1101 	if (spp->s_ip_q == NULL)
1102 		return;
1103 
1104 	/* have IP send a flush msg to the IPsec accelerators */
1105 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1106 	if (ctl_mp != NULL)
1107 		putnext(spp->s_ip_q, ctl_mp);
1108 }
1109 
1110 void
1111 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1112 {
1113 	sadb_flush(&spp->s_v4, ns);
1114 	sadb_flush(&spp->s_v6, ns);
1115 
1116 	sadb_send_flush_req(spp);
1117 }
1118 
1119 void
1120 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1121 {
1122 	sadb_destroy(&spp->s_v4, ns);
1123 	sadb_destroy(&spp->s_v6, ns);
1124 
1125 	sadb_send_flush_req(spp);
1126 	if (spp->s_satype == SADB_SATYPE_AH) {
1127 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1128 
1129 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1130 	}
1131 }
1132 
1133 
1134 /*
1135  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1136  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1137  * EINVAL.
1138  */
1139 int
1140 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
1141     sadb_lifetime_t *idle)
1142 {
1143 	if (hard == NULL || soft == NULL)
1144 		return (0);
1145 
1146 	if (hard->sadb_lifetime_allocations != 0 &&
1147 	    soft->sadb_lifetime_allocations != 0 &&
1148 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1149 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1150 
1151 	if (hard->sadb_lifetime_bytes != 0 &&
1152 	    soft->sadb_lifetime_bytes != 0 &&
1153 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1154 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1155 
1156 	if (hard->sadb_lifetime_addtime != 0 &&
1157 	    soft->sadb_lifetime_addtime != 0 &&
1158 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1159 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1160 
1161 	if (hard->sadb_lifetime_usetime != 0 &&
1162 	    soft->sadb_lifetime_usetime != 0 &&
1163 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1164 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1165 
1166 	if (idle != NULL) {
1167 		if (hard->sadb_lifetime_addtime != 0 &&
1168 		    idle->sadb_lifetime_addtime != 0 &&
1169 		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1170 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1171 
1172 		if (soft->sadb_lifetime_addtime != 0 &&
1173 		    idle->sadb_lifetime_addtime != 0 &&
1174 		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1175 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1176 
1177 		if (hard->sadb_lifetime_usetime != 0 &&
1178 		    idle->sadb_lifetime_usetime != 0 &&
1179 		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1180 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1181 
1182 		if (soft->sadb_lifetime_usetime != 0 &&
1183 		    idle->sadb_lifetime_usetime != 0 &&
1184 		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1185 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1186 	}
1187 
1188 	return (0);
1189 }
1190 
1191 /*
1192  * Clone a security association for the purposes of inserting a single SA
1193  * into inbound and outbound tables respectively. This function should only
1194  * be called from sadb_common_add().
1195  */
1196 static ipsa_t *
1197 sadb_cloneassoc(ipsa_t *ipsa)
1198 {
1199 	ipsa_t *newbie;
1200 	boolean_t error = B_FALSE;
1201 
1202 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
1203 
1204 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1205 	if (newbie == NULL)
1206 		return (NULL);
1207 
1208 	/* Copy over what we can. */
1209 	*newbie = *ipsa;
1210 
1211 	/* bzero and initialize locks, in case *_init() allocates... */
1212 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1213 
1214 	/*
1215 	 * While somewhat dain-bramaged, the most graceful way to
1216 	 * recover from errors is to keep plowing through the
1217 	 * allocations, and getting what I can.  It's easier to call
1218 	 * sadb_freeassoc() on the stillborn clone when all the
1219 	 * pointers aren't pointing to the parent's data.
1220 	 */
1221 
1222 	if (ipsa->ipsa_authkey != NULL) {
1223 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1224 		    KM_NOSLEEP);
1225 		if (newbie->ipsa_authkey == NULL) {
1226 			error = B_TRUE;
1227 		} else {
1228 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1229 			    newbie->ipsa_authkeylen);
1230 
1231 			newbie->ipsa_kcfauthkey.ck_data =
1232 			    newbie->ipsa_authkey;
1233 		}
1234 
1235 		if (newbie->ipsa_amech.cm_param != NULL) {
1236 			newbie->ipsa_amech.cm_param =
1237 			    (char *)&newbie->ipsa_mac_len;
1238 		}
1239 	}
1240 
1241 	if (ipsa->ipsa_encrkey != NULL) {
1242 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1243 		    KM_NOSLEEP);
1244 		if (newbie->ipsa_encrkey == NULL) {
1245 			error = B_TRUE;
1246 		} else {
1247 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1248 			    newbie->ipsa_encrkeylen);
1249 
1250 			newbie->ipsa_kcfencrkey.ck_data =
1251 			    newbie->ipsa_encrkey;
1252 		}
1253 	}
1254 
1255 	newbie->ipsa_authtmpl = NULL;
1256 	newbie->ipsa_encrtmpl = NULL;
1257 	newbie->ipsa_haspeer = B_TRUE;
1258 
1259 	if (ipsa->ipsa_integ != NULL) {
1260 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1261 		    KM_NOSLEEP);
1262 		if (newbie->ipsa_integ == NULL) {
1263 			error = B_TRUE;
1264 		} else {
1265 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1266 			    newbie->ipsa_integlen);
1267 		}
1268 	}
1269 
1270 	if (ipsa->ipsa_sens != NULL) {
1271 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1272 		    KM_NOSLEEP);
1273 		if (newbie->ipsa_sens == NULL) {
1274 			error = B_TRUE;
1275 		} else {
1276 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1277 			    newbie->ipsa_senslen);
1278 		}
1279 	}
1280 
1281 	if (ipsa->ipsa_src_cid != NULL) {
1282 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1283 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1284 	}
1285 
1286 	if (ipsa->ipsa_dst_cid != NULL) {
1287 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1288 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1289 	}
1290 
1291 	if (error) {
1292 		sadb_freeassoc(newbie);
1293 		return (NULL);
1294 	}
1295 
1296 	return (newbie);
1297 }
1298 
1299 /*
1300  * Initialize a SADB address extension at the address specified by addrext.
1301  * Return a pointer to the end of the new address extension.
1302  */
1303 static uint8_t *
1304 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1305     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1306 {
1307 	struct sockaddr_in *sin;
1308 	struct sockaddr_in6 *sin6;
1309 	uint8_t *cur = start;
1310 	int addrext_len;
1311 	int sin_len;
1312 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1313 
1314 	if (cur == NULL)
1315 		return (NULL);
1316 
1317 	cur += sizeof (*addrext);
1318 	if (cur > end)
1319 		return (NULL);
1320 
1321 	addrext->sadb_address_proto = proto;
1322 	addrext->sadb_address_prefixlen = prefix;
1323 	addrext->sadb_address_reserved = 0;
1324 	addrext->sadb_address_exttype = exttype;
1325 
1326 	switch (af) {
1327 	case AF_INET:
1328 		sin = (struct sockaddr_in *)cur;
1329 		sin_len = sizeof (*sin);
1330 		cur += sin_len;
1331 		if (cur > end)
1332 			return (NULL);
1333 
1334 		sin->sin_family = af;
1335 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1336 		sin->sin_port = port;
1337 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1338 		break;
1339 	case AF_INET6:
1340 		sin6 = (struct sockaddr_in6 *)cur;
1341 		sin_len = sizeof (*sin6);
1342 		cur += sin_len;
1343 		if (cur > end)
1344 			return (NULL);
1345 
1346 		bzero(sin6, sizeof (*sin6));
1347 		sin6->sin6_family = af;
1348 		sin6->sin6_port = port;
1349 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1350 		break;
1351 	}
1352 
1353 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1354 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1355 
1356 	cur = start + addrext_len;
1357 	if (cur > end)
1358 		cur = NULL;
1359 
1360 	return (cur);
1361 }
1362 
1363 /*
1364  * Construct a key management cookie extension.
1365  */
1366 
1367 static uint8_t *
1368 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1369 {
1370 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1371 
1372 	if (cur == NULL)
1373 		return (NULL);
1374 
1375 	cur += sizeof (*kmcext);
1376 
1377 	if (cur > end)
1378 		return (NULL);
1379 
1380 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1381 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1382 	kmcext->sadb_x_kmc_proto = kmp;
1383 	kmcext->sadb_x_kmc_cookie = kmc;
1384 	kmcext->sadb_x_kmc_reserved = 0;
1385 
1386 	return (cur);
1387 }
1388 
1389 /*
1390  * Given an original message header with sufficient space following it, and an
1391  * SA, construct a full PF_KEY message with all of the relevant extensions.
1392  * This is mostly used for SADB_GET, and SADB_DUMP.
1393  */
1394 static mblk_t *
1395 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1396 {
1397 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1398 	int srcidsize, dstidsize;
1399 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1400 				/* src/dst and proxy sockaddrs. */
1401 	/*
1402 	 * The following are pointers into the PF_KEY message this PF_KEY
1403 	 * message creates.
1404 	 */
1405 	sadb_msg_t *newsamsg;
1406 	sadb_sa_t *assoc;
1407 	sadb_lifetime_t *lt;
1408 	sadb_key_t *key;
1409 	sadb_ident_t *ident;
1410 	sadb_sens_t *sens;
1411 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1412 	sadb_x_replay_ctr_t *repl_ctr;
1413 	sadb_x_pair_t *pair_ext;
1414 
1415 	mblk_t *mp;
1416 	uint64_t *bitmap;
1417 	uint8_t *cur, *end;
1418 	/* These indicate the presence of the above extension fields. */
1419 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1420 	boolean_t idle;
1421 	boolean_t paired;
1422 	uint32_t otherspi;
1423 
1424 	/* First off, figure out the allocation length for this message. */
1425 
1426 	/*
1427 	 * Constant stuff.  This includes base, SA, address (src, dst),
1428 	 * and lifetime (current).
1429 	 */
1430 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1431 	    sizeof (sadb_lifetime_t);
1432 
1433 	fam = ipsa->ipsa_addrfam;
1434 	switch (fam) {
1435 	case AF_INET:
1436 		addrsize = roundup(sizeof (struct sockaddr_in) +
1437 		    sizeof (sadb_address_t), sizeof (uint64_t));
1438 		break;
1439 	case AF_INET6:
1440 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1441 		    sizeof (sadb_address_t), sizeof (uint64_t));
1442 		break;
1443 	default:
1444 		return (NULL);
1445 	}
1446 	/*
1447 	 * Allocate TWO address extensions, for source and destination.
1448 	 * (Thus, the * 2.)
1449 	 */
1450 	alloclen += addrsize * 2;
1451 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1452 		alloclen += addrsize;
1453 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1454 		alloclen += addrsize;
1455 
1456 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1457 		paired = B_TRUE;
1458 		alloclen += sizeof (sadb_x_pair_t);
1459 		otherspi = ipsa->ipsa_otherspi;
1460 	} else {
1461 		paired = B_FALSE;
1462 	}
1463 
1464 	/* How 'bout other lifetimes? */
1465 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1466 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1467 		alloclen += sizeof (sadb_lifetime_t);
1468 		soft = B_TRUE;
1469 	} else {
1470 		soft = B_FALSE;
1471 	}
1472 
1473 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1474 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1475 		alloclen += sizeof (sadb_lifetime_t);
1476 		hard = B_TRUE;
1477 	} else {
1478 		hard = B_FALSE;
1479 	}
1480 
1481 	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1482 		alloclen += sizeof (sadb_lifetime_t);
1483 		idle = B_TRUE;
1484 	} else {
1485 		idle = B_FALSE;
1486 	}
1487 
1488 	/* Inner addresses. */
1489 	if (ipsa->ipsa_innerfam == 0) {
1490 		isrc = B_FALSE;
1491 		idst = B_FALSE;
1492 	} else {
1493 		pfam = ipsa->ipsa_innerfam;
1494 		switch (pfam) {
1495 		case AF_INET6:
1496 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1497 			    sizeof (sadb_address_t), sizeof (uint64_t));
1498 			break;
1499 		case AF_INET:
1500 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1501 			    sizeof (sadb_address_t), sizeof (uint64_t));
1502 			break;
1503 		default:
1504 			cmn_err(CE_PANIC,
1505 			    "IPsec SADB: Proxy length failure.\n");
1506 			break;
1507 		}
1508 		isrc = B_TRUE;
1509 		idst = B_TRUE;
1510 		alloclen += 2 * paddrsize;
1511 	}
1512 
1513 	/* For the following fields, assume that length != 0 ==> stuff */
1514 	if (ipsa->ipsa_authkeylen != 0) {
1515 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1516 		    sizeof (uint64_t));
1517 		alloclen += authsize;
1518 		auth = B_TRUE;
1519 	} else {
1520 		auth = B_FALSE;
1521 	}
1522 
1523 	if (ipsa->ipsa_encrkeylen != 0) {
1524 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1525 		    sizeof (uint64_t));
1526 		alloclen += encrsize;
1527 		encr = B_TRUE;
1528 	} else {
1529 		encr = B_FALSE;
1530 	}
1531 
1532 	/* No need for roundup on sens and integ. */
1533 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1534 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1535 		    ipsa->ipsa_senslen;
1536 		sensinteg = B_TRUE;
1537 	} else {
1538 		sensinteg = B_FALSE;
1539 	}
1540 
1541 	/*
1542 	 * Must use strlen() here for lengths.	Identities use NULL
1543 	 * pointers to indicate their nonexistence.
1544 	 */
1545 	if (ipsa->ipsa_src_cid != NULL) {
1546 		srcidsize = roundup(sizeof (sadb_ident_t) +
1547 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1548 		    sizeof (uint64_t));
1549 		alloclen += srcidsize;
1550 		srcid = B_TRUE;
1551 	} else {
1552 		srcid = B_FALSE;
1553 	}
1554 
1555 	if (ipsa->ipsa_dst_cid != NULL) {
1556 		dstidsize = roundup(sizeof (sadb_ident_t) +
1557 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1558 		    sizeof (uint64_t));
1559 		alloclen += dstidsize;
1560 		dstid = B_TRUE;
1561 	} else {
1562 		dstid = B_FALSE;
1563 	}
1564 
1565 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1566 		alloclen += sizeof (sadb_x_kmc_t);
1567 
1568 	if (ipsa->ipsa_replay != 0) {
1569 		alloclen += sizeof (sadb_x_replay_ctr_t);
1570 	}
1571 
1572 	/* Make sure the allocation length is a multiple of 8 bytes. */
1573 	ASSERT((alloclen & 0x7) == 0);
1574 
1575 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1576 	mp = allocb(alloclen, BPRI_HI);
1577 	if (mp == NULL)
1578 		return (NULL);
1579 
1580 	mp->b_wptr += alloclen;
1581 	end = mp->b_wptr;
1582 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1583 	*newsamsg = *samsg;
1584 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1585 
1586 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1587 
1588 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1589 
1590 	assoc = (sadb_sa_t *)(newsamsg + 1);
1591 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1592 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1593 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1594 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1595 	assoc->sadb_sa_state = ipsa->ipsa_state;
1596 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1597 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1598 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1599 
1600 	lt = (sadb_lifetime_t *)(assoc + 1);
1601 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1602 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1603 	/* We do not support the concept. */
1604 	lt->sadb_lifetime_allocations = 0;
1605 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1606 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1607 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1608 
1609 	if (hard) {
1610 		lt++;
1611 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1612 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1613 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1614 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1615 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1616 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1617 	}
1618 
1619 	if (soft) {
1620 		lt++;
1621 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1622 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1623 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1624 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1625 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1626 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1627 	}
1628 
1629 	if (idle) {
1630 		lt++;
1631 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1632 		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1633 		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1634 		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1635 	}
1636 
1637 	cur = (uint8_t *)(lt + 1);
1638 
1639 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1640 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1641 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1642 	    SA_PROTO(ipsa), 0);
1643 	if (cur == NULL) {
1644 		freemsg(mp);
1645 		mp = NULL;
1646 		goto bail;
1647 	}
1648 
1649 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1650 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1651 	    SA_PROTO(ipsa), 0);
1652 	if (cur == NULL) {
1653 		freemsg(mp);
1654 		mp = NULL;
1655 		goto bail;
1656 	}
1657 
1658 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1659 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1660 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1661 		    IPPROTO_UDP, 0);
1662 		if (cur == NULL) {
1663 			freemsg(mp);
1664 			mp = NULL;
1665 			goto bail;
1666 		}
1667 	}
1668 
1669 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1670 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1671 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1672 		    IPPROTO_UDP, 0);
1673 		if (cur == NULL) {
1674 			freemsg(mp);
1675 			mp = NULL;
1676 			goto bail;
1677 		}
1678 	}
1679 
1680 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1681 	if (isrc) {
1682 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1683 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1684 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1685 		if (cur == NULL) {
1686 			freemsg(mp);
1687 			mp = NULL;
1688 			goto bail;
1689 		}
1690 	}
1691 
1692 	if (idst) {
1693 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1694 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1695 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1696 		if (cur == NULL) {
1697 			freemsg(mp);
1698 			mp = NULL;
1699 			goto bail;
1700 		}
1701 	}
1702 
1703 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1704 		cur = sadb_make_kmc_ext(cur, end,
1705 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1706 		if (cur == NULL) {
1707 			freemsg(mp);
1708 			mp = NULL;
1709 			goto bail;
1710 		}
1711 	}
1712 
1713 	walker = (sadb_ext_t *)cur;
1714 	if (auth) {
1715 		key = (sadb_key_t *)walker;
1716 		key->sadb_key_len = SADB_8TO64(authsize);
1717 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1718 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1719 		key->sadb_key_reserved = 0;
1720 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1721 		walker = (sadb_ext_t *)((uint64_t *)walker +
1722 		    walker->sadb_ext_len);
1723 	}
1724 
1725 	if (encr) {
1726 		key = (sadb_key_t *)walker;
1727 		key->sadb_key_len = SADB_8TO64(encrsize);
1728 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1729 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1730 		key->sadb_key_reserved = 0;
1731 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1732 		walker = (sadb_ext_t *)((uint64_t *)walker +
1733 		    walker->sadb_ext_len);
1734 	}
1735 
1736 	if (srcid) {
1737 		ident = (sadb_ident_t *)walker;
1738 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1739 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1740 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1741 		ident->sadb_ident_id = 0;
1742 		ident->sadb_ident_reserved = 0;
1743 		(void) strcpy((char *)(ident + 1),
1744 		    ipsa->ipsa_src_cid->ipsid_cid);
1745 		walker = (sadb_ext_t *)((uint64_t *)walker +
1746 		    walker->sadb_ext_len);
1747 	}
1748 
1749 	if (dstid) {
1750 		ident = (sadb_ident_t *)walker;
1751 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1752 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1753 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1754 		ident->sadb_ident_id = 0;
1755 		ident->sadb_ident_reserved = 0;
1756 		(void) strcpy((char *)(ident + 1),
1757 		    ipsa->ipsa_dst_cid->ipsid_cid);
1758 		walker = (sadb_ext_t *)((uint64_t *)walker +
1759 		    walker->sadb_ext_len);
1760 	}
1761 
1762 	if (sensinteg) {
1763 		sens = (sadb_sens_t *)walker;
1764 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1765 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1766 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1767 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1768 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1769 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1770 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1771 		sens->sadb_sens_reserved = 0;
1772 		bitmap = (uint64_t *)(sens + 1);
1773 		if (ipsa->ipsa_sens != NULL) {
1774 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1775 			bitmap += sens->sadb_sens_sens_len;
1776 		}
1777 		if (ipsa->ipsa_integ != NULL)
1778 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1779 		walker = (sadb_ext_t *)((uint64_t *)walker +
1780 		    walker->sadb_ext_len);
1781 	}
1782 
1783 	if (paired) {
1784 		pair_ext = (sadb_x_pair_t *)walker;
1785 
1786 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1787 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1788 		pair_ext->sadb_x_pair_spi = otherspi;
1789 
1790 		walker = (sadb_ext_t *)((uint64_t *)walker +
1791 		    walker->sadb_ext_len);
1792 	}
1793 
1794 	if (ipsa->ipsa_replay != 0) {
1795 		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1796 		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1797 		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1798 		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1799 		repl_ctr->sadb_x_rc_replay64 = 0;
1800 		walker = (sadb_ext_t *)(repl_ctr + 1);
1801 	}
1802 
1803 bail:
1804 	/* Pardon any delays... */
1805 	mutex_exit(&ipsa->ipsa_lock);
1806 
1807 	return (mp);
1808 }
1809 
1810 /*
1811  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1812  * and adjust base message accordingly.
1813  *
1814  * Assume message is pulled up in one piece of contiguous memory.
1815  *
1816  * Say if we start off with:
1817  *
1818  * +------+----+-------------+-----------+---------------+---------------+
1819  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1820  * +------+----+-------------+-----------+---------------+---------------+
1821  *
1822  * we will end up with
1823  *
1824  * +------+----+-------------+-----------+---------------+
1825  * | base | SA | source addr | dest addr | soft lifetime |
1826  * +------+----+-------------+-----------+---------------+
1827  */
1828 static void
1829 sadb_strip(sadb_msg_t *samsg)
1830 {
1831 	sadb_ext_t *ext;
1832 	uint8_t *target = NULL;
1833 	uint8_t *msgend;
1834 	int sofar = SADB_8TO64(sizeof (*samsg));
1835 	int copylen;
1836 
1837 	ext = (sadb_ext_t *)(samsg + 1);
1838 	msgend = (uint8_t *)samsg;
1839 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1840 	while ((uint8_t *)ext < msgend) {
1841 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1842 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1843 		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1844 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1845 			/*
1846 			 * Aha!	 I found a header to be erased.
1847 			 */
1848 
1849 			if (target != NULL) {
1850 				/*
1851 				 * If I had a previous header to be erased,
1852 				 * copy over it.  I can get away with just
1853 				 * copying backwards because the target will
1854 				 * always be 8 bytes behind the source.
1855 				 */
1856 				copylen = ((uint8_t *)ext) - (target +
1857 				    SADB_64TO8(
1858 				    ((sadb_ext_t *)target)->sadb_ext_len));
1859 				ovbcopy(((uint8_t *)ext - copylen), target,
1860 				    copylen);
1861 				target += copylen;
1862 				((sadb_ext_t *)target)->sadb_ext_len =
1863 				    SADB_8TO64(((uint8_t *)ext) - target +
1864 				    SADB_64TO8(ext->sadb_ext_len));
1865 			} else {
1866 				target = (uint8_t *)ext;
1867 			}
1868 		} else {
1869 			sofar += ext->sadb_ext_len;
1870 		}
1871 
1872 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1873 	}
1874 
1875 	ASSERT((uint8_t *)ext == msgend);
1876 
1877 	if (target != NULL) {
1878 		copylen = ((uint8_t *)ext) - (target +
1879 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1880 		if (copylen != 0)
1881 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1882 	}
1883 
1884 	/* Adjust samsg. */
1885 	samsg->sadb_msg_len = (uint16_t)sofar;
1886 
1887 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1888 }
1889 
1890 /*
1891  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1892  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1893  * the sending keysock instance is included.
1894  */
1895 void
1896 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1897     uint_t serial)
1898 {
1899 	mblk_t *msg = mp->b_cont;
1900 	sadb_msg_t *samsg;
1901 	keysock_out_t *kso;
1902 
1903 	/*
1904 	 * Enough functions call this to merit a NULL queue check.
1905 	 */
1906 	if (pfkey_q == NULL) {
1907 		freemsg(mp);
1908 		return;
1909 	}
1910 
1911 	ASSERT(msg != NULL);
1912 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1913 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1914 	samsg = (sadb_msg_t *)msg->b_rptr;
1915 	kso = (keysock_out_t *)mp->b_rptr;
1916 
1917 	kso->ks_out_type = KEYSOCK_OUT;
1918 	kso->ks_out_len = sizeof (*kso);
1919 	kso->ks_out_serial = serial;
1920 
1921 	/*
1922 	 * Only send the base message up in the event of an error.
1923 	 * Don't worry about bzero()-ing, because it was probably bogus
1924 	 * anyway.
1925 	 */
1926 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1927 	samsg = (sadb_msg_t *)msg->b_rptr;
1928 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1929 	samsg->sadb_msg_errno = (uint8_t)error;
1930 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1931 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1932 
1933 	putnext(pfkey_q, mp);
1934 }
1935 
1936 /*
1937  * Send a successful return packet back to keysock via the queue in pfkey_q.
1938  *
1939  * Often, an SA is associated with the reply message, it's passed in if needed,
1940  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1941  * and the caller will release said refcnt.
1942  */
1943 void
1944 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1945     keysock_in_t *ksi, ipsa_t *ipsa)
1946 {
1947 	keysock_out_t *kso;
1948 	mblk_t *mp1;
1949 	sadb_msg_t *newsamsg;
1950 	uint8_t *oldend;
1951 
1952 	ASSERT((mp->b_cont != NULL) &&
1953 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1954 	    ((void *)mp->b_rptr == (void *)ksi));
1955 
1956 	switch (samsg->sadb_msg_type) {
1957 	case SADB_ADD:
1958 	case SADB_UPDATE:
1959 	case SADB_X_UPDATEPAIR:
1960 	case SADB_X_DELPAIR_STATE:
1961 	case SADB_FLUSH:
1962 	case SADB_DUMP:
1963 		/*
1964 		 * I have all of the message already.  I just need to strip
1965 		 * out the keying material and echo the message back.
1966 		 *
1967 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1968 		 * work.  When DUMP reaches here, it should only be a base
1969 		 * message.
1970 		 */
1971 	justecho:
1972 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1973 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1974 		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1975 			sadb_strip(samsg);
1976 			/* Assume PF_KEY message is contiguous. */
1977 			ASSERT(mp->b_cont->b_cont == NULL);
1978 			oldend = mp->b_cont->b_wptr;
1979 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1980 			    SADB_64TO8(samsg->sadb_msg_len);
1981 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1982 		}
1983 		break;
1984 	case SADB_GET:
1985 		/*
1986 		 * Do a lot of work here, because of the ipsa I just found.
1987 		 * First construct the new PF_KEY message, then abandon
1988 		 * the old one.
1989 		 */
1990 		mp1 = sadb_sa2msg(ipsa, samsg);
1991 		if (mp1 == NULL) {
1992 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1993 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1994 			return;
1995 		}
1996 		freemsg(mp->b_cont);
1997 		mp->b_cont = mp1;
1998 		break;
1999 	case SADB_DELETE:
2000 	case SADB_X_DELPAIR:
2001 		if (ipsa == NULL)
2002 			goto justecho;
2003 		/*
2004 		 * Because listening KMds may require more info, treat
2005 		 * DELETE like a special case of GET.
2006 		 */
2007 		mp1 = sadb_sa2msg(ipsa, samsg);
2008 		if (mp1 == NULL) {
2009 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
2010 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
2011 			return;
2012 		}
2013 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
2014 		sadb_strip(newsamsg);
2015 		oldend = mp1->b_wptr;
2016 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
2017 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
2018 		freemsg(mp->b_cont);
2019 		mp->b_cont = mp1;
2020 		break;
2021 	default:
2022 		if (mp != NULL)
2023 			freemsg(mp);
2024 		return;
2025 	}
2026 
2027 	/* ksi is now null and void. */
2028 	kso = (keysock_out_t *)ksi;
2029 	kso->ks_out_type = KEYSOCK_OUT;
2030 	kso->ks_out_len = sizeof (*kso);
2031 	kso->ks_out_serial = ksi->ks_in_serial;
2032 	/* We're ready to send... */
2033 	putnext(pfkey_q, mp);
2034 }
2035 
2036 /*
2037  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
2038  */
2039 void
2040 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
2041     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
2042 {
2043 	keysock_hello_ack_t *kha;
2044 	queue_t *oldq;
2045 
2046 	ASSERT(OTHERQ(q) != NULL);
2047 
2048 	/*
2049 	 * First, check atomically that I'm the first and only keysock
2050 	 * instance.
2051 	 *
2052 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
2053 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
2054 	 * messages.
2055 	 */
2056 
2057 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
2058 	if (oldq != NULL) {
2059 		ASSERT(oldq != q);
2060 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
2061 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
2062 		freemsg(mp);
2063 		return;
2064 	}
2065 
2066 	kha = (keysock_hello_ack_t *)mp->b_rptr;
2067 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
2068 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
2069 	kha->ks_hello_satype = (uint8_t)satype;
2070 
2071 	/*
2072 	 * If we made it past the casptr, then we have "exclusive" access
2073 	 * to the timeout handle.  Fire it off in 4 seconds, because it
2074 	 * just seems like a good interval.
2075 	 */
2076 	*top = qtimeout(*pfkey_qp, ager, agerarg, drv_usectohz(4000000));
2077 
2078 	putnext(*pfkey_qp, mp);
2079 }
2080 
2081 /*
2082  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
2083  *
2084  * Check addresses themselves for wildcard or multicast.
2085  * Check ire table for local/non-local/broadcast.
2086  */
2087 int
2088 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
2089     netstack_t *ns)
2090 {
2091 	sadb_address_t *addr = (sadb_address_t *)ext;
2092 	struct sockaddr_in *sin;
2093 	struct sockaddr_in6 *sin6;
2094 	ire_t *ire;
2095 	int diagnostic, type;
2096 	boolean_t normalized = B_FALSE;
2097 
2098 	ASSERT(ext != NULL);
2099 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
2100 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
2101 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
2102 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
2103 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
2104 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
2105 
2106 	/* Assign both sockaddrs, the compiler will do the right thing. */
2107 	sin = (struct sockaddr_in *)(addr + 1);
2108 	sin6 = (struct sockaddr_in6 *)(addr + 1);
2109 
2110 	if (sin6->sin6_family == AF_INET6) {
2111 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2112 			/*
2113 			 * Convert to an AF_INET sockaddr.  This means the
2114 			 * return messages will have the extra space, but have
2115 			 * AF_INET sockaddrs instead of AF_INET6.
2116 			 *
2117 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
2118 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
2119 			 * equal to AF_INET <v4>, it shouldnt be a huge
2120 			 * problem.
2121 			 */
2122 			sin->sin_family = AF_INET;
2123 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
2124 			    &sin->sin_addr);
2125 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
2126 			normalized = B_TRUE;
2127 		}
2128 	} else if (sin->sin_family != AF_INET) {
2129 		switch (ext->sadb_ext_type) {
2130 		case SADB_EXT_ADDRESS_SRC:
2131 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2132 			break;
2133 		case SADB_EXT_ADDRESS_DST:
2134 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2135 			break;
2136 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2137 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2138 			break;
2139 		case SADB_X_EXT_ADDRESS_INNER_DST:
2140 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2141 			break;
2142 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2143 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2144 			break;
2145 		case SADB_X_EXT_ADDRESS_NATT_REM:
2146 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2147 			break;
2148 			/* There is no default, see above ASSERT. */
2149 		}
2150 bail:
2151 		if (pfkey_q != NULL) {
2152 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2153 			    serial);
2154 		} else {
2155 			/*
2156 			 * Scribble in sadb_msg that we got passed in.
2157 			 * Overload "mp" to be an sadb_msg pointer.
2158 			 */
2159 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2160 
2161 			samsg->sadb_msg_errno = EINVAL;
2162 			samsg->sadb_x_msg_diagnostic = diagnostic;
2163 		}
2164 		return (KS_IN_ADDR_UNKNOWN);
2165 	}
2166 
2167 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2168 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2169 		/*
2170 		 * We need only check for prefix issues.
2171 		 */
2172 
2173 		/* Set diagnostic now, in case we need it later. */
2174 		diagnostic =
2175 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2176 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2177 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2178 
2179 		if (normalized)
2180 			addr->sadb_address_prefixlen -= 96;
2181 
2182 		/*
2183 		 * Verify and mask out inner-addresses based on prefix length.
2184 		 */
2185 		if (sin->sin_family == AF_INET) {
2186 			if (addr->sadb_address_prefixlen > 32)
2187 				goto bail;
2188 			sin->sin_addr.s_addr &=
2189 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2190 		} else {
2191 			in6_addr_t mask;
2192 
2193 			ASSERT(sin->sin_family == AF_INET6);
2194 			/*
2195 			 * ip_plen_to_mask_v6() returns NULL if the value in
2196 			 * question is out of range.
2197 			 */
2198 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2199 			    &mask) == NULL)
2200 				goto bail;
2201 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2202 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2203 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2204 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2205 		}
2206 
2207 		/* We don't care in these cases. */
2208 		return (KS_IN_ADDR_DONTCARE);
2209 	}
2210 
2211 	if (sin->sin_family == AF_INET6) {
2212 		/* Check the easy ones now. */
2213 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2214 			return (KS_IN_ADDR_MBCAST);
2215 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2216 			return (KS_IN_ADDR_UNSPEC);
2217 		/*
2218 		 * At this point, we're a unicast IPv6 address.
2219 		 *
2220 		 * A ctable lookup for local is sufficient here.  If we're
2221 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2222 		 *
2223 		 * XXX Zones alert -> me/notme decision needs to be tempered
2224 		 * by what zone we're in when we go to zone-aware IPsec.
2225 		 */
2226 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2227 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2228 		    ns->netstack_ip);
2229 		if (ire != NULL) {
2230 			/* Hey hey, it's local. */
2231 			IRE_REFRELE(ire);
2232 			return (KS_IN_ADDR_ME);
2233 		}
2234 	} else {
2235 		ASSERT(sin->sin_family == AF_INET);
2236 		if (sin->sin_addr.s_addr == INADDR_ANY)
2237 			return (KS_IN_ADDR_UNSPEC);
2238 		if (CLASSD(sin->sin_addr.s_addr))
2239 			return (KS_IN_ADDR_MBCAST);
2240 		/*
2241 		 * At this point we're a unicast or broadcast IPv4 address.
2242 		 *
2243 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2244 		 * A NULL return value is NOTME, otherwise, look at the
2245 		 * returned ire for broadcast or not and return accordingly.
2246 		 *
2247 		 * XXX Zones alert -> me/notme decision needs to be tempered
2248 		 * by what zone we're in when we go to zone-aware IPsec.
2249 		 */
2250 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2251 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2252 		    MATCH_IRE_TYPE, ns->netstack_ip);
2253 		if (ire != NULL) {
2254 			/* Check for local or broadcast */
2255 			type = ire->ire_type;
2256 			IRE_REFRELE(ire);
2257 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2258 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2259 			    KS_IN_ADDR_MBCAST);
2260 		}
2261 	}
2262 
2263 	return (KS_IN_ADDR_NOTME);
2264 }
2265 
2266 /*
2267  * Address normalizations and reality checks for inbound PF_KEY messages.
2268  *
2269  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2270  * the source to AF_INET.  Do the same for the inner sources.
2271  */
2272 boolean_t
2273 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2274 {
2275 	struct sockaddr_in *src, *isrc;
2276 	struct sockaddr_in6 *dst, *idst;
2277 	sadb_address_t *srcext, *dstext;
2278 	uint16_t sport;
2279 	sadb_ext_t **extv = ksi->ks_in_extv;
2280 	int rc;
2281 
2282 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2283 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2284 		    ksi->ks_in_serial, ns);
2285 		if (rc == KS_IN_ADDR_UNKNOWN)
2286 			return (B_FALSE);
2287 		if (rc == KS_IN_ADDR_MBCAST) {
2288 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2289 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2290 			return (B_FALSE);
2291 		}
2292 		ksi->ks_in_srctype = rc;
2293 	}
2294 
2295 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2296 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2297 		    ksi->ks_in_serial, ns);
2298 		if (rc == KS_IN_ADDR_UNKNOWN)
2299 			return (B_FALSE);
2300 		if (rc == KS_IN_ADDR_UNSPEC) {
2301 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2302 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2303 			return (B_FALSE);
2304 		}
2305 		ksi->ks_in_dsttype = rc;
2306 	}
2307 
2308 	/*
2309 	 * NAT-Traversal addrs are simple enough to not require all of
2310 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2311 	 * AF_INET.
2312 	 */
2313 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2314 		rc = sadb_addrcheck(pfkey_q, mp,
2315 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2316 
2317 		/*
2318 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2319 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2320 		 * AND local-port flexibility).
2321 		 */
2322 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2323 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2324 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2325 			    ksi->ks_in_serial);
2326 			return (B_FALSE);
2327 		}
2328 		src = (struct sockaddr_in *)
2329 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2330 		if (src->sin_family != AF_INET) {
2331 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2332 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2333 			    ksi->ks_in_serial);
2334 			return (B_FALSE);
2335 		}
2336 	}
2337 
2338 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2339 		rc = sadb_addrcheck(pfkey_q, mp,
2340 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2341 
2342 		/*
2343 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2344 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2345 		 */
2346 		if (rc != KS_IN_ADDR_NOTME &&
2347 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2348 		    rc == KS_IN_ADDR_UNSPEC)) {
2349 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2350 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2351 			    ksi->ks_in_serial);
2352 			return (B_FALSE);
2353 		}
2354 		src = (struct sockaddr_in *)
2355 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2356 		if (src->sin_family != AF_INET) {
2357 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2358 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2359 			    ksi->ks_in_serial);
2360 			return (B_FALSE);
2361 		}
2362 	}
2363 
2364 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2365 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2366 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2367 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2368 			    ksi->ks_in_serial);
2369 			return (B_FALSE);
2370 		}
2371 
2372 		if (sadb_addrcheck(pfkey_q, mp,
2373 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2374 		    == KS_IN_ADDR_UNKNOWN ||
2375 		    sadb_addrcheck(pfkey_q, mp,
2376 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2377 		    == KS_IN_ADDR_UNKNOWN)
2378 			return (B_FALSE);
2379 
2380 		isrc = (struct sockaddr_in *)
2381 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2382 		    1);
2383 		idst = (struct sockaddr_in6 *)
2384 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2385 		    1);
2386 		if (isrc->sin_family != idst->sin6_family) {
2387 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2388 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2389 			    ksi->ks_in_serial);
2390 			return (B_FALSE);
2391 		}
2392 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2393 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2394 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2395 			    ksi->ks_in_serial);
2396 			return (B_FALSE);
2397 	} else {
2398 		isrc = NULL;	/* For inner/outer port check below. */
2399 	}
2400 
2401 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2402 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2403 
2404 	if (dstext == NULL || srcext == NULL)
2405 		return (B_TRUE);
2406 
2407 	dst = (struct sockaddr_in6 *)(dstext + 1);
2408 	src = (struct sockaddr_in *)(srcext + 1);
2409 
2410 	if (isrc != NULL &&
2411 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2412 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2413 		/* Can't set inner and outer ports in one SA. */
2414 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2415 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2416 		    ksi->ks_in_serial);
2417 		return (B_FALSE);
2418 	}
2419 
2420 	if (dst->sin6_family == src->sin_family)
2421 		return (B_TRUE);
2422 
2423 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2424 		if (srcext->sadb_address_proto == 0) {
2425 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2426 		} else if (dstext->sadb_address_proto == 0) {
2427 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2428 		} else {
2429 			/* Inequal protocols, neither were 0.  Report error. */
2430 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2431 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2432 			    ksi->ks_in_serial);
2433 			return (B_FALSE);
2434 		}
2435 	}
2436 
2437 	/*
2438 	 * With the exception of an unspec IPv6 source and an IPv4
2439 	 * destination, address families MUST me matched.
2440 	 */
2441 	if (src->sin_family == AF_INET ||
2442 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2443 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2444 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2445 		return (B_FALSE);
2446 	}
2447 
2448 	/*
2449 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2450 	 * in the same place for sockaddr_in and sockaddr_in6.
2451 	 */
2452 	sport = src->sin_port;
2453 	bzero(src, sizeof (*src));
2454 	src->sin_family = AF_INET;
2455 	src->sin_port = sport;
2456 
2457 	return (B_TRUE);
2458 }
2459 
2460 /*
2461  * Set the results in "addrtype", given an IRE as requested by
2462  * sadb_addrcheck().
2463  */
2464 int
2465 sadb_addrset(ire_t *ire)
2466 {
2467 	if ((ire->ire_type & IRE_BROADCAST) ||
2468 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2469 	    (ire->ire_ipversion == IPV6_VERSION &&
2470 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2471 		return (KS_IN_ADDR_MBCAST);
2472 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2473 		return (KS_IN_ADDR_ME);
2474 	return (KS_IN_ADDR_NOTME);
2475 }
2476 
2477 
2478 /*
2479  * Walker callback function to delete sa's based on src/dst address.
2480  * Assumes that we're called with *head locked, no other locks held;
2481  * Conveniently, and not coincidentally, this is both what sadb_walker
2482  * gives us and also what sadb_unlinkassoc expects.
2483  */
2484 
2485 struct sadb_purge_state
2486 {
2487 	uint32_t *src;
2488 	uint32_t *dst;
2489 	sa_family_t af;
2490 	boolean_t inbnd;
2491 	char *sidstr;
2492 	char *didstr;
2493 	uint16_t sidtype;
2494 	uint16_t didtype;
2495 	uint32_t kmproto;
2496 	uint8_t sadb_sa_state;
2497 	mblk_t *mq;
2498 	sadb_t *sp;
2499 };
2500 
2501 static void
2502 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2503 {
2504 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2505 
2506 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2507 
2508 	mutex_enter(&entry->ipsa_lock);
2509 
2510 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2511 	    (ps->src != NULL &&
2512 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2513 	    (ps->dst != NULL &&
2514 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2515 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2516 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2517 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2518 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2519 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2520 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2521 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2522 		mutex_exit(&entry->ipsa_lock);
2523 		return;
2524 	}
2525 
2526 	if (ps->inbnd) {
2527 		sadb_delete_cluster(entry);
2528 	}
2529 	entry->ipsa_state = IPSA_STATE_DEAD;
2530 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2531 }
2532 
2533 /*
2534  * Common code to purge an SA with a matching src or dst address.
2535  * Don't kill larval SA's in such a purge.
2536  */
2537 int
2538 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2539     queue_t *ip_q)
2540 {
2541 	sadb_address_t *dstext =
2542 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2543 	sadb_address_t *srcext =
2544 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2545 	sadb_ident_t *dstid =
2546 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2547 	sadb_ident_t *srcid =
2548 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2549 	sadb_x_kmc_t *kmc =
2550 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2551 	struct sockaddr_in *src, *dst;
2552 	struct sockaddr_in6 *src6, *dst6;
2553 	struct sadb_purge_state ps;
2554 
2555 	/*
2556 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2557 	 * takes care of them.
2558 	 */
2559 
2560 	/* enforced by caller */
2561 	ASSERT((dstext != NULL) || (srcext != NULL));
2562 
2563 	ps.src = NULL;
2564 	ps.dst = NULL;
2565 #ifdef DEBUG
2566 	ps.af = (sa_family_t)-1;
2567 #endif
2568 	ps.mq = NULL;
2569 	ps.sidstr = NULL;
2570 	ps.didstr = NULL;
2571 	ps.kmproto = SADB_X_KMP_MAX + 1;
2572 
2573 	if (dstext != NULL) {
2574 		dst = (struct sockaddr_in *)(dstext + 1);
2575 		ps.af = dst->sin_family;
2576 		if (dst->sin_family == AF_INET6) {
2577 			dst6 = (struct sockaddr_in6 *)dst;
2578 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2579 		} else {
2580 			ps.dst = (uint32_t *)&dst->sin_addr;
2581 		}
2582 	}
2583 
2584 	if (srcext != NULL) {
2585 		src = (struct sockaddr_in *)(srcext + 1);
2586 		ps.af = src->sin_family;
2587 		if (src->sin_family == AF_INET6) {
2588 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2589 			ps.src = (uint32_t *)&src6->sin6_addr;
2590 		} else {
2591 			ps.src = (uint32_t *)&src->sin_addr;
2592 		}
2593 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2594 	}
2595 
2596 	ASSERT(ps.af != (sa_family_t)-1);
2597 
2598 	if (dstid != NULL) {
2599 		/*
2600 		 * NOTE:  May need to copy string in the future
2601 		 * if the inbound keysock message disappears for some strange
2602 		 * reason.
2603 		 */
2604 		ps.didstr = (char *)(dstid + 1);
2605 		ps.didtype = dstid->sadb_ident_type;
2606 	}
2607 
2608 	if (srcid != NULL) {
2609 		/*
2610 		 * NOTE:  May need to copy string in the future
2611 		 * if the inbound keysock message disappears for some strange
2612 		 * reason.
2613 		 */
2614 		ps.sidstr = (char *)(srcid + 1);
2615 		ps.sidtype = srcid->sadb_ident_type;
2616 	}
2617 
2618 	if (kmc != NULL)
2619 		ps.kmproto = kmc->sadb_x_kmc_proto;
2620 
2621 	/*
2622 	 * This is simple, crude, and effective.
2623 	 * Unimplemented optimizations (TBD):
2624 	 * - we can limit how many places we search based on where we
2625 	 * think the SA is filed.
2626 	 * - if we get a dst address, we can hash based on dst addr to find
2627 	 * the correct bucket in the outbound table.
2628 	 */
2629 	ps.inbnd = B_TRUE;
2630 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2631 	ps.inbnd = B_FALSE;
2632 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2633 
2634 	if (ps.mq != NULL)
2635 		sadb_drain_torchq(ip_q, ps.mq);
2636 
2637 	ASSERT(mp->b_cont != NULL);
2638 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2639 	    NULL);
2640 	return (0);
2641 }
2642 
2643 static void
2644 sadb_delpair_state(isaf_t *head, ipsa_t *entry, void *cookie)
2645 {
2646 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2647 	isaf_t  *inbound_bucket;
2648 	ipsa_t *peer_assoc;
2649 
2650 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2651 
2652 	mutex_enter(&entry->ipsa_lock);
2653 
2654 	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2655 	    ((ps->src != NULL) &&
2656 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af))) {
2657 		mutex_exit(&entry->ipsa_lock);
2658 		return;
2659 	}
2660 
2661 	/*
2662 	 * The isaf_t *, which is passed in , is always an outbound bucket,
2663 	 * and we are preserving the outbound-then-inbound hash-bucket lock
2664 	 * ordering. The sadb_walker() which triggers this function is called
2665 	 * only on the outbound fanout, and the corresponding inbound bucket
2666 	 * lock is safe to acquire here.
2667 	 */
2668 
2669 	if (entry->ipsa_haspeer) {
2670 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_spi);
2671 		mutex_enter(&inbound_bucket->isaf_lock);
2672 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2673 		    entry->ipsa_spi, entry->ipsa_srcaddr,
2674 		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2675 	} else {
2676 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_otherspi);
2677 		mutex_enter(&inbound_bucket->isaf_lock);
2678 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2679 		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2680 		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2681 	}
2682 
2683 	entry->ipsa_state = IPSA_STATE_DEAD;
2684 	(void) sadb_torch_assoc(head, entry, B_FALSE, &ps->mq);
2685 	if (peer_assoc != NULL) {
2686 		mutex_enter(&peer_assoc->ipsa_lock);
2687 		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2688 		(void) sadb_torch_assoc(inbound_bucket, peer_assoc,
2689 		    B_FALSE, &ps->mq);
2690 	}
2691 	mutex_exit(&inbound_bucket->isaf_lock);
2692 }
2693 
2694 /*
2695  * Common code to delete/get an SA.
2696  */
2697 int
2698 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2699     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2700 {
2701 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2702 	sadb_address_t *srcext =
2703 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2704 	sadb_address_t *dstext =
2705 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2706 	ipsa_t *echo_target = NULL;
2707 	ipsap_t *ipsapp;
2708 	mblk_t *torchq = NULL;
2709 	uint_t	error = 0;
2710 
2711 	if (assoc == NULL) {
2712 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2713 		return (EINVAL);
2714 	}
2715 
2716 	if (sadb_msg_type == SADB_X_DELPAIR_STATE) {
2717 		struct sockaddr_in *src;
2718 		struct sockaddr_in6 *src6;
2719 		struct sadb_purge_state ps;
2720 
2721 		if (srcext == NULL) {
2722 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2723 			return (EINVAL);
2724 		}
2725 		ps.src = NULL;
2726 		ps.mq = NULL;
2727 		src = (struct sockaddr_in *)(srcext + 1);
2728 		ps.af = src->sin_family;
2729 		if (src->sin_family == AF_INET6) {
2730 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2731 			ps.src = (uint32_t *)&src6->sin6_addr;
2732 			ps.sp = &spp->s_v6;
2733 		} else {
2734 			ps.src = (uint32_t *)&src->sin_addr;
2735 			ps.sp = &spp->s_v4;
2736 		}
2737 		ps.inbnd = B_FALSE;
2738 		ps.sadb_sa_state = assoc->sadb_sa_state;
2739 		sadb_walker(ps.sp->sdb_of, ps.sp->sdb_hashsize,
2740 		    sadb_delpair_state, &ps);
2741 
2742 		if (ps.mq != NULL)
2743 			sadb_drain_torchq(pfkey_q, ps.mq);
2744 
2745 		ASSERT(mp->b_cont != NULL);
2746 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2747 		    ksi, NULL);
2748 		return (0);
2749 	}
2750 
2751 	if (dstext == NULL) {
2752 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2753 		return (EINVAL);
2754 	}
2755 
2756 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
2757 	if (ipsapp == NULL) {
2758 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2759 		return (ESRCH);
2760 	}
2761 
2762 	echo_target = ipsapp->ipsap_sa_ptr;
2763 	if (echo_target == NULL)
2764 		echo_target = ipsapp->ipsap_psa_ptr;
2765 
2766 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2767 		/*
2768 		 * Bucket locks will be required if SA is actually unlinked.
2769 		 * get_ipsa_pair() returns valid hash bucket pointers even
2770 		 * if it can't find a pair SA pointer.
2771 		 */
2772 		mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2773 		mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2774 
2775 		if (ipsapp->ipsap_sa_ptr != NULL) {
2776 			mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2777 			if (ipsapp->ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2778 				sadb_delete_cluster(ipsapp->ipsap_sa_ptr);
2779 			}
2780 			ipsapp->ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2781 			(void) sadb_torch_assoc(ipsapp->ipsap_bucket,
2782 			    ipsapp->ipsap_sa_ptr, B_FALSE, &torchq);
2783 			/*
2784 			 * sadb_torch_assoc() releases the ipsa_lock
2785 			 * and calls sadb_unlinkassoc() which does a
2786 			 * IPSA_REFRELE.
2787 			 */
2788 		}
2789 		if (ipsapp->ipsap_psa_ptr != NULL) {
2790 			mutex_enter(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2791 			if (sadb_msg_type == SADB_X_DELPAIR) {
2792 				if (ipsapp->ipsap_psa_ptr->ipsa_flags &
2793 				    IPSA_F_INBOUND) {
2794 					sadb_delete_cluster(
2795 					    ipsapp->ipsap_psa_ptr);
2796 				}
2797 				ipsapp->ipsap_psa_ptr->ipsa_state =
2798 				    IPSA_STATE_DEAD;
2799 				(void) sadb_torch_assoc(ipsapp->ipsap_pbucket,
2800 				    ipsapp->ipsap_psa_ptr, B_FALSE, &torchq);
2801 			} else {
2802 				/*
2803 				 * Only half of the "pair" has been deleted.
2804 				 * Update the remaining SA and remove references
2805 				 * to its pair SA, which is now gone.
2806 				 */
2807 				ipsapp->ipsap_psa_ptr->ipsa_otherspi = 0;
2808 				ipsapp->ipsap_psa_ptr->ipsa_flags &=
2809 				    ~IPSA_F_PAIRED;
2810 				mutex_exit(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2811 			}
2812 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2813 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2814 			error = ESRCH;
2815 		}
2816 		mutex_exit(&ipsapp->ipsap_bucket->isaf_lock);
2817 		mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2818 	}
2819 
2820 	if (torchq != NULL)
2821 		sadb_drain_torchq(spp->s_ip_q, torchq);
2822 
2823 	ASSERT(mp->b_cont != NULL);
2824 
2825 	if (error == 0)
2826 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2827 		    mp->b_cont->b_rptr, ksi, echo_target);
2828 
2829 	destroy_ipsa_pair(ipsapp);
2830 
2831 	return (error);
2832 }
2833 
2834 /*
2835  * This function takes a sadb_sa_t and finds the ipsa_t structure
2836  * and the isaf_t (hash bucket) that its stored under. If the security
2837  * association has a peer, the ipsa_t structure and bucket for that security
2838  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2839  * are returned as a ipsap_t.
2840  *
2841  * Note that a "pair" is defined as one (but not both) of the following:
2842  *
2843  * A security association which has a soft reference to another security
2844  * association via its SPI.
2845  *
2846  * A security association that is not obviously "inbound" or "outbound" so
2847  * it appears in both hash tables, the "peer" being the same security
2848  * association in the other hash table.
2849  *
2850  * This function will return NULL if the ipsa_t can't be found in the
2851  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2852  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2853  * provided at least one ipsa_t is found.
2854  */
2855 ipsap_t *
2856 get_ipsa_pair(sadb_sa_t *assoc, sadb_address_t *srcext, sadb_address_t *dstext,
2857     sadbp_t *spp)
2858 {
2859 	struct sockaddr_in *src, *dst;
2860 	struct sockaddr_in6 *src6, *dst6;
2861 	sadb_t *sp;
2862 	uint32_t *srcaddr, *dstaddr;
2863 	isaf_t *outbound_bucket, *inbound_bucket;
2864 	boolean_t in_inbound_table = B_FALSE;
2865 	ipsap_t *ipsapp;
2866 	sa_family_t af;
2867 
2868 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2869 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2870 	uint32_t pair_spi;
2871 
2872 	ipsapp = kmem_zalloc(sizeof (*ipsapp), KM_NOSLEEP);
2873 	if (ipsapp == NULL)
2874 		return (NULL);
2875 
2876 	/*
2877 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2878 	 * takes care of them.
2879 	 */
2880 
2881 	dst = (struct sockaddr_in *)(dstext + 1);
2882 	af = dst->sin_family;
2883 	if (af == AF_INET6) {
2884 		sp = &spp->s_v6;
2885 		dst6 = (struct sockaddr_in6 *)dst;
2886 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2887 		if (srcext != NULL) {
2888 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2889 			srcaddr = (uint32_t *)&src6->sin6_addr;
2890 			ASSERT(src6->sin6_family == af);
2891 			ASSERT(src6->sin6_family == AF_INET6);
2892 		} else {
2893 			srcaddr = ALL_ZEROES_PTR;
2894 		}
2895 		outbound_bucket = OUTBOUND_BUCKET_V6(sp,
2896 		    *(uint32_t *)dstaddr);
2897 	} else {
2898 		sp = &spp->s_v4;
2899 		dstaddr = (uint32_t *)&dst->sin_addr;
2900 		if (srcext != NULL) {
2901 			src = (struct sockaddr_in *)(srcext + 1);
2902 			srcaddr = (uint32_t *)&src->sin_addr;
2903 			ASSERT(src->sin_family == af);
2904 			ASSERT(src->sin_family == AF_INET);
2905 		} else {
2906 			srcaddr = ALL_ZEROES_PTR;
2907 		}
2908 		outbound_bucket = OUTBOUND_BUCKET_V4(sp,
2909 		    *(uint32_t *)dstaddr);
2910 	}
2911 
2912 	inbound_bucket = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2913 
2914 	/* Lock down both buckets. */
2915 	mutex_enter(&outbound_bucket->isaf_lock);
2916 	mutex_enter(&inbound_bucket->isaf_lock);
2917 
2918 	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2919 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(inbound_bucket,
2920 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2921 		if (ipsapp->ipsap_sa_ptr != NULL) {
2922 			ipsapp->ipsap_bucket = inbound_bucket;
2923 			ipsapp->ipsap_pbucket = outbound_bucket;
2924 			in_inbound_table = B_TRUE;
2925 		} else {
2926 			ipsapp->ipsap_sa_ptr =
2927 			    ipsec_getassocbyspi(outbound_bucket,
2928 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2929 			ipsapp->ipsap_bucket = outbound_bucket;
2930 			ipsapp->ipsap_pbucket = inbound_bucket;
2931 		}
2932 	} else {
2933 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2934 		ipsapp->ipsap_sa_ptr =
2935 		    ipsec_getassocbyspi(outbound_bucket,
2936 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2937 		if (ipsapp->ipsap_sa_ptr != NULL) {
2938 			ipsapp->ipsap_bucket = outbound_bucket;
2939 			ipsapp->ipsap_pbucket = inbound_bucket;
2940 		} else {
2941 			ipsapp->ipsap_sa_ptr =
2942 			    ipsec_getassocbyspi(inbound_bucket,
2943 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2944 			ipsapp->ipsap_bucket = inbound_bucket;
2945 			ipsapp->ipsap_pbucket = outbound_bucket;
2946 			if (ipsapp->ipsap_sa_ptr != NULL)
2947 				in_inbound_table = B_TRUE;
2948 		}
2949 	}
2950 
2951 	if (ipsapp->ipsap_sa_ptr == NULL) {
2952 		mutex_exit(&outbound_bucket->isaf_lock);
2953 		mutex_exit(&inbound_bucket->isaf_lock);
2954 		kmem_free(ipsapp, sizeof (*ipsapp));
2955 		return (NULL);
2956 	}
2957 
2958 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2959 	    in_inbound_table) {
2960 		mutex_exit(&outbound_bucket->isaf_lock);
2961 		mutex_exit(&inbound_bucket->isaf_lock);
2962 		return (ipsapp);
2963 	}
2964 
2965 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2966 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2967 		/*
2968 		 * haspeer implies no sa_pairing, look for same spi
2969 		 * in other hashtable.
2970 		 */
2971 		ipsapp->ipsap_psa_ptr =
2972 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2973 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2974 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2975 		mutex_exit(&outbound_bucket->isaf_lock);
2976 		mutex_exit(&inbound_bucket->isaf_lock);
2977 		return (ipsapp);
2978 	}
2979 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2980 	IPSA_COPY_ADDR(&pair_srcaddr,
2981 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, af);
2982 	IPSA_COPY_ADDR(&pair_dstaddr,
2983 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, af);
2984 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2985 	mutex_exit(&outbound_bucket->isaf_lock);
2986 	mutex_exit(&inbound_bucket->isaf_lock);
2987 
2988 	if (pair_spi == 0) {
2989 		ASSERT(ipsapp->ipsap_bucket != NULL);
2990 		ASSERT(ipsapp->ipsap_pbucket != NULL);
2991 		return (ipsapp);
2992 	}
2993 
2994 	/* found sa in outbound sadb, peer should be inbound */
2995 
2996 	if (in_inbound_table) {
2997 		/* Found SA in inbound table, pair will be in outbound. */
2998 		if (af == AF_INET6) {
2999 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sp,
3000 			    *(uint32_t *)pair_srcaddr);
3001 		} else {
3002 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sp,
3003 			    *(uint32_t *)pair_srcaddr);
3004 		}
3005 	} else {
3006 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sp, pair_spi);
3007 	}
3008 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
3009 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
3010 	    pair_spi, pair_dstaddr, pair_srcaddr, af);
3011 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
3012 	ASSERT(ipsapp->ipsap_bucket != NULL);
3013 	ASSERT(ipsapp->ipsap_pbucket != NULL);
3014 	return (ipsapp);
3015 }
3016 
3017 /*
3018  * Initialize the mechanism parameters associated with an SA.
3019  * These parameters can be shared by multiple packets, which saves
3020  * us from the overhead of consulting the algorithm table for
3021  * each packet.
3022  */
3023 static void
3024 sadb_init_alginfo(ipsa_t *sa)
3025 {
3026 	ipsec_alginfo_t *alg;
3027 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
3028 
3029 	mutex_enter(&ipss->ipsec_alg_lock);
3030 
3031 	if (sa->ipsa_encrkey != NULL) {
3032 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
3033 		if (alg != NULL && ALG_VALID(alg)) {
3034 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
3035 			sa->ipsa_emech.cm_param = NULL;
3036 			sa->ipsa_emech.cm_param_len = 0;
3037 			sa->ipsa_iv_len = alg->alg_datalen;
3038 		} else
3039 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3040 	}
3041 
3042 	if (sa->ipsa_authkey != NULL) {
3043 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
3044 		if (alg != NULL && ALG_VALID(alg)) {
3045 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
3046 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
3047 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
3048 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
3049 		} else
3050 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3051 	}
3052 
3053 	mutex_exit(&ipss->ipsec_alg_lock);
3054 }
3055 
3056 /*
3057  * Perform NAT-traversal cached checksum offset calculations here.
3058  */
3059 static void
3060 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
3061     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
3062     uint32_t *dst_addr_ptr)
3063 {
3064 	struct sockaddr_in *natt_loc, *natt_rem;
3065 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
3066 	uint32_t running_sum = 0;
3067 
3068 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
3069 
3070 	if (natt_rem_ext != NULL) {
3071 		uint32_t l_src;
3072 		uint32_t l_rem;
3073 
3074 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
3075 
3076 		/* Ensured by sadb_addrfix(). */
3077 		ASSERT(natt_rem->sin_family == AF_INET);
3078 
3079 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
3080 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
3081 		l_src = *src_addr_ptr;
3082 		l_rem = *natt_rem_ptr;
3083 
3084 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3085 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
3086 
3087 		l_src = ntohl(l_src);
3088 		DOWN_SUM(l_src);
3089 		DOWN_SUM(l_src);
3090 		l_rem = ntohl(l_rem);
3091 		DOWN_SUM(l_rem);
3092 		DOWN_SUM(l_rem);
3093 
3094 		/*
3095 		 * We're 1's complement for checksums, so check for wraparound
3096 		 * here.
3097 		 */
3098 		if (l_rem > l_src)
3099 			l_src--;
3100 
3101 		running_sum += l_src - l_rem;
3102 
3103 		DOWN_SUM(running_sum);
3104 		DOWN_SUM(running_sum);
3105 	}
3106 
3107 	if (natt_loc_ext != NULL) {
3108 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
3109 
3110 		/* Ensured by sadb_addrfix(). */
3111 		ASSERT(natt_loc->sin_family == AF_INET);
3112 
3113 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
3114 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
3115 
3116 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3117 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
3118 
3119 		/*
3120 		 * NAT-T port agility means we may have natt_loc_ext, but
3121 		 * only for a local-port change.
3122 		 */
3123 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
3124 			uint32_t l_dst = ntohl(*dst_addr_ptr);
3125 			uint32_t l_loc = ntohl(*natt_loc_ptr);
3126 
3127 			DOWN_SUM(l_loc);
3128 			DOWN_SUM(l_loc);
3129 			DOWN_SUM(l_dst);
3130 			DOWN_SUM(l_dst);
3131 
3132 			/*
3133 			 * We're 1's complement for checksums, so check for
3134 			 * wraparound here.
3135 			 */
3136 			if (l_loc > l_dst)
3137 				l_dst--;
3138 
3139 			running_sum += l_dst - l_loc;
3140 			DOWN_SUM(running_sum);
3141 			DOWN_SUM(running_sum);
3142 		}
3143 	}
3144 
3145 	newbie->ipsa_inbound_cksum = running_sum;
3146 #undef DOWN_SUM
3147 }
3148 
3149 /*
3150  * This function is called from consumers that need to insert a fully-grown
3151  * security association into its tables.  This function takes into account that
3152  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
3153  * hash bucket parameters are set in order of what the SA will be most of the
3154  * time.  (For example, an SA with an unspecified source, and a multicast
3155  * destination will primarily be an outbound SA.  OTOH, if that destination
3156  * is unicast for this node, then the SA will primarily be inbound.)
3157  *
3158  * It takes a lot of parameters because even if clone is B_FALSE, this needs
3159  * to check both buckets for purposes of collision.
3160  *
3161  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
3162  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
3163  * with additional diagnostic information because there is at least one EINVAL
3164  * case here.
3165  */
3166 int
3167 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
3168     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
3169     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
3170     netstack_t *ns, sadbp_t *spp)
3171 {
3172 	ipsa_t *newbie_clone = NULL, *scratch;
3173 	ipsap_t *ipsapp = NULL;
3174 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3175 	sadb_address_t *srcext =
3176 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3177 	sadb_address_t *dstext =
3178 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3179 	sadb_address_t *isrcext =
3180 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3181 	sadb_address_t *idstext =
3182 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3183 	sadb_x_kmc_t *kmcext =
3184 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
3185 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3186 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3187 	sadb_x_pair_t *pair_ext =
3188 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
3189 	sadb_x_replay_ctr_t *replayext =
3190 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
3191 	uint8_t protocol =
3192 	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
3193 #if 0
3194 	/*
3195 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
3196 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
3197 	 */
3198 	sadb_sens_t *sens = (sadb_sens_t *);
3199 #endif
3200 	struct sockaddr_in *src, *dst, *isrc, *idst;
3201 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
3202 	sadb_lifetime_t *soft =
3203 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3204 	sadb_lifetime_t *hard =
3205 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3206 	sadb_lifetime_t	*idle =
3207 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3208 	sa_family_t af;
3209 	int error = 0;
3210 	boolean_t isupdate = (newbie != NULL);
3211 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
3212 	mblk_t *ctl_mp = NULL;
3213 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3214 	int		rcode;
3215 
3216 	if (srcext == NULL) {
3217 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3218 		return (EINVAL);
3219 	}
3220 	if (dstext == NULL) {
3221 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3222 		return (EINVAL);
3223 	}
3224 	if (assoc == NULL) {
3225 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3226 		return (EINVAL);
3227 	}
3228 
3229 	src = (struct sockaddr_in *)(srcext + 1);
3230 	src6 = (struct sockaddr_in6 *)(srcext + 1);
3231 	dst = (struct sockaddr_in *)(dstext + 1);
3232 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
3233 	if (isrcext != NULL) {
3234 		isrc = (struct sockaddr_in *)(isrcext + 1);
3235 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
3236 		ASSERT(idstext != NULL);
3237 		idst = (struct sockaddr_in *)(idstext + 1);
3238 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
3239 	} else {
3240 		isrc = NULL;
3241 		isrc6 = NULL;
3242 	}
3243 
3244 	af = src->sin_family;
3245 
3246 	if (af == AF_INET) {
3247 		src_addr_ptr = (uint32_t *)&src->sin_addr;
3248 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
3249 	} else {
3250 		ASSERT(af == AF_INET6);
3251 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
3252 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
3253 	}
3254 
3255 	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
3256 	    cl_inet_checkspi &&
3257 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3258 		rcode = cl_inet_checkspi(protocol, assoc->sadb_sa_spi);
3259 		if (rcode == -1) {
3260 			return (EEXIST);
3261 		}
3262 	}
3263 
3264 	/*
3265 	 * Check to see if the new SA will be cloned AND paired. The
3266 	 * reason a SA will be cloned is the source or destination addresses
3267 	 * are not specific enough to determine if the SA goes in the outbound
3268 	 * or the inbound hash table, so its cloned and put in both. If
3269 	 * the SA is paired, it's soft linked to another SA for the other
3270 	 * direction. Keeping track and looking up SA's that are direction
3271 	 * unspecific and linked is too hard.
3272 	 */
3273 	if (clone && (pair_ext != NULL)) {
3274 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3275 		return (EINVAL);
3276 	}
3277 
3278 	if (!isupdate) {
3279 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3280 		    src_addr_ptr, dst_addr_ptr, af, ns);
3281 		if (newbie == NULL)
3282 			return (ENOMEM);
3283 	}
3284 
3285 	mutex_enter(&newbie->ipsa_lock);
3286 
3287 	if (isrc != NULL) {
3288 		if (isrc->sin_family == AF_INET) {
3289 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3290 				if (srcext->sadb_address_proto != 0) {
3291 					/*
3292 					 * Mismatched outer-packet protocol
3293 					 * and inner-packet address family.
3294 					 */
3295 					mutex_exit(&newbie->ipsa_lock);
3296 					error = EPROTOTYPE;
3297 					goto error;
3298 				} else {
3299 					/* Fill in with explicit protocol. */
3300 					srcext->sadb_address_proto =
3301 					    IPPROTO_ENCAP;
3302 					dstext->sadb_address_proto =
3303 					    IPPROTO_ENCAP;
3304 				}
3305 			}
3306 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3307 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3308 		} else {
3309 			ASSERT(isrc->sin_family == AF_INET6);
3310 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3311 				if (srcext->sadb_address_proto != 0) {
3312 					/*
3313 					 * Mismatched outer-packet protocol
3314 					 * and inner-packet address family.
3315 					 */
3316 					mutex_exit(&newbie->ipsa_lock);
3317 					error = EPROTOTYPE;
3318 					goto error;
3319 				} else {
3320 					/* Fill in with explicit protocol. */
3321 					srcext->sadb_address_proto =
3322 					    IPPROTO_IPV6;
3323 					dstext->sadb_address_proto =
3324 					    IPPROTO_IPV6;
3325 				}
3326 			}
3327 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3328 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3329 		}
3330 		newbie->ipsa_innerfam = isrc->sin_family;
3331 
3332 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3333 		    newbie->ipsa_innerfam);
3334 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3335 		    newbie->ipsa_innerfam);
3336 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3337 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3338 
3339 		/* Unique value uses inner-ports for Tunnel Mode... */
3340 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3341 		    idst->sin_port, dstext->sadb_address_proto,
3342 		    idstext->sadb_address_proto);
3343 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3344 		    idst->sin_port, dstext->sadb_address_proto,
3345 		    idstext->sadb_address_proto);
3346 	} else {
3347 		/* ... and outer-ports for Transport Mode. */
3348 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3349 		    dst->sin_port, dstext->sadb_address_proto, 0);
3350 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3351 		    dst->sin_port, dstext->sadb_address_proto, 0);
3352 	}
3353 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3354 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3355 
3356 	sadb_nat_calculations(newbie,
3357 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3358 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3359 	    src_addr_ptr, dst_addr_ptr);
3360 
3361 	newbie->ipsa_type = samsg->sadb_msg_satype;
3362 	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3363 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3364 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3365 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3366 
3367 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3368 	if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3369 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) ||
3370 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3371 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) ||
3372 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3373 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) {
3374 		mutex_exit(&newbie->ipsa_lock);
3375 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3376 		error = EINVAL;
3377 		goto error;
3378 	}
3379 	/*
3380 	 * If unspecified source address, force replay_wsize to 0.
3381 	 * This is because an SA that has multiple sources of secure
3382 	 * traffic cannot enforce a replay counter w/o synchronizing the
3383 	 * senders.
3384 	 */
3385 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3386 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3387 	else
3388 		newbie->ipsa_replay_wsize = 0;
3389 
3390 	newbie->ipsa_addtime = gethrestime_sec();
3391 
3392 	if (kmcext != NULL) {
3393 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3394 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3395 	}
3396 
3397 	/*
3398 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3399 	 * The spec says that one can update current lifetimes, but
3400 	 * that seems impractical, especially in the larval-to-mature
3401 	 * update that this function performs.
3402 	 */
3403 	if (soft != NULL) {
3404 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3405 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3406 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3407 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3408 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3409 	}
3410 	if (hard != NULL) {
3411 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3412 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3413 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3414 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3415 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3416 	}
3417 	if (idle != NULL) {
3418 		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3419 		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3420 		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3421 		    newbie->ipsa_idleaddlt;
3422 		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3423 	}
3424 
3425 	newbie->ipsa_authtmpl = NULL;
3426 	newbie->ipsa_encrtmpl = NULL;
3427 
3428 	if (akey != NULL) {
3429 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3430 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3431 		/* In case we have to round up to the next byte... */
3432 		if ((akey->sadb_key_bits & 0x7) != 0)
3433 			newbie->ipsa_authkeylen++;
3434 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3435 		    KM_NOSLEEP);
3436 		if (newbie->ipsa_authkey == NULL) {
3437 			error = ENOMEM;
3438 			mutex_exit(&newbie->ipsa_lock);
3439 			goto error;
3440 		}
3441 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3442 		bzero(akey + 1, newbie->ipsa_authkeylen);
3443 
3444 		/*
3445 		 * Pre-initialize the kernel crypto framework key
3446 		 * structure.
3447 		 */
3448 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3449 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3450 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3451 
3452 		mutex_enter(&ipss->ipsec_alg_lock);
3453 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3454 		mutex_exit(&ipss->ipsec_alg_lock);
3455 		if (error != 0) {
3456 			mutex_exit(&newbie->ipsa_lock);
3457 			goto error;
3458 		}
3459 	}
3460 
3461 	if (ekey != NULL) {
3462 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3463 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3464 		/* In case we have to round up to the next byte... */
3465 		if ((ekey->sadb_key_bits & 0x7) != 0)
3466 			newbie->ipsa_encrkeylen++;
3467 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3468 		    KM_NOSLEEP);
3469 		if (newbie->ipsa_encrkey == NULL) {
3470 			error = ENOMEM;
3471 			mutex_exit(&newbie->ipsa_lock);
3472 			goto error;
3473 		}
3474 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3475 		/* XXX is this safe w.r.t db_ref, etc? */
3476 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3477 
3478 		/*
3479 		 * Pre-initialize the kernel crypto framework key
3480 		 * structure.
3481 		 */
3482 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3483 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3484 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3485 
3486 		mutex_enter(&ipss->ipsec_alg_lock);
3487 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3488 		mutex_exit(&ipss->ipsec_alg_lock);
3489 		if (error != 0) {
3490 			mutex_exit(&newbie->ipsa_lock);
3491 			goto error;
3492 		}
3493 	}
3494 
3495 	sadb_init_alginfo(newbie);
3496 
3497 	/*
3498 	 * Ptrs to processing functions.
3499 	 */
3500 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3501 		ipsecesp_init_funcs(newbie);
3502 	else
3503 		ipsecah_init_funcs(newbie);
3504 	ASSERT(newbie->ipsa_output_func != NULL &&
3505 	    newbie->ipsa_input_func != NULL);
3506 
3507 	/*
3508 	 * Certificate ID stuff.
3509 	 */
3510 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3511 		sadb_ident_t *id =
3512 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3513 
3514 		/*
3515 		 * Can assume strlen() will return okay because ext_check() in
3516 		 * keysock.c prepares the string for us.
3517 		 */
3518 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3519 		    (char *)(id+1), ns);
3520 		if (newbie->ipsa_src_cid == NULL) {
3521 			error = ENOMEM;
3522 			mutex_exit(&newbie->ipsa_lock);
3523 			goto error;
3524 		}
3525 	}
3526 
3527 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3528 		sadb_ident_t *id =
3529 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3530 
3531 		/*
3532 		 * Can assume strlen() will return okay because ext_check() in
3533 		 * keysock.c prepares the string for us.
3534 		 */
3535 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3536 		    (char *)(id+1), ns);
3537 		if (newbie->ipsa_dst_cid == NULL) {
3538 			error = ENOMEM;
3539 			mutex_exit(&newbie->ipsa_lock);
3540 			goto error;
3541 		}
3542 	}
3543 
3544 #if 0
3545 	/* XXXMLS  SENSITIVITY handling code. */
3546 	if (sens != NULL) {
3547 		int i;
3548 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3549 
3550 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3551 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3552 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3553 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3554 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3555 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3556 		    KM_NOSLEEP);
3557 		if (newbie->ipsa_integ == NULL) {
3558 			error = ENOMEM;
3559 			mutex_exit(&newbie->ipsa_lock);
3560 			goto error;
3561 		}
3562 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3563 		    KM_NOSLEEP);
3564 		if (newbie->ipsa_sens == NULL) {
3565 			error = ENOMEM;
3566 			mutex_exit(&newbie->ipsa_lock);
3567 			goto error;
3568 		}
3569 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3570 			newbie->ipsa_sens[i] = *bitmap;
3571 			bitmap++;
3572 		}
3573 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3574 			newbie->ipsa_integ[i] = *bitmap;
3575 			bitmap++;
3576 		}
3577 	}
3578 
3579 #endif
3580 
3581 	if (replayext != NULL) {
3582 		if ((replayext->sadb_x_rc_replay32 == 0) &&
3583 		    (replayext->sadb_x_rc_replay64 != 0)) {
3584 			error = EOPNOTSUPP;
3585 			mutex_exit(&newbie->ipsa_lock);
3586 			goto error;
3587 		}
3588 		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3589 	}
3590 
3591 	/* now that the SA has been updated, set its new state */
3592 	newbie->ipsa_state = assoc->sadb_sa_state;
3593 
3594 	if (clone) {
3595 		newbie->ipsa_haspeer = B_TRUE;
3596 	} else {
3597 		if (!is_inbound) {
3598 			lifetime_fuzz(newbie);
3599 		}
3600 	}
3601 	/*
3602 	 * The less locks I hold when doing an insertion and possible cloning,
3603 	 * the better!
3604 	 */
3605 	mutex_exit(&newbie->ipsa_lock);
3606 
3607 	if (clone) {
3608 		newbie_clone = sadb_cloneassoc(newbie);
3609 
3610 		if (newbie_clone == NULL) {
3611 			error = ENOMEM;
3612 			goto error;
3613 		}
3614 	}
3615 
3616 	/*
3617 	 * Enter the bucket locks.  The order of entry is outbound,
3618 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3619 	 * based on the destination address type.  If the destination address
3620 	 * type is for a node that isn't mine (or potentially mine), the
3621 	 * "primary" bucket is the outbound one.
3622 	 */
3623 	if (!is_inbound) {
3624 		/* primary == outbound */
3625 		mutex_enter(&primary->isaf_lock);
3626 		mutex_enter(&secondary->isaf_lock);
3627 	} else {
3628 		/* primary == inbound */
3629 		mutex_enter(&secondary->isaf_lock);
3630 		mutex_enter(&primary->isaf_lock);
3631 	}
3632 
3633 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3634 	    newbie->ipsa_spi));
3635 
3636 	/*
3637 	 * sadb_insertassoc() doesn't increment the reference
3638 	 * count.  We therefore have to increment the
3639 	 * reference count one more time to reflect the
3640 	 * pointers of the table that reference this SA.
3641 	 */
3642 	IPSA_REFHOLD(newbie);
3643 
3644 	if (isupdate) {
3645 		/*
3646 		 * Unlink from larval holding cell in the "inbound" fanout.
3647 		 */
3648 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3649 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3650 		sadb_unlinkassoc(newbie);
3651 	}
3652 
3653 	mutex_enter(&newbie->ipsa_lock);
3654 	error = sadb_insertassoc(newbie, primary);
3655 	if (error == 0) {
3656 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3657 		    is_inbound);
3658 	}
3659 	mutex_exit(&newbie->ipsa_lock);
3660 
3661 	if (error != 0) {
3662 		/*
3663 		 * Since sadb_insertassoc() failed, we must decrement the
3664 		 * refcount again so the cleanup code will actually free
3665 		 * the offending SA.
3666 		 */
3667 		IPSA_REFRELE(newbie);
3668 		goto error_unlock;
3669 	}
3670 
3671 	if (newbie_clone != NULL) {
3672 		mutex_enter(&newbie_clone->ipsa_lock);
3673 		error = sadb_insertassoc(newbie_clone, secondary);
3674 		mutex_exit(&newbie_clone->ipsa_lock);
3675 		if (error != 0) {
3676 			/* Collision in secondary table. */
3677 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3678 			goto error_unlock;
3679 		}
3680 		IPSA_REFHOLD(newbie_clone);
3681 	} else {
3682 		ASSERT(primary != secondary);
3683 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3684 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3685 		if (scratch != NULL) {
3686 			/* Collision in secondary table. */
3687 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3688 			/* Set the error, since ipsec_getassocbyspi() can't. */
3689 			error = EEXIST;
3690 			goto error_unlock;
3691 		}
3692 	}
3693 
3694 	/* OKAY!  So let's do some reality check assertions. */
3695 
3696 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3697 	ASSERT(newbie_clone == NULL ||
3698 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3699 	/*
3700 	 * If hardware acceleration could happen, send it.
3701 	 */
3702 	if (ctl_mp != NULL) {
3703 		putnext(ip_q, ctl_mp);
3704 		ctl_mp = NULL;
3705 	}
3706 
3707 error_unlock:
3708 
3709 	/*
3710 	 * We can exit the locks in any order.	Only entrance needs to
3711 	 * follow any protocol.
3712 	 */
3713 	mutex_exit(&secondary->isaf_lock);
3714 	mutex_exit(&primary->isaf_lock);
3715 
3716 	if (pair_ext != NULL && error == 0) {
3717 		/* update pair_spi if it exists. */
3718 		ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
3719 		if (ipsapp == NULL) {
3720 			error = ESRCH;
3721 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
3722 		} else if (ipsapp->ipsap_psa_ptr != NULL) {
3723 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3724 			error = EINVAL;
3725 		} else {
3726 			/* update_pairing() sets diagnostic */
3727 			error = update_pairing(ipsapp, ksi, diagnostic, spp);
3728 		}
3729 	}
3730 	/* Common error point for this routine. */
3731 error:
3732 	if (newbie != NULL) {
3733 		if (error != 0) {
3734 			/* This SA is broken, let the reaper clean up. */
3735 			mutex_enter(&newbie->ipsa_lock);
3736 			newbie->ipsa_state = IPSA_STATE_DEAD;
3737 			newbie->ipsa_hardexpiretime = 1;
3738 			mutex_exit(&newbie->ipsa_lock);
3739 		}
3740 		IPSA_REFRELE(newbie);
3741 	}
3742 	if (newbie_clone != NULL) {
3743 		IPSA_REFRELE(newbie_clone);
3744 	}
3745 	if (ctl_mp != NULL)
3746 		freemsg(ctl_mp);
3747 
3748 	if (error == 0) {
3749 		/*
3750 		 * Construct favorable PF_KEY return message and send to
3751 		 * keysock. Update the flags in the original keysock message
3752 		 * to reflect the actual flags in the new SA.
3753 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3754 		 * make sure to REFHOLD, call, then REFRELE.)
3755 		 */
3756 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3757 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3758 	}
3759 
3760 	destroy_ipsa_pair(ipsapp);
3761 	return (error);
3762 }
3763 
3764 /*
3765  * Set the time of first use for a security association.  Update any
3766  * expiration times as a result.
3767  */
3768 void
3769 sadb_set_usetime(ipsa_t *assoc)
3770 {
3771 	time_t snapshot = gethrestime_sec();
3772 
3773 	mutex_enter(&assoc->ipsa_lock);
3774 	assoc->ipsa_lastuse = snapshot;
3775 	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3776 
3777 	/*
3778 	 * Caller does check usetime before calling me usually, and
3779 	 * double-checking is better than a mutex_enter/exit hit.
3780 	 */
3781 	if (assoc->ipsa_usetime == 0) {
3782 		/*
3783 		 * This is redundant for outbound SA's, as
3784 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3785 		 * Inbound SAs, however, have no such protection.
3786 		 */
3787 		assoc->ipsa_flags |= IPSA_F_USED;
3788 		assoc->ipsa_usetime = snapshot;
3789 
3790 		/*
3791 		 * After setting the use time, see if we have a use lifetime
3792 		 * that would cause the actual SA expiration time to shorten.
3793 		 */
3794 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3795 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3796 	}
3797 	mutex_exit(&assoc->ipsa_lock);
3798 }
3799 
3800 /*
3801  * Send up a PF_KEY expire message for this association.
3802  */
3803 static void
3804 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3805 {
3806 	mblk_t *mp, *mp1;
3807 	int alloclen, af;
3808 	sadb_msg_t *samsg;
3809 	sadb_lifetime_t *current, *expire;
3810 	sadb_sa_t *saext;
3811 	uint8_t *end;
3812 	boolean_t tunnel_mode;
3813 
3814 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3815 
3816 	/* Don't bother sending if there's no queue. */
3817 	if (pfkey_q == NULL)
3818 		return;
3819 
3820 	/* If the SA is one of a pair, only SOFT expire the OUTBOUND SA */
3821 	if (assoc->ipsa_state == IPSA_STATE_DYING &&
3822 	    (assoc->ipsa_flags & IPSA_F_PAIRED) &&
3823 	    !(assoc->ipsa_flags & IPSA_F_OUTBOUND)) {
3824 		return;
3825 	}
3826 
3827 	mp = sadb_keysock_out(0);
3828 	if (mp == NULL) {
3829 		/* cmn_err(CE_WARN, */
3830 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3831 		return;
3832 	}
3833 
3834 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3835 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3836 
3837 	af = assoc->ipsa_addrfam;
3838 	switch (af) {
3839 	case AF_INET:
3840 		alloclen += 2 * sizeof (struct sockaddr_in);
3841 		break;
3842 	case AF_INET6:
3843 		alloclen += 2 * sizeof (struct sockaddr_in6);
3844 		break;
3845 	default:
3846 		/* Won't happen unless there's a kernel bug. */
3847 		freeb(mp);
3848 		cmn_err(CE_WARN,
3849 		    "sadb_expire_assoc: Unknown address length.\n");
3850 		return;
3851 	}
3852 
3853 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3854 	if (tunnel_mode) {
3855 		alloclen += 2 * sizeof (sadb_address_t);
3856 		switch (assoc->ipsa_innerfam) {
3857 		case AF_INET:
3858 			alloclen += 2 * sizeof (struct sockaddr_in);
3859 			break;
3860 		case AF_INET6:
3861 			alloclen += 2 * sizeof (struct sockaddr_in6);
3862 			break;
3863 		default:
3864 			/* Won't happen unless there's a kernel bug. */
3865 			freeb(mp);
3866 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3867 			    "Unknown inner address length.\n");
3868 			return;
3869 		}
3870 	}
3871 
3872 	mp->b_cont = allocb(alloclen, BPRI_HI);
3873 	if (mp->b_cont == NULL) {
3874 		freeb(mp);
3875 		/* cmn_err(CE_WARN, */
3876 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3877 		return;
3878 	}
3879 
3880 	mp1 = mp;
3881 	mp = mp->b_cont;
3882 	end = mp->b_wptr + alloclen;
3883 
3884 	samsg = (sadb_msg_t *)mp->b_wptr;
3885 	mp->b_wptr += sizeof (*samsg);
3886 	samsg->sadb_msg_version = PF_KEY_V2;
3887 	samsg->sadb_msg_type = SADB_EXPIRE;
3888 	samsg->sadb_msg_errno = 0;
3889 	samsg->sadb_msg_satype = assoc->ipsa_type;
3890 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3891 	samsg->sadb_msg_reserved = 0;
3892 	samsg->sadb_msg_seq = 0;
3893 	samsg->sadb_msg_pid = 0;
3894 
3895 	saext = (sadb_sa_t *)mp->b_wptr;
3896 	mp->b_wptr += sizeof (*saext);
3897 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3898 	saext->sadb_sa_exttype = SADB_EXT_SA;
3899 	saext->sadb_sa_spi = assoc->ipsa_spi;
3900 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3901 	saext->sadb_sa_state = assoc->ipsa_state;
3902 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3903 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3904 	saext->sadb_sa_flags = assoc->ipsa_flags;
3905 
3906 	current = (sadb_lifetime_t *)mp->b_wptr;
3907 	mp->b_wptr += sizeof (sadb_lifetime_t);
3908 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3909 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3910 	/* We do not support the concept. */
3911 	current->sadb_lifetime_allocations = 0;
3912 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3913 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3914 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3915 
3916 	expire = (sadb_lifetime_t *)mp->b_wptr;
3917 	mp->b_wptr += sizeof (*expire);
3918 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3919 
3920 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3921 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3922 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3923 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3924 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3925 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3926 	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3927 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3928 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3929 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3930 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3931 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3932 	} else {
3933 		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3934 		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3935 		expire->sadb_lifetime_allocations = 0;
3936 		expire->sadb_lifetime_bytes = 0;
3937 		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3938 		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3939 	}
3940 
3941 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3942 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3943 	    SA_PROTO(assoc), 0);
3944 	ASSERT(mp->b_wptr != NULL);
3945 
3946 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3947 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3948 	    SA_PROTO(assoc), 0);
3949 	ASSERT(mp->b_wptr != NULL);
3950 
3951 	if (tunnel_mode) {
3952 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3953 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3954 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3955 		    assoc->ipsa_innersrcpfx);
3956 		ASSERT(mp->b_wptr != NULL);
3957 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3958 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3959 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3960 		    assoc->ipsa_innerdstpfx);
3961 		ASSERT(mp->b_wptr != NULL);
3962 	}
3963 
3964 	/* Can just putnext, we're ready to go! */
3965 	putnext(pfkey_q, mp1);
3966 }
3967 
3968 /*
3969  * "Age" the SA with the number of bytes that was used to protect traffic.
3970  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3971  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3972  * otherwise.  (If B_FALSE is returned, the association either was, or became
3973  * DEAD.)
3974  */
3975 boolean_t
3976 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3977     boolean_t sendmsg)
3978 {
3979 	boolean_t rc = B_TRUE;
3980 	uint64_t newtotal;
3981 
3982 	mutex_enter(&assoc->ipsa_lock);
3983 	newtotal = assoc->ipsa_bytes + bytes;
3984 	if (assoc->ipsa_hardbyteslt != 0 &&
3985 	    newtotal >= assoc->ipsa_hardbyteslt) {
3986 		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3987 			sadb_delete_cluster(assoc);
3988 			/*
3989 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3990 			 * this off on another non-interrupt thread.  Also
3991 			 * unlink this SA immediately.
3992 			 */
3993 			assoc->ipsa_state = IPSA_STATE_DEAD;
3994 			if (sendmsg)
3995 				sadb_expire_assoc(pfkey_q, assoc);
3996 			/*
3997 			 * Set non-zero expiration time so sadb_age_assoc()
3998 			 * will work when reaping.
3999 			 */
4000 			assoc->ipsa_hardexpiretime = (time_t)1;
4001 		} /* Else someone beat me to it! */
4002 		rc = B_FALSE;
4003 	} else if (assoc->ipsa_softbyteslt != 0 &&
4004 	    (newtotal >= assoc->ipsa_softbyteslt)) {
4005 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
4006 			/*
4007 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
4008 			 * this off on another non-interrupt thread.
4009 			 */
4010 			assoc->ipsa_state = IPSA_STATE_DYING;
4011 			assoc->ipsa_bytes = newtotal;
4012 			if (sendmsg)
4013 				sadb_expire_assoc(pfkey_q, assoc);
4014 		} /* Else someone beat me to it! */
4015 	}
4016 	if (rc == B_TRUE)
4017 		assoc->ipsa_bytes = newtotal;
4018 	mutex_exit(&assoc->ipsa_lock);
4019 	return (rc);
4020 }
4021 
4022 /*
4023  * Push one or more DL_CO_DELETE messages queued up by
4024  * sadb_torch_assoc down to the underlying driver now that it's a
4025  * convenient time for it (i.e., ipsa bucket locks not held).
4026  */
4027 static void
4028 sadb_drain_torchq(queue_t *q, mblk_t *mp)
4029 {
4030 	while (mp != NULL) {
4031 		mblk_t *next = mp->b_next;
4032 		mp->b_next = NULL;
4033 		if (q != NULL)
4034 			putnext(q, mp);
4035 		else
4036 			freemsg(mp);
4037 		mp = next;
4038 	}
4039 }
4040 
4041 /*
4042  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
4043  *     sadb_age_assoc().
4044  *
4045  * If SA is hardware-accelerated, and we can't allocate the mblk
4046  * containing the DL_CO_DELETE, just return; it will remain in the
4047  * table and be swept up by sadb_ager() in a subsequent pass.
4048  */
4049 static ipsa_t *
4050 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
4051 {
4052 	mblk_t *mp;
4053 
4054 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4055 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
4056 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
4057 
4058 	/*
4059 	 * Force cached SAs to be revalidated..
4060 	 */
4061 	head->isaf_gen++;
4062 
4063 	if (sa->ipsa_flags & IPSA_F_HW) {
4064 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
4065 		if (mp == NULL) {
4066 			mutex_exit(&sa->ipsa_lock);
4067 			return (NULL);
4068 		}
4069 		mp->b_next = *mq;
4070 		*mq = mp;
4071 	}
4072 	mutex_exit(&sa->ipsa_lock);
4073 	sadb_unlinkassoc(sa);
4074 
4075 	return (NULL);
4076 }
4077 
4078 /*
4079  * Do various SA-is-idle activities depending on delta (the number of idle
4080  * seconds on the SA) and/or other properties of the SA.
4081  *
4082  * Return B_TRUE if I've sent a packet, because I have to drop the
4083  * association's mutex before sending a packet out the wire.
4084  */
4085 /* ARGSUSED */
4086 static boolean_t
4087 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
4088 {
4089 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
4090 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
4091 
4092 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
4093 
4094 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
4095 	    delta >= nat_t_interval &&
4096 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
4097 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
4098 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
4099 		mutex_exit(&assoc->ipsa_lock);
4100 		ipsecesp_send_keepalive(assoc);
4101 		return (B_TRUE);
4102 	}
4103 	return (B_FALSE);
4104 }
4105 
4106 /*
4107  * Return "assoc" if haspeer is true and I send an expire.  This allows
4108  * the consumers' aging functions to tidy up an expired SA's peer.
4109  */
4110 static ipsa_t *
4111 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
4112     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
4113 {
4114 	ipsa_t *retval = NULL;
4115 	boolean_t dropped_mutex = B_FALSE;
4116 
4117 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4118 
4119 	mutex_enter(&assoc->ipsa_lock);
4120 
4121 	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4122 	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4123 	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4124 	    (assoc->ipsa_hardexpiretime != 0))) &&
4125 	    (assoc->ipsa_hardexpiretime <= current)) {
4126 		assoc->ipsa_state = IPSA_STATE_DEAD;
4127 		return (sadb_torch_assoc(head, assoc, inbound, mq));
4128 	}
4129 
4130 	/*
4131 	 * Check lifetimes.  Fortunately, SA setup is done
4132 	 * such that there are only two times to look at,
4133 	 * softexpiretime, and hardexpiretime.
4134 	 *
4135 	 * Check hard first.
4136 	 */
4137 
4138 	if (assoc->ipsa_hardexpiretime != 0 &&
4139 	    assoc->ipsa_hardexpiretime <= current) {
4140 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4141 			return (sadb_torch_assoc(head, assoc, inbound, mq));
4142 
4143 		if (inbound) {
4144 			sadb_delete_cluster(assoc);
4145 		}
4146 
4147 		/*
4148 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4149 		 */
4150 		assoc->ipsa_state = IPSA_STATE_DEAD;
4151 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4152 			/*
4153 			 * If the SA is paired or peered with another, put
4154 			 * a copy on a list which can be processed later, the
4155 			 * pair/peer SA needs to be updated so the both die
4156 			 * at the same time.
4157 			 *
4158 			 * If I return assoc, I have to bump up its reference
4159 			 * count to keep with the ipsa_t reference count
4160 			 * semantics.
4161 			 */
4162 			IPSA_REFHOLD(assoc);
4163 			retval = assoc;
4164 		}
4165 		sadb_expire_assoc(pfkey_q, assoc);
4166 		assoc->ipsa_hardexpiretime = current + reap_delay;
4167 	} else if (assoc->ipsa_softexpiretime != 0 &&
4168 	    assoc->ipsa_softexpiretime <= current &&
4169 	    assoc->ipsa_state < IPSA_STATE_DYING) {
4170 		/*
4171 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4172 		 * this off on another non-interrupt thread.
4173 		 */
4174 		assoc->ipsa_state = IPSA_STATE_DYING;
4175 		if (assoc->ipsa_haspeer) {
4176 			/*
4177 			 * If the SA has a peer, update the peer's state
4178 			 * on SOFT_EXPIRE, this is mostly to prevent two
4179 			 * expire messages from effectively the same SA.
4180 			 *
4181 			 * Don't care about paired SA's, then can (and should)
4182 			 * be able to soft expire at different times.
4183 			 *
4184 			 * If I return assoc, I have to bump up its
4185 			 * reference count to keep with the ipsa_t reference
4186 			 * count semantics.
4187 			 */
4188 			IPSA_REFHOLD(assoc);
4189 			retval = assoc;
4190 		}
4191 		sadb_expire_assoc(pfkey_q, assoc);
4192 	} else if (assoc->ipsa_idletime != 0 &&
4193 	    assoc->ipsa_idleexpiretime <= current) {
4194 		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4195 			assoc->ipsa_state = IPSA_STATE_IDLE;
4196 		}
4197 
4198 		/*
4199 		 * Need to handle Mature case
4200 		 */
4201 		if (assoc->ipsa_state == IPSA_STATE_MATURE && !inbound) {
4202 			sadb_expire_assoc(pfkey_q, assoc);
4203 		}
4204 	} else {
4205 		/* Check idle time activities. */
4206 		dropped_mutex = sadb_idle_activities(assoc,
4207 		    current - assoc->ipsa_lastuse, inbound);
4208 	}
4209 
4210 	if (!dropped_mutex)
4211 		mutex_exit(&assoc->ipsa_lock);
4212 	return (retval);
4213 }
4214 
4215 /*
4216  * Called by a consumer protocol to do ther dirty work of reaping dead
4217  * Security Associations.
4218  *
4219  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4220  * SA's that are already marked DEAD, so expired SA's are only reaped
4221  * the second time sadb_ager() runs.
4222  */
4223 void
4224 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
4225     netstack_t *ns)
4226 {
4227 	int i;
4228 	isaf_t *bucket;
4229 	ipsa_t *assoc, *spare;
4230 	iacqf_t *acqlist;
4231 	ipsacq_t *acqrec, *spareacq;
4232 	templist_t *haspeerlist, *newbie;
4233 	/* Snapshot current time now. */
4234 	time_t current = gethrestime_sec();
4235 	mblk_t *mq = NULL;
4236 	haspeerlist = NULL;
4237 
4238 	/*
4239 	 * Do my dirty work.  This includes aging real entries, aging
4240 	 * larvals, and aging outstanding ACQUIREs.
4241 	 *
4242 	 * I hope I don't tie up resources for too long.
4243 	 */
4244 
4245 	/* Age acquires. */
4246 
4247 	for (i = 0; i < sp->sdb_hashsize; i++) {
4248 		acqlist = &sp->sdb_acq[i];
4249 		mutex_enter(&acqlist->iacqf_lock);
4250 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4251 		    acqrec = spareacq) {
4252 			spareacq = acqrec->ipsacq_next;
4253 			if (current > acqrec->ipsacq_expire)
4254 				sadb_destroy_acquire(acqrec, ns);
4255 		}
4256 		mutex_exit(&acqlist->iacqf_lock);
4257 	}
4258 
4259 	/* Age inbound associations. */
4260 	for (i = 0; i < sp->sdb_hashsize; i++) {
4261 		bucket = &(sp->sdb_if[i]);
4262 		mutex_enter(&bucket->isaf_lock);
4263 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4264 		    assoc = spare) {
4265 			spare = assoc->ipsa_next;
4266 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4267 			    reap_delay, B_TRUE, &mq) != NULL) {
4268 				/*
4269 				 * Put SA's which have a peer or SA's which
4270 				 * are paired on a list for processing after
4271 				 * all the hash tables have been walked.
4272 				 *
4273 				 * sadb_age_assoc() increments the refcnt,
4274 				 * effectively doing an IPSA_REFHOLD().
4275 				 */
4276 				newbie = kmem_alloc(sizeof (*newbie),
4277 				    KM_NOSLEEP);
4278 				if (newbie == NULL) {
4279 					/*
4280 					 * Don't forget to REFRELE().
4281 					 */
4282 					IPSA_REFRELE(assoc);
4283 					continue;	/* for loop... */
4284 				}
4285 				newbie->next = haspeerlist;
4286 				newbie->ipsa = assoc;
4287 				haspeerlist = newbie;
4288 			}
4289 		}
4290 		mutex_exit(&bucket->isaf_lock);
4291 	}
4292 
4293 	if (mq != NULL) {
4294 		sadb_drain_torchq(ip_q, mq);
4295 		mq = NULL;
4296 	}
4297 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4298 	haspeerlist = NULL;
4299 
4300 	/* Age outbound associations. */
4301 	for (i = 0; i < sp->sdb_hashsize; i++) {
4302 		bucket = &(sp->sdb_of[i]);
4303 		mutex_enter(&bucket->isaf_lock);
4304 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4305 		    assoc = spare) {
4306 			spare = assoc->ipsa_next;
4307 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4308 			    reap_delay, B_FALSE, &mq) != NULL) {
4309 				/*
4310 				 * sadb_age_assoc() increments the refcnt,
4311 				 * effectively doing an IPSA_REFHOLD().
4312 				 */
4313 				newbie = kmem_alloc(sizeof (*newbie),
4314 				    KM_NOSLEEP);
4315 				if (newbie == NULL) {
4316 					/*
4317 					 * Don't forget to REFRELE().
4318 					 */
4319 					IPSA_REFRELE(assoc);
4320 					continue;	/* for loop... */
4321 				}
4322 				newbie->next = haspeerlist;
4323 				newbie->ipsa = assoc;
4324 				haspeerlist = newbie;
4325 			}
4326 		}
4327 		mutex_exit(&bucket->isaf_lock);
4328 	}
4329 	if (mq != NULL) {
4330 		sadb_drain_torchq(ip_q, mq);
4331 		mq = NULL;
4332 	}
4333 
4334 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4335 
4336 	/*
4337 	 * Run a GC pass to clean out dead identities.
4338 	 */
4339 	ipsid_gc(ns);
4340 }
4341 
4342 /*
4343  * Figure out when to reschedule the ager.
4344  */
4345 timeout_id_t
4346 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4347     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4348 {
4349 	hrtime_t end = gethrtime();
4350 	uint_t interval = *intp;
4351 
4352 	/*
4353 	 * See how long this took.  If it took too long, increase the
4354 	 * aging interval.
4355 	 */
4356 	if ((end - begin) > interval * 1000000) {
4357 		if (interval >= intmax) {
4358 			/* XXX Rate limit this?  Or recommend flush? */
4359 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4360 			    "Too many SA's to age out in %d msec.\n",
4361 			    intmax);
4362 		} else {
4363 			/* Double by shifting by one bit. */
4364 			interval <<= 1;
4365 			interval = min(interval, intmax);
4366 		}
4367 	} else if ((end - begin) <= interval * 500000 &&
4368 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4369 		/*
4370 		 * If I took less than half of the interval, then I should
4371 		 * ratchet the interval back down.  Never automatically
4372 		 * shift below the default aging interval.
4373 		 *
4374 		 * NOTE:This even overrides manual setting of the age
4375 		 *	interval using NDD.
4376 		 */
4377 		/* Halve by shifting one bit. */
4378 		interval >>= 1;
4379 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4380 	}
4381 	*intp = interval;
4382 	return (qtimeout(pfkey_q, ager, agerarg,
4383 	    interval * drv_usectohz(1000)));
4384 }
4385 
4386 
4387 /*
4388  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4389  * message takes when updating a MATURE or DYING SA.
4390  */
4391 static void
4392 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4393     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4394 {
4395 	mutex_enter(&assoc->ipsa_lock);
4396 
4397 	/*
4398 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4399 	 * passed in during an update message.	We currently don't handle
4400 	 * these.
4401 	 */
4402 
4403 	if (hard != NULL) {
4404 		if (hard->sadb_lifetime_bytes != 0)
4405 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4406 		if (hard->sadb_lifetime_usetime != 0)
4407 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4408 		if (hard->sadb_lifetime_addtime != 0)
4409 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4410 		if (assoc->ipsa_hardaddlt != 0) {
4411 			assoc->ipsa_hardexpiretime =
4412 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4413 		}
4414 		if (assoc->ipsa_harduselt != 0 &&
4415 		    assoc->ipsa_flags & IPSA_F_USED) {
4416 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4417 		}
4418 		if (hard->sadb_lifetime_allocations != 0)
4419 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4420 	}
4421 
4422 	if (soft != NULL) {
4423 		if (soft->sadb_lifetime_bytes != 0) {
4424 			if (soft->sadb_lifetime_bytes >
4425 			    assoc->ipsa_hardbyteslt) {
4426 				assoc->ipsa_softbyteslt =
4427 				    assoc->ipsa_hardbyteslt;
4428 			} else {
4429 				assoc->ipsa_softbyteslt =
4430 				    soft->sadb_lifetime_bytes;
4431 			}
4432 		}
4433 		if (soft->sadb_lifetime_usetime != 0) {
4434 			if (soft->sadb_lifetime_usetime >
4435 			    assoc->ipsa_harduselt) {
4436 				assoc->ipsa_softuselt =
4437 				    assoc->ipsa_harduselt;
4438 			} else {
4439 				assoc->ipsa_softuselt =
4440 				    soft->sadb_lifetime_usetime;
4441 			}
4442 		}
4443 		if (soft->sadb_lifetime_addtime != 0) {
4444 			if (soft->sadb_lifetime_addtime >
4445 			    assoc->ipsa_hardexpiretime) {
4446 				assoc->ipsa_softexpiretime =
4447 				    assoc->ipsa_hardexpiretime;
4448 			} else {
4449 				assoc->ipsa_softaddlt =
4450 				    soft->sadb_lifetime_addtime;
4451 			}
4452 		}
4453 		if (assoc->ipsa_softaddlt != 0) {
4454 			assoc->ipsa_softexpiretime =
4455 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4456 		}
4457 		if (assoc->ipsa_softuselt != 0 &&
4458 		    assoc->ipsa_flags & IPSA_F_USED) {
4459 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4460 		}
4461 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4462 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4463 				lifetime_fuzz(assoc);
4464 		}
4465 
4466 		if (soft->sadb_lifetime_allocations != 0)
4467 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4468 	}
4469 
4470 	if (idle != NULL) {
4471 		time_t current = gethrestime_sec();
4472 		if ((assoc->ipsa_idleexpiretime <= current) &&
4473 		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4474 			assoc->ipsa_idleexpiretime =
4475 			    current + assoc->ipsa_idleaddlt;
4476 		}
4477 		if (idle->sadb_lifetime_addtime != 0)
4478 			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4479 		if (idle->sadb_lifetime_usetime != 0)
4480 			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4481 		if (assoc->ipsa_idleaddlt != 0) {
4482 			assoc->ipsa_idleexpiretime =
4483 			    current + idle->sadb_lifetime_addtime;
4484 			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4485 		}
4486 		if (assoc->ipsa_idleuselt != 0) {
4487 			if (assoc->ipsa_idletime != 0) {
4488 				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4489 				    assoc->ipsa_idleuselt);
4490 			assoc->ipsa_idleexpiretime =
4491 			    current + assoc->ipsa_idletime;
4492 			} else {
4493 				assoc->ipsa_idleexpiretime =
4494 				    current + assoc->ipsa_idleuselt;
4495 				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4496 			}
4497 		}
4498 	}
4499 	mutex_exit(&assoc->ipsa_lock);
4500 }
4501 
4502 static int
4503 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4504 {
4505 	int rcode = 0;
4506 	time_t current = gethrestime_sec();
4507 
4508 	mutex_enter(&assoc->ipsa_lock);
4509 
4510 	switch (new_state) {
4511 	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4512 		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4513 			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4514 			assoc->ipsa_idleexpiretime =
4515 			    current + assoc->ipsa_idletime;
4516 		}
4517 		break;
4518 	case SADB_X_SASTATE_IDLE:
4519 		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4520 			assoc->ipsa_state = IPSA_STATE_IDLE;
4521 			assoc->ipsa_idleexpiretime =
4522 			    current + assoc->ipsa_idletime;
4523 		} else {
4524 			rcode = EINVAL;
4525 		}
4526 		break;
4527 
4528 	case SADB_X_SASTATE_ACTIVE:
4529 		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4530 			rcode = EINVAL;
4531 			break;
4532 		}
4533 		assoc->ipsa_state = IPSA_STATE_MATURE;
4534 		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4535 
4536 		if (ipkt_lst == NULL) {
4537 			break;
4538 		}
4539 
4540 		if (assoc->ipsa_bpkt_head != NULL) {
4541 			*ipkt_lst = assoc->ipsa_bpkt_head;
4542 			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4543 			assoc->ipsa_mblkcnt = 0;
4544 		} else {
4545 			*ipkt_lst = NULL;
4546 		}
4547 		break;
4548 	default:
4549 		rcode = EINVAL;
4550 		break;
4551 	}
4552 
4553 	mutex_exit(&assoc->ipsa_lock);
4554 	return (rcode);
4555 }
4556 
4557 /*
4558  * Common code to update an SA.
4559  */
4560 
4561 int
4562 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4563     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4564     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4565     netstack_t *ns, uint8_t sadb_msg_type)
4566 {
4567 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4568 	sadb_address_t *srcext =
4569 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4570 	sadb_address_t *dstext =
4571 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4572 	sadb_x_kmc_t *kmcext =
4573 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4574 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4575 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4576 	sadb_x_replay_ctr_t *replext =
4577 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4578 	sadb_lifetime_t *soft =
4579 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4580 	sadb_lifetime_t *hard =
4581 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4582 	sadb_lifetime_t *idle =
4583 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4584 	sadb_x_pair_t *pair_ext =
4585 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4586 	ipsa_t *echo_target = NULL;
4587 	int error = 0;
4588 	ipsap_t *ipsapp = NULL;
4589 	uint32_t kmp = 0, kmc = 0;
4590 	time_t current = gethrestime_sec();
4591 
4592 
4593 	/* I need certain extensions present for either UPDATE message. */
4594 	if (srcext == NULL) {
4595 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4596 		return (EINVAL);
4597 	}
4598 	if (dstext == NULL) {
4599 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4600 		return (EINVAL);
4601 	}
4602 	if (assoc == NULL) {
4603 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4604 		return (EINVAL);
4605 	}
4606 
4607 	if (kmcext != NULL) {
4608 		kmp = kmcext->sadb_x_kmc_proto;
4609 		kmc = kmcext->sadb_x_kmc_cookie;
4610 	}
4611 
4612 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4613 	if (ipsapp == NULL) {
4614 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
4615 		return (ESRCH);
4616 	}
4617 
4618 	if (ipsapp->ipsap_psa_ptr == NULL && ipsapp->ipsap_sa_ptr != NULL) {
4619 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4620 			/*
4621 			 * REFRELE the target and let the add_sa_func()
4622 			 * deal with updating a larval SA.
4623 			 */
4624 			destroy_ipsa_pair(ipsapp);
4625 			return (add_sa_func(mp, ksi, diagnostic, ns));
4626 		}
4627 	}
4628 
4629 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4630 		if (ipsapp->ipsap_sa_ptr != NULL &&
4631 		    ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4632 			if ((error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4633 			    assoc->sadb_sa_state, NULL)) != 0) {
4634 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4635 				goto bail;
4636 			}
4637 		}
4638 		if (ipsapp->ipsap_psa_ptr != NULL &&
4639 		    ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4640 			if ((error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4641 			    assoc->sadb_sa_state, NULL)) != 0) {
4642 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4643 				goto bail;
4644 			}
4645 		}
4646 	}
4647 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4648 		if (ipsapp->ipsap_sa_ptr != NULL) {
4649 			error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4650 			    assoc->sadb_sa_state,
4651 			    (ipsapp->ipsap_sa_ptr->ipsa_flags &
4652 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4653 			if (error) {
4654 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4655 				goto bail;
4656 			}
4657 		}
4658 		if (ipsapp->ipsap_psa_ptr != NULL) {
4659 			error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4660 			    assoc->sadb_sa_state,
4661 			    (ipsapp->ipsap_psa_ptr->ipsa_flags &
4662 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4663 			if (error) {
4664 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4665 				goto bail;
4666 			}
4667 		}
4668 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4669 		    ksi, echo_target);
4670 		goto bail;
4671 	}
4672 
4673 	/*
4674 	 * Reality checks for updates of active associations.
4675 	 * Sundry first-pass UPDATE-specific reality checks.
4676 	 * Have to do the checks here, because it's after the add_sa code.
4677 	 * XXX STATS : logging/stats here?
4678 	 */
4679 
4680 	if (!((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4681 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4682 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4683 		error = EINVAL;
4684 		goto bail;
4685 	}
4686 
4687 	if (assoc->sadb_sa_flags & ~spp->s_updateflags) {
4688 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4689 		error = EINVAL;
4690 		goto bail;
4691 	}
4692 
4693 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4694 		error = EOPNOTSUPP;
4695 		goto bail;
4696 	}
4697 
4698 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4699 		error = EINVAL;
4700 		goto bail;
4701 	}
4702 	if (akey != NULL) {
4703 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4704 		error = EINVAL;
4705 		goto bail;
4706 	}
4707 	if (ekey != NULL) {
4708 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4709 		error = EINVAL;
4710 		goto bail;
4711 	}
4712 
4713 	if (ipsapp->ipsap_sa_ptr != NULL) {
4714 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4715 			error = ESRCH;	/* DEAD == Not there, in this case. */
4716 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4717 			goto bail;
4718 		}
4719 		if ((kmp != 0) &&
4720 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmp != 0) ||
4721 		    (ipsapp->ipsap_sa_ptr->ipsa_kmp != kmp))) {
4722 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4723 			error = EINVAL;
4724 			goto bail;
4725 		}
4726 		if ((kmc != 0) &&
4727 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmc != 0) ||
4728 		    (ipsapp->ipsap_sa_ptr->ipsa_kmc != kmc))) {
4729 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4730 			error = EINVAL;
4731 			goto bail;
4732 		}
4733 		/*
4734 		 * Do not allow replay value change for MATURE or LARVAL SA.
4735 		 */
4736 
4737 		if ((replext != NULL) &&
4738 		    ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4739 		    (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4740 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4741 			error = EINVAL;
4742 			goto bail;
4743 		}
4744 	}
4745 
4746 	if (ipsapp->ipsap_psa_ptr != NULL) {
4747 		if (ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4748 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4749 			error = ESRCH;	/* DEAD == Not there, in this case. */
4750 			goto bail;
4751 		}
4752 		if ((kmp != 0) &&
4753 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmp != 0) ||
4754 		    (ipsapp->ipsap_psa_ptr->ipsa_kmp != kmp))) {
4755 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4756 			error = EINVAL;
4757 			goto bail;
4758 		}
4759 		if ((kmc != 0) &&
4760 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmc != 0) ||
4761 		    (ipsapp->ipsap_psa_ptr->ipsa_kmc != kmc))) {
4762 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4763 			error = EINVAL;
4764 			goto bail;
4765 		}
4766 	}
4767 
4768 	if (ipsapp->ipsap_sa_ptr != NULL) {
4769 		sadb_update_lifetimes(ipsapp->ipsap_sa_ptr, hard, soft,
4770 		    idle, B_TRUE);
4771 		if (kmp != 0)
4772 			ipsapp->ipsap_sa_ptr->ipsa_kmp = kmp;
4773 		if (kmc != 0)
4774 			ipsapp->ipsap_sa_ptr->ipsa_kmc = kmc;
4775 		if ((replext != NULL) &&
4776 		    (ipsapp->ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4777 			/*
4778 			 * If an inbound SA, update the replay counter
4779 			 * and check off all the other sequence number
4780 			 */
4781 			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4782 				if (!sadb_replay_check(ipsapp->ipsap_sa_ptr,
4783 				    replext->sadb_x_rc_replay32)) {
4784 					error = EINVAL;
4785 					goto bail;
4786 				}
4787 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4788 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4789 				    current +
4790 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4791 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4792 			} else {
4793 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4794 				ipsapp->ipsap_sa_ptr->ipsa_replay =
4795 				    replext->sadb_x_rc_replay32;
4796 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4797 				    current +
4798 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4799 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4800 			}
4801 		}
4802 	}
4803 
4804 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4805 		if (ipsapp->ipsap_psa_ptr != NULL) {
4806 			sadb_update_lifetimes(ipsapp->ipsap_psa_ptr, hard, soft,
4807 			    idle, B_FALSE);
4808 			if (kmp != 0)
4809 				ipsapp->ipsap_psa_ptr->ipsa_kmp = kmp;
4810 			if (kmc != 0)
4811 				ipsapp->ipsap_psa_ptr->ipsa_kmc = kmc;
4812 		} else {
4813 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4814 			error = ESRCH;
4815 			goto bail;
4816 		}
4817 	}
4818 
4819 	if (pair_ext != NULL)
4820 		error = update_pairing(ipsapp, ksi, diagnostic, spp);
4821 
4822 	if (error == 0)
4823 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4824 		    ksi, echo_target);
4825 bail:
4826 
4827 	destroy_ipsa_pair(ipsapp);
4828 
4829 	return (error);
4830 }
4831 
4832 
4833 int
4834 update_pairing(ipsap_t *ipsapp, keysock_in_t *ksi, int *diagnostic,
4835     sadbp_t *spp)
4836 {
4837 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4838 	sadb_address_t *srcext =
4839 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4840 	sadb_address_t *dstext =
4841 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4842 	sadb_x_pair_t *pair_ext =
4843 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4844 	int error = 0;
4845 	ipsap_t *oipsapp = NULL;
4846 	boolean_t undo_pair = B_FALSE;
4847 	uint32_t ipsa_flags;
4848 
4849 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4850 	    assoc->sadb_sa_spi) {
4851 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4852 		return (EINVAL);
4853 	}
4854 
4855 	/*
4856 	 * Assume for now that the spi value provided in the SADB_UPDATE
4857 	 * message was valid, update the SA with its pair spi value.
4858 	 * If the spi turns out to be bogus or the SA no longer exists
4859 	 * then this will be detected when the reverse update is made
4860 	 * below.
4861 	 */
4862 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4863 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4864 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4865 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4866 
4867 	/*
4868 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4869 	 * should now return pointers to the SA *AND* its pair, if this is not
4870 	 * the case, the "otherspi" either did not exist or was deleted. Also
4871 	 * check that "otherspi" is not already paired. If everything looks
4872 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4873 	 * after this update to ensure its not deleted until we are done.
4874 	 */
4875 	oipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4876 	if (oipsapp == NULL) {
4877 		/*
4878 		 * This should never happen, calling function still has
4879 		 * IPSA_REFHELD on the SA we just updated.
4880 		 */
4881 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4882 		return (EINVAL);
4883 	}
4884 
4885 	if (oipsapp->ipsap_psa_ptr == NULL) {
4886 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4887 		undo_pair = B_TRUE;
4888 	} else {
4889 		ipsa_flags = oipsapp->ipsap_psa_ptr->ipsa_flags;
4890 		if ((oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4891 		    (oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4892 			/* Its dead Jim! */
4893 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4894 			undo_pair = B_TRUE;
4895 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4896 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4897 			/* This SA is in both hashtables. */
4898 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4899 			undo_pair = B_TRUE;
4900 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4901 			/* This SA is already paired with another. */
4902 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4903 			undo_pair = B_TRUE;
4904 		}
4905 	}
4906 
4907 	if (undo_pair) {
4908 		/* The pair SA does not exist. */
4909 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4910 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4911 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4912 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4913 		error = EINVAL;
4914 	} else {
4915 		mutex_enter(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4916 		oipsapp->ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4917 		oipsapp->ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4918 		mutex_exit(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4919 	}
4920 
4921 	destroy_ipsa_pair(oipsapp);
4922 	return (error);
4923 }
4924 
4925 /*
4926  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4927  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4928  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4929  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4930  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4931  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4932  * other direction's SA.
4933  */
4934 
4935 /*
4936  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4937  * grab it, lock it, and return it.  Otherwise return NULL.
4938  */
4939 static ipsacq_t *
4940 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4941     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4942     uint64_t unique_id)
4943 {
4944 	ipsacq_t *walker;
4945 	sa_family_t fam;
4946 	uint32_t blank_address[4] = {0, 0, 0, 0};
4947 
4948 	if (isrc == NULL) {
4949 		ASSERT(idst == NULL);
4950 		isrc = idst = blank_address;
4951 	}
4952 
4953 	/*
4954 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4955 	 *
4956 	 * XXX May need search for duplicates based on other things too!
4957 	 */
4958 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4959 	    walker = walker->ipsacq_next) {
4960 		mutex_enter(&walker->ipsacq_lock);
4961 		fam = walker->ipsacq_addrfam;
4962 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4963 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4964 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4965 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4966 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4967 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4968 		    (ap == walker->ipsacq_act) &&
4969 		    (pp == walker->ipsacq_policy) &&
4970 		    /* XXX do deep compares of ap/pp? */
4971 		    (unique_id == walker->ipsacq_unique_id))
4972 			break;			/* everything matched */
4973 		mutex_exit(&walker->ipsacq_lock);
4974 	}
4975 
4976 	return (walker);
4977 }
4978 
4979 /*
4980  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4981  * of all of the same length.  Give up (and drop) if memory
4982  * cannot be allocated for a new one; otherwise, invoke callback to
4983  * send the acquire up..
4984  *
4985  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4986  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
4987  * and handle this case specially.
4988  */
4989 void
4990 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
4991 {
4992 	sadbp_t *spp;
4993 	sadb_t *sp;
4994 	ipsacq_t *newbie;
4995 	iacqf_t *bucket;
4996 	mblk_t *datamp = mp->b_cont;
4997 	mblk_t *extended;
4998 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4999 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
5000 	uint32_t *src, *dst, *isrc, *idst;
5001 	ipsec_policy_t *pp = io->ipsec_out_policy;
5002 	ipsec_action_t *ap = io->ipsec_out_act;
5003 	sa_family_t af;
5004 	int hashoffset;
5005 	uint32_t seq;
5006 	uint64_t unique_id = 0;
5007 	ipsec_selector_t sel;
5008 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
5009 	netstack_t	*ns = io->ipsec_out_ns;
5010 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5011 
5012 	ASSERT((pp != NULL) || (ap != NULL));
5013 
5014 	ASSERT(need_ah != NULL || need_esp != NULL);
5015 	/* Assign sadb pointers */
5016 	if (need_esp) { /* ESP for AH+ESP */
5017 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5018 
5019 		spp = &espstack->esp_sadb;
5020 	} else {
5021 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
5022 
5023 		spp = &ahstack->ah_sadb;
5024 	}
5025 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
5026 
5027 	if (ap == NULL)
5028 		ap = pp->ipsp_act;
5029 
5030 	ASSERT(ap != NULL);
5031 
5032 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
5033 		unique_id = SA_FORM_UNIQUE_ID(io);
5034 
5035 	/*
5036 	 * Set up an ACQUIRE record.
5037 	 *
5038 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
5039 	 * below the lowest point allowed in the kernel.  (In other words,
5040 	 * make sure the high bit on the sequence number is set.)
5041 	 */
5042 
5043 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
5044 
5045 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
5046 		src = (uint32_t *)&ipha->ipha_src;
5047 		dst = (uint32_t *)&ipha->ipha_dst;
5048 		af = AF_INET;
5049 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
5050 		ASSERT(io->ipsec_out_v4 == B_TRUE);
5051 	} else {
5052 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
5053 		src = (uint32_t *)&ip6h->ip6_src;
5054 		dst = (uint32_t *)&ip6h->ip6_dst;
5055 		af = AF_INET6;
5056 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
5057 		ASSERT(io->ipsec_out_v4 == B_FALSE);
5058 	}
5059 
5060 	if (tunnel_mode) {
5061 		/* Snag inner addresses. */
5062 		isrc = io->ipsec_out_insrc;
5063 		idst = io->ipsec_out_indst;
5064 	} else {
5065 		isrc = idst = NULL;
5066 	}
5067 
5068 	/*
5069 	 * Check buckets to see if there is an existing entry.  If so,
5070 	 * grab it.  sadb_checkacquire locks newbie if found.
5071 	 */
5072 	bucket = &(sp->sdb_acq[hashoffset]);
5073 	mutex_enter(&bucket->iacqf_lock);
5074 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
5075 	    unique_id);
5076 
5077 	if (newbie == NULL) {
5078 		/*
5079 		 * Otherwise, allocate a new one.
5080 		 */
5081 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
5082 		if (newbie == NULL) {
5083 			mutex_exit(&bucket->iacqf_lock);
5084 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
5085 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
5086 			    &ipss->ipsec_sadb_dropper);
5087 			return;
5088 		}
5089 		newbie->ipsacq_policy = pp;
5090 		if (pp != NULL) {
5091 			IPPOL_REFHOLD(pp);
5092 		}
5093 		IPACT_REFHOLD(ap);
5094 		newbie->ipsacq_act = ap;
5095 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
5096 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
5097 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
5098 		if (newbie->ipsacq_next != NULL)
5099 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
5100 		bucket->iacqf_ipsacq = newbie;
5101 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
5102 		mutex_enter(&newbie->ipsacq_lock);
5103 	}
5104 
5105 	mutex_exit(&bucket->iacqf_lock);
5106 
5107 	/*
5108 	 * This assert looks silly for now, but we may need to enter newbie's
5109 	 * mutex during a search.
5110 	 */
5111 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5112 
5113 	mp->b_next = NULL;
5114 	/* Queue up packet.  Use b_next. */
5115 	if (newbie->ipsacq_numpackets == 0) {
5116 		/* First one. */
5117 		newbie->ipsacq_mp = mp;
5118 		newbie->ipsacq_numpackets = 1;
5119 		newbie->ipsacq_expire = gethrestime_sec();
5120 		/*
5121 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5122 		 * value.
5123 		 */
5124 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5125 		newbie->ipsacq_seq = seq;
5126 		newbie->ipsacq_addrfam = af;
5127 
5128 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
5129 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
5130 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
5131 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
5132 		if (tunnel_mode) {
5133 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
5134 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
5135 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5136 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
5137 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
5138 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5139 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
5140 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5141 			    io->ipsec_out_indst, io->ipsec_out_inaf);
5142 		} else {
5143 			newbie->ipsacq_proto = io->ipsec_out_proto;
5144 		}
5145 		newbie->ipsacq_unique_id = unique_id;
5146 	} else {
5147 		/* Scan to the end of the list & insert. */
5148 		mblk_t *lastone = newbie->ipsacq_mp;
5149 
5150 		while (lastone->b_next != NULL)
5151 			lastone = lastone->b_next;
5152 		lastone->b_next = mp;
5153 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5154 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5155 			lastone = newbie->ipsacq_mp;
5156 			newbie->ipsacq_mp = lastone->b_next;
5157 			lastone->b_next = NULL;
5158 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
5159 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5160 			    &ipss->ipsec_sadb_dropper);
5161 		} else {
5162 			IP_ACQUIRE_STAT(ipss, qhiwater,
5163 			    newbie->ipsacq_numpackets);
5164 		}
5165 	}
5166 
5167 	/*
5168 	 * Reset addresses.  Set them to the most recently added mblk chain,
5169 	 * so that the address pointers in the acquire record will point
5170 	 * at an mblk still attached to the acquire list.
5171 	 */
5172 
5173 	newbie->ipsacq_srcaddr = src;
5174 	newbie->ipsacq_dstaddr = dst;
5175 
5176 	/*
5177 	 * If the acquire record has more than one queued packet, we've
5178 	 * already sent an ACQUIRE, and don't need to repeat ourself.
5179 	 */
5180 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5181 		/* I have an acquire outstanding already! */
5182 		mutex_exit(&newbie->ipsacq_lock);
5183 		return;
5184 	}
5185 
5186 	if (keysock_extended_reg(ns)) {
5187 		/*
5188 		 * Construct an extended ACQUIRE.  There are logging
5189 		 * opportunities here in failure cases.
5190 		 */
5191 
5192 		(void) memset(&sel, 0, sizeof (sel));
5193 		sel.ips_isv4 = io->ipsec_out_v4;
5194 		if (tunnel_mode) {
5195 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
5196 			    IPPROTO_ENCAP : IPPROTO_IPV6;
5197 		} else {
5198 			sel.ips_protocol = io->ipsec_out_proto;
5199 			sel.ips_local_port = io->ipsec_out_src_port;
5200 			sel.ips_remote_port = io->ipsec_out_dst_port;
5201 		}
5202 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
5203 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
5204 		sel.ips_is_icmp_inv_acq = 0;
5205 		if (af == AF_INET) {
5206 			sel.ips_local_addr_v4 = ipha->ipha_src;
5207 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
5208 		} else {
5209 			sel.ips_local_addr_v6 = ip6h->ip6_src;
5210 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5211 		}
5212 
5213 		extended = sadb_keysock_out(0);
5214 		if (extended != NULL) {
5215 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
5216 			    tunnel_mode, seq, 0, ns);
5217 			if (extended->b_cont == NULL) {
5218 				freeb(extended);
5219 				extended = NULL;
5220 			}
5221 		}
5222 	} else
5223 		extended = NULL;
5224 
5225 	/*
5226 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
5227 	 * this new record.  The send-acquire callback assumes that acqrec is
5228 	 * already locked.
5229 	 */
5230 	(*spp->s_acqfn)(newbie, extended, ns);
5231 }
5232 
5233 /*
5234  * Unlink and free an acquire record.
5235  */
5236 void
5237 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5238 {
5239 	mblk_t *mp;
5240 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5241 
5242 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5243 
5244 	if (acqrec->ipsacq_policy != NULL) {
5245 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
5246 	}
5247 	if (acqrec->ipsacq_act != NULL) {
5248 		IPACT_REFRELE(acqrec->ipsacq_act);
5249 	}
5250 
5251 	/* Unlink */
5252 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5253 	if (acqrec->ipsacq_next != NULL)
5254 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5255 
5256 	/*
5257 	 * Free hanging mp's.
5258 	 *
5259 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5260 	 */
5261 
5262 	mutex_enter(&acqrec->ipsacq_lock);
5263 	while (acqrec->ipsacq_mp != NULL) {
5264 		mp = acqrec->ipsacq_mp;
5265 		acqrec->ipsacq_mp = mp->b_next;
5266 		mp->b_next = NULL;
5267 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
5268 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5269 		    &ipss->ipsec_sadb_dropper);
5270 	}
5271 	mutex_exit(&acqrec->ipsacq_lock);
5272 
5273 	/* Free */
5274 	mutex_destroy(&acqrec->ipsacq_lock);
5275 	kmem_free(acqrec, sizeof (*acqrec));
5276 }
5277 
5278 /*
5279  * Destroy an acquire list fanout.
5280  */
5281 static void
5282 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5283     netstack_t *ns)
5284 {
5285 	int i;
5286 	iacqf_t *list = *listp;
5287 
5288 	if (list == NULL)
5289 		return;
5290 
5291 	for (i = 0; i < numentries; i++) {
5292 		mutex_enter(&(list[i].iacqf_lock));
5293 		while (list[i].iacqf_ipsacq != NULL)
5294 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5295 		mutex_exit(&(list[i].iacqf_lock));
5296 		if (forever)
5297 			mutex_destroy(&(list[i].iacqf_lock));
5298 	}
5299 
5300 	if (forever) {
5301 		*listp = NULL;
5302 		kmem_free(list, numentries * sizeof (*list));
5303 	}
5304 }
5305 
5306 /*
5307  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5308  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5309  */
5310 static uint8_t *
5311 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5312     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5313     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5314 {
5315 	uint8_t *cur = start;
5316 	ipsec_alginfo_t *algp;
5317 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5318 
5319 	cur += sizeof (*algdesc);
5320 	if (cur >= limit)
5321 		return (NULL);
5322 
5323 	ecomb->sadb_x_ecomb_numalgs++;
5324 
5325 	/*
5326 	 * Normalize vs. crypto framework's limits.  This way, you can specify
5327 	 * a stronger policy, and when the framework loads a stronger version,
5328 	 * you can just keep plowing w/o rewhacking your SPD.
5329 	 */
5330 	mutex_enter(&ipss->ipsec_alg_lock);
5331 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5332 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5333 	if (algp == NULL) {
5334 		mutex_exit(&ipss->ipsec_alg_lock);
5335 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5336 	}
5337 	if (minbits < algp->alg_ef_minbits)
5338 		minbits = algp->alg_ef_minbits;
5339 	if (maxbits > algp->alg_ef_maxbits)
5340 		maxbits = algp->alg_ef_maxbits;
5341 	mutex_exit(&ipss->ipsec_alg_lock);
5342 
5343 	algdesc->sadb_x_algdesc_satype = satype;
5344 	algdesc->sadb_x_algdesc_algtype = algtype;
5345 	algdesc->sadb_x_algdesc_alg = alg;
5346 	algdesc->sadb_x_algdesc_minbits = minbits;
5347 	algdesc->sadb_x_algdesc_maxbits = maxbits;
5348 	algdesc->sadb_x_algdesc_reserved = 0;
5349 	return (cur);
5350 }
5351 
5352 /*
5353  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5354  * which must fit before *limit
5355  *
5356  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5357  */
5358 static uint8_t *
5359 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5360     netstack_t *ns)
5361 {
5362 	uint8_t *cur = start;
5363 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5364 	ipsec_prot_t *ipp;
5365 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5366 
5367 	cur += sizeof (*ecomb);
5368 	if (cur >= limit)
5369 		return (NULL);
5370 
5371 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5372 
5373 	ipp = &act->ipa_act.ipa_apply;
5374 
5375 	ecomb->sadb_x_ecomb_numalgs = 0;
5376 	ecomb->sadb_x_ecomb_reserved = 0;
5377 	ecomb->sadb_x_ecomb_reserved2 = 0;
5378 	/*
5379 	 * No limits on allocations, since we really don't support that
5380 	 * concept currently.
5381 	 */
5382 	ecomb->sadb_x_ecomb_soft_allocations = 0;
5383 	ecomb->sadb_x_ecomb_hard_allocations = 0;
5384 
5385 	/*
5386 	 * XXX TBD: Policy or global parameters will eventually be
5387 	 * able to fill in some of these.
5388 	 */
5389 	ecomb->sadb_x_ecomb_flags = 0;
5390 	ecomb->sadb_x_ecomb_soft_bytes = 0;
5391 	ecomb->sadb_x_ecomb_hard_bytes = 0;
5392 	ecomb->sadb_x_ecomb_soft_addtime = 0;
5393 	ecomb->sadb_x_ecomb_hard_addtime = 0;
5394 	ecomb->sadb_x_ecomb_soft_usetime = 0;
5395 	ecomb->sadb_x_ecomb_hard_usetime = 0;
5396 
5397 	if (ipp->ipp_use_ah) {
5398 		cur = sadb_new_algdesc(cur, limit, ecomb,
5399 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5400 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5401 		if (cur == NULL)
5402 			return (NULL);
5403 		ipsecah_fill_defs(ecomb, ns);
5404 	}
5405 
5406 	if (ipp->ipp_use_esp) {
5407 		if (ipp->ipp_use_espa) {
5408 			cur = sadb_new_algdesc(cur, limit, ecomb,
5409 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5410 			    ipp->ipp_esp_auth_alg,
5411 			    ipp->ipp_espa_minbits,
5412 			    ipp->ipp_espa_maxbits, ipss);
5413 			if (cur == NULL)
5414 				return (NULL);
5415 		}
5416 
5417 		cur = sadb_new_algdesc(cur, limit, ecomb,
5418 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5419 		    ipp->ipp_encr_alg,
5420 		    ipp->ipp_espe_minbits,
5421 		    ipp->ipp_espe_maxbits, ipss);
5422 		if (cur == NULL)
5423 			return (NULL);
5424 		/* Fill in lifetimes if and only if AH didn't already... */
5425 		if (!ipp->ipp_use_ah)
5426 			ipsecesp_fill_defs(ecomb, ns);
5427 	}
5428 
5429 	return (cur);
5430 }
5431 
5432 /*
5433  * Construct an extended ACQUIRE message based on a selector and the resulting
5434  * IPsec action.
5435  *
5436  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5437  * generation. As a consequence, expect this function to evolve
5438  * rapidly.
5439  */
5440 static mblk_t *
5441 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5442     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5443     netstack_t *ns)
5444 {
5445 	mblk_t *mp;
5446 	sadb_msg_t *samsg;
5447 	uint8_t *start, *cur, *end;
5448 	uint32_t *saddrptr, *daddrptr;
5449 	sa_family_t af;
5450 	sadb_prop_t *eprop;
5451 	ipsec_action_t *ap, *an;
5452 	ipsec_selkey_t *ipsl;
5453 	uint8_t proto, pfxlen;
5454 	uint16_t lport, rport;
5455 	uint32_t kmp, kmc;
5456 
5457 	/*
5458 	 * Find the action we want sooner rather than later..
5459 	 */
5460 	an = NULL;
5461 	if (pol == NULL) {
5462 		ap = act;
5463 	} else {
5464 		ap = pol->ipsp_act;
5465 
5466 		if (ap != NULL)
5467 			an = ap->ipa_next;
5468 	}
5469 
5470 	/*
5471 	 * Just take a swag for the allocation for now.	 We can always
5472 	 * alter it later.
5473 	 */
5474 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5475 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5476 	if (mp == NULL)
5477 		return (NULL);
5478 
5479 	start = mp->b_rptr;
5480 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5481 
5482 	cur = start;
5483 
5484 	samsg = (sadb_msg_t *)cur;
5485 	cur += sizeof (*samsg);
5486 
5487 	samsg->sadb_msg_version = PF_KEY_V2;
5488 	samsg->sadb_msg_type = SADB_ACQUIRE;
5489 	samsg->sadb_msg_errno = 0;
5490 	samsg->sadb_msg_reserved = 0;
5491 	samsg->sadb_msg_satype = 0;
5492 	samsg->sadb_msg_seq = seq;
5493 	samsg->sadb_msg_pid = pid;
5494 
5495 	if (tunnel_mode) {
5496 		/*
5497 		 * Form inner address extensions based NOT on the inner
5498 		 * selectors (i.e. the packet data), but on the policy's
5499 		 * selector key (i.e. the policy's selector information).
5500 		 *
5501 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5502 		 * same in ipsec_selkey_t (unless the compiler does very
5503 		 * strange things with unions, consult your local C language
5504 		 * lawyer for details).
5505 		 */
5506 		ipsl = &(pol->ipsp_sel->ipsl_key);
5507 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5508 			af = AF_INET;
5509 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5510 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5511 		} else {
5512 			af = AF_INET6;
5513 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5514 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5515 		}
5516 
5517 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5518 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5519 			pfxlen = ipsl->ipsl_local_pfxlen;
5520 		} else {
5521 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5522 			pfxlen = 0;
5523 		}
5524 		/* XXX What about ICMP type/code? */
5525 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5526 		    ipsl->ipsl_lport : 0;
5527 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5528 		    ipsl->ipsl_proto : 0;
5529 
5530 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5531 		    af, saddrptr, lport, proto, pfxlen);
5532 		if (cur == NULL) {
5533 			freeb(mp);
5534 			return (NULL);
5535 		}
5536 
5537 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5538 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5539 			pfxlen = ipsl->ipsl_remote_pfxlen;
5540 		} else {
5541 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5542 			pfxlen = 0;
5543 		}
5544 		/* XXX What about ICMP type/code? */
5545 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5546 		    ipsl->ipsl_rport : 0;
5547 
5548 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5549 		    af, daddrptr, rport, proto, pfxlen);
5550 		if (cur == NULL) {
5551 			freeb(mp);
5552 			return (NULL);
5553 		}
5554 		/*
5555 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5556 		 * _with_ inner-packet address selectors, we'll need to further
5557 		 * distinguish tunnel mode here.  For now, having inner
5558 		 * addresses and/or ports is sufficient.
5559 		 *
5560 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5561 		 * outer addresses.
5562 		 */
5563 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5564 		lport = rport = 0;
5565 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5566 		proto = 0;
5567 		lport = 0;
5568 		rport = 0;
5569 		if (pol != NULL) {
5570 			ipsl = &(pol->ipsp_sel->ipsl_key);
5571 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5572 				proto = ipsl->ipsl_proto;
5573 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5574 				rport = ipsl->ipsl_rport;
5575 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5576 				lport = ipsl->ipsl_lport;
5577 		}
5578 	} else {
5579 		proto = sel->ips_protocol;
5580 		lport = sel->ips_local_port;
5581 		rport = sel->ips_remote_port;
5582 	}
5583 
5584 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5585 
5586 	/*
5587 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5588 	 * ipsec_selector_t.
5589 	 */
5590 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5591 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5592 
5593 	if (cur == NULL) {
5594 		freeb(mp);
5595 		return (NULL);
5596 	}
5597 
5598 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5599 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5600 
5601 	if (cur == NULL) {
5602 		freeb(mp);
5603 		return (NULL);
5604 	}
5605 
5606 	/*
5607 	 * This section will change a lot as policy evolves.
5608 	 * For now, it'll be relatively simple.
5609 	 */
5610 	eprop = (sadb_prop_t *)cur;
5611 	cur += sizeof (*eprop);
5612 	if (cur > end) {
5613 		/* no space left */
5614 		freeb(mp);
5615 		return (NULL);
5616 	}
5617 
5618 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5619 	eprop->sadb_x_prop_ereserved = 0;
5620 	eprop->sadb_x_prop_numecombs = 0;
5621 	eprop->sadb_prop_replay = 32;	/* default */
5622 
5623 	kmc = kmp = 0;
5624 
5625 	for (; ap != NULL; ap = an) {
5626 		an = (pol != NULL) ? ap->ipa_next : NULL;
5627 
5628 		/*
5629 		 * Skip non-IPsec policies
5630 		 */
5631 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5632 			continue;
5633 
5634 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5635 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5636 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5637 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5638 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5639 			eprop->sadb_prop_replay =
5640 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5641 		}
5642 
5643 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5644 		if (cur == NULL) { /* no space */
5645 			freeb(mp);
5646 			return (NULL);
5647 		}
5648 		eprop->sadb_x_prop_numecombs++;
5649 	}
5650 
5651 	if (eprop->sadb_x_prop_numecombs == 0) {
5652 		/*
5653 		 * This will happen if we fail to find a policy
5654 		 * allowing for IPsec processing.
5655 		 * Construct an error message.
5656 		 */
5657 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5658 		samsg->sadb_msg_errno = ENOENT;
5659 		samsg->sadb_x_msg_diagnostic = 0;
5660 		return (mp);
5661 	}
5662 
5663 	if ((kmp != 0) || (kmc != 0)) {
5664 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5665 		if (cur == NULL) {
5666 			freeb(mp);
5667 			return (NULL);
5668 		}
5669 	}
5670 
5671 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5672 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5673 	mp->b_wptr = cur;
5674 
5675 	return (mp);
5676 }
5677 
5678 /*
5679  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5680  *
5681  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5682  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5683  * maximize code consolidation while preventing algorithm changes from messing
5684  * with the callers finishing touches on the ACQUIRE itself.
5685  */
5686 mblk_t *
5687 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5688 {
5689 	uint_t allocsize;
5690 	mblk_t *pfkeymp, *msgmp;
5691 	sa_family_t af;
5692 	uint8_t *cur, *end;
5693 	sadb_msg_t *samsg;
5694 	uint16_t sport_typecode;
5695 	uint16_t dport_typecode;
5696 	uint8_t check_proto;
5697 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5698 
5699 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5700 
5701 	pfkeymp = sadb_keysock_out(0);
5702 	if (pfkeymp == NULL)
5703 		return (NULL);
5704 
5705 	/*
5706 	 * First, allocate a basic ACQUIRE message
5707 	 */
5708 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5709 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5710 
5711 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5712 	allocsize += 2 * sizeof (struct sockaddr_in6);
5713 
5714 	mutex_enter(&ipss->ipsec_alg_lock);
5715 	/* NOTE:  The lock is now held through to this function's return. */
5716 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5717 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5718 
5719 	if (tunnel_mode) {
5720 		/* Tunnel mode! */
5721 		allocsize += 2 * sizeof (sadb_address_t);
5722 		/* Enough to cover both AF_INET and AF_INET6. */
5723 		allocsize += 2 * sizeof (struct sockaddr_in6);
5724 	}
5725 
5726 	msgmp = allocb(allocsize, BPRI_HI);
5727 	if (msgmp == NULL) {
5728 		freeb(pfkeymp);
5729 		mutex_exit(&ipss->ipsec_alg_lock);
5730 		return (NULL);
5731 	}
5732 
5733 	pfkeymp->b_cont = msgmp;
5734 	cur = msgmp->b_rptr;
5735 	end = cur + allocsize;
5736 	samsg = (sadb_msg_t *)cur;
5737 	cur += sizeof (sadb_msg_t);
5738 
5739 	af = acqrec->ipsacq_addrfam;
5740 	switch (af) {
5741 	case AF_INET:
5742 		check_proto = IPPROTO_ICMP;
5743 		break;
5744 	case AF_INET6:
5745 		check_proto = IPPROTO_ICMPV6;
5746 		break;
5747 	default:
5748 		/* This should never happen unless we have kernel bugs. */
5749 		cmn_err(CE_WARN,
5750 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5751 		ASSERT(0);
5752 		mutex_exit(&ipss->ipsec_alg_lock);
5753 		return (NULL);
5754 	}
5755 
5756 	samsg->sadb_msg_version = PF_KEY_V2;
5757 	samsg->sadb_msg_type = SADB_ACQUIRE;
5758 	samsg->sadb_msg_satype = satype;
5759 	samsg->sadb_msg_errno = 0;
5760 	samsg->sadb_msg_pid = 0;
5761 	samsg->sadb_msg_reserved = 0;
5762 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5763 
5764 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5765 
5766 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5767 		sport_typecode = dport_typecode = 0;
5768 	} else {
5769 		sport_typecode = acqrec->ipsacq_srcport;
5770 		dport_typecode = acqrec->ipsacq_dstport;
5771 	}
5772 
5773 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5774 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5775 
5776 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5777 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5778 
5779 	if (tunnel_mode) {
5780 		sport_typecode = acqrec->ipsacq_srcport;
5781 		dport_typecode = acqrec->ipsacq_dstport;
5782 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5783 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5784 		    sport_typecode, acqrec->ipsacq_inner_proto,
5785 		    acqrec->ipsacq_innersrcpfx);
5786 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5787 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5788 		    dport_typecode, acqrec->ipsacq_inner_proto,
5789 		    acqrec->ipsacq_innerdstpfx);
5790 	}
5791 
5792 	/* XXX Insert identity information here. */
5793 
5794 	/* XXXMLS Insert sensitivity information here. */
5795 
5796 	if (cur != NULL)
5797 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5798 	else
5799 		mutex_exit(&ipss->ipsec_alg_lock);
5800 
5801 	return (pfkeymp);
5802 }
5803 
5804 /*
5805  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5806  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5807  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5808  * (ipsa_t *)-1).
5809  *
5810  * master_spi is passed in host order.
5811  */
5812 ipsa_t *
5813 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5814     netstack_t *ns, uint_t sa_type)
5815 {
5816 	sadb_address_t *src =
5817 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5818 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5819 	sadb_spirange_t *range =
5820 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5821 	struct sockaddr_in *ssa, *dsa;
5822 	struct sockaddr_in6 *ssa6, *dsa6;
5823 	uint32_t *srcaddr, *dstaddr;
5824 	sa_family_t af;
5825 	uint32_t add, min, max;
5826 	uint8_t protocol =
5827 	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
5828 
5829 	if (src == NULL) {
5830 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5831 		return ((ipsa_t *)-1);
5832 	}
5833 	if (dst == NULL) {
5834 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5835 		return ((ipsa_t *)-1);
5836 	}
5837 	if (range == NULL) {
5838 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5839 		return ((ipsa_t *)-1);
5840 	}
5841 
5842 	min = ntohl(range->sadb_spirange_min);
5843 	max = ntohl(range->sadb_spirange_max);
5844 	dsa = (struct sockaddr_in *)(dst + 1);
5845 	dsa6 = (struct sockaddr_in6 *)dsa;
5846 
5847 	ssa = (struct sockaddr_in *)(src + 1);
5848 	ssa6 = (struct sockaddr_in6 *)ssa;
5849 	ASSERT(dsa->sin_family == ssa->sin_family);
5850 
5851 	srcaddr = ALL_ZEROES_PTR;
5852 	af = dsa->sin_family;
5853 	switch (af) {
5854 	case AF_INET:
5855 		if (src != NULL)
5856 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5857 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5858 		break;
5859 	case AF_INET6:
5860 		if (src != NULL)
5861 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5862 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5863 		break;
5864 	default:
5865 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5866 		return ((ipsa_t *)-1);
5867 	}
5868 
5869 	if (master_spi < min || master_spi > max) {
5870 		/* Return a random value in the range. */
5871 		if (cl_inet_getspi) {
5872 			cl_inet_getspi(protocol, (uint8_t *)&add, sizeof (add));
5873 		} else {
5874 			(void) random_get_pseudo_bytes((uint8_t *)&add,
5875 			    sizeof (add));
5876 		}
5877 		master_spi = min + (add % (max - min + 1));
5878 	}
5879 
5880 	/*
5881 	 * Since master_spi is passed in host order, we need to htonl() it
5882 	 * for the purposes of creating a new SA.
5883 	 */
5884 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5885 	    ns));
5886 }
5887 
5888 /*
5889  *
5890  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5891  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5892  * and scan for the sequence number in question.  I may wish to accept an
5893  * address pair with it, for easier searching.
5894  *
5895  * Caller frees the message, so we don't have to here.
5896  *
5897  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5898  *		failures.
5899  */
5900 /* ARGSUSED */
5901 void
5902 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
5903 {
5904 	int i;
5905 	ipsacq_t *acqrec;
5906 	iacqf_t *bucket;
5907 
5908 	/*
5909 	 * I only accept the base header for this!
5910 	 * Though to be honest, requiring the dst address would help
5911 	 * immensely.
5912 	 *
5913 	 * XXX	There are already cases where I can get the dst address.
5914 	 */
5915 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5916 		return;
5917 
5918 	/*
5919 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5920 	 * (and in the future send a message to IP with the appropriate error
5921 	 * number).
5922 	 *
5923 	 * Q: Do I want to reject if pid != 0?
5924 	 */
5925 
5926 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5927 		bucket = &sp->s_v4.sdb_acq[i];
5928 		mutex_enter(&bucket->iacqf_lock);
5929 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5930 		    acqrec = acqrec->ipsacq_next) {
5931 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5932 				break;	/* for acqrec... loop. */
5933 		}
5934 		if (acqrec != NULL)
5935 			break;	/* for i = 0... loop. */
5936 
5937 		mutex_exit(&bucket->iacqf_lock);
5938 	}
5939 
5940 	if (acqrec == NULL) {
5941 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
5942 			bucket = &sp->s_v6.sdb_acq[i];
5943 			mutex_enter(&bucket->iacqf_lock);
5944 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5945 			    acqrec = acqrec->ipsacq_next) {
5946 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5947 					break;	/* for acqrec... loop. */
5948 			}
5949 			if (acqrec != NULL)
5950 				break;	/* for i = 0... loop. */
5951 
5952 			mutex_exit(&bucket->iacqf_lock);
5953 		}
5954 	}
5955 
5956 
5957 	if (acqrec == NULL)
5958 		return;
5959 
5960 	/*
5961 	 * What do I do with the errno and IP?	I may need mp's services a
5962 	 * little more.	 See sadb_destroy_acquire() for future directions
5963 	 * beyond free the mblk chain on the acquire record.
5964 	 */
5965 
5966 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
5967 	sadb_destroy_acquire(acqrec, ns);
5968 	/* Have to exit mutex here, because of breaking out of for loop. */
5969 	mutex_exit(&bucket->iacqf_lock);
5970 }
5971 
5972 /*
5973  * The following functions work with the replay windows of an SA.  They assume
5974  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
5975  * represents the highest sequence number packet received, and back
5976  * (ipsa->ipsa_replay_wsize) packets.
5977  */
5978 
5979 /*
5980  * Is the replay bit set?
5981  */
5982 static boolean_t
5983 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
5984 {
5985 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5986 
5987 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
5988 }
5989 
5990 /*
5991  * Shift the bits of the replay window over.
5992  */
5993 static void
5994 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
5995 {
5996 	int i;
5997 	int jump = ((shift - 1) >> 6) + 1;
5998 
5999 	if (shift == 0)
6000 		return;
6001 
6002 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6003 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6004 			ipsa->ipsa_replay_arr[i + jump] |=
6005 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6006 		}
6007 		ipsa->ipsa_replay_arr[i] <<= shift;
6008 	}
6009 }
6010 
6011 /*
6012  * Set a bit in the bit vector.
6013  */
6014 static void
6015 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6016 {
6017 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6018 
6019 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6020 }
6021 
6022 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
6023 
6024 /*
6025  * Assume caller has NOT done ntohl() already on seq.  Check to see
6026  * if replay sequence number "seq" has been seen already.
6027  */
6028 boolean_t
6029 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6030 {
6031 	boolean_t rc;
6032 	uint32_t diff;
6033 
6034 	if (ipsa->ipsa_replay_wsize == 0)
6035 		return (B_TRUE);
6036 
6037 	/*
6038 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6039 	 */
6040 
6041 	/* Convert sequence number into host order before holding the mutex. */
6042 	seq = ntohl(seq);
6043 
6044 	mutex_enter(&ipsa->ipsa_lock);
6045 
6046 	/* Initialize inbound SA's ipsa_replay field to last one received. */
6047 	if (ipsa->ipsa_replay == 0)
6048 		ipsa->ipsa_replay = 1;
6049 
6050 	if (seq > ipsa->ipsa_replay) {
6051 		/*
6052 		 * I have received a new "highest value received".  Shift
6053 		 * the replay window over.
6054 		 */
6055 		diff = seq - ipsa->ipsa_replay;
6056 		if (diff < ipsa->ipsa_replay_wsize) {
6057 			/* In replay window, shift bits over. */
6058 			ipsa_shift_replay(ipsa, diff);
6059 		} else {
6060 			/* WAY FAR AHEAD, clear bits and start again. */
6061 			bzero(ipsa->ipsa_replay_arr,
6062 			    sizeof (ipsa->ipsa_replay_arr));
6063 		}
6064 		ipsa_set_replay(ipsa, 0);
6065 		ipsa->ipsa_replay = seq;
6066 		rc = B_TRUE;
6067 		goto done;
6068 	}
6069 	diff = ipsa->ipsa_replay - seq;
6070 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6071 		rc = B_FALSE;
6072 		goto done;
6073 	}
6074 	/* Set this packet as seen. */
6075 	ipsa_set_replay(ipsa, diff);
6076 
6077 	rc = B_TRUE;
6078 done:
6079 	mutex_exit(&ipsa->ipsa_lock);
6080 	return (rc);
6081 }
6082 
6083 /*
6084  * "Peek" and see if we should even bother going through the effort of
6085  * running an authentication check on the sequence number passed in.
6086  * this takes into account packets that are below the replay window,
6087  * and collisions with already replayed packets.  Return B_TRUE if it
6088  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6089  * Assume same byte-ordering as sadb_replay_check.
6090  */
6091 boolean_t
6092 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6093 {
6094 	boolean_t rc = B_FALSE;
6095 	uint32_t diff;
6096 
6097 	if (ipsa->ipsa_replay_wsize == 0)
6098 		return (B_TRUE);
6099 
6100 	/*
6101 	 * 0 is 0, regardless of byte order... :)
6102 	 *
6103 	 * If I get 0 on the wire (and there is a replay window) then the
6104 	 * sender most likely wrapped.	This ipsa may need to be marked or
6105 	 * something.
6106 	 */
6107 	if (seq == 0)
6108 		return (B_FALSE);
6109 
6110 	seq = ntohl(seq);
6111 	mutex_enter(&ipsa->ipsa_lock);
6112 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6113 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6114 		goto done;
6115 
6116 	/*
6117 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6118 	 * bother with formalities.  I'm not accepting any more packets
6119 	 * on this SA.
6120 	 */
6121 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6122 		/*
6123 		 * Since we're already holding the lock, update the
6124 		 * expire time ala. sadb_replay_delete() and return.
6125 		 */
6126 		ipsa->ipsa_hardexpiretime = (time_t)1;
6127 		goto done;
6128 	}
6129 
6130 	if (seq <= ipsa->ipsa_replay) {
6131 		/*
6132 		 * This seq is in the replay window.  I'm not below it,
6133 		 * because I already checked for that above!
6134 		 */
6135 		diff = ipsa->ipsa_replay - seq;
6136 		if (ipsa_is_replay_set(ipsa, diff))
6137 			goto done;
6138 	}
6139 	/* Else return B_TRUE, I'm going to advance the window. */
6140 
6141 	rc = B_TRUE;
6142 done:
6143 	mutex_exit(&ipsa->ipsa_lock);
6144 	return (rc);
6145 }
6146 
6147 /*
6148  * Delete a single SA.
6149  *
6150  * For now, use the quick-and-dirty trick of making the association's
6151  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6152  */
6153 void
6154 sadb_replay_delete(ipsa_t *assoc)
6155 {
6156 	mutex_enter(&assoc->ipsa_lock);
6157 	assoc->ipsa_hardexpiretime = (time_t)1;
6158 	mutex_exit(&assoc->ipsa_lock);
6159 }
6160 
6161 /*
6162  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
6163  * down.  The caller will handle the T_BIND_ACK locally.
6164  */
6165 boolean_t
6166 sadb_t_bind_req(queue_t *q, int proto)
6167 {
6168 	struct T_bind_req *tbr;
6169 	mblk_t *mp;
6170 
6171 	mp = allocb(sizeof (struct T_bind_req) + 1, BPRI_HI);
6172 	if (mp == NULL) {
6173 		/* cmn_err(CE_WARN, */
6174 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
6175 		return (B_FALSE);
6176 	}
6177 	mp->b_datap->db_type = M_PCPROTO;
6178 	tbr = (struct T_bind_req *)mp->b_rptr;
6179 	mp->b_wptr += sizeof (struct T_bind_req);
6180 	tbr->PRIM_type = T_BIND_REQ;
6181 	tbr->ADDR_length = 0;
6182 	tbr->ADDR_offset = 0;
6183 	tbr->CONIND_number = 0;
6184 	*mp->b_wptr = (uint8_t)proto;
6185 	mp->b_wptr++;
6186 
6187 	putnext(q, mp);
6188 	return (B_TRUE);
6189 }
6190 
6191 /*
6192  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
6193  * this is designed to take only a format string with "* %x * %s *", so
6194  * that "spi" is printed first, then "addr" is converted using inet_pton().
6195  *
6196  * This is abstracted out to save the stack space for only when inet_pton()
6197  * is called.  Make sure "spi" is in network order; it usually is when this
6198  * would get called.
6199  */
6200 void
6201 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6202     uint32_t spi, void *addr, int af, netstack_t *ns)
6203 {
6204 	char buf[INET6_ADDRSTRLEN];
6205 
6206 	ASSERT(af == AF_INET6 || af == AF_INET);
6207 
6208 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6209 	    inet_ntop(af, addr, buf, sizeof (buf)));
6210 }
6211 
6212 /*
6213  * Fills in a reference to the policy, if any, from the conn, in *ppp
6214  * Releases a reference to the passed conn_t.
6215  */
6216 static void
6217 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6218 {
6219 	ipsec_policy_t	*pp;
6220 	ipsec_latch_t	*ipl = connp->conn_latch;
6221 
6222 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
6223 		pp = ipl->ipl_out_policy;
6224 		IPPOL_REFHOLD(pp);
6225 	} else {
6226 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
6227 		    connp->conn_netstack);
6228 	}
6229 	*ppp = pp;
6230 	CONN_DEC_REF(connp);
6231 }
6232 
6233 /*
6234  * The following functions scan through active conn_t structures
6235  * and return a reference to the best-matching policy it can find.
6236  * Caller must release the reference.
6237  */
6238 static void
6239 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6240 {
6241 	connf_t *connfp;
6242 	conn_t *connp = NULL;
6243 	ipsec_selector_t portonly;
6244 
6245 	bzero((void *)&portonly, sizeof (portonly));
6246 
6247 	if (sel->ips_local_port == 0)
6248 		return;
6249 
6250 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6251 	    ipst)];
6252 	mutex_enter(&connfp->connf_lock);
6253 
6254 	if (sel->ips_isv4) {
6255 		connp = connfp->connf_head;
6256 		while (connp != NULL) {
6257 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6258 			    sel->ips_local_addr_v4, sel->ips_remote_port,
6259 			    sel->ips_remote_addr_v4))
6260 				break;
6261 			connp = connp->conn_next;
6262 		}
6263 
6264 		if (connp == NULL) {
6265 			/* Try port-only match in IPv6. */
6266 			portonly.ips_local_port = sel->ips_local_port;
6267 			sel = &portonly;
6268 		}
6269 	}
6270 
6271 	if (connp == NULL) {
6272 		connp = connfp->connf_head;
6273 		while (connp != NULL) {
6274 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6275 			    sel->ips_local_addr_v6, sel->ips_remote_port,
6276 			    sel->ips_remote_addr_v6))
6277 				break;
6278 			connp = connp->conn_next;
6279 		}
6280 
6281 		if (connp == NULL) {
6282 			mutex_exit(&connfp->connf_lock);
6283 			return;
6284 		}
6285 	}
6286 
6287 	CONN_INC_REF(connp);
6288 	mutex_exit(&connfp->connf_lock);
6289 
6290 	ipsec_conn_pol(sel, connp, ppp);
6291 }
6292 
6293 static conn_t *
6294 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6295 {
6296 	connf_t *connfp;
6297 	conn_t *connp = NULL;
6298 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6299 
6300 	if (sel->ips_local_port == 0)
6301 		return (NULL);
6302 
6303 	connfp = &ipst->ips_ipcl_bind_fanout[
6304 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6305 	mutex_enter(&connfp->connf_lock);
6306 
6307 	if (sel->ips_isv4) {
6308 		connp = connfp->connf_head;
6309 		while (connp != NULL) {
6310 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6311 			    sel->ips_local_addr_v4, pptr[1]))
6312 				break;
6313 			connp = connp->conn_next;
6314 		}
6315 
6316 		if (connp == NULL) {
6317 			/* Match to all-zeroes. */
6318 			v6addrmatch = &ipv6_all_zeros;
6319 		}
6320 	}
6321 
6322 	if (connp == NULL) {
6323 		connp = connfp->connf_head;
6324 		while (connp != NULL) {
6325 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6326 			    *v6addrmatch, pptr[1]))
6327 				break;
6328 			connp = connp->conn_next;
6329 		}
6330 
6331 		if (connp == NULL) {
6332 			mutex_exit(&connfp->connf_lock);
6333 			return (NULL);
6334 		}
6335 	}
6336 
6337 	CONN_INC_REF(connp);
6338 	mutex_exit(&connfp->connf_lock);
6339 	return (connp);
6340 }
6341 
6342 static void
6343 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6344 {
6345 	connf_t 	*connfp;
6346 	conn_t		*connp;
6347 	uint32_t	ports;
6348 	uint16_t	*pptr = (uint16_t *)&ports;
6349 
6350 	/*
6351 	 * Find TCP state in the following order:
6352 	 * 1.) Connected conns.
6353 	 * 2.) Listeners.
6354 	 *
6355 	 * Even though #2 will be the common case for inbound traffic, only
6356 	 * following this order insures correctness.
6357 	 */
6358 
6359 	if (sel->ips_local_port == 0)
6360 		return;
6361 
6362 	/*
6363 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6364 	 * See ipsec_construct_inverse_acquire() for details.
6365 	 */
6366 	pptr[0] = sel->ips_remote_port;
6367 	pptr[1] = sel->ips_local_port;
6368 
6369 	connfp = &ipst->ips_ipcl_conn_fanout[
6370 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6371 	mutex_enter(&connfp->connf_lock);
6372 	connp = connfp->connf_head;
6373 
6374 	if (sel->ips_isv4) {
6375 		while (connp != NULL) {
6376 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6377 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6378 			    ports))
6379 				break;
6380 			connp = connp->conn_next;
6381 		}
6382 	} else {
6383 		while (connp != NULL) {
6384 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6385 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6386 			    ports))
6387 				break;
6388 			connp = connp->conn_next;
6389 		}
6390 	}
6391 
6392 	if (connp != NULL) {
6393 		CONN_INC_REF(connp);
6394 		mutex_exit(&connfp->connf_lock);
6395 	} else {
6396 		mutex_exit(&connfp->connf_lock);
6397 
6398 		/* Try the listen hash. */
6399 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6400 			return;
6401 	}
6402 
6403 	ipsec_conn_pol(sel, connp, ppp);
6404 }
6405 
6406 static void
6407 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6408     ip_stack_t *ipst)
6409 {
6410 	conn_t		*connp;
6411 	uint32_t	ports;
6412 	uint16_t	*pptr = (uint16_t *)&ports;
6413 
6414 	/*
6415 	 * Find SCP state in the following order:
6416 	 * 1.) Connected conns.
6417 	 * 2.) Listeners.
6418 	 *
6419 	 * Even though #2 will be the common case for inbound traffic, only
6420 	 * following this order insures correctness.
6421 	 */
6422 
6423 	if (sel->ips_local_port == 0)
6424 		return;
6425 
6426 	/*
6427 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6428 	 * See ipsec_construct_inverse_acquire() for details.
6429 	 */
6430 	pptr[0] = sel->ips_remote_port;
6431 	pptr[1] = sel->ips_local_port;
6432 
6433 	if (sel->ips_isv4) {
6434 		in6_addr_t	src, dst;
6435 
6436 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6437 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6438 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6439 		    ipst->ips_netstack->netstack_sctp);
6440 	} else {
6441 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6442 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6443 		    ipst->ips_netstack->netstack_sctp);
6444 	}
6445 	if (connp == NULL)
6446 		return;
6447 	ipsec_conn_pol(sel, connp, ppp);
6448 }
6449 
6450 /*
6451  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6452  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6453  * to PF_KEY.
6454  *
6455  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6456  * ignore prefix lengths in the address extension.  Since we match on first-
6457  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6458  * set addresses to mask out the lower bits, we should get a suitable search
6459  * key for the SPD anyway.  This is the function to change if the assumption
6460  * about suitable search keys is wrong.
6461  */
6462 static int
6463 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6464     sadb_address_t *dstext, int *diagnostic)
6465 {
6466 	struct sockaddr_in *src, *dst;
6467 	struct sockaddr_in6 *src6, *dst6;
6468 
6469 	*diagnostic = 0;
6470 
6471 	bzero(sel, sizeof (*sel));
6472 	sel->ips_protocol = srcext->sadb_address_proto;
6473 	dst = (struct sockaddr_in *)(dstext + 1);
6474 	if (dst->sin_family == AF_INET6) {
6475 		dst6 = (struct sockaddr_in6 *)dst;
6476 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6477 		if (src6->sin6_family != AF_INET6) {
6478 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6479 			return (EINVAL);
6480 		}
6481 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6482 		sel->ips_local_addr_v6 = src6->sin6_addr;
6483 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6484 			sel->ips_is_icmp_inv_acq = 1;
6485 		} else {
6486 			sel->ips_remote_port = dst6->sin6_port;
6487 			sel->ips_local_port = src6->sin6_port;
6488 		}
6489 		sel->ips_isv4 = B_FALSE;
6490 	} else {
6491 		src = (struct sockaddr_in *)(srcext + 1);
6492 		if (src->sin_family != AF_INET) {
6493 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6494 			return (EINVAL);
6495 		}
6496 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6497 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6498 		if (sel->ips_protocol == IPPROTO_ICMP) {
6499 			sel->ips_is_icmp_inv_acq = 1;
6500 		} else {
6501 			sel->ips_remote_port = dst->sin_port;
6502 			sel->ips_local_port = src->sin_port;
6503 		}
6504 		sel->ips_isv4 = B_TRUE;
6505 	}
6506 	return (0);
6507 }
6508 
6509 /*
6510  * We have encapsulation.
6511  * - Lookup tun_t by address and look for an associated
6512  *   tunnel policy
6513  * - If there are inner selectors
6514  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6515  *   - Look up tunnel policy based on selectors
6516  * - Else
6517  *   - Sanity check the negotation
6518  *   - If appropriate, fall through to global policy
6519  */
6520 static int
6521 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6522     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6523     int *diagnostic, netstack_t *ns)
6524 {
6525 	int err;
6526 	ipsec_policy_head_t *polhead;
6527 
6528 	/* Check for inner selectors and act appropriately */
6529 
6530 	if (innsrcext != NULL) {
6531 		/* Inner selectors present */
6532 		ASSERT(inndstext != NULL);
6533 		if ((itp == NULL) ||
6534 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6535 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6536 			/*
6537 			 * If inner packet selectors, we must have negotiate
6538 			 * tunnel and active policy.  If the tunnel has
6539 			 * transport-mode policy set on it, or has no policy,
6540 			 * fail.
6541 			 */
6542 			return (ENOENT);
6543 		} else {
6544 			/*
6545 			 * Reset "sel" to indicate inner selectors.  Pass
6546 			 * inner PF_KEY address extensions for this to happen.
6547 			 */
6548 			err = ipsec_get_inverse_acquire_sel(sel,
6549 			    innsrcext, inndstext, diagnostic);
6550 			if (err != 0) {
6551 				ITP_REFRELE(itp, ns);
6552 				return (err);
6553 			}
6554 			/*
6555 			 * Now look for a tunnel policy based on those inner
6556 			 * selectors.  (Common code is below.)
6557 			 */
6558 		}
6559 	} else {
6560 		/* No inner selectors present */
6561 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6562 			/*
6563 			 * Transport mode negotiation with no tunnel policy
6564 			 * configured - return to indicate a global policy
6565 			 * check is needed.
6566 			 */
6567 			if (itp != NULL) {
6568 				ITP_REFRELE(itp, ns);
6569 			}
6570 			return (0);
6571 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6572 			/* Tunnel mode set with no inner selectors. */
6573 			ITP_REFRELE(itp, ns);
6574 			return (ENOENT);
6575 		}
6576 		/*
6577 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6578 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6579 		 * itp with policy set based on any match, so don't bother
6580 		 * changing fields in "sel".
6581 		 */
6582 	}
6583 
6584 	ASSERT(itp != NULL);
6585 	polhead = itp->itp_policy;
6586 	ASSERT(polhead != NULL);
6587 	rw_enter(&polhead->iph_lock, RW_READER);
6588 	*ppp = ipsec_find_policy_head(NULL, polhead,
6589 	    IPSEC_TYPE_INBOUND, sel, ns);
6590 	rw_exit(&polhead->iph_lock);
6591 	ITP_REFRELE(itp, ns);
6592 
6593 	/*
6594 	 * Don't default to global if we didn't find a matching policy entry.
6595 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6596 	 */
6597 	if (*ppp == NULL)
6598 		return (ENOENT);
6599 
6600 	return (0);
6601 }
6602 
6603 static void
6604 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6605     ip_stack_t *ipst)
6606 {
6607 	boolean_t	isv4 = sel->ips_isv4;
6608 	connf_t		*connfp;
6609 	conn_t		*connp;
6610 
6611 	if (isv4) {
6612 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
6613 	} else {
6614 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6615 	}
6616 
6617 	mutex_enter(&connfp->connf_lock);
6618 	for (connp = connfp->connf_head; connp != NULL;
6619 	    connp = connp->conn_next) {
6620 		if (!((isv4 && !((connp->conn_src == 0 ||
6621 		    connp->conn_src == sel->ips_local_addr_v4) &&
6622 		    (connp->conn_rem == 0 ||
6623 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
6624 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
6625 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
6626 		    &sel->ips_local_addr_v6)) &&
6627 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
6628 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
6629 		    &sel->ips_remote_addr_v6)))))) {
6630 			break;
6631 		}
6632 	}
6633 	if (connp == NULL) {
6634 		mutex_exit(&connfp->connf_lock);
6635 		return;
6636 	}
6637 
6638 	CONN_INC_REF(connp);
6639 	mutex_exit(&connfp->connf_lock);
6640 
6641 	ipsec_conn_pol(sel, connp, ppp);
6642 }
6643 
6644 /*
6645  * Construct an inverse ACQUIRE reply based on:
6646  *
6647  * 1.) Current global policy.
6648  * 2.) An conn_t match depending on what all was passed in the extv[].
6649  * 3.) A tunnel's policy head.
6650  * ...
6651  * N.) Other stuff TBD (e.g. identities)
6652  *
6653  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6654  * in this function so the caller can extract them where appropriately.
6655  *
6656  * The SRC address is the local one - just like an outbound ACQUIRE message.
6657  */
6658 mblk_t *
6659 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6660     netstack_t *ns)
6661 {
6662 	int err;
6663 	int diagnostic;
6664 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6665 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6666 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6667 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6668 	struct sockaddr_in6 *src, *dst;
6669 	struct sockaddr_in6 *isrc, *idst;
6670 	ipsec_tun_pol_t *itp = NULL;
6671 	ipsec_policy_t *pp = NULL;
6672 	ipsec_selector_t sel, isel;
6673 	mblk_t *retmp;
6674 	ip_stack_t	*ipst = ns->netstack_ip;
6675 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6676 
6677 	/* Normalize addresses */
6678 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6679 	    == KS_IN_ADDR_UNKNOWN) {
6680 		err = EINVAL;
6681 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6682 		goto bail;
6683 	}
6684 	src = (struct sockaddr_in6 *)(srcext + 1);
6685 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6686 	    == KS_IN_ADDR_UNKNOWN) {
6687 		err = EINVAL;
6688 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6689 		goto bail;
6690 	}
6691 	dst = (struct sockaddr_in6 *)(dstext + 1);
6692 	if (src->sin6_family != dst->sin6_family) {
6693 		err = EINVAL;
6694 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6695 		goto bail;
6696 	}
6697 
6698 	/* Check for tunnel mode and act appropriately */
6699 	if (innsrcext != NULL) {
6700 		if (inndstext == NULL) {
6701 			err = EINVAL;
6702 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6703 			goto bail;
6704 		}
6705 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6706 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6707 			err = EINVAL;
6708 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6709 			goto bail;
6710 		}
6711 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6712 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6713 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6714 			err = EINVAL;
6715 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6716 			goto bail;
6717 		}
6718 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6719 		if (isrc->sin6_family != idst->sin6_family) {
6720 			err = EINVAL;
6721 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6722 			goto bail;
6723 		}
6724 		if (isrc->sin6_family != AF_INET &&
6725 		    isrc->sin6_family != AF_INET6) {
6726 			err = EINVAL;
6727 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6728 			goto bail;
6729 		}
6730 	} else if (inndstext != NULL) {
6731 		err = EINVAL;
6732 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6733 		goto bail;
6734 	}
6735 
6736 	/* Get selectors first, based on outer addresses */
6737 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6738 	if (err != 0)
6739 		goto bail;
6740 
6741 	/* Check for tunnel mode mismatches. */
6742 	if (innsrcext != NULL &&
6743 	    ((isrc->sin6_family == AF_INET &&
6744 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6745 	    (isrc->sin6_family == AF_INET6 &&
6746 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6747 		err = EPROTOTYPE;
6748 		goto bail;
6749 	}
6750 
6751 	/*
6752 	 * Okay, we have the addresses and other selector information.
6753 	 * Let's first find a conn...
6754 	 */
6755 	pp = NULL;
6756 	switch (sel.ips_protocol) {
6757 	case IPPROTO_TCP:
6758 		ipsec_tcp_pol(&sel, &pp, ipst);
6759 		break;
6760 	case IPPROTO_UDP:
6761 		ipsec_udp_pol(&sel, &pp, ipst);
6762 		break;
6763 	case IPPROTO_SCTP:
6764 		ipsec_sctp_pol(&sel, &pp, ipst);
6765 		break;
6766 	case IPPROTO_ENCAP:
6767 	case IPPROTO_IPV6:
6768 		rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER);
6769 		/*
6770 		 * Assume sel.ips_remote_addr_* has the right address at
6771 		 * that exact position.
6772 		 */
6773 		itp = ipss->ipsec_itp_get_byaddr(
6774 		    (uint32_t *)(&sel.ips_local_addr_v6),
6775 		    (uint32_t *)(&sel.ips_remote_addr_v6),
6776 		    src->sin6_family, ns);
6777 		rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
6778 		if (innsrcext == NULL) {
6779 			/*
6780 			 * Transport-mode tunnel, make sure we fake out isel
6781 			 * to contain something based on the outer protocol.
6782 			 */
6783 			bzero(&isel, sizeof (isel));
6784 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6785 		} /* Else isel is initialized by ipsec_tun_pol(). */
6786 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6787 		    &diagnostic, ns);
6788 		/*
6789 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6790 		 * may be.
6791 		 */
6792 		if (err != 0)
6793 			goto bail;
6794 		break;
6795 	default:
6796 		ipsec_oth_pol(&sel, &pp, ipst);
6797 		break;
6798 	}
6799 
6800 	/*
6801 	 * If we didn't find a matching conn_t or other policy head, take a
6802 	 * look in the global policy.
6803 	 */
6804 	if (pp == NULL) {
6805 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6806 		    ns);
6807 		if (pp == NULL) {
6808 			/* There's no global policy. */
6809 			err = ENOENT;
6810 			diagnostic = 0;
6811 			goto bail;
6812 		}
6813 	}
6814 
6815 	/*
6816 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6817 	 * message based on that, fix fields where appropriate,
6818 	 * and return the message.
6819 	 */
6820 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6821 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6822 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6823 	if (pp != NULL) {
6824 		IPPOL_REFRELE(pp, ns);
6825 	}
6826 	if (retmp != NULL) {
6827 		return (retmp);
6828 	} else {
6829 		err = ENOMEM;
6830 		diagnostic = 0;
6831 	}
6832 bail:
6833 	samsg->sadb_msg_errno = (uint8_t)err;
6834 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6835 	return (NULL);
6836 }
6837 
6838 /*
6839  * ipsa_lpkt is a one-element queue, only manipulated by casptr within
6840  * the next two functions.
6841  *
6842  * These functions loop calling casptr() until the swap "happens",
6843  * turning a compare-and-swap op into an atomic swap operation.
6844  */
6845 
6846 /*
6847  * sadb_set_lpkt: Atomically swap in a value to ipsa->ipsa_lpkt and
6848  * freemsg the previous value.  free clue: freemsg(NULL) is safe.
6849  */
6850 
6851 void
6852 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
6853 {
6854 	mblk_t *opkt;
6855 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6856 
6857 	ASSERT(ipsa->ipsa_state == IPSA_STATE_LARVAL);
6858 
6859 	membar_producer();
6860 	do {
6861 		opkt = ipsa->ipsa_lpkt;
6862 	} while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt);
6863 
6864 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
6865 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
6866 	    &ipss->ipsec_sadb_dropper);
6867 }
6868 
6869 /*
6870  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6871  * previous value.
6872  */
6873 
6874 mblk_t *
6875 sadb_clear_lpkt(ipsa_t *ipsa)
6876 {
6877 	mblk_t *opkt;
6878 
6879 	do {
6880 		opkt = ipsa->ipsa_lpkt;
6881 	} while (casptr(&ipsa->ipsa_lpkt, opkt, NULL) != opkt);
6882 
6883 	return (opkt);
6884 }
6885 
6886 /*
6887  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
6888  */
6889 void
6890 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, netstack_t *ns)
6891 {
6892 	ipsec_stack_t   *ipss = ns->netstack_ipsec;
6893 	extern void (*cl_inet_idlesa)(uint8_t, uint32_t, sa_family_t,
6894 	    in6_addr_t, in6_addr_t);
6895 	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
6896 	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
6897 
6898 	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
6899 
6900 	if (cl_inet_idlesa == NULL) {
6901 		ip_drop_packet(bpkt, B_TRUE, NULL, NULL,
6902 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6903 		    &ipss->ipsec_sadb_dropper);
6904 		return;
6905 	}
6906 
6907 	cl_inet_idlesa((ipsa->ipsa_type == SADB_SATYPE_AH) ?
6908 	    IPPROTO_AH : IPPROTO_ESP, ipsa->ipsa_spi, ipsa->ipsa_addrfam,
6909 	    *srcaddr, *dstaddr);
6910 
6911 	mutex_enter(&ipsa->ipsa_lock);
6912 	ipsa->ipsa_mblkcnt++;
6913 	if (ipsa->ipsa_bpkt_head == NULL) {
6914 		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
6915 	} else {
6916 		ipsa->ipsa_bpkt_tail->b_next = bpkt;
6917 		ipsa->ipsa_bpkt_tail = bpkt;
6918 		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
6919 			mblk_t *tmp;
6920 			tmp = ipsa->ipsa_bpkt_head;
6921 			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
6922 			ip_drop_packet(tmp, B_TRUE, NULL, NULL,
6923 			    DROPPER(ipss, ipds_sadb_inidle_overflow),
6924 			    &ipss->ipsec_sadb_dropper);
6925 			ipsa->ipsa_mblkcnt --;
6926 		}
6927 	}
6928 	mutex_exit(&ipsa->ipsa_lock);
6929 
6930 }
6931 
6932 /*
6933  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
6934  * and put into STREAMS again.
6935  */
6936 void
6937 sadb_clear_buf_pkt(void *ipkt)
6938 {
6939 	mblk_t	*tmp, *buf_pkt;
6940 
6941 	buf_pkt = (mblk_t *)ipkt;
6942 
6943 	while (buf_pkt != NULL) {
6944 		tmp = buf_pkt->b_next;
6945 		buf_pkt->b_next = NULL;
6946 		ip_fanout_proto_again(buf_pkt, NULL, NULL, NULL);
6947 		buf_pkt = tmp;
6948 	}
6949 }
6950 /*
6951  * Walker callback used by sadb_alg_update() to free/create crypto
6952  * context template when a crypto software provider is removed or
6953  * added.
6954  */
6955 
6956 struct sadb_update_alg_state {
6957 	ipsec_algtype_t alg_type;
6958 	uint8_t alg_id;
6959 	boolean_t is_added;
6960 };
6961 
6962 static void
6963 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
6964 {
6965 	struct sadb_update_alg_state *update_state =
6966 	    (struct sadb_update_alg_state *)cookie;
6967 	crypto_ctx_template_t *ctx_tmpl = NULL;
6968 
6969 	ASSERT(MUTEX_HELD(&head->isaf_lock));
6970 
6971 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
6972 		return;
6973 
6974 	mutex_enter(&entry->ipsa_lock);
6975 
6976 	switch (update_state->alg_type) {
6977 	case IPSEC_ALG_AUTH:
6978 		if (entry->ipsa_auth_alg == update_state->alg_id)
6979 			ctx_tmpl = &entry->ipsa_authtmpl;
6980 		break;
6981 	case IPSEC_ALG_ENCR:
6982 		if (entry->ipsa_encr_alg == update_state->alg_id)
6983 			ctx_tmpl = &entry->ipsa_encrtmpl;
6984 		break;
6985 	default:
6986 		ctx_tmpl = NULL;
6987 	}
6988 
6989 	if (ctx_tmpl == NULL) {
6990 		mutex_exit(&entry->ipsa_lock);
6991 		return;
6992 	}
6993 
6994 	/*
6995 	 * The context template of the SA may be affected by the change
6996 	 * of crypto provider.
6997 	 */
6998 	if (update_state->is_added) {
6999 		/* create the context template if not already done */
7000 		if (*ctx_tmpl == NULL) {
7001 			(void) ipsec_create_ctx_tmpl(entry,
7002 			    update_state->alg_type);
7003 		}
7004 	} else {
7005 		/*
7006 		 * The crypto provider was removed. If the context template
7007 		 * exists but it is no longer valid, free it.
7008 		 */
7009 		if (*ctx_tmpl != NULL)
7010 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7011 	}
7012 
7013 	mutex_exit(&entry->ipsa_lock);
7014 }
7015 
7016 /*
7017  * Invoked by IP when an software crypto provider has been updated.
7018  * The type and id of the corresponding algorithm is passed as argument.
7019  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7020  * removed. The function updates the SADB and free/creates the
7021  * context templates associated with SAs if needed.
7022  */
7023 
7024 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
7025     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7026 	&update_state)
7027 
7028 void
7029 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7030     netstack_t *ns)
7031 {
7032 	struct sadb_update_alg_state update_state;
7033 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7034 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7035 
7036 	update_state.alg_type = alg_type;
7037 	update_state.alg_id = alg_id;
7038 	update_state.is_added = is_added;
7039 
7040 	if (alg_type == IPSEC_ALG_AUTH) {
7041 		/* walk the AH tables only for auth. algorithm changes */
7042 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7043 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7044 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7045 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7046 	}
7047 
7048 	/* walk the ESP tables */
7049 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7050 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7051 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7052 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7053 }
7054 
7055 /*
7056  * Creates a context template for the specified SA. This function
7057  * is called when an SA is created and when a context template needs
7058  * to be created due to a change of software provider.
7059  */
7060 int
7061 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7062 {
7063 	ipsec_alginfo_t *alg;
7064 	crypto_mechanism_t mech;
7065 	crypto_key_t *key;
7066 	crypto_ctx_template_t *sa_tmpl;
7067 	int rv;
7068 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7069 
7070 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
7071 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7072 
7073 	/* get pointers to the algorithm info, context template, and key */
7074 	switch (alg_type) {
7075 	case IPSEC_ALG_AUTH:
7076 		key = &sa->ipsa_kcfauthkey;
7077 		sa_tmpl = &sa->ipsa_authtmpl;
7078 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7079 		break;
7080 	case IPSEC_ALG_ENCR:
7081 		key = &sa->ipsa_kcfencrkey;
7082 		sa_tmpl = &sa->ipsa_encrtmpl;
7083 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7084 		break;
7085 	default:
7086 		alg = NULL;
7087 	}
7088 
7089 	if (alg == NULL || !ALG_VALID(alg))
7090 		return (EINVAL);
7091 
7092 	/* initialize the mech info structure for the framework */
7093 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7094 	mech.cm_type = alg->alg_mech_type;
7095 	mech.cm_param = NULL;
7096 	mech.cm_param_len = 0;
7097 
7098 	/* create a new context template */
7099 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7100 
7101 	/*
7102 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7103 	 * providers are available for that mechanism. In that case
7104 	 * we don't fail, and will generate the context template from
7105 	 * the framework callback when a software provider for that
7106 	 * mechanism registers.
7107 	 *
7108 	 * The context template is assigned the special value
7109 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7110 	 * lack of memory. No attempt will be made to use
7111 	 * the context template if it is set to this value.
7112 	 */
7113 	if (rv == CRYPTO_HOST_MEMORY) {
7114 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7115 	} else if (rv != CRYPTO_SUCCESS) {
7116 		*sa_tmpl = NULL;
7117 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7118 			return (EINVAL);
7119 	}
7120 
7121 	return (0);
7122 }
7123 
7124 /*
7125  * Destroy the context template of the specified algorithm type
7126  * of the specified SA. Must be called while holding the SA lock.
7127  */
7128 void
7129 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7130 {
7131 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7132 
7133 	if (alg_type == IPSEC_ALG_AUTH) {
7134 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7135 			sa->ipsa_authtmpl = NULL;
7136 		else if (sa->ipsa_authtmpl != NULL) {
7137 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7138 			sa->ipsa_authtmpl = NULL;
7139 		}
7140 	} else {
7141 		ASSERT(alg_type == IPSEC_ALG_ENCR);
7142 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7143 			sa->ipsa_encrtmpl = NULL;
7144 		else if (sa->ipsa_encrtmpl != NULL) {
7145 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7146 			sa->ipsa_encrtmpl = NULL;
7147 		}
7148 	}
7149 }
7150 
7151 /*
7152  * Use the kernel crypto framework to check the validity of a key received
7153  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7154  */
7155 int
7156 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7157     boolean_t is_auth, int *diag)
7158 {
7159 	crypto_mechanism_t mech;
7160 	crypto_key_t crypto_key;
7161 	int crypto_rc;
7162 
7163 	mech.cm_type = mech_type;
7164 	mech.cm_param = NULL;
7165 	mech.cm_param_len = 0;
7166 
7167 	crypto_key.ck_format = CRYPTO_KEY_RAW;
7168 	crypto_key.ck_data = sadb_key + 1;
7169 	crypto_key.ck_length = sadb_key->sadb_key_bits;
7170 
7171 	crypto_rc = crypto_key_check(&mech, &crypto_key);
7172 
7173 	switch (crypto_rc) {
7174 	case CRYPTO_SUCCESS:
7175 		return (0);
7176 	case CRYPTO_MECHANISM_INVALID:
7177 	case CRYPTO_MECH_NOT_SUPPORTED:
7178 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7179 		    SADB_X_DIAGNOSTIC_BAD_EALG;
7180 		break;
7181 	case CRYPTO_KEY_SIZE_RANGE:
7182 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7183 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7184 		break;
7185 	case CRYPTO_WEAK_KEY:
7186 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7187 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7188 		break;
7189 	}
7190 
7191 	return (-1);
7192 }
7193 /*
7194  * If this is an outgoing SA then add some fuzz to the
7195  * SOFT EXPIRE time. The reason for this is to stop
7196  * peers trying to renegotiate SOFT expiring SA's at
7197  * the same time. The amount of fuzz needs to be at
7198  * least 10 seconds which is the typical interval
7199  * sadb_ager(), although this is only a guide as it
7200  * selftunes.
7201  */
7202 void
7203 lifetime_fuzz(ipsa_t *assoc)
7204 {
7205 	uint8_t rnd;
7206 
7207 	if (assoc->ipsa_softaddlt == 0)
7208 		return;
7209 
7210 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7211 	rnd = (rnd & 0xF) + 10;
7212 	assoc->ipsa_softexpiretime -= rnd;
7213 	assoc->ipsa_softaddlt -= rnd;
7214 }
7215 void
7216 destroy_ipsa_pair(ipsap_t *ipsapp)
7217 {
7218 	if (ipsapp == NULL)
7219 		return;
7220 
7221 	/*
7222 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7223 	 * them in { }.
7224 	 */
7225 	if (ipsapp->ipsap_sa_ptr != NULL) {
7226 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7227 	}
7228 	if (ipsapp->ipsap_psa_ptr != NULL) {
7229 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7230 	}
7231 
7232 	kmem_free(ipsapp, sizeof (*ipsapp));
7233 }
7234 
7235 /*
7236  * The sadb_ager() function walks through the hash tables of SA's and ages
7237  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7238  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7239  * SA appears in both the inbound and outbound tables because its not possible
7240  * to determine its direction) are placed on a list when they expire. This is
7241  * to ensure that pair/peer SA's are reaped at the same time, even if they
7242  * expire at different times.
7243  *
7244  * This function is called twice by sadb_ager(), one after processing the
7245  * inbound table, then again after processing the outbound table.
7246  */
7247 void
7248 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7249 {
7250 	templist_t *listptr;
7251 	int outhash;
7252 	isaf_t *bucket;
7253 	boolean_t haspeer;
7254 	ipsa_t *peer_assoc, *dying;
7255 	/*
7256 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7257 	 * is address independent.
7258 	 */
7259 	while (haspeerlist != NULL) {
7260 		/* "dying" contains the SA that has a peer. */
7261 		dying = haspeerlist->ipsa;
7262 		haspeer = (dying->ipsa_haspeer);
7263 		listptr = haspeerlist;
7264 		haspeerlist = listptr->next;
7265 		kmem_free(listptr, sizeof (*listptr));
7266 		/*
7267 		 * Pick peer bucket based on addrfam.
7268 		 */
7269 		if (outbound) {
7270 			if (haspeer)
7271 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7272 			else
7273 				bucket = INBOUND_BUCKET(sp,
7274 				    dying->ipsa_otherspi);
7275 		} else { /* inbound */
7276 			if (haspeer) {
7277 				if (dying->ipsa_addrfam == AF_INET6) {
7278 					outhash = OUTBOUND_HASH_V6(sp,
7279 					    *((in6_addr_t *)&dying->
7280 					    ipsa_dstaddr));
7281 				} else {
7282 					outhash = OUTBOUND_HASH_V4(sp,
7283 					    *((ipaddr_t *)&dying->
7284 					    ipsa_dstaddr));
7285 				}
7286 			} else if (dying->ipsa_addrfam == AF_INET6) {
7287 				outhash = OUTBOUND_HASH_V6(sp,
7288 				    *((in6_addr_t *)&dying->
7289 				    ipsa_srcaddr));
7290 			} else {
7291 				outhash = OUTBOUND_HASH_V4(sp,
7292 				    *((ipaddr_t *)&dying->
7293 				    ipsa_srcaddr));
7294 			}
7295 		bucket = &(sp->sdb_of[outhash]);
7296 		}
7297 
7298 		mutex_enter(&bucket->isaf_lock);
7299 		/*
7300 		 * "haspeer" SA's have the same src/dst address ordering,
7301 		 * "paired" SA's have the src/dst addresses reversed.
7302 		 */
7303 		if (haspeer) {
7304 			peer_assoc = ipsec_getassocbyspi(bucket,
7305 			    dying->ipsa_spi, dying->ipsa_srcaddr,
7306 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7307 		} else {
7308 			peer_assoc = ipsec_getassocbyspi(bucket,
7309 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7310 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7311 		}
7312 
7313 		mutex_exit(&bucket->isaf_lock);
7314 		if (peer_assoc != NULL) {
7315 			mutex_enter(&peer_assoc->ipsa_lock);
7316 			mutex_enter(&dying->ipsa_lock);
7317 			if (!haspeer) {
7318 				/*
7319 				 * Only SA's which have a "peer" or are
7320 				 * "paired" end up on this list, so this
7321 				 * must be a "paired" SA, update the flags
7322 				 * to break the pair.
7323 				 */
7324 				peer_assoc->ipsa_otherspi = 0;
7325 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7326 				dying->ipsa_otherspi = 0;
7327 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7328 			}
7329 			if (haspeer || outbound) {
7330 				/*
7331 				 * Update the state of the "inbound" SA when
7332 				 * the "outbound" SA has expired. Don't update
7333 				 * the "outbound" SA when the "inbound" SA
7334 				 * SA expires because setting the hard_addtime
7335 				 * below will cause this to happen.
7336 				 */
7337 				peer_assoc->ipsa_state = dying->ipsa_state;
7338 			}
7339 			if (dying->ipsa_state == IPSA_STATE_DEAD)
7340 				peer_assoc->ipsa_hardexpiretime = 1;
7341 
7342 			mutex_exit(&dying->ipsa_lock);
7343 			mutex_exit(&peer_assoc->ipsa_lock);
7344 			IPSA_REFRELE(peer_assoc);
7345 		}
7346 		IPSA_REFRELE(dying);
7347 	}
7348 }
7349