xref: /titanic_51/usr/src/uts/common/inet/ip/sadb.c (revision a1d92fe4831144630aa0b262cde1629785c37f23)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/stropts.h>
31 #include <sys/errno.h>
32 #include <sys/ddi.h>
33 #include <sys/debug.h>
34 #include <sys/cmn_err.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/kmem.h>
38 #include <sys/sunddi.h>
39 #include <sys/tihdr.h>
40 #include <sys/atomic.h>
41 #include <sys/socket.h>
42 #include <sys/sysmacros.h>
43 #include <sys/crypto/common.h>
44 #include <sys/crypto/api.h>
45 #include <sys/zone.h>
46 #include <netinet/in.h>
47 #include <net/if.h>
48 #include <net/pfkeyv2.h>
49 #include <inet/common.h>
50 #include <netinet/ip6.h>
51 #include <inet/ip.h>
52 #include <inet/ip_ire.h>
53 #include <inet/ip6.h>
54 #include <inet/ipsec_info.h>
55 #include <inet/tcp.h>
56 #include <inet/sadb.h>
57 #include <inet/ipsec_impl.h>
58 #include <inet/ipsecah.h>
59 #include <inet/ipsecesp.h>
60 #include <sys/random.h>
61 #include <sys/dlpi.h>
62 #include <sys/iphada.h>
63 #include <inet/ip_if.h>
64 #include <inet/ipdrop.h>
65 #include <inet/ipclassifier.h>
66 #include <inet/sctp_ip.h>
67 #include <inet/tun.h>
68 
69 /*
70  * This source file contains Security Association Database (SADB) common
71  * routines.  They are linked in with the AH module.  Since AH has no chance
72  * of falling under export control, it was safe to link it in there.
73  */
74 
75 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
76     ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
77 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
78 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
79 static void sadb_drain_torchq(queue_t *, mblk_t *);
80 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
81 			    netstack_t *);
82 static void sadb_destroy(sadb_t *, netstack_t *);
83 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
84 
85 static time_t sadb_add_time(time_t, uint64_t);
86 static void lifetime_fuzz(ipsa_t *);
87 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
88 
89 /*
90  * ipsacq_maxpackets is defined here to make it tunable
91  * from /etc/system.
92  */
93 extern uint64_t ipsacq_maxpackets;
94 
/*
 * SET_EXPIRE(sa, delta, exp):
 * If the SA's lifetime field ipsa_<delta> is non-zero, set the expiration
 * field ipsa_<exp> to ipsa_addtime plus that lifetime, computed with
 * sadb_add_time().  When ipsa_<delta> is zero, ipsa_<exp> is left untouched.
 *
 * The field names are formed by token-pasting, so "delta" and "exp" must be
 * the suffixes of ipsa_t members.
 */
#define	SET_EXPIRE(sa, delta, exp) {				\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,	\
			(sa)->ipsa_ ## delta);				\
	}								\
}
101 
/*
 * UPDATE_EXPIRE(sa, delta, exp):
 * If the SA's lifetime field ipsa_<delta> is non-zero, compute a candidate
 * expiration of ipsa_usetime plus that lifetime (via sadb_add_time()).
 * If ipsa_<exp> is currently zero it takes the candidate; otherwise it
 * becomes the smaller of its current value and the candidate, so this
 * macro never pushes an already-set expiration later.
 *
 * Note the expansion declares a block-local variable named "tmp".
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
			(sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0)				\
			(sa)->ipsa_ ## exp = tmp;			\
		else							\
			(sa)->ipsa_ ## exp = 				\
			    MIN((sa)->ipsa_ ## exp, tmp); 		\
	}								\
}
113 
114 
115 /* wrap the macro so we can pass it as a function pointer */
116 void
117 sadb_sa_refrele(void *target)
118 {
119 	IPSA_REFRELE(((ipsa_t *)target));
120 }
121 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * Saturating addition of a PF_KEY lifetime to a base time.
 *
 * PF_KEY gives us lifetimes in uint64_t seconds, while time_t is presumed
 * to be a signed type with the same range as "long".  A naive addition can
 * therefore wrap past the end of time, or (on ILP32) lose high-order bits
 * and land on a seemingly valid but too-early expiration.
 *
 * To avoid either outcome the result is clamped to TIME_MAX:
 *	- delta is first clipped to TIME_MAX;
 *	- the remaining headroom above a positive base is checked BEFORE
 *	  adding, so the sum is always representable and we never depend
 *	  on how an out-of-range value converts back to a signed time_t.
 *
 * base:	starting time.
 * delta:	number of seconds to add.
 * Returns base + delta, or TIME_MAX if that sum would exceed TIME_MAX.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = (uint64_t)TIME_MAX;

	/*
	 * Only a positive base can push the sum past TIME_MAX (delta is
	 * already <= TIME_MAX).  TIME_MAX - base cannot overflow here
	 * because base > 0.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
172 
173 /*
174  * Callers of this function have already created a working security
175  * association, and have found the appropriate table & hash chain.  All this
176  * function does is check duplicates, and insert the SA.  The caller needs to
177  * hold the hash bucket lock and increment the refcnt before insertion.
178  *
179  * Return 0 if success, EEXIST if collision.
180  */
181 #define	SA_UNIQUE_MATCH(sa1, sa2) \
182 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
183 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
184 
185 int
186 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
187 {
188 	ipsa_t **ptpn = NULL;
189 	ipsa_t *walker;
190 	boolean_t unspecsrc;
191 
192 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
193 
194 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
195 
196 	walker = bucket->isaf_ipsa;
197 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
198 
199 	/*
200 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
201 	 * of the list unless there's an unspecified source address, then
202 	 * insert it after the last SA with a specified source address.
203 	 *
204 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
205 	 * checking for collisions.
206 	 */
207 
208 	while (walker != NULL) {
209 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
210 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
211 			if (walker->ipsa_spi == ipsa->ipsa_spi)
212 				return (EEXIST);
213 
214 			mutex_enter(&walker->ipsa_lock);
215 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
216 			    (walker->ipsa_flags & IPSA_F_USED) &&
217 			    SA_UNIQUE_MATCH(walker, ipsa)) {
218 				walker->ipsa_flags |= IPSA_F_CINVALID;
219 			}
220 			mutex_exit(&walker->ipsa_lock);
221 		}
222 
223 		if (ptpn == NULL && unspecsrc) {
224 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
225 			    walker->ipsa_addrfam))
226 				ptpn = walker->ipsa_ptpn;
227 			else if (walker->ipsa_next == NULL)
228 				ptpn = &walker->ipsa_next;
229 		}
230 
231 		walker = walker->ipsa_next;
232 	}
233 
234 	if (ptpn == NULL)
235 		ptpn = &bucket->isaf_ipsa;
236 	ipsa->ipsa_next = *ptpn;
237 	ipsa->ipsa_ptpn = ptpn;
238 	if (ipsa->ipsa_next != NULL)
239 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
240 	*ptpn = ipsa;
241 	ipsa->ipsa_linklock = &bucket->isaf_lock;
242 
243 	return (0);
244 }
245 #undef SA_UNIQUE_MATCH
246 
247 /*
248  * Free a security association.  Its reference count is 0, which means
249  * I must free it.  The SA must be unlocked and must not be linked into
250  * any fanout list.
251  */
252 static void
253 sadb_freeassoc(ipsa_t *ipsa)
254 {
255 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
256 
257 	ASSERT(ipss != NULL);
258 	ASSERT(!MUTEX_HELD(&ipsa->ipsa_lock));
259 	ASSERT(ipsa->ipsa_refcnt == 0);
260 	ASSERT(ipsa->ipsa_next == NULL);
261 	ASSERT(ipsa->ipsa_ptpn == NULL);
262 
263 	ip_drop_packet(sadb_clear_lpkt(ipsa), B_TRUE, NULL, NULL,
264 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
265 	    &ipss->ipsec_sadb_dropper);
266 
267 	mutex_enter(&ipsa->ipsa_lock);
268 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
269 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
270 	mutex_exit(&ipsa->ipsa_lock);
271 
272 	/* bzero() these fields for paranoia's sake. */
273 	if (ipsa->ipsa_authkey != NULL) {
274 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
275 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
276 	}
277 	if (ipsa->ipsa_encrkey != NULL) {
278 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
279 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
280 	}
281 	if (ipsa->ipsa_src_cid != NULL) {
282 		IPSID_REFRELE(ipsa->ipsa_src_cid);
283 	}
284 	if (ipsa->ipsa_dst_cid != NULL) {
285 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
286 	}
287 	if (ipsa->ipsa_integ != NULL)
288 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
289 	if (ipsa->ipsa_sens != NULL)
290 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
291 
292 	mutex_destroy(&ipsa->ipsa_lock);
293 	kmem_free(ipsa, sizeof (*ipsa));
294 }
295 
296 /*
297  * Unlink a security association from a hash bucket.  Assume the hash bucket
298  * lock is held, but the association's lock is not.
299  *
300  * Note that we do not bump the bucket's generation number here because
301  * we might not be making a visible change to the set of visible SA's.
302  * All callers MUST bump the bucket's generation number before they unlock
303  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
304  * was present in the bucket at the time it was locked.
305  */
306 void
307 sadb_unlinkassoc(ipsa_t *ipsa)
308 {
309 	ASSERT(ipsa->ipsa_linklock != NULL);
310 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
311 
312 	/* These fields are protected by the link lock. */
313 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
314 	if (ipsa->ipsa_next != NULL) {
315 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
316 		ipsa->ipsa_next = NULL;
317 	}
318 
319 	ipsa->ipsa_ptpn = NULL;
320 
321 	/* This may destroy the SA. */
322 	IPSA_REFRELE(ipsa);
323 }
324 
325 /*
326  * Create a larval security association with the specified SPI.	 All other
327  * fields are zeroed.
328  */
329 static ipsa_t *
330 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
331     netstack_t *ns)
332 {
333 	ipsa_t *newbie;
334 
335 	/*
336 	 * Allocate...
337 	 */
338 
339 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
340 	if (newbie == NULL) {
341 		/* Can't make new larval SA. */
342 		return (NULL);
343 	}
344 
345 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
346 	newbie->ipsa_spi = spi;
347 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
348 
349 	/*
350 	 * Copy addresses...
351 	 */
352 
353 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
354 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
355 
356 	newbie->ipsa_addrfam = addrfam;
357 
358 	/*
359 	 * Set common initialization values, including refcnt.
360 	 */
361 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
362 	newbie->ipsa_state = IPSA_STATE_LARVAL;
363 	newbie->ipsa_refcnt = 1;
364 	newbie->ipsa_freefunc = sadb_freeassoc;
365 
366 	/*
367 	 * There aren't a lot of other common initialization values, as
368 	 * they are copied in from the PF_KEY message.
369 	 */
370 
371 	return (newbie);
372 }
373 
374 /*
375  * Call me to initialize a security association fanout.
376  */
377 static int
378 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
379 {
380 	isaf_t *table;
381 	int i;
382 
383 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
384 	*tablep = table;
385 
386 	if (table == NULL)
387 		return (ENOMEM);
388 
389 	for (i = 0; i < size; i++) {
390 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
391 		table[i].isaf_ipsa = NULL;
392 		table[i].isaf_gen = 0;
393 	}
394 
395 	return (0);
396 }
397 
398 /*
399  * Call me to initialize an acquire fanout
400  */
401 static int
402 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
403 {
404 	iacqf_t *table;
405 	int i;
406 
407 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
408 	*tablep = table;
409 
410 	if (table == NULL)
411 		return (ENOMEM);
412 
413 	for (i = 0; i < size; i++) {
414 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
415 		table[i].iacqf_ipsacq = NULL;
416 	}
417 
418 	return (0);
419 }
420 
421 /*
422  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
423  * caller must clean up partial allocations.
424  */
425 static int
426 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
427 {
428 	ASSERT(sp->sdb_of == NULL);
429 	ASSERT(sp->sdb_if == NULL);
430 	ASSERT(sp->sdb_acq == NULL);
431 
432 	sp->sdb_hashsize = size;
433 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
434 		return (ENOMEM);
435 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
436 		return (ENOMEM);
437 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
438 		return (ENOMEM);
439 
440 	return (0);
441 }
442 
443 /*
444  * Call me to initialize an SADB instance; fall back to default size on failure.
445  */
446 static void
447 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
448     netstack_t *ns)
449 {
450 	ASSERT(sp->sdb_of == NULL);
451 	ASSERT(sp->sdb_if == NULL);
452 	ASSERT(sp->sdb_acq == NULL);
453 
454 	if (size < IPSEC_DEFAULT_HASH_SIZE)
455 		size = IPSEC_DEFAULT_HASH_SIZE;
456 
457 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
458 
459 		cmn_err(CE_WARN,
460 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
461 		    size, ver, name);
462 
463 		sadb_destroy(sp, ns);
464 		size = IPSEC_DEFAULT_HASH_SIZE;
465 		cmn_err(CE_WARN, "Falling back to %d entries", size);
466 		(void) sadb_init_trial(sp, size, KM_SLEEP);
467 	}
468 }
469 
470 
471 /*
472  * Initialize an SADB-pair.
473  */
474 void
475 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
476 {
477 	sadb_init(name, &sp->s_v4, size, 4, ns);
478 	sadb_init(name, &sp->s_v6, size, 6, ns);
479 
480 	sp->s_satype = type;
481 
482 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
483 	if (type == SADB_SATYPE_AH) {
484 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
485 
486 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
487 	}
488 }
489 
490 /*
491  * Deliver a single SADB_DUMP message representing a single SA.  This is
492  * called many times by sadb_dump().
493  *
494  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
495  * the caller should take that as a hint that dupb() on the "original answer"
496  * failed, and that perhaps the caller should try again with a copyb()ed
497  * "original answer".
498  */
499 static int
500 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
501     sadb_msg_t *samsg)
502 {
503 	mblk_t *answer;
504 
505 	answer = dupb(original_answer);
506 	if (answer == NULL)
507 		return (ENOBUFS);
508 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
509 	if (answer->b_cont == NULL) {
510 		freeb(answer);
511 		return (ENOMEM);
512 	}
513 
514 	/* Just do a putnext, and let keysock deal with flow control. */
515 	putnext(pfkey_q, answer);
516 	return (0);
517 }
518 
519 /*
520  * Common function to allocate and prepare a keysock_out_t M_CTL message.
521  */
522 mblk_t *
523 sadb_keysock_out(minor_t serial)
524 {
525 	mblk_t *mp;
526 	keysock_out_t *kso;
527 
528 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
529 	if (mp != NULL) {
530 		mp->b_datap->db_type = M_CTL;
531 		mp->b_wptr += sizeof (ipsec_info_t);
532 		kso = (keysock_out_t *)mp->b_rptr;
533 		kso->ks_out_type = KEYSOCK_OUT;
534 		kso->ks_out_len = sizeof (*kso);
535 		kso->ks_out_serial = serial;
536 	}
537 
538 	return (mp);
539 }
540 
541 /*
542  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
543  * to keysock.
544  */
545 static int
546 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
547     int num_entries, boolean_t do_peers)
548 {
549 	int i, error = 0;
550 	mblk_t *original_answer;
551 	ipsa_t *walker;
552 	sadb_msg_t *samsg;
553 
554 	/*
555 	 * For each IPSA hash bucket do:
556 	 *	- Hold the mutex
557 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
558 	 */
559 	ASSERT(mp->b_cont != NULL);
560 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
561 
562 	original_answer = sadb_keysock_out(serial);
563 	if (original_answer == NULL)
564 		return (ENOMEM);
565 
566 	for (i = 0; i < num_entries; i++) {
567 		mutex_enter(&fanout[i].isaf_lock);
568 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
569 		    walker = walker->ipsa_next) {
570 			if (!do_peers && walker->ipsa_haspeer)
571 				continue;
572 			error = sadb_dump_deliver(pfkey_q, original_answer,
573 			    walker, samsg);
574 			if (error == ENOBUFS) {
575 				mblk_t *new_original_answer;
576 
577 				/* Ran out of dupb's.  Try a copyb. */
578 				new_original_answer = copyb(original_answer);
579 				if (new_original_answer == NULL) {
580 					error = ENOMEM;
581 				} else {
582 					freeb(original_answer);
583 					original_answer = new_original_answer;
584 					error = sadb_dump_deliver(pfkey_q,
585 					    original_answer, walker, samsg);
586 				}
587 			}
588 			if (error != 0)
589 				break;	/* out of for loop. */
590 		}
591 		mutex_exit(&fanout[i].isaf_lock);
592 		if (error != 0)
593 			break;	/* out of for loop. */
594 	}
595 
596 	freeb(original_answer);
597 	return (error);
598 }
599 
600 /*
601  * Dump an entire SADB; outbound first, then inbound.
602  */
603 
604 int
605 sadb_dump(queue_t *pfkey_q, mblk_t *mp, minor_t serial, sadb_t *sp)
606 {
607 	int error;
608 
609 	/* Dump outbound */
610 	error = sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_of,
611 	    sp->sdb_hashsize, B_TRUE);
612 	if (error)
613 		return (error);
614 
615 	/* Dump inbound */
616 	return sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_if,
617 	    sp->sdb_hashsize, B_FALSE);
618 }
619 
620 /*
621  * Generic sadb table walker.
622  *
623  * Call "walkfn" for each SA in each bucket in "table"; pass the
624  * bucket, the entry and "cookie" to the callback function.
625  * Take care to ensure that walkfn can delete the SA without screwing
626  * up our traverse.
627  *
628  * The bucket is locked for the duration of the callback, both so that the
629  * callback can just call sadb_unlinkassoc() when it wants to delete something,
630  * and so that no new entries are added while we're walking the list.
631  */
632 static void
633 sadb_walker(isaf_t *table, uint_t numentries,
634     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
635     void *cookie)
636 {
637 	int i;
638 	for (i = 0; i < numentries; i++) {
639 		ipsa_t *entry, *next;
640 
641 		mutex_enter(&table[i].isaf_lock);
642 
643 		for (entry = table[i].isaf_ipsa; entry != NULL;
644 		    entry = next) {
645 			next = entry->ipsa_next;
646 			(*walkfn)(&table[i], entry, cookie);
647 		}
648 		mutex_exit(&table[i].isaf_lock);
649 	}
650 }
651 
652 /*
653  * From the given SA, construct a dl_ct_ipsec_key and
654  * a dl_ct_ipsec structures to be sent to the adapter as part
655  * of a DL_CONTROL_REQ.
656  *
657  * ct_sa must point to the storage allocated for the key
658  * structure and must be followed by storage allocated
659  * for the SA information that must be sent to the driver
660  * as part of the DL_CONTROL_REQ request.
661  *
662  * The is_inbound boolean indicates whether the specified
663  * SA is part of an inbound SA table.
664  *
665  * Returns B_TRUE if the corresponding SA must be passed to
666  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
667  */
668 static boolean_t
669 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
670 {
671 	dl_ct_ipsec_key_t *keyp;
672 	dl_ct_ipsec_t *sap;
673 	void *ct_sa = mp->b_wptr;
674 
675 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
676 
677 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
678 	sap = (dl_ct_ipsec_t *)(keyp + 1);
679 
680 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
681 	    "is_inbound = %d\n", is_inbound));
682 
683 	/* initialize flag */
684 	sap->sadb_sa_flags = 0;
685 	if (is_inbound) {
686 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
687 		/*
688 		 * If an inbound SA has a peer, then mark it has being
689 		 * an outbound SA as well.
690 		 */
691 		if (sa->ipsa_haspeer)
692 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
693 	} else {
694 		/*
695 		 * If an outbound SA has a peer, then don't send it,
696 		 * since we will send the copy from the inbound table.
697 		 */
698 		if (sa->ipsa_haspeer) {
699 			freemsg(mp);
700 			return (B_FALSE);
701 		}
702 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
703 	}
704 
705 	keyp->dl_key_spi = sa->ipsa_spi;
706 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
707 	    DL_CTL_IPSEC_ADDR_LEN);
708 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
709 
710 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
711 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
712 
713 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
714 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
715 	bcopy(sa->ipsa_authkey,
716 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
717 
718 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
719 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
720 	bcopy(sa->ipsa_encrkey,
721 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
722 
723 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
724 	return (B_TRUE);
725 }
726 
727 /*
728  * Called from AH or ESP to format a message which will be used to inform
729  * IPsec-acceleration-capable ills of a SADB change.
730  * (It is not possible to send the message to IP directly from this function
731  * since the SA, if any, is locked during the call).
732  *
733  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
734  * sa_type: identifies whether the operation applies to AH or ESP
735  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
736  * sa: Pointer to an SA.  Must be non-NULL and locked
737  *	for ADD, DELETE, GET, and UPDATE operations.
738  * This function returns an mblk chain that must be passed to IP
739  * for forwarding to the IPsec capable providers.
740  */
741 mblk_t *
742 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
743     boolean_t is_inbound)
744 {
745 	mblk_t *mp;
746 	dl_control_req_t *ctrl;
747 	boolean_t need_key = B_FALSE;
748 	mblk_t *ctl_mp = NULL;
749 	ipsec_ctl_t *ctl;
750 
751 	/*
752 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
753 	 * 2 if a key is needed for the operation
754 	 *    2.1 initialize key
755 	 *    2.2 if a full SA is needed for the operation
756 	 *	2.2.1 initialize full SA info
757 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
758 	 * to send the resulting message to IPsec capable ills.
759 	 */
760 
761 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
762 
763 	/*
764 	 * Allocate DL_CONTROL_REQ M_PROTO
765 	 * We allocate room for the SA even if it's not needed
766 	 * by some of the operations (for example flush)
767 	 */
768 	mp = allocb(sizeof (dl_control_req_t) +
769 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
770 	if (mp == NULL)
771 		return (NULL);
772 	mp->b_datap->db_type = M_PROTO;
773 
774 	/* initialize dl_control_req_t */
775 	ctrl = (dl_control_req_t *)mp->b_wptr;
776 	ctrl->dl_primitive = DL_CONTROL_REQ;
777 	ctrl->dl_operation = dl_operation;
778 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
779 	    DL_CT_IPSEC_ESP;
780 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
781 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
782 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
783 	    sizeof (dl_ct_ipsec_key_t);
784 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
785 	mp->b_wptr += sizeof (dl_control_req_t);
786 
787 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
788 		ASSERT(sa != NULL);
789 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
790 
791 		need_key = B_TRUE;
792 
793 		/*
794 		 * Initialize key and SA data. Note that for some
795 		 * operations the SA data is ignored by the provider
796 		 * (delete, etc.)
797 		 */
798 		if (!sadb_req_from_sa(sa, mp, is_inbound))
799 			return (NULL);
800 	}
801 
802 	/* construct control message */
803 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
804 	if (ctl_mp == NULL) {
805 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
806 		freemsg(mp);
807 		return (NULL);
808 	}
809 
810 	ctl_mp->b_datap->db_type = M_CTL;
811 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
812 	ctl_mp->b_cont = mp;
813 
814 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
815 	ctl->ipsec_ctl_type = IPSEC_CTL;
816 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
817 	ctl->ipsec_ctl_sa_type = sa_type;
818 
819 	if (need_key) {
820 		/*
821 		 * Keep an additional reference on SA, since it will be
822 		 * needed by IP to send control messages corresponding
823 		 * to that SA from its perimeter. IP will do a
824 		 * IPSA_REFRELE when done with the request.
825 		 */
826 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
827 		IPSA_REFHOLD(sa);
828 		ctl->ipsec_ctl_sa = sa;
829 	} else
830 		ctl->ipsec_ctl_sa = NULL;
831 
832 	return (ctl_mp);
833 }
834 
835 
836 /*
837  * Called by sadb_ill_download() to dump the entries for a specific
838  * fanout table.  For each SA entry in the table passed as argument,
839  * use mp as a template and constructs a full DL_CONTROL message, and
840  * call ill_dlpi_send(), provided by IP, to send the resulting
841  * messages to the ill.
842  */
843 static void
844 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
845     boolean_t is_inbound)
846 {
847 	ipsa_t *walker;
848 	mblk_t *nmp, *salist;
849 	int i, error = 0;
850 	ip_stack_t	*ipst = ill->ill_ipst;
851 	netstack_t	*ns = ipst->ips_netstack;
852 
853 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
854 	    (void *)fanout, num_entries));
855 	/*
856 	 * For each IPSA hash bucket do:
857 	 *	- Hold the mutex
858 	 *	- Walk each entry, sending a corresponding request to IP
859 	 *	  for it.
860 	 */
861 	ASSERT(mp->b_datap->db_type == M_PROTO);
862 
863 	for (i = 0; i < num_entries; i++) {
864 		mutex_enter(&fanout[i].isaf_lock);
865 		salist = NULL;
866 
867 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
868 		    walker = walker->ipsa_next) {
869 			IPSECHW_DEBUG(IPSECHW_SADB,
870 			    ("sadb_ill_df: sending SA to ill via IP \n"));
871 			/*
872 			 * Duplicate the template mp passed and
873 			 * complete DL_CONTROL_REQ data.
874 			 * To be more memory efficient, we could use
875 			 * dupb() for the M_CTL and copyb() for the M_PROTO
876 			 * as the M_CTL, since the M_CTL is the same for
877 			 * every SA entry passed down to IP for the same ill.
878 			 *
879 			 * Note that copymsg/copyb ensure that the new mblk
880 			 * is at least as large as the source mblk even if it's
881 			 * not using all its storage -- therefore, nmp
882 			 * has trailing space for sadb_req_from_sa to add
883 			 * the SA-specific bits.
884 			 */
885 			mutex_enter(&walker->ipsa_lock);
886 			if (ipsec_capab_match(ill,
887 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
888 			    walker, ns)) {
889 				nmp = copymsg(mp);
890 				if (nmp == NULL) {
891 					IPSECHW_DEBUG(IPSECHW_SADB,
892 					    ("sadb_ill_df: alloc error\n"));
893 					error = ENOMEM;
894 					mutex_exit(&walker->ipsa_lock);
895 					break;
896 				}
897 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
898 					nmp->b_next = salist;
899 					salist = nmp;
900 				}
901 			}
902 			mutex_exit(&walker->ipsa_lock);
903 		}
904 		mutex_exit(&fanout[i].isaf_lock);
905 		while (salist != NULL) {
906 			nmp = salist;
907 			salist = nmp->b_next;
908 			nmp->b_next = NULL;
909 			ill_dlpi_send(ill, nmp);
910 		}
911 		if (error != 0)
912 			break;	/* out of for loop. */
913 	}
914 }
915 
916 /*
917  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
918  * the type specified by sa_type to the specified ill.
919  *
920  * We call for each fanout table defined by the SADB (one per
921  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
922  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
923  * message to the ill.
924  */
925 void
926 sadb_ill_download(ill_t *ill, uint_t sa_type)
927 {
928 	mblk_t *protomp;	/* prototype message */
929 	dl_control_req_t *ctrl;
930 	sadbp_t *spp;
931 	sadb_t *sp;
932 	int dlt;
933 	ip_stack_t	*ipst = ill->ill_ipst;
934 	netstack_t	*ns = ipst->ips_netstack;
935 
936 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
937 
938 	/*
939 	 * Allocate and initialize prototype answer. A duplicate for
940 	 * each SA is sent down to the interface.
941 	 */
942 
943 	/* DL_CONTROL_REQ M_PROTO mblk_t */
944 	protomp = allocb(sizeof (dl_control_req_t) +
945 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
946 	if (protomp == NULL)
947 		return;
948 	protomp->b_datap->db_type = M_PROTO;
949 
950 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
951 	if (sa_type == SADB_SATYPE_ESP) {
952 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
953 
954 		spp = &espstack->esp_sadb;
955 	} else {
956 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
957 
958 		spp = &ahstack->ah_sadb;
959 	}
960 
961 	ctrl = (dl_control_req_t *)protomp->b_wptr;
962 	ctrl->dl_primitive = DL_CONTROL_REQ;
963 	ctrl->dl_operation = DL_CO_SET;
964 	ctrl->dl_type = dlt;
965 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
966 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
967 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
968 	    sizeof (dl_ct_ipsec_key_t);
969 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
970 	protomp->b_wptr += sizeof (dl_control_req_t);
971 
972 	/*
973 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
974 	 * and dl_ct_ipsec_t
975 	 */
976 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
977 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
978 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
979 	freemsg(protomp);
980 }
981 
982 /*
983  * Call me to free up a security association fanout.  Use the forever
984  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
985  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
986  * when a module is unloaded).
987  */
988 static void
989 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever)
990 {
991 	int i;
992 	isaf_t *table = *tablep;
993 
994 	if (table == NULL)
995 		return;
996 
997 	for (i = 0; i < numentries; i++) {
998 		mutex_enter(&table[i].isaf_lock);
999 		while (table[i].isaf_ipsa != NULL)
1000 			sadb_unlinkassoc(table[i].isaf_ipsa);
1001 		table[i].isaf_gen++;
1002 		mutex_exit(&table[i].isaf_lock);
1003 		if (forever)
1004 			mutex_destroy(&(table[i].isaf_lock));
1005 	}
1006 
1007 	if (forever) {
1008 		*tablep = NULL;
1009 		kmem_free(table, numentries * sizeof (*table));
1010 	}
1011 }
1012 
1013 /*
1014  * Entry points to sadb_destroyer().
1015  */
1016 static void
1017 sadb_flush(sadb_t *sp, netstack_t *ns)
1018 {
1019 	/*
1020 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1021 	 * enforcement, there would be a subtlety where I could add on the
1022 	 * heels of a flush.  With keysock's enforcement, however, this
1023 	 * makes ESP's job easy.
1024 	 */
1025 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1026 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE);
1027 
1028 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1029 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
1030 }
1031 
1032 static void
1033 sadb_destroy(sadb_t *sp, netstack_t *ns)
1034 {
1035 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE);
1036 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1037 
1038 	/* For each acquire, destroy it, including the bucket mutex. */
1039 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1040 
1041 	ASSERT(sp->sdb_of == NULL);
1042 	ASSERT(sp->sdb_if == NULL);
1043 	ASSERT(sp->sdb_acq == NULL);
1044 }
1045 
1046 static void
1047 sadb_send_flush_req(sadbp_t *spp)
1048 {
1049 	mblk_t *ctl_mp;
1050 
1051 	/*
1052 	 * we've been unplumbed, or never were plumbed; don't go there.
1053 	 */
1054 	if (spp->s_ip_q == NULL)
1055 		return;
1056 
1057 	/* have IP send a flush msg to the IPsec accelerators */
1058 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1059 	if (ctl_mp != NULL)
1060 		putnext(spp->s_ip_q, ctl_mp);
1061 }
1062 
1063 void
1064 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1065 {
1066 	sadb_flush(&spp->s_v4, ns);
1067 	sadb_flush(&spp->s_v6, ns);
1068 
1069 	sadb_send_flush_req(spp);
1070 }
1071 
1072 void
1073 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1074 {
1075 	sadb_destroy(&spp->s_v4, ns);
1076 	sadb_destroy(&spp->s_v6, ns);
1077 
1078 	sadb_send_flush_req(spp);
1079 	if (spp->s_satype == SADB_SATYPE_AH) {
1080 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1081 
1082 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1083 	}
1084 }
1085 
1086 
1087 /*
1088  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1089  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1090  * EINVAL.
1091  */
1092 int
1093 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft)
1094 {
1095 	if (hard == NULL || soft == NULL)
1096 		return (0);
1097 
1098 	if (hard->sadb_lifetime_allocations != 0 &&
1099 	    soft->sadb_lifetime_allocations != 0 &&
1100 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1101 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1102 
1103 	if (hard->sadb_lifetime_bytes != 0 &&
1104 	    soft->sadb_lifetime_bytes != 0 &&
1105 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1106 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1107 
1108 	if (hard->sadb_lifetime_addtime != 0 &&
1109 	    soft->sadb_lifetime_addtime != 0 &&
1110 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1111 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1112 
1113 	if (hard->sadb_lifetime_usetime != 0 &&
1114 	    soft->sadb_lifetime_usetime != 0 &&
1115 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1116 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1117 
1118 	return (0);
1119 }
1120 
1121 /*
1122  * Clone a security association for the purposes of inserting a single SA
1123  * into inbound and outbound tables respectively. This function should only
1124  * be called from sadb_common_add().
1125  */
1126 static ipsa_t *
1127 sadb_cloneassoc(ipsa_t *ipsa)
1128 {
1129 	ipsa_t *newbie;
1130 	boolean_t error = B_FALSE;
1131 
1132 	ASSERT(!MUTEX_HELD(&(ipsa->ipsa_lock)));
1133 
1134 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1135 	if (newbie == NULL)
1136 		return (NULL);
1137 
1138 	/* Copy over what we can. */
1139 	*newbie = *ipsa;
1140 
1141 	/* bzero and initialize locks, in case *_init() allocates... */
1142 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1143 
1144 	/*
1145 	 * While somewhat dain-bramaged, the most graceful way to
1146 	 * recover from errors is to keep plowing through the
1147 	 * allocations, and getting what I can.  It's easier to call
1148 	 * sadb_freeassoc() on the stillborn clone when all the
1149 	 * pointers aren't pointing to the parent's data.
1150 	 */
1151 
1152 	if (ipsa->ipsa_authkey != NULL) {
1153 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1154 		    KM_NOSLEEP);
1155 		if (newbie->ipsa_authkey == NULL) {
1156 			error = B_TRUE;
1157 		} else {
1158 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1159 			    newbie->ipsa_authkeylen);
1160 
1161 			newbie->ipsa_kcfauthkey.ck_data =
1162 			    newbie->ipsa_authkey;
1163 		}
1164 
1165 		if (newbie->ipsa_amech.cm_param != NULL) {
1166 			newbie->ipsa_amech.cm_param =
1167 			    (char *)&newbie->ipsa_mac_len;
1168 		}
1169 	}
1170 
1171 	if (ipsa->ipsa_encrkey != NULL) {
1172 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1173 		    KM_NOSLEEP);
1174 		if (newbie->ipsa_encrkey == NULL) {
1175 			error = B_TRUE;
1176 		} else {
1177 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1178 			    newbie->ipsa_encrkeylen);
1179 
1180 			newbie->ipsa_kcfencrkey.ck_data =
1181 			    newbie->ipsa_encrkey;
1182 		}
1183 	}
1184 
1185 	newbie->ipsa_authtmpl = NULL;
1186 	newbie->ipsa_encrtmpl = NULL;
1187 	newbie->ipsa_haspeer = B_TRUE;
1188 
1189 	if (ipsa->ipsa_integ != NULL) {
1190 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1191 		    KM_NOSLEEP);
1192 		if (newbie->ipsa_integ == NULL) {
1193 			error = B_TRUE;
1194 		} else {
1195 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1196 			    newbie->ipsa_integlen);
1197 		}
1198 	}
1199 
1200 	if (ipsa->ipsa_sens != NULL) {
1201 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1202 		    KM_NOSLEEP);
1203 		if (newbie->ipsa_sens == NULL) {
1204 			error = B_TRUE;
1205 		} else {
1206 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1207 			    newbie->ipsa_senslen);
1208 		}
1209 	}
1210 
1211 	if (ipsa->ipsa_src_cid != NULL) {
1212 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1213 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1214 	}
1215 
1216 	if (ipsa->ipsa_dst_cid != NULL) {
1217 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1218 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1219 	}
1220 
1221 	if (error) {
1222 		sadb_freeassoc(newbie);
1223 		return (NULL);
1224 	}
1225 
1226 	return (newbie);
1227 }
1228 
1229 /*
1230  * Initialize a SADB address extension at the address specified by addrext.
1231  * Return a pointer to the end of the new address extension.
1232  */
1233 static uint8_t *
1234 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1235     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1236 {
1237 	struct sockaddr_in *sin;
1238 	struct sockaddr_in6 *sin6;
1239 	uint8_t *cur = start;
1240 	int addrext_len;
1241 	int sin_len;
1242 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1243 
1244 	if (cur == NULL)
1245 		return (NULL);
1246 
1247 	cur += sizeof (*addrext);
1248 	if (cur > end)
1249 		return (NULL);
1250 
1251 	addrext->sadb_address_proto = proto;
1252 	addrext->sadb_address_prefixlen = prefix;
1253 	addrext->sadb_address_reserved = 0;
1254 	addrext->sadb_address_exttype = exttype;
1255 
1256 	switch (af) {
1257 	case AF_INET:
1258 		sin = (struct sockaddr_in *)cur;
1259 		sin_len = sizeof (*sin);
1260 		cur += sin_len;
1261 		if (cur > end)
1262 			return (NULL);
1263 
1264 		sin->sin_family = af;
1265 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1266 		sin->sin_port = port;
1267 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1268 		break;
1269 	case AF_INET6:
1270 		sin6 = (struct sockaddr_in6 *)cur;
1271 		sin_len = sizeof (*sin6);
1272 		cur += sin_len;
1273 		if (cur > end)
1274 			return (NULL);
1275 
1276 		bzero(sin6, sizeof (*sin6));
1277 		sin6->sin6_family = af;
1278 		sin6->sin6_port = port;
1279 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1280 		break;
1281 	}
1282 
1283 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1284 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1285 
1286 	cur = start + addrext_len;
1287 	if (cur > end)
1288 		cur = NULL;
1289 
1290 	return (cur);
1291 }
1292 
1293 /*
1294  * Construct a key management cookie extension.
1295  */
1296 
1297 static uint8_t *
1298 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1299 {
1300 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1301 
1302 	if (cur == NULL)
1303 		return (NULL);
1304 
1305 	cur += sizeof (*kmcext);
1306 
1307 	if (cur > end)
1308 		return (NULL);
1309 
1310 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1311 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1312 	kmcext->sadb_x_kmc_proto = kmp;
1313 	kmcext->sadb_x_kmc_cookie = kmc;
1314 	kmcext->sadb_x_kmc_reserved = 0;
1315 
1316 	return (cur);
1317 }
1318 
1319 /*
1320  * Given an original message header with sufficient space following it, and an
1321  * SA, construct a full PF_KEY message with all of the relevant extensions.
1322  * This is mostly used for SADB_GET, and SADB_DUMP.
1323  */
1324 static mblk_t *
1325 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1326 {
1327 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1328 	int srcidsize, dstidsize;
1329 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1330 				/* src/dst and proxy sockaddrs. */
1331 	/*
1332 	 * The following are pointers into the PF_KEY message this PF_KEY
1333 	 * message creates.
1334 	 */
1335 	sadb_msg_t *newsamsg;
1336 	sadb_sa_t *assoc;
1337 	sadb_lifetime_t *lt;
1338 	sadb_key_t *key;
1339 	sadb_ident_t *ident;
1340 	sadb_sens_t *sens;
1341 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1342 	sadb_x_pair_t *pair_ext;
1343 
1344 	mblk_t *mp;
1345 	uint64_t *bitmap;
1346 	uint8_t *cur, *end;
1347 	/* These indicate the presence of the above extension fields. */
1348 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1349 	boolean_t paired;
1350 	uint32_t otherspi;
1351 
1352 	/* First off, figure out the allocation length for this message. */
1353 
1354 	/*
1355 	 * Constant stuff.  This includes base, SA, address (src, dst),
1356 	 * and lifetime (current).
1357 	 */
1358 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1359 	    sizeof (sadb_lifetime_t);
1360 
1361 	fam = ipsa->ipsa_addrfam;
1362 	switch (fam) {
1363 	case AF_INET:
1364 		addrsize = roundup(sizeof (struct sockaddr_in) +
1365 		    sizeof (sadb_address_t), sizeof (uint64_t));
1366 		break;
1367 	case AF_INET6:
1368 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1369 		    sizeof (sadb_address_t), sizeof (uint64_t));
1370 		break;
1371 	default:
1372 		return (NULL);
1373 	}
1374 	/*
1375 	 * Allocate TWO address extensions, for source and destination.
1376 	 * (Thus, the * 2.)
1377 	 */
1378 	alloclen += addrsize * 2;
1379 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1380 		alloclen += addrsize;
1381 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1382 		alloclen += addrsize;
1383 
1384 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1385 		paired = B_TRUE;
1386 		alloclen += sizeof (sadb_x_pair_t);
1387 		otherspi = ipsa->ipsa_otherspi;
1388 	} else {
1389 		paired = B_FALSE;
1390 	}
1391 
1392 	/* How 'bout other lifetimes? */
1393 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1394 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1395 		alloclen += sizeof (sadb_lifetime_t);
1396 		soft = B_TRUE;
1397 	} else {
1398 		soft = B_FALSE;
1399 	}
1400 
1401 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1402 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1403 		alloclen += sizeof (sadb_lifetime_t);
1404 		hard = B_TRUE;
1405 	} else {
1406 		hard = B_FALSE;
1407 	}
1408 
1409 	/* Inner addresses. */
1410 	if (ipsa->ipsa_innerfam == 0) {
1411 		isrc = B_FALSE;
1412 		idst = B_FALSE;
1413 	} else {
1414 		pfam = ipsa->ipsa_innerfam;
1415 		switch (pfam) {
1416 		case AF_INET6:
1417 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1418 			    sizeof (sadb_address_t), sizeof (uint64_t));
1419 			break;
1420 		case AF_INET:
1421 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1422 			    sizeof (sadb_address_t), sizeof (uint64_t));
1423 			break;
1424 		default:
1425 			cmn_err(CE_PANIC,
1426 			    "IPsec SADB: Proxy length failure.\n");
1427 			break;
1428 		}
1429 		isrc = B_TRUE;
1430 		idst = B_TRUE;
1431 		alloclen += 2 * paddrsize;
1432 	}
1433 
1434 	/* For the following fields, assume that length != 0 ==> stuff */
1435 	if (ipsa->ipsa_authkeylen != 0) {
1436 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1437 		    sizeof (uint64_t));
1438 		alloclen += authsize;
1439 		auth = B_TRUE;
1440 	} else {
1441 		auth = B_FALSE;
1442 	}
1443 
1444 	if (ipsa->ipsa_encrkeylen != 0) {
1445 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1446 		    sizeof (uint64_t));
1447 		alloclen += encrsize;
1448 		encr = B_TRUE;
1449 	} else {
1450 		encr = B_FALSE;
1451 	}
1452 
1453 	/* No need for roundup on sens and integ. */
1454 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1455 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1456 		    ipsa->ipsa_senslen;
1457 		sensinteg = B_TRUE;
1458 	} else {
1459 		sensinteg = B_FALSE;
1460 	}
1461 
1462 	/*
1463 	 * Must use strlen() here for lengths.	Identities use NULL
1464 	 * pointers to indicate their nonexistence.
1465 	 */
1466 	if (ipsa->ipsa_src_cid != NULL) {
1467 		srcidsize = roundup(sizeof (sadb_ident_t) +
1468 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1469 		    sizeof (uint64_t));
1470 		alloclen += srcidsize;
1471 		srcid = B_TRUE;
1472 	} else {
1473 		srcid = B_FALSE;
1474 	}
1475 
1476 	if (ipsa->ipsa_dst_cid != NULL) {
1477 		dstidsize = roundup(sizeof (sadb_ident_t) +
1478 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1479 		    sizeof (uint64_t));
1480 		alloclen += dstidsize;
1481 		dstid = B_TRUE;
1482 	} else {
1483 		dstid = B_FALSE;
1484 	}
1485 
1486 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1487 		alloclen += sizeof (sadb_x_kmc_t);
1488 
1489 	/* Make sure the allocation length is a multiple of 8 bytes. */
1490 	ASSERT((alloclen & 0x7) == 0);
1491 
1492 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1493 	mp = allocb(alloclen, BPRI_HI);
1494 	if (mp == NULL)
1495 		return (NULL);
1496 
1497 	mp->b_wptr += alloclen;
1498 	end = mp->b_wptr;
1499 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1500 	*newsamsg = *samsg;
1501 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1502 
1503 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1504 
1505 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1506 
1507 	assoc = (sadb_sa_t *)(newsamsg + 1);
1508 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1509 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1510 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1511 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1512 	assoc->sadb_sa_state = ipsa->ipsa_state;
1513 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1514 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1515 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1516 
1517 	lt = (sadb_lifetime_t *)(assoc + 1);
1518 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1519 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1520 	/* We do not support the concept. */
1521 	lt->sadb_lifetime_allocations = 0;
1522 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1523 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1524 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1525 
1526 	if (hard) {
1527 		lt++;
1528 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1529 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1530 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1531 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1532 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1533 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1534 	}
1535 
1536 	if (soft) {
1537 		lt++;
1538 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1539 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1540 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1541 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1542 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1543 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1544 	}
1545 
1546 	cur = (uint8_t *)(lt + 1);
1547 
1548 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1549 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1550 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1551 	    SA_PROTO(ipsa), 0);
1552 	if (cur == NULL) {
1553 		freemsg(mp);
1554 		mp = NULL;
1555 		goto bail;
1556 	}
1557 
1558 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1559 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1560 	    SA_PROTO(ipsa), 0);
1561 	if (cur == NULL) {
1562 		freemsg(mp);
1563 		mp = NULL;
1564 		goto bail;
1565 	}
1566 
1567 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1568 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1569 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1570 		    IPPROTO_UDP, 0);
1571 		if (cur == NULL) {
1572 			freemsg(mp);
1573 			mp = NULL;
1574 			goto bail;
1575 		}
1576 	}
1577 
1578 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1579 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1580 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1581 		    IPPROTO_UDP, 0);
1582 		if (cur == NULL) {
1583 			freemsg(mp);
1584 			mp = NULL;
1585 			goto bail;
1586 		}
1587 	}
1588 
1589 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1590 	if (isrc) {
1591 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1592 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1593 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1594 		if (cur == NULL) {
1595 			freemsg(mp);
1596 			mp = NULL;
1597 			goto bail;
1598 		}
1599 	}
1600 
1601 	if (idst) {
1602 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1603 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1604 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1605 		if (cur == NULL) {
1606 			freemsg(mp);
1607 			mp = NULL;
1608 			goto bail;
1609 		}
1610 	}
1611 
1612 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1613 		cur = sadb_make_kmc_ext(cur, end,
1614 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1615 		if (cur == NULL) {
1616 			freemsg(mp);
1617 			mp = NULL;
1618 			goto bail;
1619 		}
1620 	}
1621 
1622 	walker = (sadb_ext_t *)cur;
1623 	if (auth) {
1624 		key = (sadb_key_t *)walker;
1625 		key->sadb_key_len = SADB_8TO64(authsize);
1626 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1627 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1628 		key->sadb_key_reserved = 0;
1629 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1630 		walker = (sadb_ext_t *)((uint64_t *)walker +
1631 		    walker->sadb_ext_len);
1632 	}
1633 
1634 	if (encr) {
1635 		key = (sadb_key_t *)walker;
1636 		key->sadb_key_len = SADB_8TO64(encrsize);
1637 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1638 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1639 		key->sadb_key_reserved = 0;
1640 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1641 		walker = (sadb_ext_t *)((uint64_t *)walker +
1642 		    walker->sadb_ext_len);
1643 	}
1644 
1645 	if (srcid) {
1646 		ident = (sadb_ident_t *)walker;
1647 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1648 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1649 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1650 		ident->sadb_ident_id = 0;
1651 		ident->sadb_ident_reserved = 0;
1652 		(void) strcpy((char *)(ident + 1),
1653 		    ipsa->ipsa_src_cid->ipsid_cid);
1654 		walker = (sadb_ext_t *)((uint64_t *)walker +
1655 		    walker->sadb_ext_len);
1656 	}
1657 
1658 	if (dstid) {
1659 		ident = (sadb_ident_t *)walker;
1660 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1661 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1662 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1663 		ident->sadb_ident_id = 0;
1664 		ident->sadb_ident_reserved = 0;
1665 		(void) strcpy((char *)(ident + 1),
1666 		    ipsa->ipsa_dst_cid->ipsid_cid);
1667 		walker = (sadb_ext_t *)((uint64_t *)walker +
1668 		    walker->sadb_ext_len);
1669 	}
1670 
1671 	if (sensinteg) {
1672 		sens = (sadb_sens_t *)walker;
1673 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1674 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1675 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1676 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1677 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1678 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1679 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1680 		sens->sadb_sens_reserved = 0;
1681 		bitmap = (uint64_t *)(sens + 1);
1682 		if (ipsa->ipsa_sens != NULL) {
1683 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1684 			bitmap += sens->sadb_sens_sens_len;
1685 		}
1686 		if (ipsa->ipsa_integ != NULL)
1687 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1688 		walker = (sadb_ext_t *)((uint64_t *)walker +
1689 		    walker->sadb_ext_len);
1690 	}
1691 
1692 	if (paired) {
1693 		pair_ext = (sadb_x_pair_t *)walker;
1694 
1695 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1696 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1697 		pair_ext->sadb_x_pair_spi = otherspi;
1698 
1699 		walker = (sadb_ext_t *)((uint64_t *)walker +
1700 		    walker->sadb_ext_len);
1701 	}
1702 
1703 bail:
1704 	/* Pardon any delays... */
1705 	mutex_exit(&ipsa->ipsa_lock);
1706 
1707 	return (mp);
1708 }
1709 
1710 /*
1711  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1712  * and adjust base message accordingly.
1713  *
1714  * Assume message is pulled up in one piece of contiguous memory.
1715  *
1716  * Say if we start off with:
1717  *
1718  * +------+----+-------------+-----------+---------------+---------------+
1719  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1720  * +------+----+-------------+-----------+---------------+---------------+
1721  *
1722  * we will end up with
1723  *
1724  * +------+----+-------------+-----------+---------------+
1725  * | base | SA | source addr | dest addr | soft lifetime |
1726  * +------+----+-------------+-----------+---------------+
1727  */
1728 static void
1729 sadb_strip(sadb_msg_t *samsg)
1730 {
1731 	sadb_ext_t *ext;
1732 	uint8_t *target = NULL;
1733 	uint8_t *msgend;
1734 	int sofar = SADB_8TO64(sizeof (*samsg));
1735 	int copylen;
1736 
1737 	ext = (sadb_ext_t *)(samsg + 1);
1738 	msgend = (uint8_t *)samsg;
1739 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1740 	while ((uint8_t *)ext < msgend) {
1741 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1742 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1743 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1744 			/*
1745 			 * Aha!	 I found a header to be erased.
1746 			 */
1747 
1748 			if (target != NULL) {
1749 				/*
1750 				 * If I had a previous header to be erased,
1751 				 * copy over it.  I can get away with just
1752 				 * copying backwards because the target will
1753 				 * always be 8 bytes behind the source.
1754 				 */
1755 				copylen = ((uint8_t *)ext) - (target +
1756 				    SADB_64TO8(
1757 				    ((sadb_ext_t *)target)->sadb_ext_len));
1758 				ovbcopy(((uint8_t *)ext - copylen), target,
1759 				    copylen);
1760 				target += copylen;
1761 				((sadb_ext_t *)target)->sadb_ext_len =
1762 				    SADB_8TO64(((uint8_t *)ext) - target +
1763 				    SADB_64TO8(ext->sadb_ext_len));
1764 			} else {
1765 				target = (uint8_t *)ext;
1766 			}
1767 		} else {
1768 			sofar += ext->sadb_ext_len;
1769 		}
1770 
1771 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1772 	}
1773 
1774 	ASSERT((uint8_t *)ext == msgend);
1775 
1776 	if (target != NULL) {
1777 		copylen = ((uint8_t *)ext) - (target +
1778 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1779 		if (copylen != 0)
1780 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1781 	}
1782 
1783 	/* Adjust samsg. */
1784 	samsg->sadb_msg_len = (uint16_t)sofar;
1785 
1786 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1787 }
1788 
1789 /*
1790  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1791  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1792  * the sending keysock instance is included.
1793  */
1794 void
1795 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1796     uint_t serial)
1797 {
1798 	mblk_t *msg = mp->b_cont;
1799 	sadb_msg_t *samsg;
1800 	keysock_out_t *kso;
1801 
1802 	/*
1803 	 * Enough functions call this to merit a NULL queue check.
1804 	 */
1805 	if (pfkey_q == NULL) {
1806 		freemsg(mp);
1807 		return;
1808 	}
1809 
1810 	ASSERT(msg != NULL);
1811 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1812 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1813 	samsg = (sadb_msg_t *)msg->b_rptr;
1814 	kso = (keysock_out_t *)mp->b_rptr;
1815 
1816 	kso->ks_out_type = KEYSOCK_OUT;
1817 	kso->ks_out_len = sizeof (*kso);
1818 	kso->ks_out_serial = serial;
1819 
1820 	/*
1821 	 * Only send the base message up in the event of an error.
1822 	 * Don't worry about bzero()-ing, because it was probably bogus
1823 	 * anyway.
1824 	 */
1825 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1826 	samsg = (sadb_msg_t *)msg->b_rptr;
1827 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1828 	samsg->sadb_msg_errno = (uint8_t)error;
1829 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1830 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1831 
1832 	putnext(pfkey_q, mp);
1833 }
1834 
1835 /*
1836  * Send a successful return packet back to keysock via the queue in pfkey_q.
1837  *
1838  * Often, an SA is associated with the reply message, it's passed in if needed,
1839  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1840  * and the caller will release said refcnt.
1841  */
1842 void
1843 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1844     keysock_in_t *ksi, ipsa_t *ipsa)
1845 {
1846 	keysock_out_t *kso;
1847 	mblk_t *mp1;
1848 	sadb_msg_t *newsamsg;
1849 	uint8_t *oldend;
1850 
1851 	ASSERT((mp->b_cont != NULL) &&
1852 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1853 	    ((void *)mp->b_rptr == (void *)ksi));
1854 
1855 	switch (samsg->sadb_msg_type) {
1856 	case SADB_ADD:
1857 	case SADB_UPDATE:
1858 	case SADB_X_UPDATEPAIR:
1859 	case SADB_FLUSH:
1860 	case SADB_DUMP:
1861 		/*
1862 		 * I have all of the message already.  I just need to strip
1863 		 * out the keying material and echo the message back.
1864 		 *
1865 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1866 		 * work.  When DUMP reaches here, it should only be a base
1867 		 * message.
1868 		 */
1869 	justecho:
1870 		ASSERT(samsg->sadb_msg_type != SADB_DUMP ||
1871 		    samsg->sadb_msg_len == SADB_8TO64(sizeof (sadb_msg_t)));
1872 
1873 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1874 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1875 			sadb_strip(samsg);
1876 			/* Assume PF_KEY message is contiguous. */
1877 			ASSERT(mp->b_cont->b_cont == NULL);
1878 			oldend = mp->b_cont->b_wptr;
1879 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1880 			    SADB_64TO8(samsg->sadb_msg_len);
1881 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1882 		}
1883 		break;
1884 	case SADB_GET:
1885 		/*
1886 		 * Do a lot of work here, because of the ipsa I just found.
1887 		 * First construct the new PF_KEY message, then abandon
1888 		 * the old one.
1889 		 */
1890 		mp1 = sadb_sa2msg(ipsa, samsg);
1891 		if (mp1 == NULL) {
1892 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1893 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1894 			return;
1895 		}
1896 		freemsg(mp->b_cont);
1897 		mp->b_cont = mp1;
1898 		break;
1899 	case SADB_DELETE:
1900 	case SADB_X_DELPAIR:
1901 		if (ipsa == NULL)
1902 			goto justecho;
1903 		/*
1904 		 * Because listening KMds may require more info, treat
1905 		 * DELETE like a special case of GET.
1906 		 */
1907 		mp1 = sadb_sa2msg(ipsa, samsg);
1908 		if (mp1 == NULL) {
1909 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1910 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1911 			return;
1912 		}
1913 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1914 		sadb_strip(newsamsg);
1915 		oldend = mp1->b_wptr;
1916 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1917 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1918 		freemsg(mp->b_cont);
1919 		mp->b_cont = mp1;
1920 		break;
1921 	default:
1922 		if (mp != NULL)
1923 			freemsg(mp);
1924 		return;
1925 	}
1926 
1927 	/* ksi is now null and void. */
1928 	kso = (keysock_out_t *)ksi;
1929 	kso->ks_out_type = KEYSOCK_OUT;
1930 	kso->ks_out_len = sizeof (*kso);
1931 	kso->ks_out_serial = ksi->ks_in_serial;
1932 	/* We're ready to send... */
1933 	putnext(pfkey_q, mp);
1934 }
1935 
1936 /*
1937  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1938  */
1939 void
1940 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1941     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1942 {
1943 	keysock_hello_ack_t *kha;
1944 	queue_t *oldq;
1945 
1946 	ASSERT(OTHERQ(q) != NULL);
1947 
1948 	/*
1949 	 * First, check atomically that I'm the first and only keysock
1950 	 * instance.
1951 	 *
1952 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1953 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1954 	 * messages.
1955 	 */
1956 
1957 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
1958 	if (oldq != NULL) {
1959 		ASSERT(oldq != q);
1960 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1961 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1962 		freemsg(mp);
1963 		return;
1964 	}
1965 
1966 	kha = (keysock_hello_ack_t *)mp->b_rptr;
1967 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1968 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1969 	kha->ks_hello_satype = (uint8_t)satype;
1970 
1971 	/*
1972 	 * If we made it past the casptr, then we have "exclusive" access
1973 	 * to the timeout handle.  Fire it off in 4 seconds, because it
1974 	 * just seems like a good interval.
1975 	 */
1976 	*top = qtimeout(*pfkey_qp, ager, agerarg, drv_usectohz(4000000));
1977 
1978 	putnext(*pfkey_qp, mp);
1979 }
1980 
1981 /*
1982  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1983  *
1984  * Check addresses themselves for wildcard or multicast.
1985  * Check ire table for local/non-local/broadcast.
1986  */
1987 int
1988 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1989     netstack_t *ns)
1990 {
1991 	sadb_address_t *addr = (sadb_address_t *)ext;
1992 	struct sockaddr_in *sin;
1993 	struct sockaddr_in6 *sin6;
1994 	ire_t *ire;
1995 	int diagnostic, type;
1996 	boolean_t normalized = B_FALSE;
1997 
1998 	ASSERT(ext != NULL);
1999 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
2000 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
2001 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
2002 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
2003 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
2004 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
2005 
2006 	/* Assign both sockaddrs, the compiler will do the right thing. */
2007 	sin = (struct sockaddr_in *)(addr + 1);
2008 	sin6 = (struct sockaddr_in6 *)(addr + 1);
2009 
2010 	if (sin6->sin6_family == AF_INET6) {
2011 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2012 			/*
2013 			 * Convert to an AF_INET sockaddr.  This means the
2014 			 * return messages will have the extra space, but have
2015 			 * AF_INET sockaddrs instead of AF_INET6.
2016 			 *
2017 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
2018 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
2019 			 * equal to AF_INET <v4>, it shouldnt be a huge
2020 			 * problem.
2021 			 */
2022 			sin->sin_family = AF_INET;
2023 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
2024 			    &sin->sin_addr);
2025 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
2026 			normalized = B_TRUE;
2027 		}
2028 	} else if (sin->sin_family != AF_INET) {
2029 		switch (ext->sadb_ext_type) {
2030 		case SADB_EXT_ADDRESS_SRC:
2031 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2032 			break;
2033 		case SADB_EXT_ADDRESS_DST:
2034 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2035 			break;
2036 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2037 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2038 			break;
2039 		case SADB_X_EXT_ADDRESS_INNER_DST:
2040 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2041 			break;
2042 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2043 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2044 			break;
2045 		case SADB_X_EXT_ADDRESS_NATT_REM:
2046 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2047 			break;
2048 			/* There is no default, see above ASSERT. */
2049 		}
2050 bail:
2051 		if (pfkey_q != NULL) {
2052 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2053 			    serial);
2054 		} else {
2055 			/*
2056 			 * Scribble in sadb_msg that we got passed in.
2057 			 * Overload "mp" to be an sadb_msg pointer.
2058 			 */
2059 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2060 
2061 			samsg->sadb_msg_errno = EINVAL;
2062 			samsg->sadb_x_msg_diagnostic = diagnostic;
2063 		}
2064 		return (KS_IN_ADDR_UNKNOWN);
2065 	}
2066 
2067 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2068 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2069 		/*
2070 		 * We need only check for prefix issues.
2071 		 */
2072 
2073 		/* Set diagnostic now, in case we need it later. */
2074 		diagnostic =
2075 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2076 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2077 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2078 
2079 		if (normalized)
2080 			addr->sadb_address_prefixlen -= 96;
2081 
2082 		/*
2083 		 * Verify and mask out inner-addresses based on prefix length.
2084 		 */
2085 		if (sin->sin_family == AF_INET) {
2086 			if (addr->sadb_address_prefixlen > 32)
2087 				goto bail;
2088 			sin->sin_addr.s_addr &=
2089 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2090 		} else {
2091 			in6_addr_t mask;
2092 
2093 			ASSERT(sin->sin_family == AF_INET6);
2094 			/*
2095 			 * ip_plen_to_mask_v6() returns NULL if the value in
2096 			 * question is out of range.
2097 			 */
2098 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2099 			    &mask) == NULL)
2100 				goto bail;
2101 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2102 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2103 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2104 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2105 		}
2106 
2107 		/* We don't care in these cases. */
2108 		return (KS_IN_ADDR_DONTCARE);
2109 	}
2110 
2111 	if (sin->sin_family == AF_INET6) {
2112 		/* Check the easy ones now. */
2113 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2114 			return (KS_IN_ADDR_MBCAST);
2115 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2116 			return (KS_IN_ADDR_UNSPEC);
2117 		/*
2118 		 * At this point, we're a unicast IPv6 address.
2119 		 *
2120 		 * A ctable lookup for local is sufficient here.  If we're
2121 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2122 		 *
2123 		 * XXX Zones alert -> me/notme decision needs to be tempered
2124 		 * by what zone we're in when we go to zone-aware IPsec.
2125 		 */
2126 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2127 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2128 		    ns->netstack_ip);
2129 		if (ire != NULL) {
2130 			/* Hey hey, it's local. */
2131 			IRE_REFRELE(ire);
2132 			return (KS_IN_ADDR_ME);
2133 		}
2134 	} else {
2135 		ASSERT(sin->sin_family == AF_INET);
2136 		if (sin->sin_addr.s_addr == INADDR_ANY)
2137 			return (KS_IN_ADDR_UNSPEC);
2138 		if (CLASSD(sin->sin_addr.s_addr))
2139 			return (KS_IN_ADDR_MBCAST);
2140 		/*
2141 		 * At this point we're a unicast or broadcast IPv4 address.
2142 		 *
2143 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2144 		 * A NULL return value is NOTME, otherwise, look at the
2145 		 * returned ire for broadcast or not and return accordingly.
2146 		 *
2147 		 * XXX Zones alert -> me/notme decision needs to be tempered
2148 		 * by what zone we're in when we go to zone-aware IPsec.
2149 		 */
2150 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2151 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2152 		    MATCH_IRE_TYPE, ns->netstack_ip);
2153 		if (ire != NULL) {
2154 			/* Check for local or broadcast */
2155 			type = ire->ire_type;
2156 			IRE_REFRELE(ire);
2157 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2158 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2159 			    KS_IN_ADDR_MBCAST);
2160 		}
2161 	}
2162 
2163 	return (KS_IN_ADDR_NOTME);
2164 }
2165 
2166 /*
2167  * Address normalizations and reality checks for inbound PF_KEY messages.
2168  *
2169  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2170  * the source to AF_INET.  Do the same for the inner sources.
2171  */
2172 boolean_t
2173 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2174 {
2175 	struct sockaddr_in *src, *isrc;
2176 	struct sockaddr_in6 *dst, *idst;
2177 	sadb_address_t *srcext, *dstext;
2178 	uint16_t sport;
2179 	sadb_ext_t **extv = ksi->ks_in_extv;
2180 	int rc;
2181 
2182 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2183 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2184 		    ksi->ks_in_serial, ns);
2185 		if (rc == KS_IN_ADDR_UNKNOWN)
2186 			return (B_FALSE);
2187 		if (rc == KS_IN_ADDR_MBCAST) {
2188 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2189 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2190 			return (B_FALSE);
2191 		}
2192 		ksi->ks_in_srctype = rc;
2193 	}
2194 
2195 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2196 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2197 		    ksi->ks_in_serial, ns);
2198 		if (rc == KS_IN_ADDR_UNKNOWN)
2199 			return (B_FALSE);
2200 		if (rc == KS_IN_ADDR_UNSPEC) {
2201 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2202 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2203 			return (B_FALSE);
2204 		}
2205 		ksi->ks_in_dsttype = rc;
2206 	}
2207 
2208 	/*
2209 	 * NAT-Traversal addrs are simple enough to not require all of
2210 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2211 	 * AF_INET.
2212 	 */
2213 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2214 		rc = sadb_addrcheck(pfkey_q, mp,
2215 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2216 
2217 		/*
2218 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2219 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2220 		 * AND local-port flexibility).
2221 		 */
2222 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2223 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2224 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2225 			    ksi->ks_in_serial);
2226 			return (B_FALSE);
2227 		}
2228 		src = (struct sockaddr_in *)
2229 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2230 		if (src->sin_family != AF_INET) {
2231 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2232 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2233 			    ksi->ks_in_serial);
2234 			return (B_FALSE);
2235 		}
2236 	}
2237 
2238 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2239 		rc = sadb_addrcheck(pfkey_q, mp,
2240 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2241 
2242 		/*
2243 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2244 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2245 		 */
2246 		if (rc != KS_IN_ADDR_NOTME &&
2247 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2248 		    rc == KS_IN_ADDR_UNSPEC)) {
2249 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2250 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2251 			    ksi->ks_in_serial);
2252 			return (B_FALSE);
2253 		}
2254 		src = (struct sockaddr_in *)
2255 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2256 		if (src->sin_family != AF_INET) {
2257 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2258 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2259 			    ksi->ks_in_serial);
2260 			return (B_FALSE);
2261 		}
2262 	}
2263 
2264 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2265 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2266 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2267 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2268 			    ksi->ks_in_serial);
2269 			return (B_FALSE);
2270 		}
2271 
2272 		if (sadb_addrcheck(pfkey_q, mp,
2273 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2274 		    == KS_IN_ADDR_UNKNOWN ||
2275 		    sadb_addrcheck(pfkey_q, mp,
2276 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2277 		    == KS_IN_ADDR_UNKNOWN)
2278 			return (B_FALSE);
2279 
2280 		isrc = (struct sockaddr_in *)
2281 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2282 		    1);
2283 		idst = (struct sockaddr_in6 *)
2284 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2285 		    1);
2286 		if (isrc->sin_family != idst->sin6_family) {
2287 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2288 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2289 			    ksi->ks_in_serial);
2290 			return (B_FALSE);
2291 		}
2292 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2293 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2294 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2295 			    ksi->ks_in_serial);
2296 			return (B_FALSE);
2297 	} else {
2298 		isrc = NULL;	/* For inner/outer port check below. */
2299 	}
2300 
2301 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2302 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2303 
2304 	if (dstext == NULL || srcext == NULL)
2305 		return (B_TRUE);
2306 
2307 	dst = (struct sockaddr_in6 *)(dstext + 1);
2308 	src = (struct sockaddr_in *)(srcext + 1);
2309 
2310 	if (isrc != NULL &&
2311 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2312 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2313 		/* Can't set inner and outer ports in one SA. */
2314 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2315 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2316 		    ksi->ks_in_serial);
2317 		return (B_FALSE);
2318 	}
2319 
2320 	if (dst->sin6_family == src->sin_family)
2321 		return (B_TRUE);
2322 
2323 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2324 		if (srcext->sadb_address_proto == 0) {
2325 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2326 		} else if (dstext->sadb_address_proto == 0) {
2327 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2328 		} else {
2329 			/* Inequal protocols, neither were 0.  Report error. */
2330 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2331 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2332 			    ksi->ks_in_serial);
2333 			return (B_FALSE);
2334 		}
2335 	}
2336 
2337 	/*
2338 	 * With the exception of an unspec IPv6 source and an IPv4
2339 	 * destination, address families MUST me matched.
2340 	 */
2341 	if (src->sin_family == AF_INET ||
2342 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2343 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2344 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2345 		return (B_FALSE);
2346 	}
2347 
2348 	/*
2349 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2350 	 * in the same place for sockaddr_in and sockaddr_in6.
2351 	 */
2352 	sport = src->sin_port;
2353 	bzero(src, sizeof (*src));
2354 	src->sin_family = AF_INET;
2355 	src->sin_port = sport;
2356 
2357 	return (B_TRUE);
2358 }
2359 
2360 /*
2361  * Set the results in "addrtype", given an IRE as requested by
2362  * sadb_addrcheck().
2363  */
2364 int
2365 sadb_addrset(ire_t *ire)
2366 {
2367 	if ((ire->ire_type & IRE_BROADCAST) ||
2368 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2369 	    (ire->ire_ipversion == IPV6_VERSION &&
2370 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2371 		return (KS_IN_ADDR_MBCAST);
2372 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2373 		return (KS_IN_ADDR_ME);
2374 	return (KS_IN_ADDR_NOTME);
2375 }
2376 
2377 
2378 /*
2379  * Walker callback function to delete sa's based on src/dst address.
2380  * Assumes that we're called with *head locked, no other locks held;
2381  * Conveniently, and not coincidentally, this is both what sadb_walker
2382  * gives us and also what sadb_unlinkassoc expects.
2383  */
2384 
/*
 * Filter criteria and working state handed to sadb_purge_cb() through the
 * walker's cookie argument.  Filled in by sadb_purge_sa().
 */
struct sadb_purge_state
{
	uint32_t *src;		/* source address to match, NULL = any */
	uint32_t *dst;		/* destination address to match, NULL = any */
	sa_family_t af;		/* address family of src/dst */
	boolean_t inbnd;	/* B_TRUE while walking the inbound table */
	char *sidstr;		/* source identity string, NULL = no filter */
	char *didstr;		/* dest. identity string, NULL = no filter */
	uint16_t sidtype;	/* type of sidstr; valid iff sidstr != NULL */
	uint16_t didtype;	/* type of didstr; valid iff didstr != NULL */
	uint32_t kmproto;	/* KM protocol; > SADB_X_KMP_MAX = no filter */
	mblk_t *mq;		/* filled by sadb_torch_assoc(), drained later */
};
2398 
2399 static void
2400 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2401 {
2402 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2403 
2404 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2405 
2406 	mutex_enter(&entry->ipsa_lock);
2407 
2408 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2409 	    (ps->src != NULL &&
2410 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2411 	    (ps->dst != NULL &&
2412 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2413 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2414 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2415 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2416 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2417 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2418 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2419 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2420 		mutex_exit(&entry->ipsa_lock);
2421 		return;
2422 	}
2423 
2424 	entry->ipsa_state = IPSA_STATE_DEAD;
2425 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2426 }
2427 
2428 /*
2429  * Common code to purge an SA with a matching src or dst address.
2430  * Don't kill larval SA's in such a purge.
2431  */
2432 int
2433 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2434     queue_t *ip_q)
2435 {
2436 	sadb_address_t *dstext =
2437 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2438 	sadb_address_t *srcext =
2439 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2440 	sadb_ident_t *dstid =
2441 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2442 	sadb_ident_t *srcid =
2443 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2444 	sadb_x_kmc_t *kmc =
2445 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2446 	struct sockaddr_in *src, *dst;
2447 	struct sockaddr_in6 *src6, *dst6;
2448 	struct sadb_purge_state ps;
2449 
2450 	/*
2451 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2452 	 * takes care of them.
2453 	 */
2454 
2455 	/* enforced by caller */
2456 	ASSERT((dstext != NULL) || (srcext != NULL));
2457 
2458 	ps.src = NULL;
2459 	ps.dst = NULL;
2460 #ifdef DEBUG
2461 	ps.af = (sa_family_t)-1;
2462 #endif
2463 	ps.mq = NULL;
2464 	ps.sidstr = NULL;
2465 	ps.didstr = NULL;
2466 	ps.kmproto = SADB_X_KMP_MAX + 1;
2467 
2468 	if (dstext != NULL) {
2469 		dst = (struct sockaddr_in *)(dstext + 1);
2470 		ps.af = dst->sin_family;
2471 		if (dst->sin_family == AF_INET6) {
2472 			dst6 = (struct sockaddr_in6 *)dst;
2473 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2474 		} else {
2475 			ps.dst = (uint32_t *)&dst->sin_addr;
2476 		}
2477 	}
2478 
2479 	if (srcext != NULL) {
2480 		src = (struct sockaddr_in *)(srcext + 1);
2481 		ps.af = src->sin_family;
2482 		if (src->sin_family == AF_INET6) {
2483 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2484 			ps.src = (uint32_t *)&src6->sin6_addr;
2485 		} else {
2486 			ps.src = (uint32_t *)&src->sin_addr;
2487 		}
2488 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2489 	}
2490 
2491 	ASSERT(ps.af != (sa_family_t)-1);
2492 
2493 	if (dstid != NULL) {
2494 		/*
2495 		 * NOTE:  May need to copy string in the future
2496 		 * if the inbound keysock message disappears for some strange
2497 		 * reason.
2498 		 */
2499 		ps.didstr = (char *)(dstid + 1);
2500 		ps.didtype = dstid->sadb_ident_type;
2501 	}
2502 
2503 	if (srcid != NULL) {
2504 		/*
2505 		 * NOTE:  May need to copy string in the future
2506 		 * if the inbound keysock message disappears for some strange
2507 		 * reason.
2508 		 */
2509 		ps.sidstr = (char *)(srcid + 1);
2510 		ps.sidtype = srcid->sadb_ident_type;
2511 	}
2512 
2513 	if (kmc != NULL)
2514 		ps.kmproto = kmc->sadb_x_kmc_proto;
2515 
2516 	/*
2517 	 * This is simple, crude, and effective.
2518 	 * Unimplemented optimizations (TBD):
2519 	 * - we can limit how many places we search based on where we
2520 	 * think the SA is filed.
2521 	 * - if we get a dst address, we can hash based on dst addr to find
2522 	 * the correct bucket in the outbound table.
2523 	 */
2524 	ps.inbnd = B_TRUE;
2525 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2526 	ps.inbnd = B_FALSE;
2527 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2528 
2529 	if (ps.mq != NULL)
2530 		sadb_drain_torchq(ip_q, ps.mq);
2531 
2532 	ASSERT(mp->b_cont != NULL);
2533 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2534 	    NULL);
2535 	return (0);
2536 }
2537 
/*
 * Common code to delete/get an SA.
 *
 * Looks up the SA named by the SADB_EXT_SA and address extensions in "ksi"
 * (plus its pair, if any) via get_ipsa_pair().  For SADB_DELETE the SA is
 * torched and any pair SA is unpaired; for SADB_X_DELPAIR both are torched.
 * For any other message type nothing is modified.  On success the request
 * is echoed on pfkey_q along with the SA that was found.
 *
 * Returns 0 on success.  On failure returns EINVAL (destination address or
 * SA extension missing) or ESRCH (SA, or the pair SA for SADB_X_DELPAIR,
 * not found) with *diagnostic set; nothing is echoed in that case.
 */
int
sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
    int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
{
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	sadb_address_t *srcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
	ipsa_t *echo_target = NULL;
	ipsap_t *ipsapp;
	mblk_t *torchq = NULL;
	uint_t	error = 0;

	/* The source address is optional; destination and SA are not. */
	if (dstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
		return (EINVAL);
	}
	if (assoc == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
		return (EINVAL);
	}

	/*
	 * NULL covers both "no such SA" and get_ipsa_pair() failing to
	 * allocate its result; either way it is reported as not found.
	 */
	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
	if (ipsapp == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
		return (ESRCH);
	}

	/* Echo the SA itself, falling back to its pair. */
	echo_target = ipsapp->ipsap_sa_ptr;
	if (echo_target == NULL)
		echo_target = ipsapp->ipsap_psa_ptr;

	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
		/*
		 * Bucket locks will be required if SA is actually unlinked.
		 * get_ipsa_pair() returns valid hash bucket pointers even
		 * if it can't find a pair SA pointer.
		 */
		mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
		mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);

		if (ipsapp->ipsap_sa_ptr != NULL) {
			mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
			ipsapp->ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
			(void) sadb_torch_assoc(ipsapp->ipsap_bucket,
			    ipsapp->ipsap_sa_ptr, B_FALSE, &torchq);
			/*
			 * sadb_torch_assoc() releases the ipsa_lock
			 * and calls sadb_unlinkassoc() which does a
			 * IPSA_REFRELE.
			 */
		}
		if (ipsapp->ipsap_psa_ptr != NULL) {
			mutex_enter(&ipsapp->ipsap_psa_ptr->ipsa_lock);
			if (sadb_msg_type == SADB_X_DELPAIR) {
				/* Delete the pair SA along with the first. */
				ipsapp->ipsap_psa_ptr->ipsa_state =
				    IPSA_STATE_DEAD;
				(void) sadb_torch_assoc(ipsapp->ipsap_pbucket,
				    ipsapp->ipsap_psa_ptr, B_FALSE, &torchq);
			} else {
				/*
				 * Only half of the "pair" has been deleted.
				 * Update the remaining SA and remove references
				 * to its pair SA, which is now gone.
				 */
				ipsapp->ipsap_psa_ptr->ipsa_otherspi = 0;
				ipsapp->ipsap_psa_ptr->ipsa_flags &=
				    ~IPSA_F_PAIRED;
				mutex_exit(&ipsapp->ipsap_psa_ptr->ipsa_lock);
			}
		} else if (sadb_msg_type == SADB_X_DELPAIR) {
			/*
			 * A pair delete was asked for but no pair exists.
			 * Note that the first SA has already been torched
			 * above by the time this error is raised.
			 */
			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
			error = ESRCH;
		}
		mutex_exit(&ipsapp->ipsap_bucket->isaf_lock);
		mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
	}

	/* Drain the torch queue only after the bucket locks are dropped. */
	if (torchq != NULL)
		sadb_drain_torchq(spp->s_ip_q, torchq);

	ASSERT(mp->b_cont != NULL);

	/* On error nothing is echoed here; the caller gets the errno. */
	if (error == 0)
		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
		    mp->b_cont->b_rptr, ksi, echo_target);

	destroy_ipsa_pair(ipsapp);

	return (error);
}
2633 
/*
 * This function takes a sadb_sa_t and finds the ipsa_t structure
 * and the isaf_t (hash bucket) that it's stored under. If the security
 * association has a peer, the ipsa_t structure and bucket for that security
 * association are also searched for. The "pair" of ipsa_t's and isaf_t's
 * are returned as a ipsap_t.
 *
 * Note that a "pair" is defined as one (but not both) of the following:
 *
 * A security association which has a soft reference to another security
 * association via its SPI.
 *
 * A security association that is not obviously "inbound" or "outbound" so
 * it appears in both hash tables, the "peer" being the same security
 * association in the other hash table.
 *
 * This function will return NULL if the ipsa_t can't be found in the
 * inbound or outbound hash tables (not found), or if the ipsap_t itself
 * cannot be allocated. If only one ipsa_t is found, the pair ipsa_t will
 * be NULL. Both isaf_t values are valid provided at least one ipsa_t is
 * found.
 *
 * "srcext" may be NULL, in which case an all-zeroes source address is used
 * for the lookup.  "dstext" is dereferenced unconditionally.  No bucket or
 * SA locks are held on return.
 */
ipsap_t *
get_ipsa_pair(sadb_sa_t *assoc, sadb_address_t *srcext, sadb_address_t *dstext,
    sadbp_t *spp)
{
	struct sockaddr_in *src, *dst;
	struct sockaddr_in6 *src6, *dst6;
	sadb_t *sp;
	uint32_t *srcaddr, *dstaddr;
	isaf_t *outbound_bucket, *inbound_bucket;
	boolean_t in_inbound_table = B_FALSE;
	ipsap_t *ipsapp;
	sa_family_t af;

	/* Snapshot of the found SA's pairing info, taken under its lock. */
	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
	uint32_t pair_spi;

	/* Zeroed, so every pointer in the result starts out NULL. */
	ipsapp = kmem_zalloc(sizeof (*ipsapp), KM_NOSLEEP);
	if (ipsapp == NULL)
		return (NULL);

	/*
	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
	 * takes care of them.
	 */

	/*
	 * The destination's address family selects the v4 or v6 SADB; the
	 * outbound bucket is hashed from the destination address.
	 */
	dst = (struct sockaddr_in *)(dstext + 1);
	af = dst->sin_family;
	if (af == AF_INET6) {
		sp = &spp->s_v6;
		dst6 = (struct sockaddr_in6 *)dst;
		dstaddr = (uint32_t *)&dst6->sin6_addr;
		if (srcext != NULL) {
			src6 = (struct sockaddr_in6 *)(srcext + 1);
			srcaddr = (uint32_t *)&src6->sin6_addr;
			ASSERT(src6->sin6_family == af);
			ASSERT(src6->sin6_family == AF_INET6);
		} else {
			srcaddr = ALL_ZEROES_PTR;
		}
		outbound_bucket = OUTBOUND_BUCKET_V6(sp,
		    *(uint32_t *)dstaddr);
	} else {
		sp = &spp->s_v4;
		dstaddr = (uint32_t *)&dst->sin_addr;
		if (srcext != NULL) {
			src = (struct sockaddr_in *)(srcext + 1);
			srcaddr = (uint32_t *)&src->sin_addr;
			ASSERT(src->sin_family == af);
			ASSERT(src->sin_family == AF_INET);
		} else {
			srcaddr = ALL_ZEROES_PTR;
		}
		outbound_bucket = OUTBOUND_BUCKET_V4(sp,
		    *(uint32_t *)dstaddr);
	}

	/* The inbound bucket is hashed from the SPI. */
	inbound_bucket = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);

	/* Lock down both buckets. */
	mutex_enter(&outbound_bucket->isaf_lock);
	mutex_enter(&inbound_bucket->isaf_lock);

	/*
	 * Search the table suggested by the direction flag first, then the
	 * other one.  ipsap_bucket is the bucket that was searched for the
	 * SA (the second one if the first search failed) and ipsap_pbucket
	 * is the opposite bucket.
	 */
	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(inbound_bucket,
		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
		if (ipsapp->ipsap_sa_ptr != NULL) {
			ipsapp->ipsap_bucket = inbound_bucket;
			ipsapp->ipsap_pbucket = outbound_bucket;
			in_inbound_table = B_TRUE;
		} else {
			ipsapp->ipsap_sa_ptr =
			    ipsec_getassocbyspi(outbound_bucket,
			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
			ipsapp->ipsap_bucket = outbound_bucket;
			ipsapp->ipsap_pbucket = inbound_bucket;
		}
	} else {
		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
		ipsapp->ipsap_sa_ptr =
		    ipsec_getassocbyspi(outbound_bucket,
		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
		if (ipsapp->ipsap_sa_ptr != NULL) {
			ipsapp->ipsap_bucket = outbound_bucket;
			ipsapp->ipsap_pbucket = inbound_bucket;
		} else {
			ipsapp->ipsap_sa_ptr =
			    ipsec_getassocbyspi(inbound_bucket,
			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
			ipsapp->ipsap_bucket = inbound_bucket;
			ipsapp->ipsap_pbucket = outbound_bucket;
			if (ipsapp->ipsap_sa_ptr != NULL)
				in_inbound_table = B_TRUE;
		}
	}

	/* Not in either table: nothing to return. */
	if (ipsapp->ipsap_sa_ptr == NULL) {
		mutex_exit(&outbound_bucket->isaf_lock);
		mutex_exit(&inbound_bucket->isaf_lock);
		kmem_free(ipsapp, sizeof (*ipsapp));
		return (NULL);
	}

	/*
	 * A larval SA found in the inbound table is returned as-is, with
	 * no pair lookup attempted.
	 */
	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
	    in_inbound_table) {
		mutex_exit(&outbound_bucket->isaf_lock);
		mutex_exit(&inbound_bucket->isaf_lock);
		return (ipsapp);
	}

	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
		/*
		 * haspeer implies no sa_pairing, look for same spi
		 * in other hashtable.
		 */
		ipsapp->ipsap_psa_ptr =
		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
		mutex_exit(&outbound_bucket->isaf_lock);
		mutex_exit(&inbound_bucket->isaf_lock);
		return (ipsapp);
	}
	/*
	 * Copy the pairing info out while the SA is locked, so that it can
	 * be used below after all of the locks have been dropped.
	 */
	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
	IPSA_COPY_ADDR(&pair_srcaddr,
	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, af);
	IPSA_COPY_ADDR(&pair_dstaddr,
	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, af);
	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
	mutex_exit(&outbound_bucket->isaf_lock);
	mutex_exit(&inbound_bucket->isaf_lock);

	/* A zero "other SPI" means this SA is not paired. */
	if (pair_spi == 0) {
		ASSERT(ipsapp->ipsap_bucket != NULL);
		ASSERT(ipsapp->ipsap_pbucket != NULL);
		return (ipsapp);
	}

	/*
	 * The pair SA lives in the opposite table from the one the SA was
	 * found in, so recompute ipsap_pbucket for the pair's own key.
	 */

	if (in_inbound_table) {
		/* Found SA in inbound table, pair will be in outbound. */
		if (af == AF_INET6) {
			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sp,
			    *(uint32_t *)pair_srcaddr);
		} else {
			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sp,
			    *(uint32_t *)pair_srcaddr);
		}
	} else {
		/* Found SA in outbound table, pair will be in inbound. */
		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sp, pair_spi);
	}
	/* The pair runs the other way, so source and destination swap. */
	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
	    pair_spi, pair_dstaddr, pair_srcaddr, af);
	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);

	ASSERT(ipsapp->ipsap_bucket != NULL);
	ASSERT(ipsapp->ipsap_pbucket != NULL);
	return (ipsapp);
}
2817 
2818 /*
2819  * Initialize the mechanism parameters associated with an SA.
2820  * These parameters can be shared by multiple packets, which saves
2821  * us from the overhead of consulting the algorithm table for
2822  * each packet.
2823  */
2824 static void
2825 sadb_init_alginfo(ipsa_t *sa)
2826 {
2827 	ipsec_alginfo_t *alg;
2828 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
2829 
2830 	mutex_enter(&ipss->ipsec_alg_lock);
2831 
2832 	if (sa->ipsa_encrkey != NULL) {
2833 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
2834 		if (alg != NULL && ALG_VALID(alg)) {
2835 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
2836 			sa->ipsa_emech.cm_param = NULL;
2837 			sa->ipsa_emech.cm_param_len = 0;
2838 			sa->ipsa_iv_len = alg->alg_datalen;
2839 		} else
2840 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
2841 	}
2842 
2843 	if (sa->ipsa_authkey != NULL) {
2844 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
2845 		if (alg != NULL && ALG_VALID(alg)) {
2846 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
2847 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
2848 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
2849 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
2850 		} else
2851 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
2852 	}
2853 
2854 	mutex_exit(&ipss->ipsec_alg_lock);
2855 }
2856 
2857 /*
2858  * Perform NAT-traversal cached checksum offset calculations here.
2859  */
2860 static void
2861 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2862     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2863     uint32_t *dst_addr_ptr)
2864 {
2865 	struct sockaddr_in *natt_loc, *natt_rem;
2866 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2867 	uint32_t running_sum = 0;
2868 
2869 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2870 
2871 	if (natt_rem_ext != NULL) {
2872 		uint32_t l_src;
2873 		uint32_t l_rem;
2874 
2875 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2876 
2877 		/* Ensured by sadb_addrfix(). */
2878 		ASSERT(natt_rem->sin_family == AF_INET);
2879 
2880 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2881 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2882 		l_src = *src_addr_ptr;
2883 		l_rem = *natt_rem_ptr;
2884 
2885 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2886 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2887 
2888 		l_src = ntohl(l_src);
2889 		DOWN_SUM(l_src);
2890 		DOWN_SUM(l_src);
2891 		l_rem = ntohl(l_rem);
2892 		DOWN_SUM(l_rem);
2893 		DOWN_SUM(l_rem);
2894 
2895 		/*
2896 		 * We're 1's complement for checksums, so check for wraparound
2897 		 * here.
2898 		 */
2899 		if (l_rem > l_src)
2900 			l_src--;
2901 
2902 		running_sum += l_src - l_rem;
2903 
2904 		DOWN_SUM(running_sum);
2905 		DOWN_SUM(running_sum);
2906 	}
2907 
2908 	if (natt_loc_ext != NULL) {
2909 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2910 
2911 		/* Ensured by sadb_addrfix(). */
2912 		ASSERT(natt_loc->sin_family == AF_INET);
2913 
2914 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2915 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2916 
2917 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2918 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2919 
2920 		/*
2921 		 * NAT-T port agility means we may have natt_loc_ext, but
2922 		 * only for a local-port change.
2923 		 */
2924 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2925 			uint32_t l_dst = ntohl(*dst_addr_ptr);
2926 			uint32_t l_loc = ntohl(*natt_loc_ptr);
2927 
2928 			DOWN_SUM(l_loc);
2929 			DOWN_SUM(l_loc);
2930 			DOWN_SUM(l_dst);
2931 			DOWN_SUM(l_dst);
2932 
2933 			/*
2934 			 * We're 1's complement for checksums, so check for
2935 			 * wraparound here.
2936 			 */
2937 			if (l_loc > l_dst)
2938 				l_dst--;
2939 
2940 			running_sum += l_dst - l_loc;
2941 			DOWN_SUM(running_sum);
2942 			DOWN_SUM(running_sum);
2943 		}
2944 	}
2945 
2946 	newbie->ipsa_inbound_cksum = running_sum;
2947 #undef DOWN_SUM
2948 }
2949 
2950 /*
2951  * This function is called from consumers that need to insert a fully-grown
2952  * security association into its tables.  This function takes into account that
2953  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2954  * hash bucket parameters are set in order of what the SA will be most of the
2955  * time.  (For example, an SA with an unspecified source, and a multicast
2956  * destination will primarily be an outbound SA.  OTOH, if that destination
2957  * is unicast for this node, then the SA will primarily be inbound.)
2958  *
2959  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2960  * to check both buckets for purposes of collision.
2961  *
2962  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2963  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2964  * with additional diagnostic information because there is at least one EINVAL
2965  * case here.
2966  */
2967 int
2968 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2969     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2970     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2971     netstack_t *ns, sadbp_t *spp)
2972 {
2973 	ipsa_t *newbie_clone = NULL, *scratch;
2974 	ipsap_t *ipsapp = NULL;
2975 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2976 	sadb_address_t *srcext =
2977 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2978 	sadb_address_t *dstext =
2979 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2980 	sadb_address_t *isrcext =
2981 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2982 	sadb_address_t *idstext =
2983 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2984 	sadb_x_kmc_t *kmcext =
2985 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2986 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2987 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2988 	sadb_x_pair_t *pair_ext =
2989 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2990 #if 0
2991 	/*
2992 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
2993 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
2994 	 */
2995 	sadb_sens_t *sens = (sadb_sens_t *);
2996 #endif
2997 	struct sockaddr_in *src, *dst, *isrc, *idst;
2998 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2999 	sadb_lifetime_t *soft =
3000 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3001 	sadb_lifetime_t *hard =
3002 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3003 	sa_family_t af;
3004 	int error = 0;
3005 	boolean_t isupdate = (newbie != NULL);
3006 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
3007 	mblk_t *ctl_mp = NULL;
3008 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3009 
3010 	if (srcext == NULL) {
3011 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3012 		return (EINVAL);
3013 	}
3014 	if (dstext == NULL) {
3015 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3016 		return (EINVAL);
3017 	}
3018 	if (assoc == NULL) {
3019 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3020 		return (EINVAL);
3021 	}
3022 
3023 	src = (struct sockaddr_in *)(srcext + 1);
3024 	src6 = (struct sockaddr_in6 *)(srcext + 1);
3025 	dst = (struct sockaddr_in *)(dstext + 1);
3026 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
3027 	if (isrcext != NULL) {
3028 		isrc = (struct sockaddr_in *)(isrcext + 1);
3029 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
3030 		ASSERT(idstext != NULL);
3031 		idst = (struct sockaddr_in *)(idstext + 1);
3032 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
3033 	} else {
3034 		isrc = NULL;
3035 		isrc6 = NULL;
3036 	}
3037 
3038 	af = src->sin_family;
3039 
3040 	if (af == AF_INET) {
3041 		src_addr_ptr = (uint32_t *)&src->sin_addr;
3042 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
3043 	} else {
3044 		ASSERT(af == AF_INET6);
3045 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
3046 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
3047 	}
3048 
3049 	/*
3050 	 * Check to see if the new SA will be cloned AND paired. The
3051 	 * reason a SA will be cloned is the source or destination addresses
3052 	 * are not specific enough to determine if the SA goes in the outbound
3053 	 * or the inbound hash table, so its cloned and put in both. If
3054 	 * the SA is paired, it's soft linked to another SA for the other
3055 	 * direction. Keeping track and looking up SA's that are direction
3056 	 * unspecific and linked is too hard.
3057 	 */
3058 	if (clone && (pair_ext != NULL)) {
3059 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3060 		return (EINVAL);
3061 	}
3062 
3063 	if (!isupdate) {
3064 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3065 		    src_addr_ptr, dst_addr_ptr, af, ns);
3066 		if (newbie == NULL)
3067 			return (ENOMEM);
3068 	}
3069 
3070 	mutex_enter(&newbie->ipsa_lock);
3071 
3072 	if (isrc != NULL) {
3073 		if (isrc->sin_family == AF_INET) {
3074 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3075 				if (srcext->sadb_address_proto != 0) {
3076 					/*
3077 					 * Mismatched outer-packet protocol
3078 					 * and inner-packet address family.
3079 					 */
3080 					mutex_exit(&newbie->ipsa_lock);
3081 					error = EPROTOTYPE;
3082 					goto error;
3083 				} else {
3084 					/* Fill in with explicit protocol. */
3085 					srcext->sadb_address_proto =
3086 					    IPPROTO_ENCAP;
3087 					dstext->sadb_address_proto =
3088 					    IPPROTO_ENCAP;
3089 				}
3090 			}
3091 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3092 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3093 		} else {
3094 			ASSERT(isrc->sin_family == AF_INET6);
3095 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3096 				if (srcext->sadb_address_proto != 0) {
3097 					/*
3098 					 * Mismatched outer-packet protocol
3099 					 * and inner-packet address family.
3100 					 */
3101 					mutex_exit(&newbie->ipsa_lock);
3102 					error = EPROTOTYPE;
3103 					goto error;
3104 				} else {
3105 					/* Fill in with explicit protocol. */
3106 					srcext->sadb_address_proto =
3107 					    IPPROTO_IPV6;
3108 					dstext->sadb_address_proto =
3109 					    IPPROTO_IPV6;
3110 				}
3111 			}
3112 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3113 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3114 		}
3115 		newbie->ipsa_innerfam = isrc->sin_family;
3116 
3117 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3118 		    newbie->ipsa_innerfam);
3119 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3120 		    newbie->ipsa_innerfam);
3121 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3122 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3123 
3124 		/* Unique value uses inner-ports for Tunnel Mode... */
3125 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3126 		    idst->sin_port, dstext->sadb_address_proto,
3127 		    idstext->sadb_address_proto);
3128 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3129 		    idst->sin_port, dstext->sadb_address_proto,
3130 		    idstext->sadb_address_proto);
3131 	} else {
3132 		/* ... and outer-ports for Transport Mode. */
3133 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3134 		    dst->sin_port, dstext->sadb_address_proto, 0);
3135 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3136 		    dst->sin_port, dstext->sadb_address_proto, 0);
3137 	}
3138 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3139 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3140 
3141 	sadb_nat_calculations(newbie,
3142 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3143 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3144 	    src_addr_ptr, dst_addr_ptr);
3145 
3146 	newbie->ipsa_type = samsg->sadb_msg_satype;
3147 	ASSERT(assoc->sadb_sa_state == SADB_SASTATE_MATURE);
3148 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3149 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3150 
3151 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3152 	if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3153 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) ||
3154 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3155 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) ||
3156 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3157 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) {
3158 		mutex_exit(&newbie->ipsa_lock);
3159 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3160 		error = EINVAL;
3161 		goto error;
3162 	}
3163 	/*
3164 	 * If unspecified source address, force replay_wsize to 0.
3165 	 * This is because an SA that has multiple sources of secure
3166 	 * traffic cannot enforce a replay counter w/o synchronizing the
3167 	 * senders.
3168 	 */
3169 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3170 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3171 	else
3172 		newbie->ipsa_replay_wsize = 0;
3173 
3174 	newbie->ipsa_addtime = gethrestime_sec();
3175 
3176 	if (kmcext != NULL) {
3177 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3178 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3179 	}
3180 
3181 	/*
3182 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3183 	 * The spec says that one can update current lifetimes, but
3184 	 * that seems impractical, especially in the larval-to-mature
3185 	 * update that this function performs.
3186 	 */
3187 	if (soft != NULL) {
3188 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3189 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3190 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3191 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3192 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3193 	}
3194 	if (hard != NULL) {
3195 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3196 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3197 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3198 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3199 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3200 	}
3201 
3202 	newbie->ipsa_authtmpl = NULL;
3203 	newbie->ipsa_encrtmpl = NULL;
3204 
3205 	if (akey != NULL) {
3206 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3207 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3208 		/* In case we have to round up to the next byte... */
3209 		if ((akey->sadb_key_bits & 0x7) != 0)
3210 			newbie->ipsa_authkeylen++;
3211 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3212 		    KM_NOSLEEP);
3213 		if (newbie->ipsa_authkey == NULL) {
3214 			error = ENOMEM;
3215 			mutex_exit(&newbie->ipsa_lock);
3216 			goto error;
3217 		}
3218 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3219 		bzero(akey + 1, newbie->ipsa_authkeylen);
3220 
3221 		/*
3222 		 * Pre-initialize the kernel crypto framework key
3223 		 * structure.
3224 		 */
3225 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3226 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3227 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3228 
3229 		mutex_enter(&ipss->ipsec_alg_lock);
3230 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3231 		mutex_exit(&ipss->ipsec_alg_lock);
3232 		if (error != 0) {
3233 			mutex_exit(&newbie->ipsa_lock);
3234 			goto error;
3235 		}
3236 	}
3237 
3238 	if (ekey != NULL) {
3239 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3240 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3241 		/* In case we have to round up to the next byte... */
3242 		if ((ekey->sadb_key_bits & 0x7) != 0)
3243 			newbie->ipsa_encrkeylen++;
3244 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3245 		    KM_NOSLEEP);
3246 		if (newbie->ipsa_encrkey == NULL) {
3247 			error = ENOMEM;
3248 			mutex_exit(&newbie->ipsa_lock);
3249 			goto error;
3250 		}
3251 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3252 		/* XXX is this safe w.r.t db_ref, etc? */
3253 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3254 
3255 		/*
3256 		 * Pre-initialize the kernel crypto framework key
3257 		 * structure.
3258 		 */
3259 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3260 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3261 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3262 
3263 		mutex_enter(&ipss->ipsec_alg_lock);
3264 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3265 		mutex_exit(&ipss->ipsec_alg_lock);
3266 		if (error != 0) {
3267 			mutex_exit(&newbie->ipsa_lock);
3268 			goto error;
3269 		}
3270 	}
3271 
3272 	sadb_init_alginfo(newbie);
3273 
3274 	/*
3275 	 * Ptrs to processing functions.
3276 	 */
3277 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3278 		ipsecesp_init_funcs(newbie);
3279 	else
3280 		ipsecah_init_funcs(newbie);
3281 	ASSERT(newbie->ipsa_output_func != NULL &&
3282 	    newbie->ipsa_input_func != NULL);
3283 
3284 	/*
3285 	 * Certificate ID stuff.
3286 	 */
3287 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3288 		sadb_ident_t *id =
3289 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3290 
3291 		/*
3292 		 * Can assume strlen() will return okay because ext_check() in
3293 		 * keysock.c prepares the string for us.
3294 		 */
3295 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3296 		    (char *)(id+1), ns);
3297 		if (newbie->ipsa_src_cid == NULL) {
3298 			error = ENOMEM;
3299 			mutex_exit(&newbie->ipsa_lock);
3300 			goto error;
3301 		}
3302 	}
3303 
3304 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3305 		sadb_ident_t *id =
3306 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3307 
3308 		/*
3309 		 * Can assume strlen() will return okay because ext_check() in
3310 		 * keysock.c prepares the string for us.
3311 		 */
3312 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3313 		    (char *)(id+1), ns);
3314 		if (newbie->ipsa_dst_cid == NULL) {
3315 			error = ENOMEM;
3316 			mutex_exit(&newbie->ipsa_lock);
3317 			goto error;
3318 		}
3319 	}
3320 
3321 #if 0
3322 	/* XXXMLS  SENSITIVITY handling code. */
3323 	if (sens != NULL) {
3324 		int i;
3325 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3326 
3327 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3328 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3329 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3330 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3331 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3332 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3333 		    KM_NOSLEEP);
3334 		if (newbie->ipsa_integ == NULL) {
3335 			error = ENOMEM;
3336 			mutex_exit(&newbie->ipsa_lock);
3337 			goto error;
3338 		}
3339 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3340 		    KM_NOSLEEP);
3341 		if (newbie->ipsa_sens == NULL) {
3342 			error = ENOMEM;
3343 			mutex_exit(&newbie->ipsa_lock);
3344 			goto error;
3345 		}
3346 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3347 			newbie->ipsa_sens[i] = *bitmap;
3348 			bitmap++;
3349 		}
3350 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3351 			newbie->ipsa_integ[i] = *bitmap;
3352 			bitmap++;
3353 		}
3354 	}
3355 
3356 #endif
3357 
3358 	/* now that the SA has been updated, set its new state */
3359 	newbie->ipsa_state = assoc->sadb_sa_state;
3360 
3361 	if (clone) {
3362 		newbie->ipsa_haspeer = B_TRUE;
3363 	} else {
3364 		if (!is_inbound) {
3365 			lifetime_fuzz(newbie);
3366 		}
3367 	}
3368 	/*
3369 	 * The less locks I hold when doing an insertion and possible cloning,
3370 	 * the better!
3371 	 */
3372 	mutex_exit(&newbie->ipsa_lock);
3373 
3374 	if (clone) {
3375 		newbie_clone = sadb_cloneassoc(newbie);
3376 
3377 		if (newbie_clone == NULL) {
3378 			error = ENOMEM;
3379 			goto error;
3380 		}
3381 	}
3382 
3383 	/*
3384 	 * Enter the bucket locks.  The order of entry is outbound,
3385 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3386 	 * based on the destination address type.  If the destination address
3387 	 * type is for a node that isn't mine (or potentially mine), the
3388 	 * "primary" bucket is the outbound one.
3389 	 */
3390 	if (!is_inbound) {
3391 		/* primary == outbound */
3392 		mutex_enter(&primary->isaf_lock);
3393 		mutex_enter(&secondary->isaf_lock);
3394 	} else {
3395 		/* primary == inbound */
3396 		mutex_enter(&secondary->isaf_lock);
3397 		mutex_enter(&primary->isaf_lock);
3398 	}
3399 
3400 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3401 	    newbie->ipsa_spi));
3402 
3403 	/*
3404 	 * sadb_insertassoc() doesn't increment the reference
3405 	 * count.  We therefore have to increment the
3406 	 * reference count one more time to reflect the
3407 	 * pointers of the table that reference this SA.
3408 	 */
3409 	IPSA_REFHOLD(newbie);
3410 
3411 	if (isupdate) {
3412 		/*
3413 		 * Unlink from larval holding cell in the "inbound" fanout.
3414 		 */
3415 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3416 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3417 		sadb_unlinkassoc(newbie);
3418 	}
3419 
3420 	mutex_enter(&newbie->ipsa_lock);
3421 	error = sadb_insertassoc(newbie, primary);
3422 	if (error == 0) {
3423 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3424 		    is_inbound);
3425 	}
3426 	mutex_exit(&newbie->ipsa_lock);
3427 
3428 	if (error != 0) {
3429 		/*
3430 		 * Since sadb_insertassoc() failed, we must decrement the
3431 		 * refcount again so the cleanup code will actually free
3432 		 * the offending SA.
3433 		 */
3434 		IPSA_REFRELE(newbie);
3435 		goto error_unlock;
3436 	}
3437 
3438 	if (newbie_clone != NULL) {
3439 		mutex_enter(&newbie_clone->ipsa_lock);
3440 		error = sadb_insertassoc(newbie_clone, secondary);
3441 		mutex_exit(&newbie_clone->ipsa_lock);
3442 		if (error != 0) {
3443 			/* Collision in secondary table. */
3444 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3445 			goto error_unlock;
3446 		}
3447 		IPSA_REFHOLD(newbie_clone);
3448 	} else {
3449 		ASSERT(primary != secondary);
3450 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3451 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3452 		if (scratch != NULL) {
3453 			/* Collision in secondary table. */
3454 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3455 			/* Set the error, since ipsec_getassocbyspi() can't. */
3456 			error = EEXIST;
3457 			goto error_unlock;
3458 		}
3459 	}
3460 
3461 	/* OKAY!  So let's do some reality check assertions. */
3462 
3463 	ASSERT(!MUTEX_HELD(&newbie->ipsa_lock));
3464 	ASSERT(newbie_clone == NULL || (!MUTEX_HELD(&newbie_clone->ipsa_lock)));
3465 	/*
3466 	 * If hardware acceleration could happen, send it.
3467 	 */
3468 	if (ctl_mp != NULL) {
3469 		putnext(ip_q, ctl_mp);
3470 		ctl_mp = NULL;
3471 	}
3472 
3473 error_unlock:
3474 
3475 	/*
3476 	 * We can exit the locks in any order.	Only entrance needs to
3477 	 * follow any protocol.
3478 	 */
3479 	mutex_exit(&secondary->isaf_lock);
3480 	mutex_exit(&primary->isaf_lock);
3481 
3482 	if (pair_ext != NULL && error == 0) {
3483 		/* update pair_spi if it exists. */
3484 		ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
3485 		if (ipsapp == NULL) {
3486 			error = ESRCH;
3487 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
3488 		} else if (ipsapp->ipsap_psa_ptr != NULL) {
3489 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3490 			error = EINVAL;
3491 		} else {
3492 			/* update_pairing() sets diagnostic */
3493 			error = update_pairing(ipsapp, ksi, diagnostic, spp);
3494 		}
3495 	}
3496 	/* Common error point for this routine. */
3497 error:
3498 	if (newbie != NULL) {
3499 		if (error != 0) {
3500 			/* This SA is broken, let the reaper clean up. */
3501 			mutex_enter(&newbie->ipsa_lock);
3502 			newbie->ipsa_state = IPSA_STATE_DEAD;
3503 			newbie->ipsa_hardexpiretime = 1;
3504 			mutex_exit(&newbie->ipsa_lock);
3505 		}
3506 		IPSA_REFRELE(newbie);
3507 	}
3508 	if (newbie_clone != NULL) {
3509 		IPSA_REFRELE(newbie_clone);
3510 	}
3511 	if (ctl_mp != NULL)
3512 		freemsg(ctl_mp);
3513 
3514 	if (error == 0) {
3515 		/*
3516 		 * Construct favorable PF_KEY return message and send to
3517 		 * keysock. Update the flags in the original keysock message
3518 		 * to reflect the actual flags in the new SA.
3519 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3520 		 * make sure to REFHOLD, call, then REFRELE.)
3521 		 */
3522 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3523 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3524 	}
3525 
3526 	destroy_ipsa_pair(ipsapp);
3527 	return (error);
3528 }
3529 
3530 /*
3531  * Set the time of first use for a security association.  Update any
3532  * expiration times as a result.
3533  */
3534 void
3535 sadb_set_usetime(ipsa_t *assoc)
3536 {
3537 	time_t snapshot = gethrestime_sec();
3538 
3539 	mutex_enter(&assoc->ipsa_lock);
3540 	assoc->ipsa_lastuse = snapshot;
3541 	/*
3542 	 * Caller does check usetime before calling me usually, and
3543 	 * double-checking is better than a mutex_enter/exit hit.
3544 	 */
3545 	if (assoc->ipsa_usetime == 0) {
3546 		/*
3547 		 * This is redundant for outbound SA's, as
3548 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3549 		 * Inbound SAs, however, have no such protection.
3550 		 */
3551 		assoc->ipsa_flags |= IPSA_F_USED;
3552 		assoc->ipsa_usetime = snapshot;
3553 
3554 		/*
3555 		 * After setting the use time, see if we have a use lifetime
3556 		 * that would cause the actual SA expiration time to shorten.
3557 		 */
3558 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3559 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3560 	}
3561 	mutex_exit(&assoc->ipsa_lock);
3562 }
3563 
3564 /*
3565  * Send up a PF_KEY expire message for this association.
3566  */
3567 static void
3568 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3569 {
3570 	mblk_t *mp, *mp1;
3571 	int alloclen, af;
3572 	sadb_msg_t *samsg;
3573 	sadb_lifetime_t *current, *expire;
3574 	sadb_sa_t *saext;
3575 	uint8_t *end;
3576 	boolean_t tunnel_mode;
3577 
3578 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3579 
3580 	/* Don't bother sending if there's no queue. */
3581 	if (pfkey_q == NULL)
3582 		return;
3583 
3584 	/* If the SA is one of a pair, only SOFT expire the OUTBOUND SA */
3585 	if (assoc->ipsa_state == IPSA_STATE_DYING &&
3586 	    (assoc->ipsa_flags & IPSA_F_PAIRED) &&
3587 	    !(assoc->ipsa_flags & IPSA_F_OUTBOUND)) {
3588 		return;
3589 	}
3590 
3591 	mp = sadb_keysock_out(0);
3592 	if (mp == NULL) {
3593 		/* cmn_err(CE_WARN, */
3594 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3595 		return;
3596 	}
3597 
3598 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3599 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3600 
3601 	af = assoc->ipsa_addrfam;
3602 	switch (af) {
3603 	case AF_INET:
3604 		alloclen += 2 * sizeof (struct sockaddr_in);
3605 		break;
3606 	case AF_INET6:
3607 		alloclen += 2 * sizeof (struct sockaddr_in6);
3608 		break;
3609 	default:
3610 		/* Won't happen unless there's a kernel bug. */
3611 		freeb(mp);
3612 		cmn_err(CE_WARN,
3613 		    "sadb_expire_assoc: Unknown address length.\n");
3614 		return;
3615 	}
3616 
3617 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3618 	if (tunnel_mode) {
3619 		alloclen += 2 * sizeof (sadb_address_t);
3620 		switch (assoc->ipsa_innerfam) {
3621 		case AF_INET:
3622 			alloclen += 2 * sizeof (struct sockaddr_in);
3623 			break;
3624 		case AF_INET6:
3625 			alloclen += 2 * sizeof (struct sockaddr_in6);
3626 			break;
3627 		default:
3628 			/* Won't happen unless there's a kernel bug. */
3629 			freeb(mp);
3630 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3631 			    "Unknown inner address length.\n");
3632 			return;
3633 		}
3634 	}
3635 
3636 	mp->b_cont = allocb(alloclen, BPRI_HI);
3637 	if (mp->b_cont == NULL) {
3638 		freeb(mp);
3639 		/* cmn_err(CE_WARN, */
3640 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3641 		return;
3642 	}
3643 
3644 	mp1 = mp;
3645 	mp = mp->b_cont;
3646 	end = mp->b_wptr + alloclen;
3647 
3648 	samsg = (sadb_msg_t *)mp->b_wptr;
3649 	mp->b_wptr += sizeof (*samsg);
3650 	samsg->sadb_msg_version = PF_KEY_V2;
3651 	samsg->sadb_msg_type = SADB_EXPIRE;
3652 	samsg->sadb_msg_errno = 0;
3653 	samsg->sadb_msg_satype = assoc->ipsa_type;
3654 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3655 	samsg->sadb_msg_reserved = 0;
3656 	samsg->sadb_msg_seq = 0;
3657 	samsg->sadb_msg_pid = 0;
3658 
3659 	saext = (sadb_sa_t *)mp->b_wptr;
3660 	mp->b_wptr += sizeof (*saext);
3661 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3662 	saext->sadb_sa_exttype = SADB_EXT_SA;
3663 	saext->sadb_sa_spi = assoc->ipsa_spi;
3664 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3665 	saext->sadb_sa_state = assoc->ipsa_state;
3666 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3667 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3668 	saext->sadb_sa_flags = assoc->ipsa_flags;
3669 
3670 	current = (sadb_lifetime_t *)mp->b_wptr;
3671 	mp->b_wptr += sizeof (sadb_lifetime_t);
3672 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3673 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3674 	/* We do not support the concept. */
3675 	current->sadb_lifetime_allocations = 0;
3676 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3677 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3678 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3679 
3680 	expire = (sadb_lifetime_t *)mp->b_wptr;
3681 	mp->b_wptr += sizeof (*expire);
3682 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3683 
3684 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3685 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3686 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3687 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3688 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3689 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3690 	} else {
3691 		ASSERT(assoc->ipsa_state == IPSA_STATE_DYING);
3692 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3693 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3694 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3695 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3696 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3697 	}
3698 
3699 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3700 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3701 	    SA_PROTO(assoc), 0);
3702 	ASSERT(mp->b_wptr != NULL);
3703 
3704 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3705 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3706 	    SA_PROTO(assoc), 0);
3707 	ASSERT(mp->b_wptr != NULL);
3708 
3709 	if (tunnel_mode) {
3710 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3711 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3712 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3713 		    assoc->ipsa_innersrcpfx);
3714 		ASSERT(mp->b_wptr != NULL);
3715 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3716 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3717 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3718 		    assoc->ipsa_innerdstpfx);
3719 		ASSERT(mp->b_wptr != NULL);
3720 	}
3721 
3722 	/* Can just putnext, we're ready to go! */
3723 	putnext(pfkey_q, mp1);
3724 }
3725 
3726 /*
3727  * "Age" the SA with the number of bytes that was used to protect traffic.
3728  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3729  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3730  * otherwise.  (If B_FALSE is returned, the association either was, or became
3731  * DEAD.)
3732  */
3733 boolean_t
3734 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3735     boolean_t sendmsg)
3736 {
3737 	boolean_t rc = B_TRUE;
3738 	uint64_t newtotal;
3739 
3740 	mutex_enter(&assoc->ipsa_lock);
3741 	newtotal = assoc->ipsa_bytes + bytes;
3742 	if (assoc->ipsa_hardbyteslt != 0 &&
3743 	    newtotal >= assoc->ipsa_hardbyteslt) {
3744 		if (assoc->ipsa_state < IPSA_STATE_DEAD) {
3745 			/*
3746 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3747 			 * this off on another non-interrupt thread.  Also
3748 			 * unlink this SA immediately.
3749 			 */
3750 			assoc->ipsa_state = IPSA_STATE_DEAD;
3751 			if (sendmsg)
3752 				sadb_expire_assoc(pfkey_q, assoc);
3753 			/*
3754 			 * Set non-zero expiration time so sadb_age_assoc()
3755 			 * will work when reaping.
3756 			 */
3757 			assoc->ipsa_hardexpiretime = (time_t)1;
3758 		} /* Else someone beat me to it! */
3759 		rc = B_FALSE;
3760 	} else if (assoc->ipsa_softbyteslt != 0 &&
3761 	    (newtotal >= assoc->ipsa_softbyteslt)) {
3762 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3763 			/*
3764 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3765 			 * this off on another non-interrupt thread.
3766 			 */
3767 			assoc->ipsa_state = IPSA_STATE_DYING;
3768 			assoc->ipsa_bytes = newtotal;
3769 			if (sendmsg)
3770 				sadb_expire_assoc(pfkey_q, assoc);
3771 		} /* Else someone beat me to it! */
3772 	}
3773 	if (rc == B_TRUE)
3774 		assoc->ipsa_bytes = newtotal;
3775 	mutex_exit(&assoc->ipsa_lock);
3776 	return (rc);
3777 }
3778 
3779 /*
3780  * Push one or more DL_CO_DELETE messages queued up by
3781  * sadb_torch_assoc down to the underlying driver now that it's a
3782  * convenient time for it (i.e., ipsa bucket locks not held).
3783  */
3784 static void
3785 sadb_drain_torchq(queue_t *q, mblk_t *mp)
3786 {
3787 	while (mp != NULL) {
3788 		mblk_t *next = mp->b_next;
3789 		mp->b_next = NULL;
3790 		if (q != NULL)
3791 			putnext(q, mp);
3792 		else
3793 			freemsg(mp);
3794 		mp = next;
3795 	}
3796 }
3797 
3798 /*
3799  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3800  *     sadb_age_assoc().
3801  *
3802  * If SA is hardware-accelerated, and we can't allocate the mblk
3803  * containing the DL_CO_DELETE, just return; it will remain in the
3804  * table and be swept up by sadb_ager() in a subsequent pass.
3805  */
3806 static ipsa_t *
3807 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
3808 {
3809 	mblk_t *mp;
3810 
3811 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3812 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3813 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3814 
3815 	/*
3816 	 * Force cached SAs to be revalidated..
3817 	 */
3818 	head->isaf_gen++;
3819 
3820 	if (sa->ipsa_flags & IPSA_F_HW) {
3821 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
3822 		if (mp == NULL) {
3823 			mutex_exit(&sa->ipsa_lock);
3824 			return (NULL);
3825 		}
3826 		mp->b_next = *mq;
3827 		*mq = mp;
3828 	}
3829 	mutex_exit(&sa->ipsa_lock);
3830 	sadb_unlinkassoc(sa);
3831 
3832 	return (NULL);
3833 }
3834 
3835 /*
3836  * Do various SA-is-idle activities depending on delta (the number of idle
3837  * seconds on the SA) and/or other properties of the SA.
3838  *
3839  * Return B_TRUE if I've sent a packet, because I have to drop the
3840  * association's mutex before sending a packet out the wire.
3841  */
3842 /* ARGSUSED */
3843 static boolean_t
3844 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3845 {
3846 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3847 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3848 
3849 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3850 
3851 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3852 	    delta >= nat_t_interval &&
3853 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3854 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3855 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3856 		mutex_exit(&assoc->ipsa_lock);
3857 		ipsecesp_send_keepalive(assoc);
3858 		return (B_TRUE);
3859 	}
3860 	return (B_FALSE);
3861 }
3862 
3863 /*
3864  * Return "assoc" if haspeer is true and I send an expire.  This allows
3865  * the consumers' aging functions to tidy up an expired SA's peer.
3866  */
3867 static ipsa_t *
3868 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3869     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
3870 {
3871 	ipsa_t *retval = NULL;
3872 	boolean_t dropped_mutex = B_FALSE;
3873 
3874 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3875 
3876 	mutex_enter(&assoc->ipsa_lock);
3877 
3878 	if ((assoc->ipsa_state == IPSA_STATE_LARVAL) &&
3879 	    (assoc->ipsa_hardexpiretime <= current)) {
3880 		assoc->ipsa_state = IPSA_STATE_DEAD;
3881 		return (sadb_torch_assoc(head, assoc, inbound, mq));
3882 	}
3883 
3884 	/*
3885 	 * Check lifetimes.  Fortunately, SA setup is done
3886 	 * such that there are only two times to look at,
3887 	 * softexpiretime, and hardexpiretime.
3888 	 *
3889 	 * Check hard first.
3890 	 */
3891 
3892 	if (assoc->ipsa_hardexpiretime != 0 &&
3893 	    assoc->ipsa_hardexpiretime <= current) {
3894 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
3895 			return (sadb_torch_assoc(head, assoc, inbound, mq));
3896 
3897 		/*
3898 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
3899 		 */
3900 		assoc->ipsa_state = IPSA_STATE_DEAD;
3901 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
3902 			/*
3903 			 * If the SA is paired or peered with another, put
3904 			 * a copy on a list which can be processed later, the
3905 			 * pair/peer SA needs to be updated so the both die
3906 			 * at the same time.
3907 			 *
3908 			 * If I return assoc, I have to bump up its reference
3909 			 * count to keep with the ipsa_t reference count
3910 			 * semantics.
3911 			 */
3912 			IPSA_REFHOLD(assoc);
3913 			retval = assoc;
3914 		}
3915 		sadb_expire_assoc(pfkey_q, assoc);
3916 		assoc->ipsa_hardexpiretime = current + reap_delay;
3917 	} else if (assoc->ipsa_softexpiretime != 0 &&
3918 	    assoc->ipsa_softexpiretime <= current &&
3919 	    assoc->ipsa_state < IPSA_STATE_DYING) {
3920 		/*
3921 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
3922 		 * this off on another non-interrupt thread.
3923 		 */
3924 		assoc->ipsa_state = IPSA_STATE_DYING;
3925 		if (assoc->ipsa_haspeer) {
3926 			/*
3927 			 * If the SA has a peer, update the peer's state
3928 			 * on SOFT_EXPIRE, this is mostly to prevent two
3929 			 * expire messages from effectively the same SA.
3930 			 *
3931 			 * Don't care about paired SA's, then can (and should)
3932 			 * be able to soft expire at different times.
3933 			 *
3934 			 * If I return assoc, I have to bump up its
3935 			 * reference count to keep with the ipsa_t reference
3936 			 * count semantics.
3937 			 */
3938 			IPSA_REFHOLD(assoc);
3939 			retval = assoc;
3940 		}
3941 		sadb_expire_assoc(pfkey_q, assoc);
3942 	} else {
3943 		/* Check idle time activities. */
3944 		dropped_mutex = sadb_idle_activities(assoc,
3945 		    current - assoc->ipsa_lastuse, inbound);
3946 	}
3947 
3948 	if (!dropped_mutex)
3949 		mutex_exit(&assoc->ipsa_lock);
3950 	return (retval);
3951 }
3952 
3953 /*
3954  * Called by a consumer protocol to do ther dirty work of reaping dead
3955  * Security Associations.
3956  *
3957  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
3958  * SA's that are already marked DEAD, so expired SA's are only reaped
3959  * the second time sadb_ager() runs.
3960  */
3961 void
3962 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
3963     netstack_t *ns)
3964 {
3965 	int i;
3966 	isaf_t *bucket;
3967 	ipsa_t *assoc, *spare;
3968 	iacqf_t *acqlist;
3969 	ipsacq_t *acqrec, *spareacq;
3970 	templist_t *haspeerlist, *newbie;
3971 	/* Snapshot current time now. */
3972 	time_t current = gethrestime_sec();
3973 	mblk_t *mq = NULL;
3974 	haspeerlist = NULL;
3975 
3976 	/*
3977 	 * Do my dirty work.  This includes aging real entries, aging
3978 	 * larvals, and aging outstanding ACQUIREs.
3979 	 *
3980 	 * I hope I don't tie up resources for too long.
3981 	 */
3982 
3983 	/* Age acquires. */
3984 
3985 	for (i = 0; i < sp->sdb_hashsize; i++) {
3986 		acqlist = &sp->sdb_acq[i];
3987 		mutex_enter(&acqlist->iacqf_lock);
3988 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
3989 		    acqrec = spareacq) {
3990 			spareacq = acqrec->ipsacq_next;
3991 			if (current > acqrec->ipsacq_expire)
3992 				sadb_destroy_acquire(acqrec, ns);
3993 		}
3994 		mutex_exit(&acqlist->iacqf_lock);
3995 	}
3996 
3997 	/* Age inbound associations. */
3998 	for (i = 0; i < sp->sdb_hashsize; i++) {
3999 		bucket = &(sp->sdb_if[i]);
4000 		mutex_enter(&bucket->isaf_lock);
4001 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4002 		    assoc = spare) {
4003 			spare = assoc->ipsa_next;
4004 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4005 			    reap_delay, B_TRUE, &mq) != NULL) {
4006 				/*
4007 				 * Put SA's which have a peer or SA's which
4008 				 * are paired on a list for processing after
4009 				 * all the hash tables have been walked.
4010 				 *
4011 				 * sadb_age_assoc() increments the refcnt,
4012 				 * effectively doing an IPSA_REFHOLD().
4013 				 */
4014 				newbie = kmem_alloc(sizeof (*newbie),
4015 				    KM_NOSLEEP);
4016 				if (newbie == NULL) {
4017 					/*
4018 					 * Don't forget to REFRELE().
4019 					 */
4020 					IPSA_REFRELE(assoc);
4021 					continue;	/* for loop... */
4022 				}
4023 				newbie->next = haspeerlist;
4024 				newbie->ipsa = assoc;
4025 				haspeerlist = newbie;
4026 			}
4027 		}
4028 		mutex_exit(&bucket->isaf_lock);
4029 	}
4030 
4031 	if (mq != NULL) {
4032 		sadb_drain_torchq(ip_q, mq);
4033 		mq = NULL;
4034 	}
4035 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4036 	haspeerlist = NULL;
4037 
4038 	/* Age outbound associations. */
4039 	for (i = 0; i < sp->sdb_hashsize; i++) {
4040 		bucket = &(sp->sdb_of[i]);
4041 		mutex_enter(&bucket->isaf_lock);
4042 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4043 		    assoc = spare) {
4044 			spare = assoc->ipsa_next;
4045 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4046 			    reap_delay, B_FALSE, &mq) != NULL) {
4047 				/*
4048 				 * sadb_age_assoc() increments the refcnt,
4049 				 * effectively doing an IPSA_REFHOLD().
4050 				 */
4051 				newbie = kmem_alloc(sizeof (*newbie),
4052 				    KM_NOSLEEP);
4053 				if (newbie == NULL) {
4054 					/*
4055 					 * Don't forget to REFRELE().
4056 					 */
4057 					IPSA_REFRELE(assoc);
4058 					continue;	/* for loop... */
4059 				}
4060 				newbie->next = haspeerlist;
4061 				newbie->ipsa = assoc;
4062 				haspeerlist = newbie;
4063 			}
4064 		}
4065 		mutex_exit(&bucket->isaf_lock);
4066 	}
4067 	if (mq != NULL) {
4068 		sadb_drain_torchq(ip_q, mq);
4069 		mq = NULL;
4070 	}
4071 
4072 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4073 
4074 	/*
4075 	 * Run a GC pass to clean out dead identities.
4076 	 */
4077 	ipsid_gc(ns);
4078 }
4079 
4080 /*
4081  * Figure out when to reschedule the ager.
4082  */
4083 timeout_id_t
4084 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4085     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4086 {
4087 	hrtime_t end = gethrtime();
4088 	uint_t interval = *intp;
4089 
4090 	/*
4091 	 * See how long this took.  If it took too long, increase the
4092 	 * aging interval.
4093 	 */
4094 	if ((end - begin) > interval * 1000000) {
4095 		if (interval >= intmax) {
4096 			/* XXX Rate limit this?  Or recommend flush? */
4097 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4098 			    "Too many SA's to age out in %d msec.\n",
4099 			    intmax);
4100 		} else {
4101 			/* Double by shifting by one bit. */
4102 			interval <<= 1;
4103 			interval = min(interval, intmax);
4104 		}
4105 	} else if ((end - begin) <= interval * 500000 &&
4106 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4107 		/*
4108 		 * If I took less than half of the interval, then I should
4109 		 * ratchet the interval back down.  Never automatically
4110 		 * shift below the default aging interval.
4111 		 *
4112 		 * NOTE:This even overrides manual setting of the age
4113 		 *	interval using NDD.
4114 		 */
4115 		/* Halve by shifting one bit. */
4116 		interval >>= 1;
4117 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4118 	}
4119 	*intp = interval;
4120 	return (qtimeout(pfkey_q, ager, agerarg,
4121 	    interval * drv_usectohz(1000)));
4122 }
4123 
4124 
4125 /*
4126  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4127  * message takes when updating a MATURE or DYING SA.
4128  */
4129 static void
4130 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4131     sadb_lifetime_t *soft, boolean_t outbound)
4132 {
4133 	mutex_enter(&assoc->ipsa_lock);
4134 
4135 	/*
4136 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4137 	 * passed in during an update message.	We currently don't handle
4138 	 * these.
4139 	 */
4140 
4141 	if (hard != NULL) {
4142 		if (hard->sadb_lifetime_bytes != 0)
4143 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4144 		if (hard->sadb_lifetime_usetime != 0)
4145 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4146 		if (hard->sadb_lifetime_addtime != 0)
4147 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4148 		if (assoc->ipsa_hardaddlt != 0) {
4149 			assoc->ipsa_hardexpiretime =
4150 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4151 		}
4152 		if (assoc->ipsa_harduselt != 0 &&
4153 		    assoc->ipsa_flags & IPSA_F_USED) {
4154 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4155 		}
4156 		if (hard->sadb_lifetime_allocations != 0)
4157 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4158 	}
4159 
4160 	if (soft != NULL) {
4161 		if (soft->sadb_lifetime_bytes != 0) {
4162 			if (soft->sadb_lifetime_bytes >
4163 			    assoc->ipsa_hardbyteslt) {
4164 				assoc->ipsa_softbyteslt =
4165 				    assoc->ipsa_hardbyteslt;
4166 			} else {
4167 				assoc->ipsa_softbyteslt =
4168 				    soft->sadb_lifetime_bytes;
4169 			}
4170 		}
4171 		if (soft->sadb_lifetime_usetime != 0) {
4172 			if (soft->sadb_lifetime_usetime >
4173 			    assoc->ipsa_harduselt) {
4174 				assoc->ipsa_softuselt =
4175 				    assoc->ipsa_harduselt;
4176 			} else {
4177 				assoc->ipsa_softuselt =
4178 				    soft->sadb_lifetime_usetime;
4179 			}
4180 		}
4181 		if (soft->sadb_lifetime_addtime != 0) {
4182 			if (soft->sadb_lifetime_addtime >
4183 			    assoc->ipsa_hardexpiretime) {
4184 				assoc->ipsa_softexpiretime =
4185 				    assoc->ipsa_hardexpiretime;
4186 			} else {
4187 				assoc->ipsa_softaddlt =
4188 				    soft->sadb_lifetime_addtime;
4189 			}
4190 		}
4191 		if (assoc->ipsa_softaddlt != 0) {
4192 			assoc->ipsa_softexpiretime =
4193 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4194 		}
4195 		if (assoc->ipsa_softuselt != 0 &&
4196 		    assoc->ipsa_flags & IPSA_F_USED) {
4197 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4198 		}
4199 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4200 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4201 				lifetime_fuzz(assoc);
4202 		}
4203 
4204 		if (soft->sadb_lifetime_allocations != 0)
4205 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4206 	}
4207 	mutex_exit(&assoc->ipsa_lock);
4208 }
4209 
4210 /*
4211  * Common code to update an SA.
4212  */
4213 
4214 int
4215 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi,
4216     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4217     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4218     netstack_t *ns, uint8_t sadb_msg_type)
4219 {
4220 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4221 	sadb_address_t *srcext =
4222 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4223 	sadb_address_t *dstext =
4224 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4225 	sadb_x_kmc_t *kmcext =
4226 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4227 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4228 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4229 	sadb_lifetime_t *soft =
4230 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4231 	sadb_lifetime_t *hard =
4232 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4233 	sadb_x_pair_t *pair_ext =
4234 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4235 	ipsa_t *echo_target = NULL;
4236 	int error = 0;
4237 	ipsap_t *ipsapp = NULL;
4238 	uint32_t kmp = 0, kmc = 0;
4239 
4240 
4241 	/* I need certain extensions present for either UPDATE message. */
4242 	if (srcext == NULL) {
4243 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4244 		return (EINVAL);
4245 	}
4246 	if (dstext == NULL) {
4247 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4248 		return (EINVAL);
4249 	}
4250 	if (assoc == NULL) {
4251 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4252 		return (EINVAL);
4253 	}
4254 
4255 	if (kmcext != NULL) {
4256 		kmp = kmcext->sadb_x_kmc_proto;
4257 		kmc = kmcext->sadb_x_kmc_cookie;
4258 	}
4259 
4260 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4261 	if (ipsapp == NULL) {
4262 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
4263 		return (ESRCH);
4264 	}
4265 
4266 	if (ipsapp->ipsap_psa_ptr == NULL && ipsapp->ipsap_sa_ptr != NULL) {
4267 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4268 			/*
4269 			 * REFRELE the target and let the add_sa_func()
4270 			 * deal with updating a larval SA.
4271 			 */
4272 			destroy_ipsa_pair(ipsapp);
4273 			return (add_sa_func(mp, ksi, diagnostic, ns));
4274 		}
4275 	}
4276 
4277 	/*
4278 	 * Reality checks for updates of active associations.
4279 	 * Sundry first-pass UPDATE-specific reality checks.
4280 	 * Have to do the checks here, because it's after the add_sa code.
4281 	 * XXX STATS : logging/stats here?
4282 	 */
4283 
4284 	if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
4285 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4286 		error = EINVAL;
4287 		goto bail;
4288 	}
4289 	if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY |
4290 	    SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM |
4291 	    SADB_X_SAFLAGS_OUTBOUND | SADB_X_SAFLAGS_INBOUND |
4292 	    SADB_X_SAFLAGS_PAIRED)) {
4293 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4294 		error = EINVAL;
4295 		goto bail;
4296 	}
4297 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4298 		error = EOPNOTSUPP;
4299 		goto bail;
4300 	}
4301 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) {
4302 		error = EINVAL;
4303 		goto bail;
4304 	}
4305 	if (akey != NULL) {
4306 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4307 		error = EINVAL;
4308 		goto bail;
4309 	}
4310 	if (ekey != NULL) {
4311 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4312 		error = EINVAL;
4313 		goto bail;
4314 	}
4315 
4316 	if (ipsapp->ipsap_sa_ptr != NULL) {
4317 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4318 			error = ESRCH;	/* DEAD == Not there, in this case. */
4319 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4320 			goto bail;
4321 		}
4322 		if ((kmp != 0) &&
4323 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmp != 0) ||
4324 		    (ipsapp->ipsap_sa_ptr->ipsa_kmp != kmp))) {
4325 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4326 			error = EINVAL;
4327 			goto bail;
4328 		}
4329 		if ((kmc != 0) &&
4330 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmc != 0) ||
4331 		    (ipsapp->ipsap_sa_ptr->ipsa_kmc != kmc))) {
4332 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4333 			error = EINVAL;
4334 			goto bail;
4335 		}
4336 	}
4337 
4338 	if (ipsapp->ipsap_psa_ptr != NULL) {
4339 		if (ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4340 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4341 			error = ESRCH;	/* DEAD == Not there, in this case. */
4342 			goto bail;
4343 		}
4344 		if ((kmp != 0) &&
4345 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmp != 0) ||
4346 		    (ipsapp->ipsap_psa_ptr->ipsa_kmp != kmp))) {
4347 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4348 			error = EINVAL;
4349 			goto bail;
4350 		}
4351 		if ((kmc != 0) &&
4352 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmc != 0) ||
4353 		    (ipsapp->ipsap_psa_ptr->ipsa_kmc != kmc))) {
4354 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4355 			error = EINVAL;
4356 			goto bail;
4357 		}
4358 	}
4359 
4360 	if (ipsapp->ipsap_sa_ptr != NULL) {
4361 		sadb_update_lifetimes(ipsapp->ipsap_sa_ptr, hard, soft, B_TRUE);
4362 		if (kmp != 0)
4363 			ipsapp->ipsap_sa_ptr->ipsa_kmp = kmp;
4364 		if (kmc != 0)
4365 			ipsapp->ipsap_sa_ptr->ipsa_kmc = kmc;
4366 	}
4367 
4368 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4369 		if (ipsapp->ipsap_psa_ptr != NULL) {
4370 			sadb_update_lifetimes(ipsapp->ipsap_psa_ptr, hard, soft,
4371 			    B_FALSE);
4372 			if (kmp != 0)
4373 				ipsapp->ipsap_psa_ptr->ipsa_kmp = kmp;
4374 			if (kmc != 0)
4375 				ipsapp->ipsap_psa_ptr->ipsa_kmc = kmc;
4376 		} else {
4377 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4378 			error = ESRCH;
4379 			goto bail;
4380 		}
4381 	}
4382 
4383 	if (pair_ext != NULL)
4384 		error = update_pairing(ipsapp, ksi, diagnostic, spp);
4385 
4386 	if (error == 0)
4387 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4388 		    ksi, echo_target);
4389 bail:
4390 
4391 	destroy_ipsa_pair(ipsapp);
4392 
4393 	return (error);
4394 }
4395 
4396 
4397 int
4398 update_pairing(ipsap_t *ipsapp, keysock_in_t *ksi, int *diagnostic,
4399     sadbp_t *spp)
4400 {
4401 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4402 	sadb_address_t *srcext =
4403 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4404 	sadb_address_t *dstext =
4405 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4406 	sadb_x_pair_t *pair_ext =
4407 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4408 	int error = 0;
4409 	ipsap_t *oipsapp = NULL;
4410 	boolean_t undo_pair = B_FALSE;
4411 	uint32_t ipsa_flags;
4412 
4413 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4414 	    assoc->sadb_sa_spi) {
4415 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4416 		return (EINVAL);
4417 	}
4418 
4419 	/*
4420 	 * Assume for now that the spi value provided in the SADB_UPDATE
4421 	 * message was valid, update the SA with its pair spi value.
4422 	 * If the spi turns out to be bogus or the SA no longer exists
4423 	 * then this will be detected when the reverse update is made
4424 	 * below.
4425 	 */
4426 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4427 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4428 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4429 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4430 
4431 	/*
4432 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4433 	 * should now return pointers to the SA *AND* its pair, if this is not
4434 	 * the case, the "otherspi" either did not exist or was deleted. Also
4435 	 * check that "otherspi" is not already paired. If everything looks
4436 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4437 	 * after this update to ensure its not deleted until we are done.
4438 	 */
4439 	oipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4440 	if (oipsapp == NULL) {
4441 		/*
4442 		 * This should never happen, calling function still has
4443 		 * IPSA_REFHELD on the SA we just updated.
4444 		 */
4445 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4446 		return (EINVAL);
4447 	}
4448 
4449 	if (oipsapp->ipsap_psa_ptr == NULL) {
4450 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4451 		undo_pair = B_TRUE;
4452 	} else {
4453 		ipsa_flags = oipsapp->ipsap_psa_ptr->ipsa_flags;
4454 		if (oipsapp->ipsap_psa_ptr->ipsa_state > IPSA_STATE_MATURE) {
4455 			/* Its dead Jim! */
4456 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4457 			undo_pair = B_TRUE;
4458 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4459 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4460 			/* This SA is in both hashtables. */
4461 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4462 			undo_pair = B_TRUE;
4463 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4464 			/* This SA is already paired with another. */
4465 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4466 			undo_pair = B_TRUE;
4467 		}
4468 	}
4469 
4470 	if (undo_pair) {
4471 		/* The pair SA does not exist. */
4472 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4473 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4474 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4475 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4476 		error = EINVAL;
4477 	} else {
4478 		mutex_enter(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4479 		oipsapp->ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4480 		oipsapp->ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4481 		mutex_exit(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4482 	}
4483 
4484 	destroy_ipsa_pair(oipsapp);
4485 	return (error);
4486 }
4487 
4488 /*
4489  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4490  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4491  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4492  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4493  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4494  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4495  * other direction's SA.
4496  */
4497 
4498 /*
4499  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4500  * grab it, lock it, and return it.  Otherwise return NULL.
4501  */
4502 static ipsacq_t *
4503 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4504     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4505     uint64_t unique_id)
4506 {
4507 	ipsacq_t *walker;
4508 	sa_family_t fam;
4509 	uint32_t blank_address[4] = {0, 0, 0, 0};
4510 
4511 	if (isrc == NULL) {
4512 		ASSERT(idst == NULL);
4513 		isrc = idst = blank_address;
4514 	}
4515 
4516 	/*
4517 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4518 	 *
4519 	 * XXX May need search for duplicates based on other things too!
4520 	 */
4521 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4522 	    walker = walker->ipsacq_next) {
4523 		mutex_enter(&walker->ipsacq_lock);
4524 		fam = walker->ipsacq_addrfam;
4525 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4526 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4527 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4528 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4529 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4530 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4531 		    (ap == walker->ipsacq_act) &&
4532 		    (pp == walker->ipsacq_policy) &&
4533 		    /* XXX do deep compares of ap/pp? */
4534 		    (unique_id == walker->ipsacq_unique_id))
4535 			break;			/* everything matched */
4536 		mutex_exit(&walker->ipsacq_lock);
4537 	}
4538 
4539 	return (walker);
4540 }
4541 
4542 /*
4543  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4544  * of all of the same length.  Give up (and drop) if memory
4545  * cannot be allocated for a new one; otherwise, invoke callback to
4546  * send the acquire up..
4547  *
4548  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4549  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
4550  * and handle this case specially.
4551  */
4552 void
4553 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
4554 {
4555 	sadbp_t *spp;
4556 	sadb_t *sp;
4557 	ipsacq_t *newbie;
4558 	iacqf_t *bucket;
4559 	mblk_t *datamp = mp->b_cont;
4560 	mblk_t *extended;
4561 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4562 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4563 	uint32_t *src, *dst, *isrc, *idst;
4564 	ipsec_policy_t *pp = io->ipsec_out_policy;
4565 	ipsec_action_t *ap = io->ipsec_out_act;
4566 	sa_family_t af;
4567 	int hashoffset;
4568 	uint32_t seq;
4569 	uint64_t unique_id = 0;
4570 	ipsec_selector_t sel;
4571 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
4572 	netstack_t	*ns = io->ipsec_out_ns;
4573 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4574 
4575 	ASSERT((pp != NULL) || (ap != NULL));
4576 
4577 	ASSERT(need_ah != NULL || need_esp != NULL);
4578 	/* Assign sadb pointers */
4579 	if (need_esp) { /* ESP for AH+ESP */
4580 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4581 
4582 		spp = &espstack->esp_sadb;
4583 	} else {
4584 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
4585 
4586 		spp = &ahstack->ah_sadb;
4587 	}
4588 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
4589 
4590 	if (ap == NULL)
4591 		ap = pp->ipsp_act;
4592 
4593 	ASSERT(ap != NULL);
4594 
4595 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4596 		unique_id = SA_FORM_UNIQUE_ID(io);
4597 
4598 	/*
4599 	 * Set up an ACQUIRE record.
4600 	 *
4601 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
4602 	 * below the lowest point allowed in the kernel.  (In other words,
4603 	 * make sure the high bit on the sequence number is set.)
4604 	 */
4605 
4606 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4607 
4608 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4609 		src = (uint32_t *)&ipha->ipha_src;
4610 		dst = (uint32_t *)&ipha->ipha_dst;
4611 		af = AF_INET;
4612 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4613 		ASSERT(io->ipsec_out_v4 == B_TRUE);
4614 	} else {
4615 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4616 		src = (uint32_t *)&ip6h->ip6_src;
4617 		dst = (uint32_t *)&ip6h->ip6_dst;
4618 		af = AF_INET6;
4619 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4620 		ASSERT(io->ipsec_out_v4 == B_FALSE);
4621 	}
4622 
4623 	if (tunnel_mode) {
4624 		/* Snag inner addresses. */
4625 		isrc = io->ipsec_out_insrc;
4626 		idst = io->ipsec_out_indst;
4627 	} else {
4628 		isrc = idst = NULL;
4629 	}
4630 
4631 	/*
4632 	 * Check buckets to see if there is an existing entry.  If so,
4633 	 * grab it.  sadb_checkacquire locks newbie if found.
4634 	 */
4635 	bucket = &(sp->sdb_acq[hashoffset]);
4636 	mutex_enter(&bucket->iacqf_lock);
4637 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4638 	    unique_id);
4639 
4640 	if (newbie == NULL) {
4641 		/*
4642 		 * Otherwise, allocate a new one.
4643 		 */
4644 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4645 		if (newbie == NULL) {
4646 			mutex_exit(&bucket->iacqf_lock);
4647 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
4648 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
4649 			    &ipss->ipsec_sadb_dropper);
4650 			return;
4651 		}
4652 		newbie->ipsacq_policy = pp;
4653 		if (pp != NULL) {
4654 			IPPOL_REFHOLD(pp);
4655 		}
4656 		IPACT_REFHOLD(ap);
4657 		newbie->ipsacq_act = ap;
4658 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
4659 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
4660 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4661 		if (newbie->ipsacq_next != NULL)
4662 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4663 		bucket->iacqf_ipsacq = newbie;
4664 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4665 		mutex_enter(&newbie->ipsacq_lock);
4666 	}
4667 
4668 	mutex_exit(&bucket->iacqf_lock);
4669 
4670 	/*
4671 	 * This assert looks silly for now, but we may need to enter newbie's
4672 	 * mutex during a search.
4673 	 */
4674 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
4675 
4676 	mp->b_next = NULL;
4677 	/* Queue up packet.  Use b_next. */
4678 	if (newbie->ipsacq_numpackets == 0) {
4679 		/* First one. */
4680 		newbie->ipsacq_mp = mp;
4681 		newbie->ipsacq_numpackets = 1;
4682 		newbie->ipsacq_expire = gethrestime_sec();
4683 		/*
4684 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
4685 		 * value.
4686 		 */
4687 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
4688 		newbie->ipsacq_seq = seq;
4689 		newbie->ipsacq_addrfam = af;
4690 
4691 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
4692 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
4693 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
4694 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
4695 		if (tunnel_mode) {
4696 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
4697 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
4698 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
4699 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
4700 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
4701 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
4702 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
4703 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
4704 			    io->ipsec_out_indst, io->ipsec_out_inaf);
4705 		} else {
4706 			newbie->ipsacq_proto = io->ipsec_out_proto;
4707 		}
4708 		newbie->ipsacq_unique_id = unique_id;
4709 	} else {
4710 		/* Scan to the end of the list & insert. */
4711 		mblk_t *lastone = newbie->ipsacq_mp;
4712 
4713 		while (lastone->b_next != NULL)
4714 			lastone = lastone->b_next;
4715 		lastone->b_next = mp;
4716 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
4717 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
4718 			lastone = newbie->ipsacq_mp;
4719 			newbie->ipsacq_mp = lastone->b_next;
4720 			lastone->b_next = NULL;
4721 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
4722 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
4723 			    &ipss->ipsec_sadb_dropper);
4724 		} else {
4725 			IP_ACQUIRE_STAT(ipss, qhiwater,
4726 			    newbie->ipsacq_numpackets);
4727 		}
4728 	}
4729 
4730 	/*
4731 	 * Reset addresses.  Set them to the most recently added mblk chain,
4732 	 * so that the address pointers in the acquire record will point
4733 	 * at an mblk still attached to the acquire list.
4734 	 */
4735 
4736 	newbie->ipsacq_srcaddr = src;
4737 	newbie->ipsacq_dstaddr = dst;
4738 
4739 	/*
4740 	 * If the acquire record has more than one queued packet, we've
4741 	 * already sent an ACQUIRE, and don't need to repeat ourself.
4742 	 */
4743 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
4744 		/* I have an acquire outstanding already! */
4745 		mutex_exit(&newbie->ipsacq_lock);
4746 		return;
4747 	}
4748 
4749 	if (keysock_extended_reg(ns)) {
4750 		/*
4751 		 * Construct an extended ACQUIRE.  There are logging
4752 		 * opportunities here in failure cases.
4753 		 */
4754 
4755 		(void) memset(&sel, 0, sizeof (sel));
4756 		sel.ips_isv4 = io->ipsec_out_v4;
4757 		if (tunnel_mode) {
4758 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
4759 			    IPPROTO_ENCAP : IPPROTO_IPV6;
4760 		} else {
4761 			sel.ips_protocol = io->ipsec_out_proto;
4762 			sel.ips_local_port = io->ipsec_out_src_port;
4763 			sel.ips_remote_port = io->ipsec_out_dst_port;
4764 		}
4765 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
4766 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
4767 		sel.ips_is_icmp_inv_acq = 0;
4768 		if (af == AF_INET) {
4769 			sel.ips_local_addr_v4 = ipha->ipha_src;
4770 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
4771 		} else {
4772 			sel.ips_local_addr_v6 = ip6h->ip6_src;
4773 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
4774 		}
4775 
4776 		extended = sadb_keysock_out(0);
4777 		if (extended != NULL) {
4778 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
4779 			    tunnel_mode, seq, 0, ns);
4780 			if (extended->b_cont == NULL) {
4781 				freeb(extended);
4782 				extended = NULL;
4783 			}
4784 		}
4785 	} else
4786 		extended = NULL;
4787 
4788 	/*
4789 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
4790 	 * this new record.  The send-acquire callback assumes that acqrec is
4791 	 * already locked.
4792 	 */
4793 	(*spp->s_acqfn)(newbie, extended, ns);
4794 }
4795 
4796 /*
4797  * Unlink and free an acquire record.
4798  */
4799 void
4800 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
4801 {
4802 	mblk_t *mp;
4803 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4804 
4805 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
4806 
4807 	if (acqrec->ipsacq_policy != NULL) {
4808 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
4809 	}
4810 	if (acqrec->ipsacq_act != NULL) {
4811 		IPACT_REFRELE(acqrec->ipsacq_act);
4812 	}
4813 
4814 	/* Unlink */
4815 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
4816 	if (acqrec->ipsacq_next != NULL)
4817 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
4818 
4819 	/*
4820 	 * Free hanging mp's.
4821 	 *
4822 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
4823 	 */
4824 
4825 	mutex_enter(&acqrec->ipsacq_lock);
4826 	while (acqrec->ipsacq_mp != NULL) {
4827 		mp = acqrec->ipsacq_mp;
4828 		acqrec->ipsacq_mp = mp->b_next;
4829 		mp->b_next = NULL;
4830 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
4831 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
4832 		    &ipss->ipsec_sadb_dropper);
4833 	}
4834 	mutex_exit(&acqrec->ipsacq_lock);
4835 
4836 	/* Free */
4837 	mutex_destroy(&acqrec->ipsacq_lock);
4838 	kmem_free(acqrec, sizeof (*acqrec));
4839 }
4840 
4841 /*
4842  * Destroy an acquire list fanout.
4843  */
4844 static void
4845 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
4846     netstack_t *ns)
4847 {
4848 	int i;
4849 	iacqf_t *list = *listp;
4850 
4851 	if (list == NULL)
4852 		return;
4853 
4854 	for (i = 0; i < numentries; i++) {
4855 		mutex_enter(&(list[i].iacqf_lock));
4856 		while (list[i].iacqf_ipsacq != NULL)
4857 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
4858 		mutex_exit(&(list[i].iacqf_lock));
4859 		if (forever)
4860 			mutex_destroy(&(list[i].iacqf_lock));
4861 	}
4862 
4863 	if (forever) {
4864 		*listp = NULL;
4865 		kmem_free(list, numentries * sizeof (*list));
4866 	}
4867 }
4868 
4869 /*
4870  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
4871  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
4872  */
4873 static uint8_t *
4874 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
4875     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
4876     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
4877 {
4878 	uint8_t *cur = start;
4879 	ipsec_alginfo_t *algp;
4880 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
4881 
4882 	cur += sizeof (*algdesc);
4883 	if (cur >= limit)
4884 		return (NULL);
4885 
4886 	ecomb->sadb_x_ecomb_numalgs++;
4887 
4888 	/*
4889 	 * Normalize vs. crypto framework's limits.  This way, you can specify
4890 	 * a stronger policy, and when the framework loads a stronger version,
4891 	 * you can just keep plowing w/o rewhacking your SPD.
4892 	 */
4893 	mutex_enter(&ipss->ipsec_alg_lock);
4894 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
4895 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
4896 	if (algp == NULL) {
4897 		mutex_exit(&ipss->ipsec_alg_lock);
4898 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
4899 	}
4900 	if (minbits < algp->alg_ef_minbits)
4901 		minbits = algp->alg_ef_minbits;
4902 	if (maxbits > algp->alg_ef_maxbits)
4903 		maxbits = algp->alg_ef_maxbits;
4904 	mutex_exit(&ipss->ipsec_alg_lock);
4905 
4906 	algdesc->sadb_x_algdesc_satype = satype;
4907 	algdesc->sadb_x_algdesc_algtype = algtype;
4908 	algdesc->sadb_x_algdesc_alg = alg;
4909 	algdesc->sadb_x_algdesc_minbits = minbits;
4910 	algdesc->sadb_x_algdesc_maxbits = maxbits;
4911 	algdesc->sadb_x_algdesc_reserved = 0;
4912 	return (cur);
4913 }
4914 
4915 /*
4916  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
4917  * which must fit before *limit
4918  *
4919  * return NULL if we ran out of room or a pointer to the end of the ecomb.
4920  */
4921 static uint8_t *
4922 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
4923     netstack_t *ns)
4924 {
4925 	uint8_t *cur = start;
4926 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
4927 	ipsec_prot_t *ipp;
4928 	ipsec_stack_t *ipss = ns->netstack_ipsec;
4929 
4930 	cur += sizeof (*ecomb);
4931 	if (cur >= limit)
4932 		return (NULL);
4933 
4934 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
4935 
4936 	ipp = &act->ipa_act.ipa_apply;
4937 
4938 	ecomb->sadb_x_ecomb_numalgs = 0;
4939 	ecomb->sadb_x_ecomb_reserved = 0;
4940 	ecomb->sadb_x_ecomb_reserved2 = 0;
4941 	/*
4942 	 * No limits on allocations, since we really don't support that
4943 	 * concept currently.
4944 	 */
4945 	ecomb->sadb_x_ecomb_soft_allocations = 0;
4946 	ecomb->sadb_x_ecomb_hard_allocations = 0;
4947 
4948 	/*
4949 	 * XXX TBD: Policy or global parameters will eventually be
4950 	 * able to fill in some of these.
4951 	 */
4952 	ecomb->sadb_x_ecomb_flags = 0;
4953 	ecomb->sadb_x_ecomb_soft_bytes = 0;
4954 	ecomb->sadb_x_ecomb_hard_bytes = 0;
4955 	ecomb->sadb_x_ecomb_soft_addtime = 0;
4956 	ecomb->sadb_x_ecomb_hard_addtime = 0;
4957 	ecomb->sadb_x_ecomb_soft_usetime = 0;
4958 	ecomb->sadb_x_ecomb_hard_usetime = 0;
4959 
4960 	if (ipp->ipp_use_ah) {
4961 		cur = sadb_new_algdesc(cur, limit, ecomb,
4962 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
4963 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
4964 		if (cur == NULL)
4965 			return (NULL);
4966 		ipsecah_fill_defs(ecomb, ns);
4967 	}
4968 
4969 	if (ipp->ipp_use_esp) {
4970 		if (ipp->ipp_use_espa) {
4971 			cur = sadb_new_algdesc(cur, limit, ecomb,
4972 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
4973 			    ipp->ipp_esp_auth_alg,
4974 			    ipp->ipp_espa_minbits,
4975 			    ipp->ipp_espa_maxbits, ipss);
4976 			if (cur == NULL)
4977 				return (NULL);
4978 		}
4979 
4980 		cur = sadb_new_algdesc(cur, limit, ecomb,
4981 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
4982 		    ipp->ipp_encr_alg,
4983 		    ipp->ipp_espe_minbits,
4984 		    ipp->ipp_espe_maxbits, ipss);
4985 		if (cur == NULL)
4986 			return (NULL);
4987 		/* Fill in lifetimes if and only if AH didn't already... */
4988 		if (!ipp->ipp_use_ah)
4989 			ipsecesp_fill_defs(ecomb, ns);
4990 	}
4991 
4992 	return (cur);
4993 }
4994 
4995 /*
4996  * Construct an extended ACQUIRE message based on a selector and the resulting
4997  * IPsec action.
4998  *
4999  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5000  * generation. As a consequence, expect this function to evolve
5001  * rapidly.
5002  */
5003 static mblk_t *
5004 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5005     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5006     netstack_t *ns)
5007 {
5008 	mblk_t *mp;
5009 	sadb_msg_t *samsg;
5010 	uint8_t *start, *cur, *end;
5011 	uint32_t *saddrptr, *daddrptr;
5012 	sa_family_t af;
5013 	sadb_prop_t *eprop;
5014 	ipsec_action_t *ap, *an;
5015 	ipsec_selkey_t *ipsl;
5016 	uint8_t proto, pfxlen;
5017 	uint16_t lport, rport;
5018 	uint32_t kmp, kmc;
5019 
5020 	/*
5021 	 * Find the action we want sooner rather than later..
5022 	 */
5023 	an = NULL;
5024 	if (pol == NULL) {
5025 		ap = act;
5026 	} else {
5027 		ap = pol->ipsp_act;
5028 
5029 		if (ap != NULL)
5030 			an = ap->ipa_next;
5031 	}
5032 
5033 	/*
5034 	 * Just take a swag for the allocation for now.	 We can always
5035 	 * alter it later.
5036 	 */
5037 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5038 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5039 	if (mp == NULL)
5040 		return (NULL);
5041 
5042 	start = mp->b_rptr;
5043 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5044 
5045 	cur = start;
5046 
5047 	samsg = (sadb_msg_t *)cur;
5048 	cur += sizeof (*samsg);
5049 
5050 	samsg->sadb_msg_version = PF_KEY_V2;
5051 	samsg->sadb_msg_type = SADB_ACQUIRE;
5052 	samsg->sadb_msg_errno = 0;
5053 	samsg->sadb_msg_reserved = 0;
5054 	samsg->sadb_msg_satype = 0;
5055 	samsg->sadb_msg_seq = seq;
5056 	samsg->sadb_msg_pid = pid;
5057 
5058 	if (tunnel_mode) {
5059 		/*
5060 		 * Form inner address extensions based NOT on the inner
5061 		 * selectors (i.e. the packet data), but on the policy's
5062 		 * selector key (i.e. the policy's selector information).
5063 		 *
5064 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5065 		 * same in ipsec_selkey_t (unless the compiler does very
5066 		 * strange things with unions, consult your local C language
5067 		 * lawyer for details).
5068 		 */
5069 		ipsl = &(pol->ipsp_sel->ipsl_key);
5070 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5071 			af = AF_INET;
5072 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5073 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5074 		} else {
5075 			af = AF_INET6;
5076 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5077 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5078 		}
5079 
5080 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5081 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5082 			pfxlen = ipsl->ipsl_local_pfxlen;
5083 		} else {
5084 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5085 			pfxlen = 0;
5086 		}
5087 		/* XXX What about ICMP type/code? */
5088 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5089 		    ipsl->ipsl_lport : 0;
5090 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5091 		    ipsl->ipsl_proto : 0;
5092 
5093 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5094 		    af, saddrptr, lport, proto, pfxlen);
5095 		if (cur == NULL) {
5096 			freeb(mp);
5097 			return (NULL);
5098 		}
5099 
5100 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5101 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5102 			pfxlen = ipsl->ipsl_remote_pfxlen;
5103 		} else {
5104 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5105 			pfxlen = 0;
5106 		}
5107 		/* XXX What about ICMP type/code? */
5108 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5109 		    ipsl->ipsl_rport : 0;
5110 
5111 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5112 		    af, daddrptr, rport, proto, pfxlen);
5113 		if (cur == NULL) {
5114 			freeb(mp);
5115 			return (NULL);
5116 		}
5117 		/*
5118 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5119 		 * _with_ inner-packet address selectors, we'll need to further
5120 		 * distinguish tunnel mode here.  For now, having inner
5121 		 * addresses and/or ports is sufficient.
5122 		 *
5123 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5124 		 * outer addresses.
5125 		 */
5126 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5127 		lport = rport = 0;
5128 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5129 		proto = 0;
5130 		lport = 0;
5131 		rport = 0;
5132 		if (pol != NULL) {
5133 			ipsl = &(pol->ipsp_sel->ipsl_key);
5134 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5135 				proto = ipsl->ipsl_proto;
5136 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5137 				rport = ipsl->ipsl_rport;
5138 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5139 				lport = ipsl->ipsl_lport;
5140 		}
5141 	} else {
5142 		proto = sel->ips_protocol;
5143 		lport = sel->ips_local_port;
5144 		rport = sel->ips_remote_port;
5145 	}
5146 
5147 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5148 
5149 	/*
5150 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5151 	 * ipsec_selector_t.
5152 	 */
5153 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5154 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5155 
5156 	if (cur == NULL) {
5157 		freeb(mp);
5158 		return (NULL);
5159 	}
5160 
5161 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5162 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5163 
5164 	if (cur == NULL) {
5165 		freeb(mp);
5166 		return (NULL);
5167 	}
5168 
5169 	/*
5170 	 * This section will change a lot as policy evolves.
5171 	 * For now, it'll be relatively simple.
5172 	 */
5173 	eprop = (sadb_prop_t *)cur;
5174 	cur += sizeof (*eprop);
5175 	if (cur > end) {
5176 		/* no space left */
5177 		freeb(mp);
5178 		return (NULL);
5179 	}
5180 
5181 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5182 	eprop->sadb_x_prop_ereserved = 0;
5183 	eprop->sadb_x_prop_numecombs = 0;
5184 	eprop->sadb_prop_replay = 32;	/* default */
5185 
5186 	kmc = kmp = 0;
5187 
5188 	for (; ap != NULL; ap = an) {
5189 		an = (pol != NULL) ? ap->ipa_next : NULL;
5190 
5191 		/*
5192 		 * Skip non-IPsec policies
5193 		 */
5194 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5195 			continue;
5196 
5197 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5198 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5199 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5200 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5201 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5202 			eprop->sadb_prop_replay =
5203 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5204 		}
5205 
5206 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5207 		if (cur == NULL) { /* no space */
5208 			freeb(mp);
5209 			return (NULL);
5210 		}
5211 		eprop->sadb_x_prop_numecombs++;
5212 	}
5213 
5214 	if (eprop->sadb_x_prop_numecombs == 0) {
5215 		/*
5216 		 * This will happen if we fail to find a policy
5217 		 * allowing for IPsec processing.
5218 		 * Construct an error message.
5219 		 */
5220 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5221 		samsg->sadb_msg_errno = ENOENT;
5222 		samsg->sadb_x_msg_diagnostic = 0;
5223 		return (mp);
5224 	}
5225 
5226 	if ((kmp != 0) || (kmc != 0)) {
5227 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5228 		if (cur == NULL) {
5229 			freeb(mp);
5230 			return (NULL);
5231 		}
5232 	}
5233 
5234 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5235 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5236 	mp->b_wptr = cur;
5237 
5238 	return (mp);
5239 }
5240 
5241 /*
5242  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5243  *
5244  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5245  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5246  * maximize code consolidation while preventing algorithm changes from messing
5247  * with the callers finishing touches on the ACQUIRE itself.
5248  */
5249 mblk_t *
5250 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5251 {
5252 	uint_t allocsize;
5253 	mblk_t *pfkeymp, *msgmp;
5254 	sa_family_t af;
5255 	uint8_t *cur, *end;
5256 	sadb_msg_t *samsg;
5257 	uint16_t sport_typecode;
5258 	uint16_t dport_typecode;
5259 	uint8_t check_proto;
5260 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5261 
5262 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5263 
5264 	pfkeymp = sadb_keysock_out(0);
5265 	if (pfkeymp == NULL)
5266 		return (NULL);
5267 
5268 	/*
5269 	 * First, allocate a basic ACQUIRE message
5270 	 */
5271 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5272 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5273 
5274 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5275 	allocsize += 2 * sizeof (struct sockaddr_in6);
5276 
5277 	mutex_enter(&ipss->ipsec_alg_lock);
5278 	/* NOTE:  The lock is now held through to this function's return. */
5279 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5280 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5281 
5282 	if (tunnel_mode) {
5283 		/* Tunnel mode! */
5284 		allocsize += 2 * sizeof (sadb_address_t);
5285 		/* Enough to cover both AF_INET and AF_INET6. */
5286 		allocsize += 2 * sizeof (struct sockaddr_in6);
5287 	}
5288 
5289 	msgmp = allocb(allocsize, BPRI_HI);
5290 	if (msgmp == NULL) {
5291 		freeb(pfkeymp);
5292 		mutex_exit(&ipss->ipsec_alg_lock);
5293 		return (NULL);
5294 	}
5295 
5296 	pfkeymp->b_cont = msgmp;
5297 	cur = msgmp->b_rptr;
5298 	end = cur + allocsize;
5299 	samsg = (sadb_msg_t *)cur;
5300 	cur += sizeof (sadb_msg_t);
5301 
5302 	af = acqrec->ipsacq_addrfam;
5303 	switch (af) {
5304 	case AF_INET:
5305 		check_proto = IPPROTO_ICMP;
5306 		break;
5307 	case AF_INET6:
5308 		check_proto = IPPROTO_ICMPV6;
5309 		break;
5310 	default:
5311 		/* This should never happen unless we have kernel bugs. */
5312 		cmn_err(CE_WARN,
5313 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5314 		ASSERT(0);
5315 		mutex_exit(&ipss->ipsec_alg_lock);
5316 		return (NULL);
5317 	}
5318 
5319 	samsg->sadb_msg_version = PF_KEY_V2;
5320 	samsg->sadb_msg_type = SADB_ACQUIRE;
5321 	samsg->sadb_msg_satype = satype;
5322 	samsg->sadb_msg_errno = 0;
5323 	samsg->sadb_msg_pid = 0;
5324 	samsg->sadb_msg_reserved = 0;
5325 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5326 
5327 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5328 
5329 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5330 		sport_typecode = dport_typecode = 0;
5331 	} else {
5332 		sport_typecode = acqrec->ipsacq_srcport;
5333 		dport_typecode = acqrec->ipsacq_dstport;
5334 	}
5335 
5336 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5337 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5338 
5339 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5340 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5341 
5342 	if (tunnel_mode) {
5343 		sport_typecode = acqrec->ipsacq_srcport;
5344 		dport_typecode = acqrec->ipsacq_dstport;
5345 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5346 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5347 		    sport_typecode, acqrec->ipsacq_inner_proto,
5348 		    acqrec->ipsacq_innersrcpfx);
5349 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5350 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5351 		    dport_typecode, acqrec->ipsacq_inner_proto,
5352 		    acqrec->ipsacq_innerdstpfx);
5353 	}
5354 
5355 	/* XXX Insert identity information here. */
5356 
5357 	/* XXXMLS Insert sensitivity information here. */
5358 
5359 	if (cur != NULL)
5360 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5361 	else
5362 		mutex_exit(&ipss->ipsec_alg_lock);
5363 
5364 	return (pfkeymp);
5365 }
5366 
5367 /*
5368  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5369  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5370  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5371  * (ipsa_t *)-1).
5372  *
5373  * master_spi is passed in host order.
5374  */
5375 ipsa_t *
5376 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5377     netstack_t *ns)
5378 {
5379 	sadb_address_t *src =
5380 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5381 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5382 	sadb_spirange_t *range =
5383 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5384 	struct sockaddr_in *ssa, *dsa;
5385 	struct sockaddr_in6 *ssa6, *dsa6;
5386 	uint32_t *srcaddr, *dstaddr;
5387 	sa_family_t af;
5388 	uint32_t add, min, max;
5389 
5390 	if (src == NULL) {
5391 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5392 		return ((ipsa_t *)-1);
5393 	}
5394 	if (dst == NULL) {
5395 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5396 		return ((ipsa_t *)-1);
5397 	}
5398 	if (range == NULL) {
5399 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5400 		return ((ipsa_t *)-1);
5401 	}
5402 
5403 	min = ntohl(range->sadb_spirange_min);
5404 	max = ntohl(range->sadb_spirange_max);
5405 	dsa = (struct sockaddr_in *)(dst + 1);
5406 	dsa6 = (struct sockaddr_in6 *)dsa;
5407 
5408 	ssa = (struct sockaddr_in *)(src + 1);
5409 	ssa6 = (struct sockaddr_in6 *)ssa;
5410 	ASSERT(dsa->sin_family == ssa->sin_family);
5411 
5412 	srcaddr = ALL_ZEROES_PTR;
5413 	af = dsa->sin_family;
5414 	switch (af) {
5415 	case AF_INET:
5416 		if (src != NULL)
5417 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5418 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5419 		break;
5420 	case AF_INET6:
5421 		if (src != NULL)
5422 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5423 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5424 		break;
5425 	default:
5426 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5427 		return ((ipsa_t *)-1);
5428 	}
5429 
5430 	if (master_spi < min || master_spi > max) {
5431 		/* Return a random value in the range. */
5432 		(void) random_get_pseudo_bytes((uint8_t *)&add, sizeof (add));
5433 		master_spi = min + (add % (max - min + 1));
5434 	}
5435 
5436 	/*
5437 	 * Since master_spi is passed in host order, we need to htonl() it
5438 	 * for the purposes of creating a new SA.
5439 	 */
5440 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5441 	    ns));
5442 }
5443 
5444 /*
5445  *
5446  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5447  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5448  * and scan for the sequence number in question.  I may wish to accept an
5449  * address pair with it, for easier searching.
5450  *
5451  * Caller frees the message, so we don't have to here.
5452  *
5453  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5454  *		failures.
5455  */
5456 /* ARGSUSED */
5457 void
5458 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
5459 {
5460 	int i;
5461 	ipsacq_t *acqrec;
5462 	iacqf_t *bucket;
5463 
5464 	/*
5465 	 * I only accept the base header for this!
5466 	 * Though to be honest, requiring the dst address would help
5467 	 * immensely.
5468 	 *
5469 	 * XXX	There are already cases where I can get the dst address.
5470 	 */
5471 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5472 		return;
5473 
5474 	/*
5475 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5476 	 * (and in the future send a message to IP with the appropriate error
5477 	 * number).
5478 	 *
5479 	 * Q: Do I want to reject if pid != 0?
5480 	 */
5481 
5482 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5483 		bucket = &sp->s_v4.sdb_acq[i];
5484 		mutex_enter(&bucket->iacqf_lock);
5485 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5486 		    acqrec = acqrec->ipsacq_next) {
5487 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5488 				break;	/* for acqrec... loop. */
5489 		}
5490 		if (acqrec != NULL)
5491 			break;	/* for i = 0... loop. */
5492 
5493 		mutex_exit(&bucket->iacqf_lock);
5494 	}
5495 
5496 	if (acqrec == NULL) {
5497 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
5498 			bucket = &sp->s_v6.sdb_acq[i];
5499 			mutex_enter(&bucket->iacqf_lock);
5500 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5501 			    acqrec = acqrec->ipsacq_next) {
5502 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5503 					break;	/* for acqrec... loop. */
5504 			}
5505 			if (acqrec != NULL)
5506 				break;	/* for i = 0... loop. */
5507 
5508 			mutex_exit(&bucket->iacqf_lock);
5509 		}
5510 	}
5511 
5512 
5513 	if (acqrec == NULL)
5514 		return;
5515 
5516 	/*
5517 	 * What do I do with the errno and IP?	I may need mp's services a
5518 	 * little more.	 See sadb_destroy_acquire() for future directions
5519 	 * beyond free the mblk chain on the acquire record.
5520 	 */
5521 
5522 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
5523 	sadb_destroy_acquire(acqrec, ns);
5524 	/* Have to exit mutex here, because of breaking out of for loop. */
5525 	mutex_exit(&bucket->iacqf_lock);
5526 }
5527 
5528 /*
5529  * The following functions work with the replay windows of an SA.  They assume
5530  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
5531  * represents the highest sequence number packet received, and back
5532  * (ipsa->ipsa_replay_wsize) packets.
5533  */
5534 
5535 /*
5536  * Is the replay bit set?
5537  */
5538 static boolean_t
5539 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
5540 {
5541 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5542 
5543 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
5544 }
5545 
5546 /*
5547  * Shift the bits of the replay window over.
5548  */
5549 static void
5550 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
5551 {
5552 	int i;
5553 	int jump = ((shift - 1) >> 6) + 1;
5554 
5555 	if (shift == 0)
5556 		return;
5557 
5558 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
5559 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
5560 			ipsa->ipsa_replay_arr[i + jump] |=
5561 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
5562 		}
5563 		ipsa->ipsa_replay_arr[i] <<= shift;
5564 	}
5565 }
5566 
5567 /*
5568  * Set a bit in the bit vector.
5569  */
5570 static void
5571 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
5572 {
5573 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5574 
5575 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
5576 }
5577 
5578 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
5579 
5580 /*
5581  * Assume caller has NOT done ntohl() already on seq.  Check to see
5582  * if replay sequence number "seq" has been seen already.
5583  */
5584 boolean_t
5585 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
5586 {
5587 	boolean_t rc;
5588 	uint32_t diff;
5589 
5590 	if (ipsa->ipsa_replay_wsize == 0)
5591 		return (B_TRUE);
5592 
5593 	/*
5594 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
5595 	 */
5596 
5597 	/* Convert sequence number into host order before holding the mutex. */
5598 	seq = ntohl(seq);
5599 
5600 	mutex_enter(&ipsa->ipsa_lock);
5601 
5602 	/* Initialize inbound SA's ipsa_replay field to last one received. */
5603 	if (ipsa->ipsa_replay == 0)
5604 		ipsa->ipsa_replay = 1;
5605 
5606 	if (seq > ipsa->ipsa_replay) {
5607 		/*
5608 		 * I have received a new "highest value received".  Shift
5609 		 * the replay window over.
5610 		 */
5611 		diff = seq - ipsa->ipsa_replay;
5612 		if (diff < ipsa->ipsa_replay_wsize) {
5613 			/* In replay window, shift bits over. */
5614 			ipsa_shift_replay(ipsa, diff);
5615 		} else {
5616 			/* WAY FAR AHEAD, clear bits and start again. */
5617 			bzero(ipsa->ipsa_replay_arr,
5618 			    sizeof (ipsa->ipsa_replay_arr));
5619 		}
5620 		ipsa_set_replay(ipsa, 0);
5621 		ipsa->ipsa_replay = seq;
5622 		rc = B_TRUE;
5623 		goto done;
5624 	}
5625 	diff = ipsa->ipsa_replay - seq;
5626 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
5627 		rc = B_FALSE;
5628 		goto done;
5629 	}
5630 	/* Set this packet as seen. */
5631 	ipsa_set_replay(ipsa, diff);
5632 
5633 	rc = B_TRUE;
5634 done:
5635 	mutex_exit(&ipsa->ipsa_lock);
5636 	return (rc);
5637 }
5638 
5639 /*
5640  * "Peek" and see if we should even bother going through the effort of
5641  * running an authentication check on the sequence number passed in.
5642  * this takes into account packets that are below the replay window,
5643  * and collisions with already replayed packets.  Return B_TRUE if it
5644  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
5645  * Assume same byte-ordering as sadb_replay_check.
5646  */
5647 boolean_t
5648 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
5649 {
5650 	boolean_t rc = B_FALSE;
5651 	uint32_t diff;
5652 
5653 	if (ipsa->ipsa_replay_wsize == 0)
5654 		return (B_TRUE);
5655 
5656 	/*
5657 	 * 0 is 0, regardless of byte order... :)
5658 	 *
5659 	 * If I get 0 on the wire (and there is a replay window) then the
5660 	 * sender most likely wrapped.	This ipsa may need to be marked or
5661 	 * something.
5662 	 */
5663 	if (seq == 0)
5664 		return (B_FALSE);
5665 
5666 	seq = ntohl(seq);
5667 	mutex_enter(&ipsa->ipsa_lock);
5668 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
5669 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
5670 		goto done;
5671 
5672 	/*
5673 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
5674 	 * bother with formalities.  I'm not accepting any more packets
5675 	 * on this SA.
5676 	 */
5677 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
5678 		/*
5679 		 * Since we're already holding the lock, update the
5680 		 * expire time ala. sadb_replay_delete() and return.
5681 		 */
5682 		ipsa->ipsa_hardexpiretime = (time_t)1;
5683 		goto done;
5684 	}
5685 
5686 	if (seq <= ipsa->ipsa_replay) {
5687 		/*
5688 		 * This seq is in the replay window.  I'm not below it,
5689 		 * because I already checked for that above!
5690 		 */
5691 		diff = ipsa->ipsa_replay - seq;
5692 		if (ipsa_is_replay_set(ipsa, diff))
5693 			goto done;
5694 	}
5695 	/* Else return B_TRUE, I'm going to advance the window. */
5696 
5697 	rc = B_TRUE;
5698 done:
5699 	mutex_exit(&ipsa->ipsa_lock);
5700 	return (rc);
5701 }
5702 
5703 /*
5704  * Delete a single SA.
5705  *
5706  * For now, use the quick-and-dirty trick of making the association's
5707  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
5708  */
5709 void
5710 sadb_replay_delete(ipsa_t *assoc)
5711 {
5712 	mutex_enter(&assoc->ipsa_lock);
5713 	assoc->ipsa_hardexpiretime = (time_t)1;
5714 	mutex_exit(&assoc->ipsa_lock);
5715 }
5716 
5717 /*
5718  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
5719  * down.  The caller will handle the T_BIND_ACK locally.
5720  */
5721 boolean_t
5722 sadb_t_bind_req(queue_t *q, int proto)
5723 {
5724 	struct T_bind_req *tbr;
5725 	mblk_t *mp;
5726 
5727 	mp = allocb(sizeof (struct T_bind_req) + 1, BPRI_HI);
5728 	if (mp == NULL) {
5729 		/* cmn_err(CE_WARN, */
5730 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
5731 		return (B_FALSE);
5732 	}
5733 	mp->b_datap->db_type = M_PCPROTO;
5734 	tbr = (struct T_bind_req *)mp->b_rptr;
5735 	mp->b_wptr += sizeof (struct T_bind_req);
5736 	tbr->PRIM_type = T_BIND_REQ;
5737 	tbr->ADDR_length = 0;
5738 	tbr->ADDR_offset = 0;
5739 	tbr->CONIND_number = 0;
5740 	*mp->b_wptr = (uint8_t)proto;
5741 	mp->b_wptr++;
5742 
5743 	putnext(q, mp);
5744 	return (B_TRUE);
5745 }
5746 
5747 /*
5748  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
5749  * this is designed to take only a format string with "* %x * %s *", so
5750  * that "spi" is printed first, then "addr" is converted using inet_pton().
5751  *
5752  * This is abstracted out to save the stack space for only when inet_pton()
5753  * is called.  Make sure "spi" is in network order; it usually is when this
5754  * would get called.
5755  */
5756 void
5757 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
5758     uint32_t spi, void *addr, int af, netstack_t *ns)
5759 {
5760 	char buf[INET6_ADDRSTRLEN];
5761 
5762 	ASSERT(af == AF_INET6 || af == AF_INET);
5763 
5764 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
5765 	    inet_ntop(af, addr, buf, sizeof (buf)));
5766 }
5767 
5768 /*
5769  * Fills in a reference to the policy, if any, from the conn, in *ppp
5770  * Releases a reference to the passed conn_t.
5771  */
5772 static void
5773 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
5774 {
5775 	ipsec_policy_t	*pp;
5776 	ipsec_latch_t	*ipl = connp->conn_latch;
5777 
5778 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
5779 		pp = ipl->ipl_out_policy;
5780 		IPPOL_REFHOLD(pp);
5781 	} else {
5782 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
5783 		    connp->conn_netstack);
5784 	}
5785 	*ppp = pp;
5786 	CONN_DEC_REF(connp);
5787 }
5788 
5789 /*
5790  * The following functions scan through active conn_t structures
5791  * and return a reference to the best-matching policy it can find.
5792  * Caller must release the reference.
5793  */
5794 static void
5795 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5796 {
5797 	connf_t *connfp;
5798 	conn_t *connp = NULL;
5799 	ipsec_selector_t portonly;
5800 
5801 	bzero((void*)&portonly, sizeof (portonly));
5802 
5803 	if (sel->ips_local_port == 0)
5804 		return;
5805 
5806 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
5807 	    ipst)];
5808 	mutex_enter(&connfp->connf_lock);
5809 
5810 	if (sel->ips_isv4) {
5811 		connp = connfp->connf_head;
5812 		while (connp != NULL) {
5813 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
5814 			    sel->ips_local_addr_v4, sel->ips_remote_port,
5815 			    sel->ips_remote_addr_v4))
5816 				break;
5817 			connp = connp->conn_next;
5818 		}
5819 
5820 		if (connp == NULL) {
5821 			/* Try port-only match in IPv6. */
5822 			portonly.ips_local_port = sel->ips_local_port;
5823 			sel = &portonly;
5824 		}
5825 	}
5826 
5827 	if (connp == NULL) {
5828 		connp = connfp->connf_head;
5829 		while (connp != NULL) {
5830 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
5831 			    sel->ips_local_addr_v6, sel->ips_remote_port,
5832 			    sel->ips_remote_addr_v6))
5833 				break;
5834 			connp = connp->conn_next;
5835 		}
5836 
5837 		if (connp == NULL) {
5838 			mutex_exit(&connfp->connf_lock);
5839 			return;
5840 		}
5841 	}
5842 
5843 	CONN_INC_REF(connp);
5844 	mutex_exit(&connfp->connf_lock);
5845 
5846 	ipsec_conn_pol(sel, connp, ppp);
5847 }
5848 
5849 static conn_t *
5850 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
5851 {
5852 	connf_t *connfp;
5853 	conn_t *connp = NULL;
5854 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
5855 
5856 	if (sel->ips_local_port == 0)
5857 		return (NULL);
5858 
5859 	connfp = &ipst->ips_ipcl_bind_fanout[
5860 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
5861 	mutex_enter(&connfp->connf_lock);
5862 
5863 	if (sel->ips_isv4) {
5864 		connp = connfp->connf_head;
5865 		while (connp != NULL) {
5866 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
5867 			    sel->ips_local_addr_v4, pptr[1]))
5868 				break;
5869 			connp = connp->conn_next;
5870 		}
5871 
5872 		if (connp == NULL) {
5873 			/* Match to all-zeroes. */
5874 			v6addrmatch = &ipv6_all_zeros;
5875 		}
5876 	}
5877 
5878 	if (connp == NULL) {
5879 		connp = connfp->connf_head;
5880 		while (connp != NULL) {
5881 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
5882 			    *v6addrmatch, pptr[1]))
5883 				break;
5884 			connp = connp->conn_next;
5885 		}
5886 
5887 		if (connp == NULL) {
5888 			mutex_exit(&connfp->connf_lock);
5889 			return (NULL);
5890 		}
5891 	}
5892 
5893 	CONN_INC_REF(connp);
5894 	mutex_exit(&connfp->connf_lock);
5895 	return (connp);
5896 }
5897 
5898 static void
5899 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5900 {
5901 	connf_t 	*connfp;
5902 	conn_t		*connp;
5903 	uint32_t	ports;
5904 	uint16_t	*pptr = (uint16_t *)&ports;
5905 
5906 	/*
5907 	 * Find TCP state in the following order:
5908 	 * 1.) Connected conns.
5909 	 * 2.) Listeners.
5910 	 *
5911 	 * Even though #2 will be the common case for inbound traffic, only
5912 	 * following this order insures correctness.
5913 	 */
5914 
5915 	if (sel->ips_local_port == 0)
5916 		return;
5917 
5918 	/*
5919 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5920 	 * See ipsec_construct_inverse_acquire() for details.
5921 	 */
5922 	pptr[0] = sel->ips_remote_port;
5923 	pptr[1] = sel->ips_local_port;
5924 
5925 	connfp = &ipst->ips_ipcl_conn_fanout[
5926 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
5927 	mutex_enter(&connfp->connf_lock);
5928 	connp = connfp->connf_head;
5929 
5930 	if (sel->ips_isv4) {
5931 		while (connp != NULL) {
5932 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
5933 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
5934 			    ports))
5935 				break;
5936 			connp = connp->conn_next;
5937 		}
5938 	} else {
5939 		while (connp != NULL) {
5940 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
5941 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
5942 			    ports))
5943 				break;
5944 			connp = connp->conn_next;
5945 		}
5946 	}
5947 
5948 	if (connp != NULL) {
5949 		CONN_INC_REF(connp);
5950 		mutex_exit(&connfp->connf_lock);
5951 	} else {
5952 		mutex_exit(&connfp->connf_lock);
5953 
5954 		/* Try the listen hash. */
5955 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
5956 			return;
5957 	}
5958 
5959 	ipsec_conn_pol(sel, connp, ppp);
5960 }
5961 
5962 static void
5963 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
5964     ip_stack_t *ipst)
5965 {
5966 	conn_t		*connp;
5967 	uint32_t	ports;
5968 	uint16_t	*pptr = (uint16_t *)&ports;
5969 
5970 	/*
5971 	 * Find SCP state in the following order:
5972 	 * 1.) Connected conns.
5973 	 * 2.) Listeners.
5974 	 *
5975 	 * Even though #2 will be the common case for inbound traffic, only
5976 	 * following this order insures correctness.
5977 	 */
5978 
5979 	if (sel->ips_local_port == 0)
5980 		return;
5981 
5982 	/*
5983 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5984 	 * See ipsec_construct_inverse_acquire() for details.
5985 	 */
5986 	pptr[0] = sel->ips_remote_port;
5987 	pptr[1] = sel->ips_local_port;
5988 
5989 	if (sel->ips_isv4) {
5990 		in6_addr_t	src, dst;
5991 
5992 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
5993 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
5994 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
5995 		    ipst->ips_netstack->netstack_sctp);
5996 	} else {
5997 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
5998 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
5999 		    ipst->ips_netstack->netstack_sctp);
6000 	}
6001 	if (connp == NULL)
6002 		return;
6003 	ipsec_conn_pol(sel, connp, ppp);
6004 }
6005 
6006 /*
6007  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6008  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6009  * to PF_KEY.
6010  *
6011  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6012  * ignore prefix lengths in the address extension.  Since we match on first-
6013  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6014  * set addresses to mask out the lower bits, we should get a suitable search
6015  * key for the SPD anyway.  This is the function to change if the assumption
6016  * about suitable search keys is wrong.
6017  */
6018 static int
6019 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6020     sadb_address_t *dstext, int *diagnostic)
6021 {
6022 	struct sockaddr_in *src, *dst;
6023 	struct sockaddr_in6 *src6, *dst6;
6024 
6025 	*diagnostic = 0;
6026 
6027 	bzero(sel, sizeof (*sel));
6028 	sel->ips_protocol = srcext->sadb_address_proto;
6029 	dst = (struct sockaddr_in *)(dstext + 1);
6030 	if (dst->sin_family == AF_INET6) {
6031 		dst6 = (struct sockaddr_in6 *)dst;
6032 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6033 		if (src6->sin6_family != AF_INET6) {
6034 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6035 			return (EINVAL);
6036 		}
6037 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6038 		sel->ips_local_addr_v6 = src6->sin6_addr;
6039 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6040 			sel->ips_is_icmp_inv_acq = 1;
6041 		} else {
6042 			sel->ips_remote_port = dst6->sin6_port;
6043 			sel->ips_local_port = src6->sin6_port;
6044 		}
6045 		sel->ips_isv4 = B_FALSE;
6046 	} else {
6047 		src = (struct sockaddr_in *)(srcext + 1);
6048 		if (src->sin_family != AF_INET) {
6049 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6050 			return (EINVAL);
6051 		}
6052 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6053 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6054 		if (sel->ips_protocol == IPPROTO_ICMP) {
6055 			sel->ips_is_icmp_inv_acq = 1;
6056 		} else {
6057 			sel->ips_remote_port = dst->sin_port;
6058 			sel->ips_local_port = src->sin_port;
6059 		}
6060 		sel->ips_isv4 = B_TRUE;
6061 	}
6062 	return (0);
6063 }
6064 
6065 /*
6066  * We have encapsulation.
6067  * - Lookup tun_t by address and look for an associated
6068  *   tunnel policy
6069  * - If there are inner selectors
6070  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6071  *   - Look up tunnel policy based on selectors
6072  * - Else
6073  *   - Sanity check the negotation
6074  *   - If appropriate, fall through to global policy
6075  */
6076 static int
6077 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6078     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6079     int *diagnostic, netstack_t *ns)
6080 {
6081 	int err;
6082 	ipsec_policy_head_t *polhead;
6083 
6084 	/* Check for inner selectors and act appropriately */
6085 
6086 	if (innsrcext != NULL) {
6087 		/* Inner selectors present */
6088 		ASSERT(inndstext != NULL);
6089 		if ((itp == NULL) ||
6090 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6091 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6092 			/*
6093 			 * If inner packet selectors, we must have negotiate
6094 			 * tunnel and active policy.  If the tunnel has
6095 			 * transport-mode policy set on it, or has no policy,
6096 			 * fail.
6097 			 */
6098 			return (ENOENT);
6099 		} else {
6100 			/*
6101 			 * Reset "sel" to indicate inner selectors.  Pass
6102 			 * inner PF_KEY address extensions for this to happen.
6103 			 */
6104 			err = ipsec_get_inverse_acquire_sel(sel,
6105 			    innsrcext, inndstext, diagnostic);
6106 			if (err != 0) {
6107 				ITP_REFRELE(itp, ns);
6108 				return (err);
6109 			}
6110 			/*
6111 			 * Now look for a tunnel policy based on those inner
6112 			 * selectors.  (Common code is below.)
6113 			 */
6114 		}
6115 	} else {
6116 		/* No inner selectors present */
6117 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6118 			/*
6119 			 * Transport mode negotiation with no tunnel policy
6120 			 * configured - return to indicate a global policy
6121 			 * check is needed.
6122 			 */
6123 			if (itp != NULL) {
6124 				ITP_REFRELE(itp, ns);
6125 			}
6126 			return (0);
6127 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6128 			/* Tunnel mode set with no inner selectors. */
6129 			ITP_REFRELE(itp, ns);
6130 			return (ENOENT);
6131 		}
6132 		/*
6133 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6134 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6135 		 * itp with policy set based on any match, so don't bother
6136 		 * changing fields in "sel".
6137 		 */
6138 	}
6139 
6140 	ASSERT(itp != NULL);
6141 	polhead = itp->itp_policy;
6142 	ASSERT(polhead != NULL);
6143 	rw_enter(&polhead->iph_lock, RW_READER);
6144 	*ppp = ipsec_find_policy_head(NULL, polhead,
6145 	    IPSEC_TYPE_INBOUND, sel, ns);
6146 	rw_exit(&polhead->iph_lock);
6147 	ITP_REFRELE(itp, ns);
6148 
6149 	/*
6150 	 * Don't default to global if we didn't find a matching policy entry.
6151 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6152 	 */
6153 	if (*ppp == NULL)
6154 		return (ENOENT);
6155 
6156 	return (0);
6157 }
6158 
6159 static void
6160 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6161     ip_stack_t *ipst)
6162 {
6163 	boolean_t	isv4 = sel->ips_isv4;
6164 	connf_t		*connfp;
6165 	conn_t		*connp;
6166 
6167 	if (isv4) {
6168 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
6169 	} else {
6170 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6171 	}
6172 
6173 	mutex_enter(&connfp->connf_lock);
6174 	for (connp = connfp->connf_head; connp != NULL;
6175 	    connp = connp->conn_next) {
6176 		if (!((isv4 && !((connp->conn_src == 0 ||
6177 		    connp->conn_src == sel->ips_local_addr_v4) &&
6178 		    (connp->conn_rem == 0 ||
6179 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
6180 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
6181 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
6182 		    &sel->ips_local_addr_v6)) &&
6183 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
6184 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
6185 		    &sel->ips_remote_addr_v6)))))) {
6186 			break;
6187 		}
6188 	}
6189 	if (connp == NULL) {
6190 		mutex_exit(&connfp->connf_lock);
6191 		return;
6192 	}
6193 
6194 	CONN_INC_REF(connp);
6195 	mutex_exit(&connfp->connf_lock);
6196 
6197 	ipsec_conn_pol(sel, connp, ppp);
6198 }
6199 
6200 /*
6201  * Construct an inverse ACQUIRE reply based on:
6202  *
6203  * 1.) Current global policy.
6204  * 2.) An conn_t match depending on what all was passed in the extv[].
6205  * 3.) A tunnel's policy head.
6206  * ...
6207  * N.) Other stuff TBD (e.g. identities)
6208  *
6209  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6210  * in this function so the caller can extract them where appropriately.
6211  *
6212  * The SRC address is the local one - just like an outbound ACQUIRE message.
6213  */
6214 mblk_t *
6215 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6216     netstack_t *ns)
6217 {
6218 	int err;
6219 	int diagnostic;
6220 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6221 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6222 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6223 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6224 	struct sockaddr_in6 *src, *dst;
6225 	struct sockaddr_in6 *isrc, *idst;
6226 	ipsec_tun_pol_t *itp = NULL;
6227 	ipsec_policy_t *pp = NULL;
6228 	ipsec_selector_t sel, isel;
6229 	mblk_t *retmp;
6230 	ip_stack_t	*ipst = ns->netstack_ip;
6231 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6232 
6233 	/* Normalize addresses */
6234 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6235 	    == KS_IN_ADDR_UNKNOWN) {
6236 		err = EINVAL;
6237 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6238 		goto bail;
6239 	}
6240 	src = (struct sockaddr_in6 *)(srcext + 1);
6241 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6242 	    == KS_IN_ADDR_UNKNOWN) {
6243 		err = EINVAL;
6244 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6245 		goto bail;
6246 	}
6247 	dst = (struct sockaddr_in6 *)(dstext + 1);
6248 	if (src->sin6_family != dst->sin6_family) {
6249 		err = EINVAL;
6250 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6251 		goto bail;
6252 	}
6253 
6254 	/* Check for tunnel mode and act appropriately */
6255 	if (innsrcext != NULL) {
6256 		if (inndstext == NULL) {
6257 			err = EINVAL;
6258 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6259 			goto bail;
6260 		}
6261 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6262 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6263 			err = EINVAL;
6264 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6265 			goto bail;
6266 		}
6267 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6268 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6269 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6270 			err = EINVAL;
6271 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6272 			goto bail;
6273 		}
6274 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6275 		if (isrc->sin6_family != idst->sin6_family) {
6276 			err = EINVAL;
6277 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6278 			goto bail;
6279 		}
6280 		if (isrc->sin6_family != AF_INET &&
6281 		    isrc->sin6_family != AF_INET6) {
6282 			err = EINVAL;
6283 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6284 			goto bail;
6285 		}
6286 	} else if (inndstext != NULL) {
6287 		err = EINVAL;
6288 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6289 		goto bail;
6290 	}
6291 
6292 	/* Get selectors first, based on outer addresses */
6293 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6294 	if (err != 0)
6295 		goto bail;
6296 
6297 	/* Check for tunnel mode mismatches. */
6298 	if (innsrcext != NULL &&
6299 	    ((isrc->sin6_family == AF_INET &&
6300 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6301 	    (isrc->sin6_family == AF_INET6 &&
6302 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6303 		err = EPROTOTYPE;
6304 		goto bail;
6305 	}
6306 
6307 	/*
6308 	 * Okay, we have the addresses and other selector information.
6309 	 * Let's first find a conn...
6310 	 */
6311 	pp = NULL;
6312 	switch (sel.ips_protocol) {
6313 	case IPPROTO_TCP:
6314 		ipsec_tcp_pol(&sel, &pp, ipst);
6315 		break;
6316 	case IPPROTO_UDP:
6317 		ipsec_udp_pol(&sel, &pp, ipst);
6318 		break;
6319 	case IPPROTO_SCTP:
6320 		ipsec_sctp_pol(&sel, &pp, ipst);
6321 		break;
6322 	case IPPROTO_ENCAP:
6323 	case IPPROTO_IPV6:
6324 		rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER);
6325 		/*
6326 		 * Assume sel.ips_remote_addr_* has the right address at
6327 		 * that exact position.
6328 		 */
6329 		itp = ipss->ipsec_itp_get_byaddr(
6330 		    (uint32_t *)(&sel.ips_local_addr_v6),
6331 		    (uint32_t *)(&sel.ips_remote_addr_v6),
6332 		    src->sin6_family, ns);
6333 		rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
6334 		if (innsrcext == NULL) {
6335 			/*
6336 			 * Transport-mode tunnel, make sure we fake out isel
6337 			 * to contain something based on the outer protocol.
6338 			 */
6339 			bzero(&isel, sizeof (isel));
6340 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6341 		} /* Else isel is initialized by ipsec_tun_pol(). */
6342 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6343 		    &diagnostic, ns);
6344 		/*
6345 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6346 		 * may be.
6347 		 */
6348 		if (err != 0)
6349 			goto bail;
6350 		break;
6351 	default:
6352 		ipsec_oth_pol(&sel, &pp, ipst);
6353 		break;
6354 	}
6355 
6356 	/*
6357 	 * If we didn't find a matching conn_t or other policy head, take a
6358 	 * look in the global policy.
6359 	 */
6360 	if (pp == NULL) {
6361 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6362 		    ns);
6363 		if (pp == NULL) {
6364 			/* There's no global policy. */
6365 			err = ENOENT;
6366 			diagnostic = 0;
6367 			goto bail;
6368 		}
6369 	}
6370 
6371 	/*
6372 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6373 	 * message based on that, fix fields where appropriate,
6374 	 * and return the message.
6375 	 */
6376 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6377 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6378 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6379 	if (pp != NULL) {
6380 		IPPOL_REFRELE(pp, ns);
6381 	}
6382 	if (retmp != NULL) {
6383 		return (retmp);
6384 	} else {
6385 		err = ENOMEM;
6386 		diagnostic = 0;
6387 	}
6388 bail:
6389 	samsg->sadb_msg_errno = (uint8_t)err;
6390 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6391 	return (NULL);
6392 }
6393 
6394 /*
6395  * ipsa_lpkt is a one-element queue, only manipulated by casptr within
6396  * the next two functions.
6397  *
6398  * These functions loop calling casptr() until the swap "happens",
6399  * turning a compare-and-swap op into an atomic swap operation.
6400  */
6401 
6402 /*
6403  * sadb_set_lpkt: Atomically swap in a value to ipsa->ipsa_lpkt and
6404  * freemsg the previous value.  free clue: freemsg(NULL) is safe.
6405  */
6406 
6407 void
6408 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
6409 {
6410 	mblk_t *opkt;
6411 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6412 
6413 	membar_producer();
6414 	do {
6415 		opkt = ipsa->ipsa_lpkt;
6416 	} while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt);
6417 
6418 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
6419 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
6420 	    &ipss->ipsec_sadb_dropper);
6421 }
6422 
6423 /*
6424  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6425  * previous value.
6426  */
6427 
6428 mblk_t *
6429 sadb_clear_lpkt(ipsa_t *ipsa)
6430 {
6431 	mblk_t *opkt;
6432 
6433 	do {
6434 		opkt = ipsa->ipsa_lpkt;
6435 	} while (casptr(&ipsa->ipsa_lpkt, opkt, NULL) != opkt);
6436 
6437 	return (opkt);
6438 }
6439 
6440 /*
6441  * Walker callback used by sadb_alg_update() to free/create crypto
6442  * context template when a crypto software provider is removed or
6443  * added.
6444  */
6445 
/*
 * Cookie handed to sadb_alg_update_cb() through sadb_walker(); filled in by
 * sadb_alg_update() from its own arguments.
 */
struct sadb_update_alg_state {
	ipsec_algtype_t alg_type;	/* IPSEC_ALG_AUTH or IPSEC_ALG_ENCR */
	uint8_t alg_id;			/* compared with the SA's algorithm */
	boolean_t is_added;		/* B_TRUE: added, B_FALSE: removed */
};
6451 
6452 static void
6453 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
6454 {
6455 	struct sadb_update_alg_state *update_state =
6456 	    (struct sadb_update_alg_state *)cookie;
6457 	crypto_ctx_template_t *ctx_tmpl = NULL;
6458 
6459 	ASSERT(MUTEX_HELD(&head->isaf_lock));
6460 
6461 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
6462 		return;
6463 
6464 	mutex_enter(&entry->ipsa_lock);
6465 
6466 	switch (update_state->alg_type) {
6467 	case IPSEC_ALG_AUTH:
6468 		if (entry->ipsa_auth_alg == update_state->alg_id)
6469 			ctx_tmpl = &entry->ipsa_authtmpl;
6470 		break;
6471 	case IPSEC_ALG_ENCR:
6472 		if (entry->ipsa_encr_alg == update_state->alg_id)
6473 			ctx_tmpl = &entry->ipsa_encrtmpl;
6474 		break;
6475 	default:
6476 		ctx_tmpl = NULL;
6477 	}
6478 
6479 	if (ctx_tmpl == NULL) {
6480 		mutex_exit(&entry->ipsa_lock);
6481 		return;
6482 	}
6483 
6484 	/*
6485 	 * The context template of the SA may be affected by the change
6486 	 * of crypto provider.
6487 	 */
6488 	if (update_state->is_added) {
6489 		/* create the context template if not already done */
6490 		if (*ctx_tmpl == NULL) {
6491 			(void) ipsec_create_ctx_tmpl(entry,
6492 			    update_state->alg_type);
6493 		}
6494 	} else {
6495 		/*
6496 		 * The crypto provider was removed. If the context template
6497 		 * exists but it is no longer valid, free it.
6498 		 */
6499 		if (*ctx_tmpl != NULL)
6500 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
6501 	}
6502 
6503 	mutex_exit(&entry->ipsa_lock);
6504 }
6505 
/*
 * Invoked by IP when a software crypto provider has been updated.
 * The type and id of the corresponding algorithm are passed as arguments.
 * is_added is B_TRUE if the provider was added, B_FALSE if it was
 * removed. The function updates the SADB and frees/creates the
 * context templates associated with SAs if needed.
 */
6513 
/*
 * Run sadb_alg_update_cb() over one hash table of "sadb"; "table" names the
 * member to walk (sdb_of or sdb_if in the uses below).  The expansion refers
 * to a variable called update_state, so one must be in scope at the point
 * of use.
 */
#define	SADB_ALG_UPDATE_WALK(sadb, table) \
    sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
	&update_state)
6517 
6518 void
6519 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
6520     netstack_t *ns)
6521 {
6522 	struct sadb_update_alg_state update_state;
6523 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
6524 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
6525 
6526 	update_state.alg_type = alg_type;
6527 	update_state.alg_id = alg_id;
6528 	update_state.is_added = is_added;
6529 
6530 	if (alg_type == IPSEC_ALG_AUTH) {
6531 		/* walk the AH tables only for auth. algorithm changes */
6532 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
6533 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
6534 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
6535 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
6536 	}
6537 
6538 	/* walk the ESP tables */
6539 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
6540 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
6541 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
6542 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
6543 }
6544 
6545 /*
6546  * Creates a context template for the specified SA. This function
6547  * is called when an SA is created and when a context template needs
6548  * to be created due to a change of software provider.
6549  */
6550 int
6551 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6552 {
6553 	ipsec_alginfo_t *alg;
6554 	crypto_mechanism_t mech;
6555 	crypto_key_t *key;
6556 	crypto_ctx_template_t *sa_tmpl;
6557 	int rv;
6558 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
6559 
6560 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
6561 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6562 
6563 	/* get pointers to the algorithm info, context template, and key */
6564 	switch (alg_type) {
6565 	case IPSEC_ALG_AUTH:
6566 		key = &sa->ipsa_kcfauthkey;
6567 		sa_tmpl = &sa->ipsa_authtmpl;
6568 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
6569 		break;
6570 	case IPSEC_ALG_ENCR:
6571 		key = &sa->ipsa_kcfencrkey;
6572 		sa_tmpl = &sa->ipsa_encrtmpl;
6573 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
6574 		break;
6575 	default:
6576 		alg = NULL;
6577 	}
6578 
6579 	if (alg == NULL || !ALG_VALID(alg))
6580 		return (EINVAL);
6581 
6582 	/* initialize the mech info structure for the framework */
6583 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
6584 	mech.cm_type = alg->alg_mech_type;
6585 	mech.cm_param = NULL;
6586 	mech.cm_param_len = 0;
6587 
6588 	/* create a new context template */
6589 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
6590 
6591 	/*
6592 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
6593 	 * providers are available for that mechanism. In that case
6594 	 * we don't fail, and will generate the context template from
6595 	 * the framework callback when a software provider for that
6596 	 * mechanism registers.
6597 	 *
6598 	 * The context template is assigned the special value
6599 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
6600 	 * lack of memory. No attempt will be made to use
6601 	 * the context template if it is set to this value.
6602 	 */
6603 	if (rv == CRYPTO_HOST_MEMORY) {
6604 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
6605 	} else if (rv != CRYPTO_SUCCESS) {
6606 		*sa_tmpl = NULL;
6607 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
6608 			return (EINVAL);
6609 	}
6610 
6611 	return (0);
6612 }
6613 
6614 /*
6615  * Destroy the context template of the specified algorithm type
6616  * of the specified SA. Must be called while holding the SA lock.
6617  */
6618 void
6619 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6620 {
6621 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6622 
6623 	if (alg_type == IPSEC_ALG_AUTH) {
6624 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
6625 			sa->ipsa_authtmpl = NULL;
6626 		else if (sa->ipsa_authtmpl != NULL) {
6627 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
6628 			sa->ipsa_authtmpl = NULL;
6629 		}
6630 	} else {
6631 		ASSERT(alg_type == IPSEC_ALG_ENCR);
6632 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
6633 			sa->ipsa_encrtmpl = NULL;
6634 		else if (sa->ipsa_encrtmpl != NULL) {
6635 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
6636 			sa->ipsa_encrtmpl = NULL;
6637 		}
6638 	}
6639 }
6640 
6641 /*
6642  * Use the kernel crypto framework to check the validity of a key received
6643  * via keysock. Returns 0 if the key is OK, -1 otherwise.
6644  */
6645 int
6646 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
6647     boolean_t is_auth, int *diag)
6648 {
6649 	crypto_mechanism_t mech;
6650 	crypto_key_t crypto_key;
6651 	int crypto_rc;
6652 
6653 	mech.cm_type = mech_type;
6654 	mech.cm_param = NULL;
6655 	mech.cm_param_len = 0;
6656 
6657 	crypto_key.ck_format = CRYPTO_KEY_RAW;
6658 	crypto_key.ck_data = sadb_key + 1;
6659 	crypto_key.ck_length = sadb_key->sadb_key_bits;
6660 
6661 	crypto_rc = crypto_key_check(&mech, &crypto_key);
6662 
6663 	switch (crypto_rc) {
6664 	case CRYPTO_SUCCESS:
6665 		return (0);
6666 	case CRYPTO_MECHANISM_INVALID:
6667 	case CRYPTO_MECH_NOT_SUPPORTED:
6668 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
6669 		    SADB_X_DIAGNOSTIC_BAD_EALG;
6670 		break;
6671 	case CRYPTO_KEY_SIZE_RANGE:
6672 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
6673 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
6674 		break;
6675 	case CRYPTO_WEAK_KEY:
6676 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
6677 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
6678 		break;
6679 	}
6680 
6681 	return (-1);
6682 }
6683 /*
6684  * If this is an outgoing SA then add some fuzz to the
6685  * SOFT EXPIRE time. The reason for this is to stop
6686  * peers trying to renegotiate SOFT expiring SA's at
6687  * the same time. The amount of fuzz needs to be at
6688  * least 10 seconds which is the typical interval
6689  * sadb_ager(), although this is only a guide as it
6690  * selftunes.
6691  */
6692 void
6693 lifetime_fuzz(ipsa_t *assoc)
6694 {
6695 	uint8_t rnd;
6696 
6697 	if (assoc->ipsa_softaddlt == 0)
6698 		return;
6699 
6700 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
6701 	rnd = (rnd & 0xF) + 10;
6702 	assoc->ipsa_softexpiretime -= rnd;
6703 	assoc->ipsa_softaddlt -= rnd;
6704 }
6705 void
6706 destroy_ipsa_pair(ipsap_t *ipsapp)
6707 {
6708 	if (ipsapp == NULL)
6709 		return;
6710 
6711 	/*
6712 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
6713 	 * them in { }.
6714 	 */
6715 	if (ipsapp->ipsap_sa_ptr != NULL) {
6716 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
6717 	}
6718 	if (ipsapp->ipsap_psa_ptr != NULL) {
6719 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
6720 	}
6721 
6722 	kmem_free(ipsapp, sizeof (*ipsapp));
6723 }
6724 
/*
 * The sadb_ager() function walks through the hash tables of SA's and ages
 * them; if the SA expires as a result, it is marked as DEAD and will be
 * reaped the next time sadb_ager() runs. SA's which are paired or have a
 * peer (same SA appears in both the inbound and outbound tables because it
 * is not possible to determine its direction) are placed on a list when they
 * expire. This is to ensure that pair/peer SA's are reaped at the same time,
 * even if they expire at different times.
 *
 * This function is called twice by sadb_ager(), once after processing the
 * inbound table, then again after processing the outbound table.
 *
 * haspeerlist is the list built during the walk; it is consumed here: each
 * node is freed and the SA it points to is released with IPSA_REFRELE.
 * sp is the SADB whose buckets are searched for the peer/pair SA, and
 * outbound selects where to look (the inbound table when B_TRUE, otherwise
 * the outbound table).
 */
void
age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
{
	templist_t *listptr;
	int outhash;
	isaf_t *bucket;
	boolean_t haspeer;
	ipsa_t *peer_assoc, *dying;
	/*
	 * Haspeer cases will contain both IPv4 and IPv6.  This code
	 * is address independent.
	 */
	while (haspeerlist != NULL) {
		/* "dying" contains the SA that has a peer. */
		dying = haspeerlist->ipsa;
		haspeer = (dying->ipsa_haspeer);
		/* Unlink and free the list node; "dying" stays referenced. */
		listptr = haspeerlist;
		haspeerlist = listptr->next;
		kmem_free(listptr, sizeof (*listptr));
		/*
		 * Pick peer bucket based on addrfam.
		 */
		if (outbound) {
			/*
			 * Look in the inbound table: by our own SPI for a
			 * peer, by the other SPI for a paired SA.
			 */
			if (haspeer)
				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
			else
				bucket = INBOUND_BUCKET(sp,
				    dying->ipsa_otherspi);
		} else { /* inbound */
			/*
			 * Look in the outbound table, hashed on the
			 * destination address for a peer and on the source
			 * address for a paired SA.
			 */
			if (haspeer) {
				if (dying->ipsa_addrfam == AF_INET6) {
					outhash = OUTBOUND_HASH_V6(sp,
					    *((in6_addr_t *)&dying->
					    ipsa_dstaddr));
				} else {
					outhash = OUTBOUND_HASH_V4(sp,
					    *((ipaddr_t *)&dying->
					    ipsa_dstaddr));
				}
			} else if (dying->ipsa_addrfam == AF_INET6) {
				outhash = OUTBOUND_HASH_V6(sp,
				    *((in6_addr_t *)&dying->
				    ipsa_srcaddr));
			} else {
				outhash = OUTBOUND_HASH_V4(sp,
				    *((ipaddr_t *)&dying->
				    ipsa_srcaddr));
			}
		bucket = &(sp->sdb_of[outhash]);
		}

		mutex_enter(&bucket->isaf_lock);
		/*
		 * "haspeer" SA's have the same src/dst address ordering,
		 * "paired" SA's have the src/dst addresses reversed.
		 */
		if (haspeer) {
			peer_assoc = ipsec_getassocbyspi(bucket,
			    dying->ipsa_spi, dying->ipsa_srcaddr,
			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
		} else {
			peer_assoc = ipsec_getassocbyspi(bucket,
			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
		}

		mutex_exit(&bucket->isaf_lock);
		if (peer_assoc != NULL) {
			/* Both SA locks are held while the flags change. */
			mutex_enter(&peer_assoc->ipsa_lock);
			mutex_enter(&dying->ipsa_lock);
			if (!haspeer) {
				/*
				 * Only SA's which have a "peer" or are
				 * "paired" end up on this list, so this
				 * must be a "paired" SA, update the flags
				 * to break the pair.
				 */
				peer_assoc->ipsa_otherspi = 0;
				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
				dying->ipsa_otherspi = 0;
				dying->ipsa_flags &= ~IPSA_F_PAIRED;
			}
			if (haspeer || outbound) {
				/*
				 * Update the state of the "inbound" SA when
				 * the "outbound" SA has expired. Don't update
				 * the "outbound" SA when the "inbound" SA
				 * expires because setting the hard expire
				 * time below will cause this to happen.
				 */
				peer_assoc->ipsa_state = dying->ipsa_state;
			}
			if (dying->ipsa_state == IPSA_STATE_DEAD)
				peer_assoc->ipsa_hardexpiretime = 1;

			mutex_exit(&dying->ipsa_lock);
			mutex_exit(&peer_assoc->ipsa_lock);
			/*
			 * Done with the SA found by ipsec_getassocbyspi()
			 * (presumably returned held - confirm).
			 */
			IPSA_REFRELE(peer_assoc);
		}
		/* Drop the reference that kept "dying" on the list. */
		IPSA_REFRELE(dying);
	}
}
6839