xref: /illumos-gate/usr/src/uts/common/inet/ip/sadb.c (revision b2519362c825a494fb6e93549e2e32a425011563)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/stropts.h>
30 #include <sys/strsubr.h>
31 #include <sys/errno.h>
32 #include <sys/ddi.h>
33 #include <sys/debug.h>
34 #include <sys/cmn_err.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/kmem.h>
38 #include <sys/sunddi.h>
39 #include <sys/tihdr.h>
40 #include <sys/atomic.h>
41 #include <sys/socket.h>
42 #include <sys/sysmacros.h>
43 #include <sys/crypto/common.h>
44 #include <sys/crypto/api.h>
45 #include <sys/zone.h>
46 #include <netinet/in.h>
47 #include <net/if.h>
48 #include <net/pfkeyv2.h>
49 #include <net/pfpolicy.h>
50 #include <inet/common.h>
51 #include <netinet/ip6.h>
52 #include <inet/ip.h>
53 #include <inet/ip_ire.h>
54 #include <inet/ip6.h>
55 #include <inet/ipsec_info.h>
56 #include <inet/tcp.h>
57 #include <inet/sadb.h>
58 #include <inet/ipsec_impl.h>
59 #include <inet/ipsecah.h>
60 #include <inet/ipsecesp.h>
61 #include <sys/random.h>
62 #include <sys/dlpi.h>
63 #include <sys/strsun.h>
64 #include <sys/strsubr.h>
65 #include <inet/ip_if.h>
66 #include <inet/ipdrop.h>
67 #include <inet/ipclassifier.h>
68 #include <inet/sctp_ip.h>
69 #include <sys/tsol/tnet.h>
70 
71 /*
72  * This source file contains Security Association Database (SADB) common
73  * routines.  They are linked in with the AH module.  Since AH has no chance
74  * of falling under export control, it was safe to link it in there.
75  */
76 
/*
 * Forward declarations of file-local (static) helpers, so they can be
 * referenced before their definitions.
 */
static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
    ipsec_action_t *, boolean_t, uint32_t, uint32_t, sadb_sens_t *,
    netstack_t *);
static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
			    netstack_t *);
static void sadb_destroy(sadb_t *, netstack_t *);
static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
static sadb_sens_t *sadb_make_sens_ext(ts_label_t *tsl, int *len);

static time_t sadb_add_time(time_t, uint64_t);
static void lifetime_fuzz(ipsa_t *);
static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
static void init_ipsa_pair(ipsap_t *);
static void destroy_ipsa_pair(ipsap_t *);
static int update_pairing(ipsap_t *, ipsa_query_t *, keysock_in_t *, int *);
static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
96 
/*
 * ipsacq_maxpackets is a global variable (only declared extern here) so
 * that it can be tuned from /etc/system.
 */
101 extern uint64_t ipsacq_maxpackets;
102 
/*
 * SET_EXPIRE(sa, delta, exp):
 * If the SA's ipsa_<delta> lifetime is non-zero, set ipsa_<exp> to
 * ipsa_addtime + ipsa_<delta>, using the saturating sadb_add_time().
 * A zero delta leaves ipsa_<exp> untouched.
 *
 * NOTE: this expands to a bare brace-enclosed block (not do/while(0)),
 * so "SET_EXPIRE(...);" leaves an extra empty statement behind.
 */
#define	SET_EXPIRE(sa, delta, exp) {				\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,	\
			(sa)->ipsa_ ## delta);				\
	}								\
}

/*
 * UPDATE_EXPIRE(sa, delta, exp):
 * If the SA's ipsa_<delta> lifetime is non-zero, compute a candidate
 * expiration of ipsa_usetime + ipsa_<delta> (saturating).  If ipsa_<exp>
 * is currently 0 it is set to the candidate; otherwise it becomes the
 * smaller (earlier) of its current value and the candidate.
 *
 * NOTE: like SET_EXPIRE, this expands to a bare brace-enclosed block.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
			(sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0)				\
			(sa)->ipsa_ ## exp = tmp;			\
		else							\
			(sa)->ipsa_ ## exp = 				\
			    MIN((sa)->ipsa_ ## exp, tmp); 		\
	}								\
}
121 
122 
123 /* wrap the macro so we can pass it as a function pointer */
124 void
125 sadb_sa_refrele(void *target)
126 {
127 	IPSA_REFRELE(((ipsa_t *)target));
128 }
129 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * We presume that ILP32 systems will be past end of support life when
 * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 *
 * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 * will hopefully have figured out clever ways to avoid the use of
 * fixed-sized integers in computation.
 *
 * Returns base + delta, or TIME_MAX if that sum would exceed TIME_MAX.
 *
 * The overflow test is done *before* the addition, by comparing delta
 * with the headroom left above base.  This avoids relying on how an
 * out-of-range value is converted back to a signed time_t.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.  After this, delta always fits in a time_t.
	 */
	if (delta > TIME_MAX)
		delta = TIME_MAX;

	/*
	 * Only a positive base can push the sum past TIME_MAX.  For such
	 * a base, (TIME_MAX - base) is non-negative and cannot overflow,
	 * so it is safe to compute and compare against.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
180 
181 /*
182  * Callers of this function have already created a working security
183  * association, and have found the appropriate table & hash chain.  All this
184  * function does is check duplicates, and insert the SA.  The caller needs to
185  * hold the hash bucket lock and increment the refcnt before insertion.
186  *
187  * Return 0 if success, EEXIST if collision.
188  */
189 #define	SA_UNIQUE_MATCH(sa1, sa2) \
190 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
191 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
192 
193 int
194 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
195 {
196 	ipsa_t **ptpn = NULL;
197 	ipsa_t *walker;
198 	boolean_t unspecsrc;
199 
200 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
201 
202 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
203 
204 	walker = bucket->isaf_ipsa;
205 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
206 
207 	/*
208 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
209 	 * of the list unless there's an unspecified source address, then
210 	 * insert it after the last SA with a specified source address.
211 	 *
212 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
213 	 * checking for collisions.
214 	 */
215 
216 	while (walker != NULL) {
217 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
218 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
219 			if (walker->ipsa_spi == ipsa->ipsa_spi)
220 				return (EEXIST);
221 
222 			mutex_enter(&walker->ipsa_lock);
223 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
224 			    (walker->ipsa_flags & IPSA_F_USED) &&
225 			    SA_UNIQUE_MATCH(walker, ipsa)) {
226 				walker->ipsa_flags |= IPSA_F_CINVALID;
227 			}
228 			mutex_exit(&walker->ipsa_lock);
229 		}
230 
231 		if (ptpn == NULL && unspecsrc) {
232 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
233 			    walker->ipsa_addrfam))
234 				ptpn = walker->ipsa_ptpn;
235 			else if (walker->ipsa_next == NULL)
236 				ptpn = &walker->ipsa_next;
237 		}
238 
239 		walker = walker->ipsa_next;
240 	}
241 
242 	if (ptpn == NULL)
243 		ptpn = &bucket->isaf_ipsa;
244 	ipsa->ipsa_next = *ptpn;
245 	ipsa->ipsa_ptpn = ptpn;
246 	if (ipsa->ipsa_next != NULL)
247 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
248 	*ptpn = ipsa;
249 	ipsa->ipsa_linklock = &bucket->isaf_lock;
250 
251 	return (0);
252 }
253 #undef SA_UNIQUE_MATCH
254 
255 /*
256  * Free a security association.  Its reference count is 0, which means
257  * I must free it.  The SA must be unlocked and must not be linked into
258  * any fanout list.
259  */
260 static void
261 sadb_freeassoc(ipsa_t *ipsa)
262 {
263 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
264 	mblk_t		*asyncmp, *mp;
265 
266 	ASSERT(ipss != NULL);
267 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
268 	ASSERT(ipsa->ipsa_refcnt == 0);
269 	ASSERT(ipsa->ipsa_next == NULL);
270 	ASSERT(ipsa->ipsa_ptpn == NULL);
271 
272 
273 	asyncmp = sadb_clear_lpkt(ipsa);
274 	if (asyncmp != NULL) {
275 		mp = ip_recv_attr_free_mblk(asyncmp);
276 		ip_drop_packet(mp, B_TRUE, NULL,
277 		    DROPPER(ipss, ipds_sadb_inlarval_timeout),
278 		    &ipss->ipsec_sadb_dropper);
279 	}
280 	mutex_enter(&ipsa->ipsa_lock);
281 
282 	if (ipsa->ipsa_tsl != NULL) {
283 		label_rele(ipsa->ipsa_tsl);
284 		ipsa->ipsa_tsl = NULL;
285 	}
286 
287 	if (ipsa->ipsa_otsl != NULL) {
288 		label_rele(ipsa->ipsa_otsl);
289 		ipsa->ipsa_otsl = NULL;
290 	}
291 
292 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
293 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
294 	mutex_exit(&ipsa->ipsa_lock);
295 
296 	/* bzero() these fields for paranoia's sake. */
297 	if (ipsa->ipsa_authkey != NULL) {
298 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
299 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
300 	}
301 	if (ipsa->ipsa_encrkey != NULL) {
302 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
303 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
304 	}
305 	if (ipsa->ipsa_nonce_buf != NULL) {
306 		bzero(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
307 		kmem_free(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
308 	}
309 	if (ipsa->ipsa_src_cid != NULL) {
310 		IPSID_REFRELE(ipsa->ipsa_src_cid);
311 	}
312 	if (ipsa->ipsa_dst_cid != NULL) {
313 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
314 	}
315 	if (ipsa->ipsa_emech.cm_param != NULL)
316 		kmem_free(ipsa->ipsa_emech.cm_param,
317 		    ipsa->ipsa_emech.cm_param_len);
318 
319 	mutex_destroy(&ipsa->ipsa_lock);
320 	kmem_free(ipsa, sizeof (*ipsa));
321 }
322 
323 /*
324  * Unlink a security association from a hash bucket.  Assume the hash bucket
325  * lock is held, but the association's lock is not.
326  *
327  * Note that we do not bump the bucket's generation number here because
328  * we might not be making a visible change to the set of visible SA's.
329  * All callers MUST bump the bucket's generation number before they unlock
330  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
331  * was present in the bucket at the time it was locked.
332  */
333 void
334 sadb_unlinkassoc(ipsa_t *ipsa)
335 {
336 	ASSERT(ipsa->ipsa_linklock != NULL);
337 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
338 
339 	/* These fields are protected by the link lock. */
340 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
341 	if (ipsa->ipsa_next != NULL) {
342 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
343 		ipsa->ipsa_next = NULL;
344 	}
345 
346 	ipsa->ipsa_ptpn = NULL;
347 
348 	/* This may destroy the SA. */
349 	IPSA_REFRELE(ipsa);
350 }
351 
352 void
353 sadb_delete_cluster(ipsa_t *assoc)
354 {
355 	uint8_t protocol;
356 
357 	if (cl_inet_deletespi &&
358 	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
359 	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
360 		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
361 		    IPPROTO_AH : IPPROTO_ESP;
362 		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
363 		    protocol, assoc->ipsa_spi, NULL);
364 	}
365 }
366 
367 /*
368  * Create a larval security association with the specified SPI.	 All other
369  * fields are zeroed.
370  */
371 static ipsa_t *
372 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
373     netstack_t *ns)
374 {
375 	ipsa_t *newbie;
376 
377 	/*
378 	 * Allocate...
379 	 */
380 
381 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
382 	if (newbie == NULL) {
383 		/* Can't make new larval SA. */
384 		return (NULL);
385 	}
386 
387 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
388 	newbie->ipsa_spi = spi;
389 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
390 
391 	/*
392 	 * Copy addresses...
393 	 */
394 
395 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
396 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
397 
398 	newbie->ipsa_addrfam = addrfam;
399 
400 	/*
401 	 * Set common initialization values, including refcnt.
402 	 */
403 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
404 	newbie->ipsa_state = IPSA_STATE_LARVAL;
405 	newbie->ipsa_refcnt = 1;
406 	newbie->ipsa_freefunc = sadb_freeassoc;
407 
408 	/*
409 	 * There aren't a lot of other common initialization values, as
410 	 * they are copied in from the PF_KEY message.
411 	 */
412 
413 	return (newbie);
414 }
415 
416 /*
417  * Call me to initialize a security association fanout.
418  */
419 static int
420 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
421 {
422 	isaf_t *table;
423 	int i;
424 
425 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
426 	*tablep = table;
427 
428 	if (table == NULL)
429 		return (ENOMEM);
430 
431 	for (i = 0; i < size; i++) {
432 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
433 		table[i].isaf_ipsa = NULL;
434 		table[i].isaf_gen = 0;
435 	}
436 
437 	return (0);
438 }
439 
440 /*
441  * Call me to initialize an acquire fanout
442  */
443 static int
444 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
445 {
446 	iacqf_t *table;
447 	int i;
448 
449 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
450 	*tablep = table;
451 
452 	if (table == NULL)
453 		return (ENOMEM);
454 
455 	for (i = 0; i < size; i++) {
456 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
457 		table[i].iacqf_ipsacq = NULL;
458 	}
459 
460 	return (0);
461 }
462 
463 /*
464  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
465  * caller must clean up partial allocations.
466  */
467 static int
468 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
469 {
470 	ASSERT(sp->sdb_of == NULL);
471 	ASSERT(sp->sdb_if == NULL);
472 	ASSERT(sp->sdb_acq == NULL);
473 
474 	sp->sdb_hashsize = size;
475 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
476 		return (ENOMEM);
477 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
478 		return (ENOMEM);
479 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
480 		return (ENOMEM);
481 
482 	return (0);
483 }
484 
485 /*
486  * Call me to initialize an SADB instance; fall back to default size on failure.
487  */
488 static void
489 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
490     netstack_t *ns)
491 {
492 	ASSERT(sp->sdb_of == NULL);
493 	ASSERT(sp->sdb_if == NULL);
494 	ASSERT(sp->sdb_acq == NULL);
495 
496 	if (size < IPSEC_DEFAULT_HASH_SIZE)
497 		size = IPSEC_DEFAULT_HASH_SIZE;
498 
499 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
500 
501 		cmn_err(CE_WARN,
502 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
503 		    size, ver, name);
504 
505 		sadb_destroy(sp, ns);
506 		size = IPSEC_DEFAULT_HASH_SIZE;
507 		cmn_err(CE_WARN, "Falling back to %d entries", size);
508 		(void) sadb_init_trial(sp, size, KM_SLEEP);
509 	}
510 }
511 
512 
513 /*
514  * Initialize an SADB-pair.
515  */
516 void
517 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
518 {
519 	sadb_init(name, &sp->s_v4, size, 4, ns);
520 	sadb_init(name, &sp->s_v6, size, 6, ns);
521 
522 	sp->s_satype = type;
523 
524 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
525 	if (type == SADB_SATYPE_AH) {
526 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
527 
528 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
529 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
530 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
531 	} else {
532 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
533 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
534 	}
535 }
536 
537 /*
538  * Deliver a single SADB_DUMP message representing a single SA.  This is
539  * called many times by sadb_dump().
540  *
541  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
542  * the caller should take that as a hint that dupb() on the "original answer"
543  * failed, and that perhaps the caller should try again with a copyb()ed
544  * "original answer".
545  */
546 static int
547 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
548     sadb_msg_t *samsg)
549 {
550 	mblk_t *answer;
551 
552 	answer = dupb(original_answer);
553 	if (answer == NULL)
554 		return (ENOBUFS);
555 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
556 	if (answer->b_cont == NULL) {
557 		freeb(answer);
558 		return (ENOMEM);
559 	}
560 
561 	/* Just do a putnext, and let keysock deal with flow control. */
562 	putnext(pfkey_q, answer);
563 	return (0);
564 }
565 
566 /*
567  * Common function to allocate and prepare a keysock_out_t M_CTL message.
568  */
569 mblk_t *
570 sadb_keysock_out(minor_t serial)
571 {
572 	mblk_t *mp;
573 	keysock_out_t *kso;
574 
575 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
576 	if (mp != NULL) {
577 		mp->b_datap->db_type = M_CTL;
578 		mp->b_wptr += sizeof (ipsec_info_t);
579 		kso = (keysock_out_t *)mp->b_rptr;
580 		kso->ks_out_type = KEYSOCK_OUT;
581 		kso->ks_out_len = sizeof (*kso);
582 		kso->ks_out_serial = serial;
583 	}
584 
585 	return (mp);
586 }
587 
588 /*
589  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
590  * to keysock.
591  */
592 static int
593 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
594     int num_entries, boolean_t do_peers, time_t active_time)
595 {
596 	int i, error = 0;
597 	mblk_t *original_answer;
598 	ipsa_t *walker;
599 	sadb_msg_t *samsg;
600 	time_t	current;
601 
602 	/*
603 	 * For each IPSA hash bucket do:
604 	 *	- Hold the mutex
605 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
606 	 */
607 	ASSERT(mp->b_cont != NULL);
608 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
609 
610 	original_answer = sadb_keysock_out(serial);
611 	if (original_answer == NULL)
612 		return (ENOMEM);
613 
614 	current = gethrestime_sec();
615 	for (i = 0; i < num_entries; i++) {
616 		mutex_enter(&fanout[i].isaf_lock);
617 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
618 		    walker = walker->ipsa_next) {
619 			if (!do_peers && walker->ipsa_haspeer)
620 				continue;
621 			if ((active_time != 0) &&
622 			    ((current - walker->ipsa_lastuse) > active_time))
623 				continue;
624 			error = sadb_dump_deliver(pfkey_q, original_answer,
625 			    walker, samsg);
626 			if (error == ENOBUFS) {
627 				mblk_t *new_original_answer;
628 
629 				/* Ran out of dupb's.  Try a copyb. */
630 				new_original_answer = copyb(original_answer);
631 				if (new_original_answer == NULL) {
632 					error = ENOMEM;
633 				} else {
634 					freeb(original_answer);
635 					original_answer = new_original_answer;
636 					error = sadb_dump_deliver(pfkey_q,
637 					    original_answer, walker, samsg);
638 				}
639 			}
640 			if (error != 0)
641 				break;	/* out of for loop. */
642 		}
643 		mutex_exit(&fanout[i].isaf_lock);
644 		if (error != 0)
645 			break;	/* out of for loop. */
646 	}
647 
648 	freeb(original_answer);
649 	return (error);
650 }
651 
652 /*
653  * Dump an entire SADB; outbound first, then inbound.
654  */
655 
656 int
657 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
658 {
659 	int error;
660 	time_t	active_time = 0;
661 	sadb_x_edump_t	*edump =
662 	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
663 
664 	if (edump != NULL) {
665 		active_time = edump->sadb_x_edump_timeout;
666 	}
667 
668 	/* Dump outbound */
669 	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
670 	    sp->sdb_hashsize, B_TRUE, active_time);
671 	if (error)
672 		return (error);
673 
674 	/* Dump inbound */
675 	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
676 	    sp->sdb_hashsize, B_FALSE, active_time);
677 }
678 
679 /*
680  * Generic sadb table walker.
681  *
682  * Call "walkfn" for each SA in each bucket in "table"; pass the
683  * bucket, the entry and "cookie" to the callback function.
684  * Take care to ensure that walkfn can delete the SA without screwing
685  * up our traverse.
686  *
687  * The bucket is locked for the duration of the callback, both so that the
688  * callback can just call sadb_unlinkassoc() when it wants to delete something,
689  * and so that no new entries are added while we're walking the list.
690  */
691 static void
692 sadb_walker(isaf_t *table, uint_t numentries,
693     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
694     void *cookie)
695 {
696 	int i;
697 	for (i = 0; i < numentries; i++) {
698 		ipsa_t *entry, *next;
699 
700 		mutex_enter(&table[i].isaf_lock);
701 
702 		for (entry = table[i].isaf_ipsa; entry != NULL;
703 		    entry = next) {
704 			next = entry->ipsa_next;
705 			(*walkfn)(&table[i], entry, cookie);
706 		}
707 		mutex_exit(&table[i].isaf_lock);
708 	}
709 }
710 
711 /*
712  * Call me to free up a security association fanout.  Use the forever
713  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
714  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
715  * when a module is unloaded).
716  */
717 static void
718 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
719     boolean_t inbound)
720 {
721 	int i;
722 	isaf_t *table = *tablep;
723 	uint8_t protocol;
724 	ipsa_t *sa;
725 	netstackid_t sid;
726 
727 	if (table == NULL)
728 		return;
729 
730 	for (i = 0; i < numentries; i++) {
731 		mutex_enter(&table[i].isaf_lock);
732 		while ((sa = table[i].isaf_ipsa) != NULL) {
733 			if (inbound && cl_inet_deletespi &&
734 			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
735 			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
736 				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
737 				    IPPROTO_AH : IPPROTO_ESP;
738 				sid = sa->ipsa_netstack->netstack_stackid;
739 				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
740 				    NULL);
741 			}
742 			sadb_unlinkassoc(sa);
743 		}
744 		table[i].isaf_gen++;
745 		mutex_exit(&table[i].isaf_lock);
746 		if (forever)
747 			mutex_destroy(&(table[i].isaf_lock));
748 	}
749 
750 	if (forever) {
751 		*tablep = NULL;
752 		kmem_free(table, numentries * sizeof (*table));
753 	}
754 }
755 
756 /*
757  * Entry points to sadb_destroyer().
758  */
759 static void
760 sadb_flush(sadb_t *sp, netstack_t *ns)
761 {
762 	/*
763 	 * Flush out each bucket, one at a time.  Were it not for keysock's
764 	 * enforcement, there would be a subtlety where I could add on the
765 	 * heels of a flush.  With keysock's enforcement, however, this
766 	 * makes ESP's job easy.
767 	 */
768 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
769 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
770 
771 	/* For each acquire, destroy it; leave the bucket mutex alone. */
772 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
773 }
774 
775 static void
776 sadb_destroy(sadb_t *sp, netstack_t *ns)
777 {
778 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
779 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
780 
781 	/* For each acquire, destroy it, including the bucket mutex. */
782 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
783 
784 	ASSERT(sp->sdb_of == NULL);
785 	ASSERT(sp->sdb_if == NULL);
786 	ASSERT(sp->sdb_acq == NULL);
787 }
788 
789 void
790 sadbp_flush(sadbp_t *spp, netstack_t *ns)
791 {
792 	sadb_flush(&spp->s_v4, ns);
793 	sadb_flush(&spp->s_v6, ns);
794 }
795 
796 void
797 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
798 {
799 	sadb_destroy(&spp->s_v4, ns);
800 	sadb_destroy(&spp->s_v6, ns);
801 
802 	if (spp->s_satype == SADB_SATYPE_AH) {
803 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
804 
805 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
806 	}
807 }
808 
809 
810 /*
811  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
812  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
813  * EINVAL.
814  */
815 int
816 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
817     sadb_lifetime_t *idle)
818 {
819 	if (hard == NULL || soft == NULL)
820 		return (0);
821 
822 	if (hard->sadb_lifetime_allocations != 0 &&
823 	    soft->sadb_lifetime_allocations != 0 &&
824 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
825 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
826 
827 	if (hard->sadb_lifetime_bytes != 0 &&
828 	    soft->sadb_lifetime_bytes != 0 &&
829 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
830 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
831 
832 	if (hard->sadb_lifetime_addtime != 0 &&
833 	    soft->sadb_lifetime_addtime != 0 &&
834 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
835 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
836 
837 	if (hard->sadb_lifetime_usetime != 0 &&
838 	    soft->sadb_lifetime_usetime != 0 &&
839 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
840 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
841 
842 	if (idle != NULL) {
843 		if (hard->sadb_lifetime_addtime != 0 &&
844 		    idle->sadb_lifetime_addtime != 0 &&
845 		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
846 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
847 
848 		if (soft->sadb_lifetime_addtime != 0 &&
849 		    idle->sadb_lifetime_addtime != 0 &&
850 		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
851 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
852 
853 		if (hard->sadb_lifetime_usetime != 0 &&
854 		    idle->sadb_lifetime_usetime != 0 &&
855 		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
856 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
857 
858 		if (soft->sadb_lifetime_usetime != 0 &&
859 		    idle->sadb_lifetime_usetime != 0 &&
860 		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
861 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
862 	}
863 
864 	return (0);
865 }
866 
867 /*
868  * Sanity check sensitivity labels.
869  *
870  * For now, just reject labels on unlabeled systems.
871  */
872 int
873 sadb_labelchk(keysock_in_t *ksi)
874 {
875 	if (!is_system_labeled()) {
876 		if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
877 			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
878 
879 		if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL)
880 			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
881 	}
882 
883 	return (0);
884 }
885 
886 /*
887  * Clone a security association for the purposes of inserting a single SA
888  * into inbound and outbound tables respectively. This function should only
889  * be called from sadb_common_add().
890  */
891 static ipsa_t *
892 sadb_cloneassoc(ipsa_t *ipsa)
893 {
894 	ipsa_t *newbie;
895 	boolean_t error = B_FALSE;
896 
897 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
898 
899 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
900 	if (newbie == NULL)
901 		return (NULL);
902 
903 	/* Copy over what we can. */
904 	*newbie = *ipsa;
905 
906 	/* bzero and initialize locks, in case *_init() allocates... */
907 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
908 
909 	if (newbie->ipsa_tsl != NULL)
910 		label_hold(newbie->ipsa_tsl);
911 
912 	if (newbie->ipsa_otsl != NULL)
913 		label_hold(newbie->ipsa_otsl);
914 
915 	/*
916 	 * While somewhat dain-bramaged, the most graceful way to
917 	 * recover from errors is to keep plowing through the
918 	 * allocations, and getting what I can.  It's easier to call
919 	 * sadb_freeassoc() on the stillborn clone when all the
920 	 * pointers aren't pointing to the parent's data.
921 	 */
922 
923 	if (ipsa->ipsa_authkey != NULL) {
924 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
925 		    KM_NOSLEEP);
926 		if (newbie->ipsa_authkey == NULL) {
927 			error = B_TRUE;
928 		} else {
929 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
930 			    newbie->ipsa_authkeylen);
931 
932 			newbie->ipsa_kcfauthkey.ck_data =
933 			    newbie->ipsa_authkey;
934 		}
935 
936 		if (newbie->ipsa_amech.cm_param != NULL) {
937 			newbie->ipsa_amech.cm_param =
938 			    (char *)&newbie->ipsa_mac_len;
939 		}
940 	}
941 
942 	if (ipsa->ipsa_encrkey != NULL) {
943 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
944 		    KM_NOSLEEP);
945 		if (newbie->ipsa_encrkey == NULL) {
946 			error = B_TRUE;
947 		} else {
948 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
949 			    newbie->ipsa_encrkeylen);
950 
951 			newbie->ipsa_kcfencrkey.ck_data =
952 			    newbie->ipsa_encrkey;
953 		}
954 	}
955 
956 	newbie->ipsa_authtmpl = NULL;
957 	newbie->ipsa_encrtmpl = NULL;
958 	newbie->ipsa_haspeer = B_TRUE;
959 
960 	if (ipsa->ipsa_src_cid != NULL) {
961 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
962 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
963 	}
964 
965 	if (ipsa->ipsa_dst_cid != NULL) {
966 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
967 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
968 	}
969 
970 	if (error) {
971 		sadb_freeassoc(newbie);
972 		return (NULL);
973 	}
974 
975 	return (newbie);
976 }
977 
978 /*
979  * Initialize a SADB address extension at the address specified by addrext.
980  * Return a pointer to the end of the new address extension.
981  */
982 static uint8_t *
983 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
984     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
985 {
986 	struct sockaddr_in *sin;
987 	struct sockaddr_in6 *sin6;
988 	uint8_t *cur = start;
989 	int addrext_len;
990 	int sin_len;
991 	sadb_address_t *addrext	= (sadb_address_t *)cur;
992 
993 	if (cur == NULL)
994 		return (NULL);
995 
996 	cur += sizeof (*addrext);
997 	if (cur > end)
998 		return (NULL);
999 
1000 	addrext->sadb_address_proto = proto;
1001 	addrext->sadb_address_prefixlen = prefix;
1002 	addrext->sadb_address_reserved = 0;
1003 	addrext->sadb_address_exttype = exttype;
1004 
1005 	switch (af) {
1006 	case AF_INET:
1007 		sin = (struct sockaddr_in *)cur;
1008 		sin_len = sizeof (*sin);
1009 		cur += sin_len;
1010 		if (cur > end)
1011 			return (NULL);
1012 
1013 		sin->sin_family = af;
1014 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1015 		sin->sin_port = port;
1016 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1017 		break;
1018 	case AF_INET6:
1019 		sin6 = (struct sockaddr_in6 *)cur;
1020 		sin_len = sizeof (*sin6);
1021 		cur += sin_len;
1022 		if (cur > end)
1023 			return (NULL);
1024 
1025 		bzero(sin6, sizeof (*sin6));
1026 		sin6->sin6_family = af;
1027 		sin6->sin6_port = port;
1028 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1029 		break;
1030 	}
1031 
1032 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1033 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1034 
1035 	cur = start + addrext_len;
1036 	if (cur > end)
1037 		cur = NULL;
1038 
1039 	return (cur);
1040 }
1041 
1042 /*
1043  * Construct a key management cookie extension.
1044  */
1045 
1046 static uint8_t *
1047 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1048 {
1049 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1050 
1051 	if (cur == NULL)
1052 		return (NULL);
1053 
1054 	cur += sizeof (*kmcext);
1055 
1056 	if (cur > end)
1057 		return (NULL);
1058 
1059 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1060 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1061 	kmcext->sadb_x_kmc_proto = kmp;
1062 	kmcext->sadb_x_kmc_cookie = kmc;
1063 	kmcext->sadb_x_kmc_reserved = 0;
1064 
1065 	return (cur);
1066 }
1067 
1068 /*
1069  * Given an original message header with sufficient space following it, and an
1070  * SA, construct a full PF_KEY message with all of the relevant extensions.
1071  * This is mostly used for SADB_GET, and SADB_DUMP.
1072  */
1073 static mblk_t *
1074 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1075 {
1076 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1077 	int srcidsize, dstidsize, senslen, osenslen;
1078 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1079 				/* src/dst and proxy sockaddrs. */
1080 	/*
1081 	 * The following are pointers into the PF_KEY message this PF_KEY
1082 	 * message creates.
1083 	 */
1084 	sadb_msg_t *newsamsg;
1085 	sadb_sa_t *assoc;
1086 	sadb_lifetime_t *lt;
1087 	sadb_key_t *key;
1088 	sadb_ident_t *ident;
1089 	sadb_sens_t *sens;
1090 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1091 	sadb_x_replay_ctr_t *repl_ctr;
1092 	sadb_x_pair_t *pair_ext;
1093 
1094 	mblk_t *mp;
1095 	uint8_t *cur, *end;
1096 	/* These indicate the presence of the above extension fields. */
1097 	boolean_t soft = B_FALSE, hard = B_FALSE;
1098 	boolean_t isrc = B_FALSE, idst = B_FALSE;
1099 	boolean_t auth = B_FALSE, encr = B_FALSE;
1100 	boolean_t sensinteg = B_FALSE, osensinteg = B_FALSE;
1101 	boolean_t srcid = B_FALSE, dstid = B_FALSE;
1102 	boolean_t idle;
1103 	boolean_t paired;
1104 	uint32_t otherspi;
1105 
1106 	/* First off, figure out the allocation length for this message. */
1107 	/*
1108 	 * Constant stuff.  This includes base, SA, address (src, dst),
1109 	 * and lifetime (current).
1110 	 */
1111 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1112 	    sizeof (sadb_lifetime_t);
1113 
1114 	fam = ipsa->ipsa_addrfam;
1115 	switch (fam) {
1116 	case AF_INET:
1117 		addrsize = roundup(sizeof (struct sockaddr_in) +
1118 		    sizeof (sadb_address_t), sizeof (uint64_t));
1119 		break;
1120 	case AF_INET6:
1121 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1122 		    sizeof (sadb_address_t), sizeof (uint64_t));
1123 		break;
1124 	default:
1125 		return (NULL);
1126 	}
1127 	/*
1128 	 * Allocate TWO address extensions, for source and destination.
1129 	 * (Thus, the * 2.)
1130 	 */
1131 	alloclen += addrsize * 2;
1132 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1133 		alloclen += addrsize;
1134 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1135 		alloclen += addrsize;
1136 
1137 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1138 		paired = B_TRUE;
1139 		alloclen += sizeof (sadb_x_pair_t);
1140 		otherspi = ipsa->ipsa_otherspi;
1141 	} else {
1142 		paired = B_FALSE;
1143 	}
1144 
1145 	/* How 'bout other lifetimes? */
1146 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1147 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1148 		alloclen += sizeof (sadb_lifetime_t);
1149 		soft = B_TRUE;
1150 	}
1151 
1152 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1153 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1154 		alloclen += sizeof (sadb_lifetime_t);
1155 		hard = B_TRUE;
1156 	}
1157 
1158 	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1159 		alloclen += sizeof (sadb_lifetime_t);
1160 		idle = B_TRUE;
1161 	} else {
1162 		idle = B_FALSE;
1163 	}
1164 
1165 	/* Inner addresses. */
1166 	if (ipsa->ipsa_innerfam != 0) {
1167 		pfam = ipsa->ipsa_innerfam;
1168 		switch (pfam) {
1169 		case AF_INET6:
1170 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1171 			    sizeof (sadb_address_t), sizeof (uint64_t));
1172 			break;
1173 		case AF_INET:
1174 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1175 			    sizeof (sadb_address_t), sizeof (uint64_t));
1176 			break;
1177 		default:
1178 			cmn_err(CE_PANIC,
1179 			    "IPsec SADB: Proxy length failure.\n");
1180 			break;
1181 		}
1182 		isrc = B_TRUE;
1183 		idst = B_TRUE;
1184 		alloclen += 2 * paddrsize;
1185 	}
1186 
1187 	/* For the following fields, assume that length != 0 ==> stuff */
1188 	if (ipsa->ipsa_authkeylen != 0) {
1189 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1190 		    sizeof (uint64_t));
1191 		alloclen += authsize;
1192 		auth = B_TRUE;
1193 	}
1194 
1195 	if (ipsa->ipsa_encrkeylen != 0) {
1196 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen +
1197 		    ipsa->ipsa_nonce_len, sizeof (uint64_t));
1198 		alloclen += encrsize;
1199 		encr = B_TRUE;
1200 	} else {
1201 		encr = B_FALSE;
1202 	}
1203 
1204 	if (ipsa->ipsa_tsl != NULL) {
1205 		senslen = sadb_sens_len_from_label(ipsa->ipsa_tsl);
1206 		alloclen += senslen;
1207 		sensinteg = B_TRUE;
1208 	}
1209 
1210 	if (ipsa->ipsa_otsl != NULL) {
1211 		osenslen = sadb_sens_len_from_label(ipsa->ipsa_otsl);
1212 		alloclen += osenslen;
1213 		osensinteg = B_TRUE;
1214 	}
1215 
1216 	/*
1217 	 * Must use strlen() here for lengths.	Identities use NULL
1218 	 * pointers to indicate their nonexistence.
1219 	 */
1220 	if (ipsa->ipsa_src_cid != NULL) {
1221 		srcidsize = roundup(sizeof (sadb_ident_t) +
1222 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1223 		    sizeof (uint64_t));
1224 		alloclen += srcidsize;
1225 		srcid = B_TRUE;
1226 	}
1227 
1228 	if (ipsa->ipsa_dst_cid != NULL) {
1229 		dstidsize = roundup(sizeof (sadb_ident_t) +
1230 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1231 		    sizeof (uint64_t));
1232 		alloclen += dstidsize;
1233 		dstid = B_TRUE;
1234 	}
1235 
1236 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1237 		alloclen += sizeof (sadb_x_kmc_t);
1238 
1239 	if (ipsa->ipsa_replay != 0) {
1240 		alloclen += sizeof (sadb_x_replay_ctr_t);
1241 	}
1242 
1243 	/* Make sure the allocation length is a multiple of 8 bytes. */
1244 	ASSERT((alloclen & 0x7) == 0);
1245 
1246 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1247 	mp = allocb(alloclen, BPRI_HI);
1248 	if (mp == NULL)
1249 		return (NULL);
1250 	bzero(mp->b_rptr, alloclen);
1251 
1252 	mp->b_wptr += alloclen;
1253 	end = mp->b_wptr;
1254 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1255 	*newsamsg = *samsg;
1256 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1257 
1258 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1259 
1260 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1261 
1262 	assoc = (sadb_sa_t *)(newsamsg + 1);
1263 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1264 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1265 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1266 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1267 	assoc->sadb_sa_state = ipsa->ipsa_state;
1268 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1269 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1270 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1271 
1272 	lt = (sadb_lifetime_t *)(assoc + 1);
1273 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1274 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1275 	/* We do not support the concept. */
1276 	lt->sadb_lifetime_allocations = 0;
1277 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1278 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1279 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1280 
1281 	if (hard) {
1282 		lt++;
1283 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1284 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1285 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1286 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1287 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1288 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1289 	}
1290 
1291 	if (soft) {
1292 		lt++;
1293 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1294 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1295 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1296 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1297 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1298 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1299 	}
1300 
1301 	if (idle) {
1302 		lt++;
1303 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1304 		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1305 		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1306 		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1307 	}
1308 
1309 	cur = (uint8_t *)(lt + 1);
1310 
1311 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1312 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1313 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1314 	    SA_PROTO(ipsa), 0);
1315 	if (cur == NULL) {
1316 		freemsg(mp);
1317 		mp = NULL;
1318 		goto bail;
1319 	}
1320 
1321 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1322 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1323 	    SA_PROTO(ipsa), 0);
1324 	if (cur == NULL) {
1325 		freemsg(mp);
1326 		mp = NULL;
1327 		goto bail;
1328 	}
1329 
1330 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1331 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1332 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1333 		    IPPROTO_UDP, 0);
1334 		if (cur == NULL) {
1335 			freemsg(mp);
1336 			mp = NULL;
1337 			goto bail;
1338 		}
1339 	}
1340 
1341 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1342 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1343 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1344 		    IPPROTO_UDP, 0);
1345 		if (cur == NULL) {
1346 			freemsg(mp);
1347 			mp = NULL;
1348 			goto bail;
1349 		}
1350 	}
1351 
1352 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1353 	if (isrc) {
1354 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1355 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1356 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1357 		if (cur == NULL) {
1358 			freemsg(mp);
1359 			mp = NULL;
1360 			goto bail;
1361 		}
1362 	}
1363 
1364 	if (idst) {
1365 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1366 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1367 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1368 		if (cur == NULL) {
1369 			freemsg(mp);
1370 			mp = NULL;
1371 			goto bail;
1372 		}
1373 	}
1374 
1375 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1376 		cur = sadb_make_kmc_ext(cur, end,
1377 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1378 		if (cur == NULL) {
1379 			freemsg(mp);
1380 			mp = NULL;
1381 			goto bail;
1382 		}
1383 	}
1384 
1385 	walker = (sadb_ext_t *)cur;
1386 	if (auth) {
1387 		key = (sadb_key_t *)walker;
1388 		key->sadb_key_len = SADB_8TO64(authsize);
1389 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1390 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1391 		key->sadb_key_reserved = 0;
1392 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1393 		walker = (sadb_ext_t *)((uint64_t *)walker +
1394 		    walker->sadb_ext_len);
1395 	}
1396 
1397 	if (encr) {
1398 		uint8_t *buf_ptr;
1399 		key = (sadb_key_t *)walker;
1400 		key->sadb_key_len = SADB_8TO64(encrsize);
1401 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1402 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1403 		key->sadb_key_reserved = ipsa->ipsa_saltbits;
1404 		buf_ptr = (uint8_t *)(key + 1);
1405 		bcopy(ipsa->ipsa_encrkey, buf_ptr, ipsa->ipsa_encrkeylen);
1406 		if (ipsa->ipsa_salt != NULL) {
1407 			buf_ptr += ipsa->ipsa_encrkeylen;
1408 			bcopy(ipsa->ipsa_salt, buf_ptr, ipsa->ipsa_saltlen);
1409 		}
1410 		walker = (sadb_ext_t *)((uint64_t *)walker +
1411 		    walker->sadb_ext_len);
1412 	}
1413 
1414 	if (srcid) {
1415 		ident = (sadb_ident_t *)walker;
1416 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1417 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1418 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1419 		ident->sadb_ident_id = 0;
1420 		ident->sadb_ident_reserved = 0;
1421 		(void) strcpy((char *)(ident + 1),
1422 		    ipsa->ipsa_src_cid->ipsid_cid);
1423 		walker = (sadb_ext_t *)((uint64_t *)walker +
1424 		    walker->sadb_ext_len);
1425 	}
1426 
1427 	if (dstid) {
1428 		ident = (sadb_ident_t *)walker;
1429 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1430 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1431 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1432 		ident->sadb_ident_id = 0;
1433 		ident->sadb_ident_reserved = 0;
1434 		(void) strcpy((char *)(ident + 1),
1435 		    ipsa->ipsa_dst_cid->ipsid_cid);
1436 		walker = (sadb_ext_t *)((uint64_t *)walker +
1437 		    walker->sadb_ext_len);
1438 	}
1439 
1440 	if (sensinteg) {
1441 		sens = (sadb_sens_t *)walker;
1442 		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
1443 		    ipsa->ipsa_tsl, senslen);
1444 
1445 		walker = (sadb_ext_t *)((uint64_t *)walker +
1446 		    walker->sadb_ext_len);
1447 	}
1448 
1449 	if (osensinteg) {
1450 		sens = (sadb_sens_t *)walker;
1451 
1452 		sadb_sens_from_label(sens, SADB_X_EXT_OUTER_SENS,
1453 		    ipsa->ipsa_otsl, osenslen);
1454 		if (ipsa->ipsa_mac_exempt)
1455 			sens->sadb_x_sens_flags = SADB_X_SENS_IMPLICIT;
1456 
1457 		walker = (sadb_ext_t *)((uint64_t *)walker +
1458 		    walker->sadb_ext_len);
1459 	}
1460 
1461 	if (paired) {
1462 		pair_ext = (sadb_x_pair_t *)walker;
1463 
1464 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1465 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1466 		pair_ext->sadb_x_pair_spi = otherspi;
1467 
1468 		walker = (sadb_ext_t *)((uint64_t *)walker +
1469 		    walker->sadb_ext_len);
1470 	}
1471 
1472 	if (ipsa->ipsa_replay != 0) {
1473 		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1474 		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1475 		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1476 		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1477 		repl_ctr->sadb_x_rc_replay64 = 0;
1478 		walker = (sadb_ext_t *)(repl_ctr + 1);
1479 	}
1480 
1481 bail:
1482 	/* Pardon any delays... */
1483 	mutex_exit(&ipsa->ipsa_lock);
1484 
1485 	return (mp);
1486 }
1487 
1488 /*
1489  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1490  * and adjust base message accordingly.
1491  *
1492  * Assume message is pulled up in one piece of contiguous memory.
1493  *
1494  * Say if we start off with:
1495  *
1496  * +------+----+-------------+-----------+---------------+---------------+
1497  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1498  * +------+----+-------------+-----------+---------------+---------------+
1499  *
1500  * we will end up with
1501  *
1502  * +------+----+-------------+-----------+---------------+
1503  * | base | SA | source addr | dest addr | soft lifetime |
1504  * +------+----+-------------+-----------+---------------+
1505  */
1506 static void
1507 sadb_strip(sadb_msg_t *samsg)
1508 {
1509 	sadb_ext_t *ext;
1510 	uint8_t *target = NULL;
1511 	uint8_t *msgend;
1512 	int sofar = SADB_8TO64(sizeof (*samsg));
1513 	int copylen;
1514 
1515 	ext = (sadb_ext_t *)(samsg + 1);
1516 	msgend = (uint8_t *)samsg;
1517 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1518 	while ((uint8_t *)ext < msgend) {
1519 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1520 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1521 		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1522 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1523 			/*
1524 			 * Aha!	 I found a header to be erased.
1525 			 */
1526 
1527 			if (target != NULL) {
1528 				/*
1529 				 * If I had a previous header to be erased,
1530 				 * copy over it.  I can get away with just
1531 				 * copying backwards because the target will
1532 				 * always be 8 bytes behind the source.
1533 				 */
1534 				copylen = ((uint8_t *)ext) - (target +
1535 				    SADB_64TO8(
1536 				    ((sadb_ext_t *)target)->sadb_ext_len));
1537 				ovbcopy(((uint8_t *)ext - copylen), target,
1538 				    copylen);
1539 				target += copylen;
1540 				((sadb_ext_t *)target)->sadb_ext_len =
1541 				    SADB_8TO64(((uint8_t *)ext) - target +
1542 				    SADB_64TO8(ext->sadb_ext_len));
1543 			} else {
1544 				target = (uint8_t *)ext;
1545 			}
1546 		} else {
1547 			sofar += ext->sadb_ext_len;
1548 		}
1549 
1550 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1551 	}
1552 
1553 	ASSERT((uint8_t *)ext == msgend);
1554 
1555 	if (target != NULL) {
1556 		copylen = ((uint8_t *)ext) - (target +
1557 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1558 		if (copylen != 0)
1559 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1560 	}
1561 
1562 	/* Adjust samsg. */
1563 	samsg->sadb_msg_len = (uint16_t)sofar;
1564 
1565 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1566 }
1567 
1568 /*
1569  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1570  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1571  * the sending keysock instance is included.
1572  */
1573 void
1574 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1575     uint_t serial)
1576 {
1577 	mblk_t *msg = mp->b_cont;
1578 	sadb_msg_t *samsg;
1579 	keysock_out_t *kso;
1580 
1581 	/*
1582 	 * Enough functions call this to merit a NULL queue check.
1583 	 */
1584 	if (pfkey_q == NULL) {
1585 		freemsg(mp);
1586 		return;
1587 	}
1588 
1589 	ASSERT(msg != NULL);
1590 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1591 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1592 	samsg = (sadb_msg_t *)msg->b_rptr;
1593 	kso = (keysock_out_t *)mp->b_rptr;
1594 
1595 	kso->ks_out_type = KEYSOCK_OUT;
1596 	kso->ks_out_len = sizeof (*kso);
1597 	kso->ks_out_serial = serial;
1598 
1599 	/*
1600 	 * Only send the base message up in the event of an error.
1601 	 * Don't worry about bzero()-ing, because it was probably bogus
1602 	 * anyway.
1603 	 */
1604 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1605 	samsg = (sadb_msg_t *)msg->b_rptr;
1606 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1607 	samsg->sadb_msg_errno = (uint8_t)error;
1608 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1609 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1610 
1611 	putnext(pfkey_q, mp);
1612 }
1613 
1614 /*
1615  * Send a successful return packet back to keysock via the queue in pfkey_q.
1616  *
1617  * Often, an SA is associated with the reply message, it's passed in if needed,
1618  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1619  * and the caller will release said refcnt.
1620  */
1621 void
1622 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1623     keysock_in_t *ksi, ipsa_t *ipsa)
1624 {
1625 	keysock_out_t *kso;
1626 	mblk_t *mp1;
1627 	sadb_msg_t *newsamsg;
1628 	uint8_t *oldend;
1629 
1630 	ASSERT((mp->b_cont != NULL) &&
1631 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1632 	    ((void *)mp->b_rptr == (void *)ksi));
1633 
1634 	switch (samsg->sadb_msg_type) {
1635 	case SADB_ADD:
1636 	case SADB_UPDATE:
1637 	case SADB_X_UPDATEPAIR:
1638 	case SADB_X_DELPAIR_STATE:
1639 	case SADB_FLUSH:
1640 	case SADB_DUMP:
1641 		/*
1642 		 * I have all of the message already.  I just need to strip
1643 		 * out the keying material and echo the message back.
1644 		 *
1645 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1646 		 * work.  When DUMP reaches here, it should only be a base
1647 		 * message.
1648 		 */
1649 	justecho:
1650 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1651 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1652 		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1653 			sadb_strip(samsg);
1654 			/* Assume PF_KEY message is contiguous. */
1655 			ASSERT(mp->b_cont->b_cont == NULL);
1656 			oldend = mp->b_cont->b_wptr;
1657 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1658 			    SADB_64TO8(samsg->sadb_msg_len);
1659 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1660 		}
1661 		break;
1662 	case SADB_GET:
1663 		/*
1664 		 * Do a lot of work here, because of the ipsa I just found.
1665 		 * First construct the new PF_KEY message, then abandon
1666 		 * the old one.
1667 		 */
1668 		mp1 = sadb_sa2msg(ipsa, samsg);
1669 		if (mp1 == NULL) {
1670 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1671 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1672 			return;
1673 		}
1674 		freemsg(mp->b_cont);
1675 		mp->b_cont = mp1;
1676 		break;
1677 	case SADB_DELETE:
1678 	case SADB_X_DELPAIR:
1679 		if (ipsa == NULL)
1680 			goto justecho;
1681 		/*
1682 		 * Because listening KMds may require more info, treat
1683 		 * DELETE like a special case of GET.
1684 		 */
1685 		mp1 = sadb_sa2msg(ipsa, samsg);
1686 		if (mp1 == NULL) {
1687 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1688 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1689 			return;
1690 		}
1691 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1692 		sadb_strip(newsamsg);
1693 		oldend = mp1->b_wptr;
1694 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1695 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1696 		freemsg(mp->b_cont);
1697 		mp->b_cont = mp1;
1698 		break;
1699 	default:
1700 		if (mp != NULL)
1701 			freemsg(mp);
1702 		return;
1703 	}
1704 
1705 	/* ksi is now null and void. */
1706 	kso = (keysock_out_t *)ksi;
1707 	kso->ks_out_type = KEYSOCK_OUT;
1708 	kso->ks_out_len = sizeof (*kso);
1709 	kso->ks_out_serial = ksi->ks_in_serial;
1710 	/* We're ready to send... */
1711 	putnext(pfkey_q, mp);
1712 }
1713 
1714 /*
1715  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1716  */
1717 void
1718 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1719     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1720 {
1721 	keysock_hello_ack_t *kha;
1722 	queue_t *oldq;
1723 
1724 	ASSERT(OTHERQ(q) != NULL);
1725 
1726 	/*
1727 	 * First, check atomically that I'm the first and only keysock
1728 	 * instance.
1729 	 *
1730 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1731 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1732 	 * messages.
1733 	 */
1734 
1735 	oldq = atomic_cas_ptr((void **)pfkey_qp, NULL, OTHERQ(q));
1736 	if (oldq != NULL) {
1737 		ASSERT(oldq != q);
1738 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1739 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1740 		freemsg(mp);
1741 		return;
1742 	}
1743 
1744 	kha = (keysock_hello_ack_t *)mp->b_rptr;
1745 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1746 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1747 	kha->ks_hello_satype = (uint8_t)satype;
1748 
1749 	/*
1750 	 * If we made it past the atomic_cas_ptr, then we have "exclusive"
1751 	 * access to the timeout handle.  Fire it off after the default ager
1752 	 * interval.
1753 	 */
1754 	*top = qtimeout(*pfkey_qp, ager, agerarg,
1755 	    drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
1756 
1757 	putnext(*pfkey_qp, mp);
1758 }
1759 
1760 /*
1761  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1762  *
1763  * Check addresses themselves for wildcard or multicast.
1764  * Check ire table for local/non-local/broadcast.
1765  */
1766 int
1767 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1768     netstack_t *ns)
1769 {
1770 	sadb_address_t *addr = (sadb_address_t *)ext;
1771 	struct sockaddr_in *sin;
1772 	struct sockaddr_in6 *sin6;
1773 	int diagnostic, type;
1774 	boolean_t normalized = B_FALSE;
1775 
1776 	ASSERT(ext != NULL);
1777 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
1778 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
1779 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
1780 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
1781 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
1782 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
1783 
1784 	/* Assign both sockaddrs, the compiler will do the right thing. */
1785 	sin = (struct sockaddr_in *)(addr + 1);
1786 	sin6 = (struct sockaddr_in6 *)(addr + 1);
1787 
1788 	if (sin6->sin6_family == AF_INET6) {
1789 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1790 			/*
1791 			 * Convert to an AF_INET sockaddr.  This means the
1792 			 * return messages will have the extra space, but have
1793 			 * AF_INET sockaddrs instead of AF_INET6.
1794 			 *
1795 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
1796 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
1797 			 * equal to AF_INET <v4>, it shouldnt be a huge
1798 			 * problem.
1799 			 */
1800 			sin->sin_family = AF_INET;
1801 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
1802 			    &sin->sin_addr);
1803 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
1804 			normalized = B_TRUE;
1805 		}
1806 	} else if (sin->sin_family != AF_INET) {
1807 		switch (ext->sadb_ext_type) {
1808 		case SADB_EXT_ADDRESS_SRC:
1809 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
1810 			break;
1811 		case SADB_EXT_ADDRESS_DST:
1812 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
1813 			break;
1814 		case SADB_X_EXT_ADDRESS_INNER_SRC:
1815 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
1816 			break;
1817 		case SADB_X_EXT_ADDRESS_INNER_DST:
1818 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
1819 			break;
1820 		case SADB_X_EXT_ADDRESS_NATT_LOC:
1821 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
1822 			break;
1823 		case SADB_X_EXT_ADDRESS_NATT_REM:
1824 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
1825 			break;
1826 			/* There is no default, see above ASSERT. */
1827 		}
1828 bail:
1829 		if (pfkey_q != NULL) {
1830 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
1831 			    serial);
1832 		} else {
1833 			/*
1834 			 * Scribble in sadb_msg that we got passed in.
1835 			 * Overload "mp" to be an sadb_msg pointer.
1836 			 */
1837 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
1838 
1839 			samsg->sadb_msg_errno = EINVAL;
1840 			samsg->sadb_x_msg_diagnostic = diagnostic;
1841 		}
1842 		return (KS_IN_ADDR_UNKNOWN);
1843 	}
1844 
1845 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
1846 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
1847 		/*
1848 		 * We need only check for prefix issues.
1849 		 */
1850 
1851 		/* Set diagnostic now, in case we need it later. */
1852 		diagnostic =
1853 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
1854 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
1855 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
1856 
1857 		if (normalized)
1858 			addr->sadb_address_prefixlen -= 96;
1859 
1860 		/*
1861 		 * Verify and mask out inner-addresses based on prefix length.
1862 		 */
1863 		if (sin->sin_family == AF_INET) {
1864 			if (addr->sadb_address_prefixlen > 32)
1865 				goto bail;
1866 			sin->sin_addr.s_addr &=
1867 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
1868 		} else {
1869 			in6_addr_t mask;
1870 
1871 			ASSERT(sin->sin_family == AF_INET6);
1872 			/*
1873 			 * ip_plen_to_mask_v6() returns NULL if the value in
1874 			 * question is out of range.
1875 			 */
1876 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
1877 			    &mask) == NULL)
1878 				goto bail;
1879 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1880 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1881 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1882 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1883 		}
1884 
1885 		/* We don't care in these cases. */
1886 		return (KS_IN_ADDR_DONTCARE);
1887 	}
1888 
1889 	if (sin->sin_family == AF_INET6) {
1890 		/* Check the easy ones now. */
1891 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
1892 			return (KS_IN_ADDR_MBCAST);
1893 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
1894 			return (KS_IN_ADDR_UNSPEC);
1895 		/*
1896 		 * At this point, we're a unicast IPv6 address.
1897 		 *
1898 		 * XXX Zones alert -> me/notme decision needs to be tempered
1899 		 * by what zone we're in when we go to zone-aware IPsec.
1900 		 */
1901 		if (ip_type_v6(&sin6->sin6_addr, ns->netstack_ip) ==
1902 		    IRE_LOCAL) {
1903 			/* Hey hey, it's local. */
1904 			return (KS_IN_ADDR_ME);
1905 		}
1906 	} else {
1907 		ASSERT(sin->sin_family == AF_INET);
1908 		if (sin->sin_addr.s_addr == INADDR_ANY)
1909 			return (KS_IN_ADDR_UNSPEC);
1910 		if (CLASSD(sin->sin_addr.s_addr))
1911 			return (KS_IN_ADDR_MBCAST);
1912 		/*
1913 		 * At this point we're a unicast or broadcast IPv4 address.
1914 		 *
1915 		 * Check if the address is IRE_BROADCAST or IRE_LOCAL.
1916 		 *
1917 		 * XXX Zones alert -> me/notme decision needs to be tempered
1918 		 * by what zone we're in when we go to zone-aware IPsec.
1919 		 */
1920 		type = ip_type_v4(sin->sin_addr.s_addr, ns->netstack_ip);
1921 		switch (type) {
1922 		case IRE_LOCAL:
1923 			return (KS_IN_ADDR_ME);
1924 		case IRE_BROADCAST:
1925 			return (KS_IN_ADDR_MBCAST);
1926 		}
1927 	}
1928 
1929 	return (KS_IN_ADDR_NOTME);
1930 }
1931 
1932 /*
1933  * Address normalizations and reality checks for inbound PF_KEY messages.
1934  *
1935  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
1936  * the source to AF_INET.  Do the same for the inner sources.
1937  */
1938 boolean_t
1939 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
1940 {
1941 	struct sockaddr_in *src, *isrc;
1942 	struct sockaddr_in6 *dst, *idst;
1943 	sadb_address_t *srcext, *dstext;
1944 	uint16_t sport;
1945 	sadb_ext_t **extv = ksi->ks_in_extv;
1946 	int rc;
1947 
1948 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
1949 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
1950 		    ksi->ks_in_serial, ns);
1951 		if (rc == KS_IN_ADDR_UNKNOWN)
1952 			return (B_FALSE);
1953 		if (rc == KS_IN_ADDR_MBCAST) {
1954 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1955 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
1956 			return (B_FALSE);
1957 		}
1958 		ksi->ks_in_srctype = rc;
1959 	}
1960 
1961 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
1962 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
1963 		    ksi->ks_in_serial, ns);
1964 		if (rc == KS_IN_ADDR_UNKNOWN)
1965 			return (B_FALSE);
1966 		if (rc == KS_IN_ADDR_UNSPEC) {
1967 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1968 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
1969 			return (B_FALSE);
1970 		}
1971 		ksi->ks_in_dsttype = rc;
1972 	}
1973 
1974 	/*
1975 	 * NAT-Traversal addrs are simple enough to not require all of
1976 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
1977 	 * AF_INET.
1978 	 */
1979 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
1980 		rc = sadb_addrcheck(pfkey_q, mp,
1981 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
1982 
1983 		/*
1984 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
1985 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
1986 		 * AND local-port flexibility).
1987 		 */
1988 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
1989 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1990 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
1991 			    ksi->ks_in_serial);
1992 			return (B_FALSE);
1993 		}
1994 		src = (struct sockaddr_in *)
1995 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
1996 		if (src->sin_family != AF_INET) {
1997 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1998 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
1999 			    ksi->ks_in_serial);
2000 			return (B_FALSE);
2001 		}
2002 	}
2003 
2004 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2005 		rc = sadb_addrcheck(pfkey_q, mp,
2006 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2007 
2008 		/*
2009 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2010 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2011 		 */
2012 		if (rc != KS_IN_ADDR_NOTME &&
2013 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2014 		    rc == KS_IN_ADDR_UNSPEC)) {
2015 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2016 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2017 			    ksi->ks_in_serial);
2018 			return (B_FALSE);
2019 		}
2020 		src = (struct sockaddr_in *)
2021 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2022 		if (src->sin_family != AF_INET) {
2023 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2024 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2025 			    ksi->ks_in_serial);
2026 			return (B_FALSE);
2027 		}
2028 	}
2029 
2030 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2031 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2032 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2033 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2034 			    ksi->ks_in_serial);
2035 			return (B_FALSE);
2036 		}
2037 
2038 		if (sadb_addrcheck(pfkey_q, mp,
2039 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2040 		    == KS_IN_ADDR_UNKNOWN ||
2041 		    sadb_addrcheck(pfkey_q, mp,
2042 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2043 		    == KS_IN_ADDR_UNKNOWN)
2044 			return (B_FALSE);
2045 
2046 		isrc = (struct sockaddr_in *)
2047 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2048 		    1);
2049 		idst = (struct sockaddr_in6 *)
2050 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2051 		    1);
2052 		if (isrc->sin_family != idst->sin6_family) {
2053 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2054 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2055 			    ksi->ks_in_serial);
2056 			return (B_FALSE);
2057 		}
2058 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2059 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2060 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2061 			    ksi->ks_in_serial);
2062 			return (B_FALSE);
2063 	} else {
2064 		isrc = NULL;	/* For inner/outer port check below. */
2065 	}
2066 
2067 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2068 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2069 
2070 	if (dstext == NULL || srcext == NULL)
2071 		return (B_TRUE);
2072 
2073 	dst = (struct sockaddr_in6 *)(dstext + 1);
2074 	src = (struct sockaddr_in *)(srcext + 1);
2075 
2076 	if (isrc != NULL &&
2077 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2078 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2079 		/* Can't set inner and outer ports in one SA. */
2080 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2081 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2082 		    ksi->ks_in_serial);
2083 		return (B_FALSE);
2084 	}
2085 
2086 	if (dst->sin6_family == src->sin_family)
2087 		return (B_TRUE);
2088 
2089 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2090 		if (srcext->sadb_address_proto == 0) {
2091 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2092 		} else if (dstext->sadb_address_proto == 0) {
2093 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2094 		} else {
2095 			/* Inequal protocols, neither were 0.  Report error. */
2096 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2097 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2098 			    ksi->ks_in_serial);
2099 			return (B_FALSE);
2100 		}
2101 	}
2102 
2103 	/*
2104 	 * With the exception of an unspec IPv6 source and an IPv4
2105 	 * destination, address families MUST me matched.
2106 	 */
2107 	if (src->sin_family == AF_INET ||
2108 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2109 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2110 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2111 		return (B_FALSE);
2112 	}
2113 
2114 	/*
2115 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2116 	 * in the same place for sockaddr_in and sockaddr_in6.
2117 	 */
2118 	sport = src->sin_port;
2119 	bzero(src, sizeof (*src));
2120 	src->sin_family = AF_INET;
2121 	src->sin_port = sport;
2122 
2123 	return (B_TRUE);
2124 }
2125 
2126 /*
2127  * Set the results in "addrtype", given an IRE as requested by
2128  * sadb_addrcheck().
2129  */
2130 int
2131 sadb_addrset(ire_t *ire)
2132 {
2133 	if ((ire->ire_type & IRE_BROADCAST) ||
2134 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2135 	    (ire->ire_ipversion == IPV6_VERSION &&
2136 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2137 		return (KS_IN_ADDR_MBCAST);
2138 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2139 		return (KS_IN_ADDR_ME);
2140 	return (KS_IN_ADDR_NOTME);
2141 }
2142 
2143 /*
2144  * Match primitives..
2145  * !!! TODO: short term: inner selectors
2146  *		ipv6 scope id (ifindex)
2147  * longer term:  zone id.  sensitivity label. uid.
2148  */
2149 boolean_t
2150 sadb_match_spi(ipsa_query_t *sq, ipsa_t *sa)
2151 {
2152 	return (sq->spi == sa->ipsa_spi);
2153 }
2154 
2155 boolean_t
2156 sadb_match_dst_v6(ipsa_query_t *sq, ipsa_t *sa)
2157 {
2158 	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_dstaddr, sq->dstaddr, AF_INET6));
2159 }
2160 
2161 boolean_t
2162 sadb_match_src_v6(ipsa_query_t *sq, ipsa_t *sa)
2163 {
2164 	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_srcaddr, sq->srcaddr, AF_INET6));
2165 }
2166 
2167 boolean_t
2168 sadb_match_dst_v4(ipsa_query_t *sq, ipsa_t *sa)
2169 {
2170 	return (sq->dstaddr[0] == sa->ipsa_dstaddr[0]);
2171 }
2172 
2173 boolean_t
2174 sadb_match_src_v4(ipsa_query_t *sq, ipsa_t *sa)
2175 {
2176 	return (sq->srcaddr[0] == sa->ipsa_srcaddr[0]);
2177 }
2178 
2179 boolean_t
2180 sadb_match_dstid(ipsa_query_t *sq, ipsa_t *sa)
2181 {
2182 	return ((sa->ipsa_dst_cid != NULL) &&
2183 	    (sq->didtype == sa->ipsa_dst_cid->ipsid_type) &&
2184 	    (strcmp(sq->didstr, sa->ipsa_dst_cid->ipsid_cid) == 0));
2185 
2186 }
2187 boolean_t
2188 sadb_match_srcid(ipsa_query_t *sq, ipsa_t *sa)
2189 {
2190 	return ((sa->ipsa_src_cid != NULL) &&
2191 	    (sq->sidtype == sa->ipsa_src_cid->ipsid_type) &&
2192 	    (strcmp(sq->sidstr, sa->ipsa_src_cid->ipsid_cid) == 0));
2193 }
2194 
2195 boolean_t
2196 sadb_match_kmc(ipsa_query_t *sq, ipsa_t *sa)
2197 {
2198 #define	M(a, b) (((a) == 0) || ((b) == 0) || ((a) == (b)))
2199 
2200 	return (M(sq->kmc, sa->ipsa_kmc) && M(sq->kmp, sa->ipsa_kmp));
2201 
2202 #undef M
2203 }
2204 
2205 /*
2206  * Common function which extracts several PF_KEY extensions for ease of
2207  * SADB matching.
2208  *
2209  * XXX TODO: weed out ipsa_query_t fields not used during matching
2210  * or afterwards?
2211  */
2212 int
2213 sadb_form_query(keysock_in_t *ksi, uint32_t req, uint32_t match,
2214     ipsa_query_t *sq, int *diagnostic)
2215 {
2216 	int i;
2217 	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2218 
2219 	for (i = 0; i < IPSA_NMATCH; i++)
2220 		sq->matchers[i] = NULL;
2221 
2222 	ASSERT((req & ~match) == 0);
2223 
2224 	sq->req = req;
2225 	sq->dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2226 	sq->srcext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2227 	sq->assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2228 
2229 	if ((req & IPSA_Q_DST) && (sq->dstext == NULL)) {
2230 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2231 		return (EINVAL);
2232 	}
2233 	if ((req & IPSA_Q_SRC) && (sq->srcext == NULL)) {
2234 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2235 		return (EINVAL);
2236 	}
2237 	if ((req & IPSA_Q_SA) && (sq->assoc == NULL)) {
2238 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2239 		return (EINVAL);
2240 	}
2241 
2242 	if (match & IPSA_Q_SA) {
2243 		*mfpp++ = sadb_match_spi;
2244 		sq->spi = sq->assoc->sadb_sa_spi;
2245 	}
2246 
2247 	if (sq->dstext != NULL)
2248 		sq->dst = (struct sockaddr_in *)(sq->dstext + 1);
2249 	else {
2250 		sq->dst = NULL;
2251 		sq->dst6 = NULL;
2252 		sq->dstaddr = NULL;
2253 	}
2254 
2255 	if (sq->srcext != NULL)
2256 		sq->src = (struct sockaddr_in *)(sq->srcext + 1);
2257 	else {
2258 		sq->src = NULL;
2259 		sq->src6 = NULL;
2260 		sq->srcaddr = NULL;
2261 	}
2262 
2263 	if (sq->dst != NULL)
2264 		sq->af = sq->dst->sin_family;
2265 	else if (sq->src != NULL)
2266 		sq->af = sq->src->sin_family;
2267 	else
2268 		sq->af = AF_INET;
2269 
2270 	if (sq->af == AF_INET6) {
2271 		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2272 			*mfpp++ = sadb_match_dst_v6;
2273 			sq->dst6 = (struct sockaddr_in6 *)sq->dst;
2274 			sq->dstaddr = (uint32_t *)&(sq->dst6->sin6_addr);
2275 		} else {
2276 			match &= ~IPSA_Q_DST;
2277 			sq->dstaddr = ALL_ZEROES_PTR;
2278 		}
2279 
2280 		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2281 			sq->src6 = (struct sockaddr_in6 *)(sq->srcext + 1);
2282 			sq->srcaddr = (uint32_t *)&sq->src6->sin6_addr;
2283 			if (sq->src6->sin6_family != AF_INET6) {
2284 				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2285 				return (EINVAL);
2286 			}
2287 			*mfpp++ = sadb_match_src_v6;
2288 		} else {
2289 			match &= ~IPSA_Q_SRC;
2290 			sq->srcaddr = ALL_ZEROES_PTR;
2291 		}
2292 	} else {
2293 		sq->src6 = sq->dst6 = NULL;
2294 		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2295 			*mfpp++ = sadb_match_dst_v4;
2296 			sq->dstaddr = (uint32_t *)&sq->dst->sin_addr;
2297 		} else {
2298 			match &= ~IPSA_Q_DST;
2299 			sq->dstaddr = ALL_ZEROES_PTR;
2300 		}
2301 		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2302 			sq->srcaddr = (uint32_t *)&sq->src->sin_addr;
2303 			if (sq->src->sin_family != AF_INET) {
2304 				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2305 				return (EINVAL);
2306 			}
2307 			*mfpp++ = sadb_match_src_v4;
2308 		} else {
2309 			match &= ~IPSA_Q_SRC;
2310 			sq->srcaddr = ALL_ZEROES_PTR;
2311 		}
2312 	}
2313 
2314 	sq->dstid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2315 	if ((match & IPSA_Q_DSTID) && (sq->dstid != NULL)) {
2316 		sq->didstr = (char *)(sq->dstid + 1);
2317 		sq->didtype = sq->dstid->sadb_ident_type;
2318 		*mfpp++ = sadb_match_dstid;
2319 	}
2320 
2321 	sq->srcid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2322 
2323 	if ((match & IPSA_Q_SRCID) && (sq->srcid != NULL)) {
2324 		sq->sidstr = (char *)(sq->srcid + 1);
2325 		sq->sidtype = sq->srcid->sadb_ident_type;
2326 		*mfpp++ = sadb_match_srcid;
2327 	}
2328 
2329 	sq->kmcext = (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2330 	sq->kmc = 0;
2331 	sq->kmp = 0;
2332 
2333 	if ((match & IPSA_Q_KMC) && (sq->kmcext)) {
2334 		sq->kmc = sq->kmcext->sadb_x_kmc_cookie;
2335 		sq->kmp = sq->kmcext->sadb_x_kmc_proto;
2336 		*mfpp++ = sadb_match_kmc;
2337 	}
2338 
2339 	if (match & (IPSA_Q_INBOUND|IPSA_Q_OUTBOUND)) {
2340 		if (sq->af == AF_INET6)
2341 			sq->sp = &sq->spp->s_v6;
2342 		else
2343 			sq->sp = &sq->spp->s_v4;
2344 	} else {
2345 		sq->sp = NULL;
2346 	}
2347 
2348 	if (match & IPSA_Q_INBOUND) {
2349 		sq->inhash = INBOUND_HASH(sq->sp, sq->assoc->sadb_sa_spi);
2350 		sq->inbound = &sq->sp->sdb_if[sq->inhash];
2351 	} else {
2352 		sq->inhash = 0;
2353 		sq->inbound = NULL;
2354 	}
2355 
2356 	if (match & IPSA_Q_OUTBOUND) {
2357 		if (sq->af == AF_INET6) {
2358 			sq->outhash = OUTBOUND_HASH_V6(sq->sp, *(sq->dstaddr));
2359 		} else {
2360 			sq->outhash = OUTBOUND_HASH_V4(sq->sp, *(sq->dstaddr));
2361 		}
2362 		sq->outbound = &sq->sp->sdb_of[sq->outhash];
2363 	} else {
2364 		sq->outhash = 0;
2365 		sq->outbound = NULL;
2366 	}
2367 	sq->match = match;
2368 	return (0);
2369 }
2370 
2371 /*
2372  * Match an initialized query structure with a security association;
2373  * return B_TRUE on a match, B_FALSE on a miss.
2374  * Applies match functions set up by sadb_form_query() until one returns false.
2375  */
2376 boolean_t
2377 sadb_match_query(ipsa_query_t *sq, ipsa_t *sa)
2378 {
2379 	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2380 	ipsa_match_fn_t mfp;
2381 
2382 	for (mfp = *mfpp++; mfp != NULL; mfp = *mfpp++) {
2383 		if (!mfp(sq, sa))
2384 			return (B_FALSE);
2385 	}
2386 	return (B_TRUE);
2387 }
2388 
2389 /*
2390  * Walker callback function to delete sa's based on src/dst address.
2391  * Assumes that we're called with *head locked, no other locks held;
2392  * Conveniently, and not coincidentally, this is both what sadb_walker
2393  * gives us and also what sadb_unlinkassoc expects.
2394  */
2395 struct sadb_purge_state
2396 {
2397 	ipsa_query_t sq;
2398 	boolean_t inbnd;
2399 	uint8_t sadb_sa_state;
2400 };
2401 
2402 static void
2403 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2404 {
2405 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2406 
2407 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2408 
2409 	mutex_enter(&entry->ipsa_lock);
2410 
2411 	if (entry->ipsa_state == IPSA_STATE_LARVAL ||
2412 	    !sadb_match_query(&ps->sq, entry)) {
2413 		mutex_exit(&entry->ipsa_lock);
2414 		return;
2415 	}
2416 
2417 	if (ps->inbnd) {
2418 		sadb_delete_cluster(entry);
2419 	}
2420 	entry->ipsa_state = IPSA_STATE_DEAD;
2421 	(void) sadb_torch_assoc(head, entry);
2422 }
2423 
2424 /*
2425  * Common code to purge an SA with a matching src or dst address.
2426  * Don't kill larval SA's in such a purge.
2427  */
2428 int
2429 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp,
2430     int *diagnostic, queue_t *pfkey_q)
2431 {
2432 	struct sadb_purge_state ps;
2433 	int error = sadb_form_query(ksi, 0,
2434 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2435 	    &ps.sq, diagnostic);
2436 
2437 	if (error != 0)
2438 		return (error);
2439 
2440 	/*
2441 	 * This is simple, crude, and effective.
2442 	 * Unimplemented optimizations (TBD):
2443 	 * - we can limit how many places we search based on where we
2444 	 * think the SA is filed.
2445 	 * - if we get a dst address, we can hash based on dst addr to find
2446 	 * the correct bucket in the outbound table.
2447 	 */
2448 	ps.inbnd = B_TRUE;
2449 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2450 	ps.inbnd = B_FALSE;
2451 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2452 
2453 	ASSERT(mp->b_cont != NULL);
2454 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2455 	    NULL);
2456 	return (0);
2457 }
2458 
2459 static void
2460 sadb_delpair_state_one(isaf_t *head, ipsa_t *entry, void *cookie)
2461 {
2462 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2463 	isaf_t  *inbound_bucket;
2464 	ipsa_t *peer_assoc;
2465 	ipsa_query_t *sq = &ps->sq;
2466 
2467 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2468 
2469 	mutex_enter(&entry->ipsa_lock);
2470 
2471 	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2472 	    ((sq->srcaddr != NULL) &&
2473 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, sq->srcaddr, sq->af))) {
2474 		mutex_exit(&entry->ipsa_lock);
2475 		return;
2476 	}
2477 
2478 	/*
2479 	 * The isaf_t *, which is passed in , is always an outbound bucket,
2480 	 * and we are preserving the outbound-then-inbound hash-bucket lock
2481 	 * ordering. The sadb_walker() which triggers this function is called
2482 	 * only on the outbound fanout, and the corresponding inbound bucket
2483 	 * lock is safe to acquire here.
2484 	 */
2485 
2486 	if (entry->ipsa_haspeer) {
2487 		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_spi);
2488 		mutex_enter(&inbound_bucket->isaf_lock);
2489 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2490 		    entry->ipsa_spi, entry->ipsa_srcaddr,
2491 		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2492 	} else {
2493 		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_otherspi);
2494 		mutex_enter(&inbound_bucket->isaf_lock);
2495 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2496 		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2497 		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2498 	}
2499 
2500 	entry->ipsa_state = IPSA_STATE_DEAD;
2501 	(void) sadb_torch_assoc(head, entry);
2502 	if (peer_assoc != NULL) {
2503 		mutex_enter(&peer_assoc->ipsa_lock);
2504 		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2505 		(void) sadb_torch_assoc(inbound_bucket, peer_assoc);
2506 	}
2507 	mutex_exit(&inbound_bucket->isaf_lock);
2508 }
2509 
2510 static int
2511 sadb_delpair_state(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2512     int *diagnostic, queue_t *pfkey_q)
2513 {
2514 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2515 	struct sadb_purge_state ps;
2516 	int error;
2517 
2518 	ps.sq.spp = spp;		/* XXX param */
2519 
2520 	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SRC,
2521 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2522 	    &ps.sq, diagnostic);
2523 	if (error != 0)
2524 		return (error);
2525 
2526 	ps.inbnd = B_FALSE;
2527 	ps.sadb_sa_state = assoc->sadb_sa_state;
2528 	sadb_walker(ps.sq.sp->sdb_of, ps.sq.sp->sdb_hashsize,
2529 	    sadb_delpair_state_one, &ps);
2530 
2531 	ASSERT(mp->b_cont != NULL);
2532 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2533 	    ksi, NULL);
2534 	return (0);
2535 }
2536 
2537 /*
2538  * Common code to delete/get an SA.
2539  */
2540 int
2541 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2542     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2543 {
2544 	ipsa_query_t sq;
2545 	ipsa_t *echo_target = NULL;
2546 	ipsap_t ipsapp;
2547 	uint_t	error = 0;
2548 
2549 	if (sadb_msg_type == SADB_X_DELPAIR_STATE)
2550 		return (sadb_delpair_state(mp, ksi, spp, diagnostic, pfkey_q));
2551 
2552 	sq.spp = spp;		/* XXX param */
2553 	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SA,
2554 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
2555 	    &sq, diagnostic);
2556 	if (error != 0)
2557 		return (error);
2558 
2559 	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
2560 	if (error != 0) {
2561 		return (error);
2562 	}
2563 
2564 	echo_target = ipsapp.ipsap_sa_ptr;
2565 	if (echo_target == NULL)
2566 		echo_target = ipsapp.ipsap_psa_ptr;
2567 
2568 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2569 		/*
2570 		 * Bucket locks will be required if SA is actually unlinked.
2571 		 * get_ipsa_pair() returns valid hash bucket pointers even
2572 		 * if it can't find a pair SA pointer. To prevent a potential
2573 		 * deadlock, always lock the outbound bucket before the inbound.
2574 		 */
2575 		if (ipsapp.in_inbound_table) {
2576 			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2577 			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2578 		} else {
2579 			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2580 			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2581 		}
2582 
2583 		if (ipsapp.ipsap_sa_ptr != NULL) {
2584 			mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
2585 			if (ipsapp.ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2586 				sadb_delete_cluster(ipsapp.ipsap_sa_ptr);
2587 			}
2588 			ipsapp.ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2589 			(void) sadb_torch_assoc(ipsapp.ipsap_bucket,
2590 			    ipsapp.ipsap_sa_ptr);
2591 			/*
2592 			 * sadb_torch_assoc() releases the ipsa_lock
2593 			 * and calls sadb_unlinkassoc() which does a
2594 			 * IPSA_REFRELE.
2595 			 */
2596 		}
2597 		if (ipsapp.ipsap_psa_ptr != NULL) {
2598 			mutex_enter(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2599 			if (sadb_msg_type == SADB_X_DELPAIR ||
2600 			    ipsapp.ipsap_psa_ptr->ipsa_haspeer) {
2601 				if (ipsapp.ipsap_psa_ptr->ipsa_flags &
2602 				    IPSA_F_INBOUND) {
2603 					sadb_delete_cluster
2604 					    (ipsapp.ipsap_psa_ptr);
2605 				}
2606 				ipsapp.ipsap_psa_ptr->ipsa_state =
2607 				    IPSA_STATE_DEAD;
2608 				(void) sadb_torch_assoc(ipsapp.ipsap_pbucket,
2609 				    ipsapp.ipsap_psa_ptr);
2610 			} else {
2611 				/*
2612 				 * Only half of the "pair" has been deleted.
2613 				 * Update the remaining SA and remove references
2614 				 * to its pair SA, which is now gone.
2615 				 */
2616 				ipsapp.ipsap_psa_ptr->ipsa_otherspi = 0;
2617 				ipsapp.ipsap_psa_ptr->ipsa_flags &=
2618 				    ~IPSA_F_PAIRED;
2619 				mutex_exit(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2620 			}
2621 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2622 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2623 			error = ESRCH;
2624 		}
2625 		mutex_exit(&ipsapp.ipsap_bucket->isaf_lock);
2626 		mutex_exit(&ipsapp.ipsap_pbucket->isaf_lock);
2627 	}
2628 
2629 	ASSERT(mp->b_cont != NULL);
2630 
2631 	if (error == 0)
2632 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2633 		    mp->b_cont->b_rptr, ksi, echo_target);
2634 
2635 	destroy_ipsa_pair(&ipsapp);
2636 
2637 	return (error);
2638 }
2639 
2640 /*
2641  * This function takes a sadb_sa_t and finds the ipsa_t structure
2642  * and the isaf_t (hash bucket) that its stored under. If the security
2643  * association has a peer, the ipsa_t structure and bucket for that security
2644  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2645  * are returned as a ipsap_t.
2646  *
2647  * The hash buckets are returned for convenience, if the calling function
2648  * needs to use the hash bucket locks, say to remove the SA's, it should
2649  * take care to observe the convention of locking outbound bucket then
2650  * inbound bucket. The flag in_inbound_table provides direction.
2651  *
2652  * Note that a "pair" is defined as one (but not both) of the following:
2653  *
2654  * A security association which has a soft reference to another security
2655  * association via its SPI.
2656  *
2657  * A security association that is not obviously "inbound" or "outbound" so
2658  * it appears in both hash tables, the "peer" being the same security
2659  * association in the other hash table.
2660  *
2661  * This function will return NULL if the ipsa_t can't be found in the
2662  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2663  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2664  * provided at least one ipsa_t is found.
2665  */
2666 static int
2667 get_ipsa_pair(ipsa_query_t *sq, ipsap_t *ipsapp, int *diagnostic)
2668 {
2669 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2670 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2671 	uint32_t pair_spi;
2672 
2673 	init_ipsa_pair(ipsapp);
2674 
2675 	ipsapp->in_inbound_table = B_FALSE;
2676 
2677 	/* Lock down both buckets. */
2678 	mutex_enter(&sq->outbound->isaf_lock);
2679 	mutex_enter(&sq->inbound->isaf_lock);
2680 
2681 	if (sq->assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2682 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2683 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2684 		if (ipsapp->ipsap_sa_ptr != NULL) {
2685 			ipsapp->ipsap_bucket = sq->inbound;
2686 			ipsapp->ipsap_pbucket = sq->outbound;
2687 			ipsapp->in_inbound_table = B_TRUE;
2688 		} else {
2689 			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->outbound,
2690 			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2691 			    sq->af);
2692 			ipsapp->ipsap_bucket = sq->outbound;
2693 			ipsapp->ipsap_pbucket = sq->inbound;
2694 		}
2695 	} else {
2696 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2697 		ipsapp->ipsap_sa_ptr =
2698 		    ipsec_getassocbyspi(sq->outbound,
2699 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2700 		if (ipsapp->ipsap_sa_ptr != NULL) {
2701 			ipsapp->ipsap_bucket = sq->outbound;
2702 			ipsapp->ipsap_pbucket = sq->inbound;
2703 		} else {
2704 			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2705 			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2706 			    sq->af);
2707 			ipsapp->ipsap_bucket = sq->inbound;
2708 			ipsapp->ipsap_pbucket = sq->outbound;
2709 			if (ipsapp->ipsap_sa_ptr != NULL)
2710 				ipsapp->in_inbound_table = B_TRUE;
2711 		}
2712 	}
2713 
2714 	if (ipsapp->ipsap_sa_ptr == NULL) {
2715 		mutex_exit(&sq->outbound->isaf_lock);
2716 		mutex_exit(&sq->inbound->isaf_lock);
2717 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2718 		return (ESRCH);
2719 	}
2720 
2721 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2722 	    ipsapp->in_inbound_table) {
2723 		mutex_exit(&sq->outbound->isaf_lock);
2724 		mutex_exit(&sq->inbound->isaf_lock);
2725 		return (0);
2726 	}
2727 
2728 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2729 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2730 		/*
2731 		 * haspeer implies no sa_pairing, look for same spi
2732 		 * in other hashtable.
2733 		 */
2734 		ipsapp->ipsap_psa_ptr =
2735 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2736 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2737 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2738 		mutex_exit(&sq->outbound->isaf_lock);
2739 		mutex_exit(&sq->inbound->isaf_lock);
2740 		return (0);
2741 	}
2742 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2743 	IPSA_COPY_ADDR(&pair_srcaddr,
2744 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, sq->af);
2745 	IPSA_COPY_ADDR(&pair_dstaddr,
2746 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, sq->af);
2747 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2748 	mutex_exit(&sq->inbound->isaf_lock);
2749 	mutex_exit(&sq->outbound->isaf_lock);
2750 
2751 	if (pair_spi == 0) {
2752 		ASSERT(ipsapp->ipsap_bucket != NULL);
2753 		ASSERT(ipsapp->ipsap_pbucket != NULL);
2754 		return (0);
2755 	}
2756 
2757 	/* found sa in outbound sadb, peer should be inbound */
2758 
2759 	if (ipsapp->in_inbound_table) {
2760 		/* Found SA in inbound table, pair will be in outbound. */
2761 		if (sq->af == AF_INET6) {
2762 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sq->sp,
2763 			    *(uint32_t *)pair_srcaddr);
2764 		} else {
2765 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sq->sp,
2766 			    *(uint32_t *)pair_srcaddr);
2767 		}
2768 	} else {
2769 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sq->sp, pair_spi);
2770 	}
2771 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2772 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2773 	    pair_spi, pair_dstaddr, pair_srcaddr, sq->af);
2774 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2775 	ASSERT(ipsapp->ipsap_bucket != NULL);
2776 	ASSERT(ipsapp->ipsap_pbucket != NULL);
2777 	return (0);
2778 }
2779 
2780 /*
2781  * Perform NAT-traversal cached checksum offset calculations here.
2782  */
2783 static void
2784 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2785     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2786     uint32_t *dst_addr_ptr)
2787 {
2788 	struct sockaddr_in *natt_loc, *natt_rem;
2789 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2790 	uint32_t running_sum = 0;
2791 
2792 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2793 
2794 	if (natt_rem_ext != NULL) {
2795 		uint32_t l_src;
2796 		uint32_t l_rem;
2797 
2798 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2799 
2800 		/* Ensured by sadb_addrfix(). */
2801 		ASSERT(natt_rem->sin_family == AF_INET);
2802 
2803 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2804 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2805 		l_src = *src_addr_ptr;
2806 		l_rem = *natt_rem_ptr;
2807 
2808 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2809 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2810 
2811 		l_src = ntohl(l_src);
2812 		DOWN_SUM(l_src);
2813 		DOWN_SUM(l_src);
2814 		l_rem = ntohl(l_rem);
2815 		DOWN_SUM(l_rem);
2816 		DOWN_SUM(l_rem);
2817 
2818 		/*
2819 		 * We're 1's complement for checksums, so check for wraparound
2820 		 * here.
2821 		 */
2822 		if (l_rem > l_src)
2823 			l_src--;
2824 
2825 		running_sum += l_src - l_rem;
2826 
2827 		DOWN_SUM(running_sum);
2828 		DOWN_SUM(running_sum);
2829 	}
2830 
2831 	if (natt_loc_ext != NULL) {
2832 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2833 
2834 		/* Ensured by sadb_addrfix(). */
2835 		ASSERT(natt_loc->sin_family == AF_INET);
2836 
2837 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2838 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2839 
2840 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2841 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2842 
2843 		/*
2844 		 * NAT-T port agility means we may have natt_loc_ext, but
2845 		 * only for a local-port change.
2846 		 */
2847 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2848 			uint32_t l_dst = ntohl(*dst_addr_ptr);
2849 			uint32_t l_loc = ntohl(*natt_loc_ptr);
2850 
2851 			DOWN_SUM(l_loc);
2852 			DOWN_SUM(l_loc);
2853 			DOWN_SUM(l_dst);
2854 			DOWN_SUM(l_dst);
2855 
2856 			/*
2857 			 * We're 1's complement for checksums, so check for
2858 			 * wraparound here.
2859 			 */
2860 			if (l_loc > l_dst)
2861 				l_dst--;
2862 
2863 			running_sum += l_dst - l_loc;
2864 			DOWN_SUM(running_sum);
2865 			DOWN_SUM(running_sum);
2866 		}
2867 	}
2868 
2869 	newbie->ipsa_inbound_cksum = running_sum;
2870 #undef DOWN_SUM
2871 }
2872 
2873 /*
2874  * This function is called from consumers that need to insert a fully-grown
2875  * security association into its tables.  This function takes into account that
2876  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2877  * hash bucket parameters are set in order of what the SA will be most of the
2878  * time.  (For example, an SA with an unspecified source, and a multicast
2879  * destination will primarily be an outbound SA.  OTOH, if that destination
2880  * is unicast for this node, then the SA will primarily be inbound.)
2881  *
2882  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2883  * to check both buckets for purposes of collision.
2884  *
2885  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2886  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2887  * with additional diagnostic information because there is at least one EINVAL
2888  * case here.
2889  */
2890 int
2891 sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2892     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2893     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2894     netstack_t *ns, sadbp_t *spp)
2895 {
2896 	ipsa_t *newbie_clone = NULL, *scratch;
2897 	ipsap_t ipsapp;
2898 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2899 	sadb_address_t *srcext =
2900 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2901 	sadb_address_t *dstext =
2902 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2903 	sadb_address_t *isrcext =
2904 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2905 	sadb_address_t *idstext =
2906 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2907 	sadb_x_kmc_t *kmcext =
2908 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2909 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2910 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2911 	sadb_sens_t *sens =
2912 	    (sadb_sens_t *)ksi->ks_in_extv[SADB_EXT_SENSITIVITY];
2913 	sadb_sens_t *osens =
2914 	    (sadb_sens_t *)ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS];
2915 	sadb_x_pair_t *pair_ext =
2916 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2917 	sadb_x_replay_ctr_t *replayext =
2918 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
2919 	uint8_t protocol =
2920 	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
2921 	int salt_offset;
2922 	uint8_t *buf_ptr;
2923 	struct sockaddr_in *src, *dst, *isrc, *idst;
2924 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2925 	sadb_lifetime_t *soft =
2926 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2927 	sadb_lifetime_t *hard =
2928 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2929 	sadb_lifetime_t	*idle =
2930 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
2931 	sa_family_t af;
2932 	int error = 0;
2933 	boolean_t isupdate = (newbie != NULL);
2934 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2935 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2936 	ip_stack_t 	*ipst = ns->netstack_ip;
2937 	ipsec_alginfo_t *alg;
2938 	int		rcode;
2939 	boolean_t	async = B_FALSE;
2940 
2941 	init_ipsa_pair(&ipsapp);
2942 
2943 	if (srcext == NULL) {
2944 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2945 		return (EINVAL);
2946 	}
2947 	if (dstext == NULL) {
2948 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2949 		return (EINVAL);
2950 	}
2951 	if (assoc == NULL) {
2952 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2953 		return (EINVAL);
2954 	}
2955 
2956 	src = (struct sockaddr_in *)(srcext + 1);
2957 	src6 = (struct sockaddr_in6 *)(srcext + 1);
2958 	dst = (struct sockaddr_in *)(dstext + 1);
2959 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
2960 	if (isrcext != NULL) {
2961 		isrc = (struct sockaddr_in *)(isrcext + 1);
2962 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2963 		ASSERT(idstext != NULL);
2964 		idst = (struct sockaddr_in *)(idstext + 1);
2965 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
2966 	} else {
2967 		isrc = NULL;
2968 		isrc6 = NULL;
2969 	}
2970 
2971 	af = src->sin_family;
2972 
2973 	if (af == AF_INET) {
2974 		src_addr_ptr = (uint32_t *)&src->sin_addr;
2975 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2976 	} else {
2977 		ASSERT(af == AF_INET6);
2978 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2979 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2980 	}
2981 
2982 	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
2983 	    cl_inet_checkspi &&
2984 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
2985 		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
2986 		    assoc->sadb_sa_spi, NULL);
2987 		if (rcode == -1) {
2988 			return (EEXIST);
2989 		}
2990 	}
2991 
2992 	/*
2993 	 * Check to see if the new SA will be cloned AND paired. The
2994 	 * reason a SA will be cloned is the source or destination addresses
2995 	 * are not specific enough to determine if the SA goes in the outbound
2996 	 * or the inbound hash table, so its cloned and put in both. If
2997 	 * the SA is paired, it's soft linked to another SA for the other
2998 	 * direction. Keeping track and looking up SA's that are direction
2999 	 * unspecific and linked is too hard.
3000 	 */
3001 	if (clone && (pair_ext != NULL)) {
3002 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3003 		return (EINVAL);
3004 	}
3005 
3006 	if (!isupdate) {
3007 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3008 		    src_addr_ptr, dst_addr_ptr, af, ns);
3009 		if (newbie == NULL)
3010 			return (ENOMEM);
3011 	}
3012 
3013 	mutex_enter(&newbie->ipsa_lock);
3014 
3015 	if (isrc != NULL) {
3016 		if (isrc->sin_family == AF_INET) {
3017 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3018 				if (srcext->sadb_address_proto != 0) {
3019 					/*
3020 					 * Mismatched outer-packet protocol
3021 					 * and inner-packet address family.
3022 					 */
3023 					mutex_exit(&newbie->ipsa_lock);
3024 					error = EPROTOTYPE;
3025 					*diagnostic =
3026 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3027 					goto error;
3028 				} else {
3029 					/* Fill in with explicit protocol. */
3030 					srcext->sadb_address_proto =
3031 					    IPPROTO_ENCAP;
3032 					dstext->sadb_address_proto =
3033 					    IPPROTO_ENCAP;
3034 				}
3035 			}
3036 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3037 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3038 		} else {
3039 			ASSERT(isrc->sin_family == AF_INET6);
3040 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3041 				if (srcext->sadb_address_proto != 0) {
3042 					/*
3043 					 * Mismatched outer-packet protocol
3044 					 * and inner-packet address family.
3045 					 */
3046 					mutex_exit(&newbie->ipsa_lock);
3047 					error = EPROTOTYPE;
3048 					*diagnostic =
3049 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3050 					goto error;
3051 				} else {
3052 					/* Fill in with explicit protocol. */
3053 					srcext->sadb_address_proto =
3054 					    IPPROTO_IPV6;
3055 					dstext->sadb_address_proto =
3056 					    IPPROTO_IPV6;
3057 				}
3058 			}
3059 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3060 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3061 		}
3062 		newbie->ipsa_innerfam = isrc->sin_family;
3063 
3064 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3065 		    newbie->ipsa_innerfam);
3066 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3067 		    newbie->ipsa_innerfam);
3068 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3069 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3070 
3071 		/* Unique value uses inner-ports for Tunnel Mode... */
3072 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3073 		    idst->sin_port, dstext->sadb_address_proto,
3074 		    idstext->sadb_address_proto);
3075 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3076 		    idst->sin_port, dstext->sadb_address_proto,
3077 		    idstext->sadb_address_proto);
3078 	} else {
3079 		/* ... and outer-ports for Transport Mode. */
3080 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3081 		    dst->sin_port, dstext->sadb_address_proto, 0);
3082 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3083 		    dst->sin_port, dstext->sadb_address_proto, 0);
3084 	}
3085 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3086 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3087 
3088 	sadb_nat_calculations(newbie,
3089 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3090 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3091 	    src_addr_ptr, dst_addr_ptr);
3092 
3093 	newbie->ipsa_type = samsg->sadb_msg_satype;
3094 
3095 	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3096 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3097 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3098 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3099 
3100 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3101 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3102 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3103 		mutex_exit(&newbie->ipsa_lock);
3104 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3105 		error = EINVAL;
3106 		goto error;
3107 	}
3108 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3109 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3110 		mutex_exit(&newbie->ipsa_lock);
3111 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3112 		error = EINVAL;
3113 		goto error;
3114 	}
3115 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3116 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3117 		mutex_exit(&newbie->ipsa_lock);
3118 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3119 		error = EINVAL;
3120 		goto error;
3121 	}
3122 	/*
3123 	 * If unspecified source address, force replay_wsize to 0.
3124 	 * This is because an SA that has multiple sources of secure
3125 	 * traffic cannot enforce a replay counter w/o synchronizing the
3126 	 * senders.
3127 	 */
3128 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3129 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3130 	else
3131 		newbie->ipsa_replay_wsize = 0;
3132 
3133 	newbie->ipsa_addtime = gethrestime_sec();
3134 
3135 	if (kmcext != NULL) {
3136 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3137 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3138 	}
3139 
3140 	/*
3141 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3142 	 * The spec says that one can update current lifetimes, but
3143 	 * that seems impractical, especially in the larval-to-mature
3144 	 * update that this function performs.
3145 	 */
3146 	if (soft != NULL) {
3147 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3148 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3149 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3150 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3151 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3152 	}
3153 	if (hard != NULL) {
3154 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3155 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3156 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3157 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3158 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3159 	}
3160 	if (idle != NULL) {
3161 		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3162 		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3163 		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3164 		    newbie->ipsa_idleaddlt;
3165 		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3166 	}
3167 
3168 	newbie->ipsa_authtmpl = NULL;
3169 	newbie->ipsa_encrtmpl = NULL;
3170 
3171 #ifdef IPSEC_LATENCY_TEST
3172 	if (akey != NULL && newbie->ipsa_auth_alg != SADB_AALG_NONE) {
3173 #else
3174 	if (akey != NULL) {
3175 #endif
3176 		async = (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
3177 		    IPSEC_ALGS_EXEC_ASYNC);
3178 
3179 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3180 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3181 		/* In case we have to round up to the next byte... */
3182 		if ((akey->sadb_key_bits & 0x7) != 0)
3183 			newbie->ipsa_authkeylen++;
3184 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3185 		    KM_NOSLEEP);
3186 		if (newbie->ipsa_authkey == NULL) {
3187 			error = ENOMEM;
3188 			mutex_exit(&newbie->ipsa_lock);
3189 			goto error;
3190 		}
3191 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3192 		bzero(akey + 1, newbie->ipsa_authkeylen);
3193 
3194 		/*
3195 		 * Pre-initialize the kernel crypto framework key
3196 		 * structure.
3197 		 */
3198 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3199 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3200 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3201 
3202 		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3203 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3204 		    [newbie->ipsa_auth_alg];
3205 		if (alg != NULL && ALG_VALID(alg)) {
3206 			newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3207 			newbie->ipsa_amech.cm_param =
3208 			    (char *)&newbie->ipsa_mac_len;
3209 			newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3210 			newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3211 		} else {
3212 			newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3213 		}
3214 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3215 		rw_exit(&ipss->ipsec_alg_lock);
3216 		if (error != 0) {
3217 			mutex_exit(&newbie->ipsa_lock);
3218 			/*
3219 			 * An error here indicates that alg is the wrong type
3220 			 * (IE: not authentication) or its not in the alg tables
3221 			 * created by ipsecalgs(1m), or Kcf does not like the
3222 			 * parameters passed in with this algorithm, which is
3223 			 * probably a coding error!
3224 			 */
3225 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3226 
3227 			goto error;
3228 		}
3229 	}
3230 
3231 	if (ekey != NULL) {
3232 		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3233 		async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
3234 		    IPSEC_ALGS_EXEC_ASYNC);
3235 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3236 		    [newbie->ipsa_encr_alg];
3237 
3238 		if (alg != NULL && ALG_VALID(alg)) {
3239 			newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3240 			newbie->ipsa_datalen = alg->alg_datalen;
3241 			if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3242 				newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
3243 
3244 			if (alg->alg_flags & ALG_FLAG_COMBINED) {
3245 				newbie->ipsa_flags |= IPSA_F_COMBINED;
3246 				newbie->ipsa_mac_len =  alg->alg_icvlen;
3247 			}
3248 
3249 			if (alg->alg_flags & ALG_FLAG_CCM)
3250 				newbie->ipsa_noncefunc = ccm_params_init;
3251 			else if (alg->alg_flags & ALG_FLAG_GCM)
3252 				newbie->ipsa_noncefunc = gcm_params_init;
3253 			else newbie->ipsa_noncefunc = cbc_params_init;
3254 
3255 			newbie->ipsa_saltlen = alg->alg_saltlen;
3256 			newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3257 			newbie->ipsa_iv_len = alg->alg_ivlen;
3258 			newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3259 			    newbie->ipsa_iv_len;
3260 			newbie->ipsa_emech.cm_param = NULL;
3261 			newbie->ipsa_emech.cm_param_len = 0;
3262 		} else {
3263 			newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3264 		}
3265 		rw_exit(&ipss->ipsec_alg_lock);
3266 
3267 		/*
3268 		 * The byte stream following the sadb_key_t is made up of:
3269 		 * key bytes, [salt bytes], [IV initial value]
3270 		 * All of these have variable length. The IV is typically
3271 		 * randomly generated by this function and not passed in.
3272 		 * By supporting the injection of a known IV, the whole
3273 		 * IPsec subsystem and the underlying crypto subsystem
3274 		 * can be tested with known test vectors.
3275 		 *
3276 		 * The keying material has been checked by ext_check()
3277 		 * and ipsec_valid_key_size(), after removing salt/IV
3278 		 * bits, whats left is the encryption key. If this is too
3279 		 * short, ipsec_create_ctx_tmpl() will fail and the SA
3280 		 * won't get created.
3281 		 *
3282 		 * set ipsa_encrkeylen to length of key only.
3283 		 */
3284 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3285 		newbie->ipsa_encrkeybits -= ekey->sadb_key_reserved;
3286 		newbie->ipsa_encrkeybits -= newbie->ipsa_saltbits;
3287 		newbie->ipsa_encrkeylen = SADB_1TO8(newbie->ipsa_encrkeybits);
3288 
3289 		/* In case we have to round up to the next byte... */
3290 		if ((ekey->sadb_key_bits & 0x7) != 0)
3291 			newbie->ipsa_encrkeylen++;
3292 
3293 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3294 		    KM_NOSLEEP);
3295 		if (newbie->ipsa_encrkey == NULL) {
3296 			error = ENOMEM;
3297 			mutex_exit(&newbie->ipsa_lock);
3298 			goto error;
3299 		}
3300 
3301 		buf_ptr = (uint8_t *)(ekey + 1);
3302 		bcopy(buf_ptr, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3303 
3304 		if (newbie->ipsa_flags & IPSA_F_COMBINED) {
3305 			/*
3306 			 * Combined mode algs need a nonce. Copy the salt and
3307 			 * IV into a buffer. The ipsa_nonce is a pointer into
3308 			 * this buffer, some bytes at the start of the buffer
3309 			 * may be unused, depends on the salt length. The IV
3310 			 * is 64 bit aligned so it can be incremented as a
3311 			 * uint64_t. Zero out key in samsg_t before freeing.
3312 			 */
3313 
3314 			newbie->ipsa_nonce_buf = kmem_alloc(
3315 			    sizeof (ipsec_nonce_t), KM_NOSLEEP);
3316 			if (newbie->ipsa_nonce_buf == NULL) {
3317 				error = ENOMEM;
3318 				mutex_exit(&newbie->ipsa_lock);
3319 				goto error;
3320 			}
3321 			/*
3322 			 * Initialize nonce and salt pointers to point
3323 			 * to the nonce buffer. This is just in case we get
3324 			 * bad data, the pointers will be valid, the data
3325 			 * won't be.
3326 			 *
3327 			 * See sadb.h for layout of nonce.
3328 			 */
3329 			newbie->ipsa_iv = &newbie->ipsa_nonce_buf->iv;
3330 			newbie->ipsa_salt = (uint8_t *)newbie->ipsa_nonce_buf;
3331 			newbie->ipsa_nonce = newbie->ipsa_salt;
3332 			if (newbie->ipsa_saltlen != 0) {
3333 				salt_offset = MAXSALTSIZE -
3334 				    newbie->ipsa_saltlen;
3335 				newbie->ipsa_salt = (uint8_t *)
3336 				    &newbie->ipsa_nonce_buf->salt[salt_offset];
3337 				newbie->ipsa_nonce = newbie->ipsa_salt;
3338 				buf_ptr += newbie->ipsa_encrkeylen;
3339 				bcopy(buf_ptr, newbie->ipsa_salt,
3340 				    newbie->ipsa_saltlen);
3341 			}
3342 			/*
3343 			 * The IV for CCM/GCM mode increments, it should not
3344 			 * repeat. Get a random value for the IV, make a
3345 			 * copy, the SA will expire when/if the IV ever
3346 			 * wraps back to the initial value. If an Initial IV
3347 			 * is passed in via PF_KEY, save this in the SA.
3348 			 * Initialising IV for inbound is pointless as its
3349 			 * taken from the inbound packet.
3350 			 */
3351 			if (!is_inbound) {
3352 				if (ekey->sadb_key_reserved != 0) {
3353 					buf_ptr += newbie->ipsa_saltlen;
3354 					bcopy(buf_ptr, (uint8_t *)newbie->
3355 					    ipsa_iv, SADB_1TO8(ekey->
3356 					    sadb_key_reserved));
3357 				} else {
3358 					(void) random_get_pseudo_bytes(
3359 					    (uint8_t *)newbie->ipsa_iv,
3360 					    newbie->ipsa_iv_len);
3361 				}
3362 				newbie->ipsa_iv_softexpire =
3363 				    (*newbie->ipsa_iv) << 9;
3364 				newbie->ipsa_iv_hardexpire = *newbie->ipsa_iv;
3365 			}
3366 		}
3367 		bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3368 
3369 		/*
3370 		 * Pre-initialize the kernel crypto framework key
3371 		 * structure.
3372 		 */
3373 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3374 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3375 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3376 
3377 		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3378 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3379 		rw_exit(&ipss->ipsec_alg_lock);
3380 		if (error != 0) {
3381 			mutex_exit(&newbie->ipsa_lock);
3382 			/* See above for error explanation. */
3383 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3384 			goto error;
3385 		}
3386 	}
3387 
3388 	if (async)
3389 		newbie->ipsa_flags |= IPSA_F_ASYNC;
3390 
3391 	/*
3392 	 * Ptrs to processing functions.
3393 	 */
3394 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3395 		ipsecesp_init_funcs(newbie);
3396 	else
3397 		ipsecah_init_funcs(newbie);
3398 	ASSERT(newbie->ipsa_output_func != NULL &&
3399 	    newbie->ipsa_input_func != NULL);
3400 
3401 	/*
3402 	 * Certificate ID stuff.
3403 	 */
3404 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3405 		sadb_ident_t *id =
3406 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3407 
3408 		/*
3409 		 * Can assume strlen() will return okay because ext_check() in
3410 		 * keysock.c prepares the string for us.
3411 		 */
3412 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3413 		    (char *)(id+1), ns);
3414 		if (newbie->ipsa_src_cid == NULL) {
3415 			error = ENOMEM;
3416 			mutex_exit(&newbie->ipsa_lock);
3417 			goto error;
3418 		}
3419 	}
3420 
3421 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3422 		sadb_ident_t *id =
3423 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3424 
3425 		/*
3426 		 * Can assume strlen() will return okay because ext_check() in
3427 		 * keysock.c prepares the string for us.
3428 		 */
3429 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3430 		    (char *)(id+1), ns);
3431 		if (newbie->ipsa_dst_cid == NULL) {
3432 			error = ENOMEM;
3433 			mutex_exit(&newbie->ipsa_lock);
3434 			goto error;
3435 		}
3436 	}
3437 
3438 	/*
3439 	 * sensitivity label handling code:
3440 	 * Convert sens + bitmap into cred_t, and associate it
3441 	 * with the new SA.
3442 	 */
3443 	if (sens != NULL) {
3444 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3445 
3446 		newbie->ipsa_tsl = sadb_label_from_sens(sens, bitmap);
3447 	}
3448 
3449 	/*
3450 	 * Likewise for outer sensitivity.
3451 	 */
3452 	if (osens != NULL) {
3453 		uint64_t *bitmap = (uint64_t *)(osens + 1);
3454 		ts_label_t *tsl, *effective_tsl;
3455 		uint32_t *peer_addr_ptr;
3456 		zoneid_t zoneid = GLOBAL_ZONEID;
3457 		zone_t *zone;
3458 
3459 		peer_addr_ptr = is_inbound ? src_addr_ptr : dst_addr_ptr;
3460 
3461 		tsl = sadb_label_from_sens(osens, bitmap);
3462 		newbie->ipsa_mac_exempt = CONN_MAC_DEFAULT;
3463 
3464 		if (osens->sadb_x_sens_flags & SADB_X_SENS_IMPLICIT) {
3465 			newbie->ipsa_mac_exempt = CONN_MAC_IMPLICIT;
3466 		}
3467 
3468 		error = tsol_check_dest(tsl, peer_addr_ptr,
3469 		    (af == AF_INET6)?IPV6_VERSION:IPV4_VERSION,
3470 		    newbie->ipsa_mac_exempt, B_TRUE, &effective_tsl);
3471 		if (error != 0) {
3472 			label_rele(tsl);
3473 			mutex_exit(&newbie->ipsa_lock);
3474 			goto error;
3475 		}
3476 
3477 		if (effective_tsl != NULL) {
3478 			label_rele(tsl);
3479 			tsl = effective_tsl;
3480 		}
3481 
3482 		newbie->ipsa_otsl = tsl;
3483 
3484 		zone = zone_find_by_label(tsl);
3485 		if (zone != NULL) {
3486 			zoneid = zone->zone_id;
3487 			zone_rele(zone);
3488 		}
3489 		/*
3490 		 * For exclusive stacks we set the zoneid to zero to operate
3491 		 * as if in the global zone for tsol_compute_label_v4/v6
3492 		 */
3493 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
3494 			zoneid = GLOBAL_ZONEID;
3495 
3496 		if (af == AF_INET6) {
3497 			error = tsol_compute_label_v6(tsl, zoneid,
3498 			    (in6_addr_t *)peer_addr_ptr,
3499 			    newbie->ipsa_opt_storage, ipst);
3500 		} else {
3501 			error = tsol_compute_label_v4(tsl, zoneid,
3502 			    *peer_addr_ptr, newbie->ipsa_opt_storage, ipst);
3503 		}
3504 		if (error != 0) {
3505 			mutex_exit(&newbie->ipsa_lock);
3506 			goto error;
3507 		}
3508 	}
3509 
3510 
3511 	if (replayext != NULL) {
3512 		if ((replayext->sadb_x_rc_replay32 == 0) &&
3513 		    (replayext->sadb_x_rc_replay64 != 0)) {
3514 			error = EOPNOTSUPP;
3515 			*diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3516 			mutex_exit(&newbie->ipsa_lock);
3517 			goto error;
3518 		}
3519 		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3520 	}
3521 
3522 	/* now that the SA has been updated, set its new state */
3523 	newbie->ipsa_state = assoc->sadb_sa_state;
3524 
3525 	if (clone) {
3526 		newbie->ipsa_haspeer = B_TRUE;
3527 	} else {
3528 		if (!is_inbound) {
3529 			lifetime_fuzz(newbie);
3530 		}
3531 	}
3532 	/*
3533 	 * The less locks I hold when doing an insertion and possible cloning,
3534 	 * the better!
3535 	 */
3536 	mutex_exit(&newbie->ipsa_lock);
3537 
3538 	if (clone) {
3539 		newbie_clone = sadb_cloneassoc(newbie);
3540 
3541 		if (newbie_clone == NULL) {
3542 			error = ENOMEM;
3543 			goto error;
3544 		}
3545 	}
3546 
3547 	/*
3548 	 * Enter the bucket locks.  The order of entry is outbound,
3549 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3550 	 * based on the destination address type.  If the destination address
3551 	 * type is for a node that isn't mine (or potentially mine), the
3552 	 * "primary" bucket is the outbound one.
3553 	 */
3554 	if (!is_inbound) {
3555 		/* primary == outbound */
3556 		mutex_enter(&primary->isaf_lock);
3557 		mutex_enter(&secondary->isaf_lock);
3558 	} else {
3559 		/* primary == inbound */
3560 		mutex_enter(&secondary->isaf_lock);
3561 		mutex_enter(&primary->isaf_lock);
3562 	}
3563 
3564 	/*
3565 	 * sadb_insertassoc() doesn't increment the reference
3566 	 * count.  We therefore have to increment the
3567 	 * reference count one more time to reflect the
3568 	 * pointers of the table that reference this SA.
3569 	 */
3570 	IPSA_REFHOLD(newbie);
3571 
3572 	if (isupdate) {
3573 		/*
3574 		 * Unlink from larval holding cell in the "inbound" fanout.
3575 		 */
3576 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3577 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3578 		sadb_unlinkassoc(newbie);
3579 	}
3580 
3581 	mutex_enter(&newbie->ipsa_lock);
3582 	error = sadb_insertassoc(newbie, primary);
3583 	mutex_exit(&newbie->ipsa_lock);
3584 
3585 	if (error != 0) {
3586 		/*
3587 		 * Since sadb_insertassoc() failed, we must decrement the
3588 		 * refcount again so the cleanup code will actually free
3589 		 * the offending SA.
3590 		 */
3591 		IPSA_REFRELE(newbie);
3592 		goto error_unlock;
3593 	}
3594 
3595 	if (newbie_clone != NULL) {
3596 		mutex_enter(&newbie_clone->ipsa_lock);
3597 		error = sadb_insertassoc(newbie_clone, secondary);
3598 		mutex_exit(&newbie_clone->ipsa_lock);
3599 		if (error != 0) {
3600 			/* Collision in secondary table. */
3601 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3602 			goto error_unlock;
3603 		}
3604 		IPSA_REFHOLD(newbie_clone);
3605 	} else {
3606 		ASSERT(primary != secondary);
3607 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3608 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3609 		if (scratch != NULL) {
3610 			/* Collision in secondary table. */
3611 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3612 			/* Set the error, since ipsec_getassocbyspi() can't. */
3613 			error = EEXIST;
3614 			goto error_unlock;
3615 		}
3616 	}
3617 
3618 	/* OKAY!  So let's do some reality check assertions. */
3619 
3620 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3621 	ASSERT(newbie_clone == NULL ||
3622 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3623 
3624 error_unlock:
3625 
3626 	/*
3627 	 * We can exit the locks in any order.	Only entrance needs to
3628 	 * follow any protocol.
3629 	 */
3630 	mutex_exit(&secondary->isaf_lock);
3631 	mutex_exit(&primary->isaf_lock);
3632 
3633 	if (pair_ext != NULL && error == 0) {
3634 		/* update pair_spi if it exists. */
3635 		ipsa_query_t sq;
3636 
3637 		sq.spp = spp;		/* XXX param */
3638 		error = sadb_form_query(ksi, IPSA_Q_DST, IPSA_Q_SRC|IPSA_Q_DST|
3639 		    IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, &sq, diagnostic);
3640 		if (error)
3641 			return (error);
3642 
3643 		error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
3644 
3645 		if (error != 0)
3646 			goto error;
3647 
3648 		if (ipsapp.ipsap_psa_ptr != NULL) {
3649 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3650 			error = EINVAL;
3651 		} else {
3652 			/* update_pairing() sets diagnostic */
3653 			error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
3654 		}
3655 	}
3656 	/* Common error point for this routine. */
3657 error:
3658 	if (newbie != NULL) {
3659 		if (error != 0) {
3660 			/* This SA is broken, let the reaper clean up. */
3661 			mutex_enter(&newbie->ipsa_lock);
3662 			newbie->ipsa_state = IPSA_STATE_DEAD;
3663 			newbie->ipsa_hardexpiretime = 1;
3664 			mutex_exit(&newbie->ipsa_lock);
3665 		}
3666 		IPSA_REFRELE(newbie);
3667 	}
3668 	if (newbie_clone != NULL) {
3669 		IPSA_REFRELE(newbie_clone);
3670 	}
3671 
3672 	if (error == 0) {
3673 		/*
3674 		 * Construct favorable PF_KEY return message and send to
3675 		 * keysock. Update the flags in the original keysock message
3676 		 * to reflect the actual flags in the new SA.
3677 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3678 		 * make sure to REFHOLD, call, then REFRELE.)
3679 		 */
3680 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3681 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3682 	}
3683 
3684 	destroy_ipsa_pair(&ipsapp);
3685 	return (error);
3686 }
3687 
3688 /*
3689  * Set the time of first use for a security association.  Update any
3690  * expiration times as a result.
3691  */
3692 void
3693 sadb_set_usetime(ipsa_t *assoc)
3694 {
3695 	time_t snapshot = gethrestime_sec();
3696 
3697 	mutex_enter(&assoc->ipsa_lock);
3698 	assoc->ipsa_lastuse = snapshot;
3699 	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3700 
3701 	/*
3702 	 * Caller does check usetime before calling me usually, and
3703 	 * double-checking is better than a mutex_enter/exit hit.
3704 	 */
3705 	if (assoc->ipsa_usetime == 0) {
3706 		/*
3707 		 * This is redundant for outbound SA's, as
3708 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3709 		 * Inbound SAs, however, have no such protection.
3710 		 */
3711 		assoc->ipsa_flags |= IPSA_F_USED;
3712 		assoc->ipsa_usetime = snapshot;
3713 
3714 		/*
3715 		 * After setting the use time, see if we have a use lifetime
3716 		 * that would cause the actual SA expiration time to shorten.
3717 		 */
3718 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3719 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3720 	}
3721 	mutex_exit(&assoc->ipsa_lock);
3722 }
3723 
3724 /*
3725  * Send up a PF_KEY expire message for this association.
3726  */
3727 static void
3728 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3729 {
3730 	mblk_t *mp, *mp1;
3731 	int alloclen, af;
3732 	sadb_msg_t *samsg;
3733 	sadb_lifetime_t *current, *expire;
3734 	sadb_sa_t *saext;
3735 	uint8_t *end;
3736 	boolean_t tunnel_mode;
3737 
3738 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3739 
3740 	/* Don't bother sending if there's no queue. */
3741 	if (pfkey_q == NULL)
3742 		return;
3743 
3744 	mp = sadb_keysock_out(0);
3745 	if (mp == NULL) {
3746 		/* cmn_err(CE_WARN, */
3747 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3748 		return;
3749 	}
3750 
3751 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3752 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3753 
3754 	af = assoc->ipsa_addrfam;
3755 	switch (af) {
3756 	case AF_INET:
3757 		alloclen += 2 * sizeof (struct sockaddr_in);
3758 		break;
3759 	case AF_INET6:
3760 		alloclen += 2 * sizeof (struct sockaddr_in6);
3761 		break;
3762 	default:
3763 		/* Won't happen unless there's a kernel bug. */
3764 		freeb(mp);
3765 		cmn_err(CE_WARN,
3766 		    "sadb_expire_assoc: Unknown address length.\n");
3767 		return;
3768 	}
3769 
3770 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3771 	if (tunnel_mode) {
3772 		alloclen += 2 * sizeof (sadb_address_t);
3773 		switch (assoc->ipsa_innerfam) {
3774 		case AF_INET:
3775 			alloclen += 2 * sizeof (struct sockaddr_in);
3776 			break;
3777 		case AF_INET6:
3778 			alloclen += 2 * sizeof (struct sockaddr_in6);
3779 			break;
3780 		default:
3781 			/* Won't happen unless there's a kernel bug. */
3782 			freeb(mp);
3783 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3784 			    "Unknown inner address length.\n");
3785 			return;
3786 		}
3787 	}
3788 
3789 	mp->b_cont = allocb(alloclen, BPRI_HI);
3790 	if (mp->b_cont == NULL) {
3791 		freeb(mp);
3792 		/* cmn_err(CE_WARN, */
3793 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3794 		return;
3795 	}
3796 
3797 	mp1 = mp;
3798 	mp = mp->b_cont;
3799 	end = mp->b_wptr + alloclen;
3800 
3801 	samsg = (sadb_msg_t *)mp->b_wptr;
3802 	mp->b_wptr += sizeof (*samsg);
3803 	samsg->sadb_msg_version = PF_KEY_V2;
3804 	samsg->sadb_msg_type = SADB_EXPIRE;
3805 	samsg->sadb_msg_errno = 0;
3806 	samsg->sadb_msg_satype = assoc->ipsa_type;
3807 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3808 	samsg->sadb_msg_reserved = 0;
3809 	samsg->sadb_msg_seq = 0;
3810 	samsg->sadb_msg_pid = 0;
3811 
3812 	saext = (sadb_sa_t *)mp->b_wptr;
3813 	mp->b_wptr += sizeof (*saext);
3814 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3815 	saext->sadb_sa_exttype = SADB_EXT_SA;
3816 	saext->sadb_sa_spi = assoc->ipsa_spi;
3817 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3818 	saext->sadb_sa_state = assoc->ipsa_state;
3819 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3820 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3821 	saext->sadb_sa_flags = assoc->ipsa_flags;
3822 
3823 	current = (sadb_lifetime_t *)mp->b_wptr;
3824 	mp->b_wptr += sizeof (sadb_lifetime_t);
3825 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3826 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3827 	/* We do not support the concept. */
3828 	current->sadb_lifetime_allocations = 0;
3829 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3830 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3831 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3832 
3833 	expire = (sadb_lifetime_t *)mp->b_wptr;
3834 	mp->b_wptr += sizeof (*expire);
3835 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3836 
3837 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3838 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3839 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3840 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3841 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3842 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3843 	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3844 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3845 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3846 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3847 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3848 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3849 	} else {
3850 		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3851 		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3852 		expire->sadb_lifetime_allocations = 0;
3853 		expire->sadb_lifetime_bytes = 0;
3854 		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3855 		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3856 	}
3857 
3858 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3859 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3860 	    SA_PROTO(assoc), 0);
3861 	ASSERT(mp->b_wptr != NULL);
3862 
3863 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3864 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3865 	    SA_PROTO(assoc), 0);
3866 	ASSERT(mp->b_wptr != NULL);
3867 
3868 	if (tunnel_mode) {
3869 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3870 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3871 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3872 		    assoc->ipsa_innersrcpfx);
3873 		ASSERT(mp->b_wptr != NULL);
3874 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3875 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3876 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3877 		    assoc->ipsa_innerdstpfx);
3878 		ASSERT(mp->b_wptr != NULL);
3879 	}
3880 
3881 	/* Can just putnext, we're ready to go! */
3882 	putnext(pfkey_q, mp1);
3883 }
3884 
3885 /*
3886  * "Age" the SA with the number of bytes that was used to protect traffic.
3887  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3888  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3889  * otherwise.  (If B_FALSE is returned, the association either was, or became
3890  * DEAD.)
3891  */
3892 boolean_t
3893 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3894     boolean_t sendmsg)
3895 {
3896 	boolean_t rc = B_TRUE;
3897 	uint64_t newtotal;
3898 
3899 	mutex_enter(&assoc->ipsa_lock);
3900 	newtotal = assoc->ipsa_bytes + bytes;
3901 	if (assoc->ipsa_hardbyteslt != 0 &&
3902 	    newtotal >= assoc->ipsa_hardbyteslt) {
3903 		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3904 			sadb_delete_cluster(assoc);
3905 			/*
3906 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3907 			 * this off on another non-interrupt thread.  Also
3908 			 * unlink this SA immediately.
3909 			 */
3910 			assoc->ipsa_state = IPSA_STATE_DEAD;
3911 			if (sendmsg)
3912 				sadb_expire_assoc(pfkey_q, assoc);
3913 			/*
3914 			 * Set non-zero expiration time so sadb_age_assoc()
3915 			 * will work when reaping.
3916 			 */
3917 			assoc->ipsa_hardexpiretime = (time_t)1;
3918 		} /* Else someone beat me to it! */
3919 		rc = B_FALSE;
3920 	} else if (assoc->ipsa_softbyteslt != 0 &&
3921 	    (newtotal >= assoc->ipsa_softbyteslt)) {
3922 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3923 			/*
3924 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3925 			 * this off on another non-interrupt thread.
3926 			 */
3927 			assoc->ipsa_state = IPSA_STATE_DYING;
3928 			assoc->ipsa_bytes = newtotal;
3929 			if (sendmsg)
3930 				sadb_expire_assoc(pfkey_q, assoc);
3931 		} /* Else someone beat me to it! */
3932 	}
3933 	if (rc == B_TRUE)
3934 		assoc->ipsa_bytes = newtotal;
3935 	mutex_exit(&assoc->ipsa_lock);
3936 	return (rc);
3937 }
3938 
3939 /*
3940  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3941  *     sadb_age_assoc().
3942  */
3943 static ipsa_t *
3944 sadb_torch_assoc(isaf_t *head, ipsa_t *sa)
3945 {
3946 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3947 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3948 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3949 
3950 	/*
3951 	 * Force cached SAs to be revalidated..
3952 	 */
3953 	head->isaf_gen++;
3954 
3955 	mutex_exit(&sa->ipsa_lock);
3956 	sadb_unlinkassoc(sa);
3957 
3958 	return (NULL);
3959 }
3960 
3961 /*
3962  * Do various SA-is-idle activities depending on delta (the number of idle
3963  * seconds on the SA) and/or other properties of the SA.
3964  *
3965  * Return B_TRUE if I've sent a packet, because I have to drop the
3966  * association's mutex before sending a packet out the wire.
3967  */
3968 /* ARGSUSED */
3969 static boolean_t
3970 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3971 {
3972 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3973 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3974 
3975 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3976 
3977 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3978 	    delta >= nat_t_interval &&
3979 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3980 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3981 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3982 		mutex_exit(&assoc->ipsa_lock);
3983 		ipsecesp_send_keepalive(assoc);
3984 		return (B_TRUE);
3985 	}
3986 	return (B_FALSE);
3987 }
3988 
3989 /*
3990  * Return "assoc" if haspeer is true and I send an expire.  This allows
3991  * the consumers' aging functions to tidy up an expired SA's peer.
3992  */
3993 static ipsa_t *
3994 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3995     time_t current, int reap_delay, boolean_t inbound)
3996 {
3997 	ipsa_t *retval = NULL;
3998 	boolean_t dropped_mutex = B_FALSE;
3999 
4000 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4001 
4002 	mutex_enter(&assoc->ipsa_lock);
4003 
4004 	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4005 	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4006 	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4007 	    (assoc->ipsa_hardexpiretime != 0))) &&
4008 	    (assoc->ipsa_hardexpiretime <= current)) {
4009 		assoc->ipsa_state = IPSA_STATE_DEAD;
4010 		return (sadb_torch_assoc(head, assoc));
4011 	}
4012 
4013 	/*
4014 	 * Check lifetimes.  Fortunately, SA setup is done
4015 	 * such that there are only two times to look at,
4016 	 * softexpiretime, and hardexpiretime.
4017 	 *
4018 	 * Check hard first.
4019 	 */
4020 
4021 	if (assoc->ipsa_hardexpiretime != 0 &&
4022 	    assoc->ipsa_hardexpiretime <= current) {
4023 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4024 			return (sadb_torch_assoc(head, assoc));
4025 
4026 		if (inbound) {
4027 			sadb_delete_cluster(assoc);
4028 		}
4029 
4030 		/*
4031 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4032 		 */
4033 		assoc->ipsa_state = IPSA_STATE_DEAD;
4034 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4035 			/*
4036 			 * If the SA is paired or peered with another, put
4037 			 * a copy on a list which can be processed later, the
4038 			 * pair/peer SA needs to be updated so the both die
4039 			 * at the same time.
4040 			 *
4041 			 * If I return assoc, I have to bump up its reference
4042 			 * count to keep with the ipsa_t reference count
4043 			 * semantics.
4044 			 */
4045 			IPSA_REFHOLD(assoc);
4046 			retval = assoc;
4047 		}
4048 		sadb_expire_assoc(pfkey_q, assoc);
4049 		assoc->ipsa_hardexpiretime = current + reap_delay;
4050 	} else if (assoc->ipsa_softexpiretime != 0 &&
4051 	    assoc->ipsa_softexpiretime <= current &&
4052 	    assoc->ipsa_state < IPSA_STATE_DYING) {
4053 		/*
4054 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4055 		 * this off on another non-interrupt thread.
4056 		 */
4057 		assoc->ipsa_state = IPSA_STATE_DYING;
4058 		if (assoc->ipsa_haspeer) {
4059 			/*
4060 			 * If the SA has a peer, update the peer's state
4061 			 * on SOFT_EXPIRE, this is mostly to prevent two
4062 			 * expire messages from effectively the same SA.
4063 			 *
4064 			 * Don't care about paired SA's, then can (and should)
4065 			 * be able to soft expire at different times.
4066 			 *
4067 			 * If I return assoc, I have to bump up its
4068 			 * reference count to keep with the ipsa_t reference
4069 			 * count semantics.
4070 			 */
4071 			IPSA_REFHOLD(assoc);
4072 			retval = assoc;
4073 		}
4074 		sadb_expire_assoc(pfkey_q, assoc);
4075 	} else if (assoc->ipsa_idletime != 0 &&
4076 	    assoc->ipsa_idleexpiretime <= current) {
4077 		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4078 			assoc->ipsa_state = IPSA_STATE_IDLE;
4079 		}
4080 
4081 		/*
4082 		 * Need to handle Mature case
4083 		 */
4084 		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4085 			sadb_expire_assoc(pfkey_q, assoc);
4086 		}
4087 	} else {
4088 		/* Check idle time activities. */
4089 		dropped_mutex = sadb_idle_activities(assoc,
4090 		    current - assoc->ipsa_lastuse, inbound);
4091 	}
4092 
4093 	if (!dropped_mutex)
4094 		mutex_exit(&assoc->ipsa_lock);
4095 	return (retval);
4096 }
4097 
4098 /*
4099  * Called by a consumer protocol to do ther dirty work of reaping dead
4100  * Security Associations.
4101  *
4102  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4103  * SA's that are already marked DEAD, so expired SA's are only reaped
4104  * the second time sadb_ager() runs.
4105  */
4106 void
4107 sadb_ager(sadb_t *sp, queue_t *pfkey_q, int reap_delay, netstack_t *ns)
4108 {
4109 	int i;
4110 	isaf_t *bucket;
4111 	ipsa_t *assoc, *spare;
4112 	iacqf_t *acqlist;
4113 	ipsacq_t *acqrec, *spareacq;
4114 	templist_t *haspeerlist, *newbie;
4115 	/* Snapshot current time now. */
4116 	time_t current = gethrestime_sec();
4117 	haspeerlist = NULL;
4118 
4119 	/*
4120 	 * Do my dirty work.  This includes aging real entries, aging
4121 	 * larvals, and aging outstanding ACQUIREs.
4122 	 *
4123 	 * I hope I don't tie up resources for too long.
4124 	 */
4125 
4126 	/* Age acquires. */
4127 
4128 	for (i = 0; i < sp->sdb_hashsize; i++) {
4129 		acqlist = &sp->sdb_acq[i];
4130 		mutex_enter(&acqlist->iacqf_lock);
4131 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4132 		    acqrec = spareacq) {
4133 			spareacq = acqrec->ipsacq_next;
4134 			if (current > acqrec->ipsacq_expire)
4135 				sadb_destroy_acquire(acqrec, ns);
4136 		}
4137 		mutex_exit(&acqlist->iacqf_lock);
4138 	}
4139 
4140 	/* Age inbound associations. */
4141 	for (i = 0; i < sp->sdb_hashsize; i++) {
4142 		bucket = &(sp->sdb_if[i]);
4143 		mutex_enter(&bucket->isaf_lock);
4144 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4145 		    assoc = spare) {
4146 			spare = assoc->ipsa_next;
4147 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4148 			    reap_delay, B_TRUE) != NULL) {
4149 				/*
4150 				 * Put SA's which have a peer or SA's which
4151 				 * are paired on a list for processing after
4152 				 * all the hash tables have been walked.
4153 				 *
4154 				 * sadb_age_assoc() increments the refcnt,
4155 				 * effectively doing an IPSA_REFHOLD().
4156 				 */
4157 				newbie = kmem_alloc(sizeof (*newbie),
4158 				    KM_NOSLEEP);
4159 				if (newbie == NULL) {
4160 					/*
4161 					 * Don't forget to REFRELE().
4162 					 */
4163 					IPSA_REFRELE(assoc);
4164 					continue;	/* for loop... */
4165 				}
4166 				newbie->next = haspeerlist;
4167 				newbie->ipsa = assoc;
4168 				haspeerlist = newbie;
4169 			}
4170 		}
4171 		mutex_exit(&bucket->isaf_lock);
4172 	}
4173 
4174 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4175 	haspeerlist = NULL;
4176 
4177 	/* Age outbound associations. */
4178 	for (i = 0; i < sp->sdb_hashsize; i++) {
4179 		bucket = &(sp->sdb_of[i]);
4180 		mutex_enter(&bucket->isaf_lock);
4181 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4182 		    assoc = spare) {
4183 			spare = assoc->ipsa_next;
4184 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4185 			    reap_delay, B_FALSE) != NULL) {
4186 				/*
4187 				 * sadb_age_assoc() increments the refcnt,
4188 				 * effectively doing an IPSA_REFHOLD().
4189 				 */
4190 				newbie = kmem_alloc(sizeof (*newbie),
4191 				    KM_NOSLEEP);
4192 				if (newbie == NULL) {
4193 					/*
4194 					 * Don't forget to REFRELE().
4195 					 */
4196 					IPSA_REFRELE(assoc);
4197 					continue;	/* for loop... */
4198 				}
4199 				newbie->next = haspeerlist;
4200 				newbie->ipsa = assoc;
4201 				haspeerlist = newbie;
4202 			}
4203 		}
4204 		mutex_exit(&bucket->isaf_lock);
4205 	}
4206 
4207 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4208 
4209 	/*
4210 	 * Run a GC pass to clean out dead identities.
4211 	 */
4212 	ipsid_gc(ns);
4213 }
4214 
4215 /*
4216  * Figure out when to reschedule the ager.
4217  */
4218 timeout_id_t
4219 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4220     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4221 {
4222 	hrtime_t end = gethrtime();
4223 	uint_t interval = *intp;	/* "interval" is in ms. */
4224 
4225 	/*
4226 	 * See how long this took.  If it took too long, increase the
4227 	 * aging interval.
4228 	 */
4229 	if ((end - begin) > MSEC2NSEC(interval)) {
4230 		if (interval >= intmax) {
4231 			/* XXX Rate limit this?  Or recommend flush? */
4232 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4233 			    "Too many SA's to age out in %d msec.\n",
4234 			    intmax);
4235 		} else {
4236 			/* Double by shifting by one bit. */
4237 			interval <<= 1;
4238 			interval = min(interval, intmax);
4239 		}
4240 	} else if ((end - begin) <= (MSEC2NSEC(interval) / 2) &&
4241 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4242 		/*
4243 		 * If I took less than half of the interval, then I should
4244 		 * ratchet the interval back down.  Never automatically
4245 		 * shift below the default aging interval.
4246 		 *
4247 		 * NOTE:This even overrides manual setting of the age
4248 		 *	interval using NDD to lower the setting past the
4249 		 *	default.  In other words, if you set the interval
4250 		 *	lower than the default, and your SADB gets too big,
4251 		 *	the interval will only self-lower back to the default.
4252 		 */
4253 		/* Halve by shifting one bit. */
4254 		interval >>= 1;
4255 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4256 	}
4257 	*intp = interval;
4258 	return (qtimeout(pfkey_q, ager, agerarg,
4259 	    drv_usectohz(interval * (MICROSEC / MILLISEC))));
4260 }
4261 
4262 
/*
 * Update the lifetime values of an SA.  This is the path an SADB_UPDATE
 * message takes when updating a MATURE or DYING SA.
 *
 * "hard", "soft" and "idle" are the lifetime extensions from the message;
 * any of them may be NULL, in which case that class of lifetime is left
 * alone.  Within an extension, a field of zero leaves the SA's
 * corresponding value unchanged.  "outbound" only controls whether
 * lifetime_fuzz() is applied to a MATURE SA's soft expire time.
 *
 * The SA's lock is taken for the duration of the update.
 */
static void
sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
    sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
{
	mutex_enter(&assoc->ipsa_lock);

	/*
	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
	 * passed in during an update message.	We currently don't handle
	 * these.
	 */

	if (hard != NULL) {
		if (hard->sadb_lifetime_bytes != 0)
			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
		if (hard->sadb_lifetime_usetime != 0)
			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
		if (hard->sadb_lifetime_addtime != 0)
			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
		/* Recompute the hard expire time from the add time. */
		if (assoc->ipsa_hardaddlt != 0) {
			assoc->ipsa_hardexpiretime =
			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
		}
		/* The use-based lifetime only matters once the SA is used. */
		if (assoc->ipsa_harduselt != 0 &&
		    assoc->ipsa_flags & IPSA_F_USED) {
			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
		}
		if (hard->sadb_lifetime_allocations != 0)
			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
	}

	if (soft != NULL) {
		/*
		 * Soft byte and use-time limits are clamped to the SA's hard
		 * values.
		 * NOTE(review): a hard value of zero clamps the soft value
		 * to zero as well -- confirm that is intended.
		 */
		if (soft->sadb_lifetime_bytes != 0) {
			if (soft->sadb_lifetime_bytes >
			    assoc->ipsa_hardbyteslt) {
				assoc->ipsa_softbyteslt =
				    assoc->ipsa_hardbyteslt;
			} else {
				assoc->ipsa_softbyteslt =
				    soft->sadb_lifetime_bytes;
			}
		}
		if (soft->sadb_lifetime_usetime != 0) {
			if (soft->sadb_lifetime_usetime >
			    assoc->ipsa_harduselt) {
				assoc->ipsa_softuselt =
				    assoc->ipsa_harduselt;
			} else {
				assoc->ipsa_softuselt =
				    soft->sadb_lifetime_usetime;
			}
		}
		/*
		 * NOTE(review): unlike the two cases above, this compares the
		 * message's add-time lifetime against ipsa_hardexpiretime
		 * (computed above as ipsa_addtime + ipsa_hardaddlt), not
		 * against ipsa_hardaddlt, and the clamping branch writes
		 * ipsa_softexpiretime rather than ipsa_softaddlt.  Confirm
		 * the intent before changing.
		 */
		if (soft->sadb_lifetime_addtime != 0) {
			if (soft->sadb_lifetime_addtime >
			    assoc->ipsa_hardexpiretime) {
				assoc->ipsa_softexpiretime =
				    assoc->ipsa_hardexpiretime;
			} else {
				assoc->ipsa_softaddlt =
				    soft->sadb_lifetime_addtime;
			}
		}
		/* Recompute the soft expire time from the add time. */
		if (assoc->ipsa_softaddlt != 0) {
			assoc->ipsa_softexpiretime =
			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
		}
		if (assoc->ipsa_softuselt != 0 &&
		    assoc->ipsa_flags & IPSA_F_USED) {
			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
		}
		/* Only outbound, MATURE SAs get their soft expiry fuzzed. */
		if (outbound && assoc->ipsa_softexpiretime != 0) {
			if (assoc->ipsa_state == IPSA_STATE_MATURE)
				lifetime_fuzz(assoc);
		}

		if (soft->sadb_lifetime_allocations != 0)
			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
	}

	if (idle != NULL) {
		time_t current = gethrestime_sec();
		/*
		 * If the idle timer has already expired and the add-time
		 * value is unchanged, restart the timer from now.
		 */
		if ((assoc->ipsa_idleexpiretime <= current) &&
		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
			assoc->ipsa_idleexpiretime =
			    current + assoc->ipsa_idleaddlt;
		}
		if (idle->sadb_lifetime_addtime != 0)
			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
		if (idle->sadb_lifetime_usetime != 0)
			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
		/*
		 * NOTE(review): this block is guarded by the SA's
		 * ipsa_idleaddlt but uses the message's addtime, which may be
		 * zero while ipsa_idleaddlt still holds an earlier non-zero
		 * value; in that case ipsa_idletime is set to zero here.
		 * Confirm whether ipsa_idleaddlt was meant.
		 */
		if (assoc->ipsa_idleaddlt != 0) {
			assoc->ipsa_idleexpiretime =
			    current + idle->sadb_lifetime_addtime;
			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
		}
		/*
		 * With a use-time idle value, the effective idle time is the
		 * smaller of the two (or the use-time value alone if no
		 * idle time has been set above).
		 */
		if (assoc->ipsa_idleuselt != 0) {
			if (assoc->ipsa_idletime != 0) {
				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
				    assoc->ipsa_idleuselt);
			assoc->ipsa_idleexpiretime =
			    current + assoc->ipsa_idletime;
			} else {
				assoc->ipsa_idleexpiretime =
				    current + assoc->ipsa_idleuselt;
				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
			}
		}
	}
	mutex_exit(&assoc->ipsa_lock);
}
4377 
4378 static int
4379 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4380 {
4381 	int rcode = 0;
4382 	time_t current = gethrestime_sec();
4383 
4384 	mutex_enter(&assoc->ipsa_lock);
4385 
4386 	switch (new_state) {
4387 	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4388 		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4389 			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4390 			assoc->ipsa_idleexpiretime =
4391 			    current + assoc->ipsa_idletime;
4392 		}
4393 		break;
4394 	case SADB_X_SASTATE_IDLE:
4395 		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4396 			assoc->ipsa_state = IPSA_STATE_IDLE;
4397 			assoc->ipsa_idleexpiretime =
4398 			    current + assoc->ipsa_idletime;
4399 		} else {
4400 			rcode = EINVAL;
4401 		}
4402 		break;
4403 
4404 	case SADB_X_SASTATE_ACTIVE:
4405 		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4406 			rcode = EINVAL;
4407 			break;
4408 		}
4409 		assoc->ipsa_state = IPSA_STATE_MATURE;
4410 		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4411 
4412 		if (ipkt_lst == NULL) {
4413 			break;
4414 		}
4415 
4416 		if (assoc->ipsa_bpkt_head != NULL) {
4417 			*ipkt_lst = assoc->ipsa_bpkt_head;
4418 			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4419 			assoc->ipsa_mblkcnt = 0;
4420 		} else {
4421 			*ipkt_lst = NULL;
4422 		}
4423 		break;
4424 	default:
4425 		rcode = EINVAL;
4426 		break;
4427 	}
4428 
4429 	mutex_exit(&assoc->ipsa_lock);
4430 	return (rcode);
4431 }
4432 
4433 /*
4434  * Check a proposed KMC update for sanity.
4435  */
4436 static int
4437 sadb_check_kmc(ipsa_query_t *sq, ipsa_t *sa, int *diagnostic)
4438 {
4439 	uint32_t kmp = sq->kmp;
4440 	uint32_t kmc = sq->kmc;
4441 
4442 	if (sa == NULL)
4443 		return (0);
4444 
4445 	if (sa->ipsa_state == IPSA_STATE_DEAD)
4446 		return (ESRCH);	/* DEAD == Not there, in this case. */
4447 
4448 	if ((kmp != 0) && ((sa->ipsa_kmp != 0) || (sa->ipsa_kmp != kmp))) {
4449 		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4450 		return (EINVAL);
4451 	}
4452 
4453 	if ((kmc != 0) && ((sa->ipsa_kmc != 0) || (sa->ipsa_kmc != kmc))) {
4454 		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4455 		return (EINVAL);
4456 	}
4457 
4458 	return (0);
4459 }
4460 
4461 /*
4462  * Actually update the KMC info.
4463  */
4464 static void
4465 sadb_update_kmc(ipsa_query_t *sq, ipsa_t *sa)
4466 {
4467 	uint32_t kmp = sq->kmp;
4468 	uint32_t kmc = sq->kmc;
4469 
4470 	if (kmp != 0)
4471 		sa->ipsa_kmp = kmp;
4472 	if (kmc != 0)
4473 		sa->ipsa_kmc = kmc;
4474 }
4475 
4476 /*
4477  * Common code to update an SA.
4478  */
4479 
4480 int
4481 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4482     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4483     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4484     netstack_t *ns, uint8_t sadb_msg_type)
4485 {
4486 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4487 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4488 	sadb_x_replay_ctr_t *replext =
4489 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4490 	sadb_lifetime_t *soft =
4491 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4492 	sadb_lifetime_t *hard =
4493 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4494 	sadb_lifetime_t *idle =
4495 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4496 	sadb_x_pair_t *pair_ext =
4497 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4498 	ipsa_t *echo_target = NULL;
4499 	ipsap_t ipsapp;
4500 	ipsa_query_t sq;
4501 	time_t current = gethrestime_sec();
4502 
4503 	sq.spp = spp;		/* XXX param */
4504 	int error = sadb_form_query(ksi, IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA,
4505 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
4506 	    &sq, diagnostic);
4507 
4508 	if (error != 0)
4509 		return (error);
4510 
4511 	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
4512 	if (error != 0)
4513 		return (error);
4514 
4515 	if (ipsapp.ipsap_psa_ptr == NULL && ipsapp.ipsap_sa_ptr != NULL) {
4516 		if (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4517 			/*
4518 			 * REFRELE the target and let the add_sa_func()
4519 			 * deal with updating a larval SA.
4520 			 */
4521 			destroy_ipsa_pair(&ipsapp);
4522 			return (add_sa_func(mp, ksi, diagnostic, ns));
4523 		}
4524 	}
4525 
4526 	/*
4527 	 * At this point we have an UPDATE to a MATURE SA. There should
4528 	 * not be any keying material present.
4529 	 */
4530 	if (akey != NULL) {
4531 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4532 		error = EINVAL;
4533 		goto bail;
4534 	}
4535 	if (ekey != NULL) {
4536 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4537 		error = EINVAL;
4538 		goto bail;
4539 	}
4540 
4541 	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4542 		if (ipsapp.ipsap_sa_ptr != NULL &&
4543 		    ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4544 			if ((error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4545 			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4546 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4547 				goto bail;
4548 			}
4549 		}
4550 		if (ipsapp.ipsap_psa_ptr != NULL &&
4551 		    ipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4552 			if ((error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4553 			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4554 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4555 				goto bail;
4556 			}
4557 		}
4558 	}
4559 	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4560 		if (ipsapp.ipsap_sa_ptr != NULL) {
4561 			error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4562 			    sq.assoc->sadb_sa_state,
4563 			    (ipsapp.ipsap_sa_ptr->ipsa_flags &
4564 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4565 			if (error) {
4566 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4567 				goto bail;
4568 			}
4569 		}
4570 		if (ipsapp.ipsap_psa_ptr != NULL) {
4571 			error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4572 			    sq.assoc->sadb_sa_state,
4573 			    (ipsapp.ipsap_psa_ptr->ipsa_flags &
4574 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4575 			if (error) {
4576 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4577 				goto bail;
4578 			}
4579 		}
4580 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4581 		    ksi, echo_target);
4582 		goto bail;
4583 	}
4584 
4585 	/*
4586 	 * Reality checks for updates of active associations.
4587 	 * Sundry first-pass UPDATE-specific reality checks.
4588 	 * Have to do the checks here, because it's after the add_sa code.
4589 	 * XXX STATS : logging/stats here?
4590 	 */
4591 
4592 	if (!((sq.assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4593 	    (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4594 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4595 		error = EINVAL;
4596 		goto bail;
4597 	}
4598 	if (sq.assoc->sadb_sa_flags & ~spp->s_updateflags) {
4599 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4600 		error = EINVAL;
4601 		goto bail;
4602 	}
4603 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4604 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4605 		error = EOPNOTSUPP;
4606 		goto bail;
4607 	}
4608 
4609 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4610 		error = EINVAL;
4611 		goto bail;
4612 	}
4613 
4614 	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
4615 		return (EINVAL);
4616 
4617 	error = sadb_check_kmc(&sq, ipsapp.ipsap_sa_ptr, diagnostic);
4618 	if (error != 0)
4619 		goto bail;
4620 
4621 	error = sadb_check_kmc(&sq, ipsapp.ipsap_psa_ptr, diagnostic);
4622 	if (error != 0)
4623 		goto bail;
4624 
4625 
4626 	if (ipsapp.ipsap_sa_ptr != NULL) {
4627 		/*
4628 		 * Do not allow replay value change for MATURE or LARVAL SA.
4629 		 */
4630 
4631 		if ((replext != NULL) &&
4632 		    ((ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4633 		    (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4634 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4635 			error = EINVAL;
4636 			goto bail;
4637 		}
4638 	}
4639 
4640 
4641 	if (ipsapp.ipsap_sa_ptr != NULL) {
4642 		sadb_update_lifetimes(ipsapp.ipsap_sa_ptr, hard, soft,
4643 		    idle, B_TRUE);
4644 		sadb_update_kmc(&sq, ipsapp.ipsap_sa_ptr);
4645 		if ((replext != NULL) &&
4646 		    (ipsapp.ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4647 			/*
4648 			 * If an inbound SA, update the replay counter
4649 			 * and check off all the other sequence number
4650 			 */
4651 			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4652 				if (!sadb_replay_check(ipsapp.ipsap_sa_ptr,
4653 				    replext->sadb_x_rc_replay32)) {
4654 					*diagnostic =
4655 					    SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4656 					error = EINVAL;
4657 					goto bail;
4658 				}
4659 				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4660 				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4661 				    current +
4662 				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4663 				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4664 			} else {
4665 				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4666 				ipsapp.ipsap_sa_ptr->ipsa_replay =
4667 				    replext->sadb_x_rc_replay32;
4668 				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4669 				    current +
4670 				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4671 				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4672 			}
4673 		}
4674 	}
4675 
4676 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4677 		if (ipsapp.ipsap_psa_ptr != NULL) {
4678 			sadb_update_lifetimes(ipsapp.ipsap_psa_ptr, hard, soft,
4679 			    idle, B_FALSE);
4680 			sadb_update_kmc(&sq, ipsapp.ipsap_psa_ptr);
4681 		} else {
4682 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4683 			error = ESRCH;
4684 			goto bail;
4685 		}
4686 	}
4687 
4688 	if (pair_ext != NULL)
4689 		error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
4690 
4691 	if (error == 0)
4692 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4693 		    ksi, echo_target);
4694 bail:
4695 
4696 	destroy_ipsa_pair(&ipsapp);
4697 
4698 	return (error);
4699 }
4700 
4701 
4702 static int
4703 update_pairing(ipsap_t *ipsapp, ipsa_query_t *sq, keysock_in_t *ksi,
4704     int *diagnostic)
4705 {
4706 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4707 	sadb_x_pair_t *pair_ext =
4708 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4709 	int error = 0;
4710 	ipsap_t oipsapp;
4711 	boolean_t undo_pair = B_FALSE;
4712 	uint32_t ipsa_flags;
4713 
4714 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4715 	    assoc->sadb_sa_spi) {
4716 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4717 		return (EINVAL);
4718 	}
4719 
4720 	/*
4721 	 * Assume for now that the spi value provided in the SADB_UPDATE
4722 	 * message was valid, update the SA with its pair spi value.
4723 	 * If the spi turns out to be bogus or the SA no longer exists
4724 	 * then this will be detected when the reverse update is made
4725 	 * below.
4726 	 */
4727 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4728 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4729 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4730 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4731 
4732 	/*
4733 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4734 	 * should now return pointers to the SA *AND* its pair, if this is not
4735 	 * the case, the "otherspi" either did not exist or was deleted. Also
4736 	 * check that "otherspi" is not already paired. If everything looks
4737 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4738 	 * after this update to ensure its not deleted until we are done.
4739 	 */
4740 	error = get_ipsa_pair(sq, &oipsapp, diagnostic);
4741 	if (error != 0) {
4742 		/*
4743 		 * This should never happen, calling function still has
4744 		 * IPSA_REFHELD on the SA we just updated.
4745 		 */
4746 		return (error);	/* XXX EINVAL instead of ESRCH? */
4747 	}
4748 
4749 	if (oipsapp.ipsap_psa_ptr == NULL) {
4750 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4751 		error = EINVAL;
4752 		undo_pair = B_TRUE;
4753 	} else {
4754 		ipsa_flags = oipsapp.ipsap_psa_ptr->ipsa_flags;
4755 		if ((oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4756 		    (oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4757 			/* Its dead Jim! */
4758 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4759 			undo_pair = B_TRUE;
4760 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4761 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4762 			/* This SA is in both hashtables. */
4763 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4764 			undo_pair = B_TRUE;
4765 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4766 			/* This SA is already paired with another. */
4767 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4768 			undo_pair = B_TRUE;
4769 		}
4770 	}
4771 
4772 	if (undo_pair) {
4773 		/* The pair SA does not exist. */
4774 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4775 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4776 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4777 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4778 	} else {
4779 		mutex_enter(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4780 		oipsapp.ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4781 		oipsapp.ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4782 		mutex_exit(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4783 	}
4784 
4785 	destroy_ipsa_pair(&oipsapp);
4786 	return (error);
4787 }
4788 
4789 /*
4790  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4791  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4792  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4793  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4794  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4795  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4796  * other direction's SA.
4797  */
4798 
4799 /*
4800  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4801  * grab it, lock it, and return it.  Otherwise return NULL.
4802  *
4803  * XXX MLS number of arguments getting unwieldy here
4804  */
4805 static ipsacq_t *
4806 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4807     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4808     uint64_t unique_id, ts_label_t *tsl)
4809 {
4810 	ipsacq_t *walker;
4811 	sa_family_t fam;
4812 	uint32_t blank_address[4] = {0, 0, 0, 0};
4813 
4814 	if (isrc == NULL) {
4815 		ASSERT(idst == NULL);
4816 		isrc = idst = blank_address;
4817 	}
4818 
4819 	/*
4820 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4821 	 *
4822 	 * XXX May need search for duplicates based on other things too!
4823 	 */
4824 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4825 	    walker = walker->ipsacq_next) {
4826 		mutex_enter(&walker->ipsacq_lock);
4827 		fam = walker->ipsacq_addrfam;
4828 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4829 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4830 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4831 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4832 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4833 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4834 		    (ap == walker->ipsacq_act) &&
4835 		    (pp == walker->ipsacq_policy) &&
4836 		    /* XXX do deep compares of ap/pp? */
4837 		    (unique_id == walker->ipsacq_unique_id) &&
4838 		    (ipsec_label_match(tsl, walker->ipsacq_tsl)))
4839 			break;			/* everything matched */
4840 		mutex_exit(&walker->ipsacq_lock);
4841 	}
4842 
4843 	return (walker);
4844 }
4845 
4846 /*
4847  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4848  * of all of the same length.  Give up (and drop) if memory
4849  * cannot be allocated for a new one; otherwise, invoke callback to
4850  * send the acquire up..
4851  *
4852  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4853  * list.  The ah_add_sa_finish() routines can look at the packet's attached
4854  * attributes and handle this case specially.
4855  */
4856 void
4857 sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
4858     boolean_t need_esp)
4859 {
4860 	mblk_t	*asyncmp;
4861 	sadbp_t *spp;
4862 	sadb_t *sp;
4863 	ipsacq_t *newbie;
4864 	iacqf_t *bucket;
4865 	mblk_t *extended;
4866 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4867 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4868 	uint32_t *src, *dst, *isrc, *idst;
4869 	ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
4870 	ipsec_action_t *ap = ixa->ixa_ipsec_action;
4871 	sa_family_t af;
4872 	int hashoffset;
4873 	uint32_t seq;
4874 	uint64_t unique_id = 0;
4875 	ipsec_selector_t sel;
4876 	boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
4877 	ts_label_t 	*tsl = NULL;
4878 	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
4879 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4880 	sadb_sens_t 	*sens = NULL;
4881 	int 		sens_len;
4882 
4883 	ASSERT((pp != NULL) || (ap != NULL));
4884 
4885 	ASSERT(need_ah != NULL || need_esp != NULL);
4886 
4887 	/* Assign sadb pointers */
4888 	if (need_esp) { /* ESP for AH+ESP */
4889 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4890 
4891 		spp = &espstack->esp_sadb;
4892 	} else {
4893 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
4894 
4895 		spp = &ahstack->ah_sadb;
4896 	}
4897 	sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
4898 
4899 	if (is_system_labeled())
4900 		tsl = ixa->ixa_tsl;
4901 
4902 	if (ap == NULL)
4903 		ap = pp->ipsp_act;
4904 
4905 	ASSERT(ap != NULL);
4906 
4907 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4908 		unique_id = SA_FORM_UNIQUE_ID(ixa);
4909 
4910 	/*
4911 	 * Set up an ACQUIRE record.
4912 	 *
4913 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
4914 	 * below the lowest point allowed in the kernel.  (In other words,
4915 	 * make sure the high bit on the sequence number is set.)
4916 	 */
4917 
4918 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4919 
4920 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4921 		src = (uint32_t *)&ipha->ipha_src;
4922 		dst = (uint32_t *)&ipha->ipha_dst;
4923 		af = AF_INET;
4924 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4925 		ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
4926 	} else {
4927 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4928 		src = (uint32_t *)&ip6h->ip6_src;
4929 		dst = (uint32_t *)&ip6h->ip6_dst;
4930 		af = AF_INET6;
4931 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4932 		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
4933 	}
4934 
4935 	if (tunnel_mode) {
4936 		if (pp == NULL) {
4937 			/*
4938 			 * Tunnel mode with no policy pointer means this is a
4939 			 * reflected ICMP (like a ECHO REQUEST) that came in
4940 			 * with self-encapsulated protection.  Until we better
4941 			 * support this, drop the packet.
4942 			 */
4943 			ip_drop_packet(datamp, B_FALSE, NULL,
4944 			    DROPPER(ipss, ipds_spd_got_selfencap),
4945 			    &ipss->ipsec_spd_dropper);
4946 			return;
4947 		}
4948 		/* Snag inner addresses. */
4949 		isrc = ixa->ixa_ipsec_insrc;
4950 		idst = ixa->ixa_ipsec_indst;
4951 	} else {
4952 		isrc = idst = NULL;
4953 	}
4954 
4955 	/*
4956 	 * Check buckets to see if there is an existing entry.  If so,
4957 	 * grab it.  sadb_checkacquire locks newbie if found.
4958 	 */
4959 	bucket = &(sp->sdb_acq[hashoffset]);
4960 	mutex_enter(&bucket->iacqf_lock);
4961 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4962 	    unique_id, tsl);
4963 
4964 	if (newbie == NULL) {
4965 		/*
4966 		 * Otherwise, allocate a new one.
4967 		 */
4968 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4969 		if (newbie == NULL) {
4970 			mutex_exit(&bucket->iacqf_lock);
4971 			ip_drop_packet(datamp, B_FALSE, NULL,
4972 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
4973 			    &ipss->ipsec_sadb_dropper);
4974 			return;
4975 		}
4976 		newbie->ipsacq_policy = pp;
4977 		if (pp != NULL) {
4978 			IPPOL_REFHOLD(pp);
4979 		}
4980 		IPACT_REFHOLD(ap);
4981 		newbie->ipsacq_act = ap;
4982 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
4983 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
4984 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4985 		if (newbie->ipsacq_next != NULL)
4986 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4987 
4988 		bucket->iacqf_ipsacq = newbie;
4989 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4990 		mutex_enter(&newbie->ipsacq_lock);
4991 	}
4992 
4993 	/*
4994 	 * XXX MLS does it actually help us to drop the bucket lock here?
4995 	 * we have inserted a half-built, locked acquire record into the
4996 	 * bucket.  any competing thread will now be able to lock the bucket
4997 	 * to scan it, but will immediately pile up on the new acquire
4998 	 * record's lock; I don't think we gain anything here other than to
4999 	 * disperse blame for lock contention.
5000 	 *
5001 	 * we might be able to dispense with acquire record locks entirely..
5002 	 * just use the bucket locks..
5003 	 */
5004 
5005 	mutex_exit(&bucket->iacqf_lock);
5006 
5007 	/*
5008 	 * This assert looks silly for now, but we may need to enter newbie's
5009 	 * mutex during a search.
5010 	 */
5011 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5012 
5013 	/*
5014 	 * Make the ip_xmit_attr_t into something we can queue.
5015 	 * If no memory it frees datamp.
5016 	 */
5017 	asyncmp = ip_xmit_attr_to_mblk(ixa);
5018 	if (asyncmp != NULL)
5019 		linkb(asyncmp, datamp);
5020 
5021 	/* Queue up packet.  Use b_next. */
5022 
5023 	if (asyncmp == NULL) {
5024 		/* Statistics for allocation failure */
5025 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
5026 			BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
5027 			    ipIfStatsOutDiscards);
5028 		} else {
5029 			BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
5030 			    ipIfStatsOutDiscards);
5031 		}
5032 		ip_drop_output("No memory for asyncmp", datamp, NULL);
5033 		freemsg(datamp);
5034 	} else if (newbie->ipsacq_numpackets == 0) {
5035 		/* First one. */
5036 		newbie->ipsacq_mp = asyncmp;
5037 		newbie->ipsacq_numpackets = 1;
5038 		newbie->ipsacq_expire = gethrestime_sec();
5039 		/*
5040 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5041 		 * value.
5042 		 */
5043 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5044 		newbie->ipsacq_seq = seq;
5045 		newbie->ipsacq_addrfam = af;
5046 
5047 		newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
5048 		newbie->ipsacq_dstport = ixa->ixa_ipsec_dst_port;
5049 		newbie->ipsacq_icmp_type = ixa->ixa_ipsec_icmp_type;
5050 		newbie->ipsacq_icmp_code = ixa->ixa_ipsec_icmp_code;
5051 		if (tunnel_mode) {
5052 			newbie->ipsacq_inneraddrfam = ixa->ixa_ipsec_inaf;
5053 			newbie->ipsacq_proto = ixa->ixa_ipsec_inaf == AF_INET6 ?
5054 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5055 			newbie->ipsacq_innersrcpfx = ixa->ixa_ipsec_insrcpfx;
5056 			newbie->ipsacq_innerdstpfx = ixa->ixa_ipsec_indstpfx;
5057 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5058 			    ixa->ixa_ipsec_insrc, ixa->ixa_ipsec_inaf);
5059 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5060 			    ixa->ixa_ipsec_indst, ixa->ixa_ipsec_inaf);
5061 		} else {
5062 			newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
5063 		}
5064 		newbie->ipsacq_unique_id = unique_id;
5065 
5066 		if (ixa->ixa_tsl != NULL) {
5067 			label_hold(ixa->ixa_tsl);
5068 			newbie->ipsacq_tsl = ixa->ixa_tsl;
5069 		}
5070 	} else {
5071 		/* Scan to the end of the list & insert. */
5072 		mblk_t *lastone = newbie->ipsacq_mp;
5073 
5074 		while (lastone->b_next != NULL)
5075 			lastone = lastone->b_next;
5076 		lastone->b_next = asyncmp;
5077 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5078 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5079 			lastone = newbie->ipsacq_mp;
5080 			newbie->ipsacq_mp = lastone->b_next;
5081 			lastone->b_next = NULL;
5082 
5083 			/* Freeing the async message */
5084 			lastone = ip_xmit_attr_free_mblk(lastone);
5085 			ip_drop_packet(lastone, B_FALSE, NULL,
5086 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5087 			    &ipss->ipsec_sadb_dropper);
5088 		} else {
5089 			IP_ACQUIRE_STAT(ipss, qhiwater,
5090 			    newbie->ipsacq_numpackets);
5091 		}
5092 	}
5093 
5094 	/*
5095 	 * Reset addresses.  Set them to the most recently added mblk chain,
5096 	 * so that the address pointers in the acquire record will point
5097 	 * at an mblk still attached to the acquire list.
5098 	 */
5099 
5100 	newbie->ipsacq_srcaddr = src;
5101 	newbie->ipsacq_dstaddr = dst;
5102 
5103 	/*
5104 	 * If the acquire record has more than one queued packet, we've
5105 	 * already sent an ACQUIRE, and don't need to repeat ourself.
5106 	 */
5107 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5108 		/* I have an acquire outstanding already! */
5109 		mutex_exit(&newbie->ipsacq_lock);
5110 		return;
5111 	}
5112 
5113 	if (!keysock_extended_reg(ns))
5114 		goto punt_extended;
5115 	/*
5116 	 * Construct an extended ACQUIRE.  There are logging
5117 	 * opportunities here in failure cases.
5118 	 */
5119 	bzero(&sel, sizeof (sel));
5120 	sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
5121 	if (tunnel_mode) {
5122 		sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
5123 		    IPPROTO_ENCAP : IPPROTO_IPV6;
5124 	} else {
5125 		sel.ips_protocol = ixa->ixa_ipsec_proto;
5126 		sel.ips_local_port = ixa->ixa_ipsec_src_port;
5127 		sel.ips_remote_port = ixa->ixa_ipsec_dst_port;
5128 	}
5129 	sel.ips_icmp_type = ixa->ixa_ipsec_icmp_type;
5130 	sel.ips_icmp_code = ixa->ixa_ipsec_icmp_code;
5131 	sel.ips_is_icmp_inv_acq = 0;
5132 	if (af == AF_INET) {
5133 		sel.ips_local_addr_v4 = ipha->ipha_src;
5134 		sel.ips_remote_addr_v4 = ipha->ipha_dst;
5135 	} else {
5136 		sel.ips_local_addr_v6 = ip6h->ip6_src;
5137 		sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5138 	}
5139 
5140 	extended = sadb_keysock_out(0);
5141 	if (extended == NULL)
5142 		goto punt_extended;
5143 
5144 	if (ixa->ixa_tsl != NULL) {
5145 		/*
5146 		 * XXX MLS correct condition here?
5147 		 * XXX MLS other credential attributes in acquire?
5148 		 * XXX malloc failure?  don't fall back to original?
5149 		 */
5150 		sens = sadb_make_sens_ext(ixa->ixa_tsl, &sens_len);
5151 
5152 		if (sens == NULL) {
5153 			freeb(extended);
5154 			goto punt_extended;
5155 		}
5156 	}
5157 
5158 	extended->b_cont = sadb_extended_acquire(&sel, pp, ap, tunnel_mode,
5159 	    seq, 0, sens, ns);
5160 
5161 	if (sens != NULL)
5162 		kmem_free(sens, sens_len);
5163 
5164 	if (extended->b_cont == NULL) {
5165 		freeb(extended);
5166 		goto punt_extended;
5167 	}
5168 
5169 	/*
5170 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
5171 	 * this new record.  The send-acquire callback assumes that acqrec is
5172 	 * already locked.
5173 	 */
5174 	(*spp->s_acqfn)(newbie, extended, ns);
5175 	return;
5176 
5177 punt_extended:
5178 	(*spp->s_acqfn)(newbie, NULL, ns);
5179 }
5180 
5181 /*
5182  * Unlink and free an acquire record.
5183  */
5184 void
5185 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5186 {
5187 	mblk_t		*mp;
5188 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5189 
5190 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5191 
5192 	if (acqrec->ipsacq_policy != NULL) {
5193 		IPPOL_REFRELE(acqrec->ipsacq_policy);
5194 	}
5195 	if (acqrec->ipsacq_act != NULL) {
5196 		IPACT_REFRELE(acqrec->ipsacq_act);
5197 	}
5198 
5199 	/* Unlink */
5200 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5201 	if (acqrec->ipsacq_next != NULL)
5202 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5203 
5204 	if (acqrec->ipsacq_tsl != NULL) {
5205 		label_rele(acqrec->ipsacq_tsl);
5206 		acqrec->ipsacq_tsl = NULL;
5207 	}
5208 
5209 	/*
5210 	 * Free hanging mp's.
5211 	 *
5212 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5213 	 */
5214 
5215 	mutex_enter(&acqrec->ipsacq_lock);
5216 	while (acqrec->ipsacq_mp != NULL) {
5217 		mp = acqrec->ipsacq_mp;
5218 		acqrec->ipsacq_mp = mp->b_next;
5219 		mp->b_next = NULL;
5220 		/* Freeing the async message */
5221 		mp = ip_xmit_attr_free_mblk(mp);
5222 		ip_drop_packet(mp, B_FALSE, NULL,
5223 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5224 		    &ipss->ipsec_sadb_dropper);
5225 	}
5226 	mutex_exit(&acqrec->ipsacq_lock);
5227 
5228 	/* Free */
5229 	mutex_destroy(&acqrec->ipsacq_lock);
5230 	kmem_free(acqrec, sizeof (*acqrec));
5231 }
5232 
5233 /*
5234  * Destroy an acquire list fanout.
5235  */
5236 static void
5237 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5238     netstack_t *ns)
5239 {
5240 	int i;
5241 	iacqf_t *list = *listp;
5242 
5243 	if (list == NULL)
5244 		return;
5245 
5246 	for (i = 0; i < numentries; i++) {
5247 		mutex_enter(&(list[i].iacqf_lock));
5248 		while (list[i].iacqf_ipsacq != NULL)
5249 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5250 		mutex_exit(&(list[i].iacqf_lock));
5251 		if (forever)
5252 			mutex_destroy(&(list[i].iacqf_lock));
5253 	}
5254 
5255 	if (forever) {
5256 		*listp = NULL;
5257 		kmem_free(list, numentries * sizeof (*list));
5258 	}
5259 }
5260 
5261 /*
5262  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5263  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5264  */
5265 static uint8_t *
5266 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5267     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5268     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5269 {
5270 	uint8_t *cur = start;
5271 	ipsec_alginfo_t *algp;
5272 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5273 
5274 	cur += sizeof (*algdesc);
5275 	if (cur >= limit)
5276 		return (NULL);
5277 
5278 	ecomb->sadb_x_ecomb_numalgs++;
5279 
5280 	/*
5281 	 * Normalize vs. crypto framework's limits.  This way, you can specify
5282 	 * a stronger policy, and when the framework loads a stronger version,
5283 	 * you can just keep plowing w/o rewhacking your SPD.
5284 	 */
5285 	rw_enter(&ipss->ipsec_alg_lock, RW_READER);
5286 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5287 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5288 	if (algp == NULL) {
5289 		rw_exit(&ipss->ipsec_alg_lock);
5290 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5291 	}
5292 	if (minbits < algp->alg_ef_minbits)
5293 		minbits = algp->alg_ef_minbits;
5294 	if (maxbits > algp->alg_ef_maxbits)
5295 		maxbits = algp->alg_ef_maxbits;
5296 	rw_exit(&ipss->ipsec_alg_lock);
5297 
5298 	algdesc->sadb_x_algdesc_reserved = SADB_8TO1(algp->alg_saltlen);
5299 	algdesc->sadb_x_algdesc_satype = satype;
5300 	algdesc->sadb_x_algdesc_algtype = algtype;
5301 	algdesc->sadb_x_algdesc_alg = alg;
5302 	algdesc->sadb_x_algdesc_minbits = minbits;
5303 	algdesc->sadb_x_algdesc_maxbits = maxbits;
5304 
5305 	return (cur);
5306 }
5307 
5308 /*
5309  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5310  * which must fit before *limit
5311  *
5312  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5313  */
5314 static uint8_t *
5315 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5316     netstack_t *ns)
5317 {
5318 	uint8_t *cur = start;
5319 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5320 	ipsec_prot_t *ipp;
5321 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5322 
5323 	cur += sizeof (*ecomb);
5324 	if (cur >= limit)
5325 		return (NULL);
5326 
5327 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5328 
5329 	ipp = &act->ipa_act.ipa_apply;
5330 
5331 	ecomb->sadb_x_ecomb_numalgs = 0;
5332 	ecomb->sadb_x_ecomb_reserved = 0;
5333 	ecomb->sadb_x_ecomb_reserved2 = 0;
5334 	/*
5335 	 * No limits on allocations, since we really don't support that
5336 	 * concept currently.
5337 	 */
5338 	ecomb->sadb_x_ecomb_soft_allocations = 0;
5339 	ecomb->sadb_x_ecomb_hard_allocations = 0;
5340 
5341 	/*
5342 	 * XXX TBD: Policy or global parameters will eventually be
5343 	 * able to fill in some of these.
5344 	 */
5345 	ecomb->sadb_x_ecomb_flags = 0;
5346 	ecomb->sadb_x_ecomb_soft_bytes = 0;
5347 	ecomb->sadb_x_ecomb_hard_bytes = 0;
5348 	ecomb->sadb_x_ecomb_soft_addtime = 0;
5349 	ecomb->sadb_x_ecomb_hard_addtime = 0;
5350 	ecomb->sadb_x_ecomb_soft_usetime = 0;
5351 	ecomb->sadb_x_ecomb_hard_usetime = 0;
5352 
5353 	if (ipp->ipp_use_ah) {
5354 		cur = sadb_new_algdesc(cur, limit, ecomb,
5355 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5356 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5357 		if (cur == NULL)
5358 			return (NULL);
5359 		ipsecah_fill_defs(ecomb, ns);
5360 	}
5361 
5362 	if (ipp->ipp_use_esp) {
5363 		if (ipp->ipp_use_espa) {
5364 			cur = sadb_new_algdesc(cur, limit, ecomb,
5365 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5366 			    ipp->ipp_esp_auth_alg,
5367 			    ipp->ipp_espa_minbits,
5368 			    ipp->ipp_espa_maxbits, ipss);
5369 			if (cur == NULL)
5370 				return (NULL);
5371 		}
5372 
5373 		cur = sadb_new_algdesc(cur, limit, ecomb,
5374 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5375 		    ipp->ipp_encr_alg,
5376 		    ipp->ipp_espe_minbits,
5377 		    ipp->ipp_espe_maxbits, ipss);
5378 		if (cur == NULL)
5379 			return (NULL);
5380 		/* Fill in lifetimes if and only if AH didn't already... */
5381 		if (!ipp->ipp_use_ah)
5382 			ipsecesp_fill_defs(ecomb, ns);
5383 	}
5384 
5385 	return (cur);
5386 }
5387 
5388 #include <sys/tsol/label_macro.h> /* XXX should not need this */
5389 
/*
 * From a ts_label_t, construct a sensitivity label extension
 *
 * We send up a fixed-size sensitivity label bitmap, and are perhaps
 * overly chummy with the underlying data structures here.
 */
5396 
5397 /* ARGSUSED */
5398 int
5399 sadb_sens_len_from_label(ts_label_t *tsl)
5400 {
5401 	int baselen = sizeof (sadb_sens_t) + _C_LEN * 4;
5402 	return (roundup(baselen, sizeof (uint64_t)));
5403 }
5404 
5405 void
5406 sadb_sens_from_label(sadb_sens_t *sens, int exttype, ts_label_t *tsl,
5407     int senslen)
5408 {
5409 	uint8_t *bitmap;
5410 	bslabel_t *sl;
5411 
5412 	/* LINTED */
5413 	ASSERT((_C_LEN & 1) == 0);
5414 	ASSERT((senslen & 7) == 0);
5415 
5416 	sl = label2bslabel(tsl);
5417 
5418 	sens->sadb_sens_exttype = exttype;
5419 	sens->sadb_sens_len = SADB_8TO64(senslen);
5420 
5421 	sens->sadb_sens_dpd = tsl->tsl_doi;
5422 	sens->sadb_sens_sens_level = LCLASS(sl);
5423 	sens->sadb_sens_integ_level = 0; /* TBD */
5424 	sens->sadb_sens_sens_len = _C_LEN >> 1;
5425 	sens->sadb_sens_integ_len = 0; /* TBD */
5426 	sens->sadb_x_sens_flags = 0;
5427 
5428 	bitmap = (uint8_t *)(sens + 1);
5429 	bcopy(&(((_bslabel_impl_t *)sl)->compartments), bitmap, _C_LEN * 4);
5430 }
5431 
5432 static sadb_sens_t *
5433 sadb_make_sens_ext(ts_label_t *tsl, int *len)
5434 {
5435 	/* XXX allocation failure? */
5436 	int sens_len = sadb_sens_len_from_label(tsl);
5437 
5438 	sadb_sens_t *sens = kmem_alloc(sens_len, KM_SLEEP);
5439 
5440 	sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY, tsl, sens_len);
5441 
5442 	*len = sens_len;
5443 
5444 	return (sens);
5445 }
5446 
5447 /*
5448  * Okay, how do we report errors/invalid labels from this?
5449  * With a special designated "not a label" cred_t ?
5450  */
5451 /* ARGSUSED */
5452 ts_label_t *
5453 sadb_label_from_sens(sadb_sens_t *sens, uint64_t *bitmap)
5454 {
5455 	int bitmap_len = SADB_64TO8(sens->sadb_sens_sens_len);
5456 	bslabel_t sl;
5457 	ts_label_t *tsl;
5458 
5459 	if (sens->sadb_sens_integ_level != 0)
5460 		return (NULL);
5461 	if (sens->sadb_sens_integ_len != 0)
5462 		return (NULL);
5463 	if (bitmap_len > _C_LEN * 4)
5464 		return (NULL);
5465 
5466 	bsllow(&sl);
5467 	LCLASS_SET((_bslabel_impl_t *)&sl, sens->sadb_sens_sens_level);
5468 	bcopy(bitmap, &((_bslabel_impl_t *)&sl)->compartments,
5469 	    bitmap_len);
5470 
5471 	tsl = labelalloc(&sl, sens->sadb_sens_dpd, KM_NOSLEEP);
5472 	if (tsl == NULL)
5473 		return (NULL);
5474 
5475 	if (sens->sadb_x_sens_flags & SADB_X_SENS_UNLABELED)
5476 		tsl->tsl_flags |= TSLF_UNLABELED;
5477 	return (tsl);
5478 }
5479 
5480 /* End XXX label-library-leakage */
5481 
5482 /*
5483  * Construct an extended ACQUIRE message based on a selector and the resulting
5484  * IPsec action.
5485  *
5486  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5487  * generation. As a consequence, expect this function to evolve
5488  * rapidly.
5489  */
5490 static mblk_t *
5491 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5492     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5493     sadb_sens_t *sens, netstack_t *ns)
5494 {
5495 	mblk_t *mp;
5496 	sadb_msg_t *samsg;
5497 	uint8_t *start, *cur, *end;
5498 	uint32_t *saddrptr, *daddrptr;
5499 	sa_family_t af;
5500 	sadb_prop_t *eprop;
5501 	ipsec_action_t *ap, *an;
5502 	ipsec_selkey_t *ipsl;
5503 	uint8_t proto, pfxlen;
5504 	uint16_t lport, rport;
5505 	uint32_t kmp, kmc;
5506 
5507 	/*
5508 	 * Find the action we want sooner rather than later..
5509 	 */
5510 	an = NULL;
5511 	if (pol == NULL) {
5512 		ap = act;
5513 	} else {
5514 		ap = pol->ipsp_act;
5515 
5516 		if (ap != NULL)
5517 			an = ap->ipa_next;
5518 	}
5519 
5520 	/*
5521 	 * Just take a swag for the allocation for now.	 We can always
5522 	 * alter it later.
5523 	 */
5524 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5525 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5526 	if (mp == NULL)
5527 		return (NULL);
5528 
5529 	start = mp->b_rptr;
5530 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5531 
5532 	cur = start;
5533 
5534 	samsg = (sadb_msg_t *)cur;
5535 	cur += sizeof (*samsg);
5536 
5537 	samsg->sadb_msg_version = PF_KEY_V2;
5538 	samsg->sadb_msg_type = SADB_ACQUIRE;
5539 	samsg->sadb_msg_errno = 0;
5540 	samsg->sadb_msg_reserved = 0;
5541 	samsg->sadb_msg_satype = 0;
5542 	samsg->sadb_msg_seq = seq;
5543 	samsg->sadb_msg_pid = pid;
5544 
5545 	if (tunnel_mode) {
5546 		/*
5547 		 * Form inner address extensions based NOT on the inner
5548 		 * selectors (i.e. the packet data), but on the policy's
5549 		 * selector key (i.e. the policy's selector information).
5550 		 *
5551 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5552 		 * same in ipsec_selkey_t (unless the compiler does very
5553 		 * strange things with unions, consult your local C language
5554 		 * lawyer for details).
5555 		 */
5556 		ASSERT(pol != NULL);
5557 
5558 		ipsl = &(pol->ipsp_sel->ipsl_key);
5559 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5560 			af = AF_INET;
5561 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5562 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5563 		} else {
5564 			af = AF_INET6;
5565 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5566 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5567 		}
5568 
5569 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5570 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5571 			pfxlen = ipsl->ipsl_local_pfxlen;
5572 		} else {
5573 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5574 			pfxlen = 0;
5575 		}
5576 		/* XXX What about ICMP type/code? */
5577 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5578 		    ipsl->ipsl_lport : 0;
5579 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5580 		    ipsl->ipsl_proto : 0;
5581 
5582 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5583 		    af, saddrptr, lport, proto, pfxlen);
5584 		if (cur == NULL) {
5585 			freeb(mp);
5586 			return (NULL);
5587 		}
5588 
5589 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5590 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5591 			pfxlen = ipsl->ipsl_remote_pfxlen;
5592 		} else {
5593 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5594 			pfxlen = 0;
5595 		}
5596 		/* XXX What about ICMP type/code? */
5597 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5598 		    ipsl->ipsl_rport : 0;
5599 
5600 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5601 		    af, daddrptr, rport, proto, pfxlen);
5602 		if (cur == NULL) {
5603 			freeb(mp);
5604 			return (NULL);
5605 		}
5606 		/*
5607 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5608 		 * _with_ inner-packet address selectors, we'll need to further
5609 		 * distinguish tunnel mode here.  For now, having inner
5610 		 * addresses and/or ports is sufficient.
5611 		 *
5612 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5613 		 * outer addresses.
5614 		 */
5615 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5616 		lport = rport = 0;
5617 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5618 		proto = 0;
5619 		lport = 0;
5620 		rport = 0;
5621 		if (pol != NULL) {
5622 			ipsl = &(pol->ipsp_sel->ipsl_key);
5623 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5624 				proto = ipsl->ipsl_proto;
5625 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5626 				rport = ipsl->ipsl_rport;
5627 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5628 				lport = ipsl->ipsl_lport;
5629 		}
5630 	} else {
5631 		proto = sel->ips_protocol;
5632 		lport = sel->ips_local_port;
5633 		rport = sel->ips_remote_port;
5634 	}
5635 
5636 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5637 
5638 	/*
5639 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5640 	 * ipsec_selector_t.
5641 	 */
5642 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5643 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5644 
5645 	if (cur == NULL) {
5646 		freeb(mp);
5647 		return (NULL);
5648 	}
5649 
5650 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5651 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5652 
5653 	if (cur == NULL) {
5654 		freeb(mp);
5655 		return (NULL);
5656 	}
5657 
5658 	if (sens != NULL) {
5659 		uint8_t *sensext = cur;
5660 		int senslen = SADB_64TO8(sens->sadb_sens_len);
5661 
5662 		cur += senslen;
5663 		if (cur > end) {
5664 			freeb(mp);
5665 			return (NULL);
5666 		}
5667 		bcopy(sens, sensext, senslen);
5668 	}
5669 
5670 	/*
5671 	 * This section will change a lot as policy evolves.
5672 	 * For now, it'll be relatively simple.
5673 	 */
5674 	eprop = (sadb_prop_t *)cur;
5675 	cur += sizeof (*eprop);
5676 	if (cur > end) {
5677 		/* no space left */
5678 		freeb(mp);
5679 		return (NULL);
5680 	}
5681 
5682 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5683 	eprop->sadb_x_prop_ereserved = 0;
5684 	eprop->sadb_x_prop_numecombs = 0;
5685 	eprop->sadb_prop_replay = 32;	/* default */
5686 
5687 	kmc = kmp = 0;
5688 
5689 	for (; ap != NULL; ap = an) {
5690 		an = (pol != NULL) ? ap->ipa_next : NULL;
5691 
5692 		/*
5693 		 * Skip non-IPsec policies
5694 		 */
5695 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5696 			continue;
5697 
5698 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5699 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5700 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5701 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5702 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5703 			eprop->sadb_prop_replay =
5704 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5705 		}
5706 
5707 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5708 		if (cur == NULL) { /* no space */
5709 			freeb(mp);
5710 			return (NULL);
5711 		}
5712 		eprop->sadb_x_prop_numecombs++;
5713 	}
5714 
5715 	if (eprop->sadb_x_prop_numecombs == 0) {
5716 		/*
5717 		 * This will happen if we fail to find a policy
5718 		 * allowing for IPsec processing.
5719 		 * Construct an error message.
5720 		 */
5721 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5722 		samsg->sadb_msg_errno = ENOENT;
5723 		samsg->sadb_x_msg_diagnostic = 0;
5724 		return (mp);
5725 	}
5726 
5727 	if ((kmp != 0) || (kmc != 0)) {
5728 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5729 		if (cur == NULL) {
5730 			freeb(mp);
5731 			return (NULL);
5732 		}
5733 	}
5734 
5735 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5736 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5737 	mp->b_wptr = cur;
5738 
5739 	return (mp);
5740 }
5741 
5742 /*
5743  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5744  *
5745  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5746  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5747  * maximize code consolidation while preventing algorithm changes from messing
5748  * with the callers finishing touches on the ACQUIRE itself.
5749  */
5750 mblk_t *
5751 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5752 {
5753 	uint_t allocsize;
5754 	mblk_t *pfkeymp, *msgmp;
5755 	sa_family_t af;
5756 	uint8_t *cur, *end;
5757 	sadb_msg_t *samsg;
5758 	uint16_t sport_typecode;
5759 	uint16_t dport_typecode;
5760 	uint8_t check_proto;
5761 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5762 
5763 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5764 
5765 	pfkeymp = sadb_keysock_out(0);
5766 	if (pfkeymp == NULL)
5767 		return (NULL);
5768 
5769 	/*
5770 	 * First, allocate a basic ACQUIRE message
5771 	 */
5772 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5773 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5774 
5775 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5776 	allocsize += 2 * sizeof (struct sockaddr_in6);
5777 
5778 	rw_enter(&ipss->ipsec_alg_lock, RW_READER);
5779 	/* NOTE:  The lock is now held through to this function's return. */
5780 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5781 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5782 
5783 	if (tunnel_mode) {
5784 		/* Tunnel mode! */
5785 		allocsize += 2 * sizeof (sadb_address_t);
5786 		/* Enough to cover both AF_INET and AF_INET6. */
5787 		allocsize += 2 * sizeof (struct sockaddr_in6);
5788 	}
5789 
5790 	msgmp = allocb(allocsize, BPRI_HI);
5791 	if (msgmp == NULL) {
5792 		freeb(pfkeymp);
5793 		rw_exit(&ipss->ipsec_alg_lock);
5794 		return (NULL);
5795 	}
5796 
5797 	pfkeymp->b_cont = msgmp;
5798 	cur = msgmp->b_rptr;
5799 	end = cur + allocsize;
5800 	samsg = (sadb_msg_t *)cur;
5801 	cur += sizeof (sadb_msg_t);
5802 
5803 	af = acqrec->ipsacq_addrfam;
5804 	switch (af) {
5805 	case AF_INET:
5806 		check_proto = IPPROTO_ICMP;
5807 		break;
5808 	case AF_INET6:
5809 		check_proto = IPPROTO_ICMPV6;
5810 		break;
5811 	default:
5812 		/* This should never happen unless we have kernel bugs. */
5813 		cmn_err(CE_WARN,
5814 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5815 		ASSERT(0);
5816 		rw_exit(&ipss->ipsec_alg_lock);
5817 		return (NULL);
5818 	}
5819 
5820 	samsg->sadb_msg_version = PF_KEY_V2;
5821 	samsg->sadb_msg_type = SADB_ACQUIRE;
5822 	samsg->sadb_msg_satype = satype;
5823 	samsg->sadb_msg_errno = 0;
5824 	samsg->sadb_msg_pid = 0;
5825 	samsg->sadb_msg_reserved = 0;
5826 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5827 
5828 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5829 
5830 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5831 		sport_typecode = dport_typecode = 0;
5832 	} else {
5833 		sport_typecode = acqrec->ipsacq_srcport;
5834 		dport_typecode = acqrec->ipsacq_dstport;
5835 	}
5836 
5837 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5838 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5839 
5840 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5841 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5842 
5843 	if (tunnel_mode) {
5844 		sport_typecode = acqrec->ipsacq_srcport;
5845 		dport_typecode = acqrec->ipsacq_dstport;
5846 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5847 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5848 		    sport_typecode, acqrec->ipsacq_inner_proto,
5849 		    acqrec->ipsacq_innersrcpfx);
5850 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5851 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5852 		    dport_typecode, acqrec->ipsacq_inner_proto,
5853 		    acqrec->ipsacq_innerdstpfx);
5854 	}
5855 
5856 	/* XXX Insert identity information here. */
5857 
5858 	/* XXXMLS Insert sensitivity information here. */
5859 
5860 	if (cur != NULL)
5861 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5862 	else
5863 		rw_exit(&ipss->ipsec_alg_lock);
5864 
5865 	return (pfkeymp);
5866 }
5867 
5868 /*
5869  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5870  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5871  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5872  * (ipsa_t *)-1).
5873  *
5874  * master_spi is passed in host order.
5875  */
5876 ipsa_t *
5877 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5878     netstack_t *ns, uint_t sa_type)
5879 {
5880 	sadb_address_t *src =
5881 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5882 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5883 	sadb_spirange_t *range =
5884 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5885 	struct sockaddr_in *ssa, *dsa;
5886 	struct sockaddr_in6 *ssa6, *dsa6;
5887 	uint32_t *srcaddr, *dstaddr;
5888 	sa_family_t af;
5889 	uint32_t add, min, max;
5890 	uint8_t protocol =
5891 	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
5892 
5893 	if (src == NULL) {
5894 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5895 		return ((ipsa_t *)-1);
5896 	}
5897 	if (dst == NULL) {
5898 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5899 		return ((ipsa_t *)-1);
5900 	}
5901 	if (range == NULL) {
5902 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5903 		return ((ipsa_t *)-1);
5904 	}
5905 
5906 	min = ntohl(range->sadb_spirange_min);
5907 	max = ntohl(range->sadb_spirange_max);
5908 	dsa = (struct sockaddr_in *)(dst + 1);
5909 	dsa6 = (struct sockaddr_in6 *)dsa;
5910 
5911 	ssa = (struct sockaddr_in *)(src + 1);
5912 	ssa6 = (struct sockaddr_in6 *)ssa;
5913 	ASSERT(dsa->sin_family == ssa->sin_family);
5914 
5915 	srcaddr = ALL_ZEROES_PTR;
5916 	af = dsa->sin_family;
5917 	switch (af) {
5918 	case AF_INET:
5919 		if (src != NULL)
5920 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5921 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5922 		break;
5923 	case AF_INET6:
5924 		if (src != NULL)
5925 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5926 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5927 		break;
5928 	default:
5929 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5930 		return ((ipsa_t *)-1);
5931 	}
5932 
5933 	if (master_spi < min || master_spi > max) {
5934 		/* Return a random value in the range. */
5935 		if (cl_inet_getspi) {
5936 			cl_inet_getspi(ns->netstack_stackid, protocol,
5937 			    (uint8_t *)&add, sizeof (add), NULL);
5938 		} else {
5939 			(void) random_get_pseudo_bytes((uint8_t *)&add,
5940 			    sizeof (add));
5941 		}
5942 		master_spi = min + (add % (max - min + 1));
5943 	}
5944 
5945 	/*
5946 	 * Since master_spi is passed in host order, we need to htonl() it
5947 	 * for the purposes of creating a new SA.
5948 	 */
5949 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5950 	    ns));
5951 }
5952 
5953 /*
5954  *
5955  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5956  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5957  * and scan for the sequence number in question.  I may wish to accept an
5958  * address pair with it, for easier searching.
5959  *
5960  * Caller frees the message, so we don't have to here.
5961  *
5962  * NOTE:	The pfkey_q parameter may be used in the future for ACQUIRE
5963  *		failures.
5964  */
5965 /* ARGSUSED */
5966 void
5967 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *pfkey_q,
5968     netstack_t *ns)
5969 {
5970 	int i;
5971 	ipsacq_t *acqrec;
5972 	iacqf_t *bucket;
5973 
5974 	/*
5975 	 * I only accept the base header for this!
5976 	 * Though to be honest, requiring the dst address would help
5977 	 * immensely.
5978 	 *
5979 	 * XXX	There are already cases where I can get the dst address.
5980 	 */
5981 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5982 		return;
5983 
5984 	/*
5985 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5986 	 * (and in the future send a message to IP with the appropriate error
5987 	 * number).
5988 	 *
5989 	 * Q: Do I want to reject if pid != 0?
5990 	 */
5991 
5992 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5993 		bucket = &sp->s_v4.sdb_acq[i];
5994 		mutex_enter(&bucket->iacqf_lock);
5995 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5996 		    acqrec = acqrec->ipsacq_next) {
5997 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5998 				break;	/* for acqrec... loop. */
5999 		}
6000 		if (acqrec != NULL)
6001 			break;	/* for i = 0... loop. */
6002 
6003 		mutex_exit(&bucket->iacqf_lock);
6004 	}
6005 
6006 	if (acqrec == NULL) {
6007 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
6008 			bucket = &sp->s_v6.sdb_acq[i];
6009 			mutex_enter(&bucket->iacqf_lock);
6010 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6011 			    acqrec = acqrec->ipsacq_next) {
6012 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6013 					break;	/* for acqrec... loop. */
6014 			}
6015 			if (acqrec != NULL)
6016 				break;	/* for i = 0... loop. */
6017 
6018 			mutex_exit(&bucket->iacqf_lock);
6019 		}
6020 	}
6021 
6022 
6023 	if (acqrec == NULL)
6024 		return;
6025 
6026 	/*
6027 	 * What do I do with the errno and IP?	I may need mp's services a
6028 	 * little more.	 See sadb_destroy_acquire() for future directions
6029 	 * beyond free the mblk chain on the acquire record.
6030 	 */
6031 
6032 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
6033 	sadb_destroy_acquire(acqrec, ns);
6034 	/* Have to exit mutex here, because of breaking out of for loop. */
6035 	mutex_exit(&bucket->iacqf_lock);
6036 }
6037 
6038 /*
6039  * The following functions work with the replay windows of an SA.  They assume
6040  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
6041  * represents the highest sequence number packet received, and back
6042  * (ipsa->ipsa_replay_wsize) packets.
6043  */
6044 
6045 /*
6046  * Is the replay bit set?
6047  */
6048 static boolean_t
6049 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
6050 {
6051 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6052 
6053 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
6054 }
6055 
6056 /*
6057  * Shift the bits of the replay window over.
6058  */
6059 static void
6060 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6061 {
6062 	int i;
6063 	int jump = ((shift - 1) >> 6) + 1;
6064 
6065 	if (shift == 0)
6066 		return;
6067 
6068 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6069 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6070 			ipsa->ipsa_replay_arr[i + jump] |=
6071 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6072 		}
6073 		ipsa->ipsa_replay_arr[i] <<= shift;
6074 	}
6075 }
6076 
6077 /*
6078  * Set a bit in the bit vector.
6079  */
6080 static void
6081 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6082 {
6083 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6084 
6085 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6086 }
6087 
/*
 * Largest value a 32-bit replay counter can hold.  sadb_replay_peek()
 * stops accepting packets on an SA whose ipsa_replay has reached this.
 */
#define	SADB_MAX_REPLAY_VALUE 0xffffffff
6089 
6090 /*
6091  * Assume caller has NOT done ntohl() already on seq.  Check to see
6092  * if replay sequence number "seq" has been seen already.
6093  */
6094 boolean_t
6095 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6096 {
6097 	boolean_t rc;
6098 	uint32_t diff;
6099 
6100 	if (ipsa->ipsa_replay_wsize == 0)
6101 		return (B_TRUE);
6102 
6103 	/*
6104 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6105 	 */
6106 
6107 	/* Convert sequence number into host order before holding the mutex. */
6108 	seq = ntohl(seq);
6109 
6110 	mutex_enter(&ipsa->ipsa_lock);
6111 
6112 	/* Initialize inbound SA's ipsa_replay field to last one received. */
6113 	if (ipsa->ipsa_replay == 0)
6114 		ipsa->ipsa_replay = 1;
6115 
6116 	if (seq > ipsa->ipsa_replay) {
6117 		/*
6118 		 * I have received a new "highest value received".  Shift
6119 		 * the replay window over.
6120 		 */
6121 		diff = seq - ipsa->ipsa_replay;
6122 		if (diff < ipsa->ipsa_replay_wsize) {
6123 			/* In replay window, shift bits over. */
6124 			ipsa_shift_replay(ipsa, diff);
6125 		} else {
6126 			/* WAY FAR AHEAD, clear bits and start again. */
6127 			bzero(ipsa->ipsa_replay_arr,
6128 			    sizeof (ipsa->ipsa_replay_arr));
6129 		}
6130 		ipsa_set_replay(ipsa, 0);
6131 		ipsa->ipsa_replay = seq;
6132 		rc = B_TRUE;
6133 		goto done;
6134 	}
6135 	diff = ipsa->ipsa_replay - seq;
6136 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6137 		rc = B_FALSE;
6138 		goto done;
6139 	}
6140 	/* Set this packet as seen. */
6141 	ipsa_set_replay(ipsa, diff);
6142 
6143 	rc = B_TRUE;
6144 done:
6145 	mutex_exit(&ipsa->ipsa_lock);
6146 	return (rc);
6147 }
6148 
6149 /*
6150  * "Peek" and see if we should even bother going through the effort of
6151  * running an authentication check on the sequence number passed in.
6152  * this takes into account packets that are below the replay window,
6153  * and collisions with already replayed packets.  Return B_TRUE if it
6154  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6155  * Assume same byte-ordering as sadb_replay_check.
6156  */
6157 boolean_t
6158 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6159 {
6160 	boolean_t rc = B_FALSE;
6161 	uint32_t diff;
6162 
6163 	if (ipsa->ipsa_replay_wsize == 0)
6164 		return (B_TRUE);
6165 
6166 	/*
6167 	 * 0 is 0, regardless of byte order... :)
6168 	 *
6169 	 * If I get 0 on the wire (and there is a replay window) then the
6170 	 * sender most likely wrapped.	This ipsa may need to be marked or
6171 	 * something.
6172 	 */
6173 	if (seq == 0)
6174 		return (B_FALSE);
6175 
6176 	seq = ntohl(seq);
6177 	mutex_enter(&ipsa->ipsa_lock);
6178 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6179 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6180 		goto done;
6181 
6182 	/*
6183 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6184 	 * bother with formalities.  I'm not accepting any more packets
6185 	 * on this SA.
6186 	 */
6187 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6188 		/*
6189 		 * Since we're already holding the lock, update the
6190 		 * expire time ala. sadb_replay_delete() and return.
6191 		 */
6192 		ipsa->ipsa_hardexpiretime = (time_t)1;
6193 		goto done;
6194 	}
6195 
6196 	if (seq <= ipsa->ipsa_replay) {
6197 		/*
6198 		 * This seq is in the replay window.  I'm not below it,
6199 		 * because I already checked for that above!
6200 		 */
6201 		diff = ipsa->ipsa_replay - seq;
6202 		if (ipsa_is_replay_set(ipsa, diff))
6203 			goto done;
6204 	}
6205 	/* Else return B_TRUE, I'm going to advance the window. */
6206 
6207 	rc = B_TRUE;
6208 done:
6209 	mutex_exit(&ipsa->ipsa_lock);
6210 	return (rc);
6211 }
6212 
6213 /*
6214  * Delete a single SA.
6215  *
6216  * For now, use the quick-and-dirty trick of making the association's
6217  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6218  */
6219 void
6220 sadb_replay_delete(ipsa_t *assoc)
6221 {
6222 	mutex_enter(&assoc->ipsa_lock);
6223 	assoc->ipsa_hardexpiretime = (time_t)1;
6224 	mutex_exit(&assoc->ipsa_lock);
6225 }
6226 
6227 /*
6228  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
6229  * this is designed to take only a format string with "* %x * %s *", so
6230  * that "spi" is printed first, then "addr" is converted using inet_pton().
6231  *
6232  * This is abstracted out to save the stack space for only when inet_pton()
6233  * is called.  Make sure "spi" is in network order; it usually is when this
6234  * would get called.
6235  */
6236 void
6237 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6238     uint32_t spi, void *addr, int af, netstack_t *ns)
6239 {
6240 	char buf[INET6_ADDRSTRLEN];
6241 
6242 	ASSERT(af == AF_INET6 || af == AF_INET);
6243 
6244 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6245 	    inet_ntop(af, addr, buf, sizeof (buf)));
6246 }
6247 
6248 /*
6249  * Fills in a reference to the policy, if any, from the conn, in *ppp
6250  */
6251 static void
6252 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6253 {
6254 	ipsec_policy_t	*pp;
6255 	ipsec_latch_t	*ipl = connp->conn_latch;
6256 
6257 	if ((ipl != NULL) && (connp->conn_ixa->ixa_ipsec_policy != NULL)) {
6258 		pp = connp->conn_ixa->ixa_ipsec_policy;
6259 		IPPOL_REFHOLD(pp);
6260 	} else {
6261 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, sel,
6262 		    connp->conn_netstack);
6263 	}
6264 	*ppp = pp;
6265 }
6266 
6267 /*
6268  * The following functions scan through active conn_t structures
6269  * and return a reference to the best-matching policy it can find.
6270  * Caller must release the reference.
6271  */
6272 static void
6273 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6274 {
6275 	connf_t *connfp;
6276 	conn_t *connp = NULL;
6277 	ipsec_selector_t portonly;
6278 
6279 	bzero((void *)&portonly, sizeof (portonly));
6280 
6281 	if (sel->ips_local_port == 0)
6282 		return;
6283 
6284 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6285 	    ipst)];
6286 	mutex_enter(&connfp->connf_lock);
6287 
6288 	if (sel->ips_isv4) {
6289 		connp = connfp->connf_head;
6290 		while (connp != NULL) {
6291 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6292 			    sel->ips_local_addr_v4, sel->ips_remote_port,
6293 			    sel->ips_remote_addr_v4))
6294 				break;
6295 			connp = connp->conn_next;
6296 		}
6297 
6298 		if (connp == NULL) {
6299 			/* Try port-only match in IPv6. */
6300 			portonly.ips_local_port = sel->ips_local_port;
6301 			sel = &portonly;
6302 		}
6303 	}
6304 
6305 	if (connp == NULL) {
6306 		connp = connfp->connf_head;
6307 		while (connp != NULL) {
6308 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6309 			    sel->ips_local_addr_v6, sel->ips_remote_port,
6310 			    sel->ips_remote_addr_v6))
6311 				break;
6312 			connp = connp->conn_next;
6313 		}
6314 
6315 		if (connp == NULL) {
6316 			mutex_exit(&connfp->connf_lock);
6317 			return;
6318 		}
6319 	}
6320 
6321 	CONN_INC_REF(connp);
6322 	mutex_exit(&connfp->connf_lock);
6323 
6324 	ipsec_conn_pol(sel, connp, ppp);
6325 	CONN_DEC_REF(connp);
6326 }
6327 
6328 static conn_t *
6329 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6330 {
6331 	connf_t *connfp;
6332 	conn_t *connp = NULL;
6333 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6334 
6335 	if (sel->ips_local_port == 0)
6336 		return (NULL);
6337 
6338 	connfp = &ipst->ips_ipcl_bind_fanout[
6339 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6340 	mutex_enter(&connfp->connf_lock);
6341 
6342 	if (sel->ips_isv4) {
6343 		connp = connfp->connf_head;
6344 		while (connp != NULL) {
6345 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6346 			    sel->ips_local_addr_v4, pptr[1]))
6347 				break;
6348 			connp = connp->conn_next;
6349 		}
6350 
6351 		if (connp == NULL) {
6352 			/* Match to all-zeroes. */
6353 			v6addrmatch = &ipv6_all_zeros;
6354 		}
6355 	}
6356 
6357 	if (connp == NULL) {
6358 		connp = connfp->connf_head;
6359 		while (connp != NULL) {
6360 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6361 			    *v6addrmatch, pptr[1]))
6362 				break;
6363 			connp = connp->conn_next;
6364 		}
6365 
6366 		if (connp == NULL) {
6367 			mutex_exit(&connfp->connf_lock);
6368 			return (NULL);
6369 		}
6370 	}
6371 
6372 	CONN_INC_REF(connp);
6373 	mutex_exit(&connfp->connf_lock);
6374 	return (connp);
6375 }
6376 
6377 static void
6378 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6379 {
6380 	connf_t 	*connfp;
6381 	conn_t		*connp;
6382 	uint32_t	ports;
6383 	uint16_t	*pptr = (uint16_t *)&ports;
6384 
6385 	/*
6386 	 * Find TCP state in the following order:
6387 	 * 1.) Connected conns.
6388 	 * 2.) Listeners.
6389 	 *
6390 	 * Even though #2 will be the common case for inbound traffic, only
6391 	 * following this order insures correctness.
6392 	 */
6393 
6394 	if (sel->ips_local_port == 0)
6395 		return;
6396 
6397 	/*
6398 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6399 	 * See ipsec_construct_inverse_acquire() for details.
6400 	 */
6401 	pptr[0] = sel->ips_remote_port;
6402 	pptr[1] = sel->ips_local_port;
6403 
6404 	connfp = &ipst->ips_ipcl_conn_fanout[
6405 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6406 	mutex_enter(&connfp->connf_lock);
6407 	connp = connfp->connf_head;
6408 
6409 	if (sel->ips_isv4) {
6410 		while (connp != NULL) {
6411 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6412 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6413 			    ports))
6414 				break;
6415 			connp = connp->conn_next;
6416 		}
6417 	} else {
6418 		while (connp != NULL) {
6419 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6420 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6421 			    ports))
6422 				break;
6423 			connp = connp->conn_next;
6424 		}
6425 	}
6426 
6427 	if (connp != NULL) {
6428 		CONN_INC_REF(connp);
6429 		mutex_exit(&connfp->connf_lock);
6430 	} else {
6431 		mutex_exit(&connfp->connf_lock);
6432 
6433 		/* Try the listen hash. */
6434 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6435 			return;
6436 	}
6437 
6438 	ipsec_conn_pol(sel, connp, ppp);
6439 	CONN_DEC_REF(connp);
6440 }
6441 
6442 static void
6443 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6444     ip_stack_t *ipst)
6445 {
6446 	conn_t		*connp;
6447 	uint32_t	ports;
6448 	uint16_t	*pptr = (uint16_t *)&ports;
6449 
6450 	/*
6451 	 * Find SCP state in the following order:
6452 	 * 1.) Connected conns.
6453 	 * 2.) Listeners.
6454 	 *
6455 	 * Even though #2 will be the common case for inbound traffic, only
6456 	 * following this order insures correctness.
6457 	 */
6458 
6459 	if (sel->ips_local_port == 0)
6460 		return;
6461 
6462 	/*
6463 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6464 	 * See ipsec_construct_inverse_acquire() for details.
6465 	 */
6466 	pptr[0] = sel->ips_remote_port;
6467 	pptr[1] = sel->ips_local_port;
6468 
6469 	/*
6470 	 * For labeled systems, there's no need to check the
6471 	 * label here.  It's known to be good as we checked
6472 	 * before allowing the connection to become bound.
6473 	 */
6474 	if (sel->ips_isv4) {
6475 		in6_addr_t	src, dst;
6476 
6477 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6478 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6479 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6480 		    0, ipst->ips_netstack->netstack_sctp);
6481 	} else {
6482 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6483 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6484 		    0, ipst->ips_netstack->netstack_sctp);
6485 	}
6486 	if (connp == NULL)
6487 		return;
6488 	ipsec_conn_pol(sel, connp, ppp);
6489 	CONN_DEC_REF(connp);
6490 }
6491 
6492 /*
6493  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6494  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6495  * to PF_KEY.
6496  *
6497  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6498  * ignore prefix lengths in the address extension.  Since we match on first-
6499  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6500  * set addresses to mask out the lower bits, we should get a suitable search
6501  * key for the SPD anyway.  This is the function to change if the assumption
6502  * about suitable search keys is wrong.
6503  */
6504 static int
6505 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6506     sadb_address_t *dstext, int *diagnostic)
6507 {
6508 	struct sockaddr_in *src, *dst;
6509 	struct sockaddr_in6 *src6, *dst6;
6510 
6511 	*diagnostic = 0;
6512 
6513 	bzero(sel, sizeof (*sel));
6514 	sel->ips_protocol = srcext->sadb_address_proto;
6515 	dst = (struct sockaddr_in *)(dstext + 1);
6516 	if (dst->sin_family == AF_INET6) {
6517 		dst6 = (struct sockaddr_in6 *)dst;
6518 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6519 		if (src6->sin6_family != AF_INET6) {
6520 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6521 			return (EINVAL);
6522 		}
6523 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6524 		sel->ips_local_addr_v6 = src6->sin6_addr;
6525 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6526 			sel->ips_is_icmp_inv_acq = 1;
6527 		} else {
6528 			sel->ips_remote_port = dst6->sin6_port;
6529 			sel->ips_local_port = src6->sin6_port;
6530 		}
6531 		sel->ips_isv4 = B_FALSE;
6532 	} else {
6533 		src = (struct sockaddr_in *)(srcext + 1);
6534 		if (src->sin_family != AF_INET) {
6535 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6536 			return (EINVAL);
6537 		}
6538 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6539 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6540 		if (sel->ips_protocol == IPPROTO_ICMP) {
6541 			sel->ips_is_icmp_inv_acq = 1;
6542 		} else {
6543 			sel->ips_remote_port = dst->sin_port;
6544 			sel->ips_local_port = src->sin_port;
6545 		}
6546 		sel->ips_isv4 = B_TRUE;
6547 	}
6548 	return (0);
6549 }
6550 
6551 /*
6552  * We have encapsulation.
6553  * - Lookup tun_t by address and look for an associated
6554  *   tunnel policy
6555  * - If there are inner selectors
6556  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6557  *   - Look up tunnel policy based on selectors
6558  * - Else
6559  *   - Sanity check the negotation
6560  *   - If appropriate, fall through to global policy
6561  */
6562 static int
6563 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6564     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6565     int *diagnostic)
6566 {
6567 	int err;
6568 	ipsec_policy_head_t *polhead;
6569 
6570 	*diagnostic = 0;
6571 
6572 	/* Check for inner selectors and act appropriately */
6573 
6574 	if (innsrcext != NULL) {
6575 		/* Inner selectors present */
6576 		ASSERT(inndstext != NULL);
6577 		if ((itp == NULL) ||
6578 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6579 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6580 			/*
6581 			 * If inner packet selectors, we must have negotiate
6582 			 * tunnel and active policy.  If the tunnel has
6583 			 * transport-mode policy set on it, or has no policy,
6584 			 * fail.
6585 			 */
6586 			return (ENOENT);
6587 		} else {
6588 			/*
6589 			 * Reset "sel" to indicate inner selectors.  Pass
6590 			 * inner PF_KEY address extensions for this to happen.
6591 			 */
6592 			if ((err = ipsec_get_inverse_acquire_sel(sel,
6593 			    innsrcext, inndstext, diagnostic)) != 0)
6594 				return (err);
6595 			/*
6596 			 * Now look for a tunnel policy based on those inner
6597 			 * selectors.  (Common code is below.)
6598 			 */
6599 		}
6600 	} else {
6601 		/* No inner selectors present */
6602 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6603 			/*
6604 			 * Transport mode negotiation with no tunnel policy
6605 			 * configured - return to indicate a global policy
6606 			 * check is needed.
6607 			 */
6608 			return (0);
6609 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6610 			/* Tunnel mode set with no inner selectors. */
6611 			return (ENOENT);
6612 		}
6613 		/*
6614 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6615 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6616 		 * itp with policy set based on any match, so don't bother
6617 		 * changing fields in "sel".
6618 		 */
6619 	}
6620 
6621 	ASSERT(itp != NULL);
6622 	polhead = itp->itp_policy;
6623 	ASSERT(polhead != NULL);
6624 	rw_enter(&polhead->iph_lock, RW_READER);
6625 	*ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel);
6626 	rw_exit(&polhead->iph_lock);
6627 
6628 	/*
6629 	 * Don't default to global if we didn't find a matching policy entry.
6630 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6631 	 */
6632 	if (*ppp == NULL)
6633 		return (ENOENT);
6634 
6635 	return (0);
6636 }
6637 
6638 /*
6639  * For sctp conn_faddr is the primary address, hence this is of limited
6640  * use for sctp.
6641  */
6642 static void
6643 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6644     ip_stack_t *ipst)
6645 {
6646 	boolean_t	isv4 = sel->ips_isv4;
6647 	connf_t		*connfp;
6648 	conn_t		*connp;
6649 
6650 	if (isv4) {
6651 		connfp = &ipst->ips_ipcl_proto_fanout_v4[sel->ips_protocol];
6652 	} else {
6653 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6654 	}
6655 
6656 	mutex_enter(&connfp->connf_lock);
6657 	for (connp = connfp->connf_head; connp != NULL;
6658 	    connp = connp->conn_next) {
6659 		if (isv4) {
6660 			if ((connp->conn_laddr_v4 == INADDR_ANY ||
6661 			    connp->conn_laddr_v4 == sel->ips_local_addr_v4) &&
6662 			    (connp->conn_faddr_v4 == INADDR_ANY ||
6663 			    connp->conn_faddr_v4 == sel->ips_remote_addr_v4))
6664 				break;
6665 		} else {
6666 			if ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
6667 			    IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
6668 			    &sel->ips_local_addr_v6)) &&
6669 			    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
6670 			    IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
6671 			    &sel->ips_remote_addr_v6)))
6672 				break;
6673 		}
6674 	}
6675 	if (connp == NULL) {
6676 		mutex_exit(&connfp->connf_lock);
6677 		return;
6678 	}
6679 
6680 	CONN_INC_REF(connp);
6681 	mutex_exit(&connfp->connf_lock);
6682 
6683 	ipsec_conn_pol(sel, connp, ppp);
6684 	CONN_DEC_REF(connp);
6685 }
6686 
/*
 * Construct an inverse ACQUIRE reply based on:
 *
 * 1.) Current global policy.
 * 2.) An conn_t match depending on what all was passed in the extv[].
 * 3.) A tunnel's policy head.
 * ...
 * N.) Other stuff TBD (e.g. identities)
 *
 * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
 * in this function so the caller can extract them where appropriately.
 *
 * The SRC address is the local one - just like an outbound ACQUIRE message.
 *
 * XXX MLS: key management supplies a label which we just reflect back up
 * again.  clearly we need to involve the label in the rest of the checks.
 *
 * Arguments:
 *	samsg	the request's base header; its sadb_msg_errno and
 *		sadb_x_msg_diagnostic are overwritten on every return
 *		(with 0 on success).
 *	extv	parsed extension vector; the SRC, DST, INNER_SRC, INNER_DST
 *		and SENSITIVITY slots are consulted.
 *	ns	netstack to operate in.
 *
 * Returns the message built by sadb_extended_acquire(), or NULL on any
 * failure (ENOMEM is reported if sadb_extended_acquire() itself fails).
 */
mblk_t *
ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
    netstack_t *ns)
{
	int err;
	int diagnostic;
	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
	sadb_sens_t *sens = (sadb_sens_t *)extv[SADB_EXT_SENSITIVITY];
	struct sockaddr_in6 *src, *dst;
	struct sockaddr_in6 *isrc, *idst;
	ipsec_tun_pol_t *itp = NULL;
	ipsec_policy_t *pp = NULL;
	ipsec_selector_t sel, isel;
	mblk_t *retmp = NULL;
	ip_stack_t	*ipst = ns->netstack_ip;


	/* Normalize addresses */
	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
	    == KS_IN_ADDR_UNKNOWN) {
		err = EINVAL;
		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
		goto bail;
	}
	src = (struct sockaddr_in6 *)(srcext + 1);
	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
	    == KS_IN_ADDR_UNKNOWN) {
		err = EINVAL;
		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
		goto bail;
	}
	dst = (struct sockaddr_in6 *)(dstext + 1);
	/* Outer source and destination must be of the same family. */
	if (src->sin6_family != dst->sin6_family) {
		err = EINVAL;
		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
		goto bail;
	}

	/* Check for tunnel mode and act appropriately */
	if (innsrcext != NULL) {
		/* Inner addresses must come as a pair. */
		if (inndstext == NULL) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
			goto bail;
		}
		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
			goto bail;
		}
		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
			goto bail;
		}
		idst = (struct sockaddr_in6 *)(inndstext + 1);
		if (isrc->sin6_family != idst->sin6_family) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
			goto bail;
		}
		if (isrc->sin6_family != AF_INET &&
		    isrc->sin6_family != AF_INET6) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
			goto bail;
		}
	} else if (inndstext != NULL) {
		/* Inner destination without an inner source. */
		err = EINVAL;
		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
		goto bail;
	}

	/* Get selectors first, based on outer addresses */
	/* (This also sets diagnostic, to 0 when it succeeds.) */
	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
	if (err != 0)
		goto bail;

	/* Check for tunnel mode mismatches. */
	/*
	 * I.e. with inner addresses present, the outer protocol must be 0
	 * or the encapsulation protocol that matches the inner family.
	 */
	if (innsrcext != NULL &&
	    ((isrc->sin6_family == AF_INET &&
	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
	    (isrc->sin6_family == AF_INET6 &&
	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
		err = EPROTOTYPE;
		goto bail;
	}

	/*
	 * Okay, we have the addresses and other selector information.
	 * Let's first find a conn...
	 */
	pp = NULL;
	switch (sel.ips_protocol) {
	case IPPROTO_TCP:
		ipsec_tcp_pol(&sel, &pp, ipst);
		break;
	case IPPROTO_UDP:
		ipsec_udp_pol(&sel, &pp, ipst);
		break;
	case IPPROTO_SCTP:
		ipsec_sctp_pol(&sel, &pp, ipst);
		break;
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		/*
		 * Assume sel.ips_remote_addr_* has the right address at
		 * that exact position.
		 */
		/* itp, if non-NULL, is released at "bail" below. */
		itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
		    (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
		    ipst);

		if (innsrcext == NULL) {
			/*
			 * Transport-mode tunnel, make sure we fake out isel
			 * to contain something based on the outer protocol.
			 */
			bzero(&isel, sizeof (isel));
			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
		} /* Else isel is initialized by ipsec_tun_pol(). */
		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
		    &diagnostic);
		/*
		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
		 * may be.
		 */
		if (err != 0)
			goto bail;
		break;
	default:
		ipsec_oth_pol(&sel, &pp, ipst);
		break;
	}

	/*
	 * If we didn't find a matching conn_t or other policy head, take a
	 * look in the global policy.
	 */
	if (pp == NULL) {
		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, &sel, ns);
		if (pp == NULL) {
			/* There's no global policy. */
			err = ENOENT;
			diagnostic = 0;
			goto bail;
		}
	}

	/*
	 * Now that we have a policy entry/widget, construct an ACQUIRE
	 * message based on that, fix fields where appropriate,
	 * and return the message.
	 */
	retmp = sadb_extended_acquire(&sel, pp, NULL,
	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, sens, ns);
	/* Drop the policy reference now that the message has been built. */
	if (pp != NULL) {
		IPPOL_REFRELE(pp);
	}
	ASSERT(err == 0 && diagnostic == 0);
	if (retmp == NULL)
		err = ENOMEM;
bail:
	/* Common exit: release the tunnel policy and report the status. */
	if (itp != NULL) {
		ITP_REFRELE(itp, ns);
	}
	samsg->sadb_msg_errno = (uint8_t)err;
	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
	return (retmp);
}
6881 
6882 /*
6883  * ipsa_lpkt is a one-element queue, only manipulated by the next two
6884  * functions.  They have to hold the ipsa_lock because of potential races
6885  * between key management using SADB_UPDATE, and inbound packets that may
6886  * queue up on the larval SA (hence the 'l' in "lpkt").
6887  */
6888 
6889 /*
6890  * sadb_set_lpkt:
6891  *
6892  * Returns the passed-in packet if the SA is no longer larval.
6893  *
6894  * Returns NULL if the SA is larval, and needs to be swapped into the SA for
6895  * processing after an SADB_UPDATE.
6896  */
6897 mblk_t *
6898 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, ip_recv_attr_t *ira)
6899 {
6900 	mblk_t		*opkt;
6901 
6902 	mutex_enter(&ipsa->ipsa_lock);
6903 	opkt = ipsa->ipsa_lpkt;
6904 	if (ipsa->ipsa_state == IPSA_STATE_LARVAL) {
6905 		/*
6906 		 * Consume npkt and place it in the LARVAL SA's inbound
6907 		 * packet slot.
6908 		 */
6909 		mblk_t	*attrmp;
6910 
6911 		attrmp = ip_recv_attr_to_mblk(ira);
6912 		if (attrmp == NULL) {
6913 			ill_t *ill = ira->ira_ill;
6914 
6915 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
6916 			ip_drop_input("ipIfStatsInDiscards", npkt, ill);
6917 			freemsg(npkt);
6918 			opkt = NULL;
6919 		} else {
6920 			ASSERT(attrmp->b_cont == NULL);
6921 			attrmp->b_cont = npkt;
6922 			ipsa->ipsa_lpkt = attrmp;
6923 		}
6924 		npkt = NULL;
6925 	} else {
6926 		/*
6927 		 * If not larval, we lost the race.  NOTE: ipsa_lpkt may still
6928 		 * have been non-NULL in the non-larval case, because of
6929 		 * inbound packets arriving prior to sadb_common_add()
6930 		 * transferring the SA completely out of larval state, but
6931 		 * after lpkt was grabbed by the AH/ESP-specific add routines.
6932 		 * We should clear the old ipsa_lpkt in this case to make sure
6933 		 * that it doesn't linger on the now-MATURE IPsec SA, or get
6934 		 * picked up as an out-of-order packet.
6935 		 */
6936 		ipsa->ipsa_lpkt = NULL;
6937 	}
6938 	mutex_exit(&ipsa->ipsa_lock);
6939 
6940 	if (opkt != NULL) {
6941 		ipsec_stack_t	*ipss;
6942 
6943 		ipss = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsec;
6944 		opkt = ip_recv_attr_free_mblk(opkt);
6945 		ip_drop_packet(opkt, B_TRUE, ira->ira_ill,
6946 		    DROPPER(ipss, ipds_sadb_inlarval_replace),
6947 		    &ipss->ipsec_sadb_dropper);
6948 	}
6949 	return (npkt);
6950 }
6951 
6952 /*
6953  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6954  * previous value.
6955  */
6956 mblk_t *
6957 sadb_clear_lpkt(ipsa_t *ipsa)
6958 {
6959 	mblk_t *opkt;
6960 
6961 	mutex_enter(&ipsa->ipsa_lock);
6962 	opkt = ipsa->ipsa_lpkt;
6963 	ipsa->ipsa_lpkt = NULL;
6964 	mutex_exit(&ipsa->ipsa_lock);
6965 	return (opkt);
6966 }
6967 
6968 /*
6969  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
6970  */
6971 void
6972 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, ip_recv_attr_t *ira)
6973 {
6974 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
6975 	ipsec_stack_t   *ipss = ns->netstack_ipsec;
6976 	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
6977 	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
6978 	mblk_t		*mp;
6979 
6980 	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
6981 
6982 	if (cl_inet_idlesa == NULL) {
6983 		ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
6984 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6985 		    &ipss->ipsec_sadb_dropper);
6986 		return;
6987 	}
6988 
6989 	cl_inet_idlesa(ns->netstack_stackid,
6990 	    (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
6991 	    ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
6992 
6993 	mp = ip_recv_attr_to_mblk(ira);
6994 	if (mp == NULL) {
6995 		ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
6996 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6997 		    &ipss->ipsec_sadb_dropper);
6998 		return;
6999 	}
7000 	linkb(mp, bpkt);
7001 
7002 	mutex_enter(&ipsa->ipsa_lock);
7003 	ipsa->ipsa_mblkcnt++;
7004 	if (ipsa->ipsa_bpkt_head == NULL) {
7005 		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
7006 	} else {
7007 		ipsa->ipsa_bpkt_tail->b_next = bpkt;
7008 		ipsa->ipsa_bpkt_tail = bpkt;
7009 		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
7010 			mblk_t *tmp;
7011 
7012 			tmp = ipsa->ipsa_bpkt_head;
7013 			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
7014 			tmp = ip_recv_attr_free_mblk(tmp);
7015 			ip_drop_packet(tmp, B_TRUE, NULL,
7016 			    DROPPER(ipss, ipds_sadb_inidle_overflow),
7017 			    &ipss->ipsec_sadb_dropper);
7018 			ipsa->ipsa_mblkcnt --;
7019 		}
7020 	}
7021 	mutex_exit(&ipsa->ipsa_lock);
7022 }
7023 
7024 /*
7025  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
7026  * and put into STREAMS again.
7027  */
7028 void
7029 sadb_clear_buf_pkt(void *ipkt)
7030 {
7031 	mblk_t	*tmp, *buf_pkt;
7032 	ip_recv_attr_t	iras;
7033 
7034 	buf_pkt = (mblk_t *)ipkt;
7035 
7036 	while (buf_pkt != NULL) {
7037 		mblk_t *data_mp;
7038 
7039 		tmp = buf_pkt->b_next;
7040 		buf_pkt->b_next = NULL;
7041 
7042 		data_mp = buf_pkt->b_cont;
7043 		buf_pkt->b_cont = NULL;
7044 		if (!ip_recv_attr_from_mblk(buf_pkt, &iras)) {
7045 			/* The ill or ip_stack_t disappeared on us. */
7046 			ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
7047 			freemsg(data_mp);
7048 		} else {
7049 			ip_input_post_ipsec(data_mp, &iras);
7050 		}
7051 		ira_cleanup(&iras, B_TRUE);
7052 		buf_pkt = tmp;
7053 	}
7054 }
7055 /*
7056  * Walker callback used by sadb_alg_update() to free/create crypto
7057  * context template when a crypto software provider is removed or
7058  * added.
7059  */
7060 
7061 struct sadb_update_alg_state {
7062 	ipsec_algtype_t alg_type;
7063 	uint8_t alg_id;
7064 	boolean_t is_added;
7065 	boolean_t async_auth;
7066 	boolean_t async_encr;
7067 };
7068 
7069 static void
7070 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7071 {
7072 	struct sadb_update_alg_state *update_state =
7073 	    (struct sadb_update_alg_state *)cookie;
7074 	crypto_ctx_template_t *ctx_tmpl = NULL;
7075 
7076 	ASSERT(MUTEX_HELD(&head->isaf_lock));
7077 
7078 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
7079 		return;
7080 
7081 	mutex_enter(&entry->ipsa_lock);
7082 
7083 	if ((entry->ipsa_encr_alg != SADB_EALG_NONE && entry->ipsa_encr_alg !=
7084 	    SADB_EALG_NULL && update_state->async_encr) ||
7085 	    (entry->ipsa_auth_alg != SADB_AALG_NONE &&
7086 	    update_state->async_auth)) {
7087 		entry->ipsa_flags |= IPSA_F_ASYNC;
7088 	} else {
7089 		entry->ipsa_flags &= ~IPSA_F_ASYNC;
7090 	}
7091 
7092 	switch (update_state->alg_type) {
7093 	case IPSEC_ALG_AUTH:
7094 		if (entry->ipsa_auth_alg == update_state->alg_id)
7095 			ctx_tmpl = &entry->ipsa_authtmpl;
7096 		break;
7097 	case IPSEC_ALG_ENCR:
7098 		if (entry->ipsa_encr_alg == update_state->alg_id)
7099 			ctx_tmpl = &entry->ipsa_encrtmpl;
7100 		break;
7101 	default:
7102 		ctx_tmpl = NULL;
7103 	}
7104 
7105 	if (ctx_tmpl == NULL) {
7106 		mutex_exit(&entry->ipsa_lock);
7107 		return;
7108 	}
7109 
7110 	/*
7111 	 * The context template of the SA may be affected by the change
7112 	 * of crypto provider.
7113 	 */
7114 	if (update_state->is_added) {
7115 		/* create the context template if not already done */
7116 		if (*ctx_tmpl == NULL) {
7117 			(void) ipsec_create_ctx_tmpl(entry,
7118 			    update_state->alg_type);
7119 		}
7120 	} else {
7121 		/*
7122 		 * The crypto provider was removed. If the context template
7123 		 * exists but it is no longer valid, free it.
7124 		 */
7125 		if (*ctx_tmpl != NULL)
7126 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7127 	}
7128 
7129 	mutex_exit(&entry->ipsa_lock);
7130 }
7131 
7132 /*
7133  * Invoked by IP when an software crypto provider has been updated, or if
7134  * the crypto synchrony changes.  The type and id of the corresponding
7135  * algorithm is passed as argument.  The type is set to ALL in the case of
7136  * a synchrony change.
7137  *
7138  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7139  * removed. The function updates the SADB and free/creates the
7140  * context templates associated with SAs if needed.
7141  */
7142 
7143 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
7144     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7145 	&update_state)
7146 
7147 void
7148 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7149     netstack_t *ns)
7150 {
7151 	struct sadb_update_alg_state update_state;
7152 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7153 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7154 	ipsec_stack_t *ipss = ns->netstack_ipsec;
7155 
7156 	update_state.alg_type = alg_type;
7157 	update_state.alg_id = alg_id;
7158 	update_state.is_added = is_added;
7159 	update_state.async_auth = ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
7160 	    IPSEC_ALGS_EXEC_ASYNC;
7161 	update_state.async_encr = ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
7162 	    IPSEC_ALGS_EXEC_ASYNC;
7163 
7164 	if (alg_type == IPSEC_ALG_AUTH || alg_type == IPSEC_ALG_ALL) {
7165 		/* walk the AH tables only for auth. algorithm changes */
7166 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7167 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7168 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7169 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7170 	}
7171 
7172 	/* walk the ESP tables */
7173 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7174 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7175 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7176 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7177 }
7178 
7179 /*
7180  * Creates a context template for the specified SA. This function
7181  * is called when an SA is created and when a context template needs
7182  * to be created due to a change of software provider.
7183  */
7184 int
7185 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7186 {
7187 	ipsec_alginfo_t *alg;
7188 	crypto_mechanism_t mech;
7189 	crypto_key_t *key;
7190 	crypto_ctx_template_t *sa_tmpl;
7191 	int rv;
7192 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7193 
7194 	ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
7195 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7196 
7197 	/* get pointers to the algorithm info, context template, and key */
7198 	switch (alg_type) {
7199 	case IPSEC_ALG_AUTH:
7200 		key = &sa->ipsa_kcfauthkey;
7201 		sa_tmpl = &sa->ipsa_authtmpl;
7202 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7203 		break;
7204 	case IPSEC_ALG_ENCR:
7205 		key = &sa->ipsa_kcfencrkey;
7206 		sa_tmpl = &sa->ipsa_encrtmpl;
7207 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7208 		break;
7209 	default:
7210 		alg = NULL;
7211 	}
7212 
7213 	if (alg == NULL || !ALG_VALID(alg))
7214 		return (EINVAL);
7215 
7216 	/* initialize the mech info structure for the framework */
7217 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7218 	mech.cm_type = alg->alg_mech_type;
7219 	mech.cm_param = NULL;
7220 	mech.cm_param_len = 0;
7221 
7222 	/* create a new context template */
7223 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7224 
7225 	/*
7226 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7227 	 * providers are available for that mechanism. In that case
7228 	 * we don't fail, and will generate the context template from
7229 	 * the framework callback when a software provider for that
7230 	 * mechanism registers.
7231 	 *
7232 	 * The context template is assigned the special value
7233 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7234 	 * lack of memory. No attempt will be made to use
7235 	 * the context template if it is set to this value.
7236 	 */
7237 	if (rv == CRYPTO_HOST_MEMORY) {
7238 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7239 	} else if (rv != CRYPTO_SUCCESS) {
7240 		*sa_tmpl = NULL;
7241 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7242 			return (EINVAL);
7243 	}
7244 
7245 	return (0);
7246 }
7247 
7248 /*
7249  * Destroy the context template of the specified algorithm type
7250  * of the specified SA. Must be called while holding the SA lock.
7251  */
7252 void
7253 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7254 {
7255 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7256 
7257 	if (alg_type == IPSEC_ALG_AUTH) {
7258 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7259 			sa->ipsa_authtmpl = NULL;
7260 		else if (sa->ipsa_authtmpl != NULL) {
7261 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7262 			sa->ipsa_authtmpl = NULL;
7263 		}
7264 	} else {
7265 		ASSERT(alg_type == IPSEC_ALG_ENCR);
7266 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7267 			sa->ipsa_encrtmpl = NULL;
7268 		else if (sa->ipsa_encrtmpl != NULL) {
7269 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7270 			sa->ipsa_encrtmpl = NULL;
7271 		}
7272 	}
7273 }
7274 
7275 /*
7276  * Use the kernel crypto framework to check the validity of a key received
7277  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7278  */
7279 int
7280 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7281     boolean_t is_auth, int *diag)
7282 {
7283 	crypto_mechanism_t mech;
7284 	crypto_key_t crypto_key;
7285 	int crypto_rc;
7286 
7287 	mech.cm_type = mech_type;
7288 	mech.cm_param = NULL;
7289 	mech.cm_param_len = 0;
7290 
7291 	crypto_key.ck_format = CRYPTO_KEY_RAW;
7292 	crypto_key.ck_data = sadb_key + 1;
7293 	crypto_key.ck_length = sadb_key->sadb_key_bits;
7294 
7295 	crypto_rc = crypto_key_check(&mech, &crypto_key);
7296 
7297 	switch (crypto_rc) {
7298 	case CRYPTO_SUCCESS:
7299 		return (0);
7300 	case CRYPTO_MECHANISM_INVALID:
7301 	case CRYPTO_MECH_NOT_SUPPORTED:
7302 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7303 		    SADB_X_DIAGNOSTIC_BAD_EALG;
7304 		break;
7305 	case CRYPTO_KEY_SIZE_RANGE:
7306 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7307 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7308 		break;
7309 	case CRYPTO_WEAK_KEY:
7310 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7311 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7312 		break;
7313 	}
7314 
7315 	return (-1);
7316 }
7317 
7318 /*
7319  * Whack options in the outer IP header when ipsec changes the outer label
7320  *
7321  * This is inelegant and really could use refactoring.
7322  */
7323 mblk_t *
7324 sadb_whack_label_v4(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
7325     ipdropper_t *dropper)
7326 {
7327 	int delta;
7328 	int plen;
7329 	dblk_t *db;
7330 	int hlen;
7331 	uint8_t *opt_storage = assoc->ipsa_opt_storage;
7332 	ipha_t *ipha = (ipha_t *)mp->b_rptr;
7333 
7334 	plen = ntohs(ipha->ipha_length);
7335 
7336 	delta = tsol_remove_secopt(ipha, MBLKL(mp));
7337 	mp->b_wptr += delta;
7338 	plen += delta;
7339 
7340 	/* XXX XXX code copied from tsol_check_label */
7341 
7342 	/* Make sure we have room for the worst-case addition */
7343 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
7344 	hlen = (hlen + 3) & ~3;
7345 	if (hlen > IP_MAX_HDR_LENGTH)
7346 		hlen = IP_MAX_HDR_LENGTH;
7347 	hlen -= IPH_HDR_LENGTH(ipha);
7348 
7349 	db = mp->b_datap;
7350 	if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7351 		int copylen;
7352 		mblk_t *new_mp;
7353 
7354 		/* allocate enough to be meaningful, but not *too* much */
7355 		copylen = MBLKL(mp);
7356 		if (copylen > 256)
7357 			copylen = 256;
7358 		new_mp = allocb_tmpl(hlen + copylen +
7359 		    (mp->b_rptr - mp->b_datap->db_base), mp);
7360 
7361 		if (new_mp == NULL) {
7362 			ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7363 			return (NULL);
7364 		}
7365 
7366 		/* keep the bias */
7367 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7368 		new_mp->b_wptr = new_mp->b_rptr + copylen;
7369 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7370 		new_mp->b_cont = mp;
7371 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7372 			new_mp->b_cont = mp->b_cont;
7373 			freeb(mp);
7374 		}
7375 		mp = new_mp;
7376 		ipha = (ipha_t *)mp->b_rptr;
7377 	}
7378 
7379 	delta = tsol_prepend_option(assoc->ipsa_opt_storage, ipha, MBLKL(mp));
7380 
7381 	ASSERT(delta != -1);
7382 
7383 	plen += delta;
7384 	mp->b_wptr += delta;
7385 
7386 	/*
7387 	 * Paranoia
7388 	 */
7389 	db = mp->b_datap;
7390 
7391 	ASSERT3P(mp->b_wptr, <=, db->db_lim);
7392 	ASSERT3P(mp->b_rptr, <=, db->db_lim);
7393 
7394 	ASSERT3P(mp->b_wptr, >=, db->db_base);
7395 	ASSERT3P(mp->b_rptr, >=, db->db_base);
7396 	/* End paranoia */
7397 
7398 	ipha->ipha_length = htons(plen);
7399 
7400 	return (mp);
7401 }
7402 
7403 mblk_t *
7404 sadb_whack_label_v6(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
7405     ipdropper_t *dropper)
7406 {
7407 	int delta;
7408 	int plen;
7409 	dblk_t *db;
7410 	int hlen;
7411 	uint8_t *opt_storage = assoc->ipsa_opt_storage;
7412 	uint_t sec_opt_len; /* label option length not including type, len */
7413 	ip6_t *ip6h = (ip6_t *)mp->b_rptr;
7414 
7415 	plen = ntohs(ip6h->ip6_plen);
7416 
7417 	delta = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
7418 	mp->b_wptr += delta;
7419 	plen += delta;
7420 
7421 	/* XXX XXX code copied from tsol_check_label_v6 */
7422 	/*
7423 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
7424 	 * the hop-by-hop ext header's next header and length fields. Add
7425 	 * another 2 bytes for the label option type, len and then round
7426 	 * up to the next 8-byte multiple.
7427 	 */
7428 	sec_opt_len = opt_storage[1];
7429 
7430 	db = mp->b_datap;
7431 	hlen = (4 + sec_opt_len + 7) & ~7;
7432 
7433 	if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7434 		int copylen;
7435 		mblk_t *new_mp;
7436 		uint16_t hdr_len;
7437 
7438 		hdr_len = ip_hdr_length_v6(mp, ip6h);
7439 		/*
7440 		 * Allocate enough to be meaningful, but not *too* much.
7441 		 * Also all the IPv6 extension headers must be in the same mblk
7442 		 */
7443 		copylen = MBLKL(mp);
7444 		if (copylen > 256)
7445 			copylen = 256;
7446 		if (copylen < hdr_len)
7447 			copylen = hdr_len;
7448 		new_mp = allocb_tmpl(hlen + copylen +
7449 		    (mp->b_rptr - mp->b_datap->db_base), mp);
7450 		if (new_mp == NULL) {
7451 			ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7452 			return (NULL);
7453 		}
7454 
7455 		/* keep the bias */
7456 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7457 		new_mp->b_wptr = new_mp->b_rptr + copylen;
7458 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7459 		new_mp->b_cont = mp;
7460 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7461 			new_mp->b_cont = mp->b_cont;
7462 			freeb(mp);
7463 		}
7464 		mp = new_mp;
7465 		ip6h = (ip6_t *)mp->b_rptr;
7466 	}
7467 
7468 	delta = tsol_prepend_option_v6(assoc->ipsa_opt_storage,
7469 	    ip6h, MBLKL(mp));
7470 
7471 	ASSERT(delta != -1);
7472 
7473 	plen += delta;
7474 	mp->b_wptr += delta;
7475 
7476 	/*
7477 	 * Paranoia
7478 	 */
7479 	db = mp->b_datap;
7480 
7481 	ASSERT3P(mp->b_wptr, <=, db->db_lim);
7482 	ASSERT3P(mp->b_rptr, <=, db->db_lim);
7483 
7484 	ASSERT3P(mp->b_wptr, >=, db->db_base);
7485 	ASSERT3P(mp->b_rptr, >=, db->db_base);
7486 	/* End paranoia */
7487 
7488 	ip6h->ip6_plen = htons(plen);
7489 
7490 	return (mp);
7491 }
7492 
7493 /* Whack the labels and update ip_xmit_attr_t as needed */
7494 mblk_t *
7495 sadb_whack_label(mblk_t *mp, ipsa_t *assoc, ip_xmit_attr_t *ixa,
7496     kstat_named_t *counter, ipdropper_t *dropper)
7497 {
7498 	int adjust;
7499 	int iplen;
7500 
7501 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
7502 		ipha_t		*ipha = (ipha_t *)mp->b_rptr;
7503 
7504 		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7505 		iplen = ntohs(ipha->ipha_length);
7506 		mp = sadb_whack_label_v4(mp, assoc, counter, dropper);
7507 		if (mp == NULL)
7508 			return (NULL);
7509 
7510 		ipha = (ipha_t *)mp->b_rptr;
7511 		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7512 		adjust = (int)ntohs(ipha->ipha_length) - iplen;
7513 	} else {
7514 		ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
7515 
7516 		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7517 		iplen = ntohs(ip6h->ip6_plen);
7518 		mp = sadb_whack_label_v6(mp, assoc, counter, dropper);
7519 		if (mp == NULL)
7520 			return (NULL);
7521 
7522 		ip6h = (ip6_t *)mp->b_rptr;
7523 		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7524 		adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
7525 	}
7526 	ixa->ixa_pktlen += adjust;
7527 	ixa->ixa_ip_hdr_length += adjust;
7528 	return (mp);
7529 }
7530 
7531 /*
7532  * If this is an outgoing SA then add some fuzz to the
7533  * SOFT EXPIRE time. The reason for this is to stop
7534  * peers trying to renegotiate SOFT expiring SA's at
7535  * the same time. The amount of fuzz needs to be at
7536  * least 8 seconds which is the typical interval
7537  * sadb_ager(), although this is only a guide as it
7538  * selftunes.
7539  */
7540 static void
7541 lifetime_fuzz(ipsa_t *assoc)
7542 {
7543 	uint8_t rnd;
7544 
7545 	if (assoc->ipsa_softaddlt == 0)
7546 		return;
7547 
7548 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7549 	rnd = (rnd & 0xF) + 8;
7550 	assoc->ipsa_softexpiretime -= rnd;
7551 	assoc->ipsa_softaddlt -= rnd;
7552 }
7553 
7554 static void
7555 destroy_ipsa_pair(ipsap_t *ipsapp)
7556 {
7557 	/*
7558 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7559 	 * them in { }.
7560 	 */
7561 	if (ipsapp->ipsap_sa_ptr != NULL) {
7562 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7563 	}
7564 	if (ipsapp->ipsap_psa_ptr != NULL) {
7565 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7566 	}
7567 	init_ipsa_pair(ipsapp);
7568 }
7569 
7570 static void
7571 init_ipsa_pair(ipsap_t *ipsapp)
7572 {
7573 	ipsapp->ipsap_bucket = NULL;
7574 	ipsapp->ipsap_sa_ptr = NULL;
7575 	ipsapp->ipsap_pbucket = NULL;
7576 	ipsapp->ipsap_psa_ptr = NULL;
7577 }
7578 
7579 /*
7580  * The sadb_ager() function walks through the hash tables of SA's and ages
7581  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7582  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7583  * SA appears in both the inbound and outbound tables because its not possible
7584  * to determine its direction) are placed on a list when they expire. This is
7585  * to ensure that pair/peer SA's are reaped at the same time, even if they
7586  * expire at different times.
7587  *
7588  * This function is called twice by sadb_ager(), one after processing the
7589  * inbound table, then again after processing the outbound table.
7590  */
7591 void
7592 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7593 {
7594 	templist_t *listptr;
7595 	int outhash;
7596 	isaf_t *bucket;
7597 	boolean_t haspeer;
7598 	ipsa_t *peer_assoc, *dying;
7599 	/*
7600 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7601 	 * is address independent.
7602 	 */
7603 	while (haspeerlist != NULL) {
7604 		/* "dying" contains the SA that has a peer. */
7605 		dying = haspeerlist->ipsa;
7606 		haspeer = (dying->ipsa_haspeer);
7607 		listptr = haspeerlist;
7608 		haspeerlist = listptr->next;
7609 		kmem_free(listptr, sizeof (*listptr));
7610 		/*
7611 		 * Pick peer bucket based on addrfam.
7612 		 */
7613 		if (outbound) {
7614 			if (haspeer)
7615 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7616 			else
7617 				bucket = INBOUND_BUCKET(sp,
7618 				    dying->ipsa_otherspi);
7619 		} else { /* inbound */
7620 			if (haspeer) {
7621 				if (dying->ipsa_addrfam == AF_INET6) {
7622 					outhash = OUTBOUND_HASH_V6(sp,
7623 					    *((in6_addr_t *)&dying->
7624 					    ipsa_dstaddr));
7625 				} else {
7626 					outhash = OUTBOUND_HASH_V4(sp,
7627 					    *((ipaddr_t *)&dying->
7628 					    ipsa_dstaddr));
7629 				}
7630 			} else if (dying->ipsa_addrfam == AF_INET6) {
7631 				outhash = OUTBOUND_HASH_V6(sp,
7632 				    *((in6_addr_t *)&dying->
7633 				    ipsa_srcaddr));
7634 			} else {
7635 				outhash = OUTBOUND_HASH_V4(sp,
7636 				    *((ipaddr_t *)&dying->
7637 				    ipsa_srcaddr));
7638 			}
7639 			bucket = &(sp->sdb_of[outhash]);
7640 		}
7641 
7642 		mutex_enter(&bucket->isaf_lock);
7643 		/*
7644 		 * "haspeer" SA's have the same src/dst address ordering,
7645 		 * "paired" SA's have the src/dst addresses reversed.
7646 		 */
7647 		if (haspeer) {
7648 			peer_assoc = ipsec_getassocbyspi(bucket,
7649 			    dying->ipsa_spi, dying->ipsa_srcaddr,
7650 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7651 		} else {
7652 			peer_assoc = ipsec_getassocbyspi(bucket,
7653 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7654 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7655 		}
7656 
7657 		mutex_exit(&bucket->isaf_lock);
7658 		if (peer_assoc != NULL) {
7659 			mutex_enter(&peer_assoc->ipsa_lock);
7660 			mutex_enter(&dying->ipsa_lock);
7661 			if (!haspeer) {
7662 				/*
7663 				 * Only SA's which have a "peer" or are
7664 				 * "paired" end up on this list, so this
7665 				 * must be a "paired" SA, update the flags
7666 				 * to break the pair.
7667 				 */
7668 				peer_assoc->ipsa_otherspi = 0;
7669 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7670 				dying->ipsa_otherspi = 0;
7671 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7672 			}
7673 			if (haspeer || outbound) {
7674 				/*
7675 				 * Update the state of the "inbound" SA when
7676 				 * the "outbound" SA has expired. Don't update
7677 				 * the "outbound" SA when the "inbound" SA
7678 				 * SA expires because setting the hard_addtime
7679 				 * below will cause this to happen.
7680 				 */
7681 				peer_assoc->ipsa_state = dying->ipsa_state;
7682 			}
7683 			if (dying->ipsa_state == IPSA_STATE_DEAD)
7684 				peer_assoc->ipsa_hardexpiretime = 1;
7685 
7686 			mutex_exit(&dying->ipsa_lock);
7687 			mutex_exit(&peer_assoc->ipsa_lock);
7688 			IPSA_REFRELE(peer_assoc);
7689 		}
7690 		IPSA_REFRELE(dying);
7691 	}
7692 }
7693 
7694 /*
7695  * Ensure that the IV used for CCM mode never repeats. The IV should
7696  * only be updated by this function. Also check to see if the IV
7697  * is about to wrap and generate a SOFT Expire. This function is only
7698  * called for outgoing packets, the IV for incomming packets is taken
7699  * from the wire. If the outgoing SA needs to be expired, update
7700  * the matching incomming SA.
7701  */
7702 boolean_t
7703 update_iv(uint8_t *iv_ptr, queue_t *pfkey_q, ipsa_t *assoc,
7704     ipsecesp_stack_t *espstack)
7705 {
7706 	boolean_t rc = B_TRUE;
7707 	isaf_t *inbound_bucket;
7708 	sadb_t *sp;
7709 	ipsa_t *pair_sa = NULL;
7710 	int sa_new_state = 0;
7711 
7712 	/* For non counter modes, the IV is random data. */
7713 	if (!(assoc->ipsa_flags & IPSA_F_COUNTERMODE)) {
7714 		(void) random_get_pseudo_bytes(iv_ptr, assoc->ipsa_iv_len);
7715 		return (rc);
7716 	}
7717 
7718 	mutex_enter(&assoc->ipsa_lock);
7719 
7720 	(*assoc->ipsa_iv)++;
7721 
7722 	if (*assoc->ipsa_iv == assoc->ipsa_iv_hardexpire) {
7723 		sa_new_state = IPSA_STATE_DEAD;
7724 		rc = B_FALSE;
7725 	} else if (*assoc->ipsa_iv == assoc->ipsa_iv_softexpire) {
7726 		if (assoc->ipsa_state != IPSA_STATE_DYING) {
7727 			/*
7728 			 * This SA may have already been expired when its
7729 			 * PAIR_SA expired.
7730 			 */
7731 			sa_new_state = IPSA_STATE_DYING;
7732 		}
7733 	}
7734 	if (sa_new_state) {
7735 		/*
7736 		 * If there is a state change, we need to update this SA
7737 		 * and its "pair", we can find the bucket for the "pair" SA
7738 		 * while holding the ipsa_t mutex, but we won't actually
7739 		 * update anything untill the ipsa_t mutex has been released
7740 		 * for _this_ SA.
7741 		 */
7742 		assoc->ipsa_state = sa_new_state;
7743 		if (assoc->ipsa_addrfam == AF_INET6) {
7744 			sp = &espstack->esp_sadb.s_v6;
7745 		} else {
7746 			sp = &espstack->esp_sadb.s_v4;
7747 		}
7748 		inbound_bucket = INBOUND_BUCKET(sp, assoc->ipsa_otherspi);
7749 		sadb_expire_assoc(pfkey_q, assoc);
7750 	}
7751 	if (rc == B_TRUE)
7752 		bcopy(assoc->ipsa_iv, iv_ptr, assoc->ipsa_iv_len);
7753 
7754 	mutex_exit(&assoc->ipsa_lock);
7755 
7756 	if (sa_new_state) {
7757 		/* Find the inbound SA, need to lock hash bucket. */
7758 		mutex_enter(&inbound_bucket->isaf_lock);
7759 		pair_sa = ipsec_getassocbyspi(inbound_bucket,
7760 		    assoc->ipsa_otherspi, assoc->ipsa_dstaddr,
7761 		    assoc->ipsa_srcaddr, assoc->ipsa_addrfam);
7762 		mutex_exit(&inbound_bucket->isaf_lock);
7763 		if (pair_sa != NULL) {
7764 			mutex_enter(&pair_sa->ipsa_lock);
7765 			pair_sa->ipsa_state = sa_new_state;
7766 			mutex_exit(&pair_sa->ipsa_lock);
7767 			IPSA_REFRELE(pair_sa);
7768 		}
7769 	}
7770 
7771 	return (rc);
7772 }
7773 
7774 void
7775 ccm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7776     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7777 {
7778 	uchar_t *nonce;
7779 	crypto_mechanism_t *combined_mech;
7780 	CK_AES_CCM_PARAMS *params;
7781 
7782 	combined_mech = (crypto_mechanism_t *)cm_mech;
7783 	params = (CK_AES_CCM_PARAMS *)(combined_mech + 1);
7784 	nonce = (uchar_t *)(params + 1);
7785 	params->ulMACSize = assoc->ipsa_mac_len;
7786 	params->ulNonceSize = assoc->ipsa_nonce_len;
7787 	params->ulAuthDataSize = sizeof (esph_t);
7788 	params->ulDataSize = data_len;
7789 	params->nonce = nonce;
7790 	params->authData = esph;
7791 
7792 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7793 	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
7794 	cm_mech->combined_mech.cm_param = (caddr_t)params;
7795 	/* See gcm_params_init() for comments. */
7796 	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7797 	nonce += assoc->ipsa_saltlen;
7798 	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7799 	crypto_data->cd_miscdata = NULL;
7800 }
7801 
7802 /* ARGSUSED */
7803 void
7804 cbc_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7805     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7806 {
7807 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7808 	cm_mech->combined_mech.cm_param_len = 0;
7809 	cm_mech->combined_mech.cm_param = NULL;
7810 	crypto_data->cd_miscdata = (char *)iv_ptr;
7811 }
7812 
7813 /* ARGSUSED */
7814 void
7815 gcm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7816     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7817 {
7818 	uchar_t *nonce;
7819 	crypto_mechanism_t *combined_mech;
7820 	CK_AES_GCM_PARAMS *params;
7821 
7822 	combined_mech = (crypto_mechanism_t *)cm_mech;
7823 	params = (CK_AES_GCM_PARAMS *)(combined_mech + 1);
7824 	nonce = (uchar_t *)(params + 1);
7825 
7826 	params->pIv = nonce;
7827 	params->ulIvLen = assoc->ipsa_nonce_len;
7828 	params->ulIvBits = SADB_8TO1(assoc->ipsa_nonce_len);
7829 	params->pAAD = esph;
7830 	params->ulAADLen = sizeof (esph_t);
7831 	params->ulTagBits = SADB_8TO1(assoc->ipsa_mac_len);
7832 
7833 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7834 	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
7835 	cm_mech->combined_mech.cm_param = (caddr_t)params;
7836 	/*
7837 	 * Create the nonce, which is made up of the salt and the IV.
7838 	 * Copy the salt from the SA and the IV from the packet.
7839 	 * For inbound packets we copy the IV from the packet because it
7840 	 * was set by the sending system, for outbound packets we copy the IV
7841 	 * from the packet because the IV in the SA may be changed by another
7842 	 * thread, the IV in the packet was created while holding a mutex.
7843 	 */
7844 	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7845 	nonce += assoc->ipsa_saltlen;
7846 	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7847 	crypto_data->cd_miscdata = NULL;
7848 }
7849