xref: /titanic_50/usr/src/uts/common/inet/ip/sadb.c (revision 491f61a1e1c1fc54a47bbcf53dbbbe1293b93b27)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/strsubr.h>
30 #include <sys/errno.h>
31 #include <sys/ddi.h>
32 #include <sys/debug.h>
33 #include <sys/cmn_err.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/kmem.h>
37 #include <sys/sunddi.h>
38 #include <sys/tihdr.h>
39 #include <sys/atomic.h>
40 #include <sys/socket.h>
41 #include <sys/sysmacros.h>
42 #include <sys/crypto/common.h>
43 #include <sys/crypto/api.h>
44 #include <sys/zone.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <net/pfkeyv2.h>
48 #include <net/pfpolicy.h>
49 #include <inet/common.h>
50 #include <netinet/ip6.h>
51 #include <inet/ip.h>
52 #include <inet/ip_ire.h>
53 #include <inet/ip6.h>
54 #include <inet/ipsec_info.h>
55 #include <inet/tcp.h>
56 #include <inet/sadb.h>
57 #include <inet/ipsec_impl.h>
58 #include <inet/ipsecah.h>
59 #include <inet/ipsecesp.h>
60 #include <sys/random.h>
61 #include <sys/dlpi.h>
62 #include <sys/iphada.h>
63 #include <inet/ip_if.h>
64 #include <inet/ipdrop.h>
65 #include <inet/ipclassifier.h>
66 #include <inet/sctp_ip.h>
67 
68 /*
69  * This source file contains Security Association Database (SADB) common
70  * routines.  They are linked in with the AH module.  Since AH has no chance
71  * of falling under export control, it was safe to link it in there.
72  */
73 
74 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
75     ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
76 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
77 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
78 static void sadb_drain_torchq(queue_t *, mblk_t *);
79 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
80 			    netstack_t *);
81 static void sadb_destroy(sadb_t *, netstack_t *);
82 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
83 
84 static time_t sadb_add_time(time_t, uint64_t);
85 static void lifetime_fuzz(ipsa_t *);
86 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
87 static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
88 
89 extern void (*cl_inet_getspi)(netstackid_t stack_id, uint8_t protocol,
90     uint8_t *ptr, size_t len, void *args);
91 extern int (*cl_inet_checkspi)(netstackid_t stack_id, uint8_t protocol,
92     uint32_t spi, void *args);
93 extern void (*cl_inet_deletespi)(netstackid_t stack_id, uint8_t protocol,
94     uint32_t spi, void *args);
95 /*
96  * ipsacq_maxpackets is defined here to make it tunable
97  * from /etc/system.
98  */
99 extern uint64_t ipsacq_maxpackets;
100 
/*
 * SET_EXPIRE(sa, delta, exp): when the SA's ipsa_<delta> lifetime is
 * nonzero, store ipsa_addtime + ipsa_<delta> (computed with
 * sadb_add_time()) into ipsa_<exp>.  A zero lifetime leaves ipsa_<exp>
 * untouched.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0)					\
		(sa)->ipsa_ ## exp = sadb_add_time(			\
		    (sa)->ipsa_addtime, (sa)->ipsa_ ## delta);		\
}
107 
/*
 * UPDATE_EXPIRE(sa, delta, exp): when the SA's ipsa_<delta> lifetime is
 * nonzero, compute ipsa_usetime + ipsa_<delta> (with sadb_add_time()) and
 * record it in ipsa_<exp> if no expiration is recorded yet (ipsa_<exp> is
 * zero) or if the new value is earlier than the recorded one.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0) {				\
		time_t new_expire = sadb_add_time(			\
		    (sa)->ipsa_usetime, (sa)->ipsa_ ## delta);		\
		if ((sa)->ipsa_ ## exp == 0 ||				\
		    new_expire < (sa)->ipsa_ ## exp)			\
			(sa)->ipsa_ ## exp = new_expire;		\
	}								\
}
119 
120 
121 /* wrap the macro so we can pass it as a function pointer */
122 void
123 sadb_sa_refrele(void *target)
124 {
125 	IPSA_REFRELE(((ipsa_t *)target));
126 }
127 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * Saturating "base + delta" for expiration times.
 *
 * PF_KEY gives us lifetimes in uint64_t seconds, while time_t is presumed
 * to be a signed type with the same range as "long".  On ILP32 systems a
 * naive add could therefore wrap past the end of time, or "overwrap" back
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.  To avoid negative lifetimes or the loss of high
 * order bits when someone asks for a bizarrely long SA lifetime, the
 * result is clamped to TIME_MAX.
 *
 * base:  starting time.
 * delta: number of seconds to add; anything above TIME_MAX is clipped
 *        to TIME_MAX first.
 *
 * Returns base + delta, or TIME_MAX if that sum is not representable.
 *
 * The headroom check is done BEFORE the addition, so the function never
 * depends on how the compiler converts an out-of-range unsigned value
 * back to a signed time_t.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	time_t clipped;

	/*
	 * Clip delta to the maximum possible time_t value to prevent
	 * "overwrapping" back into a shorter-than-desired future time.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = (uint64_t)TIME_MAX;
	clipped = (time_t)delta;

	/*
	 * Only a positive base can push the sum past TIME_MAX; for
	 * base <= 0 the sum of base and a clipped delta always fits.
	 */
	if (base > 0 && clipped > TIME_MAX - base)
		return (TIME_MAX);

	return (base + clipped);
}
178 
179 /*
180  * Callers of this function have already created a working security
181  * association, and have found the appropriate table & hash chain.  All this
182  * function does is check duplicates, and insert the SA.  The caller needs to
183  * hold the hash bucket lock and increment the refcnt before insertion.
184  *
185  * Return 0 if success, EEXIST if collision.
186  */
187 #define	SA_UNIQUE_MATCH(sa1, sa2) \
188 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
189 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
190 
191 int
192 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
193 {
194 	ipsa_t **ptpn = NULL;
195 	ipsa_t *walker;
196 	boolean_t unspecsrc;
197 
198 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
199 
200 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
201 
202 	walker = bucket->isaf_ipsa;
203 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
204 
205 	/*
206 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
207 	 * of the list unless there's an unspecified source address, then
208 	 * insert it after the last SA with a specified source address.
209 	 *
210 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
211 	 * checking for collisions.
212 	 */
213 
214 	while (walker != NULL) {
215 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
216 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
217 			if (walker->ipsa_spi == ipsa->ipsa_spi)
218 				return (EEXIST);
219 
220 			mutex_enter(&walker->ipsa_lock);
221 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
222 			    (walker->ipsa_flags & IPSA_F_USED) &&
223 			    SA_UNIQUE_MATCH(walker, ipsa)) {
224 				walker->ipsa_flags |= IPSA_F_CINVALID;
225 			}
226 			mutex_exit(&walker->ipsa_lock);
227 		}
228 
229 		if (ptpn == NULL && unspecsrc) {
230 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
231 			    walker->ipsa_addrfam))
232 				ptpn = walker->ipsa_ptpn;
233 			else if (walker->ipsa_next == NULL)
234 				ptpn = &walker->ipsa_next;
235 		}
236 
237 		walker = walker->ipsa_next;
238 	}
239 
240 	if (ptpn == NULL)
241 		ptpn = &bucket->isaf_ipsa;
242 	ipsa->ipsa_next = *ptpn;
243 	ipsa->ipsa_ptpn = ptpn;
244 	if (ipsa->ipsa_next != NULL)
245 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
246 	*ptpn = ipsa;
247 	ipsa->ipsa_linklock = &bucket->isaf_lock;
248 
249 	return (0);
250 }
251 #undef SA_UNIQUE_MATCH
252 
253 /*
254  * Free a security association.  Its reference count is 0, which means
255  * I must free it.  The SA must be unlocked and must not be linked into
256  * any fanout list.
257  */
258 static void
259 sadb_freeassoc(ipsa_t *ipsa)
260 {
261 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
262 
263 	ASSERT(ipss != NULL);
264 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
265 	ASSERT(ipsa->ipsa_refcnt == 0);
266 	ASSERT(ipsa->ipsa_next == NULL);
267 	ASSERT(ipsa->ipsa_ptpn == NULL);
268 
269 	mutex_enter(&ipsa->ipsa_lock);
270 	/* Don't call sadb_clear_lpkt() since we hold the ipsa_lock anyway. */
271 	ip_drop_packet(ipsa->ipsa_lpkt, B_TRUE, NULL, NULL,
272 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
273 	    &ipss->ipsec_sadb_dropper);
274 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
275 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
276 	mutex_exit(&ipsa->ipsa_lock);
277 
278 	/* bzero() these fields for paranoia's sake. */
279 	if (ipsa->ipsa_authkey != NULL) {
280 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
281 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
282 	}
283 	if (ipsa->ipsa_encrkey != NULL) {
284 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
285 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
286 	}
287 	if (ipsa->ipsa_nonce_buf != NULL) {
288 		bzero(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
289 		kmem_free(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
290 	}
291 	if (ipsa->ipsa_src_cid != NULL) {
292 		IPSID_REFRELE(ipsa->ipsa_src_cid);
293 	}
294 	if (ipsa->ipsa_dst_cid != NULL) {
295 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
296 	}
297 	if (ipsa->ipsa_integ != NULL)
298 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
299 	if (ipsa->ipsa_sens != NULL)
300 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
301 	if (ipsa->ipsa_emech.cm_param != NULL)
302 		kmem_free(ipsa->ipsa_emech.cm_param,
303 		    ipsa->ipsa_emech.cm_param_len);
304 
305 	mutex_destroy(&ipsa->ipsa_lock);
306 	kmem_free(ipsa, sizeof (*ipsa));
307 }
308 
309 /*
310  * Unlink a security association from a hash bucket.  Assume the hash bucket
311  * lock is held, but the association's lock is not.
312  *
313  * Note that we do not bump the bucket's generation number here because
314  * we might not be making a visible change to the set of visible SA's.
315  * All callers MUST bump the bucket's generation number before they unlock
316  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
317  * was present in the bucket at the time it was locked.
318  */
319 void
320 sadb_unlinkassoc(ipsa_t *ipsa)
321 {
322 	ASSERT(ipsa->ipsa_linklock != NULL);
323 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
324 
325 	/* These fields are protected by the link lock. */
326 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
327 	if (ipsa->ipsa_next != NULL) {
328 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
329 		ipsa->ipsa_next = NULL;
330 	}
331 
332 	ipsa->ipsa_ptpn = NULL;
333 
334 	/* This may destroy the SA. */
335 	IPSA_REFRELE(ipsa);
336 }
337 
338 void
339 sadb_delete_cluster(ipsa_t *assoc)
340 {
341 	uint8_t protocol;
342 
343 	if (cl_inet_deletespi &&
344 	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
345 	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
346 		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
347 		    IPPROTO_AH : IPPROTO_ESP;
348 		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
349 		    protocol, assoc->ipsa_spi, NULL);
350 	}
351 }
352 
353 /*
354  * Create a larval security association with the specified SPI.	 All other
355  * fields are zeroed.
356  */
357 static ipsa_t *
358 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
359     netstack_t *ns)
360 {
361 	ipsa_t *newbie;
362 
363 	/*
364 	 * Allocate...
365 	 */
366 
367 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
368 	if (newbie == NULL) {
369 		/* Can't make new larval SA. */
370 		return (NULL);
371 	}
372 
373 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
374 	newbie->ipsa_spi = spi;
375 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
376 
377 	/*
378 	 * Copy addresses...
379 	 */
380 
381 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
382 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
383 
384 	newbie->ipsa_addrfam = addrfam;
385 
386 	/*
387 	 * Set common initialization values, including refcnt.
388 	 */
389 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
390 	newbie->ipsa_state = IPSA_STATE_LARVAL;
391 	newbie->ipsa_refcnt = 1;
392 	newbie->ipsa_freefunc = sadb_freeassoc;
393 
394 	/*
395 	 * There aren't a lot of other common initialization values, as
396 	 * they are copied in from the PF_KEY message.
397 	 */
398 
399 	return (newbie);
400 }
401 
402 /*
403  * Call me to initialize a security association fanout.
404  */
405 static int
406 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
407 {
408 	isaf_t *table;
409 	int i;
410 
411 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
412 	*tablep = table;
413 
414 	if (table == NULL)
415 		return (ENOMEM);
416 
417 	for (i = 0; i < size; i++) {
418 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
419 		table[i].isaf_ipsa = NULL;
420 		table[i].isaf_gen = 0;
421 	}
422 
423 	return (0);
424 }
425 
426 /*
427  * Call me to initialize an acquire fanout
428  */
429 static int
430 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
431 {
432 	iacqf_t *table;
433 	int i;
434 
435 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
436 	*tablep = table;
437 
438 	if (table == NULL)
439 		return (ENOMEM);
440 
441 	for (i = 0; i < size; i++) {
442 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
443 		table[i].iacqf_ipsacq = NULL;
444 	}
445 
446 	return (0);
447 }
448 
449 /*
450  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
451  * caller must clean up partial allocations.
452  */
453 static int
454 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
455 {
456 	ASSERT(sp->sdb_of == NULL);
457 	ASSERT(sp->sdb_if == NULL);
458 	ASSERT(sp->sdb_acq == NULL);
459 
460 	sp->sdb_hashsize = size;
461 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
462 		return (ENOMEM);
463 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
464 		return (ENOMEM);
465 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
466 		return (ENOMEM);
467 
468 	return (0);
469 }
470 
471 /*
472  * Call me to initialize an SADB instance; fall back to default size on failure.
473  */
474 static void
475 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
476     netstack_t *ns)
477 {
478 	ASSERT(sp->sdb_of == NULL);
479 	ASSERT(sp->sdb_if == NULL);
480 	ASSERT(sp->sdb_acq == NULL);
481 
482 	if (size < IPSEC_DEFAULT_HASH_SIZE)
483 		size = IPSEC_DEFAULT_HASH_SIZE;
484 
485 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
486 
487 		cmn_err(CE_WARN,
488 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
489 		    size, ver, name);
490 
491 		sadb_destroy(sp, ns);
492 		size = IPSEC_DEFAULT_HASH_SIZE;
493 		cmn_err(CE_WARN, "Falling back to %d entries", size);
494 		(void) sadb_init_trial(sp, size, KM_SLEEP);
495 	}
496 }
497 
498 
499 /*
500  * Initialize an SADB-pair.
501  */
502 void
503 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
504 {
505 	sadb_init(name, &sp->s_v4, size, 4, ns);
506 	sadb_init(name, &sp->s_v6, size, 6, ns);
507 
508 	sp->s_satype = type;
509 
510 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
511 	if (type == SADB_SATYPE_AH) {
512 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
513 
514 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
515 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
516 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
517 	} else {
518 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
519 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
520 	}
521 }
522 
523 /*
524  * Deliver a single SADB_DUMP message representing a single SA.  This is
525  * called many times by sadb_dump().
526  *
527  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
528  * the caller should take that as a hint that dupb() on the "original answer"
529  * failed, and that perhaps the caller should try again with a copyb()ed
530  * "original answer".
531  */
532 static int
533 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
534     sadb_msg_t *samsg)
535 {
536 	mblk_t *answer;
537 
538 	answer = dupb(original_answer);
539 	if (answer == NULL)
540 		return (ENOBUFS);
541 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
542 	if (answer->b_cont == NULL) {
543 		freeb(answer);
544 		return (ENOMEM);
545 	}
546 
547 	/* Just do a putnext, and let keysock deal with flow control. */
548 	putnext(pfkey_q, answer);
549 	return (0);
550 }
551 
552 /*
553  * Common function to allocate and prepare a keysock_out_t M_CTL message.
554  */
555 mblk_t *
556 sadb_keysock_out(minor_t serial)
557 {
558 	mblk_t *mp;
559 	keysock_out_t *kso;
560 
561 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
562 	if (mp != NULL) {
563 		mp->b_datap->db_type = M_CTL;
564 		mp->b_wptr += sizeof (ipsec_info_t);
565 		kso = (keysock_out_t *)mp->b_rptr;
566 		kso->ks_out_type = KEYSOCK_OUT;
567 		kso->ks_out_len = sizeof (*kso);
568 		kso->ks_out_serial = serial;
569 	}
570 
571 	return (mp);
572 }
573 
574 /*
575  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
576  * to keysock.
577  */
578 static int
579 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
580     int num_entries, boolean_t do_peers, time_t active_time)
581 {
582 	int i, error = 0;
583 	mblk_t *original_answer;
584 	ipsa_t *walker;
585 	sadb_msg_t *samsg;
586 	time_t	current;
587 
588 	/*
589 	 * For each IPSA hash bucket do:
590 	 *	- Hold the mutex
591 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
592 	 */
593 	ASSERT(mp->b_cont != NULL);
594 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
595 
596 	original_answer = sadb_keysock_out(serial);
597 	if (original_answer == NULL)
598 		return (ENOMEM);
599 
600 	current = gethrestime_sec();
601 	for (i = 0; i < num_entries; i++) {
602 		mutex_enter(&fanout[i].isaf_lock);
603 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
604 		    walker = walker->ipsa_next) {
605 			if (!do_peers && walker->ipsa_haspeer)
606 				continue;
607 			if ((active_time != 0) &&
608 			    ((current - walker->ipsa_lastuse) > active_time))
609 				continue;
610 			error = sadb_dump_deliver(pfkey_q, original_answer,
611 			    walker, samsg);
612 			if (error == ENOBUFS) {
613 				mblk_t *new_original_answer;
614 
615 				/* Ran out of dupb's.  Try a copyb. */
616 				new_original_answer = copyb(original_answer);
617 				if (new_original_answer == NULL) {
618 					error = ENOMEM;
619 				} else {
620 					freeb(original_answer);
621 					original_answer = new_original_answer;
622 					error = sadb_dump_deliver(pfkey_q,
623 					    original_answer, walker, samsg);
624 				}
625 			}
626 			if (error != 0)
627 				break;	/* out of for loop. */
628 		}
629 		mutex_exit(&fanout[i].isaf_lock);
630 		if (error != 0)
631 			break;	/* out of for loop. */
632 	}
633 
634 	freeb(original_answer);
635 	return (error);
636 }
637 
638 /*
639  * Dump an entire SADB; outbound first, then inbound.
640  */
641 
642 int
643 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
644 {
645 	int error;
646 	time_t	active_time = 0;
647 	sadb_x_edump_t	*edump =
648 	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
649 
650 	if (edump != NULL) {
651 		active_time = edump->sadb_x_edump_timeout;
652 	}
653 
654 	/* Dump outbound */
655 	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
656 	    sp->sdb_hashsize, B_TRUE, active_time);
657 	if (error)
658 		return (error);
659 
660 	/* Dump inbound */
661 	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
662 	    sp->sdb_hashsize, B_FALSE, active_time);
663 }
664 
665 /*
666  * Generic sadb table walker.
667  *
668  * Call "walkfn" for each SA in each bucket in "table"; pass the
669  * bucket, the entry and "cookie" to the callback function.
670  * Take care to ensure that walkfn can delete the SA without screwing
671  * up our traverse.
672  *
673  * The bucket is locked for the duration of the callback, both so that the
674  * callback can just call sadb_unlinkassoc() when it wants to delete something,
675  * and so that no new entries are added while we're walking the list.
676  */
677 static void
678 sadb_walker(isaf_t *table, uint_t numentries,
679     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
680     void *cookie)
681 {
682 	int i;
683 	for (i = 0; i < numentries; i++) {
684 		ipsa_t *entry, *next;
685 
686 		mutex_enter(&table[i].isaf_lock);
687 
688 		for (entry = table[i].isaf_ipsa; entry != NULL;
689 		    entry = next) {
690 			next = entry->ipsa_next;
691 			(*walkfn)(&table[i], entry, cookie);
692 		}
693 		mutex_exit(&table[i].isaf_lock);
694 	}
695 }
696 
697 /*
698  * From the given SA, construct a dl_ct_ipsec_key and
699  * a dl_ct_ipsec structures to be sent to the adapter as part
700  * of a DL_CONTROL_REQ.
701  *
702  * ct_sa must point to the storage allocated for the key
703  * structure and must be followed by storage allocated
704  * for the SA information that must be sent to the driver
705  * as part of the DL_CONTROL_REQ request.
706  *
707  * The is_inbound boolean indicates whether the specified
708  * SA is part of an inbound SA table.
709  *
710  * Returns B_TRUE if the corresponding SA must be passed to
711  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
712  */
713 static boolean_t
714 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
715 {
716 	dl_ct_ipsec_key_t *keyp;
717 	dl_ct_ipsec_t *sap;
718 	void *ct_sa = mp->b_wptr;
719 
720 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
721 
722 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
723 	sap = (dl_ct_ipsec_t *)(keyp + 1);
724 
725 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
726 	    "is_inbound = %d\n", is_inbound));
727 
728 	/* initialize flag */
729 	sap->sadb_sa_flags = 0;
730 	if (is_inbound) {
731 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
732 		/*
733 		 * If an inbound SA has a peer, then mark it has being
734 		 * an outbound SA as well.
735 		 */
736 		if (sa->ipsa_haspeer)
737 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
738 	} else {
739 		/*
740 		 * If an outbound SA has a peer, then don't send it,
741 		 * since we will send the copy from the inbound table.
742 		 */
743 		if (sa->ipsa_haspeer) {
744 			freemsg(mp);
745 			return (B_FALSE);
746 		}
747 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
748 	}
749 
750 	keyp->dl_key_spi = sa->ipsa_spi;
751 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
752 	    DL_CTL_IPSEC_ADDR_LEN);
753 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
754 
755 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
756 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
757 
758 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
759 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
760 	bcopy(sa->ipsa_authkey,
761 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
762 
763 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
764 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
765 	bcopy(sa->ipsa_encrkey,
766 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
767 
768 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
769 	return (B_TRUE);
770 }
771 
772 /*
773  * Called from AH or ESP to format a message which will be used to inform
774  * IPsec-acceleration-capable ills of a SADB change.
775  * (It is not possible to send the message to IP directly from this function
776  * since the SA, if any, is locked during the call).
777  *
778  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
779  * sa_type: identifies whether the operation applies to AH or ESP
780  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
781  * sa: Pointer to an SA.  Must be non-NULL and locked
782  *	for ADD, DELETE, GET, and UPDATE operations.
783  * This function returns an mblk chain that must be passed to IP
784  * for forwarding to the IPsec capable providers.
785  */
786 mblk_t *
787 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
788     boolean_t is_inbound)
789 {
790 	mblk_t *mp;
791 	dl_control_req_t *ctrl;
792 	boolean_t need_key = B_FALSE;
793 	mblk_t *ctl_mp = NULL;
794 	ipsec_ctl_t *ctl;
795 
796 	/*
797 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
798 	 * 2 if a key is needed for the operation
799 	 *    2.1 initialize key
800 	 *    2.2 if a full SA is needed for the operation
801 	 *	2.2.1 initialize full SA info
802 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
803 	 * to send the resulting message to IPsec capable ills.
804 	 */
805 
806 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
807 
808 	/*
809 	 * Allocate DL_CONTROL_REQ M_PROTO
810 	 * We allocate room for the SA even if it's not needed
811 	 * by some of the operations (for example flush)
812 	 */
813 	mp = allocb(sizeof (dl_control_req_t) +
814 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
815 	if (mp == NULL)
816 		return (NULL);
817 	mp->b_datap->db_type = M_PROTO;
818 
819 	/* initialize dl_control_req_t */
820 	ctrl = (dl_control_req_t *)mp->b_wptr;
821 	ctrl->dl_primitive = DL_CONTROL_REQ;
822 	ctrl->dl_operation = dl_operation;
823 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
824 	    DL_CT_IPSEC_ESP;
825 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
826 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
827 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
828 	    sizeof (dl_ct_ipsec_key_t);
829 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
830 	mp->b_wptr += sizeof (dl_control_req_t);
831 
832 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
833 		ASSERT(sa != NULL);
834 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
835 
836 		need_key = B_TRUE;
837 
838 		/*
839 		 * Initialize key and SA data. Note that for some
840 		 * operations the SA data is ignored by the provider
841 		 * (delete, etc.)
842 		 */
843 		if (!sadb_req_from_sa(sa, mp, is_inbound))
844 			return (NULL);
845 	}
846 
847 	/* construct control message */
848 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
849 	if (ctl_mp == NULL) {
850 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
851 		freemsg(mp);
852 		return (NULL);
853 	}
854 
855 	ctl_mp->b_datap->db_type = M_CTL;
856 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
857 	ctl_mp->b_cont = mp;
858 
859 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
860 	ctl->ipsec_ctl_type = IPSEC_CTL;
861 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
862 	ctl->ipsec_ctl_sa_type = sa_type;
863 
864 	if (need_key) {
865 		/*
866 		 * Keep an additional reference on SA, since it will be
867 		 * needed by IP to send control messages corresponding
868 		 * to that SA from its perimeter. IP will do a
869 		 * IPSA_REFRELE when done with the request.
870 		 */
871 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
872 		IPSA_REFHOLD(sa);
873 		ctl->ipsec_ctl_sa = sa;
874 	} else
875 		ctl->ipsec_ctl_sa = NULL;
876 
877 	return (ctl_mp);
878 }
879 
880 
881 /*
882  * Called by sadb_ill_download() to dump the entries for a specific
883  * fanout table.  For each SA entry in the table passed as argument,
884  * use mp as a template and constructs a full DL_CONTROL message, and
885  * call ill_dlpi_send(), provided by IP, to send the resulting
886  * messages to the ill.
887  */
888 static void
889 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
890     boolean_t is_inbound)
891 {
892 	ipsa_t *walker;
893 	mblk_t *nmp, *salist;
894 	int i, error = 0;
895 	ip_stack_t	*ipst = ill->ill_ipst;
896 	netstack_t	*ns = ipst->ips_netstack;
897 
898 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
899 	    (void *)fanout, num_entries));
900 	/*
901 	 * For each IPSA hash bucket do:
902 	 *	- Hold the mutex
903 	 *	- Walk each entry, sending a corresponding request to IP
904 	 *	  for it.
905 	 */
906 	ASSERT(mp->b_datap->db_type == M_PROTO);
907 
908 	for (i = 0; i < num_entries; i++) {
909 		mutex_enter(&fanout[i].isaf_lock);
910 		salist = NULL;
911 
912 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
913 		    walker = walker->ipsa_next) {
914 			IPSECHW_DEBUG(IPSECHW_SADB,
915 			    ("sadb_ill_df: sending SA to ill via IP \n"));
916 			/*
917 			 * Duplicate the template mp passed and
918 			 * complete DL_CONTROL_REQ data.
919 			 * To be more memory efficient, we could use
920 			 * dupb() for the M_CTL and copyb() for the M_PROTO
921 			 * as the M_CTL, since the M_CTL is the same for
922 			 * every SA entry passed down to IP for the same ill.
923 			 *
924 			 * Note that copymsg/copyb ensure that the new mblk
925 			 * is at least as large as the source mblk even if it's
926 			 * not using all its storage -- therefore, nmp
927 			 * has trailing space for sadb_req_from_sa to add
928 			 * the SA-specific bits.
929 			 */
930 			mutex_enter(&walker->ipsa_lock);
931 			if (ipsec_capab_match(ill,
932 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
933 			    walker, ns)) {
934 				nmp = copymsg(mp);
935 				if (nmp == NULL) {
936 					IPSECHW_DEBUG(IPSECHW_SADB,
937 					    ("sadb_ill_df: alloc error\n"));
938 					error = ENOMEM;
939 					mutex_exit(&walker->ipsa_lock);
940 					break;
941 				}
942 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
943 					nmp->b_next = salist;
944 					salist = nmp;
945 				}
946 			}
947 			mutex_exit(&walker->ipsa_lock);
948 		}
949 		mutex_exit(&fanout[i].isaf_lock);
950 		while (salist != NULL) {
951 			nmp = salist;
952 			salist = nmp->b_next;
953 			nmp->b_next = NULL;
954 			ill_dlpi_send(ill, nmp);
955 		}
956 		if (error != 0)
957 			break;	/* out of for loop. */
958 	}
959 }
960 
961 /*
962  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
963  * the type specified by sa_type to the specified ill.
964  *
965  * We call for each fanout table defined by the SADB (one per
966  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
967  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
968  * message to the ill.
969  */
970 void
971 sadb_ill_download(ill_t *ill, uint_t sa_type)
972 {
973 	mblk_t *protomp;	/* prototype message */
974 	dl_control_req_t *ctrl;
975 	sadbp_t *spp;
976 	sadb_t *sp;
977 	int dlt;
978 	ip_stack_t	*ipst = ill->ill_ipst;
979 	netstack_t	*ns = ipst->ips_netstack;
980 
981 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
982 
983 	/*
984 	 * Allocate and initialize prototype answer. A duplicate for
985 	 * each SA is sent down to the interface.
986 	 */
987 
988 	/* DL_CONTROL_REQ M_PROTO mblk_t */
989 	protomp = allocb(sizeof (dl_control_req_t) +
990 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
991 	if (protomp == NULL)
992 		return;
993 	protomp->b_datap->db_type = M_PROTO;
994 
995 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
996 	if (sa_type == SADB_SATYPE_ESP) {
997 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
998 
999 		spp = &espstack->esp_sadb;
1000 	} else {
1001 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1002 
1003 		spp = &ahstack->ah_sadb;
1004 	}
1005 
1006 	ctrl = (dl_control_req_t *)protomp->b_wptr;
1007 	ctrl->dl_primitive = DL_CONTROL_REQ;
1008 	ctrl->dl_operation = DL_CO_SET;
1009 	ctrl->dl_type = dlt;
1010 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
1011 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
1012 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
1013 	    sizeof (dl_ct_ipsec_key_t);
1014 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
1015 	protomp->b_wptr += sizeof (dl_control_req_t);
1016 
1017 	/*
1018 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
1019 	 * and dl_ct_ipsec_t
1020 	 */
1021 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
1022 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1023 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1024 	freemsg(protomp);
1025 }
1026 
1027 /*
1028  * Call me to free up a security association fanout.  Use the forever
1029  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
1030  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
1031  * when a module is unloaded).
1032  */
1033 static void
1034 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
1035     boolean_t inbound)
1036 {
1037 	int i;
1038 	isaf_t *table = *tablep;
1039 	uint8_t protocol;
1040 	ipsa_t *sa;
1041 	netstackid_t sid;
1042 
1043 	if (table == NULL)
1044 		return;
1045 
1046 	for (i = 0; i < numentries; i++) {
1047 		mutex_enter(&table[i].isaf_lock);
1048 		while ((sa = table[i].isaf_ipsa) != NULL) {
1049 			if (inbound && cl_inet_deletespi &&
1050 			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
1051 			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
1052 				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
1053 				    IPPROTO_AH : IPPROTO_ESP;
1054 				sid = sa->ipsa_netstack->netstack_stackid;
1055 				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
1056 				    NULL);
1057 			}
1058 			sadb_unlinkassoc(sa);
1059 		}
1060 		table[i].isaf_gen++;
1061 		mutex_exit(&table[i].isaf_lock);
1062 		if (forever)
1063 			mutex_destroy(&(table[i].isaf_lock));
1064 	}
1065 
1066 	if (forever) {
1067 		*tablep = NULL;
1068 		kmem_free(table, numentries * sizeof (*table));
1069 	}
1070 }
1071 
1072 /*
1073  * Entry points to sadb_destroyer().
1074  */
1075 static void
1076 sadb_flush(sadb_t *sp, netstack_t *ns)
1077 {
1078 	/*
1079 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1080 	 * enforcement, there would be a subtlety where I could add on the
1081 	 * heels of a flush.  With keysock's enforcement, however, this
1082 	 * makes ESP's job easy.
1083 	 */
1084 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
1085 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
1086 
1087 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1088 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
1089 }
1090 
1091 static void
1092 sadb_destroy(sadb_t *sp, netstack_t *ns)
1093 {
1094 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
1095 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
1096 
1097 	/* For each acquire, destroy it, including the bucket mutex. */
1098 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1099 
1100 	ASSERT(sp->sdb_of == NULL);
1101 	ASSERT(sp->sdb_if == NULL);
1102 	ASSERT(sp->sdb_acq == NULL);
1103 }
1104 
1105 static void
1106 sadb_send_flush_req(sadbp_t *spp)
1107 {
1108 	mblk_t *ctl_mp;
1109 
1110 	/*
1111 	 * we've been unplumbed, or never were plumbed; don't go there.
1112 	 */
1113 	if (spp->s_ip_q == NULL)
1114 		return;
1115 
1116 	/* have IP send a flush msg to the IPsec accelerators */
1117 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1118 	if (ctl_mp != NULL)
1119 		putnext(spp->s_ip_q, ctl_mp);
1120 }
1121 
1122 void
1123 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1124 {
1125 	sadb_flush(&spp->s_v4, ns);
1126 	sadb_flush(&spp->s_v6, ns);
1127 
1128 	sadb_send_flush_req(spp);
1129 }
1130 
1131 void
1132 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1133 {
1134 	sadb_destroy(&spp->s_v4, ns);
1135 	sadb_destroy(&spp->s_v6, ns);
1136 
1137 	sadb_send_flush_req(spp);
1138 	if (spp->s_satype == SADB_SATYPE_AH) {
1139 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1140 
1141 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1142 	}
1143 }
1144 
1145 
1146 /*
1147  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1148  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1149  * EINVAL.
1150  */
1151 int
1152 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
1153     sadb_lifetime_t *idle)
1154 {
1155 	if (hard == NULL || soft == NULL)
1156 		return (0);
1157 
1158 	if (hard->sadb_lifetime_allocations != 0 &&
1159 	    soft->sadb_lifetime_allocations != 0 &&
1160 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1161 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1162 
1163 	if (hard->sadb_lifetime_bytes != 0 &&
1164 	    soft->sadb_lifetime_bytes != 0 &&
1165 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1166 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1167 
1168 	if (hard->sadb_lifetime_addtime != 0 &&
1169 	    soft->sadb_lifetime_addtime != 0 &&
1170 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1171 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1172 
1173 	if (hard->sadb_lifetime_usetime != 0 &&
1174 	    soft->sadb_lifetime_usetime != 0 &&
1175 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1176 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1177 
1178 	if (idle != NULL) {
1179 		if (hard->sadb_lifetime_addtime != 0 &&
1180 		    idle->sadb_lifetime_addtime != 0 &&
1181 		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1182 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1183 
1184 		if (soft->sadb_lifetime_addtime != 0 &&
1185 		    idle->sadb_lifetime_addtime != 0 &&
1186 		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1187 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1188 
1189 		if (hard->sadb_lifetime_usetime != 0 &&
1190 		    idle->sadb_lifetime_usetime != 0 &&
1191 		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1192 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1193 
1194 		if (soft->sadb_lifetime_usetime != 0 &&
1195 		    idle->sadb_lifetime_usetime != 0 &&
1196 		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1197 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1198 	}
1199 
1200 	return (0);
1201 }
1202 
1203 /*
1204  * Clone a security association for the purposes of inserting a single SA
1205  * into inbound and outbound tables respectively. This function should only
1206  * be called from sadb_common_add().
1207  */
1208 static ipsa_t *
1209 sadb_cloneassoc(ipsa_t *ipsa)
1210 {
1211 	ipsa_t *newbie;
1212 	boolean_t error = B_FALSE;
1213 
1214 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
1215 
1216 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1217 	if (newbie == NULL)
1218 		return (NULL);
1219 
1220 	/* Copy over what we can. */
1221 	*newbie = *ipsa;
1222 
1223 	/* bzero and initialize locks, in case *_init() allocates... */
1224 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1225 
1226 	/*
1227 	 * While somewhat dain-bramaged, the most graceful way to
1228 	 * recover from errors is to keep plowing through the
1229 	 * allocations, and getting what I can.  It's easier to call
1230 	 * sadb_freeassoc() on the stillborn clone when all the
1231 	 * pointers aren't pointing to the parent's data.
1232 	 */
1233 
1234 	if (ipsa->ipsa_authkey != NULL) {
1235 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1236 		    KM_NOSLEEP);
1237 		if (newbie->ipsa_authkey == NULL) {
1238 			error = B_TRUE;
1239 		} else {
1240 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1241 			    newbie->ipsa_authkeylen);
1242 
1243 			newbie->ipsa_kcfauthkey.ck_data =
1244 			    newbie->ipsa_authkey;
1245 		}
1246 
1247 		if (newbie->ipsa_amech.cm_param != NULL) {
1248 			newbie->ipsa_amech.cm_param =
1249 			    (char *)&newbie->ipsa_mac_len;
1250 		}
1251 	}
1252 
1253 	if (ipsa->ipsa_encrkey != NULL) {
1254 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1255 		    KM_NOSLEEP);
1256 		if (newbie->ipsa_encrkey == NULL) {
1257 			error = B_TRUE;
1258 		} else {
1259 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1260 			    newbie->ipsa_encrkeylen);
1261 
1262 			newbie->ipsa_kcfencrkey.ck_data =
1263 			    newbie->ipsa_encrkey;
1264 		}
1265 	}
1266 
1267 	newbie->ipsa_authtmpl = NULL;
1268 	newbie->ipsa_encrtmpl = NULL;
1269 	newbie->ipsa_haspeer = B_TRUE;
1270 
1271 	if (ipsa->ipsa_integ != NULL) {
1272 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1273 		    KM_NOSLEEP);
1274 		if (newbie->ipsa_integ == NULL) {
1275 			error = B_TRUE;
1276 		} else {
1277 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1278 			    newbie->ipsa_integlen);
1279 		}
1280 	}
1281 
1282 	if (ipsa->ipsa_sens != NULL) {
1283 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1284 		    KM_NOSLEEP);
1285 		if (newbie->ipsa_sens == NULL) {
1286 			error = B_TRUE;
1287 		} else {
1288 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1289 			    newbie->ipsa_senslen);
1290 		}
1291 	}
1292 
1293 	if (ipsa->ipsa_src_cid != NULL) {
1294 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1295 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1296 	}
1297 
1298 	if (ipsa->ipsa_dst_cid != NULL) {
1299 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1300 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1301 	}
1302 
1303 	if (error) {
1304 		sadb_freeassoc(newbie);
1305 		return (NULL);
1306 	}
1307 
1308 	return (newbie);
1309 }
1310 
1311 /*
1312  * Initialize a SADB address extension at the address specified by addrext.
1313  * Return a pointer to the end of the new address extension.
1314  */
1315 static uint8_t *
1316 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1317     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1318 {
1319 	struct sockaddr_in *sin;
1320 	struct sockaddr_in6 *sin6;
1321 	uint8_t *cur = start;
1322 	int addrext_len;
1323 	int sin_len;
1324 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1325 
1326 	if (cur == NULL)
1327 		return (NULL);
1328 
1329 	cur += sizeof (*addrext);
1330 	if (cur > end)
1331 		return (NULL);
1332 
1333 	addrext->sadb_address_proto = proto;
1334 	addrext->sadb_address_prefixlen = prefix;
1335 	addrext->sadb_address_reserved = 0;
1336 	addrext->sadb_address_exttype = exttype;
1337 
1338 	switch (af) {
1339 	case AF_INET:
1340 		sin = (struct sockaddr_in *)cur;
1341 		sin_len = sizeof (*sin);
1342 		cur += sin_len;
1343 		if (cur > end)
1344 			return (NULL);
1345 
1346 		sin->sin_family = af;
1347 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1348 		sin->sin_port = port;
1349 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1350 		break;
1351 	case AF_INET6:
1352 		sin6 = (struct sockaddr_in6 *)cur;
1353 		sin_len = sizeof (*sin6);
1354 		cur += sin_len;
1355 		if (cur > end)
1356 			return (NULL);
1357 
1358 		bzero(sin6, sizeof (*sin6));
1359 		sin6->sin6_family = af;
1360 		sin6->sin6_port = port;
1361 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1362 		break;
1363 	}
1364 
1365 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1366 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1367 
1368 	cur = start + addrext_len;
1369 	if (cur > end)
1370 		cur = NULL;
1371 
1372 	return (cur);
1373 }
1374 
1375 /*
1376  * Construct a key management cookie extension.
1377  */
1378 
1379 static uint8_t *
1380 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1381 {
1382 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1383 
1384 	if (cur == NULL)
1385 		return (NULL);
1386 
1387 	cur += sizeof (*kmcext);
1388 
1389 	if (cur > end)
1390 		return (NULL);
1391 
1392 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1393 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1394 	kmcext->sadb_x_kmc_proto = kmp;
1395 	kmcext->sadb_x_kmc_cookie = kmc;
1396 	kmcext->sadb_x_kmc_reserved = 0;
1397 
1398 	return (cur);
1399 }
1400 
1401 /*
1402  * Given an original message header with sufficient space following it, and an
1403  * SA, construct a full PF_KEY message with all of the relevant extensions.
1404  * This is mostly used for SADB_GET, and SADB_DUMP.
1405  */
1406 static mblk_t *
1407 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1408 {
1409 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1410 	int srcidsize, dstidsize;
1411 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1412 				/* src/dst and proxy sockaddrs. */
1413 	/*
1414 	 * The following are pointers into the PF_KEY message this PF_KEY
1415 	 * message creates.
1416 	 */
1417 	sadb_msg_t *newsamsg;
1418 	sadb_sa_t *assoc;
1419 	sadb_lifetime_t *lt;
1420 	sadb_key_t *key;
1421 	sadb_ident_t *ident;
1422 	sadb_sens_t *sens;
1423 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1424 	sadb_x_replay_ctr_t *repl_ctr;
1425 	sadb_x_pair_t *pair_ext;
1426 
1427 	mblk_t *mp;
1428 	uint64_t *bitmap;
1429 	uint8_t *cur, *end;
1430 	/* These indicate the presence of the above extension fields. */
1431 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1432 	boolean_t idle;
1433 	boolean_t paired;
1434 	uint32_t otherspi;
1435 
1436 	/* First off, figure out the allocation length for this message. */
1437 
1438 	/*
1439 	 * Constant stuff.  This includes base, SA, address (src, dst),
1440 	 * and lifetime (current).
1441 	 */
1442 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1443 	    sizeof (sadb_lifetime_t);
1444 
1445 	fam = ipsa->ipsa_addrfam;
1446 	switch (fam) {
1447 	case AF_INET:
1448 		addrsize = roundup(sizeof (struct sockaddr_in) +
1449 		    sizeof (sadb_address_t), sizeof (uint64_t));
1450 		break;
1451 	case AF_INET6:
1452 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1453 		    sizeof (sadb_address_t), sizeof (uint64_t));
1454 		break;
1455 	default:
1456 		return (NULL);
1457 	}
1458 	/*
1459 	 * Allocate TWO address extensions, for source and destination.
1460 	 * (Thus, the * 2.)
1461 	 */
1462 	alloclen += addrsize * 2;
1463 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1464 		alloclen += addrsize;
1465 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1466 		alloclen += addrsize;
1467 
1468 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1469 		paired = B_TRUE;
1470 		alloclen += sizeof (sadb_x_pair_t);
1471 		otherspi = ipsa->ipsa_otherspi;
1472 	} else {
1473 		paired = B_FALSE;
1474 	}
1475 
1476 	/* How 'bout other lifetimes? */
1477 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1478 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1479 		alloclen += sizeof (sadb_lifetime_t);
1480 		soft = B_TRUE;
1481 	} else {
1482 		soft = B_FALSE;
1483 	}
1484 
1485 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1486 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1487 		alloclen += sizeof (sadb_lifetime_t);
1488 		hard = B_TRUE;
1489 	} else {
1490 		hard = B_FALSE;
1491 	}
1492 
1493 	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1494 		alloclen += sizeof (sadb_lifetime_t);
1495 		idle = B_TRUE;
1496 	} else {
1497 		idle = B_FALSE;
1498 	}
1499 
1500 	/* Inner addresses. */
1501 	if (ipsa->ipsa_innerfam == 0) {
1502 		isrc = B_FALSE;
1503 		idst = B_FALSE;
1504 	} else {
1505 		pfam = ipsa->ipsa_innerfam;
1506 		switch (pfam) {
1507 		case AF_INET6:
1508 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1509 			    sizeof (sadb_address_t), sizeof (uint64_t));
1510 			break;
1511 		case AF_INET:
1512 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1513 			    sizeof (sadb_address_t), sizeof (uint64_t));
1514 			break;
1515 		default:
1516 			cmn_err(CE_PANIC,
1517 			    "IPsec SADB: Proxy length failure.\n");
1518 			break;
1519 		}
1520 		isrc = B_TRUE;
1521 		idst = B_TRUE;
1522 		alloclen += 2 * paddrsize;
1523 	}
1524 
1525 	/* For the following fields, assume that length != 0 ==> stuff */
1526 	if (ipsa->ipsa_authkeylen != 0) {
1527 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1528 		    sizeof (uint64_t));
1529 		alloclen += authsize;
1530 		auth = B_TRUE;
1531 	} else {
1532 		auth = B_FALSE;
1533 	}
1534 
1535 	if (ipsa->ipsa_encrkeylen != 0) {
1536 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen +
1537 		    ipsa->ipsa_nonce_len, sizeof (uint64_t));
1538 		alloclen += encrsize;
1539 		encr = B_TRUE;
1540 	} else {
1541 		encr = B_FALSE;
1542 	}
1543 
1544 	/* No need for roundup on sens and integ. */
1545 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1546 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1547 		    ipsa->ipsa_senslen;
1548 		sensinteg = B_TRUE;
1549 	} else {
1550 		sensinteg = B_FALSE;
1551 	}
1552 
1553 	/*
1554 	 * Must use strlen() here for lengths.	Identities use NULL
1555 	 * pointers to indicate their nonexistence.
1556 	 */
1557 	if (ipsa->ipsa_src_cid != NULL) {
1558 		srcidsize = roundup(sizeof (sadb_ident_t) +
1559 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1560 		    sizeof (uint64_t));
1561 		alloclen += srcidsize;
1562 		srcid = B_TRUE;
1563 	} else {
1564 		srcid = B_FALSE;
1565 	}
1566 
1567 	if (ipsa->ipsa_dst_cid != NULL) {
1568 		dstidsize = roundup(sizeof (sadb_ident_t) +
1569 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1570 		    sizeof (uint64_t));
1571 		alloclen += dstidsize;
1572 		dstid = B_TRUE;
1573 	} else {
1574 		dstid = B_FALSE;
1575 	}
1576 
1577 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1578 		alloclen += sizeof (sadb_x_kmc_t);
1579 
1580 	if (ipsa->ipsa_replay != 0) {
1581 		alloclen += sizeof (sadb_x_replay_ctr_t);
1582 	}
1583 
1584 	/* Make sure the allocation length is a multiple of 8 bytes. */
1585 	ASSERT((alloclen & 0x7) == 0);
1586 
1587 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1588 	mp = allocb(alloclen, BPRI_HI);
1589 	if (mp == NULL)
1590 		return (NULL);
1591 
1592 	mp->b_wptr += alloclen;
1593 	end = mp->b_wptr;
1594 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1595 	*newsamsg = *samsg;
1596 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1597 
1598 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1599 
1600 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1601 
1602 	assoc = (sadb_sa_t *)(newsamsg + 1);
1603 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1604 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1605 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1606 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1607 	assoc->sadb_sa_state = ipsa->ipsa_state;
1608 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1609 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1610 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1611 
1612 	lt = (sadb_lifetime_t *)(assoc + 1);
1613 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1614 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1615 	/* We do not support the concept. */
1616 	lt->sadb_lifetime_allocations = 0;
1617 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1618 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1619 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1620 
1621 	if (hard) {
1622 		lt++;
1623 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1624 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1625 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1626 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1627 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1628 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1629 	}
1630 
1631 	if (soft) {
1632 		lt++;
1633 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1634 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1635 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1636 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1637 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1638 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1639 	}
1640 
1641 	if (idle) {
1642 		lt++;
1643 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1644 		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1645 		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1646 		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1647 	}
1648 
1649 	cur = (uint8_t *)(lt + 1);
1650 
1651 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1652 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1653 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1654 	    SA_PROTO(ipsa), 0);
1655 	if (cur == NULL) {
1656 		freemsg(mp);
1657 		mp = NULL;
1658 		goto bail;
1659 	}
1660 
1661 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1662 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1663 	    SA_PROTO(ipsa), 0);
1664 	if (cur == NULL) {
1665 		freemsg(mp);
1666 		mp = NULL;
1667 		goto bail;
1668 	}
1669 
1670 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1671 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1672 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1673 		    IPPROTO_UDP, 0);
1674 		if (cur == NULL) {
1675 			freemsg(mp);
1676 			mp = NULL;
1677 			goto bail;
1678 		}
1679 	}
1680 
1681 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1682 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1683 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1684 		    IPPROTO_UDP, 0);
1685 		if (cur == NULL) {
1686 			freemsg(mp);
1687 			mp = NULL;
1688 			goto bail;
1689 		}
1690 	}
1691 
1692 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1693 	if (isrc) {
1694 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1695 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1696 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1697 		if (cur == NULL) {
1698 			freemsg(mp);
1699 			mp = NULL;
1700 			goto bail;
1701 		}
1702 	}
1703 
1704 	if (idst) {
1705 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1706 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1707 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1708 		if (cur == NULL) {
1709 			freemsg(mp);
1710 			mp = NULL;
1711 			goto bail;
1712 		}
1713 	}
1714 
1715 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1716 		cur = sadb_make_kmc_ext(cur, end,
1717 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1718 		if (cur == NULL) {
1719 			freemsg(mp);
1720 			mp = NULL;
1721 			goto bail;
1722 		}
1723 	}
1724 
1725 	walker = (sadb_ext_t *)cur;
1726 	if (auth) {
1727 		key = (sadb_key_t *)walker;
1728 		key->sadb_key_len = SADB_8TO64(authsize);
1729 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1730 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1731 		key->sadb_key_reserved = 0;
1732 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1733 		walker = (sadb_ext_t *)((uint64_t *)walker +
1734 		    walker->sadb_ext_len);
1735 	}
1736 
1737 	if (encr) {
1738 		uint8_t *buf_ptr;
1739 		key = (sadb_key_t *)walker;
1740 		key->sadb_key_len = SADB_8TO64(encrsize);
1741 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1742 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1743 		key->sadb_key_reserved = ipsa->ipsa_saltbits;
1744 		buf_ptr = (uint8_t *)(key + 1);
1745 		bcopy(ipsa->ipsa_encrkey, buf_ptr, ipsa->ipsa_encrkeylen);
1746 		if (ipsa->ipsa_salt != NULL) {
1747 			buf_ptr += ipsa->ipsa_encrkeylen;
1748 			bcopy(ipsa->ipsa_salt, buf_ptr, ipsa->ipsa_saltlen);
1749 		}
1750 		walker = (sadb_ext_t *)((uint64_t *)walker +
1751 		    walker->sadb_ext_len);
1752 	}
1753 
1754 	if (srcid) {
1755 		ident = (sadb_ident_t *)walker;
1756 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1757 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1758 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1759 		ident->sadb_ident_id = 0;
1760 		ident->sadb_ident_reserved = 0;
1761 		(void) strcpy((char *)(ident + 1),
1762 		    ipsa->ipsa_src_cid->ipsid_cid);
1763 		walker = (sadb_ext_t *)((uint64_t *)walker +
1764 		    walker->sadb_ext_len);
1765 	}
1766 
1767 	if (dstid) {
1768 		ident = (sadb_ident_t *)walker;
1769 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1770 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1771 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1772 		ident->sadb_ident_id = 0;
1773 		ident->sadb_ident_reserved = 0;
1774 		(void) strcpy((char *)(ident + 1),
1775 		    ipsa->ipsa_dst_cid->ipsid_cid);
1776 		walker = (sadb_ext_t *)((uint64_t *)walker +
1777 		    walker->sadb_ext_len);
1778 	}
1779 
1780 	if (sensinteg) {
1781 		sens = (sadb_sens_t *)walker;
1782 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1783 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1784 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1785 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1786 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1787 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1788 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1789 		sens->sadb_sens_reserved = 0;
1790 		bitmap = (uint64_t *)(sens + 1);
1791 		if (ipsa->ipsa_sens != NULL) {
1792 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1793 			bitmap += sens->sadb_sens_sens_len;
1794 		}
1795 		if (ipsa->ipsa_integ != NULL)
1796 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1797 		walker = (sadb_ext_t *)((uint64_t *)walker +
1798 		    walker->sadb_ext_len);
1799 	}
1800 
1801 	if (paired) {
1802 		pair_ext = (sadb_x_pair_t *)walker;
1803 
1804 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1805 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1806 		pair_ext->sadb_x_pair_spi = otherspi;
1807 
1808 		walker = (sadb_ext_t *)((uint64_t *)walker +
1809 		    walker->sadb_ext_len);
1810 	}
1811 
1812 	if (ipsa->ipsa_replay != 0) {
1813 		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1814 		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1815 		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1816 		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1817 		repl_ctr->sadb_x_rc_replay64 = 0;
1818 		walker = (sadb_ext_t *)(repl_ctr + 1);
1819 	}
1820 
1821 bail:
1822 	/* Pardon any delays... */
1823 	mutex_exit(&ipsa->ipsa_lock);
1824 
1825 	return (mp);
1826 }
1827 
1828 /*
1829  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1830  * and adjust base message accordingly.
1831  *
1832  * Assume message is pulled up in one piece of contiguous memory.
1833  *
1834  * Say if we start off with:
1835  *
1836  * +------+----+-------------+-----------+---------------+---------------+
1837  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1838  * +------+----+-------------+-----------+---------------+---------------+
1839  *
1840  * we will end up with
1841  *
1842  * +------+----+-------------+-----------+---------------+
1843  * | base | SA | source addr | dest addr | soft lifetime |
1844  * +------+----+-------------+-----------+---------------+
1845  */
1846 static void
1847 sadb_strip(sadb_msg_t *samsg)
1848 {
1849 	sadb_ext_t *ext;
1850 	uint8_t *target = NULL;
1851 	uint8_t *msgend;
1852 	int sofar = SADB_8TO64(sizeof (*samsg));
1853 	int copylen;
1854 
1855 	ext = (sadb_ext_t *)(samsg + 1);
1856 	msgend = (uint8_t *)samsg;
1857 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1858 	while ((uint8_t *)ext < msgend) {
1859 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1860 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1861 		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1862 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1863 			/*
1864 			 * Aha!	 I found a header to be erased.
1865 			 */
1866 
1867 			if (target != NULL) {
1868 				/*
1869 				 * If I had a previous header to be erased,
1870 				 * copy over it.  I can get away with just
1871 				 * copying backwards because the target will
1872 				 * always be 8 bytes behind the source.
1873 				 */
1874 				copylen = ((uint8_t *)ext) - (target +
1875 				    SADB_64TO8(
1876 				    ((sadb_ext_t *)target)->sadb_ext_len));
1877 				ovbcopy(((uint8_t *)ext - copylen), target,
1878 				    copylen);
1879 				target += copylen;
1880 				((sadb_ext_t *)target)->sadb_ext_len =
1881 				    SADB_8TO64(((uint8_t *)ext) - target +
1882 				    SADB_64TO8(ext->sadb_ext_len));
1883 			} else {
1884 				target = (uint8_t *)ext;
1885 			}
1886 		} else {
1887 			sofar += ext->sadb_ext_len;
1888 		}
1889 
1890 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1891 	}
1892 
1893 	ASSERT((uint8_t *)ext == msgend);
1894 
1895 	if (target != NULL) {
1896 		copylen = ((uint8_t *)ext) - (target +
1897 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1898 		if (copylen != 0)
1899 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1900 	}
1901 
1902 	/* Adjust samsg. */
1903 	samsg->sadb_msg_len = (uint16_t)sofar;
1904 
1905 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1906 }
1907 
1908 /*
1909  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1910  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1911  * the sending keysock instance is included.
1912  */
1913 void
1914 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1915     uint_t serial)
1916 {
1917 	mblk_t *msg = mp->b_cont;
1918 	sadb_msg_t *samsg;
1919 	keysock_out_t *kso;
1920 
1921 	/*
1922 	 * Enough functions call this to merit a NULL queue check.
1923 	 */
1924 	if (pfkey_q == NULL) {
1925 		freemsg(mp);
1926 		return;
1927 	}
1928 
1929 	ASSERT(msg != NULL);
1930 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1931 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1932 	samsg = (sadb_msg_t *)msg->b_rptr;
1933 	kso = (keysock_out_t *)mp->b_rptr;
1934 
1935 	kso->ks_out_type = KEYSOCK_OUT;
1936 	kso->ks_out_len = sizeof (*kso);
1937 	kso->ks_out_serial = serial;
1938 
1939 	/*
1940 	 * Only send the base message up in the event of an error.
1941 	 * Don't worry about bzero()-ing, because it was probably bogus
1942 	 * anyway.
1943 	 */
1944 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1945 	samsg = (sadb_msg_t *)msg->b_rptr;
1946 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1947 	samsg->sadb_msg_errno = (uint8_t)error;
1948 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1949 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1950 
1951 	putnext(pfkey_q, mp);
1952 }
1953 
1954 /*
1955  * Send a successful return packet back to keysock via the queue in pfkey_q.
1956  *
1957  * Often, an SA is associated with the reply message, it's passed in if needed,
1958  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1959  * and the caller will release said refcnt.
1960  */
1961 void
1962 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1963     keysock_in_t *ksi, ipsa_t *ipsa)
1964 {
1965 	keysock_out_t *kso;
1966 	mblk_t *mp1;
1967 	sadb_msg_t *newsamsg;
1968 	uint8_t *oldend;
1969 
1970 	ASSERT((mp->b_cont != NULL) &&
1971 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1972 	    ((void *)mp->b_rptr == (void *)ksi));
1973 
1974 	switch (samsg->sadb_msg_type) {
1975 	case SADB_ADD:
1976 	case SADB_UPDATE:
1977 	case SADB_X_UPDATEPAIR:
1978 	case SADB_X_DELPAIR_STATE:
1979 	case SADB_FLUSH:
1980 	case SADB_DUMP:
1981 		/*
1982 		 * I have all of the message already.  I just need to strip
1983 		 * out the keying material and echo the message back.
1984 		 *
1985 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1986 		 * work.  When DUMP reaches here, it should only be a base
1987 		 * message.
1988 		 */
1989 	justecho:
1990 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1991 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1992 		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1993 			sadb_strip(samsg);
1994 			/* Assume PF_KEY message is contiguous. */
1995 			ASSERT(mp->b_cont->b_cont == NULL);
1996 			oldend = mp->b_cont->b_wptr;
1997 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1998 			    SADB_64TO8(samsg->sadb_msg_len);
1999 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
2000 		}
2001 		break;
2002 	case SADB_GET:
2003 		/*
2004 		 * Do a lot of work here, because of the ipsa I just found.
2005 		 * First construct the new PF_KEY message, then abandon
2006 		 * the old one.
2007 		 */
2008 		mp1 = sadb_sa2msg(ipsa, samsg);
2009 		if (mp1 == NULL) {
2010 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
2011 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
2012 			return;
2013 		}
2014 		freemsg(mp->b_cont);
2015 		mp->b_cont = mp1;
2016 		break;
2017 	case SADB_DELETE:
2018 	case SADB_X_DELPAIR:
2019 		if (ipsa == NULL)
2020 			goto justecho;
2021 		/*
2022 		 * Because listening KMds may require more info, treat
2023 		 * DELETE like a special case of GET.
2024 		 */
2025 		mp1 = sadb_sa2msg(ipsa, samsg);
2026 		if (mp1 == NULL) {
2027 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
2028 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
2029 			return;
2030 		}
2031 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
2032 		sadb_strip(newsamsg);
2033 		oldend = mp1->b_wptr;
2034 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
2035 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
2036 		freemsg(mp->b_cont);
2037 		mp->b_cont = mp1;
2038 		break;
2039 	default:
2040 		if (mp != NULL)
2041 			freemsg(mp);
2042 		return;
2043 	}
2044 
2045 	/* ksi is now null and void. */
2046 	kso = (keysock_out_t *)ksi;
2047 	kso->ks_out_type = KEYSOCK_OUT;
2048 	kso->ks_out_len = sizeof (*kso);
2049 	kso->ks_out_serial = ksi->ks_in_serial;
2050 	/* We're ready to send... */
2051 	putnext(pfkey_q, mp);
2052 }
2053 
2054 /*
2055  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
2056  */
2057 void
2058 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
2059     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
2060 {
2061 	keysock_hello_ack_t *kha;
2062 	queue_t *oldq;
2063 
2064 	ASSERT(OTHERQ(q) != NULL);
2065 
2066 	/*
2067 	 * First, check atomically that I'm the first and only keysock
2068 	 * instance.
2069 	 *
2070 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
2071 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
2072 	 * messages.
2073 	 */
2074 
2075 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
2076 	if (oldq != NULL) {
2077 		ASSERT(oldq != q);
2078 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
2079 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
2080 		freemsg(mp);
2081 		return;
2082 	}
2083 
2084 	kha = (keysock_hello_ack_t *)mp->b_rptr;
2085 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
2086 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
2087 	kha->ks_hello_satype = (uint8_t)satype;
2088 
2089 	/*
2090 	 * If we made it past the casptr, then we have "exclusive" access
2091 	 * to the timeout handle.  Fire it off after the default ager
2092 	 * interval.
2093 	 */
2094 	*top = qtimeout(*pfkey_qp, ager, agerarg,
2095 	    drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
2096 
2097 	putnext(*pfkey_qp, mp);
2098 }
2099 
2100 /*
2101  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
2102  *
2103  * Check addresses themselves for wildcard or multicast.
2104  * Check ire table for local/non-local/broadcast.
2105  */
2106 int
2107 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
2108     netstack_t *ns)
2109 {
2110 	sadb_address_t *addr = (sadb_address_t *)ext;
2111 	struct sockaddr_in *sin;
2112 	struct sockaddr_in6 *sin6;
2113 	ire_t *ire;
2114 	int diagnostic, type;
2115 	boolean_t normalized = B_FALSE;
2116 
2117 	ASSERT(ext != NULL);
2118 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
2119 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
2120 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
2121 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
2122 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
2123 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
2124 
2125 	/* Assign both sockaddrs, the compiler will do the right thing. */
2126 	sin = (struct sockaddr_in *)(addr + 1);
2127 	sin6 = (struct sockaddr_in6 *)(addr + 1);
2128 
2129 	if (sin6->sin6_family == AF_INET6) {
2130 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2131 			/*
2132 			 * Convert to an AF_INET sockaddr.  This means the
2133 			 * return messages will have the extra space, but have
2134 			 * AF_INET sockaddrs instead of AF_INET6.
2135 			 *
2136 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
2137 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
2138 			 * equal to AF_INET <v4>, it shouldnt be a huge
2139 			 * problem.
2140 			 */
2141 			sin->sin_family = AF_INET;
2142 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
2143 			    &sin->sin_addr);
2144 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
2145 			normalized = B_TRUE;
2146 		}
2147 	} else if (sin->sin_family != AF_INET) {
2148 		switch (ext->sadb_ext_type) {
2149 		case SADB_EXT_ADDRESS_SRC:
2150 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2151 			break;
2152 		case SADB_EXT_ADDRESS_DST:
2153 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2154 			break;
2155 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2156 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2157 			break;
2158 		case SADB_X_EXT_ADDRESS_INNER_DST:
2159 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2160 			break;
2161 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2162 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2163 			break;
2164 		case SADB_X_EXT_ADDRESS_NATT_REM:
2165 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2166 			break;
2167 			/* There is no default, see above ASSERT. */
2168 		}
2169 bail:
2170 		if (pfkey_q != NULL) {
2171 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2172 			    serial);
2173 		} else {
2174 			/*
2175 			 * Scribble in sadb_msg that we got passed in.
2176 			 * Overload "mp" to be an sadb_msg pointer.
2177 			 */
2178 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2179 
2180 			samsg->sadb_msg_errno = EINVAL;
2181 			samsg->sadb_x_msg_diagnostic = diagnostic;
2182 		}
2183 		return (KS_IN_ADDR_UNKNOWN);
2184 	}
2185 
2186 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2187 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2188 		/*
2189 		 * We need only check for prefix issues.
2190 		 */
2191 
2192 		/* Set diagnostic now, in case we need it later. */
2193 		diagnostic =
2194 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2195 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2196 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2197 
2198 		if (normalized)
2199 			addr->sadb_address_prefixlen -= 96;
2200 
2201 		/*
2202 		 * Verify and mask out inner-addresses based on prefix length.
2203 		 */
2204 		if (sin->sin_family == AF_INET) {
2205 			if (addr->sadb_address_prefixlen > 32)
2206 				goto bail;
2207 			sin->sin_addr.s_addr &=
2208 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2209 		} else {
2210 			in6_addr_t mask;
2211 
2212 			ASSERT(sin->sin_family == AF_INET6);
2213 			/*
2214 			 * ip_plen_to_mask_v6() returns NULL if the value in
2215 			 * question is out of range.
2216 			 */
2217 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2218 			    &mask) == NULL)
2219 				goto bail;
2220 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2221 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2222 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2223 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2224 		}
2225 
2226 		/* We don't care in these cases. */
2227 		return (KS_IN_ADDR_DONTCARE);
2228 	}
2229 
2230 	if (sin->sin_family == AF_INET6) {
2231 		/* Check the easy ones now. */
2232 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2233 			return (KS_IN_ADDR_MBCAST);
2234 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2235 			return (KS_IN_ADDR_UNSPEC);
2236 		/*
2237 		 * At this point, we're a unicast IPv6 address.
2238 		 *
2239 		 * A ctable lookup for local is sufficient here.  If we're
2240 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2241 		 *
2242 		 * XXX Zones alert -> me/notme decision needs to be tempered
2243 		 * by what zone we're in when we go to zone-aware IPsec.
2244 		 */
2245 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2246 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2247 		    ns->netstack_ip);
2248 		if (ire != NULL) {
2249 			/* Hey hey, it's local. */
2250 			IRE_REFRELE(ire);
2251 			return (KS_IN_ADDR_ME);
2252 		}
2253 	} else {
2254 		ASSERT(sin->sin_family == AF_INET);
2255 		if (sin->sin_addr.s_addr == INADDR_ANY)
2256 			return (KS_IN_ADDR_UNSPEC);
2257 		if (CLASSD(sin->sin_addr.s_addr))
2258 			return (KS_IN_ADDR_MBCAST);
2259 		/*
2260 		 * At this point we're a unicast or broadcast IPv4 address.
2261 		 *
2262 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2263 		 * A NULL return value is NOTME, otherwise, look at the
2264 		 * returned ire for broadcast or not and return accordingly.
2265 		 *
2266 		 * XXX Zones alert -> me/notme decision needs to be tempered
2267 		 * by what zone we're in when we go to zone-aware IPsec.
2268 		 */
2269 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2270 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2271 		    MATCH_IRE_TYPE, ns->netstack_ip);
2272 		if (ire != NULL) {
2273 			/* Check for local or broadcast */
2274 			type = ire->ire_type;
2275 			IRE_REFRELE(ire);
2276 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2277 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2278 			    KS_IN_ADDR_MBCAST);
2279 		}
2280 	}
2281 
2282 	return (KS_IN_ADDR_NOTME);
2283 }
2284 
2285 /*
2286  * Address normalizations and reality checks for inbound PF_KEY messages.
2287  *
2288  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2289  * the source to AF_INET.  Do the same for the inner sources.
2290  */
2291 boolean_t
2292 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2293 {
2294 	struct sockaddr_in *src, *isrc;
2295 	struct sockaddr_in6 *dst, *idst;
2296 	sadb_address_t *srcext, *dstext;
2297 	uint16_t sport;
2298 	sadb_ext_t **extv = ksi->ks_in_extv;
2299 	int rc;
2300 
2301 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2302 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2303 		    ksi->ks_in_serial, ns);
2304 		if (rc == KS_IN_ADDR_UNKNOWN)
2305 			return (B_FALSE);
2306 		if (rc == KS_IN_ADDR_MBCAST) {
2307 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2308 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2309 			return (B_FALSE);
2310 		}
2311 		ksi->ks_in_srctype = rc;
2312 	}
2313 
2314 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2315 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2316 		    ksi->ks_in_serial, ns);
2317 		if (rc == KS_IN_ADDR_UNKNOWN)
2318 			return (B_FALSE);
2319 		if (rc == KS_IN_ADDR_UNSPEC) {
2320 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2321 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2322 			return (B_FALSE);
2323 		}
2324 		ksi->ks_in_dsttype = rc;
2325 	}
2326 
2327 	/*
2328 	 * NAT-Traversal addrs are simple enough to not require all of
2329 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2330 	 * AF_INET.
2331 	 */
2332 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2333 		rc = sadb_addrcheck(pfkey_q, mp,
2334 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2335 
2336 		/*
2337 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2338 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2339 		 * AND local-port flexibility).
2340 		 */
2341 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2342 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2343 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2344 			    ksi->ks_in_serial);
2345 			return (B_FALSE);
2346 		}
2347 		src = (struct sockaddr_in *)
2348 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2349 		if (src->sin_family != AF_INET) {
2350 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2351 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2352 			    ksi->ks_in_serial);
2353 			return (B_FALSE);
2354 		}
2355 	}
2356 
2357 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2358 		rc = sadb_addrcheck(pfkey_q, mp,
2359 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2360 
2361 		/*
2362 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2363 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2364 		 */
2365 		if (rc != KS_IN_ADDR_NOTME &&
2366 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2367 		    rc == KS_IN_ADDR_UNSPEC)) {
2368 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2369 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2370 			    ksi->ks_in_serial);
2371 			return (B_FALSE);
2372 		}
2373 		src = (struct sockaddr_in *)
2374 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2375 		if (src->sin_family != AF_INET) {
2376 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2377 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2378 			    ksi->ks_in_serial);
2379 			return (B_FALSE);
2380 		}
2381 	}
2382 
2383 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2384 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2385 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2386 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2387 			    ksi->ks_in_serial);
2388 			return (B_FALSE);
2389 		}
2390 
2391 		if (sadb_addrcheck(pfkey_q, mp,
2392 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2393 		    == KS_IN_ADDR_UNKNOWN ||
2394 		    sadb_addrcheck(pfkey_q, mp,
2395 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2396 		    == KS_IN_ADDR_UNKNOWN)
2397 			return (B_FALSE);
2398 
2399 		isrc = (struct sockaddr_in *)
2400 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2401 		    1);
2402 		idst = (struct sockaddr_in6 *)
2403 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2404 		    1);
2405 		if (isrc->sin_family != idst->sin6_family) {
2406 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2407 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2408 			    ksi->ks_in_serial);
2409 			return (B_FALSE);
2410 		}
2411 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2412 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2413 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2414 			    ksi->ks_in_serial);
2415 			return (B_FALSE);
2416 	} else {
2417 		isrc = NULL;	/* For inner/outer port check below. */
2418 	}
2419 
2420 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2421 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2422 
2423 	if (dstext == NULL || srcext == NULL)
2424 		return (B_TRUE);
2425 
2426 	dst = (struct sockaddr_in6 *)(dstext + 1);
2427 	src = (struct sockaddr_in *)(srcext + 1);
2428 
2429 	if (isrc != NULL &&
2430 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2431 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2432 		/* Can't set inner and outer ports in one SA. */
2433 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2434 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2435 		    ksi->ks_in_serial);
2436 		return (B_FALSE);
2437 	}
2438 
2439 	if (dst->sin6_family == src->sin_family)
2440 		return (B_TRUE);
2441 
2442 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2443 		if (srcext->sadb_address_proto == 0) {
2444 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2445 		} else if (dstext->sadb_address_proto == 0) {
2446 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2447 		} else {
2448 			/* Inequal protocols, neither were 0.  Report error. */
2449 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2450 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2451 			    ksi->ks_in_serial);
2452 			return (B_FALSE);
2453 		}
2454 	}
2455 
2456 	/*
2457 	 * With the exception of an unspec IPv6 source and an IPv4
2458 	 * destination, address families MUST me matched.
2459 	 */
2460 	if (src->sin_family == AF_INET ||
2461 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2462 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2463 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2464 		return (B_FALSE);
2465 	}
2466 
2467 	/*
2468 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2469 	 * in the same place for sockaddr_in and sockaddr_in6.
2470 	 */
2471 	sport = src->sin_port;
2472 	bzero(src, sizeof (*src));
2473 	src->sin_family = AF_INET;
2474 	src->sin_port = sport;
2475 
2476 	return (B_TRUE);
2477 }
2478 
2479 /*
2480  * Set the results in "addrtype", given an IRE as requested by
2481  * sadb_addrcheck().
2482  */
2483 int
2484 sadb_addrset(ire_t *ire)
2485 {
2486 	if ((ire->ire_type & IRE_BROADCAST) ||
2487 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2488 	    (ire->ire_ipversion == IPV6_VERSION &&
2489 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2490 		return (KS_IN_ADDR_MBCAST);
2491 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2492 		return (KS_IN_ADDR_ME);
2493 	return (KS_IN_ADDR_NOTME);
2494 }
2495 
2496 
2497 /*
2498  * Walker callback function to delete sa's based on src/dst address.
2499  * Assumes that we're called with *head locked, no other locks held;
2500  * Conveniently, and not coincidentally, this is both what sadb_walker
2501  * gives us and also what sadb_unlinkassoc expects.
2502  */
2503 
2504 struct sadb_purge_state
2505 {
2506 	uint32_t *src;
2507 	uint32_t *dst;
2508 	sa_family_t af;
2509 	boolean_t inbnd;
2510 	char *sidstr;
2511 	char *didstr;
2512 	uint16_t sidtype;
2513 	uint16_t didtype;
2514 	uint32_t kmproto;
2515 	uint8_t sadb_sa_state;
2516 	mblk_t *mq;
2517 	sadb_t *sp;
2518 };
2519 
2520 static void
2521 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2522 {
2523 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2524 
2525 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2526 
2527 	mutex_enter(&entry->ipsa_lock);
2528 
2529 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2530 	    (ps->src != NULL &&
2531 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2532 	    (ps->dst != NULL &&
2533 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2534 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2535 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2536 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2537 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2538 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2539 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2540 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2541 		mutex_exit(&entry->ipsa_lock);
2542 		return;
2543 	}
2544 
2545 	if (ps->inbnd) {
2546 		sadb_delete_cluster(entry);
2547 	}
2548 	entry->ipsa_state = IPSA_STATE_DEAD;
2549 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2550 }
2551 
2552 /*
2553  * Common code to purge an SA with a matching src or dst address.
2554  * Don't kill larval SA's in such a purge.
2555  */
2556 int
2557 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2558     queue_t *ip_q)
2559 {
2560 	sadb_address_t *dstext =
2561 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2562 	sadb_address_t *srcext =
2563 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2564 	sadb_ident_t *dstid =
2565 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2566 	sadb_ident_t *srcid =
2567 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2568 	sadb_x_kmc_t *kmc =
2569 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2570 	struct sockaddr_in *src, *dst;
2571 	struct sockaddr_in6 *src6, *dst6;
2572 	struct sadb_purge_state ps;
2573 
2574 	/*
2575 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2576 	 * takes care of them.
2577 	 */
2578 
2579 	/* enforced by caller */
2580 	ASSERT((dstext != NULL) || (srcext != NULL));
2581 
2582 	ps.src = NULL;
2583 	ps.dst = NULL;
2584 #ifdef DEBUG
2585 	ps.af = (sa_family_t)-1;
2586 #endif
2587 	ps.mq = NULL;
2588 	ps.sidstr = NULL;
2589 	ps.didstr = NULL;
2590 	ps.kmproto = SADB_X_KMP_MAX + 1;
2591 
2592 	if (dstext != NULL) {
2593 		dst = (struct sockaddr_in *)(dstext + 1);
2594 		ps.af = dst->sin_family;
2595 		if (dst->sin_family == AF_INET6) {
2596 			dst6 = (struct sockaddr_in6 *)dst;
2597 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2598 		} else {
2599 			ps.dst = (uint32_t *)&dst->sin_addr;
2600 		}
2601 	}
2602 
2603 	if (srcext != NULL) {
2604 		src = (struct sockaddr_in *)(srcext + 1);
2605 		ps.af = src->sin_family;
2606 		if (src->sin_family == AF_INET6) {
2607 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2608 			ps.src = (uint32_t *)&src6->sin6_addr;
2609 		} else {
2610 			ps.src = (uint32_t *)&src->sin_addr;
2611 		}
2612 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2613 	}
2614 
2615 	ASSERT(ps.af != (sa_family_t)-1);
2616 
2617 	if (dstid != NULL) {
2618 		/*
2619 		 * NOTE:  May need to copy string in the future
2620 		 * if the inbound keysock message disappears for some strange
2621 		 * reason.
2622 		 */
2623 		ps.didstr = (char *)(dstid + 1);
2624 		ps.didtype = dstid->sadb_ident_type;
2625 	}
2626 
2627 	if (srcid != NULL) {
2628 		/*
2629 		 * NOTE:  May need to copy string in the future
2630 		 * if the inbound keysock message disappears for some strange
2631 		 * reason.
2632 		 */
2633 		ps.sidstr = (char *)(srcid + 1);
2634 		ps.sidtype = srcid->sadb_ident_type;
2635 	}
2636 
2637 	if (kmc != NULL)
2638 		ps.kmproto = kmc->sadb_x_kmc_proto;
2639 
2640 	/*
2641 	 * This is simple, crude, and effective.
2642 	 * Unimplemented optimizations (TBD):
2643 	 * - we can limit how many places we search based on where we
2644 	 * think the SA is filed.
2645 	 * - if we get a dst address, we can hash based on dst addr to find
2646 	 * the correct bucket in the outbound table.
2647 	 */
2648 	ps.inbnd = B_TRUE;
2649 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2650 	ps.inbnd = B_FALSE;
2651 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2652 
2653 	if (ps.mq != NULL)
2654 		sadb_drain_torchq(ip_q, ps.mq);
2655 
2656 	ASSERT(mp->b_cont != NULL);
2657 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2658 	    NULL);
2659 	return (0);
2660 }
2661 
2662 static void
2663 sadb_delpair_state(isaf_t *head, ipsa_t *entry, void *cookie)
2664 {
2665 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2666 	isaf_t  *inbound_bucket;
2667 	ipsa_t *peer_assoc;
2668 
2669 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2670 
2671 	mutex_enter(&entry->ipsa_lock);
2672 
2673 	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2674 	    ((ps->src != NULL) &&
2675 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af))) {
2676 		mutex_exit(&entry->ipsa_lock);
2677 		return;
2678 	}
2679 
2680 	/*
2681 	 * The isaf_t *, which is passed in , is always an outbound bucket,
2682 	 * and we are preserving the outbound-then-inbound hash-bucket lock
2683 	 * ordering. The sadb_walker() which triggers this function is called
2684 	 * only on the outbound fanout, and the corresponding inbound bucket
2685 	 * lock is safe to acquire here.
2686 	 */
2687 
2688 	if (entry->ipsa_haspeer) {
2689 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_spi);
2690 		mutex_enter(&inbound_bucket->isaf_lock);
2691 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2692 		    entry->ipsa_spi, entry->ipsa_srcaddr,
2693 		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2694 	} else {
2695 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_otherspi);
2696 		mutex_enter(&inbound_bucket->isaf_lock);
2697 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2698 		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2699 		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2700 	}
2701 
2702 	entry->ipsa_state = IPSA_STATE_DEAD;
2703 	(void) sadb_torch_assoc(head, entry, B_FALSE, &ps->mq);
2704 	if (peer_assoc != NULL) {
2705 		mutex_enter(&peer_assoc->ipsa_lock);
2706 		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2707 		(void) sadb_torch_assoc(inbound_bucket, peer_assoc,
2708 		    B_FALSE, &ps->mq);
2709 	}
2710 	mutex_exit(&inbound_bucket->isaf_lock);
2711 }
2712 
2713 /*
2714  * Common code to delete/get an SA.
2715  */
2716 int
2717 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2718     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2719 {
2720 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2721 	sadb_address_t *srcext =
2722 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2723 	sadb_address_t *dstext =
2724 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2725 	ipsa_t *echo_target = NULL;
2726 	ipsap_t *ipsapp;
2727 	mblk_t *torchq = NULL;
2728 	uint_t	error = 0;
2729 
2730 	if (assoc == NULL) {
2731 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2732 		return (EINVAL);
2733 	}
2734 
2735 	if (sadb_msg_type == SADB_X_DELPAIR_STATE) {
2736 		struct sockaddr_in *src;
2737 		struct sockaddr_in6 *src6;
2738 		struct sadb_purge_state ps;
2739 
2740 		if (srcext == NULL) {
2741 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2742 			return (EINVAL);
2743 		}
2744 		ps.src = NULL;
2745 		ps.mq = NULL;
2746 		src = (struct sockaddr_in *)(srcext + 1);
2747 		ps.af = src->sin_family;
2748 		if (src->sin_family == AF_INET6) {
2749 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2750 			ps.src = (uint32_t *)&src6->sin6_addr;
2751 			ps.sp = &spp->s_v6;
2752 		} else {
2753 			ps.src = (uint32_t *)&src->sin_addr;
2754 			ps.sp = &spp->s_v4;
2755 		}
2756 		ps.inbnd = B_FALSE;
2757 		ps.sadb_sa_state = assoc->sadb_sa_state;
2758 		sadb_walker(ps.sp->sdb_of, ps.sp->sdb_hashsize,
2759 		    sadb_delpair_state, &ps);
2760 
2761 		if (ps.mq != NULL)
2762 			sadb_drain_torchq(pfkey_q, ps.mq);
2763 
2764 		ASSERT(mp->b_cont != NULL);
2765 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2766 		    ksi, NULL);
2767 		return (0);
2768 	}
2769 
2770 	if (dstext == NULL) {
2771 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2772 		return (EINVAL);
2773 	}
2774 
2775 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
2776 	if (ipsapp == NULL) {
2777 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2778 		return (ESRCH);
2779 	}
2780 
2781 	echo_target = ipsapp->ipsap_sa_ptr;
2782 	if (echo_target == NULL)
2783 		echo_target = ipsapp->ipsap_psa_ptr;
2784 
2785 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2786 		/*
2787 		 * Bucket locks will be required if SA is actually unlinked.
2788 		 * get_ipsa_pair() returns valid hash bucket pointers even
2789 		 * if it can't find a pair SA pointer. To prevent a potential
2790 		 * deadlock, always lock the outbound bucket before the inbound.
2791 		 */
2792 		if (ipsapp->in_inbound_table) {
2793 			mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2794 			mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2795 		} else {
2796 			mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2797 			mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2798 		}
2799 
2800 		if (ipsapp->ipsap_sa_ptr != NULL) {
2801 			mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2802 			if (ipsapp->ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2803 				sadb_delete_cluster(ipsapp->ipsap_sa_ptr);
2804 			}
2805 			ipsapp->ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2806 			(void) sadb_torch_assoc(ipsapp->ipsap_bucket,
2807 			    ipsapp->ipsap_sa_ptr, B_FALSE, &torchq);
2808 			/*
2809 			 * sadb_torch_assoc() releases the ipsa_lock
2810 			 * and calls sadb_unlinkassoc() which does a
2811 			 * IPSA_REFRELE.
2812 			 */
2813 		}
2814 		if (ipsapp->ipsap_psa_ptr != NULL) {
2815 			mutex_enter(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2816 			if (sadb_msg_type == SADB_X_DELPAIR ||
2817 			    ipsapp->ipsap_psa_ptr->ipsa_haspeer) {
2818 				if (ipsapp->ipsap_psa_ptr->ipsa_flags &
2819 				    IPSA_F_INBOUND) {
2820 					sadb_delete_cluster(
2821 					    ipsapp->ipsap_psa_ptr);
2822 				}
2823 				ipsapp->ipsap_psa_ptr->ipsa_state =
2824 				    IPSA_STATE_DEAD;
2825 				(void) sadb_torch_assoc(ipsapp->ipsap_pbucket,
2826 				    ipsapp->ipsap_psa_ptr, B_FALSE, &torchq);
2827 			} else {
2828 				/*
2829 				 * Only half of the "pair" has been deleted.
2830 				 * Update the remaining SA and remove references
2831 				 * to its pair SA, which is now gone.
2832 				 */
2833 				ipsapp->ipsap_psa_ptr->ipsa_otherspi = 0;
2834 				ipsapp->ipsap_psa_ptr->ipsa_flags &=
2835 				    ~IPSA_F_PAIRED;
2836 				mutex_exit(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2837 			}
2838 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2839 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2840 			error = ESRCH;
2841 		}
2842 		mutex_exit(&ipsapp->ipsap_bucket->isaf_lock);
2843 		mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2844 	}
2845 
2846 	if (torchq != NULL)
2847 		sadb_drain_torchq(spp->s_ip_q, torchq);
2848 
2849 	ASSERT(mp->b_cont != NULL);
2850 
2851 	if (error == 0)
2852 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2853 		    mp->b_cont->b_rptr, ksi, echo_target);
2854 
2855 	destroy_ipsa_pair(ipsapp);
2856 
2857 	return (error);
2858 }
2859 
2860 /*
2861  * This function takes a sadb_sa_t and finds the ipsa_t structure
2862  * and the isaf_t (hash bucket) that its stored under. If the security
2863  * association has a peer, the ipsa_t structure and bucket for that security
2864  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2865  * are returned as a ipsap_t.
2866  *
2867  * The hash buckets are returned for convenience, if the calling function
2868  * needs to use the hash bucket locks, say to remove the SA's, it should
2869  * take care to observe the convention of locking outbound bucket then
2870  * inbound bucket. The flag in_inbound_table provides direction.
2871  *
2872  * Note that a "pair" is defined as one (but not both) of the following:
2873  *
2874  * A security association which has a soft reference to another security
2875  * association via its SPI.
2876  *
2877  * A security association that is not obviously "inbound" or "outbound" so
2878  * it appears in both hash tables, the "peer" being the same security
2879  * association in the other hash table.
2880  *
2881  * This function will return NULL if the ipsa_t can't be found in the
2882  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2883  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2884  * provided at least one ipsa_t is found.
2885  */
/*
 * Arguments, as used by the code below:
 *
 *   assoc  - supplies the SPI (sadb_sa_spi) and the direction flags
 *            (sadb_sa_flags) that decide which table is searched first.
 *   srcext - optional source address extension. When NULL the lookups use
 *            ALL_ZEROES_PTR as the source address.
 *   dstext - destination address extension. It is dereferenced without a
 *            NULL check, so it must be non-NULL. Its address family picks
 *            s_v6 (AF_INET6) or s_v4 (anything else) out of spp.
 *   spp    - the SADB pair to search.
 *
 * The result is allocated with kmem_zalloc(KM_NOSLEEP); NULL is returned if
 * that allocation fails. All bucket and SA locks taken here are dropped
 * again before returning.
 *
 * NOTE(review): the caller presumably owns the returned ipsap_t and any SA
 * references stored in it - confirm against the callers.
 */
2886 ipsap_t *
2887 get_ipsa_pair(sadb_sa_t *assoc, sadb_address_t *srcext, sadb_address_t *dstext,
2888     sadbp_t *spp)
2889 {
2890 	struct sockaddr_in *src, *dst;
2891 	struct sockaddr_in6 *src6, *dst6;
2892 	sadb_t *sp;
2893 	uint32_t *srcaddr, *dstaddr;
2894 	isaf_t *outbound_bucket, *inbound_bucket;
2895 	ipsap_t *ipsapp;
2896 	sa_family_t af;
2897 
	/* Private copies of the found SA pairing data, used after unlock. */
2898 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2899 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2900 	uint32_t pair_spi;
2901 
	/*
	 * Non-sleeping, zero-filled allocation: every pointer in the result
	 * (including ipsap_psa_ptr) starts out NULL.
	 */
2902 	ipsapp = kmem_zalloc(sizeof (*ipsapp), KM_NOSLEEP);
2903 	if (ipsapp == NULL)
2904 		return (NULL);
2905 
2906 	ipsapp->in_inbound_table = B_FALSE;
2907 
2908 	/*
2909 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2910 	 * takes care of them.
2911 	 */
2912 
	/*
	 * The sockaddr follows the extension header directly. Pick the
	 * per-family SADB and select the outbound bucket from the
	 * destination address.
	 */
2913 	dst = (struct sockaddr_in *)(dstext + 1);
2914 	af = dst->sin_family;
2915 	if (af == AF_INET6) {
2916 		sp = &spp->s_v6;
2917 		dst6 = (struct sockaddr_in6 *)dst;
2918 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2919 		if (srcext != NULL) {
2920 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2921 			srcaddr = (uint32_t *)&src6->sin6_addr;
2922 			ASSERT(src6->sin6_family == af);
2923 			ASSERT(src6->sin6_family == AF_INET6);
2924 		} else {
2925 			srcaddr = ALL_ZEROES_PTR;
2926 		}
2927 		outbound_bucket = OUTBOUND_BUCKET_V6(sp,
2928 		    *(uint32_t *)dstaddr);
2929 	} else {
2930 		sp = &spp->s_v4;
2931 		dstaddr = (uint32_t *)&dst->sin_addr;
2932 		if (srcext != NULL) {
2933 			src = (struct sockaddr_in *)(srcext + 1);
2934 			srcaddr = (uint32_t *)&src->sin_addr;
2935 			ASSERT(src->sin_family == af);
2936 			ASSERT(src->sin_family == AF_INET);
2937 		} else {
2938 			srcaddr = ALL_ZEROES_PTR;
2939 		}
2940 		outbound_bucket = OUTBOUND_BUCKET_V4(sp,
2941 		    *(uint32_t *)dstaddr);
2942 	}
2943 
	/* The inbound bucket is selected from the SPI. */
2944 	inbound_bucket = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2945 
2946 	/* Lock down both buckets, outbound first and then inbound. */
2947 	mutex_enter(&outbound_bucket->isaf_lock);
2948 	mutex_enter(&inbound_bucket->isaf_lock);
2949 
	/*
	 * Search the table suggested by the direction flag first and fall
	 * back to the other table on a miss. ipsap_bucket is the bucket that
	 * was searched last, ipsap_pbucket is the other one, and
	 * in_inbound_table is set only when the SA was found inbound.
	 */
2950 	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2951 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(inbound_bucket,
2952 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2953 		if (ipsapp->ipsap_sa_ptr != NULL) {
2954 			ipsapp->ipsap_bucket = inbound_bucket;
2955 			ipsapp->ipsap_pbucket = outbound_bucket;
2956 			ipsapp->in_inbound_table = B_TRUE;
2957 		} else {
2958 			ipsapp->ipsap_sa_ptr =
2959 			    ipsec_getassocbyspi(outbound_bucket,
2960 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2961 			ipsapp->ipsap_bucket = outbound_bucket;
2962 			ipsapp->ipsap_pbucket = inbound_bucket;
2963 		}
2964 	} else {
2965 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2966 		ipsapp->ipsap_sa_ptr =
2967 		    ipsec_getassocbyspi(outbound_bucket,
2968 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2969 		if (ipsapp->ipsap_sa_ptr != NULL) {
2970 			ipsapp->ipsap_bucket = outbound_bucket;
2971 			ipsapp->ipsap_pbucket = inbound_bucket;
2972 		} else {
2973 			ipsapp->ipsap_sa_ptr =
2974 			    ipsec_getassocbyspi(inbound_bucket,
2975 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2976 			ipsapp->ipsap_bucket = inbound_bucket;
2977 			ipsapp->ipsap_pbucket = outbound_bucket;
2978 			if (ipsapp->ipsap_sa_ptr != NULL)
2979 				ipsapp->in_inbound_table = B_TRUE;
2980 		}
2981 	}
2982 
	/* Missed in both tables: release everything and report not found. */
2983 	if (ipsapp->ipsap_sa_ptr == NULL) {
2984 		mutex_exit(&outbound_bucket->isaf_lock);
2985 		mutex_exit(&inbound_bucket->isaf_lock);
2986 		kmem_free(ipsapp, sizeof (*ipsapp));
2987 		return (NULL);
2988 	}
2989 
	/*
	 * A larval SA found in the inbound table is returned as it is; no
	 * peer or pair lookup is attempted for it.
	 */
2990 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2991 	    ipsapp->in_inbound_table) {
2992 		mutex_exit(&outbound_bucket->isaf_lock);
2993 		mutex_exit(&inbound_bucket->isaf_lock);
2994 		return (ipsapp);
2995 	}
2996 
	/* Read the peer/pair fields of the found SA under its own lock. */
2997 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2998 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2999 		/*
3000 		 * haspeer implies no sa_pairing, look for same spi
3001 		 * in other hashtable.
3002 		 */
3003 		ipsapp->ipsap_psa_ptr =
3004 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
3005 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
3006 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
3007 		mutex_exit(&outbound_bucket->isaf_lock);
3008 		mutex_exit(&inbound_bucket->isaf_lock);
3009 		return (ipsapp);
3010 	}
	/*
	 * Copy out the paired SPI and the SA addresses so that all locks
	 * can be dropped before the pair is looked up.
	 */
3011 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
3012 	IPSA_COPY_ADDR(&pair_srcaddr,
3013 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, af);
3014 	IPSA_COPY_ADDR(&pair_dstaddr,
3015 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, af);
3016 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
3017 	mutex_exit(&outbound_bucket->isaf_lock);
3018 	mutex_exit(&inbound_bucket->isaf_lock);
3019 
	/* No paired SPI recorded: return with ipsap_psa_ptr still NULL. */
3020 	if (pair_spi == 0) {
3021 		ASSERT(ipsapp->ipsap_bucket != NULL);
3022 		ASSERT(ipsapp->ipsap_pbucket != NULL);
3023 		return (ipsapp);
3024 	}
3025 
3026 	/* Recompute ipsap_pbucket as the bucket that should hold the pair. */
3027 
3028 	if (ipsapp->in_inbound_table) {
3029 		/* Found SA in inbound table, pair will be in outbound. */
3030 		if (af == AF_INET6) {
3031 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sp,
3032 			    *(uint32_t *)pair_srcaddr);
3033 		} else {
3034 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sp,
3035 			    *(uint32_t *)pair_srcaddr);
3036 		}
3037 	} else {
		/* Found SA in outbound table, pair is looked up by its SPI. */
3038 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sp, pair_spi);
3039 	}
	/*
	 * Only the pair bucket is locked for this lookup. The pair is
	 * searched for with the found SA addresses swapped: its destination
	 * is passed as the source and its source as the destination.
	 */
3040 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
3041 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
3042 	    pair_spi, pair_dstaddr, pair_srcaddr, af);
3043 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
3044 	ASSERT(ipsapp->ipsap_bucket != NULL);
3045 	ASSERT(ipsapp->ipsap_pbucket != NULL);
3046 	return (ipsapp);
3047 }
3048 
3049 /*
3050  * Perform NAT-traversal cached checksum offset calculations here.
 *
 * Copies the NAT-T remote and local addresses and ports from the supplied
 * extensions into newbie, and stores in newbie->ipsa_inbound_cksum a value
 * folded down to 16 bits that is built from (source - NAT-T remote) and,
 * when the NAT-T local address is not INADDR_ANY, (destination - NAT-T
 * local).
 *
 *   newbie       - SA being filled in.
 *   natt_loc_ext - NAT-T local address extension, may be NULL.
 *   natt_rem_ext - NAT-T remote address extension, may be NULL.
 *   src_addr_ptr - SA source address; only its first 32 bits are read, and
 *                  only when natt_rem_ext is non-NULL.
 *   dst_addr_ptr - SA destination address; only its first 32 bits are read,
 *                  and only when a non-wildcard NAT-T local address is given.
 *
 * When neither extension is present, ipsa_inbound_cksum is set to 0 and
 * nothing else in newbie is touched.
3051  */
3052 static void
3053 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
3054     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
3055     uint32_t *dst_addr_ptr)
3056 {
3057 	struct sockaddr_in *natt_loc, *natt_rem;
3058 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
3059 	uint32_t running_sum = 0;
3060 
	/*
	 * Add the upper 16 bits of x into its lower 16 bits, in place.
	 * Applying it twice reduces any 32-bit value to at most 0xFFFF.
	 * Note that x is evaluated more than once.
	 */
3061 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
3062 
3063 	if (natt_rem_ext != NULL) {
3064 		uint32_t l_src;
3065 		uint32_t l_rem;
3066 
		/* The sockaddr follows the extension header directly. */
3067 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
3068 
3069 		/* Ensured by sadb_addrfix(). */
3070 		ASSERT(natt_rem->sin_family == AF_INET);
3071 
3072 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
3073 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
3074 		l_src = *src_addr_ptr;
3075 		l_rem = *natt_rem_ptr;
3076 
3077 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3078 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
3079 
		/* Convert to host order, then fold each address to 16 bits. */
3080 		l_src = ntohl(l_src);
3081 		DOWN_SUM(l_src);
3082 		DOWN_SUM(l_src);
3083 		l_rem = ntohl(l_rem);
3084 		DOWN_SUM(l_rem);
3085 		DOWN_SUM(l_rem);
3086 
3087 		/*
3088 		 * We're 1's complement for checksums, so check for wraparound
3089 		 * here.
3090 		 */
3091 		if (l_rem > l_src)
3092 			l_src--;
3093 
		/* Unsigned difference; any wrap is folded away just below. */
3094 		running_sum += l_src - l_rem;
3095 
3096 		DOWN_SUM(running_sum);
3097 		DOWN_SUM(running_sum);
3098 	}
3099 
3100 	if (natt_loc_ext != NULL) {
3101 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
3102 
3103 		/* Ensured by sadb_addrfix(). */
3104 		ASSERT(natt_loc->sin_family == AF_INET);
3105 
3106 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
3107 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
3108 
3109 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3110 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
3111 
3112 		/*
3113 		 * NAT-T port agility means we may have natt_loc_ext, but
3114 		 * only for a local-port change.
3115 		 */
3116 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
3117 			uint32_t l_dst = ntohl(*dst_addr_ptr);
3118 			uint32_t l_loc = ntohl(*natt_loc_ptr);
3119 
			/* Fold both host-order addresses to 16 bits. */
3120 			DOWN_SUM(l_loc);
3121 			DOWN_SUM(l_loc);
3122 			DOWN_SUM(l_dst);
3123 			DOWN_SUM(l_dst);
3124 
3125 			/*
3126 			 * We're 1's complement for checksums, so check for
3127 			 * wraparound here.
3128 			 */
3129 			if (l_loc > l_dst)
3130 				l_dst--;
3131 
			/* Accumulate on top of any remote contribution. */
3132 			running_sum += l_dst - l_loc;
3133 			DOWN_SUM(running_sum);
3134 			DOWN_SUM(running_sum);
3135 		}
3136 	}
3137 
	/* Cache the result in the SA; it is 0 if nothing was added above. */
3138 	newbie->ipsa_inbound_cksum = running_sum;
3139 #undef DOWN_SUM
3140 }
3141 
3142 /*
3143  * This function is called from consumers that need to insert a fully-grown
3144  * security association into its tables.  This function takes into account that
3145  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
3146  * hash bucket parameters are set in order of what the SA will be most of the
3147  * time.  (For example, an SA with an unspecified source, and a multicast
3148  * destination will primarily be an outbound SA.  OTOH, if that destination
3149  * is unicast for this node, then the SA will primarily be inbound.)
3150  *
3151  * It takes a lot of parameters because even if clone is B_FALSE, this needs
3152  * to check both buckets for purposes of collision.
3153  *
3154  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
3155  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
3156  * with additional diagnostic information because there is at least one EINVAL
3157  * case here.
3158  */
3159 int
3160 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
3161     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
3162     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
3163     netstack_t *ns, sadbp_t *spp)
3164 {
3165 	ipsa_t *newbie_clone = NULL, *scratch;
3166 	ipsap_t *ipsapp = NULL;
3167 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3168 	sadb_address_t *srcext =
3169 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3170 	sadb_address_t *dstext =
3171 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3172 	sadb_address_t *isrcext =
3173 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3174 	sadb_address_t *idstext =
3175 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3176 	sadb_x_kmc_t *kmcext =
3177 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
3178 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3179 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3180 	sadb_x_pair_t *pair_ext =
3181 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
3182 	sadb_x_replay_ctr_t *replayext =
3183 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
3184 	uint8_t protocol =
3185 	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
3186 	int salt_offset;
3187 	uint8_t *buf_ptr;
3188 #if 0
3189 	/*
3190 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
3191 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
3192 	 */
3193 	sadb_sens_t *sens = (sadb_sens_t *);
3194 #endif
3195 	struct sockaddr_in *src, *dst, *isrc, *idst;
3196 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
3197 	sadb_lifetime_t *soft =
3198 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3199 	sadb_lifetime_t *hard =
3200 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3201 	sadb_lifetime_t	*idle =
3202 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3203 	sa_family_t af;
3204 	int error = 0;
3205 	boolean_t isupdate = (newbie != NULL);
3206 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
3207 	mblk_t *ctl_mp = NULL;
3208 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3209 	ipsec_alginfo_t *alg;
3210 	int		rcode;
3211 
3212 	if (srcext == NULL) {
3213 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3214 		return (EINVAL);
3215 	}
3216 	if (dstext == NULL) {
3217 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3218 		return (EINVAL);
3219 	}
3220 	if (assoc == NULL) {
3221 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3222 		return (EINVAL);
3223 	}
3224 
3225 	src = (struct sockaddr_in *)(srcext + 1);
3226 	src6 = (struct sockaddr_in6 *)(srcext + 1);
3227 	dst = (struct sockaddr_in *)(dstext + 1);
3228 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
3229 	if (isrcext != NULL) {
3230 		isrc = (struct sockaddr_in *)(isrcext + 1);
3231 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
3232 		ASSERT(idstext != NULL);
3233 		idst = (struct sockaddr_in *)(idstext + 1);
3234 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
3235 	} else {
3236 		isrc = NULL;
3237 		isrc6 = NULL;
3238 	}
3239 
3240 	af = src->sin_family;
3241 
3242 	if (af == AF_INET) {
3243 		src_addr_ptr = (uint32_t *)&src->sin_addr;
3244 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
3245 	} else {
3246 		ASSERT(af == AF_INET6);
3247 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
3248 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
3249 	}
3250 
3251 	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
3252 	    cl_inet_checkspi &&
3253 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3254 		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
3255 		    assoc->sadb_sa_spi, NULL);
3256 		if (rcode == -1) {
3257 			return (EEXIST);
3258 		}
3259 	}
3260 
3261 	/*
3262 	 * Check to see if the new SA will be cloned AND paired. The
3263 	 * reason a SA will be cloned is the source or destination addresses
3264 	 * are not specific enough to determine if the SA goes in the outbound
3265 	 * or the inbound hash table, so its cloned and put in both. If
3266 	 * the SA is paired, it's soft linked to another SA for the other
3267 	 * direction. Keeping track and looking up SA's that are direction
3268 	 * unspecific and linked is too hard.
3269 	 */
3270 	if (clone && (pair_ext != NULL)) {
3271 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3272 		return (EINVAL);
3273 	}
3274 
3275 	if (!isupdate) {
3276 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3277 		    src_addr_ptr, dst_addr_ptr, af, ns);
3278 		if (newbie == NULL)
3279 			return (ENOMEM);
3280 	}
3281 
3282 	mutex_enter(&newbie->ipsa_lock);
3283 
3284 	if (isrc != NULL) {
3285 		if (isrc->sin_family == AF_INET) {
3286 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3287 				if (srcext->sadb_address_proto != 0) {
3288 					/*
3289 					 * Mismatched outer-packet protocol
3290 					 * and inner-packet address family.
3291 					 */
3292 					mutex_exit(&newbie->ipsa_lock);
3293 					error = EPROTOTYPE;
3294 					*diagnostic =
3295 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3296 					goto error;
3297 				} else {
3298 					/* Fill in with explicit protocol. */
3299 					srcext->sadb_address_proto =
3300 					    IPPROTO_ENCAP;
3301 					dstext->sadb_address_proto =
3302 					    IPPROTO_ENCAP;
3303 				}
3304 			}
3305 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3306 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3307 		} else {
3308 			ASSERT(isrc->sin_family == AF_INET6);
3309 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3310 				if (srcext->sadb_address_proto != 0) {
3311 					/*
3312 					 * Mismatched outer-packet protocol
3313 					 * and inner-packet address family.
3314 					 */
3315 					mutex_exit(&newbie->ipsa_lock);
3316 					error = EPROTOTYPE;
3317 					*diagnostic =
3318 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3319 					goto error;
3320 				} else {
3321 					/* Fill in with explicit protocol. */
3322 					srcext->sadb_address_proto =
3323 					    IPPROTO_IPV6;
3324 					dstext->sadb_address_proto =
3325 					    IPPROTO_IPV6;
3326 				}
3327 			}
3328 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3329 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3330 		}
3331 		newbie->ipsa_innerfam = isrc->sin_family;
3332 
3333 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3334 		    newbie->ipsa_innerfam);
3335 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3336 		    newbie->ipsa_innerfam);
3337 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3338 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3339 
3340 		/* Unique value uses inner-ports for Tunnel Mode... */
3341 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3342 		    idst->sin_port, dstext->sadb_address_proto,
3343 		    idstext->sadb_address_proto);
3344 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3345 		    idst->sin_port, dstext->sadb_address_proto,
3346 		    idstext->sadb_address_proto);
3347 	} else {
3348 		/* ... and outer-ports for Transport Mode. */
3349 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3350 		    dst->sin_port, dstext->sadb_address_proto, 0);
3351 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3352 		    dst->sin_port, dstext->sadb_address_proto, 0);
3353 	}
3354 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3355 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3356 
3357 	sadb_nat_calculations(newbie,
3358 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3359 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3360 	    src_addr_ptr, dst_addr_ptr);
3361 
3362 	newbie->ipsa_type = samsg->sadb_msg_satype;
3363 
3364 	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3365 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3366 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3367 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3368 
3369 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3370 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3371 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3372 		mutex_exit(&newbie->ipsa_lock);
3373 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3374 		error = EINVAL;
3375 		goto error;
3376 	}
3377 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3378 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3379 		mutex_exit(&newbie->ipsa_lock);
3380 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3381 		error = EINVAL;
3382 		goto error;
3383 	}
3384 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3385 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3386 		mutex_exit(&newbie->ipsa_lock);
3387 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3388 		error = EINVAL;
3389 		goto error;
3390 	}
3391 	/*
3392 	 * If unspecified source address, force replay_wsize to 0.
3393 	 * This is because an SA that has multiple sources of secure
3394 	 * traffic cannot enforce a replay counter w/o synchronizing the
3395 	 * senders.
3396 	 */
3397 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3398 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3399 	else
3400 		newbie->ipsa_replay_wsize = 0;
3401 
3402 	newbie->ipsa_addtime = gethrestime_sec();
3403 
3404 	if (kmcext != NULL) {
3405 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3406 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3407 	}
3408 
3409 	/*
3410 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3411 	 * The spec says that one can update current lifetimes, but
3412 	 * that seems impractical, especially in the larval-to-mature
3413 	 * update that this function performs.
3414 	 */
3415 	if (soft != NULL) {
3416 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3417 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3418 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3419 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3420 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3421 	}
3422 	if (hard != NULL) {
3423 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3424 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3425 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3426 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3427 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3428 	}
3429 	if (idle != NULL) {
3430 		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3431 		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3432 		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3433 		    newbie->ipsa_idleaddlt;
3434 		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3435 	}
3436 
3437 	newbie->ipsa_authtmpl = NULL;
3438 	newbie->ipsa_encrtmpl = NULL;
3439 
3440 	if (akey != NULL) {
3441 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3442 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3443 		/* In case we have to round up to the next byte... */
3444 		if ((akey->sadb_key_bits & 0x7) != 0)
3445 			newbie->ipsa_authkeylen++;
3446 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3447 		    KM_NOSLEEP);
3448 		if (newbie->ipsa_authkey == NULL) {
3449 			error = ENOMEM;
3450 			mutex_exit(&newbie->ipsa_lock);
3451 			goto error;
3452 		}
3453 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3454 		bzero(akey + 1, newbie->ipsa_authkeylen);
3455 
3456 		/*
3457 		 * Pre-initialize the kernel crypto framework key
3458 		 * structure.
3459 		 */
3460 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3461 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3462 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3463 
3464 		mutex_enter(&ipss->ipsec_alg_lock);
3465 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3466 		    [newbie->ipsa_auth_alg];
3467 		if (alg != NULL && ALG_VALID(alg)) {
3468 			newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3469 			newbie->ipsa_amech.cm_param =
3470 			    (char *)&newbie->ipsa_mac_len;
3471 			newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3472 			newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3473 		} else {
3474 			newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3475 		}
3476 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3477 		mutex_exit(&ipss->ipsec_alg_lock);
3478 		if (error != 0) {
3479 			mutex_exit(&newbie->ipsa_lock);
3480 			/*
3481 			 * An error here indicates that alg is the wrong type
3482 			 * (IE: not authentication) or its not in the alg tables
3483 			 * created by ipsecalgs(1m), or Kcf does not like the
3484 			 * parameters passed in with this algorithm, which is
3485 			 * probably a coding error!
3486 			 */
3487 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3488 
3489 			goto error;
3490 		}
3491 	}
3492 
3493 	if (ekey != NULL) {
3494 		mutex_enter(&ipss->ipsec_alg_lock);
3495 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3496 		    [newbie->ipsa_encr_alg];
3497 
3498 		if (alg != NULL && ALG_VALID(alg)) {
3499 			newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3500 			newbie->ipsa_datalen = alg->alg_datalen;
3501 			if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3502 				newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
3503 
3504 			if (alg->alg_flags & ALG_FLAG_COMBINED) {
3505 				newbie->ipsa_flags |= IPSA_F_COMBINED;
3506 				newbie->ipsa_mac_len =  alg->alg_icvlen;
3507 			}
3508 
3509 			if (alg->alg_flags & ALG_FLAG_CCM)
3510 				newbie->ipsa_noncefunc = ccm_params_init;
3511 			else if (alg->alg_flags & ALG_FLAG_GCM)
3512 				newbie->ipsa_noncefunc = gcm_params_init;
3513 			else newbie->ipsa_noncefunc = cbc_params_init;
3514 
3515 			newbie->ipsa_saltlen = alg->alg_saltlen;
3516 			newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3517 			newbie->ipsa_iv_len = alg->alg_ivlen;
3518 			newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3519 			    newbie->ipsa_iv_len;
3520 			newbie->ipsa_emech.cm_param = NULL;
3521 			newbie->ipsa_emech.cm_param_len = 0;
3522 		} else {
3523 			newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3524 		}
3525 		mutex_exit(&ipss->ipsec_alg_lock);
3526 
3527 		/*
3528 		 * The byte stream following the sadb_key_t is made up of:
3529 		 * key bytes, [salt bytes], [IV initial value]
3530 		 * All of these have variable length. The IV is typically
3531 		 * randomly generated by this function and not passed in.
3532 		 * By supporting the injection of a known IV, the whole
3533 		 * IPsec subsystem and the underlying crypto subsystem
3534 		 * can be tested with known test vectors.
3535 		 *
3536 		 * The keying material has been checked by ext_check()
3537 		 * and ipsec_valid_key_size(), after removing salt/IV
3538 		 * bits, whats left is the encryption key. If this is too
3539 		 * short, ipsec_create_ctx_tmpl() will fail and the SA
3540 		 * won't get created.
3541 		 *
3542 		 * set ipsa_encrkeylen to length of key only.
3543 		 */
3544 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3545 		newbie->ipsa_encrkeybits -= ekey->sadb_key_reserved;
3546 		newbie->ipsa_encrkeybits -= newbie->ipsa_saltbits;
3547 		newbie->ipsa_encrkeylen = SADB_1TO8(newbie->ipsa_encrkeybits);
3548 
3549 		/* In case we have to round up to the next byte... */
3550 		if ((ekey->sadb_key_bits & 0x7) != 0)
3551 			newbie->ipsa_encrkeylen++;
3552 
3553 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3554 		    KM_NOSLEEP);
3555 		if (newbie->ipsa_encrkey == NULL) {
3556 			error = ENOMEM;
3557 			mutex_exit(&newbie->ipsa_lock);
3558 			goto error;
3559 		}
3560 
3561 		buf_ptr = (uint8_t *)(ekey + 1);
3562 		bcopy(buf_ptr, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3563 
3564 		if (newbie->ipsa_flags & IPSA_F_COMBINED) {
3565 			/*
3566 			 * Combined mode algs need a nonce. Copy the salt and
3567 			 * IV into a buffer. The ipsa_nonce is a pointer into
3568 			 * this buffer, some bytes at the start of the buffer
3569 			 * may be unused, depends on the salt length. The IV
3570 			 * is 64 bit aligned so it can be incremented as a
3571 			 * uint64_t. Zero out key in samsg_t before freeing.
3572 			 */
3573 
3574 			newbie->ipsa_nonce_buf = kmem_alloc(
3575 			    sizeof (ipsec_nonce_t), KM_NOSLEEP);
3576 			if (newbie->ipsa_nonce_buf == NULL) {
3577 				error = ENOMEM;
3578 				mutex_exit(&newbie->ipsa_lock);
3579 				goto error;
3580 			}
3581 			/*
3582 			 * Initialize nonce and salt pointers to point
3583 			 * to the nonce buffer. This is just in case we get
3584 			 * bad data, the pointers will be valid, the data
3585 			 * won't be.
3586 			 *
3587 			 * See sadb.h for layout of nonce.
3588 			 */
3589 			newbie->ipsa_iv = &newbie->ipsa_nonce_buf->iv;
3590 			newbie->ipsa_salt = (uint8_t *)newbie->ipsa_nonce_buf;
3591 			newbie->ipsa_nonce = newbie->ipsa_salt;
3592 			if (newbie->ipsa_saltlen != 0) {
3593 				salt_offset = MAXSALTSIZE -
3594 				    newbie->ipsa_saltlen;
3595 				newbie->ipsa_salt = (uint8_t *)
3596 				    &newbie->ipsa_nonce_buf->salt[salt_offset];
3597 				newbie->ipsa_nonce = newbie->ipsa_salt;
3598 				buf_ptr += newbie->ipsa_encrkeylen;
3599 				bcopy(buf_ptr, newbie->ipsa_salt,
3600 				    newbie->ipsa_saltlen);
3601 			}
3602 			/*
3603 			 * The IV for CCM/GCM mode increments, it should not
3604 			 * repeat. Get a random value for the IV, make a
3605 			 * copy, the SA will expire when/if the IV ever
3606 			 * wraps back to the initial value. If an Initial IV
3607 			 * is passed in via PF_KEY, save this in the SA.
3608 			 * Initialising IV for inbound is pointless as its
3609 			 * taken from the inbound packet.
3610 			 */
3611 			if (!is_inbound) {
3612 				if (ekey->sadb_key_reserved != 0) {
3613 					buf_ptr += newbie->ipsa_saltlen;
3614 					bcopy(buf_ptr, (uint8_t *)newbie->
3615 					    ipsa_iv, SADB_1TO8(ekey->
3616 					    sadb_key_reserved));
3617 				} else {
3618 					(void) random_get_pseudo_bytes(
3619 					    (uint8_t *)newbie->ipsa_iv,
3620 					    newbie->ipsa_iv_len);
3621 				}
3622 				newbie->ipsa_iv_softexpire =
3623 				    (*newbie->ipsa_iv) << 9;
3624 				newbie->ipsa_iv_hardexpire = *newbie->ipsa_iv;
3625 			}
3626 		}
3627 		bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3628 
3629 		/*
3630 		 * Pre-initialize the kernel crypto framework key
3631 		 * structure.
3632 		 */
3633 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3634 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3635 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3636 
3637 		mutex_enter(&ipss->ipsec_alg_lock);
3638 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3639 		mutex_exit(&ipss->ipsec_alg_lock);
3640 		if (error != 0) {
3641 			mutex_exit(&newbie->ipsa_lock);
3642 			/* See above for error explanation. */
3643 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3644 			goto error;
3645 		}
3646 	}
3647 
3648 	/*
3649 	 * Ptrs to processing functions.
3650 	 */
3651 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3652 		ipsecesp_init_funcs(newbie);
3653 	else
3654 		ipsecah_init_funcs(newbie);
3655 	ASSERT(newbie->ipsa_output_func != NULL &&
3656 	    newbie->ipsa_input_func != NULL);
3657 
3658 	/*
3659 	 * Certificate ID stuff.
3660 	 */
3661 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3662 		sadb_ident_t *id =
3663 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3664 
3665 		/*
3666 		 * Can assume strlen() will return okay because ext_check() in
3667 		 * keysock.c prepares the string for us.
3668 		 */
3669 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3670 		    (char *)(id+1), ns);
3671 		if (newbie->ipsa_src_cid == NULL) {
3672 			error = ENOMEM;
3673 			mutex_exit(&newbie->ipsa_lock);
3674 			goto error;
3675 		}
3676 	}
3677 
3678 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3679 		sadb_ident_t *id =
3680 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3681 
3682 		/*
3683 		 * Can assume strlen() will return okay because ext_check() in
3684 		 * keysock.c prepares the string for us.
3685 		 */
3686 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3687 		    (char *)(id+1), ns);
3688 		if (newbie->ipsa_dst_cid == NULL) {
3689 			error = ENOMEM;
3690 			mutex_exit(&newbie->ipsa_lock);
3691 			goto error;
3692 		}
3693 	}
3694 
3695 #if 0
3696 	/* XXXMLS  SENSITIVITY handling code. */
3697 	if (sens != NULL) {
3698 		int i;
3699 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3700 
3701 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3702 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3703 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3704 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3705 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3706 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3707 		    KM_NOSLEEP);
3708 		if (newbie->ipsa_integ == NULL) {
3709 			error = ENOMEM;
3710 			mutex_exit(&newbie->ipsa_lock);
3711 			goto error;
3712 		}
3713 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3714 		    KM_NOSLEEP);
3715 		if (newbie->ipsa_sens == NULL) {
3716 			error = ENOMEM;
3717 			mutex_exit(&newbie->ipsa_lock);
3718 			goto error;
3719 		}
3720 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3721 			newbie->ipsa_sens[i] = *bitmap;
3722 			bitmap++;
3723 		}
3724 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3725 			newbie->ipsa_integ[i] = *bitmap;
3726 			bitmap++;
3727 		}
3728 	}
3729 
3730 #endif
3731 
3732 	if (replayext != NULL) {
3733 		if ((replayext->sadb_x_rc_replay32 == 0) &&
3734 		    (replayext->sadb_x_rc_replay64 != 0)) {
3735 			error = EOPNOTSUPP;
3736 			*diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3737 			mutex_exit(&newbie->ipsa_lock);
3738 			goto error;
3739 		}
3740 		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3741 	}
3742 
3743 	/* now that the SA has been updated, set its new state */
3744 	newbie->ipsa_state = assoc->sadb_sa_state;
3745 
3746 	if (clone) {
3747 		newbie->ipsa_haspeer = B_TRUE;
3748 	} else {
3749 		if (!is_inbound) {
3750 			lifetime_fuzz(newbie);
3751 		}
3752 	}
3753 	/*
3754 	 * The less locks I hold when doing an insertion and possible cloning,
3755 	 * the better!
3756 	 */
3757 	mutex_exit(&newbie->ipsa_lock);
3758 
3759 	if (clone) {
3760 		newbie_clone = sadb_cloneassoc(newbie);
3761 
3762 		if (newbie_clone == NULL) {
3763 			error = ENOMEM;
3764 			goto error;
3765 		}
3766 	}
3767 
3768 	/*
3769 	 * Enter the bucket locks.  The order of entry is outbound,
3770 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3771 	 * based on the destination address type.  If the destination address
3772 	 * type is for a node that isn't mine (or potentially mine), the
3773 	 * "primary" bucket is the outbound one.
3774 	 */
3775 	if (!is_inbound) {
3776 		/* primary == outbound */
3777 		mutex_enter(&primary->isaf_lock);
3778 		mutex_enter(&secondary->isaf_lock);
3779 	} else {
3780 		/* primary == inbound */
3781 		mutex_enter(&secondary->isaf_lock);
3782 		mutex_enter(&primary->isaf_lock);
3783 	}
3784 
3785 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3786 	    newbie->ipsa_spi));
3787 
3788 	/*
3789 	 * sadb_insertassoc() doesn't increment the reference
3790 	 * count.  We therefore have to increment the
3791 	 * reference count one more time to reflect the
3792 	 * pointers of the table that reference this SA.
3793 	 */
3794 	IPSA_REFHOLD(newbie);
3795 
3796 	if (isupdate) {
3797 		/*
3798 		 * Unlink from larval holding cell in the "inbound" fanout.
3799 		 */
3800 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3801 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3802 		sadb_unlinkassoc(newbie);
3803 	}
3804 
3805 	mutex_enter(&newbie->ipsa_lock);
3806 	error = sadb_insertassoc(newbie, primary);
3807 	if (error == 0) {
3808 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3809 		    is_inbound);
3810 	}
3811 	mutex_exit(&newbie->ipsa_lock);
3812 
3813 	if (error != 0) {
3814 		/*
3815 		 * Since sadb_insertassoc() failed, we must decrement the
3816 		 * refcount again so the cleanup code will actually free
3817 		 * the offending SA.
3818 		 */
3819 		IPSA_REFRELE(newbie);
3820 		goto error_unlock;
3821 	}
3822 
3823 	if (newbie_clone != NULL) {
3824 		mutex_enter(&newbie_clone->ipsa_lock);
3825 		error = sadb_insertassoc(newbie_clone, secondary);
3826 		mutex_exit(&newbie_clone->ipsa_lock);
3827 		if (error != 0) {
3828 			/* Collision in secondary table. */
3829 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3830 			goto error_unlock;
3831 		}
3832 		IPSA_REFHOLD(newbie_clone);
3833 	} else {
3834 		ASSERT(primary != secondary);
3835 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3836 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3837 		if (scratch != NULL) {
3838 			/* Collision in secondary table. */
3839 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3840 			/* Set the error, since ipsec_getassocbyspi() can't. */
3841 			error = EEXIST;
3842 			goto error_unlock;
3843 		}
3844 	}
3845 
3846 	/* OKAY!  So let's do some reality check assertions. */
3847 
3848 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3849 	ASSERT(newbie_clone == NULL ||
3850 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3851 	/*
3852 	 * If hardware acceleration could happen, send it.
3853 	 */
3854 	if (ctl_mp != NULL) {
3855 		putnext(ip_q, ctl_mp);
3856 		ctl_mp = NULL;
3857 	}
3858 
3859 error_unlock:
3860 
3861 	/*
3862 	 * We can exit the locks in any order.	Only entrance needs to
3863 	 * follow any protocol.
3864 	 */
3865 	mutex_exit(&secondary->isaf_lock);
3866 	mutex_exit(&primary->isaf_lock);
3867 
3868 	if (pair_ext != NULL && error == 0) {
3869 		/* update pair_spi if it exists. */
3870 		ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
3871 		if (ipsapp == NULL) {
3872 			error = ESRCH;
3873 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
3874 		} else if (ipsapp->ipsap_psa_ptr != NULL) {
3875 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3876 			error = EINVAL;
3877 		} else {
3878 			/* update_pairing() sets diagnostic */
3879 			error = update_pairing(ipsapp, ksi, diagnostic, spp);
3880 		}
3881 	}
3882 	/* Common error point for this routine. */
3883 error:
3884 	if (newbie != NULL) {
3885 		if (error != 0) {
3886 			/* This SA is broken, let the reaper clean up. */
3887 			mutex_enter(&newbie->ipsa_lock);
3888 			newbie->ipsa_state = IPSA_STATE_DEAD;
3889 			newbie->ipsa_hardexpiretime = 1;
3890 			mutex_exit(&newbie->ipsa_lock);
3891 		}
3892 		IPSA_REFRELE(newbie);
3893 	}
3894 	if (newbie_clone != NULL) {
3895 		IPSA_REFRELE(newbie_clone);
3896 	}
3897 	if (ctl_mp != NULL)
3898 		freemsg(ctl_mp);
3899 
3900 	if (error == 0) {
3901 		/*
3902 		 * Construct favorable PF_KEY return message and send to
3903 		 * keysock. Update the flags in the original keysock message
3904 		 * to reflect the actual flags in the new SA.
3905 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3906 		 * make sure to REFHOLD, call, then REFRELE.)
3907 		 */
3908 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3909 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3910 	}
3911 
3912 	destroy_ipsa_pair(ipsapp);
3913 	return (error);
3914 }
3915 
3916 /*
3917  * Set the time of first use for a security association.  Update any
3918  * expiration times as a result.
3919  */
3920 void
3921 sadb_set_usetime(ipsa_t *assoc)
3922 {
3923 	time_t snapshot = gethrestime_sec();
3924 
3925 	mutex_enter(&assoc->ipsa_lock);
3926 	assoc->ipsa_lastuse = snapshot;
3927 	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3928 
3929 	/*
3930 	 * Caller does check usetime before calling me usually, and
3931 	 * double-checking is better than a mutex_enter/exit hit.
3932 	 */
3933 	if (assoc->ipsa_usetime == 0) {
3934 		/*
3935 		 * This is redundant for outbound SA's, as
3936 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3937 		 * Inbound SAs, however, have no such protection.
3938 		 */
3939 		assoc->ipsa_flags |= IPSA_F_USED;
3940 		assoc->ipsa_usetime = snapshot;
3941 
3942 		/*
3943 		 * After setting the use time, see if we have a use lifetime
3944 		 * that would cause the actual SA expiration time to shorten.
3945 		 */
3946 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3947 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3948 	}
3949 	mutex_exit(&assoc->ipsa_lock);
3950 }
3951 
3952 /*
3953  * Send up a PF_KEY expire message for this association.
3954  */
3955 static void
3956 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3957 {
3958 	mblk_t *mp, *mp1;
3959 	int alloclen, af;
3960 	sadb_msg_t *samsg;
3961 	sadb_lifetime_t *current, *expire;
3962 	sadb_sa_t *saext;
3963 	uint8_t *end;
3964 	boolean_t tunnel_mode;
3965 
3966 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3967 
3968 	/* Don't bother sending if there's no queue. */
3969 	if (pfkey_q == NULL)
3970 		return;
3971 
3972 	/* If the SA is one of a pair, only SOFT expire the OUTBOUND SA */
3973 	if (assoc->ipsa_state == IPSA_STATE_DYING &&
3974 	    (assoc->ipsa_flags & IPSA_F_PAIRED) &&
3975 	    !(assoc->ipsa_flags & IPSA_F_OUTBOUND)) {
3976 		return;
3977 	}
3978 
3979 	mp = sadb_keysock_out(0);
3980 	if (mp == NULL) {
3981 		/* cmn_err(CE_WARN, */
3982 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3983 		return;
3984 	}
3985 
3986 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3987 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3988 
3989 	af = assoc->ipsa_addrfam;
3990 	switch (af) {
3991 	case AF_INET:
3992 		alloclen += 2 * sizeof (struct sockaddr_in);
3993 		break;
3994 	case AF_INET6:
3995 		alloclen += 2 * sizeof (struct sockaddr_in6);
3996 		break;
3997 	default:
3998 		/* Won't happen unless there's a kernel bug. */
3999 		freeb(mp);
4000 		cmn_err(CE_WARN,
4001 		    "sadb_expire_assoc: Unknown address length.\n");
4002 		return;
4003 	}
4004 
4005 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
4006 	if (tunnel_mode) {
4007 		alloclen += 2 * sizeof (sadb_address_t);
4008 		switch (assoc->ipsa_innerfam) {
4009 		case AF_INET:
4010 			alloclen += 2 * sizeof (struct sockaddr_in);
4011 			break;
4012 		case AF_INET6:
4013 			alloclen += 2 * sizeof (struct sockaddr_in6);
4014 			break;
4015 		default:
4016 			/* Won't happen unless there's a kernel bug. */
4017 			freeb(mp);
4018 			cmn_err(CE_WARN, "sadb_expire_assoc: "
4019 			    "Unknown inner address length.\n");
4020 			return;
4021 		}
4022 	}
4023 
4024 	mp->b_cont = allocb(alloclen, BPRI_HI);
4025 	if (mp->b_cont == NULL) {
4026 		freeb(mp);
4027 		/* cmn_err(CE_WARN, */
4028 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
4029 		return;
4030 	}
4031 
4032 	mp1 = mp;
4033 	mp = mp->b_cont;
4034 	end = mp->b_wptr + alloclen;
4035 
4036 	samsg = (sadb_msg_t *)mp->b_wptr;
4037 	mp->b_wptr += sizeof (*samsg);
4038 	samsg->sadb_msg_version = PF_KEY_V2;
4039 	samsg->sadb_msg_type = SADB_EXPIRE;
4040 	samsg->sadb_msg_errno = 0;
4041 	samsg->sadb_msg_satype = assoc->ipsa_type;
4042 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
4043 	samsg->sadb_msg_reserved = 0;
4044 	samsg->sadb_msg_seq = 0;
4045 	samsg->sadb_msg_pid = 0;
4046 
4047 	saext = (sadb_sa_t *)mp->b_wptr;
4048 	mp->b_wptr += sizeof (*saext);
4049 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
4050 	saext->sadb_sa_exttype = SADB_EXT_SA;
4051 	saext->sadb_sa_spi = assoc->ipsa_spi;
4052 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
4053 	saext->sadb_sa_state = assoc->ipsa_state;
4054 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
4055 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
4056 	saext->sadb_sa_flags = assoc->ipsa_flags;
4057 
4058 	current = (sadb_lifetime_t *)mp->b_wptr;
4059 	mp->b_wptr += sizeof (sadb_lifetime_t);
4060 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
4061 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
4062 	/* We do not support the concept. */
4063 	current->sadb_lifetime_allocations = 0;
4064 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
4065 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
4066 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
4067 
4068 	expire = (sadb_lifetime_t *)mp->b_wptr;
4069 	mp->b_wptr += sizeof (*expire);
4070 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
4071 
4072 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
4073 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
4074 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
4075 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
4076 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
4077 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
4078 	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
4079 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
4080 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
4081 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
4082 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
4083 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
4084 	} else {
4085 		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
4086 		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
4087 		expire->sadb_lifetime_allocations = 0;
4088 		expire->sadb_lifetime_bytes = 0;
4089 		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
4090 		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
4091 	}
4092 
4093 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
4094 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
4095 	    SA_PROTO(assoc), 0);
4096 	ASSERT(mp->b_wptr != NULL);
4097 
4098 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
4099 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
4100 	    SA_PROTO(assoc), 0);
4101 	ASSERT(mp->b_wptr != NULL);
4102 
4103 	if (tunnel_mode) {
4104 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
4105 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
4106 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
4107 		    assoc->ipsa_innersrcpfx);
4108 		ASSERT(mp->b_wptr != NULL);
4109 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
4110 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
4111 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
4112 		    assoc->ipsa_innerdstpfx);
4113 		ASSERT(mp->b_wptr != NULL);
4114 	}
4115 
4116 	/* Can just putnext, we're ready to go! */
4117 	putnext(pfkey_q, mp1);
4118 }
4119 
4120 /*
4121  * "Age" the SA with the number of bytes that was used to protect traffic.
4122  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
4123  * enough "charge" left in the SA to protect the data.	Return B_FALSE
4124  * otherwise.  (If B_FALSE is returned, the association either was, or became
4125  * DEAD.)
4126  */
4127 boolean_t
4128 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
4129     boolean_t sendmsg)
4130 {
4131 	boolean_t rc = B_TRUE;
4132 	uint64_t newtotal;
4133 
4134 	mutex_enter(&assoc->ipsa_lock);
4135 	newtotal = assoc->ipsa_bytes + bytes;
4136 	if (assoc->ipsa_hardbyteslt != 0 &&
4137 	    newtotal >= assoc->ipsa_hardbyteslt) {
4138 		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
4139 			sadb_delete_cluster(assoc);
4140 			/*
4141 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
4142 			 * this off on another non-interrupt thread.  Also
4143 			 * unlink this SA immediately.
4144 			 */
4145 			assoc->ipsa_state = IPSA_STATE_DEAD;
4146 			if (sendmsg)
4147 				sadb_expire_assoc(pfkey_q, assoc);
4148 			/*
4149 			 * Set non-zero expiration time so sadb_age_assoc()
4150 			 * will work when reaping.
4151 			 */
4152 			assoc->ipsa_hardexpiretime = (time_t)1;
4153 		} /* Else someone beat me to it! */
4154 		rc = B_FALSE;
4155 	} else if (assoc->ipsa_softbyteslt != 0 &&
4156 	    (newtotal >= assoc->ipsa_softbyteslt)) {
4157 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
4158 			/*
4159 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
4160 			 * this off on another non-interrupt thread.
4161 			 */
4162 			assoc->ipsa_state = IPSA_STATE_DYING;
4163 			assoc->ipsa_bytes = newtotal;
4164 			if (sendmsg)
4165 				sadb_expire_assoc(pfkey_q, assoc);
4166 		} /* Else someone beat me to it! */
4167 	}
4168 	if (rc == B_TRUE)
4169 		assoc->ipsa_bytes = newtotal;
4170 	mutex_exit(&assoc->ipsa_lock);
4171 	return (rc);
4172 }
4173 
4174 /*
4175  * Push one or more DL_CO_DELETE messages queued up by
4176  * sadb_torch_assoc down to the underlying driver now that it's a
4177  * convenient time for it (i.e., ipsa bucket locks not held).
4178  */
4179 static void
4180 sadb_drain_torchq(queue_t *q, mblk_t *mp)
4181 {
4182 	while (mp != NULL) {
4183 		mblk_t *next = mp->b_next;
4184 		mp->b_next = NULL;
4185 		if (q != NULL)
4186 			putnext(q, mp);
4187 		else
4188 			freemsg(mp);
4189 		mp = next;
4190 	}
4191 }
4192 
4193 /*
4194  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
4195  *     sadb_age_assoc().
4196  *
4197  * If SA is hardware-accelerated, and we can't allocate the mblk
4198  * containing the DL_CO_DELETE, just return; it will remain in the
4199  * table and be swept up by sadb_ager() in a subsequent pass.
4200  */
4201 static ipsa_t *
4202 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
4203 {
4204 	mblk_t *mp;
4205 
4206 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4207 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
4208 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
4209 
4210 	/*
4211 	 * Force cached SAs to be revalidated..
4212 	 */
4213 	head->isaf_gen++;
4214 
4215 	if (sa->ipsa_flags & IPSA_F_HW) {
4216 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
4217 		if (mp == NULL) {
4218 			mutex_exit(&sa->ipsa_lock);
4219 			return (NULL);
4220 		}
4221 		mp->b_next = *mq;
4222 		*mq = mp;
4223 	}
4224 	mutex_exit(&sa->ipsa_lock);
4225 	sadb_unlinkassoc(sa);
4226 
4227 	return (NULL);
4228 }
4229 
4230 /*
4231  * Do various SA-is-idle activities depending on delta (the number of idle
4232  * seconds on the SA) and/or other properties of the SA.
4233  *
4234  * Return B_TRUE if I've sent a packet, because I have to drop the
4235  * association's mutex before sending a packet out the wire.
4236  */
4237 /* ARGSUSED */
4238 static boolean_t
4239 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
4240 {
4241 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
4242 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
4243 
4244 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
4245 
4246 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
4247 	    delta >= nat_t_interval &&
4248 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
4249 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
4250 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
4251 		mutex_exit(&assoc->ipsa_lock);
4252 		ipsecesp_send_keepalive(assoc);
4253 		return (B_TRUE);
4254 	}
4255 	return (B_FALSE);
4256 }
4257 
4258 /*
4259  * Return "assoc" if haspeer is true and I send an expire.  This allows
4260  * the consumers' aging functions to tidy up an expired SA's peer.
4261  */
4262 static ipsa_t *
4263 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
4264     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
4265 {
4266 	ipsa_t *retval = NULL;
4267 	boolean_t dropped_mutex = B_FALSE;
4268 
4269 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4270 
4271 	mutex_enter(&assoc->ipsa_lock);
4272 
4273 	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4274 	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4275 	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4276 	    (assoc->ipsa_hardexpiretime != 0))) &&
4277 	    (assoc->ipsa_hardexpiretime <= current)) {
4278 		assoc->ipsa_state = IPSA_STATE_DEAD;
4279 		return (sadb_torch_assoc(head, assoc, inbound, mq));
4280 	}
4281 
4282 	/*
4283 	 * Check lifetimes.  Fortunately, SA setup is done
4284 	 * such that there are only two times to look at,
4285 	 * softexpiretime, and hardexpiretime.
4286 	 *
4287 	 * Check hard first.
4288 	 */
4289 
4290 	if (assoc->ipsa_hardexpiretime != 0 &&
4291 	    assoc->ipsa_hardexpiretime <= current) {
4292 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4293 			return (sadb_torch_assoc(head, assoc, inbound, mq));
4294 
4295 		if (inbound) {
4296 			sadb_delete_cluster(assoc);
4297 		}
4298 
4299 		/*
4300 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4301 		 */
4302 		assoc->ipsa_state = IPSA_STATE_DEAD;
4303 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4304 			/*
4305 			 * If the SA is paired or peered with another, put
4306 			 * a copy on a list which can be processed later, the
4307 			 * pair/peer SA needs to be updated so the both die
4308 			 * at the same time.
4309 			 *
4310 			 * If I return assoc, I have to bump up its reference
4311 			 * count to keep with the ipsa_t reference count
4312 			 * semantics.
4313 			 */
4314 			IPSA_REFHOLD(assoc);
4315 			retval = assoc;
4316 		}
4317 		sadb_expire_assoc(pfkey_q, assoc);
4318 		assoc->ipsa_hardexpiretime = current + reap_delay;
4319 	} else if (assoc->ipsa_softexpiretime != 0 &&
4320 	    assoc->ipsa_softexpiretime <= current &&
4321 	    assoc->ipsa_state < IPSA_STATE_DYING) {
4322 		/*
4323 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4324 		 * this off on another non-interrupt thread.
4325 		 */
4326 		assoc->ipsa_state = IPSA_STATE_DYING;
4327 		if (assoc->ipsa_haspeer) {
4328 			/*
4329 			 * If the SA has a peer, update the peer's state
4330 			 * on SOFT_EXPIRE, this is mostly to prevent two
4331 			 * expire messages from effectively the same SA.
4332 			 *
4333 			 * Don't care about paired SA's, then can (and should)
4334 			 * be able to soft expire at different times.
4335 			 *
4336 			 * If I return assoc, I have to bump up its
4337 			 * reference count to keep with the ipsa_t reference
4338 			 * count semantics.
4339 			 */
4340 			IPSA_REFHOLD(assoc);
4341 			retval = assoc;
4342 		}
4343 		sadb_expire_assoc(pfkey_q, assoc);
4344 	} else if (assoc->ipsa_idletime != 0 &&
4345 	    assoc->ipsa_idleexpiretime <= current) {
4346 		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4347 			assoc->ipsa_state = IPSA_STATE_IDLE;
4348 		}
4349 
4350 		/*
4351 		 * Need to handle Mature case
4352 		 */
4353 		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4354 			sadb_expire_assoc(pfkey_q, assoc);
4355 		}
4356 	} else {
4357 		/* Check idle time activities. */
4358 		dropped_mutex = sadb_idle_activities(assoc,
4359 		    current - assoc->ipsa_lastuse, inbound);
4360 	}
4361 
4362 	if (!dropped_mutex)
4363 		mutex_exit(&assoc->ipsa_lock);
4364 	return (retval);
4365 }
4366 
4367 /*
4368  * Called by a consumer protocol to do ther dirty work of reaping dead
4369  * Security Associations.
4370  *
4371  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4372  * SA's that are already marked DEAD, so expired SA's are only reaped
4373  * the second time sadb_ager() runs.
4374  */
4375 void
4376 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
4377     netstack_t *ns)
4378 {
4379 	int i;
4380 	isaf_t *bucket;
4381 	ipsa_t *assoc, *spare;
4382 	iacqf_t *acqlist;
4383 	ipsacq_t *acqrec, *spareacq;
4384 	templist_t *haspeerlist, *newbie;
4385 	/* Snapshot current time now. */
4386 	time_t current = gethrestime_sec();
4387 	mblk_t *mq = NULL;
4388 	haspeerlist = NULL;
4389 
4390 	/*
4391 	 * Do my dirty work.  This includes aging real entries, aging
4392 	 * larvals, and aging outstanding ACQUIREs.
4393 	 *
4394 	 * I hope I don't tie up resources for too long.
4395 	 */
4396 
4397 	/* Age acquires. */
4398 
4399 	for (i = 0; i < sp->sdb_hashsize; i++) {
4400 		acqlist = &sp->sdb_acq[i];
4401 		mutex_enter(&acqlist->iacqf_lock);
4402 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4403 		    acqrec = spareacq) {
4404 			spareacq = acqrec->ipsacq_next;
4405 			if (current > acqrec->ipsacq_expire)
4406 				sadb_destroy_acquire(acqrec, ns);
4407 		}
4408 		mutex_exit(&acqlist->iacqf_lock);
4409 	}
4410 
4411 	/* Age inbound associations. */
4412 	for (i = 0; i < sp->sdb_hashsize; i++) {
4413 		bucket = &(sp->sdb_if[i]);
4414 		mutex_enter(&bucket->isaf_lock);
4415 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4416 		    assoc = spare) {
4417 			spare = assoc->ipsa_next;
4418 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4419 			    reap_delay, B_TRUE, &mq) != NULL) {
4420 				/*
4421 				 * Put SA's which have a peer or SA's which
4422 				 * are paired on a list for processing after
4423 				 * all the hash tables have been walked.
4424 				 *
4425 				 * sadb_age_assoc() increments the refcnt,
4426 				 * effectively doing an IPSA_REFHOLD().
4427 				 */
4428 				newbie = kmem_alloc(sizeof (*newbie),
4429 				    KM_NOSLEEP);
4430 				if (newbie == NULL) {
4431 					/*
4432 					 * Don't forget to REFRELE().
4433 					 */
4434 					IPSA_REFRELE(assoc);
4435 					continue;	/* for loop... */
4436 				}
4437 				newbie->next = haspeerlist;
4438 				newbie->ipsa = assoc;
4439 				haspeerlist = newbie;
4440 			}
4441 		}
4442 		mutex_exit(&bucket->isaf_lock);
4443 	}
4444 
4445 	if (mq != NULL) {
4446 		sadb_drain_torchq(ip_q, mq);
4447 		mq = NULL;
4448 	}
4449 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4450 	haspeerlist = NULL;
4451 
4452 	/* Age outbound associations. */
4453 	for (i = 0; i < sp->sdb_hashsize; i++) {
4454 		bucket = &(sp->sdb_of[i]);
4455 		mutex_enter(&bucket->isaf_lock);
4456 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4457 		    assoc = spare) {
4458 			spare = assoc->ipsa_next;
4459 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4460 			    reap_delay, B_FALSE, &mq) != NULL) {
4461 				/*
4462 				 * sadb_age_assoc() increments the refcnt,
4463 				 * effectively doing an IPSA_REFHOLD().
4464 				 */
4465 				newbie = kmem_alloc(sizeof (*newbie),
4466 				    KM_NOSLEEP);
4467 				if (newbie == NULL) {
4468 					/*
4469 					 * Don't forget to REFRELE().
4470 					 */
4471 					IPSA_REFRELE(assoc);
4472 					continue;	/* for loop... */
4473 				}
4474 				newbie->next = haspeerlist;
4475 				newbie->ipsa = assoc;
4476 				haspeerlist = newbie;
4477 			}
4478 		}
4479 		mutex_exit(&bucket->isaf_lock);
4480 	}
4481 	if (mq != NULL) {
4482 		sadb_drain_torchq(ip_q, mq);
4483 		mq = NULL;
4484 	}
4485 
4486 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4487 
4488 	/*
4489 	 * Run a GC pass to clean out dead identities.
4490 	 */
4491 	ipsid_gc(ns);
4492 }
4493 
4494 /*
4495  * Figure out when to reschedule the ager.
4496  */
4497 timeout_id_t
4498 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4499     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4500 {
4501 	hrtime_t end = gethrtime();
4502 	uint_t interval = *intp;
4503 
4504 	/*
4505 	 * See how long this took.  If it took too long, increase the
4506 	 * aging interval.
4507 	 */
4508 	if ((end - begin) > (hrtime_t)interval * (hrtime_t)1000000) {
4509 		if (interval >= intmax) {
4510 			/* XXX Rate limit this?  Or recommend flush? */
4511 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4512 			    "Too many SA's to age out in %d msec.\n",
4513 			    intmax);
4514 		} else {
4515 			/* Double by shifting by one bit. */
4516 			interval <<= 1;
4517 			interval = min(interval, intmax);
4518 		}
4519 	} else if ((end - begin) <= (hrtime_t)interval * (hrtime_t)500000 &&
4520 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4521 		/*
4522 		 * If I took less than half of the interval, then I should
4523 		 * ratchet the interval back down.  Never automatically
4524 		 * shift below the default aging interval.
4525 		 *
4526 		 * NOTE:This even overrides manual setting of the age
4527 		 *	interval using NDD to lower the setting past the
4528 		 *	default.  In other words, if you set the interval
4529 		 *	lower than the default, and your SADB gets too big,
4530 		 *	the interval will only self-lower back to the default.
4531 		 */
4532 		/* Halve by shifting one bit. */
4533 		interval >>= 1;
4534 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4535 	}
4536 	*intp = interval;
4537 	return (qtimeout(pfkey_q, ager, agerarg,
4538 	    drv_usectohz(interval * 1000)));
4539 }
4540 
4541 
4542 /*
4543  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4544  * message takes when updating a MATURE or DYING SA.
4545  */
4546 static void
4547 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4548     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4549 {
4550 	mutex_enter(&assoc->ipsa_lock);
4551 
4552 	/*
4553 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4554 	 * passed in during an update message.	We currently don't handle
4555 	 * these.
4556 	 */
4557 
4558 	if (hard != NULL) {
4559 		if (hard->sadb_lifetime_bytes != 0)
4560 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4561 		if (hard->sadb_lifetime_usetime != 0)
4562 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4563 		if (hard->sadb_lifetime_addtime != 0)
4564 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4565 		if (assoc->ipsa_hardaddlt != 0) {
4566 			assoc->ipsa_hardexpiretime =
4567 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4568 		}
4569 		if (assoc->ipsa_harduselt != 0 &&
4570 		    assoc->ipsa_flags & IPSA_F_USED) {
4571 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4572 		}
4573 		if (hard->sadb_lifetime_allocations != 0)
4574 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4575 	}
4576 
4577 	if (soft != NULL) {
4578 		if (soft->sadb_lifetime_bytes != 0) {
4579 			if (soft->sadb_lifetime_bytes >
4580 			    assoc->ipsa_hardbyteslt) {
4581 				assoc->ipsa_softbyteslt =
4582 				    assoc->ipsa_hardbyteslt;
4583 			} else {
4584 				assoc->ipsa_softbyteslt =
4585 				    soft->sadb_lifetime_bytes;
4586 			}
4587 		}
4588 		if (soft->sadb_lifetime_usetime != 0) {
4589 			if (soft->sadb_lifetime_usetime >
4590 			    assoc->ipsa_harduselt) {
4591 				assoc->ipsa_softuselt =
4592 				    assoc->ipsa_harduselt;
4593 			} else {
4594 				assoc->ipsa_softuselt =
4595 				    soft->sadb_lifetime_usetime;
4596 			}
4597 		}
4598 		if (soft->sadb_lifetime_addtime != 0) {
4599 			if (soft->sadb_lifetime_addtime >
4600 			    assoc->ipsa_hardexpiretime) {
4601 				assoc->ipsa_softexpiretime =
4602 				    assoc->ipsa_hardexpiretime;
4603 			} else {
4604 				assoc->ipsa_softaddlt =
4605 				    soft->sadb_lifetime_addtime;
4606 			}
4607 		}
4608 		if (assoc->ipsa_softaddlt != 0) {
4609 			assoc->ipsa_softexpiretime =
4610 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4611 		}
4612 		if (assoc->ipsa_softuselt != 0 &&
4613 		    assoc->ipsa_flags & IPSA_F_USED) {
4614 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4615 		}
4616 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4617 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4618 				lifetime_fuzz(assoc);
4619 		}
4620 
4621 		if (soft->sadb_lifetime_allocations != 0)
4622 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4623 	}
4624 
4625 	if (idle != NULL) {
4626 		time_t current = gethrestime_sec();
4627 		if ((assoc->ipsa_idleexpiretime <= current) &&
4628 		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4629 			assoc->ipsa_idleexpiretime =
4630 			    current + assoc->ipsa_idleaddlt;
4631 		}
4632 		if (idle->sadb_lifetime_addtime != 0)
4633 			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4634 		if (idle->sadb_lifetime_usetime != 0)
4635 			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4636 		if (assoc->ipsa_idleaddlt != 0) {
4637 			assoc->ipsa_idleexpiretime =
4638 			    current + idle->sadb_lifetime_addtime;
4639 			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4640 		}
4641 		if (assoc->ipsa_idleuselt != 0) {
4642 			if (assoc->ipsa_idletime != 0) {
4643 				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4644 				    assoc->ipsa_idleuselt);
4645 			assoc->ipsa_idleexpiretime =
4646 			    current + assoc->ipsa_idletime;
4647 			} else {
4648 				assoc->ipsa_idleexpiretime =
4649 				    current + assoc->ipsa_idleuselt;
4650 				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4651 			}
4652 		}
4653 	}
4654 	mutex_exit(&assoc->ipsa_lock);
4655 }
4656 
4657 static int
4658 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4659 {
4660 	int rcode = 0;
4661 	time_t current = gethrestime_sec();
4662 
4663 	mutex_enter(&assoc->ipsa_lock);
4664 
4665 	switch (new_state) {
4666 	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4667 		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4668 			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4669 			assoc->ipsa_idleexpiretime =
4670 			    current + assoc->ipsa_idletime;
4671 		}
4672 		break;
4673 	case SADB_X_SASTATE_IDLE:
4674 		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4675 			assoc->ipsa_state = IPSA_STATE_IDLE;
4676 			assoc->ipsa_idleexpiretime =
4677 			    current + assoc->ipsa_idletime;
4678 		} else {
4679 			rcode = EINVAL;
4680 		}
4681 		break;
4682 
4683 	case SADB_X_SASTATE_ACTIVE:
4684 		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4685 			rcode = EINVAL;
4686 			break;
4687 		}
4688 		assoc->ipsa_state = IPSA_STATE_MATURE;
4689 		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4690 
4691 		if (ipkt_lst == NULL) {
4692 			break;
4693 		}
4694 
4695 		if (assoc->ipsa_bpkt_head != NULL) {
4696 			*ipkt_lst = assoc->ipsa_bpkt_head;
4697 			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4698 			assoc->ipsa_mblkcnt = 0;
4699 		} else {
4700 			*ipkt_lst = NULL;
4701 		}
4702 		break;
4703 	default:
4704 		rcode = EINVAL;
4705 		break;
4706 	}
4707 
4708 	mutex_exit(&assoc->ipsa_lock);
4709 	return (rcode);
4710 }
4711 
4712 /*
4713  * Common code to update an SA.
4714  */
4715 
4716 int
4717 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4718     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4719     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4720     netstack_t *ns, uint8_t sadb_msg_type)
4721 {
4722 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4723 	sadb_address_t *srcext =
4724 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4725 	sadb_address_t *dstext =
4726 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4727 	sadb_x_kmc_t *kmcext =
4728 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4729 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4730 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4731 	sadb_x_replay_ctr_t *replext =
4732 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4733 	sadb_lifetime_t *soft =
4734 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4735 	sadb_lifetime_t *hard =
4736 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4737 	sadb_lifetime_t *idle =
4738 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4739 	sadb_x_pair_t *pair_ext =
4740 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4741 	ipsa_t *echo_target = NULL;
4742 	int error = 0;
4743 	ipsap_t *ipsapp = NULL;
4744 	uint32_t kmp = 0, kmc = 0;
4745 	time_t current = gethrestime_sec();
4746 
4747 
4748 	/* I need certain extensions present for either UPDATE message. */
4749 	if (srcext == NULL) {
4750 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4751 		return (EINVAL);
4752 	}
4753 	if (dstext == NULL) {
4754 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4755 		return (EINVAL);
4756 	}
4757 	if (assoc == NULL) {
4758 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4759 		return (EINVAL);
4760 	}
4761 
4762 	if (kmcext != NULL) {
4763 		kmp = kmcext->sadb_x_kmc_proto;
4764 		kmc = kmcext->sadb_x_kmc_cookie;
4765 	}
4766 
4767 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4768 	if (ipsapp == NULL) {
4769 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
4770 		return (ESRCH);
4771 	}
4772 
4773 	if (ipsapp->ipsap_psa_ptr == NULL && ipsapp->ipsap_sa_ptr != NULL) {
4774 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4775 			/*
4776 			 * REFRELE the target and let the add_sa_func()
4777 			 * deal with updating a larval SA.
4778 			 */
4779 			destroy_ipsa_pair(ipsapp);
4780 			return (add_sa_func(mp, ksi, diagnostic, ns));
4781 		}
4782 	}
4783 
4784 	/*
4785 	 * At this point we have an UPDATE to a MATURE SA. There should
4786 	 * not be any keying material present.
4787 	 */
4788 	if (akey != NULL) {
4789 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4790 		error = EINVAL;
4791 		goto bail;
4792 	}
4793 	if (ekey != NULL) {
4794 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4795 		error = EINVAL;
4796 		goto bail;
4797 	}
4798 
4799 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4800 		if (ipsapp->ipsap_sa_ptr != NULL &&
4801 		    ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4802 			if ((error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4803 			    assoc->sadb_sa_state, NULL)) != 0) {
4804 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4805 				goto bail;
4806 			}
4807 		}
4808 		if (ipsapp->ipsap_psa_ptr != NULL &&
4809 		    ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4810 			if ((error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4811 			    assoc->sadb_sa_state, NULL)) != 0) {
4812 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4813 				goto bail;
4814 			}
4815 		}
4816 	}
4817 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4818 		if (ipsapp->ipsap_sa_ptr != NULL) {
4819 			error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4820 			    assoc->sadb_sa_state,
4821 			    (ipsapp->ipsap_sa_ptr->ipsa_flags &
4822 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4823 			if (error) {
4824 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4825 				goto bail;
4826 			}
4827 		}
4828 		if (ipsapp->ipsap_psa_ptr != NULL) {
4829 			error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4830 			    assoc->sadb_sa_state,
4831 			    (ipsapp->ipsap_psa_ptr->ipsa_flags &
4832 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4833 			if (error) {
4834 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4835 				goto bail;
4836 			}
4837 		}
4838 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4839 		    ksi, echo_target);
4840 		goto bail;
4841 	}
4842 
4843 	/*
4844 	 * Reality checks for updates of active associations.
4845 	 * Sundry first-pass UPDATE-specific reality checks.
4846 	 * Have to do the checks here, because it's after the add_sa code.
4847 	 * XXX STATS : logging/stats here?
4848 	 */
4849 
4850 	if (!((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4851 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4852 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4853 		error = EINVAL;
4854 		goto bail;
4855 	}
4856 
4857 	if (assoc->sadb_sa_flags & ~spp->s_updateflags) {
4858 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4859 		error = EINVAL;
4860 		goto bail;
4861 	}
4862 
4863 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4864 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4865 		error = EOPNOTSUPP;
4866 		goto bail;
4867 	}
4868 
4869 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4870 		error = EINVAL;
4871 		goto bail;
4872 	}
4873 
4874 	if (ipsapp->ipsap_sa_ptr != NULL) {
4875 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4876 			error = ESRCH;	/* DEAD == Not there, in this case. */
4877 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4878 			goto bail;
4879 		}
4880 		if ((kmp != 0) &&
4881 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmp != 0) ||
4882 		    (ipsapp->ipsap_sa_ptr->ipsa_kmp != kmp))) {
4883 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4884 			error = EINVAL;
4885 			goto bail;
4886 		}
4887 		if ((kmc != 0) &&
4888 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmc != 0) ||
4889 		    (ipsapp->ipsap_sa_ptr->ipsa_kmc != kmc))) {
4890 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4891 			error = EINVAL;
4892 			goto bail;
4893 		}
4894 		/*
4895 		 * Do not allow replay value change for MATURE or LARVAL SA.
4896 		 */
4897 
4898 		if ((replext != NULL) &&
4899 		    ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4900 		    (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4901 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4902 			error = EINVAL;
4903 			goto bail;
4904 		}
4905 	}
4906 
4907 	if (ipsapp->ipsap_psa_ptr != NULL) {
4908 		if (ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4909 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4910 			error = ESRCH;	/* DEAD == Not there, in this case. */
4911 			goto bail;
4912 		}
4913 		if ((kmp != 0) &&
4914 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmp != 0) ||
4915 		    (ipsapp->ipsap_psa_ptr->ipsa_kmp != kmp))) {
4916 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4917 			error = EINVAL;
4918 			goto bail;
4919 		}
4920 		if ((kmc != 0) &&
4921 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmc != 0) ||
4922 		    (ipsapp->ipsap_psa_ptr->ipsa_kmc != kmc))) {
4923 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4924 			error = EINVAL;
4925 			goto bail;
4926 		}
4927 	}
4928 
4929 	if (ipsapp->ipsap_sa_ptr != NULL) {
4930 		sadb_update_lifetimes(ipsapp->ipsap_sa_ptr, hard, soft,
4931 		    idle, B_TRUE);
4932 		if (kmp != 0)
4933 			ipsapp->ipsap_sa_ptr->ipsa_kmp = kmp;
4934 		if (kmc != 0)
4935 			ipsapp->ipsap_sa_ptr->ipsa_kmc = kmc;
4936 		if ((replext != NULL) &&
4937 		    (ipsapp->ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4938 			/*
4939 			 * If an inbound SA, update the replay counter
4940 			 * and check off all the other sequence number
4941 			 */
4942 			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4943 				if (!sadb_replay_check(ipsapp->ipsap_sa_ptr,
4944 				    replext->sadb_x_rc_replay32)) {
4945 					*diagnostic =
4946 					    SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4947 					error = EINVAL;
4948 					goto bail;
4949 				}
4950 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4951 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4952 				    current +
4953 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4954 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4955 			} else {
4956 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4957 				ipsapp->ipsap_sa_ptr->ipsa_replay =
4958 				    replext->sadb_x_rc_replay32;
4959 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4960 				    current +
4961 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4962 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4963 			}
4964 		}
4965 	}
4966 
4967 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4968 		if (ipsapp->ipsap_psa_ptr != NULL) {
4969 			sadb_update_lifetimes(ipsapp->ipsap_psa_ptr, hard, soft,
4970 			    idle, B_FALSE);
4971 			if (kmp != 0)
4972 				ipsapp->ipsap_psa_ptr->ipsa_kmp = kmp;
4973 			if (kmc != 0)
4974 				ipsapp->ipsap_psa_ptr->ipsa_kmc = kmc;
4975 		} else {
4976 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4977 			error = ESRCH;
4978 			goto bail;
4979 		}
4980 	}
4981 
4982 	if (pair_ext != NULL)
4983 		error = update_pairing(ipsapp, ksi, diagnostic, spp);
4984 
4985 	if (error == 0)
4986 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4987 		    ksi, echo_target);
4988 bail:
4989 
4990 	destroy_ipsa_pair(ipsapp);
4991 
4992 	return (error);
4993 }
4994 
4995 
4996 int
4997 update_pairing(ipsap_t *ipsapp, keysock_in_t *ksi, int *diagnostic,
4998     sadbp_t *spp)
4999 {
5000 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
5001 	sadb_address_t *srcext =
5002 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
5003 	sadb_address_t *dstext =
5004 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5005 	sadb_x_pair_t *pair_ext =
5006 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
5007 	int error = 0;
5008 	ipsap_t *oipsapp = NULL;
5009 	boolean_t undo_pair = B_FALSE;
5010 	uint32_t ipsa_flags;
5011 
5012 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
5013 	    assoc->sadb_sa_spi) {
5014 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
5015 		return (EINVAL);
5016 	}
5017 
5018 	/*
5019 	 * Assume for now that the spi value provided in the SADB_UPDATE
5020 	 * message was valid, update the SA with its pair spi value.
5021 	 * If the spi turns out to be bogus or the SA no longer exists
5022 	 * then this will be detected when the reverse update is made
5023 	 * below.
5024 	 */
5025 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
5026 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
5027 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
5028 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
5029 
5030 	/*
5031 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
5032 	 * should now return pointers to the SA *AND* its pair, if this is not
5033 	 * the case, the "otherspi" either did not exist or was deleted. Also
5034 	 * check that "otherspi" is not already paired. If everything looks
5035 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
5036 	 * after this update to ensure its not deleted until we are done.
5037 	 */
5038 	oipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
5039 	if (oipsapp == NULL) {
5040 		/*
5041 		 * This should never happen, calling function still has
5042 		 * IPSA_REFHELD on the SA we just updated.
5043 		 */
5044 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
5045 		return (EINVAL);
5046 	}
5047 
5048 	if (oipsapp->ipsap_psa_ptr == NULL) {
5049 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
5050 		error = EINVAL;
5051 		undo_pair = B_TRUE;
5052 	} else {
5053 		ipsa_flags = oipsapp->ipsap_psa_ptr->ipsa_flags;
5054 		if ((oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
5055 		    (oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
5056 			/* Its dead Jim! */
5057 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
5058 			undo_pair = B_TRUE;
5059 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
5060 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
5061 			/* This SA is in both hashtables. */
5062 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
5063 			undo_pair = B_TRUE;
5064 		} else if (ipsa_flags & IPSA_F_PAIRED) {
5065 			/* This SA is already paired with another. */
5066 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
5067 			undo_pair = B_TRUE;
5068 		}
5069 	}
5070 
5071 	if (undo_pair) {
5072 		/* The pair SA does not exist. */
5073 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
5074 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
5075 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
5076 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
5077 	} else {
5078 		mutex_enter(&oipsapp->ipsap_psa_ptr->ipsa_lock);
5079 		oipsapp->ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
5080 		oipsapp->ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
5081 		mutex_exit(&oipsapp->ipsap_psa_ptr->ipsa_lock);
5082 	}
5083 
5084 	destroy_ipsa_pair(oipsapp);
5085 	return (error);
5086 }
5087 
5088 /*
5089  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
5090  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
5091  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
5092  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
5093  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
5094  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
5095  * other direction's SA.
5096  */
5097 
5098 /*
5099  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
5100  * grab it, lock it, and return it.  Otherwise return NULL.
5101  */
5102 static ipsacq_t *
5103 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
5104     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
5105     uint64_t unique_id)
5106 {
5107 	ipsacq_t *walker;
5108 	sa_family_t fam;
5109 	uint32_t blank_address[4] = {0, 0, 0, 0};
5110 
5111 	if (isrc == NULL) {
5112 		ASSERT(idst == NULL);
5113 		isrc = idst = blank_address;
5114 	}
5115 
5116 	/*
5117 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
5118 	 *
5119 	 * XXX May need search for duplicates based on other things too!
5120 	 */
5121 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
5122 	    walker = walker->ipsacq_next) {
5123 		mutex_enter(&walker->ipsacq_lock);
5124 		fam = walker->ipsacq_addrfam;
5125 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
5126 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
5127 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
5128 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
5129 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
5130 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
5131 		    (ap == walker->ipsacq_act) &&
5132 		    (pp == walker->ipsacq_policy) &&
5133 		    /* XXX do deep compares of ap/pp? */
5134 		    (unique_id == walker->ipsacq_unique_id))
5135 			break;			/* everything matched */
5136 		mutex_exit(&walker->ipsacq_lock);
5137 	}
5138 
5139 	return (walker);
5140 }
5141 
5142 /*
5143  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
5144  * of all of the same length.  Give up (and drop) if memory
5145  * cannot be allocated for a new one; otherwise, invoke callback to
5146  * send the acquire up..
5147  *
5148  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
5149  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
5150  * and handle this case specially.
5151  */
5152 void
5153 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
5154 {
5155 	sadbp_t *spp;
5156 	sadb_t *sp;
5157 	ipsacq_t *newbie;
5158 	iacqf_t *bucket;
5159 	mblk_t *datamp = mp->b_cont;
5160 	mblk_t *extended;
5161 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
5162 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
5163 	uint32_t *src, *dst, *isrc, *idst;
5164 	ipsec_policy_t *pp = io->ipsec_out_policy;
5165 	ipsec_action_t *ap = io->ipsec_out_act;
5166 	sa_family_t af;
5167 	int hashoffset;
5168 	uint32_t seq;
5169 	uint64_t unique_id = 0;
5170 	ipsec_selector_t sel;
5171 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
5172 	netstack_t	*ns = io->ipsec_out_ns;
5173 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5174 
5175 	ASSERT((pp != NULL) || (ap != NULL));
5176 
5177 	ASSERT(need_ah != NULL || need_esp != NULL);
5178 	/* Assign sadb pointers */
5179 	if (need_esp) { /* ESP for AH+ESP */
5180 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5181 
5182 		spp = &espstack->esp_sadb;
5183 	} else {
5184 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
5185 
5186 		spp = &ahstack->ah_sadb;
5187 	}
5188 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
5189 
5190 	if (ap == NULL)
5191 		ap = pp->ipsp_act;
5192 
5193 	ASSERT(ap != NULL);
5194 
5195 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
5196 		unique_id = SA_FORM_UNIQUE_ID(io);
5197 
5198 	/*
5199 	 * Set up an ACQUIRE record.
5200 	 *
5201 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
5202 	 * below the lowest point allowed in the kernel.  (In other words,
5203 	 * make sure the high bit on the sequence number is set.)
5204 	 */
5205 
5206 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
5207 
5208 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
5209 		src = (uint32_t *)&ipha->ipha_src;
5210 		dst = (uint32_t *)&ipha->ipha_dst;
5211 		af = AF_INET;
5212 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
5213 		ASSERT(io->ipsec_out_v4 == B_TRUE);
5214 	} else {
5215 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
5216 		src = (uint32_t *)&ip6h->ip6_src;
5217 		dst = (uint32_t *)&ip6h->ip6_dst;
5218 		af = AF_INET6;
5219 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
5220 		ASSERT(io->ipsec_out_v4 == B_FALSE);
5221 	}
5222 
5223 	if (tunnel_mode) {
5224 		if (pp == NULL) {
5225 			/*
5226 			 * Tunnel mode with no policy pointer means this is a
5227 			 * reflected ICMP (like a ECHO REQUEST) that came in
5228 			 * with self-encapsulated protection.  Until we better
5229 			 * support this, drop the packet.
5230 			 */
5231 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
5232 			    DROPPER(ipss, ipds_spd_got_selfencap),
5233 			    &ipss->ipsec_spd_dropper);
5234 			return;
5235 		}
5236 		/* Snag inner addresses. */
5237 		isrc = io->ipsec_out_insrc;
5238 		idst = io->ipsec_out_indst;
5239 	} else {
5240 		isrc = idst = NULL;
5241 	}
5242 
5243 	/*
5244 	 * Check buckets to see if there is an existing entry.  If so,
5245 	 * grab it.  sadb_checkacquire locks newbie if found.
5246 	 */
5247 	bucket = &(sp->sdb_acq[hashoffset]);
5248 	mutex_enter(&bucket->iacqf_lock);
5249 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
5250 	    unique_id);
5251 
5252 	if (newbie == NULL) {
5253 		/*
5254 		 * Otherwise, allocate a new one.
5255 		 */
5256 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
5257 		if (newbie == NULL) {
5258 			mutex_exit(&bucket->iacqf_lock);
5259 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
5260 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
5261 			    &ipss->ipsec_sadb_dropper);
5262 			return;
5263 		}
5264 		newbie->ipsacq_policy = pp;
5265 		if (pp != NULL) {
5266 			IPPOL_REFHOLD(pp);
5267 		}
5268 		IPACT_REFHOLD(ap);
5269 		newbie->ipsacq_act = ap;
5270 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
5271 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
5272 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
5273 		if (newbie->ipsacq_next != NULL)
5274 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
5275 		bucket->iacqf_ipsacq = newbie;
5276 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
5277 		mutex_enter(&newbie->ipsacq_lock);
5278 	}
5279 
5280 	mutex_exit(&bucket->iacqf_lock);
5281 
5282 	/*
5283 	 * This assert looks silly for now, but we may need to enter newbie's
5284 	 * mutex during a search.
5285 	 */
5286 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5287 
5288 	mp->b_next = NULL;
5289 	/* Queue up packet.  Use b_next. */
5290 	if (newbie->ipsacq_numpackets == 0) {
5291 		/* First one. */
5292 		newbie->ipsacq_mp = mp;
5293 		newbie->ipsacq_numpackets = 1;
5294 		newbie->ipsacq_expire = gethrestime_sec();
5295 		/*
5296 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5297 		 * value.
5298 		 */
5299 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5300 		newbie->ipsacq_seq = seq;
5301 		newbie->ipsacq_addrfam = af;
5302 
5303 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
5304 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
5305 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
5306 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
5307 		if (tunnel_mode) {
5308 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
5309 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
5310 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5311 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
5312 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
5313 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5314 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
5315 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5316 			    io->ipsec_out_indst, io->ipsec_out_inaf);
5317 		} else {
5318 			newbie->ipsacq_proto = io->ipsec_out_proto;
5319 		}
5320 		newbie->ipsacq_unique_id = unique_id;
5321 	} else {
5322 		/* Scan to the end of the list & insert. */
5323 		mblk_t *lastone = newbie->ipsacq_mp;
5324 
5325 		while (lastone->b_next != NULL)
5326 			lastone = lastone->b_next;
5327 		lastone->b_next = mp;
5328 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5329 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5330 			lastone = newbie->ipsacq_mp;
5331 			newbie->ipsacq_mp = lastone->b_next;
5332 			lastone->b_next = NULL;
5333 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
5334 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5335 			    &ipss->ipsec_sadb_dropper);
5336 		} else {
5337 			IP_ACQUIRE_STAT(ipss, qhiwater,
5338 			    newbie->ipsacq_numpackets);
5339 		}
5340 	}
5341 
5342 	/*
5343 	 * Reset addresses.  Set them to the most recently added mblk chain,
5344 	 * so that the address pointers in the acquire record will point
5345 	 * at an mblk still attached to the acquire list.
5346 	 */
5347 
5348 	newbie->ipsacq_srcaddr = src;
5349 	newbie->ipsacq_dstaddr = dst;
5350 
5351 	/*
5352 	 * If the acquire record has more than one queued packet, we've
5353 	 * already sent an ACQUIRE, and don't need to repeat ourself.
5354 	 */
5355 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5356 		/* I have an acquire outstanding already! */
5357 		mutex_exit(&newbie->ipsacq_lock);
5358 		return;
5359 	}
5360 
5361 	if (keysock_extended_reg(ns)) {
5362 		/*
5363 		 * Construct an extended ACQUIRE.  There are logging
5364 		 * opportunities here in failure cases.
5365 		 */
5366 
5367 		(void) memset(&sel, 0, sizeof (sel));
5368 		sel.ips_isv4 = io->ipsec_out_v4;
5369 		if (tunnel_mode) {
5370 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
5371 			    IPPROTO_ENCAP : IPPROTO_IPV6;
5372 		} else {
5373 			sel.ips_protocol = io->ipsec_out_proto;
5374 			sel.ips_local_port = io->ipsec_out_src_port;
5375 			sel.ips_remote_port = io->ipsec_out_dst_port;
5376 		}
5377 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
5378 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
5379 		sel.ips_is_icmp_inv_acq = 0;
5380 		if (af == AF_INET) {
5381 			sel.ips_local_addr_v4 = ipha->ipha_src;
5382 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
5383 		} else {
5384 			sel.ips_local_addr_v6 = ip6h->ip6_src;
5385 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5386 		}
5387 
5388 		extended = sadb_keysock_out(0);
5389 		if (extended != NULL) {
5390 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
5391 			    tunnel_mode, seq, 0, ns);
5392 			if (extended->b_cont == NULL) {
5393 				freeb(extended);
5394 				extended = NULL;
5395 			}
5396 		}
5397 	} else
5398 		extended = NULL;
5399 
5400 	/*
5401 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
5402 	 * this new record.  The send-acquire callback assumes that acqrec is
5403 	 * already locked.
5404 	 */
5405 	(*spp->s_acqfn)(newbie, extended, ns);
5406 }
5407 
5408 /*
5409  * Unlink and free an acquire record.
5410  */
5411 void
5412 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5413 {
5414 	mblk_t *mp;
5415 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5416 
5417 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5418 
5419 	if (acqrec->ipsacq_policy != NULL) {
5420 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
5421 	}
5422 	if (acqrec->ipsacq_act != NULL) {
5423 		IPACT_REFRELE(acqrec->ipsacq_act);
5424 	}
5425 
5426 	/* Unlink */
5427 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5428 	if (acqrec->ipsacq_next != NULL)
5429 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5430 
5431 	/*
5432 	 * Free hanging mp's.
5433 	 *
5434 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5435 	 */
5436 
5437 	mutex_enter(&acqrec->ipsacq_lock);
5438 	while (acqrec->ipsacq_mp != NULL) {
5439 		mp = acqrec->ipsacq_mp;
5440 		acqrec->ipsacq_mp = mp->b_next;
5441 		mp->b_next = NULL;
5442 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
5443 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5444 		    &ipss->ipsec_sadb_dropper);
5445 	}
5446 	mutex_exit(&acqrec->ipsacq_lock);
5447 
5448 	/* Free */
5449 	mutex_destroy(&acqrec->ipsacq_lock);
5450 	kmem_free(acqrec, sizeof (*acqrec));
5451 }
5452 
5453 /*
5454  * Destroy an acquire list fanout.
5455  */
5456 static void
5457 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5458     netstack_t *ns)
5459 {
5460 	int i;
5461 	iacqf_t *list = *listp;
5462 
5463 	if (list == NULL)
5464 		return;
5465 
5466 	for (i = 0; i < numentries; i++) {
5467 		mutex_enter(&(list[i].iacqf_lock));
5468 		while (list[i].iacqf_ipsacq != NULL)
5469 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5470 		mutex_exit(&(list[i].iacqf_lock));
5471 		if (forever)
5472 			mutex_destroy(&(list[i].iacqf_lock));
5473 	}
5474 
5475 	if (forever) {
5476 		*listp = NULL;
5477 		kmem_free(list, numentries * sizeof (*list));
5478 	}
5479 }
5480 
5481 /*
5482  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5483  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5484  */
5485 static uint8_t *
5486 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5487     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5488     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5489 {
5490 	uint8_t *cur = start;
5491 	ipsec_alginfo_t *algp;
5492 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5493 
5494 	cur += sizeof (*algdesc);
5495 	if (cur >= limit)
5496 		return (NULL);
5497 
5498 	ecomb->sadb_x_ecomb_numalgs++;
5499 
5500 	/*
5501 	 * Normalize vs. crypto framework's limits.  This way, you can specify
5502 	 * a stronger policy, and when the framework loads a stronger version,
5503 	 * you can just keep plowing w/o rewhacking your SPD.
5504 	 */
5505 	mutex_enter(&ipss->ipsec_alg_lock);
5506 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5507 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5508 	if (algp == NULL) {
5509 		mutex_exit(&ipss->ipsec_alg_lock);
5510 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5511 	}
5512 	if (minbits < algp->alg_ef_minbits)
5513 		minbits = algp->alg_ef_minbits;
5514 	if (maxbits > algp->alg_ef_maxbits)
5515 		maxbits = algp->alg_ef_maxbits;
5516 	mutex_exit(&ipss->ipsec_alg_lock);
5517 
5518 	algdesc->sadb_x_algdesc_reserved = SADB_8TO1(algp->alg_saltlen);
5519 	algdesc->sadb_x_algdesc_satype = satype;
5520 	algdesc->sadb_x_algdesc_algtype = algtype;
5521 	algdesc->sadb_x_algdesc_alg = alg;
5522 	algdesc->sadb_x_algdesc_minbits = minbits;
5523 	algdesc->sadb_x_algdesc_maxbits = maxbits;
5524 
5525 	return (cur);
5526 }
5527 
5528 /*
5529  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5530  * which must fit before *limit
5531  *
5532  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5533  */
5534 static uint8_t *
5535 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5536     netstack_t *ns)
5537 {
5538 	uint8_t *cur = start;
5539 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5540 	ipsec_prot_t *ipp;
5541 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5542 
5543 	cur += sizeof (*ecomb);
5544 	if (cur >= limit)
5545 		return (NULL);
5546 
5547 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5548 
5549 	ipp = &act->ipa_act.ipa_apply;
5550 
5551 	ecomb->sadb_x_ecomb_numalgs = 0;
5552 	ecomb->sadb_x_ecomb_reserved = 0;
5553 	ecomb->sadb_x_ecomb_reserved2 = 0;
5554 	/*
5555 	 * No limits on allocations, since we really don't support that
5556 	 * concept currently.
5557 	 */
5558 	ecomb->sadb_x_ecomb_soft_allocations = 0;
5559 	ecomb->sadb_x_ecomb_hard_allocations = 0;
5560 
5561 	/*
5562 	 * XXX TBD: Policy or global parameters will eventually be
5563 	 * able to fill in some of these.
5564 	 */
5565 	ecomb->sadb_x_ecomb_flags = 0;
5566 	ecomb->sadb_x_ecomb_soft_bytes = 0;
5567 	ecomb->sadb_x_ecomb_hard_bytes = 0;
5568 	ecomb->sadb_x_ecomb_soft_addtime = 0;
5569 	ecomb->sadb_x_ecomb_hard_addtime = 0;
5570 	ecomb->sadb_x_ecomb_soft_usetime = 0;
5571 	ecomb->sadb_x_ecomb_hard_usetime = 0;
5572 
5573 	if (ipp->ipp_use_ah) {
5574 		cur = sadb_new_algdesc(cur, limit, ecomb,
5575 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5576 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5577 		if (cur == NULL)
5578 			return (NULL);
5579 		ipsecah_fill_defs(ecomb, ns);
5580 	}
5581 
5582 	if (ipp->ipp_use_esp) {
5583 		if (ipp->ipp_use_espa) {
5584 			cur = sadb_new_algdesc(cur, limit, ecomb,
5585 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5586 			    ipp->ipp_esp_auth_alg,
5587 			    ipp->ipp_espa_minbits,
5588 			    ipp->ipp_espa_maxbits, ipss);
5589 			if (cur == NULL)
5590 				return (NULL);
5591 		}
5592 
5593 		cur = sadb_new_algdesc(cur, limit, ecomb,
5594 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5595 		    ipp->ipp_encr_alg,
5596 		    ipp->ipp_espe_minbits,
5597 		    ipp->ipp_espe_maxbits, ipss);
5598 		if (cur == NULL)
5599 			return (NULL);
5600 		/* Fill in lifetimes if and only if AH didn't already... */
5601 		if (!ipp->ipp_use_ah)
5602 			ipsecesp_fill_defs(ecomb, ns);
5603 	}
5604 
5605 	return (cur);
5606 }
5607 
5608 /*
5609  * Construct an extended ACQUIRE message based on a selector and the resulting
5610  * IPsec action.
5611  *
5612  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5613  * generation. As a consequence, expect this function to evolve
5614  * rapidly.
5615  */
5616 static mblk_t *
5617 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5618     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5619     netstack_t *ns)
5620 {
5621 	mblk_t *mp;
5622 	sadb_msg_t *samsg;
5623 	uint8_t *start, *cur, *end;
5624 	uint32_t *saddrptr, *daddrptr;
5625 	sa_family_t af;
5626 	sadb_prop_t *eprop;
5627 	ipsec_action_t *ap, *an;
5628 	ipsec_selkey_t *ipsl;
5629 	uint8_t proto, pfxlen;
5630 	uint16_t lport, rport;
5631 	uint32_t kmp, kmc;
5632 
5633 	/*
5634 	 * Find the action we want sooner rather than later..
5635 	 */
5636 	an = NULL;
5637 	if (pol == NULL) {
5638 		ap = act;
5639 	} else {
5640 		ap = pol->ipsp_act;
5641 
5642 		if (ap != NULL)
5643 			an = ap->ipa_next;
5644 	}
5645 
5646 	/*
5647 	 * Just take a swag for the allocation for now.	 We can always
5648 	 * alter it later.
5649 	 */
5650 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5651 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5652 	if (mp == NULL)
5653 		return (NULL);
5654 
5655 	start = mp->b_rptr;
5656 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5657 
5658 	cur = start;
5659 
5660 	samsg = (sadb_msg_t *)cur;
5661 	cur += sizeof (*samsg);
5662 
5663 	samsg->sadb_msg_version = PF_KEY_V2;
5664 	samsg->sadb_msg_type = SADB_ACQUIRE;
5665 	samsg->sadb_msg_errno = 0;
5666 	samsg->sadb_msg_reserved = 0;
5667 	samsg->sadb_msg_satype = 0;
5668 	samsg->sadb_msg_seq = seq;
5669 	samsg->sadb_msg_pid = pid;
5670 
5671 	if (tunnel_mode) {
5672 		/*
5673 		 * Form inner address extensions based NOT on the inner
5674 		 * selectors (i.e. the packet data), but on the policy's
5675 		 * selector key (i.e. the policy's selector information).
5676 		 *
5677 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5678 		 * same in ipsec_selkey_t (unless the compiler does very
5679 		 * strange things with unions, consult your local C language
5680 		 * lawyer for details).
5681 		 */
5682 		ASSERT(pol != NULL);
5683 
5684 		ipsl = &(pol->ipsp_sel->ipsl_key);
5685 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5686 			af = AF_INET;
5687 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5688 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5689 		} else {
5690 			af = AF_INET6;
5691 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5692 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5693 		}
5694 
5695 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5696 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5697 			pfxlen = ipsl->ipsl_local_pfxlen;
5698 		} else {
5699 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5700 			pfxlen = 0;
5701 		}
5702 		/* XXX What about ICMP type/code? */
5703 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5704 		    ipsl->ipsl_lport : 0;
5705 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5706 		    ipsl->ipsl_proto : 0;
5707 
5708 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5709 		    af, saddrptr, lport, proto, pfxlen);
5710 		if (cur == NULL) {
5711 			freeb(mp);
5712 			return (NULL);
5713 		}
5714 
5715 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5716 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5717 			pfxlen = ipsl->ipsl_remote_pfxlen;
5718 		} else {
5719 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5720 			pfxlen = 0;
5721 		}
5722 		/* XXX What about ICMP type/code? */
5723 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5724 		    ipsl->ipsl_rport : 0;
5725 
5726 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5727 		    af, daddrptr, rport, proto, pfxlen);
5728 		if (cur == NULL) {
5729 			freeb(mp);
5730 			return (NULL);
5731 		}
5732 		/*
5733 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5734 		 * _with_ inner-packet address selectors, we'll need to further
5735 		 * distinguish tunnel mode here.  For now, having inner
5736 		 * addresses and/or ports is sufficient.
5737 		 *
5738 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5739 		 * outer addresses.
5740 		 */
5741 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5742 		lport = rport = 0;
5743 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5744 		proto = 0;
5745 		lport = 0;
5746 		rport = 0;
5747 		if (pol != NULL) {
5748 			ipsl = &(pol->ipsp_sel->ipsl_key);
5749 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5750 				proto = ipsl->ipsl_proto;
5751 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5752 				rport = ipsl->ipsl_rport;
5753 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5754 				lport = ipsl->ipsl_lport;
5755 		}
5756 	} else {
5757 		proto = sel->ips_protocol;
5758 		lport = sel->ips_local_port;
5759 		rport = sel->ips_remote_port;
5760 	}
5761 
5762 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5763 
5764 	/*
5765 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5766 	 * ipsec_selector_t.
5767 	 */
5768 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5769 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5770 
5771 	if (cur == NULL) {
5772 		freeb(mp);
5773 		return (NULL);
5774 	}
5775 
5776 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5777 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5778 
5779 	if (cur == NULL) {
5780 		freeb(mp);
5781 		return (NULL);
5782 	}
5783 
5784 	/*
5785 	 * This section will change a lot as policy evolves.
5786 	 * For now, it'll be relatively simple.
5787 	 */
5788 	eprop = (sadb_prop_t *)cur;
5789 	cur += sizeof (*eprop);
5790 	if (cur > end) {
5791 		/* no space left */
5792 		freeb(mp);
5793 		return (NULL);
5794 	}
5795 
5796 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5797 	eprop->sadb_x_prop_ereserved = 0;
5798 	eprop->sadb_x_prop_numecombs = 0;
5799 	eprop->sadb_prop_replay = 32;	/* default */
5800 
5801 	kmc = kmp = 0;
5802 
5803 	for (; ap != NULL; ap = an) {
5804 		an = (pol != NULL) ? ap->ipa_next : NULL;
5805 
5806 		/*
5807 		 * Skip non-IPsec policies
5808 		 */
5809 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5810 			continue;
5811 
5812 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5813 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5814 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5815 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5816 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5817 			eprop->sadb_prop_replay =
5818 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5819 		}
5820 
5821 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5822 		if (cur == NULL) { /* no space */
5823 			freeb(mp);
5824 			return (NULL);
5825 		}
5826 		eprop->sadb_x_prop_numecombs++;
5827 	}
5828 
5829 	if (eprop->sadb_x_prop_numecombs == 0) {
5830 		/*
5831 		 * This will happen if we fail to find a policy
5832 		 * allowing for IPsec processing.
5833 		 * Construct an error message.
5834 		 */
5835 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5836 		samsg->sadb_msg_errno = ENOENT;
5837 		samsg->sadb_x_msg_diagnostic = 0;
5838 		return (mp);
5839 	}
5840 
5841 	if ((kmp != 0) || (kmc != 0)) {
5842 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5843 		if (cur == NULL) {
5844 			freeb(mp);
5845 			return (NULL);
5846 		}
5847 	}
5848 
5849 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5850 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5851 	mp->b_wptr = cur;
5852 
5853 	return (mp);
5854 }
5855 
5856 /*
5857  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5858  *
5859  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5860  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5861  * maximize code consolidation while preventing algorithm changes from messing
5862  * with the callers finishing touches on the ACQUIRE itself.
5863  */
5864 mblk_t *
5865 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5866 {
5867 	uint_t allocsize;
5868 	mblk_t *pfkeymp, *msgmp;
5869 	sa_family_t af;
5870 	uint8_t *cur, *end;
5871 	sadb_msg_t *samsg;
5872 	uint16_t sport_typecode;
5873 	uint16_t dport_typecode;
5874 	uint8_t check_proto;
5875 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5876 
5877 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5878 
5879 	pfkeymp = sadb_keysock_out(0);
5880 	if (pfkeymp == NULL)
5881 		return (NULL);
5882 
5883 	/*
5884 	 * First, allocate a basic ACQUIRE message
5885 	 */
5886 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5887 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5888 
5889 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5890 	allocsize += 2 * sizeof (struct sockaddr_in6);
5891 
5892 	mutex_enter(&ipss->ipsec_alg_lock);
5893 	/* NOTE:  The lock is now held through to this function's return. */
5894 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5895 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5896 
5897 	if (tunnel_mode) {
5898 		/* Tunnel mode! */
5899 		allocsize += 2 * sizeof (sadb_address_t);
5900 		/* Enough to cover both AF_INET and AF_INET6. */
5901 		allocsize += 2 * sizeof (struct sockaddr_in6);
5902 	}
5903 
5904 	msgmp = allocb(allocsize, BPRI_HI);
5905 	if (msgmp == NULL) {
5906 		freeb(pfkeymp);
5907 		mutex_exit(&ipss->ipsec_alg_lock);
5908 		return (NULL);
5909 	}
5910 
5911 	pfkeymp->b_cont = msgmp;
5912 	cur = msgmp->b_rptr;
5913 	end = cur + allocsize;
5914 	samsg = (sadb_msg_t *)cur;
5915 	cur += sizeof (sadb_msg_t);
5916 
5917 	af = acqrec->ipsacq_addrfam;
5918 	switch (af) {
5919 	case AF_INET:
5920 		check_proto = IPPROTO_ICMP;
5921 		break;
5922 	case AF_INET6:
5923 		check_proto = IPPROTO_ICMPV6;
5924 		break;
5925 	default:
5926 		/* This should never happen unless we have kernel bugs. */
5927 		cmn_err(CE_WARN,
5928 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5929 		ASSERT(0);
5930 		mutex_exit(&ipss->ipsec_alg_lock);
5931 		return (NULL);
5932 	}
5933 
5934 	samsg->sadb_msg_version = PF_KEY_V2;
5935 	samsg->sadb_msg_type = SADB_ACQUIRE;
5936 	samsg->sadb_msg_satype = satype;
5937 	samsg->sadb_msg_errno = 0;
5938 	samsg->sadb_msg_pid = 0;
5939 	samsg->sadb_msg_reserved = 0;
5940 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5941 
5942 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5943 
5944 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5945 		sport_typecode = dport_typecode = 0;
5946 	} else {
5947 		sport_typecode = acqrec->ipsacq_srcport;
5948 		dport_typecode = acqrec->ipsacq_dstport;
5949 	}
5950 
5951 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5952 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5953 
5954 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5955 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5956 
5957 	if (tunnel_mode) {
5958 		sport_typecode = acqrec->ipsacq_srcport;
5959 		dport_typecode = acqrec->ipsacq_dstport;
5960 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5961 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5962 		    sport_typecode, acqrec->ipsacq_inner_proto,
5963 		    acqrec->ipsacq_innersrcpfx);
5964 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5965 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5966 		    dport_typecode, acqrec->ipsacq_inner_proto,
5967 		    acqrec->ipsacq_innerdstpfx);
5968 	}
5969 
5970 	/* XXX Insert identity information here. */
5971 
5972 	/* XXXMLS Insert sensitivity information here. */
5973 
5974 	if (cur != NULL)
5975 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5976 	else
5977 		mutex_exit(&ipss->ipsec_alg_lock);
5978 
5979 	return (pfkeymp);
5980 }
5981 
5982 /*
5983  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5984  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5985  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5986  * (ipsa_t *)-1).
5987  *
5988  * master_spi is passed in host order.
5989  */
5990 ipsa_t *
5991 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5992     netstack_t *ns, uint_t sa_type)
5993 {
5994 	sadb_address_t *src =
5995 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5996 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5997 	sadb_spirange_t *range =
5998 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5999 	struct sockaddr_in *ssa, *dsa;
6000 	struct sockaddr_in6 *ssa6, *dsa6;
6001 	uint32_t *srcaddr, *dstaddr;
6002 	sa_family_t af;
6003 	uint32_t add, min, max;
6004 	uint8_t protocol =
6005 	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
6006 
6007 	if (src == NULL) {
6008 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
6009 		return ((ipsa_t *)-1);
6010 	}
6011 	if (dst == NULL) {
6012 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
6013 		return ((ipsa_t *)-1);
6014 	}
6015 	if (range == NULL) {
6016 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
6017 		return ((ipsa_t *)-1);
6018 	}
6019 
6020 	min = ntohl(range->sadb_spirange_min);
6021 	max = ntohl(range->sadb_spirange_max);
6022 	dsa = (struct sockaddr_in *)(dst + 1);
6023 	dsa6 = (struct sockaddr_in6 *)dsa;
6024 
6025 	ssa = (struct sockaddr_in *)(src + 1);
6026 	ssa6 = (struct sockaddr_in6 *)ssa;
6027 	ASSERT(dsa->sin_family == ssa->sin_family);
6028 
6029 	srcaddr = ALL_ZEROES_PTR;
6030 	af = dsa->sin_family;
6031 	switch (af) {
6032 	case AF_INET:
6033 		if (src != NULL)
6034 			srcaddr = (uint32_t *)(&ssa->sin_addr);
6035 		dstaddr = (uint32_t *)(&dsa->sin_addr);
6036 		break;
6037 	case AF_INET6:
6038 		if (src != NULL)
6039 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
6040 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
6041 		break;
6042 	default:
6043 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
6044 		return ((ipsa_t *)-1);
6045 	}
6046 
6047 	if (master_spi < min || master_spi > max) {
6048 		/* Return a random value in the range. */
6049 		if (cl_inet_getspi) {
6050 			cl_inet_getspi(ns->netstack_stackid, protocol,
6051 			    (uint8_t *)&add, sizeof (add), NULL);
6052 		} else {
6053 			(void) random_get_pseudo_bytes((uint8_t *)&add,
6054 			    sizeof (add));
6055 		}
6056 		master_spi = min + (add % (max - min + 1));
6057 	}
6058 
6059 	/*
6060 	 * Since master_spi is passed in host order, we need to htonl() it
6061 	 * for the purposes of creating a new SA.
6062 	 */
6063 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
6064 	    ns));
6065 }
6066 
6067 /*
6068  *
6069  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
6070  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
6071  * and scan for the sequence number in question.  I may wish to accept an
6072  * address pair with it, for easier searching.
6073  *
6074  * Caller frees the message, so we don't have to here.
6075  *
6076  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
6077  *		failures.
6078  */
6079 /* ARGSUSED */
6080 void
6081 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
6082 {
6083 	int i;
6084 	ipsacq_t *acqrec;
6085 	iacqf_t *bucket;
6086 
6087 	/*
6088 	 * I only accept the base header for this!
6089 	 * Though to be honest, requiring the dst address would help
6090 	 * immensely.
6091 	 *
6092 	 * XXX	There are already cases where I can get the dst address.
6093 	 */
6094 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
6095 		return;
6096 
6097 	/*
6098 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
6099 	 * (and in the future send a message to IP with the appropriate error
6100 	 * number).
6101 	 *
6102 	 * Q: Do I want to reject if pid != 0?
6103 	 */
6104 
6105 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
6106 		bucket = &sp->s_v4.sdb_acq[i];
6107 		mutex_enter(&bucket->iacqf_lock);
6108 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6109 		    acqrec = acqrec->ipsacq_next) {
6110 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6111 				break;	/* for acqrec... loop. */
6112 		}
6113 		if (acqrec != NULL)
6114 			break;	/* for i = 0... loop. */
6115 
6116 		mutex_exit(&bucket->iacqf_lock);
6117 	}
6118 
6119 	if (acqrec == NULL) {
6120 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
6121 			bucket = &sp->s_v6.sdb_acq[i];
6122 			mutex_enter(&bucket->iacqf_lock);
6123 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6124 			    acqrec = acqrec->ipsacq_next) {
6125 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6126 					break;	/* for acqrec... loop. */
6127 			}
6128 			if (acqrec != NULL)
6129 				break;	/* for i = 0... loop. */
6130 
6131 			mutex_exit(&bucket->iacqf_lock);
6132 		}
6133 	}
6134 
6135 
6136 	if (acqrec == NULL)
6137 		return;
6138 
6139 	/*
6140 	 * What do I do with the errno and IP?	I may need mp's services a
6141 	 * little more.	 See sadb_destroy_acquire() for future directions
6142 	 * beyond free the mblk chain on the acquire record.
6143 	 */
6144 
6145 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
6146 	sadb_destroy_acquire(acqrec, ns);
6147 	/* Have to exit mutex here, because of breaking out of for loop. */
6148 	mutex_exit(&bucket->iacqf_lock);
6149 }
6150 
6151 /*
6152  * The following functions work with the replay windows of an SA.  They assume
6153  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
6154  * represents the highest sequence number packet received, and back
6155  * (ipsa->ipsa_replay_wsize) packets.
6156  */
6157 
6158 /*
6159  * Is the replay bit set?
6160  */
6161 static boolean_t
6162 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
6163 {
6164 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6165 
6166 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
6167 }
6168 
6169 /*
6170  * Shift the bits of the replay window over.
6171  */
6172 static void
6173 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6174 {
6175 	int i;
6176 	int jump = ((shift - 1) >> 6) + 1;
6177 
6178 	if (shift == 0)
6179 		return;
6180 
6181 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6182 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6183 			ipsa->ipsa_replay_arr[i + jump] |=
6184 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6185 		}
6186 		ipsa->ipsa_replay_arr[i] <<= shift;
6187 	}
6188 }
6189 
6190 /*
6191  * Set a bit in the bit vector.
6192  */
6193 static void
6194 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6195 {
6196 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6197 
6198 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6199 }
6200 
6201 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
6202 
6203 /*
6204  * Assume caller has NOT done ntohl() already on seq.  Check to see
6205  * if replay sequence number "seq" has been seen already.
6206  */
6207 boolean_t
6208 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6209 {
6210 	boolean_t rc;
6211 	uint32_t diff;
6212 
6213 	if (ipsa->ipsa_replay_wsize == 0)
6214 		return (B_TRUE);
6215 
6216 	/*
6217 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6218 	 */
6219 
6220 	/* Convert sequence number into host order before holding the mutex. */
6221 	seq = ntohl(seq);
6222 
6223 	mutex_enter(&ipsa->ipsa_lock);
6224 
6225 	/* Initialize inbound SA's ipsa_replay field to last one received. */
6226 	if (ipsa->ipsa_replay == 0)
6227 		ipsa->ipsa_replay = 1;
6228 
6229 	if (seq > ipsa->ipsa_replay) {
6230 		/*
6231 		 * I have received a new "highest value received".  Shift
6232 		 * the replay window over.
6233 		 */
6234 		diff = seq - ipsa->ipsa_replay;
6235 		if (diff < ipsa->ipsa_replay_wsize) {
6236 			/* In replay window, shift bits over. */
6237 			ipsa_shift_replay(ipsa, diff);
6238 		} else {
6239 			/* WAY FAR AHEAD, clear bits and start again. */
6240 			bzero(ipsa->ipsa_replay_arr,
6241 			    sizeof (ipsa->ipsa_replay_arr));
6242 		}
6243 		ipsa_set_replay(ipsa, 0);
6244 		ipsa->ipsa_replay = seq;
6245 		rc = B_TRUE;
6246 		goto done;
6247 	}
6248 	diff = ipsa->ipsa_replay - seq;
6249 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6250 		rc = B_FALSE;
6251 		goto done;
6252 	}
6253 	/* Set this packet as seen. */
6254 	ipsa_set_replay(ipsa, diff);
6255 
6256 	rc = B_TRUE;
6257 done:
6258 	mutex_exit(&ipsa->ipsa_lock);
6259 	return (rc);
6260 }
6261 
6262 /*
6263  * "Peek" and see if we should even bother going through the effort of
6264  * running an authentication check on the sequence number passed in.
6265  * this takes into account packets that are below the replay window,
6266  * and collisions with already replayed packets.  Return B_TRUE if it
6267  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6268  * Assume same byte-ordering as sadb_replay_check.
6269  */
6270 boolean_t
6271 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6272 {
6273 	boolean_t rc = B_FALSE;
6274 	uint32_t diff;
6275 
6276 	if (ipsa->ipsa_replay_wsize == 0)
6277 		return (B_TRUE);
6278 
6279 	/*
6280 	 * 0 is 0, regardless of byte order... :)
6281 	 *
6282 	 * If I get 0 on the wire (and there is a replay window) then the
6283 	 * sender most likely wrapped.	This ipsa may need to be marked or
6284 	 * something.
6285 	 */
6286 	if (seq == 0)
6287 		return (B_FALSE);
6288 
6289 	seq = ntohl(seq);
6290 	mutex_enter(&ipsa->ipsa_lock);
6291 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6292 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6293 		goto done;
6294 
6295 	/*
6296 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6297 	 * bother with formalities.  I'm not accepting any more packets
6298 	 * on this SA.
6299 	 */
6300 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6301 		/*
6302 		 * Since we're already holding the lock, update the
6303 		 * expire time ala. sadb_replay_delete() and return.
6304 		 */
6305 		ipsa->ipsa_hardexpiretime = (time_t)1;
6306 		goto done;
6307 	}
6308 
6309 	if (seq <= ipsa->ipsa_replay) {
6310 		/*
6311 		 * This seq is in the replay window.  I'm not below it,
6312 		 * because I already checked for that above!
6313 		 */
6314 		diff = ipsa->ipsa_replay - seq;
6315 		if (ipsa_is_replay_set(ipsa, diff))
6316 			goto done;
6317 	}
6318 	/* Else return B_TRUE, I'm going to advance the window. */
6319 
6320 	rc = B_TRUE;
6321 done:
6322 	mutex_exit(&ipsa->ipsa_lock);
6323 	return (rc);
6324 }
6325 
6326 /*
6327  * Delete a single SA.
6328  *
6329  * For now, use the quick-and-dirty trick of making the association's
6330  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6331  */
6332 void
6333 sadb_replay_delete(ipsa_t *assoc)
6334 {
6335 	mutex_enter(&assoc->ipsa_lock);
6336 	assoc->ipsa_hardexpiretime = (time_t)1;
6337 	mutex_exit(&assoc->ipsa_lock);
6338 }
6339 
6340 /*
6341  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
6342  * down.  The caller will handle the T_BIND_ACK locally.
6343  */
6344 boolean_t
6345 sadb_t_bind_req(queue_t *q, int proto)
6346 {
6347 	struct T_bind_req *tbr;
6348 	mblk_t *mp;
6349 
6350 	mp = allocb_cred(sizeof (struct T_bind_req) + 1, kcred, NOPID);
6351 	if (mp == NULL) {
6352 		/* cmn_err(CE_WARN, */
6353 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
6354 		return (B_FALSE);
6355 	}
6356 	mp->b_datap->db_type = M_PCPROTO;
6357 	tbr = (struct T_bind_req *)mp->b_rptr;
6358 	mp->b_wptr += sizeof (struct T_bind_req);
6359 	tbr->PRIM_type = T_BIND_REQ;
6360 	tbr->ADDR_length = 0;
6361 	tbr->ADDR_offset = 0;
6362 	tbr->CONIND_number = 0;
6363 	*mp->b_wptr = (uint8_t)proto;
6364 	mp->b_wptr++;
6365 
6366 	putnext(q, mp);
6367 	return (B_TRUE);
6368 }
6369 
6370 /*
6371  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
6372  * this is designed to take only a format string with "* %x * %s *", so
6373  * that "spi" is printed first, then "addr" is converted using inet_pton().
6374  *
6375  * This is abstracted out to save the stack space for only when inet_pton()
6376  * is called.  Make sure "spi" is in network order; it usually is when this
6377  * would get called.
6378  */
6379 void
6380 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6381     uint32_t spi, void *addr, int af, netstack_t *ns)
6382 {
6383 	char buf[INET6_ADDRSTRLEN];
6384 
6385 	ASSERT(af == AF_INET6 || af == AF_INET);
6386 
6387 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6388 	    inet_ntop(af, addr, buf, sizeof (buf)));
6389 }
6390 
6391 /*
6392  * Fills in a reference to the policy, if any, from the conn, in *ppp
6393  * Releases a reference to the passed conn_t.
6394  */
6395 static void
6396 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6397 {
6398 	ipsec_policy_t	*pp;
6399 	ipsec_latch_t	*ipl = connp->conn_latch;
6400 
6401 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
6402 		pp = ipl->ipl_out_policy;
6403 		IPPOL_REFHOLD(pp);
6404 	} else {
6405 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
6406 		    connp->conn_netstack);
6407 	}
6408 	*ppp = pp;
6409 	CONN_DEC_REF(connp);
6410 }
6411 
6412 /*
6413  * The following functions scan through active conn_t structures
6414  * and return a reference to the best-matching policy it can find.
6415  * Caller must release the reference.
6416  */
6417 static void
6418 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6419 {
6420 	connf_t *connfp;
6421 	conn_t *connp = NULL;
6422 	ipsec_selector_t portonly;
6423 
6424 	bzero((void *)&portonly, sizeof (portonly));
6425 
6426 	if (sel->ips_local_port == 0)
6427 		return;
6428 
6429 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6430 	    ipst)];
6431 	mutex_enter(&connfp->connf_lock);
6432 
6433 	if (sel->ips_isv4) {
6434 		connp = connfp->connf_head;
6435 		while (connp != NULL) {
6436 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6437 			    sel->ips_local_addr_v4, sel->ips_remote_port,
6438 			    sel->ips_remote_addr_v4))
6439 				break;
6440 			connp = connp->conn_next;
6441 		}
6442 
6443 		if (connp == NULL) {
6444 			/* Try port-only match in IPv6. */
6445 			portonly.ips_local_port = sel->ips_local_port;
6446 			sel = &portonly;
6447 		}
6448 	}
6449 
6450 	if (connp == NULL) {
6451 		connp = connfp->connf_head;
6452 		while (connp != NULL) {
6453 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6454 			    sel->ips_local_addr_v6, sel->ips_remote_port,
6455 			    sel->ips_remote_addr_v6))
6456 				break;
6457 			connp = connp->conn_next;
6458 		}
6459 
6460 		if (connp == NULL) {
6461 			mutex_exit(&connfp->connf_lock);
6462 			return;
6463 		}
6464 	}
6465 
6466 	CONN_INC_REF(connp);
6467 	mutex_exit(&connfp->connf_lock);
6468 
6469 	ipsec_conn_pol(sel, connp, ppp);
6470 }
6471 
6472 static conn_t *
6473 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6474 {
6475 	connf_t *connfp;
6476 	conn_t *connp = NULL;
6477 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6478 
6479 	if (sel->ips_local_port == 0)
6480 		return (NULL);
6481 
6482 	connfp = &ipst->ips_ipcl_bind_fanout[
6483 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6484 	mutex_enter(&connfp->connf_lock);
6485 
6486 	if (sel->ips_isv4) {
6487 		connp = connfp->connf_head;
6488 		while (connp != NULL) {
6489 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6490 			    sel->ips_local_addr_v4, pptr[1]))
6491 				break;
6492 			connp = connp->conn_next;
6493 		}
6494 
6495 		if (connp == NULL) {
6496 			/* Match to all-zeroes. */
6497 			v6addrmatch = &ipv6_all_zeros;
6498 		}
6499 	}
6500 
6501 	if (connp == NULL) {
6502 		connp = connfp->connf_head;
6503 		while (connp != NULL) {
6504 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6505 			    *v6addrmatch, pptr[1]))
6506 				break;
6507 			connp = connp->conn_next;
6508 		}
6509 
6510 		if (connp == NULL) {
6511 			mutex_exit(&connfp->connf_lock);
6512 			return (NULL);
6513 		}
6514 	}
6515 
6516 	CONN_INC_REF(connp);
6517 	mutex_exit(&connfp->connf_lock);
6518 	return (connp);
6519 }
6520 
6521 static void
6522 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6523 {
6524 	connf_t 	*connfp;
6525 	conn_t		*connp;
6526 	uint32_t	ports;
6527 	uint16_t	*pptr = (uint16_t *)&ports;
6528 
6529 	/*
6530 	 * Find TCP state in the following order:
6531 	 * 1.) Connected conns.
6532 	 * 2.) Listeners.
6533 	 *
6534 	 * Even though #2 will be the common case for inbound traffic, only
6535 	 * following this order insures correctness.
6536 	 */
6537 
6538 	if (sel->ips_local_port == 0)
6539 		return;
6540 
6541 	/*
6542 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6543 	 * See ipsec_construct_inverse_acquire() for details.
6544 	 */
6545 	pptr[0] = sel->ips_remote_port;
6546 	pptr[1] = sel->ips_local_port;
6547 
6548 	connfp = &ipst->ips_ipcl_conn_fanout[
6549 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6550 	mutex_enter(&connfp->connf_lock);
6551 	connp = connfp->connf_head;
6552 
6553 	if (sel->ips_isv4) {
6554 		while (connp != NULL) {
6555 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6556 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6557 			    ports))
6558 				break;
6559 			connp = connp->conn_next;
6560 		}
6561 	} else {
6562 		while (connp != NULL) {
6563 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6564 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6565 			    ports))
6566 				break;
6567 			connp = connp->conn_next;
6568 		}
6569 	}
6570 
6571 	if (connp != NULL) {
6572 		CONN_INC_REF(connp);
6573 		mutex_exit(&connfp->connf_lock);
6574 	} else {
6575 		mutex_exit(&connfp->connf_lock);
6576 
6577 		/* Try the listen hash. */
6578 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6579 			return;
6580 	}
6581 
6582 	ipsec_conn_pol(sel, connp, ppp);
6583 }
6584 
6585 static void
6586 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6587     ip_stack_t *ipst)
6588 {
6589 	conn_t		*connp;
6590 	uint32_t	ports;
6591 	uint16_t	*pptr = (uint16_t *)&ports;
6592 
6593 	/*
6594 	 * Find SCP state in the following order:
6595 	 * 1.) Connected conns.
6596 	 * 2.) Listeners.
6597 	 *
6598 	 * Even though #2 will be the common case for inbound traffic, only
6599 	 * following this order insures correctness.
6600 	 */
6601 
6602 	if (sel->ips_local_port == 0)
6603 		return;
6604 
6605 	/*
6606 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6607 	 * See ipsec_construct_inverse_acquire() for details.
6608 	 */
6609 	pptr[0] = sel->ips_remote_port;
6610 	pptr[1] = sel->ips_local_port;
6611 
6612 	if (sel->ips_isv4) {
6613 		in6_addr_t	src, dst;
6614 
6615 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6616 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6617 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6618 		    ipst->ips_netstack->netstack_sctp);
6619 	} else {
6620 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6621 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6622 		    ipst->ips_netstack->netstack_sctp);
6623 	}
6624 	if (connp == NULL)
6625 		return;
6626 	ipsec_conn_pol(sel, connp, ppp);
6627 }
6628 
6629 /*
6630  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6631  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6632  * to PF_KEY.
6633  *
6634  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6635  * ignore prefix lengths in the address extension.  Since we match on first-
6636  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6637  * set addresses to mask out the lower bits, we should get a suitable search
6638  * key for the SPD anyway.  This is the function to change if the assumption
6639  * about suitable search keys is wrong.
6640  */
6641 static int
6642 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6643     sadb_address_t *dstext, int *diagnostic)
6644 {
6645 	struct sockaddr_in *src, *dst;
6646 	struct sockaddr_in6 *src6, *dst6;
6647 
6648 	*diagnostic = 0;
6649 
6650 	bzero(sel, sizeof (*sel));
6651 	sel->ips_protocol = srcext->sadb_address_proto;
6652 	dst = (struct sockaddr_in *)(dstext + 1);
6653 	if (dst->sin_family == AF_INET6) {
6654 		dst6 = (struct sockaddr_in6 *)dst;
6655 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6656 		if (src6->sin6_family != AF_INET6) {
6657 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6658 			return (EINVAL);
6659 		}
6660 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6661 		sel->ips_local_addr_v6 = src6->sin6_addr;
6662 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6663 			sel->ips_is_icmp_inv_acq = 1;
6664 		} else {
6665 			sel->ips_remote_port = dst6->sin6_port;
6666 			sel->ips_local_port = src6->sin6_port;
6667 		}
6668 		sel->ips_isv4 = B_FALSE;
6669 	} else {
6670 		src = (struct sockaddr_in *)(srcext + 1);
6671 		if (src->sin_family != AF_INET) {
6672 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6673 			return (EINVAL);
6674 		}
6675 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6676 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6677 		if (sel->ips_protocol == IPPROTO_ICMP) {
6678 			sel->ips_is_icmp_inv_acq = 1;
6679 		} else {
6680 			sel->ips_remote_port = dst->sin_port;
6681 			sel->ips_local_port = src->sin_port;
6682 		}
6683 		sel->ips_isv4 = B_TRUE;
6684 	}
6685 	return (0);
6686 }
6687 
6688 /*
6689  * We have encapsulation.
6690  * - Lookup tun_t by address and look for an associated
6691  *   tunnel policy
6692  * - If there are inner selectors
6693  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6694  *   - Look up tunnel policy based on selectors
6695  * - Else
6696  *   - Sanity check the negotation
6697  *   - If appropriate, fall through to global policy
6698  */
6699 static int
6700 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6701     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6702     int *diagnostic, netstack_t *ns)
6703 {
6704 	int err;
6705 	ipsec_policy_head_t *polhead;
6706 
6707 	*diagnostic = 0;
6708 
6709 	/* Check for inner selectors and act appropriately */
6710 
6711 	if (innsrcext != NULL) {
6712 		/* Inner selectors present */
6713 		ASSERT(inndstext != NULL);
6714 		if ((itp == NULL) ||
6715 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6716 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6717 			/*
6718 			 * If inner packet selectors, we must have negotiate
6719 			 * tunnel and active policy.  If the tunnel has
6720 			 * transport-mode policy set on it, or has no policy,
6721 			 * fail.
6722 			 */
6723 			return (ENOENT);
6724 		} else {
6725 			/*
6726 			 * Reset "sel" to indicate inner selectors.  Pass
6727 			 * inner PF_KEY address extensions for this to happen.
6728 			 */
6729 			if ((err = ipsec_get_inverse_acquire_sel(sel,
6730 			    innsrcext, inndstext, diagnostic)) != 0)
6731 				return (err);
6732 			/*
6733 			 * Now look for a tunnel policy based on those inner
6734 			 * selectors.  (Common code is below.)
6735 			 */
6736 		}
6737 	} else {
6738 		/* No inner selectors present */
6739 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6740 			/*
6741 			 * Transport mode negotiation with no tunnel policy
6742 			 * configured - return to indicate a global policy
6743 			 * check is needed.
6744 			 */
6745 			return (0);
6746 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6747 			/* Tunnel mode set with no inner selectors. */
6748 			return (ENOENT);
6749 		}
6750 		/*
6751 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6752 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6753 		 * itp with policy set based on any match, so don't bother
6754 		 * changing fields in "sel".
6755 		 */
6756 	}
6757 
6758 	ASSERT(itp != NULL);
6759 	polhead = itp->itp_policy;
6760 	ASSERT(polhead != NULL);
6761 	rw_enter(&polhead->iph_lock, RW_READER);
6762 	*ppp = ipsec_find_policy_head(NULL, polhead,
6763 	    IPSEC_TYPE_INBOUND, sel, ns);
6764 	rw_exit(&polhead->iph_lock);
6765 
6766 	/*
6767 	 * Don't default to global if we didn't find a matching policy entry.
6768 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6769 	 */
6770 	if (*ppp == NULL)
6771 		return (ENOENT);
6772 
6773 	return (0);
6774 }
6775 
6776 static void
6777 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6778     ip_stack_t *ipst)
6779 {
6780 	boolean_t	isv4 = sel->ips_isv4;
6781 	connf_t		*connfp;
6782 	conn_t		*connp;
6783 
6784 	if (isv4) {
6785 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
6786 	} else {
6787 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6788 	}
6789 
6790 	mutex_enter(&connfp->connf_lock);
6791 	for (connp = connfp->connf_head; connp != NULL;
6792 	    connp = connp->conn_next) {
6793 		if (!((isv4 && !((connp->conn_src == 0 ||
6794 		    connp->conn_src == sel->ips_local_addr_v4) &&
6795 		    (connp->conn_rem == 0 ||
6796 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
6797 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
6798 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
6799 		    &sel->ips_local_addr_v6)) &&
6800 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
6801 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
6802 		    &sel->ips_remote_addr_v6)))))) {
6803 			break;
6804 		}
6805 	}
6806 	if (connp == NULL) {
6807 		mutex_exit(&connfp->connf_lock);
6808 		return;
6809 	}
6810 
6811 	CONN_INC_REF(connp);
6812 	mutex_exit(&connfp->connf_lock);
6813 
6814 	ipsec_conn_pol(sel, connp, ppp);
6815 }
6816 
6817 /*
6818  * Construct an inverse ACQUIRE reply based on:
6819  *
6820  * 1.) Current global policy.
6821  * 2.) An conn_t match depending on what all was passed in the extv[].
6822  * 3.) A tunnel's policy head.
6823  * ...
6824  * N.) Other stuff TBD (e.g. identities)
6825  *
6826  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6827  * in this function so the caller can extract them where appropriately.
6828  *
6829  * The SRC address is the local one - just like an outbound ACQUIRE message.
6830  */
6831 mblk_t *
6832 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6833     netstack_t *ns)
6834 {
6835 	int err;
6836 	int diagnostic;
6837 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6838 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6839 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6840 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6841 	struct sockaddr_in6 *src, *dst;
6842 	struct sockaddr_in6 *isrc, *idst;
6843 	ipsec_tun_pol_t *itp = NULL;
6844 	ipsec_policy_t *pp = NULL;
6845 	ipsec_selector_t sel, isel;
6846 	mblk_t *retmp = NULL;
6847 	ip_stack_t	*ipst = ns->netstack_ip;
6848 
6849 	/* Normalize addresses */
6850 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6851 	    == KS_IN_ADDR_UNKNOWN) {
6852 		err = EINVAL;
6853 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6854 		goto bail;
6855 	}
6856 	src = (struct sockaddr_in6 *)(srcext + 1);
6857 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6858 	    == KS_IN_ADDR_UNKNOWN) {
6859 		err = EINVAL;
6860 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6861 		goto bail;
6862 	}
6863 	dst = (struct sockaddr_in6 *)(dstext + 1);
6864 	if (src->sin6_family != dst->sin6_family) {
6865 		err = EINVAL;
6866 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6867 		goto bail;
6868 	}
6869 
6870 	/* Check for tunnel mode and act appropriately */
6871 	if (innsrcext != NULL) {
6872 		if (inndstext == NULL) {
6873 			err = EINVAL;
6874 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6875 			goto bail;
6876 		}
6877 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6878 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6879 			err = EINVAL;
6880 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6881 			goto bail;
6882 		}
6883 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6884 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6885 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6886 			err = EINVAL;
6887 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6888 			goto bail;
6889 		}
6890 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6891 		if (isrc->sin6_family != idst->sin6_family) {
6892 			err = EINVAL;
6893 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6894 			goto bail;
6895 		}
6896 		if (isrc->sin6_family != AF_INET &&
6897 		    isrc->sin6_family != AF_INET6) {
6898 			err = EINVAL;
6899 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6900 			goto bail;
6901 		}
6902 	} else if (inndstext != NULL) {
6903 		err = EINVAL;
6904 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6905 		goto bail;
6906 	}
6907 
6908 	/* Get selectors first, based on outer addresses */
6909 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6910 	if (err != 0)
6911 		goto bail;
6912 
6913 	/* Check for tunnel mode mismatches. */
6914 	if (innsrcext != NULL &&
6915 	    ((isrc->sin6_family == AF_INET &&
6916 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6917 	    (isrc->sin6_family == AF_INET6 &&
6918 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6919 		err = EPROTOTYPE;
6920 		goto bail;
6921 	}
6922 
6923 	/*
6924 	 * Okay, we have the addresses and other selector information.
6925 	 * Let's first find a conn...
6926 	 */
6927 	pp = NULL;
6928 	switch (sel.ips_protocol) {
6929 	case IPPROTO_TCP:
6930 		ipsec_tcp_pol(&sel, &pp, ipst);
6931 		break;
6932 	case IPPROTO_UDP:
6933 		ipsec_udp_pol(&sel, &pp, ipst);
6934 		break;
6935 	case IPPROTO_SCTP:
6936 		ipsec_sctp_pol(&sel, &pp, ipst);
6937 		break;
6938 	case IPPROTO_ENCAP:
6939 	case IPPROTO_IPV6:
6940 		/*
6941 		 * Assume sel.ips_remote_addr_* has the right address at
6942 		 * that exact position.
6943 		 */
6944 		itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
6945 		    (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
6946 		    ipst);
6947 
6948 		if (innsrcext == NULL) {
6949 			/*
6950 			 * Transport-mode tunnel, make sure we fake out isel
6951 			 * to contain something based on the outer protocol.
6952 			 */
6953 			bzero(&isel, sizeof (isel));
6954 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6955 		} /* Else isel is initialized by ipsec_tun_pol(). */
6956 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6957 		    &diagnostic, ns);
6958 		/*
6959 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6960 		 * may be.
6961 		 */
6962 		if (err != 0)
6963 			goto bail;
6964 		break;
6965 	default:
6966 		ipsec_oth_pol(&sel, &pp, ipst);
6967 		break;
6968 	}
6969 
6970 	/*
6971 	 * If we didn't find a matching conn_t or other policy head, take a
6972 	 * look in the global policy.
6973 	 */
6974 	if (pp == NULL) {
6975 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6976 		    ns);
6977 		if (pp == NULL) {
6978 			/* There's no global policy. */
6979 			err = ENOENT;
6980 			diagnostic = 0;
6981 			goto bail;
6982 		}
6983 	}
6984 
6985 	/*
6986 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6987 	 * message based on that, fix fields where appropriate,
6988 	 * and return the message.
6989 	 */
6990 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6991 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6992 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6993 	if (pp != NULL) {
6994 		IPPOL_REFRELE(pp, ns);
6995 	}
6996 	ASSERT(err == 0 && diagnostic == 0);
6997 	if (retmp == NULL)
6998 		err = ENOMEM;
6999 bail:
7000 	if (itp != NULL) {
7001 		ITP_REFRELE(itp, ns);
7002 	}
7003 	samsg->sadb_msg_errno = (uint8_t)err;
7004 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
7005 	return (retmp);
7006 }
7007 
7008 /*
7009  * ipsa_lpkt is a one-element queue, only manipulated by the next two
7010  * functions.  They have to hold the ipsa_lock because of potential races
7011  * between key management using SADB_UPDATE, and inbound packets that may
7012  * queue up on the larval SA (hence the 'l' in "lpkt").
7013  */
7014 
7015 /*
7016  * sadb_set_lpkt: Return TRUE if we can swap in a value to ipsa->ipsa_lpkt and
7017  * freemsg the previous value.  Return FALSE if we lost the race and the SA is
7018  * in a non-LARVAL state.  free clue: ip_drop_packet(NULL) is safe.
7019  */
7020 boolean_t
7021 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
7022 {
7023 	mblk_t *opkt;
7024 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
7025 	boolean_t is_larval;
7026 
7027 	/*
7028 	 * Check the packet's netstack id in case we go asynch with a
7029 	 * taskq_dispatch.
7030 	 */
7031 	ASSERT(((ipsec_in_t *)npkt->b_rptr)->ipsec_in_type == IPSEC_IN);
7032 	ASSERT(((ipsec_in_t *)npkt->b_rptr)->ipsec_in_stackid ==
7033 	    ns->netstack_stackid);
7034 
7035 	mutex_enter(&ipsa->ipsa_lock);
7036 	is_larval = (ipsa->ipsa_state == IPSA_STATE_LARVAL);
7037 	if (is_larval) {
7038 		opkt = ipsa->ipsa_lpkt;
7039 		ipsa->ipsa_lpkt = npkt;
7040 	} else {
7041 		/* We lost the race. */
7042 		opkt = NULL;
7043 	}
7044 	mutex_exit(&ipsa->ipsa_lock);
7045 
7046 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
7047 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
7048 	    &ipss->ipsec_sadb_dropper);
7049 	return (is_larval);
7050 }
7051 
7052 /*
7053  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
7054  * previous value.
7055  */
7056 mblk_t *
7057 sadb_clear_lpkt(ipsa_t *ipsa)
7058 {
7059 	mblk_t *opkt;
7060 
7061 	mutex_enter(&ipsa->ipsa_lock);
7062 	opkt = ipsa->ipsa_lpkt;
7063 	ipsa->ipsa_lpkt = NULL;
7064 	mutex_exit(&ipsa->ipsa_lock);
7065 
7066 	return (opkt);
7067 }
7068 
7069 /*
7070  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
7071  */
7072 void
7073 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, netstack_t *ns)
7074 {
7075 	ipsec_stack_t   *ipss = ns->netstack_ipsec;
7076 	extern void (*cl_inet_idlesa)(netstackid_t, uint8_t, uint32_t,
7077 	    sa_family_t, in6_addr_t, in6_addr_t, void *);
7078 	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
7079 	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
7080 
7081 	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
7082 
7083 	if (cl_inet_idlesa == NULL) {
7084 		ip_drop_packet(bpkt, B_TRUE, NULL, NULL,
7085 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
7086 		    &ipss->ipsec_sadb_dropper);
7087 		return;
7088 	}
7089 
7090 	cl_inet_idlesa(ns->netstack_stackid,
7091 	    (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
7092 	    ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
7093 
7094 	/*
7095 	 * Check the packet's netstack id in case we go asynch with a
7096 	 * taskq_dispatch.
7097 	 */
7098 	ASSERT(((ipsec_in_t *)bpkt->b_rptr)->ipsec_in_type == IPSEC_IN);
7099 	ASSERT(((ipsec_in_t *)bpkt->b_rptr)->ipsec_in_stackid ==
7100 	    ns->netstack_stackid);
7101 
7102 	mutex_enter(&ipsa->ipsa_lock);
7103 	ipsa->ipsa_mblkcnt++;
7104 	if (ipsa->ipsa_bpkt_head == NULL) {
7105 		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
7106 	} else {
7107 		ipsa->ipsa_bpkt_tail->b_next = bpkt;
7108 		ipsa->ipsa_bpkt_tail = bpkt;
7109 		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
7110 			mblk_t *tmp;
7111 			tmp = ipsa->ipsa_bpkt_head;
7112 			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
7113 			ip_drop_packet(tmp, B_TRUE, NULL, NULL,
7114 			    DROPPER(ipss, ipds_sadb_inidle_overflow),
7115 			    &ipss->ipsec_sadb_dropper);
7116 			ipsa->ipsa_mblkcnt --;
7117 		}
7118 	}
7119 	mutex_exit(&ipsa->ipsa_lock);
7120 
7121 }
7122 
7123 /*
7124  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
7125  * and put into STREAMS again.
7126  */
7127 void
7128 sadb_clear_buf_pkt(void *ipkt)
7129 {
7130 	mblk_t	*tmp, *buf_pkt;
7131 	netstack_t *ns;
7132 	ipsec_in_t *ii;
7133 
7134 	buf_pkt = (mblk_t *)ipkt;
7135 
7136 	ii = (ipsec_in_t *)buf_pkt->b_rptr;
7137 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
7138 	ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
7139 	if (ns != NULL && ns != ii->ipsec_in_ns) {
7140 		netstack_rele(ns);
7141 		ns = NULL;  /* For while-loop below. */
7142 	}
7143 
7144 	while (buf_pkt != NULL) {
7145 		tmp = buf_pkt->b_next;
7146 		buf_pkt->b_next = NULL;
7147 		if (ns != NULL)
7148 			ip_fanout_proto_again(buf_pkt, NULL, NULL, NULL);
7149 		else
7150 			freemsg(buf_pkt);
7151 		buf_pkt = tmp;
7152 	}
7153 	if (ns != NULL)
7154 		netstack_rele(ns);
7155 }
7156 /*
7157  * Walker callback used by sadb_alg_update() to free/create crypto
7158  * context template when a crypto software provider is removed or
7159  * added.
7160  */
7161 
7162 struct sadb_update_alg_state {
7163 	ipsec_algtype_t alg_type;
7164 	uint8_t alg_id;
7165 	boolean_t is_added;
7166 };
7167 
7168 static void
7169 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7170 {
7171 	struct sadb_update_alg_state *update_state =
7172 	    (struct sadb_update_alg_state *)cookie;
7173 	crypto_ctx_template_t *ctx_tmpl = NULL;
7174 
7175 	ASSERT(MUTEX_HELD(&head->isaf_lock));
7176 
7177 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
7178 		return;
7179 
7180 	mutex_enter(&entry->ipsa_lock);
7181 
7182 	switch (update_state->alg_type) {
7183 	case IPSEC_ALG_AUTH:
7184 		if (entry->ipsa_auth_alg == update_state->alg_id)
7185 			ctx_tmpl = &entry->ipsa_authtmpl;
7186 		break;
7187 	case IPSEC_ALG_ENCR:
7188 		if (entry->ipsa_encr_alg == update_state->alg_id)
7189 			ctx_tmpl = &entry->ipsa_encrtmpl;
7190 		break;
7191 	default:
7192 		ctx_tmpl = NULL;
7193 	}
7194 
7195 	if (ctx_tmpl == NULL) {
7196 		mutex_exit(&entry->ipsa_lock);
7197 		return;
7198 	}
7199 
7200 	/*
7201 	 * The context template of the SA may be affected by the change
7202 	 * of crypto provider.
7203 	 */
7204 	if (update_state->is_added) {
7205 		/* create the context template if not already done */
7206 		if (*ctx_tmpl == NULL) {
7207 			(void) ipsec_create_ctx_tmpl(entry,
7208 			    update_state->alg_type);
7209 		}
7210 	} else {
7211 		/*
7212 		 * The crypto provider was removed. If the context template
7213 		 * exists but it is no longer valid, free it.
7214 		 */
7215 		if (*ctx_tmpl != NULL)
7216 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7217 	}
7218 
7219 	mutex_exit(&entry->ipsa_lock);
7220 }
7221 
7222 /*
7223  * Invoked by IP when an software crypto provider has been updated.
7224  * The type and id of the corresponding algorithm is passed as argument.
7225  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7226  * removed. The function updates the SADB and free/creates the
7227  * context templates associated with SAs if needed.
7228  */
7229 
7230 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
7231     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7232 	&update_state)
7233 
7234 void
7235 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7236     netstack_t *ns)
7237 {
7238 	struct sadb_update_alg_state update_state;
7239 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7240 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7241 
7242 	update_state.alg_type = alg_type;
7243 	update_state.alg_id = alg_id;
7244 	update_state.is_added = is_added;
7245 
7246 	if (alg_type == IPSEC_ALG_AUTH) {
7247 		/* walk the AH tables only for auth. algorithm changes */
7248 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7249 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7250 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7251 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7252 	}
7253 
7254 	/* walk the ESP tables */
7255 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7256 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7257 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7258 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7259 }
7260 
7261 /*
7262  * Creates a context template for the specified SA. This function
7263  * is called when an SA is created and when a context template needs
7264  * to be created due to a change of software provider.
7265  */
7266 int
7267 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7268 {
7269 	ipsec_alginfo_t *alg;
7270 	crypto_mechanism_t mech;
7271 	crypto_key_t *key;
7272 	crypto_ctx_template_t *sa_tmpl;
7273 	int rv;
7274 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7275 
7276 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
7277 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7278 
7279 	/* get pointers to the algorithm info, context template, and key */
7280 	switch (alg_type) {
7281 	case IPSEC_ALG_AUTH:
7282 		key = &sa->ipsa_kcfauthkey;
7283 		sa_tmpl = &sa->ipsa_authtmpl;
7284 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7285 		break;
7286 	case IPSEC_ALG_ENCR:
7287 		key = &sa->ipsa_kcfencrkey;
7288 		sa_tmpl = &sa->ipsa_encrtmpl;
7289 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7290 		break;
7291 	default:
7292 		alg = NULL;
7293 	}
7294 
7295 	if (alg == NULL || !ALG_VALID(alg))
7296 		return (EINVAL);
7297 
7298 	/* initialize the mech info structure for the framework */
7299 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7300 	mech.cm_type = alg->alg_mech_type;
7301 	mech.cm_param = NULL;
7302 	mech.cm_param_len = 0;
7303 
7304 	/* create a new context template */
7305 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7306 
7307 	/*
7308 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7309 	 * providers are available for that mechanism. In that case
7310 	 * we don't fail, and will generate the context template from
7311 	 * the framework callback when a software provider for that
7312 	 * mechanism registers.
7313 	 *
7314 	 * The context template is assigned the special value
7315 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7316 	 * lack of memory. No attempt will be made to use
7317 	 * the context template if it is set to this value.
7318 	 */
7319 	if (rv == CRYPTO_HOST_MEMORY) {
7320 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7321 	} else if (rv != CRYPTO_SUCCESS) {
7322 		*sa_tmpl = NULL;
7323 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7324 			return (EINVAL);
7325 	}
7326 
7327 	return (0);
7328 }
7329 
7330 /*
7331  * Destroy the context template of the specified algorithm type
7332  * of the specified SA. Must be called while holding the SA lock.
7333  */
7334 void
7335 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7336 {
7337 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7338 
7339 	if (alg_type == IPSEC_ALG_AUTH) {
7340 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7341 			sa->ipsa_authtmpl = NULL;
7342 		else if (sa->ipsa_authtmpl != NULL) {
7343 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7344 			sa->ipsa_authtmpl = NULL;
7345 		}
7346 	} else {
7347 		ASSERT(alg_type == IPSEC_ALG_ENCR);
7348 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7349 			sa->ipsa_encrtmpl = NULL;
7350 		else if (sa->ipsa_encrtmpl != NULL) {
7351 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7352 			sa->ipsa_encrtmpl = NULL;
7353 		}
7354 	}
7355 }
7356 
7357 /*
7358  * Use the kernel crypto framework to check the validity of a key received
7359  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7360  */
7361 int
7362 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7363     boolean_t is_auth, int *diag)
7364 {
7365 	crypto_mechanism_t mech;
7366 	crypto_key_t crypto_key;
7367 	int crypto_rc;
7368 
7369 	mech.cm_type = mech_type;
7370 	mech.cm_param = NULL;
7371 	mech.cm_param_len = 0;
7372 
7373 	crypto_key.ck_format = CRYPTO_KEY_RAW;
7374 	crypto_key.ck_data = sadb_key + 1;
7375 	crypto_key.ck_length = sadb_key->sadb_key_bits;
7376 
7377 	crypto_rc = crypto_key_check(&mech, &crypto_key);
7378 
7379 	switch (crypto_rc) {
7380 	case CRYPTO_SUCCESS:
7381 		return (0);
7382 	case CRYPTO_MECHANISM_INVALID:
7383 	case CRYPTO_MECH_NOT_SUPPORTED:
7384 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7385 		    SADB_X_DIAGNOSTIC_BAD_EALG;
7386 		break;
7387 	case CRYPTO_KEY_SIZE_RANGE:
7388 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7389 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7390 		break;
7391 	case CRYPTO_WEAK_KEY:
7392 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7393 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7394 		break;
7395 	}
7396 
7397 	return (-1);
7398 }
7399 /*
7400  * If this is an outgoing SA then add some fuzz to the
7401  * SOFT EXPIRE time. The reason for this is to stop
7402  * peers trying to renegotiate SOFT expiring SA's at
7403  * the same time. The amount of fuzz needs to be at
7404  * least 8 seconds which is the typical interval
7405  * sadb_ager(), although this is only a guide as it
7406  * selftunes.
7407  */
7408 void
7409 lifetime_fuzz(ipsa_t *assoc)
7410 {
7411 	uint8_t rnd;
7412 
7413 	if (assoc->ipsa_softaddlt == 0)
7414 		return;
7415 
7416 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7417 	rnd = (rnd & 0xF) + 8;
7418 	assoc->ipsa_softexpiretime -= rnd;
7419 	assoc->ipsa_softaddlt -= rnd;
7420 }
7421 void
7422 destroy_ipsa_pair(ipsap_t *ipsapp)
7423 {
7424 	if (ipsapp == NULL)
7425 		return;
7426 
7427 	/*
7428 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7429 	 * them in { }.
7430 	 */
7431 	if (ipsapp->ipsap_sa_ptr != NULL) {
7432 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7433 	}
7434 	if (ipsapp->ipsap_psa_ptr != NULL) {
7435 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7436 	}
7437 
7438 	kmem_free(ipsapp, sizeof (*ipsapp));
7439 }
7440 
7441 /*
7442  * The sadb_ager() function walks through the hash tables of SA's and ages
7443  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7444  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7445  * SA appears in both the inbound and outbound tables because its not possible
7446  * to determine its direction) are placed on a list when they expire. This is
7447  * to ensure that pair/peer SA's are reaped at the same time, even if they
7448  * expire at different times.
7449  *
7450  * This function is called twice by sadb_ager(), one after processing the
7451  * inbound table, then again after processing the outbound table.
7452  */
7453 void
7454 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7455 {
7456 	templist_t *listptr;
7457 	int outhash;
7458 	isaf_t *bucket;
7459 	boolean_t haspeer;
7460 	ipsa_t *peer_assoc, *dying;
7461 	/*
7462 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7463 	 * is address independent.
7464 	 */
7465 	while (haspeerlist != NULL) {
7466 		/* "dying" contains the SA that has a peer. */
7467 		dying = haspeerlist->ipsa;
7468 		haspeer = (dying->ipsa_haspeer);
7469 		listptr = haspeerlist;
7470 		haspeerlist = listptr->next;
7471 		kmem_free(listptr, sizeof (*listptr));
7472 		/*
7473 		 * Pick peer bucket based on addrfam.
7474 		 */
7475 		if (outbound) {
7476 			if (haspeer)
7477 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7478 			else
7479 				bucket = INBOUND_BUCKET(sp,
7480 				    dying->ipsa_otherspi);
7481 		} else { /* inbound */
7482 			if (haspeer) {
7483 				if (dying->ipsa_addrfam == AF_INET6) {
7484 					outhash = OUTBOUND_HASH_V6(sp,
7485 					    *((in6_addr_t *)&dying->
7486 					    ipsa_dstaddr));
7487 				} else {
7488 					outhash = OUTBOUND_HASH_V4(sp,
7489 					    *((ipaddr_t *)&dying->
7490 					    ipsa_dstaddr));
7491 				}
7492 			} else if (dying->ipsa_addrfam == AF_INET6) {
7493 				outhash = OUTBOUND_HASH_V6(sp,
7494 				    *((in6_addr_t *)&dying->
7495 				    ipsa_srcaddr));
7496 			} else {
7497 				outhash = OUTBOUND_HASH_V4(sp,
7498 				    *((ipaddr_t *)&dying->
7499 				    ipsa_srcaddr));
7500 			}
7501 		bucket = &(sp->sdb_of[outhash]);
7502 		}
7503 
7504 		mutex_enter(&bucket->isaf_lock);
7505 		/*
7506 		 * "haspeer" SA's have the same src/dst address ordering,
7507 		 * "paired" SA's have the src/dst addresses reversed.
7508 		 */
7509 		if (haspeer) {
7510 			peer_assoc = ipsec_getassocbyspi(bucket,
7511 			    dying->ipsa_spi, dying->ipsa_srcaddr,
7512 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7513 		} else {
7514 			peer_assoc = ipsec_getassocbyspi(bucket,
7515 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7516 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7517 		}
7518 
7519 		mutex_exit(&bucket->isaf_lock);
7520 		if (peer_assoc != NULL) {
7521 			mutex_enter(&peer_assoc->ipsa_lock);
7522 			mutex_enter(&dying->ipsa_lock);
7523 			if (!haspeer) {
7524 				/*
7525 				 * Only SA's which have a "peer" or are
7526 				 * "paired" end up on this list, so this
7527 				 * must be a "paired" SA, update the flags
7528 				 * to break the pair.
7529 				 */
7530 				peer_assoc->ipsa_otherspi = 0;
7531 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7532 				dying->ipsa_otherspi = 0;
7533 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7534 			}
7535 			if (haspeer || outbound) {
7536 				/*
7537 				 * Update the state of the "inbound" SA when
7538 				 * the "outbound" SA has expired. Don't update
7539 				 * the "outbound" SA when the "inbound" SA
7540 				 * SA expires because setting the hard_addtime
7541 				 * below will cause this to happen.
7542 				 */
7543 				peer_assoc->ipsa_state = dying->ipsa_state;
7544 			}
7545 			if (dying->ipsa_state == IPSA_STATE_DEAD)
7546 				peer_assoc->ipsa_hardexpiretime = 1;
7547 
7548 			mutex_exit(&dying->ipsa_lock);
7549 			mutex_exit(&peer_assoc->ipsa_lock);
7550 			IPSA_REFRELE(peer_assoc);
7551 		}
7552 		IPSA_REFRELE(dying);
7553 	}
7554 }
7555 
7556 /*
7557  * Ensure that the IV used for CCM mode never repeats. The IV should
7558  * only be updated by this function. Also check to see if the IV
7559  * is about to wrap and generate a SOFT Expire. This function is only
7560  * called for outgoing packets, the IV for incomming packets is taken
7561  * from the wire. If the outgoing SA needs to be expired, update
7562  * the matching incomming SA.
7563  */
7564 boolean_t
7565 update_iv(uint8_t *iv_ptr, queue_t *pfkey_q, ipsa_t *assoc,
7566     ipsecesp_stack_t *espstack)
7567 {
7568 	boolean_t rc = B_TRUE;
7569 	isaf_t *inbound_bucket;
7570 	sadb_t *sp;
7571 	ipsa_t *pair_sa = NULL;
7572 	int sa_new_state = 0;
7573 
7574 	/* For non counter modes, the IV is random data. */
7575 	if (!(assoc->ipsa_flags & IPSA_F_COUNTERMODE)) {
7576 		(void) random_get_pseudo_bytes(iv_ptr, assoc->ipsa_iv_len);
7577 		return (rc);
7578 	}
7579 
7580 	mutex_enter(&assoc->ipsa_lock);
7581 
7582 	(*assoc->ipsa_iv)++;
7583 
7584 	if (*assoc->ipsa_iv == assoc->ipsa_iv_hardexpire) {
7585 		sa_new_state = IPSA_STATE_DEAD;
7586 		rc = B_FALSE;
7587 	} else if (*assoc->ipsa_iv == assoc->ipsa_iv_softexpire) {
7588 		if (assoc->ipsa_state != IPSA_STATE_DYING) {
7589 			/*
7590 			 * This SA may have already been expired when its
7591 			 * PAIR_SA expired.
7592 			 */
7593 			sa_new_state = IPSA_STATE_DYING;
7594 		}
7595 	}
7596 	if (sa_new_state) {
7597 		/*
7598 		 * If there is a state change, we need to update this SA
7599 		 * and its "pair", we can find the bucket for the "pair" SA
7600 		 * while holding the ipsa_t mutex, but we won't actually
7601 		 * update anything untill the ipsa_t mutex has been released
7602 		 * for _this_ SA.
7603 		 */
7604 		assoc->ipsa_state = sa_new_state;
7605 		if (assoc->ipsa_addrfam == AF_INET6) {
7606 			sp = &espstack->esp_sadb.s_v6;
7607 		} else {
7608 			sp = &espstack->esp_sadb.s_v4;
7609 		}
7610 		inbound_bucket = INBOUND_BUCKET(sp, assoc->ipsa_otherspi);
7611 		sadb_expire_assoc(pfkey_q, assoc);
7612 	}
7613 	if (rc == B_TRUE)
7614 		bcopy(assoc->ipsa_iv, iv_ptr, assoc->ipsa_iv_len);
7615 
7616 	mutex_exit(&assoc->ipsa_lock);
7617 
7618 	if (sa_new_state) {
7619 		/* Find the inbound SA, need to lock hash bucket. */
7620 		mutex_enter(&inbound_bucket->isaf_lock);
7621 		pair_sa = ipsec_getassocbyspi(inbound_bucket,
7622 		    assoc->ipsa_otherspi, assoc->ipsa_dstaddr,
7623 		    assoc->ipsa_srcaddr, assoc->ipsa_addrfam);
7624 		mutex_exit(&inbound_bucket->isaf_lock);
7625 		if (pair_sa != NULL) {
7626 			mutex_enter(&pair_sa->ipsa_lock);
7627 			pair_sa->ipsa_state = sa_new_state;
7628 			mutex_exit(&pair_sa->ipsa_lock);
7629 			IPSA_REFRELE(pair_sa);
7630 		}
7631 	}
7632 
7633 	return (rc);
7634 }
7635 
7636 void
7637 ccm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7638     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7639 {
7640 	uchar_t *nonce;
7641 	crypto_mechanism_t *combined_mech;
7642 	CK_AES_CCM_PARAMS *params;
7643 
7644 	combined_mech = (crypto_mechanism_t *)cm_mech;
7645 	params = (CK_AES_CCM_PARAMS *)(combined_mech + 1);
7646 	nonce = (uchar_t *)(params + 1);
7647 	params->ulMACSize = assoc->ipsa_mac_len;
7648 	params->ulNonceSize = assoc->ipsa_nonce_len;
7649 	params->ulAuthDataSize = sizeof (esph_t);
7650 	params->ulDataSize = data_len;
7651 	params->nonce = nonce;
7652 	params->authData = esph;
7653 
7654 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7655 	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
7656 	cm_mech->combined_mech.cm_param = (caddr_t)params;
7657 	/* See gcm_params_init() for comments. */
7658 	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7659 	nonce += assoc->ipsa_saltlen;
7660 	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7661 	crypto_data->cd_miscdata = NULL;
7662 }
7663 
7664 /* ARGSUSED */
7665 void
7666 cbc_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7667     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7668 {
7669 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7670 	cm_mech->combined_mech.cm_param_len = 0;
7671 	cm_mech->combined_mech.cm_param = NULL;
7672 	crypto_data->cd_miscdata = (char *)iv_ptr;
7673 }
7674 
7675 /* ARGSUSED */
7676 void
7677 gcm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7678     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7679 {
7680 	uchar_t *nonce;
7681 	crypto_mechanism_t *combined_mech;
7682 	CK_AES_GCM_PARAMS *params;
7683 
7684 	combined_mech = (crypto_mechanism_t *)cm_mech;
7685 	params = (CK_AES_GCM_PARAMS *)(combined_mech + 1);
7686 	nonce = (uchar_t *)(params + 1);
7687 
7688 	params->pIv = nonce;
7689 	params->ulIvLen = assoc->ipsa_nonce_len;
7690 	params->ulIvBits = SADB_8TO1(assoc->ipsa_nonce_len);
7691 	params->pAAD = esph;
7692 	params->ulAADLen = sizeof (esph_t);
7693 	params->ulTagBits = SADB_8TO1(assoc->ipsa_mac_len);
7694 
7695 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7696 	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
7697 	cm_mech->combined_mech.cm_param = (caddr_t)params;
7698 	/*
7699 	 * Create the nonce, which is made up of the salt and the IV.
7700 	 * Copy the salt from the SA and the IV from the packet.
7701 	 * For inbound packets we copy the IV from the packet because it
7702 	 * was set by the sending system, for outbound packets we copy the IV
7703 	 * from the packet because the IV in the SA may be changed by another
7704 	 * thread, the IV in the packet was created while holding a mutex.
7705 	 */
7706 	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7707 	nonce += assoc->ipsa_saltlen;
7708 	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7709 	crypto_data->cd_miscdata = NULL;
7710 }
7711