xref: /illumos-gate/usr/src/uts/common/inet/ip/sadb.c (revision 47842382d52f28aa3173aa6b511781c322ccb6a2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/errno.h>
30 #include <sys/ddi.h>
31 #include <sys/debug.h>
32 #include <sys/cmn_err.h>
33 #include <sys/stream.h>
34 #include <sys/strlog.h>
35 #include <sys/kmem.h>
36 #include <sys/sunddi.h>
37 #include <sys/tihdr.h>
38 #include <sys/atomic.h>
39 #include <sys/socket.h>
40 #include <sys/sysmacros.h>
41 #include <sys/crypto/common.h>
42 #include <sys/crypto/api.h>
43 #include <sys/zone.h>
44 #include <netinet/in.h>
45 #include <net/if.h>
46 #include <net/pfkeyv2.h>
47 #include <inet/common.h>
48 #include <netinet/ip6.h>
49 #include <inet/ip.h>
50 #include <inet/ip_ire.h>
51 #include <inet/ip6.h>
52 #include <inet/ipsec_info.h>
53 #include <inet/tcp.h>
54 #include <inet/sadb.h>
55 #include <inet/ipsec_impl.h>
56 #include <inet/ipsecah.h>
57 #include <inet/ipsecesp.h>
58 #include <sys/random.h>
59 #include <sys/dlpi.h>
60 #include <sys/iphada.h>
61 #include <inet/ip_if.h>
62 #include <inet/ipdrop.h>
63 #include <inet/ipclassifier.h>
64 #include <inet/sctp_ip.h>
65 #include <inet/tun.h>
66 
67 /*
68  * This source file contains Security Association Database (SADB) common
69  * routines.  They are linked in with the AH module.  Since AH has no chance
70  * of falling under export control, it was safe to link it in there.
71  */
72 
73 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
74     ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
75 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
76 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
77 static void sadb_drain_torchq(queue_t *, mblk_t *);
78 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
79 			    netstack_t *);
80 static void sadb_destroy(sadb_t *, netstack_t *);
81 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
82 
83 static time_t sadb_add_time(time_t, uint64_t);
84 static void lifetime_fuzz(ipsa_t *);
85 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
86 static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
87 
88 extern void (*cl_inet_getspi)(netstackid_t stack_id, uint8_t protocol,
89     uint8_t *ptr, size_t len, void *args);
90 extern int (*cl_inet_checkspi)(netstackid_t stack_id, uint8_t protocol,
91     uint32_t spi, void *args);
92 extern void (*cl_inet_deletespi)(netstackid_t stack_id, uint8_t protocol,
93     uint32_t spi, void *args);
94 
95 /*
96  * ipsacq_maxpackets is defined here to make it tunable
97  * from /etc/system.
98  */
99 extern uint64_t ipsacq_maxpackets;
100 
/*
 * If the SA's "delta" lifetime field is non-zero, set its "exp" expiration
 * field to the SA's add time plus that lifetime, using sadb_add_time().
 * "delta" and "exp" are field names given without the ipsa_ prefix.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if (0 != ((sa)->ipsa_ ## delta))				\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
}
107 
/*
 * If the SA's "delta" lifetime field is non-zero, compute a candidate
 * expiration of use time plus that lifetime.  The candidate replaces the
 * "exp" field when that field is still unset (zero) or when the candidate
 * is earlier than the value already stored; otherwise "exp" is left alone.
 * "delta" and "exp" are field names given without the ipsa_ prefix.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
		    (sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0 ||			\
		    tmp < ((sa)->ipsa_ ## exp))				\
			(sa)->ipsa_ ## exp = tmp;			\
	}								\
}
119 
120 
121 /* wrap the macro so we can pass it as a function pointer */
122 void
123 sadb_sa_refrele(void *target)
124 {
125 	IPSA_REFRELE(((ipsa_t *)target));
126 }
127 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * We presume that ILP32 systems will be past end of support life when
 * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 *
 * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 * will hopefully have figured out clever ways to avoid the use of
 * fixed-sized integers in computation.
 *
 * Returns base + delta, or TIME_MAX if that sum would not fit in a time_t.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = (uint64_t)TIME_MAX;

	/*
	 * With delta clipped, only a positive base can push the sum past
	 * TIME_MAX.  Test against the remaining headroom *before* adding,
	 * so the result never depends on how an out-of-range value is
	 * converted back to a signed time_t.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
178 
179 /*
180  * Callers of this function have already created a working security
181  * association, and have found the appropriate table & hash chain.  All this
182  * function does is check duplicates, and insert the SA.  The caller needs to
183  * hold the hash bucket lock and increment the refcnt before insertion.
184  *
185  * Return 0 if success, EEXIST if collision.
186  */
187 #define	SA_UNIQUE_MATCH(sa1, sa2) \
188 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
189 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
190 
191 int
192 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
193 {
194 	ipsa_t **ptpn = NULL;
195 	ipsa_t *walker;
196 	boolean_t unspecsrc;
197 
198 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
199 
200 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
201 
202 	walker = bucket->isaf_ipsa;
203 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
204 
205 	/*
206 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
207 	 * of the list unless there's an unspecified source address, then
208 	 * insert it after the last SA with a specified source address.
209 	 *
210 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
211 	 * checking for collisions.
212 	 */
213 
214 	while (walker != NULL) {
215 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
216 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
217 			if (walker->ipsa_spi == ipsa->ipsa_spi)
218 				return (EEXIST);
219 
220 			mutex_enter(&walker->ipsa_lock);
221 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
222 			    (walker->ipsa_flags & IPSA_F_USED) &&
223 			    SA_UNIQUE_MATCH(walker, ipsa)) {
224 				walker->ipsa_flags |= IPSA_F_CINVALID;
225 			}
226 			mutex_exit(&walker->ipsa_lock);
227 		}
228 
229 		if (ptpn == NULL && unspecsrc) {
230 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
231 			    walker->ipsa_addrfam))
232 				ptpn = walker->ipsa_ptpn;
233 			else if (walker->ipsa_next == NULL)
234 				ptpn = &walker->ipsa_next;
235 		}
236 
237 		walker = walker->ipsa_next;
238 	}
239 
240 	if (ptpn == NULL)
241 		ptpn = &bucket->isaf_ipsa;
242 	ipsa->ipsa_next = *ptpn;
243 	ipsa->ipsa_ptpn = ptpn;
244 	if (ipsa->ipsa_next != NULL)
245 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
246 	*ptpn = ipsa;
247 	ipsa->ipsa_linklock = &bucket->isaf_lock;
248 
249 	return (0);
250 }
251 #undef SA_UNIQUE_MATCH
252 
253 /*
254  * Free a security association.  Its reference count is 0, which means
255  * I must free it.  The SA must be unlocked and must not be linked into
256  * any fanout list.
257  */
258 static void
259 sadb_freeassoc(ipsa_t *ipsa)
260 {
261 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
262 
263 	ASSERT(ipss != NULL);
264 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
265 	ASSERT(ipsa->ipsa_refcnt == 0);
266 	ASSERT(ipsa->ipsa_next == NULL);
267 	ASSERT(ipsa->ipsa_ptpn == NULL);
268 
269 	mutex_enter(&ipsa->ipsa_lock);
270 	/* Don't call sadb_clear_lpkt() since we hold the ipsa_lock anyway. */
271 	ip_drop_packet(ipsa->ipsa_lpkt, B_TRUE, NULL, NULL,
272 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
273 	    &ipss->ipsec_sadb_dropper);
274 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
275 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
276 	mutex_exit(&ipsa->ipsa_lock);
277 
278 	/* bzero() these fields for paranoia's sake. */
279 	if (ipsa->ipsa_authkey != NULL) {
280 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
281 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
282 	}
283 	if (ipsa->ipsa_encrkey != NULL) {
284 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
285 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
286 	}
287 	if (ipsa->ipsa_src_cid != NULL) {
288 		IPSID_REFRELE(ipsa->ipsa_src_cid);
289 	}
290 	if (ipsa->ipsa_dst_cid != NULL) {
291 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
292 	}
293 	if (ipsa->ipsa_integ != NULL)
294 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
295 	if (ipsa->ipsa_sens != NULL)
296 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
297 
298 	mutex_destroy(&ipsa->ipsa_lock);
299 	kmem_free(ipsa, sizeof (*ipsa));
300 }
301 
302 /*
303  * Unlink a security association from a hash bucket.  Assume the hash bucket
304  * lock is held, but the association's lock is not.
305  *
306  * Note that we do not bump the bucket's generation number here because
307  * we might not be making a visible change to the set of visible SA's.
308  * All callers MUST bump the bucket's generation number before they unlock
309  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
310  * was present in the bucket at the time it was locked.
311  */
312 void
313 sadb_unlinkassoc(ipsa_t *ipsa)
314 {
315 	ASSERT(ipsa->ipsa_linklock != NULL);
316 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
317 
318 	/* These fields are protected by the link lock. */
319 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
320 	if (ipsa->ipsa_next != NULL) {
321 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
322 		ipsa->ipsa_next = NULL;
323 	}
324 
325 	ipsa->ipsa_ptpn = NULL;
326 
327 	/* This may destroy the SA. */
328 	IPSA_REFRELE(ipsa);
329 }
330 
331 void
332 sadb_delete_cluster(ipsa_t *assoc)
333 {
334 	uint8_t protocol;
335 
336 	if (cl_inet_deletespi &&
337 	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
338 	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
339 		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
340 		    IPPROTO_AH : IPPROTO_ESP;
341 		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
342 		    protocol, assoc->ipsa_spi, NULL);
343 	}
344 }
345 
346 /*
347  * Create a larval security association with the specified SPI.	 All other
348  * fields are zeroed.
349  */
350 static ipsa_t *
351 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
352     netstack_t *ns)
353 {
354 	ipsa_t *newbie;
355 
356 	/*
357 	 * Allocate...
358 	 */
359 
360 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
361 	if (newbie == NULL) {
362 		/* Can't make new larval SA. */
363 		return (NULL);
364 	}
365 
366 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
367 	newbie->ipsa_spi = spi;
368 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
369 
370 	/*
371 	 * Copy addresses...
372 	 */
373 
374 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
375 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
376 
377 	newbie->ipsa_addrfam = addrfam;
378 
379 	/*
380 	 * Set common initialization values, including refcnt.
381 	 */
382 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
383 	newbie->ipsa_state = IPSA_STATE_LARVAL;
384 	newbie->ipsa_refcnt = 1;
385 	newbie->ipsa_freefunc = sadb_freeassoc;
386 
387 	/*
388 	 * There aren't a lot of other common initialization values, as
389 	 * they are copied in from the PF_KEY message.
390 	 */
391 
392 	return (newbie);
393 }
394 
395 /*
396  * Call me to initialize a security association fanout.
397  */
398 static int
399 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
400 {
401 	isaf_t *table;
402 	int i;
403 
404 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
405 	*tablep = table;
406 
407 	if (table == NULL)
408 		return (ENOMEM);
409 
410 	for (i = 0; i < size; i++) {
411 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
412 		table[i].isaf_ipsa = NULL;
413 		table[i].isaf_gen = 0;
414 	}
415 
416 	return (0);
417 }
418 
419 /*
420  * Call me to initialize an acquire fanout
421  */
422 static int
423 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
424 {
425 	iacqf_t *table;
426 	int i;
427 
428 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
429 	*tablep = table;
430 
431 	if (table == NULL)
432 		return (ENOMEM);
433 
434 	for (i = 0; i < size; i++) {
435 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
436 		table[i].iacqf_ipsacq = NULL;
437 	}
438 
439 	return (0);
440 }
441 
442 /*
443  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
444  * caller must clean up partial allocations.
445  */
446 static int
447 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
448 {
449 	ASSERT(sp->sdb_of == NULL);
450 	ASSERT(sp->sdb_if == NULL);
451 	ASSERT(sp->sdb_acq == NULL);
452 
453 	sp->sdb_hashsize = size;
454 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
455 		return (ENOMEM);
456 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
457 		return (ENOMEM);
458 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
459 		return (ENOMEM);
460 
461 	return (0);
462 }
463 
464 /*
465  * Call me to initialize an SADB instance; fall back to default size on failure.
466  */
467 static void
468 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
469     netstack_t *ns)
470 {
471 	ASSERT(sp->sdb_of == NULL);
472 	ASSERT(sp->sdb_if == NULL);
473 	ASSERT(sp->sdb_acq == NULL);
474 
475 	if (size < IPSEC_DEFAULT_HASH_SIZE)
476 		size = IPSEC_DEFAULT_HASH_SIZE;
477 
478 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
479 
480 		cmn_err(CE_WARN,
481 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
482 		    size, ver, name);
483 
484 		sadb_destroy(sp, ns);
485 		size = IPSEC_DEFAULT_HASH_SIZE;
486 		cmn_err(CE_WARN, "Falling back to %d entries", size);
487 		(void) sadb_init_trial(sp, size, KM_SLEEP);
488 	}
489 }
490 
491 
492 /*
493  * Initialize an SADB-pair.
494  */
495 void
496 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
497 {
498 	sadb_init(name, &sp->s_v4, size, 4, ns);
499 	sadb_init(name, &sp->s_v6, size, 6, ns);
500 
501 	sp->s_satype = type;
502 
503 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
504 	if (type == SADB_SATYPE_AH) {
505 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
506 
507 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
508 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
509 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
510 	} else {
511 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
512 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
513 	}
514 }
515 
516 /*
517  * Deliver a single SADB_DUMP message representing a single SA.  This is
518  * called many times by sadb_dump().
519  *
520  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
521  * the caller should take that as a hint that dupb() on the "original answer"
522  * failed, and that perhaps the caller should try again with a copyb()ed
523  * "original answer".
524  */
525 static int
526 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
527     sadb_msg_t *samsg)
528 {
529 	mblk_t *answer;
530 
531 	answer = dupb(original_answer);
532 	if (answer == NULL)
533 		return (ENOBUFS);
534 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
535 	if (answer->b_cont == NULL) {
536 		freeb(answer);
537 		return (ENOMEM);
538 	}
539 
540 	/* Just do a putnext, and let keysock deal with flow control. */
541 	putnext(pfkey_q, answer);
542 	return (0);
543 }
544 
545 /*
546  * Common function to allocate and prepare a keysock_out_t M_CTL message.
547  */
548 mblk_t *
549 sadb_keysock_out(minor_t serial)
550 {
551 	mblk_t *mp;
552 	keysock_out_t *kso;
553 
554 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
555 	if (mp != NULL) {
556 		mp->b_datap->db_type = M_CTL;
557 		mp->b_wptr += sizeof (ipsec_info_t);
558 		kso = (keysock_out_t *)mp->b_rptr;
559 		kso->ks_out_type = KEYSOCK_OUT;
560 		kso->ks_out_len = sizeof (*kso);
561 		kso->ks_out_serial = serial;
562 	}
563 
564 	return (mp);
565 }
566 
567 /*
568  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
569  * to keysock.
570  */
571 static int
572 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
573     int num_entries, boolean_t do_peers, time_t active_time)
574 {
575 	int i, error = 0;
576 	mblk_t *original_answer;
577 	ipsa_t *walker;
578 	sadb_msg_t *samsg;
579 	time_t	current;
580 
581 	/*
582 	 * For each IPSA hash bucket do:
583 	 *	- Hold the mutex
584 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
585 	 */
586 	ASSERT(mp->b_cont != NULL);
587 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
588 
589 	original_answer = sadb_keysock_out(serial);
590 	if (original_answer == NULL)
591 		return (ENOMEM);
592 
593 	current = gethrestime_sec();
594 	for (i = 0; i < num_entries; i++) {
595 		mutex_enter(&fanout[i].isaf_lock);
596 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
597 		    walker = walker->ipsa_next) {
598 			if (!do_peers && walker->ipsa_haspeer)
599 				continue;
600 			if ((active_time != 0) &&
601 			    ((current - walker->ipsa_lastuse) > active_time))
602 				continue;
603 			error = sadb_dump_deliver(pfkey_q, original_answer,
604 			    walker, samsg);
605 			if (error == ENOBUFS) {
606 				mblk_t *new_original_answer;
607 
608 				/* Ran out of dupb's.  Try a copyb. */
609 				new_original_answer = copyb(original_answer);
610 				if (new_original_answer == NULL) {
611 					error = ENOMEM;
612 				} else {
613 					freeb(original_answer);
614 					original_answer = new_original_answer;
615 					error = sadb_dump_deliver(pfkey_q,
616 					    original_answer, walker, samsg);
617 				}
618 			}
619 			if (error != 0)
620 				break;	/* out of for loop. */
621 		}
622 		mutex_exit(&fanout[i].isaf_lock);
623 		if (error != 0)
624 			break;	/* out of for loop. */
625 	}
626 
627 	freeb(original_answer);
628 	return (error);
629 }
630 
631 /*
632  * Dump an entire SADB; outbound first, then inbound.
633  */
634 
635 int
636 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
637 {
638 	int error;
639 	time_t	active_time = 0;
640 	sadb_x_edump_t	*edump =
641 	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
642 
643 	if (edump != NULL) {
644 		active_time = edump->sadb_x_edump_timeout;
645 	}
646 
647 	/* Dump outbound */
648 	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
649 	    sp->sdb_hashsize, B_TRUE, active_time);
650 	if (error)
651 		return (error);
652 
653 	/* Dump inbound */
654 	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
655 	    sp->sdb_hashsize, B_FALSE, active_time);
656 }
657 
658 /*
659  * Generic sadb table walker.
660  *
661  * Call "walkfn" for each SA in each bucket in "table"; pass the
662  * bucket, the entry and "cookie" to the callback function.
663  * Take care to ensure that walkfn can delete the SA without screwing
664  * up our traverse.
665  *
666  * The bucket is locked for the duration of the callback, both so that the
667  * callback can just call sadb_unlinkassoc() when it wants to delete something,
668  * and so that no new entries are added while we're walking the list.
669  */
670 static void
671 sadb_walker(isaf_t *table, uint_t numentries,
672     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
673     void *cookie)
674 {
675 	int i;
676 	for (i = 0; i < numentries; i++) {
677 		ipsa_t *entry, *next;
678 
679 		mutex_enter(&table[i].isaf_lock);
680 
681 		for (entry = table[i].isaf_ipsa; entry != NULL;
682 		    entry = next) {
683 			next = entry->ipsa_next;
684 			(*walkfn)(&table[i], entry, cookie);
685 		}
686 		mutex_exit(&table[i].isaf_lock);
687 	}
688 }
689 
690 /*
691  * From the given SA, construct a dl_ct_ipsec_key and
692  * a dl_ct_ipsec structures to be sent to the adapter as part
693  * of a DL_CONTROL_REQ.
694  *
695  * ct_sa must point to the storage allocated for the key
696  * structure and must be followed by storage allocated
697  * for the SA information that must be sent to the driver
698  * as part of the DL_CONTROL_REQ request.
699  *
700  * The is_inbound boolean indicates whether the specified
701  * SA is part of an inbound SA table.
702  *
703  * Returns B_TRUE if the corresponding SA must be passed to
704  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
705  */
706 static boolean_t
707 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
708 {
709 	dl_ct_ipsec_key_t *keyp;
710 	dl_ct_ipsec_t *sap;
711 	void *ct_sa = mp->b_wptr;
712 
713 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
714 
715 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
716 	sap = (dl_ct_ipsec_t *)(keyp + 1);
717 
718 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
719 	    "is_inbound = %d\n", is_inbound));
720 
721 	/* initialize flag */
722 	sap->sadb_sa_flags = 0;
723 	if (is_inbound) {
724 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
725 		/*
726 		 * If an inbound SA has a peer, then mark it has being
727 		 * an outbound SA as well.
728 		 */
729 		if (sa->ipsa_haspeer)
730 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
731 	} else {
732 		/*
733 		 * If an outbound SA has a peer, then don't send it,
734 		 * since we will send the copy from the inbound table.
735 		 */
736 		if (sa->ipsa_haspeer) {
737 			freemsg(mp);
738 			return (B_FALSE);
739 		}
740 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
741 	}
742 
743 	keyp->dl_key_spi = sa->ipsa_spi;
744 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
745 	    DL_CTL_IPSEC_ADDR_LEN);
746 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
747 
748 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
749 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
750 
751 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
752 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
753 	bcopy(sa->ipsa_authkey,
754 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
755 
756 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
757 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
758 	bcopy(sa->ipsa_encrkey,
759 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
760 
761 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
762 	return (B_TRUE);
763 }
764 
765 /*
766  * Called from AH or ESP to format a message which will be used to inform
767  * IPsec-acceleration-capable ills of a SADB change.
768  * (It is not possible to send the message to IP directly from this function
769  * since the SA, if any, is locked during the call).
770  *
771  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
772  * sa_type: identifies whether the operation applies to AH or ESP
773  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
774  * sa: Pointer to an SA.  Must be non-NULL and locked
775  *	for ADD, DELETE, GET, and UPDATE operations.
776  * This function returns an mblk chain that must be passed to IP
777  * for forwarding to the IPsec capable providers.
778  */
779 mblk_t *
780 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
781     boolean_t is_inbound)
782 {
783 	mblk_t *mp;
784 	dl_control_req_t *ctrl;
785 	boolean_t need_key = B_FALSE;
786 	mblk_t *ctl_mp = NULL;
787 	ipsec_ctl_t *ctl;
788 
789 	/*
790 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
791 	 * 2 if a key is needed for the operation
792 	 *    2.1 initialize key
793 	 *    2.2 if a full SA is needed for the operation
794 	 *	2.2.1 initialize full SA info
795 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
796 	 * to send the resulting message to IPsec capable ills.
797 	 */
798 
799 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
800 
801 	/*
802 	 * Allocate DL_CONTROL_REQ M_PROTO
803 	 * We allocate room for the SA even if it's not needed
804 	 * by some of the operations (for example flush)
805 	 */
806 	mp = allocb(sizeof (dl_control_req_t) +
807 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
808 	if (mp == NULL)
809 		return (NULL);
810 	mp->b_datap->db_type = M_PROTO;
811 
812 	/* initialize dl_control_req_t */
813 	ctrl = (dl_control_req_t *)mp->b_wptr;
814 	ctrl->dl_primitive = DL_CONTROL_REQ;
815 	ctrl->dl_operation = dl_operation;
816 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
817 	    DL_CT_IPSEC_ESP;
818 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
819 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
820 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
821 	    sizeof (dl_ct_ipsec_key_t);
822 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
823 	mp->b_wptr += sizeof (dl_control_req_t);
824 
825 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
826 		ASSERT(sa != NULL);
827 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
828 
829 		need_key = B_TRUE;
830 
831 		/*
832 		 * Initialize key and SA data. Note that for some
833 		 * operations the SA data is ignored by the provider
834 		 * (delete, etc.)
835 		 */
836 		if (!sadb_req_from_sa(sa, mp, is_inbound))
837 			return (NULL);
838 	}
839 
840 	/* construct control message */
841 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
842 	if (ctl_mp == NULL) {
843 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
844 		freemsg(mp);
845 		return (NULL);
846 	}
847 
848 	ctl_mp->b_datap->db_type = M_CTL;
849 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
850 	ctl_mp->b_cont = mp;
851 
852 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
853 	ctl->ipsec_ctl_type = IPSEC_CTL;
854 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
855 	ctl->ipsec_ctl_sa_type = sa_type;
856 
857 	if (need_key) {
858 		/*
859 		 * Keep an additional reference on SA, since it will be
860 		 * needed by IP to send control messages corresponding
861 		 * to that SA from its perimeter. IP will do a
862 		 * IPSA_REFRELE when done with the request.
863 		 */
864 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
865 		IPSA_REFHOLD(sa);
866 		ctl->ipsec_ctl_sa = sa;
867 	} else
868 		ctl->ipsec_ctl_sa = NULL;
869 
870 	return (ctl_mp);
871 }
872 
873 
874 /*
875  * Called by sadb_ill_download() to dump the entries for a specific
876  * fanout table.  For each SA entry in the table passed as argument,
877  * use mp as a template and constructs a full DL_CONTROL message, and
878  * call ill_dlpi_send(), provided by IP, to send the resulting
879  * messages to the ill.
880  */
881 static void
882 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
883     boolean_t is_inbound)
884 {
885 	ipsa_t *walker;
886 	mblk_t *nmp, *salist;
887 	int i, error = 0;
888 	ip_stack_t	*ipst = ill->ill_ipst;
889 	netstack_t	*ns = ipst->ips_netstack;
890 
891 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
892 	    (void *)fanout, num_entries));
893 	/*
894 	 * For each IPSA hash bucket do:
895 	 *	- Hold the mutex
896 	 *	- Walk each entry, sending a corresponding request to IP
897 	 *	  for it.
898 	 */
899 	ASSERT(mp->b_datap->db_type == M_PROTO);
900 
901 	for (i = 0; i < num_entries; i++) {
902 		mutex_enter(&fanout[i].isaf_lock);
903 		salist = NULL;
904 
905 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
906 		    walker = walker->ipsa_next) {
907 			IPSECHW_DEBUG(IPSECHW_SADB,
908 			    ("sadb_ill_df: sending SA to ill via IP \n"));
909 			/*
910 			 * Duplicate the template mp passed and
911 			 * complete DL_CONTROL_REQ data.
912 			 * To be more memory efficient, we could use
913 			 * dupb() for the M_CTL and copyb() for the M_PROTO
914 			 * as the M_CTL, since the M_CTL is the same for
915 			 * every SA entry passed down to IP for the same ill.
916 			 *
917 			 * Note that copymsg/copyb ensure that the new mblk
918 			 * is at least as large as the source mblk even if it's
919 			 * not using all its storage -- therefore, nmp
920 			 * has trailing space for sadb_req_from_sa to add
921 			 * the SA-specific bits.
922 			 */
923 			mutex_enter(&walker->ipsa_lock);
924 			if (ipsec_capab_match(ill,
925 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
926 			    walker, ns)) {
927 				nmp = copymsg(mp);
928 				if (nmp == NULL) {
929 					IPSECHW_DEBUG(IPSECHW_SADB,
930 					    ("sadb_ill_df: alloc error\n"));
931 					error = ENOMEM;
932 					mutex_exit(&walker->ipsa_lock);
933 					break;
934 				}
935 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
936 					nmp->b_next = salist;
937 					salist = nmp;
938 				}
939 			}
940 			mutex_exit(&walker->ipsa_lock);
941 		}
942 		mutex_exit(&fanout[i].isaf_lock);
943 		while (salist != NULL) {
944 			nmp = salist;
945 			salist = nmp->b_next;
946 			nmp->b_next = NULL;
947 			ill_dlpi_send(ill, nmp);
948 		}
949 		if (error != 0)
950 			break;	/* out of for loop. */
951 	}
952 }
953 
954 /*
955  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
956  * the type specified by sa_type to the specified ill.
957  *
958  * We call for each fanout table defined by the SADB (one per
959  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
960  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
961  * message to the ill.
962  */
963 void
964 sadb_ill_download(ill_t *ill, uint_t sa_type)
965 {
966 	mblk_t *protomp;	/* prototype message */
967 	dl_control_req_t *ctrl;
968 	sadbp_t *spp;
969 	sadb_t *sp;
970 	int dlt;
971 	ip_stack_t	*ipst = ill->ill_ipst;
972 	netstack_t	*ns = ipst->ips_netstack;
973 
974 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
975 
976 	/*
977 	 * Allocate and initialize prototype answer. A duplicate for
978 	 * each SA is sent down to the interface.
979 	 */
980 
981 	/* DL_CONTROL_REQ M_PROTO mblk_t */
982 	protomp = allocb(sizeof (dl_control_req_t) +
983 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
984 	if (protomp == NULL)
985 		return;
986 	protomp->b_datap->db_type = M_PROTO;
987 
988 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
989 	if (sa_type == SADB_SATYPE_ESP) {
990 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
991 
992 		spp = &espstack->esp_sadb;
993 	} else {
994 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
995 
996 		spp = &ahstack->ah_sadb;
997 	}
998 
999 	ctrl = (dl_control_req_t *)protomp->b_wptr;
1000 	ctrl->dl_primitive = DL_CONTROL_REQ;
1001 	ctrl->dl_operation = DL_CO_SET;
1002 	ctrl->dl_type = dlt;
1003 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
1004 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
1005 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
1006 	    sizeof (dl_ct_ipsec_key_t);
1007 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
1008 	protomp->b_wptr += sizeof (dl_control_req_t);
1009 
1010 	/*
1011 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
1012 	 * and dl_ct_ipsec_t
1013 	 */
1014 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
1015 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1016 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1017 	freemsg(protomp);
1018 }
1019 
1020 /*
1021  * Call me to free up a security association fanout.  Use the forever
1022  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
1023  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
1024  * when a module is unloaded).
1025  */
1026 static void
1027 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
1028     boolean_t inbound)
1029 {
1030 	int i;
1031 	isaf_t *table = *tablep;
1032 	uint8_t protocol;
1033 	ipsa_t *sa;
1034 	netstackid_t sid;
1035 
1036 	if (table == NULL)
1037 		return;
1038 
1039 	for (i = 0; i < numentries; i++) {
1040 		mutex_enter(&table[i].isaf_lock);
1041 		while ((sa = table[i].isaf_ipsa) != NULL) {
1042 			if (inbound && cl_inet_deletespi &&
1043 			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
1044 			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
1045 				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
1046 				    IPPROTO_AH : IPPROTO_ESP;
1047 				sid = sa->ipsa_netstack->netstack_stackid;
1048 				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
1049 				    NULL);
1050 			}
1051 			sadb_unlinkassoc(sa);
1052 		}
1053 		table[i].isaf_gen++;
1054 		mutex_exit(&table[i].isaf_lock);
1055 		if (forever)
1056 			mutex_destroy(&(table[i].isaf_lock));
1057 	}
1058 
1059 	if (forever) {
1060 		*tablep = NULL;
1061 		kmem_free(table, numentries * sizeof (*table));
1062 	}
1063 }
1064 
1065 /*
1066  * Entry points to sadb_destroyer().
1067  */
1068 static void
1069 sadb_flush(sadb_t *sp, netstack_t *ns)
1070 {
1071 	/*
1072 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1073 	 * enforcement, there would be a subtlety where I could add on the
1074 	 * heels of a flush.  With keysock's enforcement, however, this
1075 	 * makes ESP's job easy.
1076 	 */
1077 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
1078 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
1079 
1080 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1081 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
1082 }
1083 
1084 static void
1085 sadb_destroy(sadb_t *sp, netstack_t *ns)
1086 {
1087 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
1088 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
1089 
1090 	/* For each acquire, destroy it, including the bucket mutex. */
1091 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1092 
1093 	ASSERT(sp->sdb_of == NULL);
1094 	ASSERT(sp->sdb_if == NULL);
1095 	ASSERT(sp->sdb_acq == NULL);
1096 }
1097 
1098 static void
1099 sadb_send_flush_req(sadbp_t *spp)
1100 {
1101 	mblk_t *ctl_mp;
1102 
1103 	/*
1104 	 * we've been unplumbed, or never were plumbed; don't go there.
1105 	 */
1106 	if (spp->s_ip_q == NULL)
1107 		return;
1108 
1109 	/* have IP send a flush msg to the IPsec accelerators */
1110 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1111 	if (ctl_mp != NULL)
1112 		putnext(spp->s_ip_q, ctl_mp);
1113 }
1114 
1115 void
1116 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1117 {
1118 	sadb_flush(&spp->s_v4, ns);
1119 	sadb_flush(&spp->s_v6, ns);
1120 
1121 	sadb_send_flush_req(spp);
1122 }
1123 
1124 void
1125 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1126 {
1127 	sadb_destroy(&spp->s_v4, ns);
1128 	sadb_destroy(&spp->s_v6, ns);
1129 
1130 	sadb_send_flush_req(spp);
1131 	if (spp->s_satype == SADB_SATYPE_AH) {
1132 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1133 
1134 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1135 	}
1136 }
1137 
1138 
1139 /*
1140  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1141  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1142  * EINVAL.
1143  */
1144 int
1145 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
1146     sadb_lifetime_t *idle)
1147 {
1148 	if (hard == NULL || soft == NULL)
1149 		return (0);
1150 
1151 	if (hard->sadb_lifetime_allocations != 0 &&
1152 	    soft->sadb_lifetime_allocations != 0 &&
1153 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1154 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1155 
1156 	if (hard->sadb_lifetime_bytes != 0 &&
1157 	    soft->sadb_lifetime_bytes != 0 &&
1158 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1159 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1160 
1161 	if (hard->sadb_lifetime_addtime != 0 &&
1162 	    soft->sadb_lifetime_addtime != 0 &&
1163 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1164 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1165 
1166 	if (hard->sadb_lifetime_usetime != 0 &&
1167 	    soft->sadb_lifetime_usetime != 0 &&
1168 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1169 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1170 
1171 	if (idle != NULL) {
1172 		if (hard->sadb_lifetime_addtime != 0 &&
1173 		    idle->sadb_lifetime_addtime != 0 &&
1174 		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1175 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1176 
1177 		if (soft->sadb_lifetime_addtime != 0 &&
1178 		    idle->sadb_lifetime_addtime != 0 &&
1179 		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1180 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1181 
1182 		if (hard->sadb_lifetime_usetime != 0 &&
1183 		    idle->sadb_lifetime_usetime != 0 &&
1184 		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1185 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1186 
1187 		if (soft->sadb_lifetime_usetime != 0 &&
1188 		    idle->sadb_lifetime_usetime != 0 &&
1189 		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1190 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1191 	}
1192 
1193 	return (0);
1194 }
1195 
1196 /*
1197  * Clone a security association for the purposes of inserting a single SA
1198  * into inbound and outbound tables respectively. This function should only
1199  * be called from sadb_common_add().
1200  */
1201 static ipsa_t *
1202 sadb_cloneassoc(ipsa_t *ipsa)
1203 {
1204 	ipsa_t *newbie;
1205 	boolean_t error = B_FALSE;
1206 
1207 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
1208 
1209 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1210 	if (newbie == NULL)
1211 		return (NULL);
1212 
1213 	/* Copy over what we can. */
1214 	*newbie = *ipsa;
1215 
1216 	/* bzero and initialize locks, in case *_init() allocates... */
1217 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1218 
1219 	/*
1220 	 * While somewhat dain-bramaged, the most graceful way to
1221 	 * recover from errors is to keep plowing through the
1222 	 * allocations, and getting what I can.  It's easier to call
1223 	 * sadb_freeassoc() on the stillborn clone when all the
1224 	 * pointers aren't pointing to the parent's data.
1225 	 */
1226 
1227 	if (ipsa->ipsa_authkey != NULL) {
1228 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1229 		    KM_NOSLEEP);
1230 		if (newbie->ipsa_authkey == NULL) {
1231 			error = B_TRUE;
1232 		} else {
1233 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1234 			    newbie->ipsa_authkeylen);
1235 
1236 			newbie->ipsa_kcfauthkey.ck_data =
1237 			    newbie->ipsa_authkey;
1238 		}
1239 
1240 		if (newbie->ipsa_amech.cm_param != NULL) {
1241 			newbie->ipsa_amech.cm_param =
1242 			    (char *)&newbie->ipsa_mac_len;
1243 		}
1244 	}
1245 
1246 	if (ipsa->ipsa_encrkey != NULL) {
1247 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1248 		    KM_NOSLEEP);
1249 		if (newbie->ipsa_encrkey == NULL) {
1250 			error = B_TRUE;
1251 		} else {
1252 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1253 			    newbie->ipsa_encrkeylen);
1254 
1255 			newbie->ipsa_kcfencrkey.ck_data =
1256 			    newbie->ipsa_encrkey;
1257 		}
1258 	}
1259 
1260 	newbie->ipsa_authtmpl = NULL;
1261 	newbie->ipsa_encrtmpl = NULL;
1262 	newbie->ipsa_haspeer = B_TRUE;
1263 
1264 	if (ipsa->ipsa_integ != NULL) {
1265 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1266 		    KM_NOSLEEP);
1267 		if (newbie->ipsa_integ == NULL) {
1268 			error = B_TRUE;
1269 		} else {
1270 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1271 			    newbie->ipsa_integlen);
1272 		}
1273 	}
1274 
1275 	if (ipsa->ipsa_sens != NULL) {
1276 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1277 		    KM_NOSLEEP);
1278 		if (newbie->ipsa_sens == NULL) {
1279 			error = B_TRUE;
1280 		} else {
1281 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1282 			    newbie->ipsa_senslen);
1283 		}
1284 	}
1285 
1286 	if (ipsa->ipsa_src_cid != NULL) {
1287 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1288 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1289 	}
1290 
1291 	if (ipsa->ipsa_dst_cid != NULL) {
1292 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1293 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1294 	}
1295 
1296 	if (error) {
1297 		sadb_freeassoc(newbie);
1298 		return (NULL);
1299 	}
1300 
1301 	return (newbie);
1302 }
1303 
1304 /*
1305  * Initialize a SADB address extension at the address specified by addrext.
1306  * Return a pointer to the end of the new address extension.
1307  */
1308 static uint8_t *
1309 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1310     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1311 {
1312 	struct sockaddr_in *sin;
1313 	struct sockaddr_in6 *sin6;
1314 	uint8_t *cur = start;
1315 	int addrext_len;
1316 	int sin_len;
1317 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1318 
1319 	if (cur == NULL)
1320 		return (NULL);
1321 
1322 	cur += sizeof (*addrext);
1323 	if (cur > end)
1324 		return (NULL);
1325 
1326 	addrext->sadb_address_proto = proto;
1327 	addrext->sadb_address_prefixlen = prefix;
1328 	addrext->sadb_address_reserved = 0;
1329 	addrext->sadb_address_exttype = exttype;
1330 
1331 	switch (af) {
1332 	case AF_INET:
1333 		sin = (struct sockaddr_in *)cur;
1334 		sin_len = sizeof (*sin);
1335 		cur += sin_len;
1336 		if (cur > end)
1337 			return (NULL);
1338 
1339 		sin->sin_family = af;
1340 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1341 		sin->sin_port = port;
1342 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1343 		break;
1344 	case AF_INET6:
1345 		sin6 = (struct sockaddr_in6 *)cur;
1346 		sin_len = sizeof (*sin6);
1347 		cur += sin_len;
1348 		if (cur > end)
1349 			return (NULL);
1350 
1351 		bzero(sin6, sizeof (*sin6));
1352 		sin6->sin6_family = af;
1353 		sin6->sin6_port = port;
1354 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1355 		break;
1356 	}
1357 
1358 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1359 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1360 
1361 	cur = start + addrext_len;
1362 	if (cur > end)
1363 		cur = NULL;
1364 
1365 	return (cur);
1366 }
1367 
1368 /*
1369  * Construct a key management cookie extension.
1370  */
1371 
1372 static uint8_t *
1373 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1374 {
1375 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1376 
1377 	if (cur == NULL)
1378 		return (NULL);
1379 
1380 	cur += sizeof (*kmcext);
1381 
1382 	if (cur > end)
1383 		return (NULL);
1384 
1385 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1386 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1387 	kmcext->sadb_x_kmc_proto = kmp;
1388 	kmcext->sadb_x_kmc_cookie = kmc;
1389 	kmcext->sadb_x_kmc_reserved = 0;
1390 
1391 	return (cur);
1392 }
1393 
1394 /*
1395  * Given an original message header with sufficient space following it, and an
1396  * SA, construct a full PF_KEY message with all of the relevant extensions.
1397  * This is mostly used for SADB_GET, and SADB_DUMP.
1398  */
1399 static mblk_t *
1400 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1401 {
1402 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1403 	int srcidsize, dstidsize;
1404 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1405 				/* src/dst and proxy sockaddrs. */
1406 	/*
1407 	 * The following are pointers into the PF_KEY message this PF_KEY
1408 	 * message creates.
1409 	 */
1410 	sadb_msg_t *newsamsg;
1411 	sadb_sa_t *assoc;
1412 	sadb_lifetime_t *lt;
1413 	sadb_key_t *key;
1414 	sadb_ident_t *ident;
1415 	sadb_sens_t *sens;
1416 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1417 	sadb_x_replay_ctr_t *repl_ctr;
1418 	sadb_x_pair_t *pair_ext;
1419 
1420 	mblk_t *mp;
1421 	uint64_t *bitmap;
1422 	uint8_t *cur, *end;
1423 	/* These indicate the presence of the above extension fields. */
1424 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1425 	boolean_t idle;
1426 	boolean_t paired;
1427 	uint32_t otherspi;
1428 
1429 	/* First off, figure out the allocation length for this message. */
1430 
1431 	/*
1432 	 * Constant stuff.  This includes base, SA, address (src, dst),
1433 	 * and lifetime (current).
1434 	 */
1435 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1436 	    sizeof (sadb_lifetime_t);
1437 
1438 	fam = ipsa->ipsa_addrfam;
1439 	switch (fam) {
1440 	case AF_INET:
1441 		addrsize = roundup(sizeof (struct sockaddr_in) +
1442 		    sizeof (sadb_address_t), sizeof (uint64_t));
1443 		break;
1444 	case AF_INET6:
1445 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1446 		    sizeof (sadb_address_t), sizeof (uint64_t));
1447 		break;
1448 	default:
1449 		return (NULL);
1450 	}
1451 	/*
1452 	 * Allocate TWO address extensions, for source and destination.
1453 	 * (Thus, the * 2.)
1454 	 */
1455 	alloclen += addrsize * 2;
1456 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1457 		alloclen += addrsize;
1458 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1459 		alloclen += addrsize;
1460 
1461 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1462 		paired = B_TRUE;
1463 		alloclen += sizeof (sadb_x_pair_t);
1464 		otherspi = ipsa->ipsa_otherspi;
1465 	} else {
1466 		paired = B_FALSE;
1467 	}
1468 
1469 	/* How 'bout other lifetimes? */
1470 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1471 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1472 		alloclen += sizeof (sadb_lifetime_t);
1473 		soft = B_TRUE;
1474 	} else {
1475 		soft = B_FALSE;
1476 	}
1477 
1478 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1479 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1480 		alloclen += sizeof (sadb_lifetime_t);
1481 		hard = B_TRUE;
1482 	} else {
1483 		hard = B_FALSE;
1484 	}
1485 
1486 	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1487 		alloclen += sizeof (sadb_lifetime_t);
1488 		idle = B_TRUE;
1489 	} else {
1490 		idle = B_FALSE;
1491 	}
1492 
1493 	/* Inner addresses. */
1494 	if (ipsa->ipsa_innerfam == 0) {
1495 		isrc = B_FALSE;
1496 		idst = B_FALSE;
1497 	} else {
1498 		pfam = ipsa->ipsa_innerfam;
1499 		switch (pfam) {
1500 		case AF_INET6:
1501 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1502 			    sizeof (sadb_address_t), sizeof (uint64_t));
1503 			break;
1504 		case AF_INET:
1505 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1506 			    sizeof (sadb_address_t), sizeof (uint64_t));
1507 			break;
1508 		default:
1509 			cmn_err(CE_PANIC,
1510 			    "IPsec SADB: Proxy length failure.\n");
1511 			break;
1512 		}
1513 		isrc = B_TRUE;
1514 		idst = B_TRUE;
1515 		alloclen += 2 * paddrsize;
1516 	}
1517 
1518 	/* For the following fields, assume that length != 0 ==> stuff */
1519 	if (ipsa->ipsa_authkeylen != 0) {
1520 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1521 		    sizeof (uint64_t));
1522 		alloclen += authsize;
1523 		auth = B_TRUE;
1524 	} else {
1525 		auth = B_FALSE;
1526 	}
1527 
1528 	if (ipsa->ipsa_encrkeylen != 0) {
1529 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1530 		    sizeof (uint64_t));
1531 		alloclen += encrsize;
1532 		encr = B_TRUE;
1533 	} else {
1534 		encr = B_FALSE;
1535 	}
1536 
1537 	/* No need for roundup on sens and integ. */
1538 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1539 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1540 		    ipsa->ipsa_senslen;
1541 		sensinteg = B_TRUE;
1542 	} else {
1543 		sensinteg = B_FALSE;
1544 	}
1545 
1546 	/*
1547 	 * Must use strlen() here for lengths.	Identities use NULL
1548 	 * pointers to indicate their nonexistence.
1549 	 */
1550 	if (ipsa->ipsa_src_cid != NULL) {
1551 		srcidsize = roundup(sizeof (sadb_ident_t) +
1552 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1553 		    sizeof (uint64_t));
1554 		alloclen += srcidsize;
1555 		srcid = B_TRUE;
1556 	} else {
1557 		srcid = B_FALSE;
1558 	}
1559 
1560 	if (ipsa->ipsa_dst_cid != NULL) {
1561 		dstidsize = roundup(sizeof (sadb_ident_t) +
1562 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1563 		    sizeof (uint64_t));
1564 		alloclen += dstidsize;
1565 		dstid = B_TRUE;
1566 	} else {
1567 		dstid = B_FALSE;
1568 	}
1569 
1570 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1571 		alloclen += sizeof (sadb_x_kmc_t);
1572 
1573 	if (ipsa->ipsa_replay != 0) {
1574 		alloclen += sizeof (sadb_x_replay_ctr_t);
1575 	}
1576 
1577 	/* Make sure the allocation length is a multiple of 8 bytes. */
1578 	ASSERT((alloclen & 0x7) == 0);
1579 
1580 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1581 	mp = allocb(alloclen, BPRI_HI);
1582 	if (mp == NULL)
1583 		return (NULL);
1584 
1585 	mp->b_wptr += alloclen;
1586 	end = mp->b_wptr;
1587 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1588 	*newsamsg = *samsg;
1589 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1590 
1591 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1592 
1593 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1594 
1595 	assoc = (sadb_sa_t *)(newsamsg + 1);
1596 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1597 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1598 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1599 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1600 	assoc->sadb_sa_state = ipsa->ipsa_state;
1601 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1602 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1603 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1604 
1605 	lt = (sadb_lifetime_t *)(assoc + 1);
1606 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1607 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1608 	/* We do not support the concept. */
1609 	lt->sadb_lifetime_allocations = 0;
1610 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1611 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1612 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1613 
1614 	if (hard) {
1615 		lt++;
1616 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1617 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1618 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1619 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1620 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1621 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1622 	}
1623 
1624 	if (soft) {
1625 		lt++;
1626 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1627 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1628 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1629 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1630 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1631 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1632 	}
1633 
1634 	if (idle) {
1635 		lt++;
1636 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1637 		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1638 		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1639 		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1640 	}
1641 
1642 	cur = (uint8_t *)(lt + 1);
1643 
1644 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1645 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1646 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1647 	    SA_PROTO(ipsa), 0);
1648 	if (cur == NULL) {
1649 		freemsg(mp);
1650 		mp = NULL;
1651 		goto bail;
1652 	}
1653 
1654 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1655 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1656 	    SA_PROTO(ipsa), 0);
1657 	if (cur == NULL) {
1658 		freemsg(mp);
1659 		mp = NULL;
1660 		goto bail;
1661 	}
1662 
1663 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1664 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1665 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1666 		    IPPROTO_UDP, 0);
1667 		if (cur == NULL) {
1668 			freemsg(mp);
1669 			mp = NULL;
1670 			goto bail;
1671 		}
1672 	}
1673 
1674 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1675 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1676 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1677 		    IPPROTO_UDP, 0);
1678 		if (cur == NULL) {
1679 			freemsg(mp);
1680 			mp = NULL;
1681 			goto bail;
1682 		}
1683 	}
1684 
1685 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1686 	if (isrc) {
1687 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1688 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1689 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1690 		if (cur == NULL) {
1691 			freemsg(mp);
1692 			mp = NULL;
1693 			goto bail;
1694 		}
1695 	}
1696 
1697 	if (idst) {
1698 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1699 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1700 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1701 		if (cur == NULL) {
1702 			freemsg(mp);
1703 			mp = NULL;
1704 			goto bail;
1705 		}
1706 	}
1707 
1708 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1709 		cur = sadb_make_kmc_ext(cur, end,
1710 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1711 		if (cur == NULL) {
1712 			freemsg(mp);
1713 			mp = NULL;
1714 			goto bail;
1715 		}
1716 	}
1717 
1718 	walker = (sadb_ext_t *)cur;
1719 	if (auth) {
1720 		key = (sadb_key_t *)walker;
1721 		key->sadb_key_len = SADB_8TO64(authsize);
1722 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1723 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1724 		key->sadb_key_reserved = 0;
1725 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1726 		walker = (sadb_ext_t *)((uint64_t *)walker +
1727 		    walker->sadb_ext_len);
1728 	}
1729 
1730 	if (encr) {
1731 		key = (sadb_key_t *)walker;
1732 		key->sadb_key_len = SADB_8TO64(encrsize);
1733 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1734 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1735 		key->sadb_key_reserved = 0;
1736 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1737 		walker = (sadb_ext_t *)((uint64_t *)walker +
1738 		    walker->sadb_ext_len);
1739 	}
1740 
1741 	if (srcid) {
1742 		ident = (sadb_ident_t *)walker;
1743 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1744 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1745 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1746 		ident->sadb_ident_id = 0;
1747 		ident->sadb_ident_reserved = 0;
1748 		(void) strcpy((char *)(ident + 1),
1749 		    ipsa->ipsa_src_cid->ipsid_cid);
1750 		walker = (sadb_ext_t *)((uint64_t *)walker +
1751 		    walker->sadb_ext_len);
1752 	}
1753 
1754 	if (dstid) {
1755 		ident = (sadb_ident_t *)walker;
1756 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1757 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1758 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1759 		ident->sadb_ident_id = 0;
1760 		ident->sadb_ident_reserved = 0;
1761 		(void) strcpy((char *)(ident + 1),
1762 		    ipsa->ipsa_dst_cid->ipsid_cid);
1763 		walker = (sadb_ext_t *)((uint64_t *)walker +
1764 		    walker->sadb_ext_len);
1765 	}
1766 
1767 	if (sensinteg) {
1768 		sens = (sadb_sens_t *)walker;
1769 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1770 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1771 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1772 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1773 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1774 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1775 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1776 		sens->sadb_sens_reserved = 0;
1777 		bitmap = (uint64_t *)(sens + 1);
1778 		if (ipsa->ipsa_sens != NULL) {
1779 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1780 			bitmap += sens->sadb_sens_sens_len;
1781 		}
1782 		if (ipsa->ipsa_integ != NULL)
1783 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1784 		walker = (sadb_ext_t *)((uint64_t *)walker +
1785 		    walker->sadb_ext_len);
1786 	}
1787 
1788 	if (paired) {
1789 		pair_ext = (sadb_x_pair_t *)walker;
1790 
1791 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1792 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1793 		pair_ext->sadb_x_pair_spi = otherspi;
1794 
1795 		walker = (sadb_ext_t *)((uint64_t *)walker +
1796 		    walker->sadb_ext_len);
1797 	}
1798 
1799 	if (ipsa->ipsa_replay != 0) {
1800 		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1801 		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1802 		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1803 		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1804 		repl_ctr->sadb_x_rc_replay64 = 0;
1805 		walker = (sadb_ext_t *)(repl_ctr + 1);
1806 	}
1807 
1808 bail:
1809 	/* Pardon any delays... */
1810 	mutex_exit(&ipsa->ipsa_lock);
1811 
1812 	return (mp);
1813 }
1814 
1815 /*
1816  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1817  * and adjust base message accordingly.
1818  *
1819  * Assume message is pulled up in one piece of contiguous memory.
1820  *
1821  * Say if we start off with:
1822  *
1823  * +------+----+-------------+-----------+---------------+---------------+
1824  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1825  * +------+----+-------------+-----------+---------------+---------------+
1826  *
1827  * we will end up with
1828  *
1829  * +------+----+-------------+-----------+---------------+
1830  * | base | SA | source addr | dest addr | soft lifetime |
1831  * +------+----+-------------+-----------+---------------+
1832  */
1833 static void
1834 sadb_strip(sadb_msg_t *samsg)
1835 {
1836 	sadb_ext_t *ext;
1837 	uint8_t *target = NULL;
1838 	uint8_t *msgend;
1839 	int sofar = SADB_8TO64(sizeof (*samsg));
1840 	int copylen;
1841 
1842 	ext = (sadb_ext_t *)(samsg + 1);
1843 	msgend = (uint8_t *)samsg;
1844 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1845 	while ((uint8_t *)ext < msgend) {
1846 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1847 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1848 		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1849 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1850 			/*
1851 			 * Aha!	 I found a header to be erased.
1852 			 */
1853 
1854 			if (target != NULL) {
1855 				/*
1856 				 * If I had a previous header to be erased,
1857 				 * copy over it.  I can get away with just
1858 				 * copying backwards because the target will
1859 				 * always be 8 bytes behind the source.
1860 				 */
1861 				copylen = ((uint8_t *)ext) - (target +
1862 				    SADB_64TO8(
1863 				    ((sadb_ext_t *)target)->sadb_ext_len));
1864 				ovbcopy(((uint8_t *)ext - copylen), target,
1865 				    copylen);
1866 				target += copylen;
1867 				((sadb_ext_t *)target)->sadb_ext_len =
1868 				    SADB_8TO64(((uint8_t *)ext) - target +
1869 				    SADB_64TO8(ext->sadb_ext_len));
1870 			} else {
1871 				target = (uint8_t *)ext;
1872 			}
1873 		} else {
1874 			sofar += ext->sadb_ext_len;
1875 		}
1876 
1877 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1878 	}
1879 
1880 	ASSERT((uint8_t *)ext == msgend);
1881 
1882 	if (target != NULL) {
1883 		copylen = ((uint8_t *)ext) - (target +
1884 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1885 		if (copylen != 0)
1886 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1887 	}
1888 
1889 	/* Adjust samsg. */
1890 	samsg->sadb_msg_len = (uint16_t)sofar;
1891 
1892 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1893 }
1894 
1895 /*
1896  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1897  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1898  * the sending keysock instance is included.
1899  */
1900 void
1901 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1902     uint_t serial)
1903 {
1904 	mblk_t *msg = mp->b_cont;
1905 	sadb_msg_t *samsg;
1906 	keysock_out_t *kso;
1907 
1908 	/*
1909 	 * Enough functions call this to merit a NULL queue check.
1910 	 */
1911 	if (pfkey_q == NULL) {
1912 		freemsg(mp);
1913 		return;
1914 	}
1915 
1916 	ASSERT(msg != NULL);
1917 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1918 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1919 	samsg = (sadb_msg_t *)msg->b_rptr;
1920 	kso = (keysock_out_t *)mp->b_rptr;
1921 
1922 	kso->ks_out_type = KEYSOCK_OUT;
1923 	kso->ks_out_len = sizeof (*kso);
1924 	kso->ks_out_serial = serial;
1925 
1926 	/*
1927 	 * Only send the base message up in the event of an error.
1928 	 * Don't worry about bzero()-ing, because it was probably bogus
1929 	 * anyway.
1930 	 */
1931 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1932 	samsg = (sadb_msg_t *)msg->b_rptr;
1933 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1934 	samsg->sadb_msg_errno = (uint8_t)error;
1935 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1936 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1937 
1938 	putnext(pfkey_q, mp);
1939 }
1940 
1941 /*
1942  * Send a successful return packet back to keysock via the queue in pfkey_q.
1943  *
1944  * Often, an SA is associated with the reply message, it's passed in if needed,
1945  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1946  * and the caller will release said refcnt.
1947  */
1948 void
1949 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1950     keysock_in_t *ksi, ipsa_t *ipsa)
1951 {
1952 	keysock_out_t *kso;
1953 	mblk_t *mp1;
1954 	sadb_msg_t *newsamsg;
1955 	uint8_t *oldend;
1956 
1957 	ASSERT((mp->b_cont != NULL) &&
1958 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1959 	    ((void *)mp->b_rptr == (void *)ksi));
1960 
1961 	switch (samsg->sadb_msg_type) {
1962 	case SADB_ADD:
1963 	case SADB_UPDATE:
1964 	case SADB_X_UPDATEPAIR:
1965 	case SADB_X_DELPAIR_STATE:
1966 	case SADB_FLUSH:
1967 	case SADB_DUMP:
1968 		/*
1969 		 * I have all of the message already.  I just need to strip
1970 		 * out the keying material and echo the message back.
1971 		 *
1972 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1973 		 * work.  When DUMP reaches here, it should only be a base
1974 		 * message.
1975 		 */
1976 	justecho:
1977 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1978 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1979 		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1980 			sadb_strip(samsg);
1981 			/* Assume PF_KEY message is contiguous. */
1982 			ASSERT(mp->b_cont->b_cont == NULL);
1983 			oldend = mp->b_cont->b_wptr;
1984 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1985 			    SADB_64TO8(samsg->sadb_msg_len);
1986 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1987 		}
1988 		break;
1989 	case SADB_GET:
1990 		/*
1991 		 * Do a lot of work here, because of the ipsa I just found.
1992 		 * First construct the new PF_KEY message, then abandon
1993 		 * the old one.
1994 		 */
1995 		mp1 = sadb_sa2msg(ipsa, samsg);
1996 		if (mp1 == NULL) {
1997 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1998 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1999 			return;
2000 		}
2001 		freemsg(mp->b_cont);
2002 		mp->b_cont = mp1;
2003 		break;
2004 	case SADB_DELETE:
2005 	case SADB_X_DELPAIR:
2006 		if (ipsa == NULL)
2007 			goto justecho;
2008 		/*
2009 		 * Because listening KMds may require more info, treat
2010 		 * DELETE like a special case of GET.
2011 		 */
2012 		mp1 = sadb_sa2msg(ipsa, samsg);
2013 		if (mp1 == NULL) {
2014 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
2015 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
2016 			return;
2017 		}
2018 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
2019 		sadb_strip(newsamsg);
2020 		oldend = mp1->b_wptr;
2021 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
2022 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
2023 		freemsg(mp->b_cont);
2024 		mp->b_cont = mp1;
2025 		break;
2026 	default:
2027 		if (mp != NULL)
2028 			freemsg(mp);
2029 		return;
2030 	}
2031 
2032 	/* ksi is now null and void. */
2033 	kso = (keysock_out_t *)ksi;
2034 	kso->ks_out_type = KEYSOCK_OUT;
2035 	kso->ks_out_len = sizeof (*kso);
2036 	kso->ks_out_serial = ksi->ks_in_serial;
2037 	/* We're ready to send... */
2038 	putnext(pfkey_q, mp);
2039 }
2040 
2041 /*
2042  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
2043  */
2044 void
2045 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
2046     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
2047 {
2048 	keysock_hello_ack_t *kha;
2049 	queue_t *oldq;
2050 
2051 	ASSERT(OTHERQ(q) != NULL);
2052 
2053 	/*
2054 	 * First, check atomically that I'm the first and only keysock
2055 	 * instance.
2056 	 *
2057 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
2058 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
2059 	 * messages.
2060 	 */
2061 
2062 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
2063 	if (oldq != NULL) {
2064 		ASSERT(oldq != q);
2065 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
2066 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
2067 		freemsg(mp);
2068 		return;
2069 	}
2070 
2071 	kha = (keysock_hello_ack_t *)mp->b_rptr;
2072 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
2073 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
2074 	kha->ks_hello_satype = (uint8_t)satype;
2075 
2076 	/*
2077 	 * If we made it past the casptr, then we have "exclusive" access
2078 	 * to the timeout handle.  Fire it off in 4 seconds, because it
2079 	 * just seems like a good interval.
2080 	 */
2081 	*top = qtimeout(*pfkey_qp, ager, agerarg, drv_usectohz(4000000));
2082 
2083 	putnext(*pfkey_qp, mp);
2084 }
2085 
2086 /*
2087  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
2088  *
2089  * Check addresses themselves for wildcard or multicast.
2090  * Check ire table for local/non-local/broadcast.
2091  */
2092 int
2093 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
2094     netstack_t *ns)
2095 {
2096 	sadb_address_t *addr = (sadb_address_t *)ext;
2097 	struct sockaddr_in *sin;
2098 	struct sockaddr_in6 *sin6;
2099 	ire_t *ire;
2100 	int diagnostic, type;
2101 	boolean_t normalized = B_FALSE;
2102 
2103 	ASSERT(ext != NULL);
2104 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
2105 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
2106 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
2107 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
2108 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
2109 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
2110 
2111 	/* Assign both sockaddrs, the compiler will do the right thing. */
2112 	sin = (struct sockaddr_in *)(addr + 1);
2113 	sin6 = (struct sockaddr_in6 *)(addr + 1);
2114 
2115 	if (sin6->sin6_family == AF_INET6) {
2116 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2117 			/*
2118 			 * Convert to an AF_INET sockaddr.  This means the
2119 			 * return messages will have the extra space, but have
2120 			 * AF_INET sockaddrs instead of AF_INET6.
2121 			 *
2122 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
2123 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
2124 			 * equal to AF_INET <v4>, it shouldnt be a huge
2125 			 * problem.
2126 			 */
2127 			sin->sin_family = AF_INET;
2128 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
2129 			    &sin->sin_addr);
2130 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
2131 			normalized = B_TRUE;
2132 		}
2133 	} else if (sin->sin_family != AF_INET) {
2134 		switch (ext->sadb_ext_type) {
2135 		case SADB_EXT_ADDRESS_SRC:
2136 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2137 			break;
2138 		case SADB_EXT_ADDRESS_DST:
2139 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2140 			break;
2141 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2142 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2143 			break;
2144 		case SADB_X_EXT_ADDRESS_INNER_DST:
2145 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2146 			break;
2147 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2148 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2149 			break;
2150 		case SADB_X_EXT_ADDRESS_NATT_REM:
2151 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2152 			break;
2153 			/* There is no default, see above ASSERT. */
2154 		}
2155 bail:
2156 		if (pfkey_q != NULL) {
2157 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2158 			    serial);
2159 		} else {
2160 			/*
2161 			 * Scribble in sadb_msg that we got passed in.
2162 			 * Overload "mp" to be an sadb_msg pointer.
2163 			 */
2164 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2165 
2166 			samsg->sadb_msg_errno = EINVAL;
2167 			samsg->sadb_x_msg_diagnostic = diagnostic;
2168 		}
2169 		return (KS_IN_ADDR_UNKNOWN);
2170 	}
2171 
2172 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2173 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2174 		/*
2175 		 * We need only check for prefix issues.
2176 		 */
2177 
2178 		/* Set diagnostic now, in case we need it later. */
2179 		diagnostic =
2180 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2181 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2182 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2183 
2184 		if (normalized)
2185 			addr->sadb_address_prefixlen -= 96;
2186 
2187 		/*
2188 		 * Verify and mask out inner-addresses based on prefix length.
2189 		 */
2190 		if (sin->sin_family == AF_INET) {
2191 			if (addr->sadb_address_prefixlen > 32)
2192 				goto bail;
2193 			sin->sin_addr.s_addr &=
2194 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2195 		} else {
2196 			in6_addr_t mask;
2197 
2198 			ASSERT(sin->sin_family == AF_INET6);
2199 			/*
2200 			 * ip_plen_to_mask_v6() returns NULL if the value in
2201 			 * question is out of range.
2202 			 */
2203 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2204 			    &mask) == NULL)
2205 				goto bail;
2206 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2207 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2208 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2209 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2210 		}
2211 
2212 		/* We don't care in these cases. */
2213 		return (KS_IN_ADDR_DONTCARE);
2214 	}
2215 
2216 	if (sin->sin_family == AF_INET6) {
2217 		/* Check the easy ones now. */
2218 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2219 			return (KS_IN_ADDR_MBCAST);
2220 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2221 			return (KS_IN_ADDR_UNSPEC);
2222 		/*
2223 		 * At this point, we're a unicast IPv6 address.
2224 		 *
2225 		 * A ctable lookup for local is sufficient here.  If we're
2226 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2227 		 *
2228 		 * XXX Zones alert -> me/notme decision needs to be tempered
2229 		 * by what zone we're in when we go to zone-aware IPsec.
2230 		 */
2231 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2232 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2233 		    ns->netstack_ip);
2234 		if (ire != NULL) {
2235 			/* Hey hey, it's local. */
2236 			IRE_REFRELE(ire);
2237 			return (KS_IN_ADDR_ME);
2238 		}
2239 	} else {
2240 		ASSERT(sin->sin_family == AF_INET);
2241 		if (sin->sin_addr.s_addr == INADDR_ANY)
2242 			return (KS_IN_ADDR_UNSPEC);
2243 		if (CLASSD(sin->sin_addr.s_addr))
2244 			return (KS_IN_ADDR_MBCAST);
2245 		/*
2246 		 * At this point we're a unicast or broadcast IPv4 address.
2247 		 *
2248 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2249 		 * A NULL return value is NOTME, otherwise, look at the
2250 		 * returned ire for broadcast or not and return accordingly.
2251 		 *
2252 		 * XXX Zones alert -> me/notme decision needs to be tempered
2253 		 * by what zone we're in when we go to zone-aware IPsec.
2254 		 */
2255 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2256 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2257 		    MATCH_IRE_TYPE, ns->netstack_ip);
2258 		if (ire != NULL) {
2259 			/* Check for local or broadcast */
2260 			type = ire->ire_type;
2261 			IRE_REFRELE(ire);
2262 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2263 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2264 			    KS_IN_ADDR_MBCAST);
2265 		}
2266 	}
2267 
2268 	return (KS_IN_ADDR_NOTME);
2269 }
2270 
2271 /*
2272  * Address normalizations and reality checks for inbound PF_KEY messages.
2273  *
2274  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2275  * the source to AF_INET.  Do the same for the inner sources.
2276  */
2277 boolean_t
2278 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2279 {
2280 	struct sockaddr_in *src, *isrc;
2281 	struct sockaddr_in6 *dst, *idst;
2282 	sadb_address_t *srcext, *dstext;
2283 	uint16_t sport;
2284 	sadb_ext_t **extv = ksi->ks_in_extv;
2285 	int rc;
2286 
2287 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2288 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2289 		    ksi->ks_in_serial, ns);
2290 		if (rc == KS_IN_ADDR_UNKNOWN)
2291 			return (B_FALSE);
2292 		if (rc == KS_IN_ADDR_MBCAST) {
2293 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2294 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2295 			return (B_FALSE);
2296 		}
2297 		ksi->ks_in_srctype = rc;
2298 	}
2299 
2300 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2301 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2302 		    ksi->ks_in_serial, ns);
2303 		if (rc == KS_IN_ADDR_UNKNOWN)
2304 			return (B_FALSE);
2305 		if (rc == KS_IN_ADDR_UNSPEC) {
2306 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2307 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2308 			return (B_FALSE);
2309 		}
2310 		ksi->ks_in_dsttype = rc;
2311 	}
2312 
2313 	/*
2314 	 * NAT-Traversal addrs are simple enough to not require all of
2315 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2316 	 * AF_INET.
2317 	 */
2318 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2319 		rc = sadb_addrcheck(pfkey_q, mp,
2320 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2321 
2322 		/*
2323 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2324 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2325 		 * AND local-port flexibility).
2326 		 */
2327 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2328 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2329 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2330 			    ksi->ks_in_serial);
2331 			return (B_FALSE);
2332 		}
2333 		src = (struct sockaddr_in *)
2334 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2335 		if (src->sin_family != AF_INET) {
2336 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2337 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2338 			    ksi->ks_in_serial);
2339 			return (B_FALSE);
2340 		}
2341 	}
2342 
2343 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2344 		rc = sadb_addrcheck(pfkey_q, mp,
2345 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2346 
2347 		/*
2348 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2349 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2350 		 */
2351 		if (rc != KS_IN_ADDR_NOTME &&
2352 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2353 		    rc == KS_IN_ADDR_UNSPEC)) {
2354 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2355 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2356 			    ksi->ks_in_serial);
2357 			return (B_FALSE);
2358 		}
2359 		src = (struct sockaddr_in *)
2360 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2361 		if (src->sin_family != AF_INET) {
2362 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2363 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2364 			    ksi->ks_in_serial);
2365 			return (B_FALSE);
2366 		}
2367 	}
2368 
2369 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2370 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2371 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2372 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2373 			    ksi->ks_in_serial);
2374 			return (B_FALSE);
2375 		}
2376 
2377 		if (sadb_addrcheck(pfkey_q, mp,
2378 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2379 		    == KS_IN_ADDR_UNKNOWN ||
2380 		    sadb_addrcheck(pfkey_q, mp,
2381 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2382 		    == KS_IN_ADDR_UNKNOWN)
2383 			return (B_FALSE);
2384 
2385 		isrc = (struct sockaddr_in *)
2386 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2387 		    1);
2388 		idst = (struct sockaddr_in6 *)
2389 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2390 		    1);
2391 		if (isrc->sin_family != idst->sin6_family) {
2392 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2393 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2394 			    ksi->ks_in_serial);
2395 			return (B_FALSE);
2396 		}
2397 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2398 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2399 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2400 			    ksi->ks_in_serial);
2401 			return (B_FALSE);
2402 	} else {
2403 		isrc = NULL;	/* For inner/outer port check below. */
2404 	}
2405 
2406 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2407 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2408 
2409 	if (dstext == NULL || srcext == NULL)
2410 		return (B_TRUE);
2411 
2412 	dst = (struct sockaddr_in6 *)(dstext + 1);
2413 	src = (struct sockaddr_in *)(srcext + 1);
2414 
2415 	if (isrc != NULL &&
2416 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2417 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2418 		/* Can't set inner and outer ports in one SA. */
2419 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2420 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2421 		    ksi->ks_in_serial);
2422 		return (B_FALSE);
2423 	}
2424 
2425 	if (dst->sin6_family == src->sin_family)
2426 		return (B_TRUE);
2427 
2428 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2429 		if (srcext->sadb_address_proto == 0) {
2430 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2431 		} else if (dstext->sadb_address_proto == 0) {
2432 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2433 		} else {
2434 			/* Inequal protocols, neither were 0.  Report error. */
2435 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2436 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2437 			    ksi->ks_in_serial);
2438 			return (B_FALSE);
2439 		}
2440 	}
2441 
2442 	/*
2443 	 * With the exception of an unspec IPv6 source and an IPv4
2444 	 * destination, address families MUST me matched.
2445 	 */
2446 	if (src->sin_family == AF_INET ||
2447 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2448 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2449 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2450 		return (B_FALSE);
2451 	}
2452 
2453 	/*
2454 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2455 	 * in the same place for sockaddr_in and sockaddr_in6.
2456 	 */
2457 	sport = src->sin_port;
2458 	bzero(src, sizeof (*src));
2459 	src->sin_family = AF_INET;
2460 	src->sin_port = sport;
2461 
2462 	return (B_TRUE);
2463 }
2464 
2465 /*
2466  * Set the results in "addrtype", given an IRE as requested by
2467  * sadb_addrcheck().
2468  */
2469 int
2470 sadb_addrset(ire_t *ire)
2471 {
2472 	if ((ire->ire_type & IRE_BROADCAST) ||
2473 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2474 	    (ire->ire_ipversion == IPV6_VERSION &&
2475 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2476 		return (KS_IN_ADDR_MBCAST);
2477 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2478 		return (KS_IN_ADDR_ME);
2479 	return (KS_IN_ADDR_NOTME);
2480 }
2481 
2482 
2483 /*
2484  * Walker callback function to delete sa's based on src/dst address.
2485  * Assumes that we're called with *head locked, no other locks held;
2486  * Conveniently, and not coincidentally, this is both what sadb_walker
2487  * gives us and also what sadb_unlinkassoc expects.
2488  */
2489 
2490 struct sadb_purge_state
2491 {
2492 	uint32_t *src;
2493 	uint32_t *dst;
2494 	sa_family_t af;
2495 	boolean_t inbnd;
2496 	char *sidstr;
2497 	char *didstr;
2498 	uint16_t sidtype;
2499 	uint16_t didtype;
2500 	uint32_t kmproto;
2501 	uint8_t sadb_sa_state;
2502 	mblk_t *mq;
2503 	sadb_t *sp;
2504 };
2505 
2506 static void
2507 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2508 {
2509 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2510 
2511 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2512 
2513 	mutex_enter(&entry->ipsa_lock);
2514 
2515 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2516 	    (ps->src != NULL &&
2517 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2518 	    (ps->dst != NULL &&
2519 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2520 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2521 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2522 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2523 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2524 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2525 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2526 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2527 		mutex_exit(&entry->ipsa_lock);
2528 		return;
2529 	}
2530 
2531 	if (ps->inbnd) {
2532 		sadb_delete_cluster(entry);
2533 	}
2534 	entry->ipsa_state = IPSA_STATE_DEAD;
2535 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2536 }
2537 
2538 /*
2539  * Common code to purge an SA with a matching src or dst address.
2540  * Don't kill larval SA's in such a purge.
2541  */
2542 int
2543 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2544     queue_t *ip_q)
2545 {
2546 	sadb_address_t *dstext =
2547 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2548 	sadb_address_t *srcext =
2549 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2550 	sadb_ident_t *dstid =
2551 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2552 	sadb_ident_t *srcid =
2553 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2554 	sadb_x_kmc_t *kmc =
2555 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2556 	struct sockaddr_in *src, *dst;
2557 	struct sockaddr_in6 *src6, *dst6;
2558 	struct sadb_purge_state ps;
2559 
2560 	/*
2561 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2562 	 * takes care of them.
2563 	 */
2564 
2565 	/* enforced by caller */
2566 	ASSERT((dstext != NULL) || (srcext != NULL));
2567 
2568 	ps.src = NULL;
2569 	ps.dst = NULL;
2570 #ifdef DEBUG
2571 	ps.af = (sa_family_t)-1;
2572 #endif
2573 	ps.mq = NULL;
2574 	ps.sidstr = NULL;
2575 	ps.didstr = NULL;
2576 	ps.kmproto = SADB_X_KMP_MAX + 1;
2577 
2578 	if (dstext != NULL) {
2579 		dst = (struct sockaddr_in *)(dstext + 1);
2580 		ps.af = dst->sin_family;
2581 		if (dst->sin_family == AF_INET6) {
2582 			dst6 = (struct sockaddr_in6 *)dst;
2583 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2584 		} else {
2585 			ps.dst = (uint32_t *)&dst->sin_addr;
2586 		}
2587 	}
2588 
2589 	if (srcext != NULL) {
2590 		src = (struct sockaddr_in *)(srcext + 1);
2591 		ps.af = src->sin_family;
2592 		if (src->sin_family == AF_INET6) {
2593 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2594 			ps.src = (uint32_t *)&src6->sin6_addr;
2595 		} else {
2596 			ps.src = (uint32_t *)&src->sin_addr;
2597 		}
2598 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2599 	}
2600 
2601 	ASSERT(ps.af != (sa_family_t)-1);
2602 
2603 	if (dstid != NULL) {
2604 		/*
2605 		 * NOTE:  May need to copy string in the future
2606 		 * if the inbound keysock message disappears for some strange
2607 		 * reason.
2608 		 */
2609 		ps.didstr = (char *)(dstid + 1);
2610 		ps.didtype = dstid->sadb_ident_type;
2611 	}
2612 
2613 	if (srcid != NULL) {
2614 		/*
2615 		 * NOTE:  May need to copy string in the future
2616 		 * if the inbound keysock message disappears for some strange
2617 		 * reason.
2618 		 */
2619 		ps.sidstr = (char *)(srcid + 1);
2620 		ps.sidtype = srcid->sadb_ident_type;
2621 	}
2622 
2623 	if (kmc != NULL)
2624 		ps.kmproto = kmc->sadb_x_kmc_proto;
2625 
2626 	/*
2627 	 * This is simple, crude, and effective.
2628 	 * Unimplemented optimizations (TBD):
2629 	 * - we can limit how many places we search based on where we
2630 	 * think the SA is filed.
2631 	 * - if we get a dst address, we can hash based on dst addr to find
2632 	 * the correct bucket in the outbound table.
2633 	 */
2634 	ps.inbnd = B_TRUE;
2635 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2636 	ps.inbnd = B_FALSE;
2637 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2638 
2639 	if (ps.mq != NULL)
2640 		sadb_drain_torchq(ip_q, ps.mq);
2641 
2642 	ASSERT(mp->b_cont != NULL);
2643 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2644 	    NULL);
2645 	return (0);
2646 }
2647 
2648 static void
2649 sadb_delpair_state(isaf_t *head, ipsa_t *entry, void *cookie)
2650 {
2651 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2652 	isaf_t  *inbound_bucket;
2653 	ipsa_t *peer_assoc;
2654 
2655 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2656 
2657 	mutex_enter(&entry->ipsa_lock);
2658 
2659 	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2660 	    ((ps->src != NULL) &&
2661 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af))) {
2662 		mutex_exit(&entry->ipsa_lock);
2663 		return;
2664 	}
2665 
2666 	/*
2667 	 * The isaf_t *, which is passed in , is always an outbound bucket,
2668 	 * and we are preserving the outbound-then-inbound hash-bucket lock
2669 	 * ordering. The sadb_walker() which triggers this function is called
2670 	 * only on the outbound fanout, and the corresponding inbound bucket
2671 	 * lock is safe to acquire here.
2672 	 */
2673 
2674 	if (entry->ipsa_haspeer) {
2675 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_spi);
2676 		mutex_enter(&inbound_bucket->isaf_lock);
2677 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2678 		    entry->ipsa_spi, entry->ipsa_srcaddr,
2679 		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2680 	} else {
2681 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_otherspi);
2682 		mutex_enter(&inbound_bucket->isaf_lock);
2683 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2684 		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2685 		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2686 	}
2687 
2688 	entry->ipsa_state = IPSA_STATE_DEAD;
2689 	(void) sadb_torch_assoc(head, entry, B_FALSE, &ps->mq);
2690 	if (peer_assoc != NULL) {
2691 		mutex_enter(&peer_assoc->ipsa_lock);
2692 		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2693 		(void) sadb_torch_assoc(inbound_bucket, peer_assoc,
2694 		    B_FALSE, &ps->mq);
2695 	}
2696 	mutex_exit(&inbound_bucket->isaf_lock);
2697 }
2698 
2699 /*
2700  * Common code to delete/get an SA.
2701  */
2702 int
2703 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2704     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2705 {
2706 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2707 	sadb_address_t *srcext =
2708 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2709 	sadb_address_t *dstext =
2710 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2711 	ipsa_t *echo_target = NULL;
2712 	ipsap_t *ipsapp;
2713 	mblk_t *torchq = NULL;
2714 	uint_t	error = 0;
2715 
2716 	if (assoc == NULL) {
2717 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2718 		return (EINVAL);
2719 	}
2720 
2721 	if (sadb_msg_type == SADB_X_DELPAIR_STATE) {
2722 		struct sockaddr_in *src;
2723 		struct sockaddr_in6 *src6;
2724 		struct sadb_purge_state ps;
2725 
2726 		if (srcext == NULL) {
2727 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2728 			return (EINVAL);
2729 		}
2730 		ps.src = NULL;
2731 		ps.mq = NULL;
2732 		src = (struct sockaddr_in *)(srcext + 1);
2733 		ps.af = src->sin_family;
2734 		if (src->sin_family == AF_INET6) {
2735 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2736 			ps.src = (uint32_t *)&src6->sin6_addr;
2737 			ps.sp = &spp->s_v6;
2738 		} else {
2739 			ps.src = (uint32_t *)&src->sin_addr;
2740 			ps.sp = &spp->s_v4;
2741 		}
2742 		ps.inbnd = B_FALSE;
2743 		ps.sadb_sa_state = assoc->sadb_sa_state;
2744 		sadb_walker(ps.sp->sdb_of, ps.sp->sdb_hashsize,
2745 		    sadb_delpair_state, &ps);
2746 
2747 		if (ps.mq != NULL)
2748 			sadb_drain_torchq(pfkey_q, ps.mq);
2749 
2750 		ASSERT(mp->b_cont != NULL);
2751 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2752 		    ksi, NULL);
2753 		return (0);
2754 	}
2755 
2756 	if (dstext == NULL) {
2757 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2758 		return (EINVAL);
2759 	}
2760 
2761 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
2762 	if (ipsapp == NULL) {
2763 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2764 		return (ESRCH);
2765 	}
2766 
2767 	echo_target = ipsapp->ipsap_sa_ptr;
2768 	if (echo_target == NULL)
2769 		echo_target = ipsapp->ipsap_psa_ptr;
2770 
2771 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2772 		/*
2773 		 * Bucket locks will be required if SA is actually unlinked.
2774 		 * get_ipsa_pair() returns valid hash bucket pointers even
2775 		 * if it can't find a pair SA pointer.
2776 		 */
2777 		mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2778 		mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2779 
2780 		if (ipsapp->ipsap_sa_ptr != NULL) {
2781 			mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2782 			if (ipsapp->ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2783 				sadb_delete_cluster(ipsapp->ipsap_sa_ptr);
2784 			}
2785 			ipsapp->ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2786 			(void) sadb_torch_assoc(ipsapp->ipsap_bucket,
2787 			    ipsapp->ipsap_sa_ptr, B_FALSE, &torchq);
2788 			/*
2789 			 * sadb_torch_assoc() releases the ipsa_lock
2790 			 * and calls sadb_unlinkassoc() which does a
2791 			 * IPSA_REFRELE.
2792 			 */
2793 		}
2794 		if (ipsapp->ipsap_psa_ptr != NULL) {
2795 			mutex_enter(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2796 			if (sadb_msg_type == SADB_X_DELPAIR) {
2797 				if (ipsapp->ipsap_psa_ptr->ipsa_flags &
2798 				    IPSA_F_INBOUND) {
2799 					sadb_delete_cluster(
2800 					    ipsapp->ipsap_psa_ptr);
2801 				}
2802 				ipsapp->ipsap_psa_ptr->ipsa_state =
2803 				    IPSA_STATE_DEAD;
2804 				(void) sadb_torch_assoc(ipsapp->ipsap_pbucket,
2805 				    ipsapp->ipsap_psa_ptr, B_FALSE, &torchq);
2806 			} else {
2807 				/*
2808 				 * Only half of the "pair" has been deleted.
2809 				 * Update the remaining SA and remove references
2810 				 * to its pair SA, which is now gone.
2811 				 */
2812 				ipsapp->ipsap_psa_ptr->ipsa_otherspi = 0;
2813 				ipsapp->ipsap_psa_ptr->ipsa_flags &=
2814 				    ~IPSA_F_PAIRED;
2815 				mutex_exit(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2816 			}
2817 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2818 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2819 			error = ESRCH;
2820 		}
2821 		mutex_exit(&ipsapp->ipsap_bucket->isaf_lock);
2822 		mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2823 	}
2824 
2825 	if (torchq != NULL)
2826 		sadb_drain_torchq(spp->s_ip_q, torchq);
2827 
2828 	ASSERT(mp->b_cont != NULL);
2829 
2830 	if (error == 0)
2831 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2832 		    mp->b_cont->b_rptr, ksi, echo_target);
2833 
2834 	destroy_ipsa_pair(ipsapp);
2835 
2836 	return (error);
2837 }
2838 
2839 /*
2840  * This function takes a sadb_sa_t and finds the ipsa_t structure
2841  * and the isaf_t (hash bucket) that its stored under. If the security
2842  * association has a peer, the ipsa_t structure and bucket for that security
2843  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2844  * are returned as a ipsap_t.
2845  *
2846  * Note that a "pair" is defined as one (but not both) of the following:
2847  *
2848  * A security association which has a soft reference to another security
2849  * association via its SPI.
2850  *
2851  * A security association that is not obviously "inbound" or "outbound" so
2852  * it appears in both hash tables, the "peer" being the same security
2853  * association in the other hash table.
2854  *
2855  * This function will return NULL if the ipsa_t can't be found in the
2856  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2857  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2858  * provided at least one ipsa_t is found.
2859  */
2860 ipsap_t *
2861 get_ipsa_pair(sadb_sa_t *assoc, sadb_address_t *srcext, sadb_address_t *dstext,
2862     sadbp_t *spp)
2863 {
2864 	struct sockaddr_in *src, *dst;
2865 	struct sockaddr_in6 *src6, *dst6;
2866 	sadb_t *sp;
2867 	uint32_t *srcaddr, *dstaddr;
2868 	isaf_t *outbound_bucket, *inbound_bucket;
2869 	boolean_t in_inbound_table = B_FALSE;
2870 	ipsap_t *ipsapp;
2871 	sa_family_t af;
2872 
2873 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2874 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2875 	uint32_t pair_spi;
2876 
2877 	ipsapp = kmem_zalloc(sizeof (*ipsapp), KM_NOSLEEP);
2878 	if (ipsapp == NULL)
2879 		return (NULL);
2880 
2881 	/*
2882 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2883 	 * takes care of them.
2884 	 */
2885 
2886 	dst = (struct sockaddr_in *)(dstext + 1);
2887 	af = dst->sin_family;
2888 	if (af == AF_INET6) {
2889 		sp = &spp->s_v6;
2890 		dst6 = (struct sockaddr_in6 *)dst;
2891 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2892 		if (srcext != NULL) {
2893 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2894 			srcaddr = (uint32_t *)&src6->sin6_addr;
2895 			ASSERT(src6->sin6_family == af);
2896 			ASSERT(src6->sin6_family == AF_INET6);
2897 		} else {
2898 			srcaddr = ALL_ZEROES_PTR;
2899 		}
2900 		outbound_bucket = OUTBOUND_BUCKET_V6(sp,
2901 		    *(uint32_t *)dstaddr);
2902 	} else {
2903 		sp = &spp->s_v4;
2904 		dstaddr = (uint32_t *)&dst->sin_addr;
2905 		if (srcext != NULL) {
2906 			src = (struct sockaddr_in *)(srcext + 1);
2907 			srcaddr = (uint32_t *)&src->sin_addr;
2908 			ASSERT(src->sin_family == af);
2909 			ASSERT(src->sin_family == AF_INET);
2910 		} else {
2911 			srcaddr = ALL_ZEROES_PTR;
2912 		}
2913 		outbound_bucket = OUTBOUND_BUCKET_V4(sp,
2914 		    *(uint32_t *)dstaddr);
2915 	}
2916 
2917 	inbound_bucket = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2918 
2919 	/* Lock down both buckets. */
2920 	mutex_enter(&outbound_bucket->isaf_lock);
2921 	mutex_enter(&inbound_bucket->isaf_lock);
2922 
2923 	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2924 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(inbound_bucket,
2925 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2926 		if (ipsapp->ipsap_sa_ptr != NULL) {
2927 			ipsapp->ipsap_bucket = inbound_bucket;
2928 			ipsapp->ipsap_pbucket = outbound_bucket;
2929 			in_inbound_table = B_TRUE;
2930 		} else {
2931 			ipsapp->ipsap_sa_ptr =
2932 			    ipsec_getassocbyspi(outbound_bucket,
2933 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2934 			ipsapp->ipsap_bucket = outbound_bucket;
2935 			ipsapp->ipsap_pbucket = inbound_bucket;
2936 		}
2937 	} else {
2938 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2939 		ipsapp->ipsap_sa_ptr =
2940 		    ipsec_getassocbyspi(outbound_bucket,
2941 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2942 		if (ipsapp->ipsap_sa_ptr != NULL) {
2943 			ipsapp->ipsap_bucket = outbound_bucket;
2944 			ipsapp->ipsap_pbucket = inbound_bucket;
2945 		} else {
2946 			ipsapp->ipsap_sa_ptr =
2947 			    ipsec_getassocbyspi(inbound_bucket,
2948 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2949 			ipsapp->ipsap_bucket = inbound_bucket;
2950 			ipsapp->ipsap_pbucket = outbound_bucket;
2951 			if (ipsapp->ipsap_sa_ptr != NULL)
2952 				in_inbound_table = B_TRUE;
2953 		}
2954 	}
2955 
2956 	if (ipsapp->ipsap_sa_ptr == NULL) {
2957 		mutex_exit(&outbound_bucket->isaf_lock);
2958 		mutex_exit(&inbound_bucket->isaf_lock);
2959 		kmem_free(ipsapp, sizeof (*ipsapp));
2960 		return (NULL);
2961 	}
2962 
2963 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2964 	    in_inbound_table) {
2965 		mutex_exit(&outbound_bucket->isaf_lock);
2966 		mutex_exit(&inbound_bucket->isaf_lock);
2967 		return (ipsapp);
2968 	}
2969 
2970 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2971 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2972 		/*
2973 		 * haspeer implies no sa_pairing, look for same spi
2974 		 * in other hashtable.
2975 		 */
2976 		ipsapp->ipsap_psa_ptr =
2977 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2978 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2979 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2980 		mutex_exit(&outbound_bucket->isaf_lock);
2981 		mutex_exit(&inbound_bucket->isaf_lock);
2982 		return (ipsapp);
2983 	}
2984 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2985 	IPSA_COPY_ADDR(&pair_srcaddr,
2986 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, af);
2987 	IPSA_COPY_ADDR(&pair_dstaddr,
2988 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, af);
2989 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2990 	mutex_exit(&outbound_bucket->isaf_lock);
2991 	mutex_exit(&inbound_bucket->isaf_lock);
2992 
2993 	if (pair_spi == 0) {
2994 		ASSERT(ipsapp->ipsap_bucket != NULL);
2995 		ASSERT(ipsapp->ipsap_pbucket != NULL);
2996 		return (ipsapp);
2997 	}
2998 
2999 	/* found sa in outbound sadb, peer should be inbound */
3000 
3001 	if (in_inbound_table) {
3002 		/* Found SA in inbound table, pair will be in outbound. */
3003 		if (af == AF_INET6) {
3004 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sp,
3005 			    *(uint32_t *)pair_srcaddr);
3006 		} else {
3007 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sp,
3008 			    *(uint32_t *)pair_srcaddr);
3009 		}
3010 	} else {
3011 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sp, pair_spi);
3012 	}
3013 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
3014 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
3015 	    pair_spi, pair_dstaddr, pair_srcaddr, af);
3016 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
3017 	ASSERT(ipsapp->ipsap_bucket != NULL);
3018 	ASSERT(ipsapp->ipsap_pbucket != NULL);
3019 	return (ipsapp);
3020 }
3021 
3022 /*
3023  * Initialize the mechanism parameters associated with an SA.
3024  * These parameters can be shared by multiple packets, which saves
3025  * us from the overhead of consulting the algorithm table for
3026  * each packet.
3027  */
3028 static void
3029 sadb_init_alginfo(ipsa_t *sa)
3030 {
3031 	ipsec_alginfo_t *alg;
3032 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
3033 
3034 	mutex_enter(&ipss->ipsec_alg_lock);
3035 
3036 	if (sa->ipsa_encrkey != NULL) {
3037 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
3038 		if (alg != NULL && ALG_VALID(alg)) {
3039 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
3040 			sa->ipsa_emech.cm_param = NULL;
3041 			sa->ipsa_emech.cm_param_len = 0;
3042 			sa->ipsa_iv_len = alg->alg_datalen;
3043 		} else
3044 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3045 	}
3046 
3047 	if (sa->ipsa_authkey != NULL) {
3048 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
3049 		if (alg != NULL && ALG_VALID(alg)) {
3050 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
3051 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
3052 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
3053 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
3054 		} else
3055 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3056 	}
3057 
3058 	mutex_exit(&ipss->ipsec_alg_lock);
3059 }
3060 
3061 /*
3062  * Perform NAT-traversal cached checksum offset calculations here.
3063  */
3064 static void
3065 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
3066     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
3067     uint32_t *dst_addr_ptr)
3068 {
3069 	struct sockaddr_in *natt_loc, *natt_rem;
3070 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
3071 	uint32_t running_sum = 0;
3072 
3073 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
3074 
3075 	if (natt_rem_ext != NULL) {
3076 		uint32_t l_src;
3077 		uint32_t l_rem;
3078 
3079 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
3080 
3081 		/* Ensured by sadb_addrfix(). */
3082 		ASSERT(natt_rem->sin_family == AF_INET);
3083 
3084 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
3085 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
3086 		l_src = *src_addr_ptr;
3087 		l_rem = *natt_rem_ptr;
3088 
3089 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3090 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
3091 
3092 		l_src = ntohl(l_src);
3093 		DOWN_SUM(l_src);
3094 		DOWN_SUM(l_src);
3095 		l_rem = ntohl(l_rem);
3096 		DOWN_SUM(l_rem);
3097 		DOWN_SUM(l_rem);
3098 
3099 		/*
3100 		 * We're 1's complement for checksums, so check for wraparound
3101 		 * here.
3102 		 */
3103 		if (l_rem > l_src)
3104 			l_src--;
3105 
3106 		running_sum += l_src - l_rem;
3107 
3108 		DOWN_SUM(running_sum);
3109 		DOWN_SUM(running_sum);
3110 	}
3111 
3112 	if (natt_loc_ext != NULL) {
3113 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
3114 
3115 		/* Ensured by sadb_addrfix(). */
3116 		ASSERT(natt_loc->sin_family == AF_INET);
3117 
3118 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
3119 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
3120 
3121 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3122 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
3123 
3124 		/*
3125 		 * NAT-T port agility means we may have natt_loc_ext, but
3126 		 * only for a local-port change.
3127 		 */
3128 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
3129 			uint32_t l_dst = ntohl(*dst_addr_ptr);
3130 			uint32_t l_loc = ntohl(*natt_loc_ptr);
3131 
3132 			DOWN_SUM(l_loc);
3133 			DOWN_SUM(l_loc);
3134 			DOWN_SUM(l_dst);
3135 			DOWN_SUM(l_dst);
3136 
3137 			/*
3138 			 * We're 1's complement for checksums, so check for
3139 			 * wraparound here.
3140 			 */
3141 			if (l_loc > l_dst)
3142 				l_dst--;
3143 
3144 			running_sum += l_dst - l_loc;
3145 			DOWN_SUM(running_sum);
3146 			DOWN_SUM(running_sum);
3147 		}
3148 	}
3149 
3150 	newbie->ipsa_inbound_cksum = running_sum;
3151 #undef DOWN_SUM
3152 }
3153 
3154 /*
3155  * This function is called from consumers that need to insert a fully-grown
3156  * security association into its tables.  This function takes into account that
3157  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
3158  * hash bucket parameters are set in order of what the SA will be most of the
3159  * time.  (For example, an SA with an unspecified source, and a multicast
3160  * destination will primarily be an outbound SA.  OTOH, if that destination
3161  * is unicast for this node, then the SA will primarily be inbound.)
3162  *
3163  * It takes a lot of parameters because even if clone is B_FALSE, this needs
3164  * to check both buckets for purposes of collision.
3165  *
3166  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
3167  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
3168  * with additional diagnostic information because there is at least one EINVAL
3169  * case here.
 *
 * Arguments, as the body below uses them:
 *	ip_q		Queue that receives, via putnext(), the control
 *			message built by sadb_fmt_sa_req() once the SA has
 *			been inserted.
 *	pfkey_q		Handed to sadb_pfkey_echo() on success.
 *	mp, samsg, ksi	The incoming PF_KEY request.  All extensions are
 *			taken from ksi->ks_in_extv[].
 *	primary		Bucket the SA itself is inserted into.
 *	secondary	Bucket that receives the clone (if clone is set) or
 *			is merely searched for a colliding SPI.
 *	newbie		NULL to have a new SA allocated with
 *			sadb_makelarvalassoc(); non-NULL to update an
 *			existing SA, which is unlinked and re-inserted.
 *	clone		Insert a copy made by sadb_cloneassoc() into
 *			secondary and mark newbie as having a peer.
 *	is_inbound	Selects which bucket lock is taken first, and
 *			suppresses lifetime_fuzz() when set.
 *	diagnostic	Out: PF_KEY diagnostic code for several error cases.
 *	ns		Netstack; supplies the ipsec stack and stack id.
 *	spp		Passed to get_ipsa_pair() and update_pairing().
 *
 * Besides the errnos named above, the body can also return EPROTOTYPE,
 * EOPNOTSUPP, ESRCH, and whatever ipsec_create_ctx_tmpl(),
 * sadb_insertassoc() or update_pairing() hand back.
 *
 * Reference handling visible in this function: every exit taken after
 * newbie->ipsa_lock is first acquired goes through the "error" label, which
 * drops one reference on newbie and one on newbie_clone (when they are
 * non-NULL).  The returns that happen earlier than that leave newbie
 * untouched.
3170  */
3171 int
3172 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
3173     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
3174     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
3175     netstack_t *ns, sadbp_t *spp)
3176 {
3177 	ipsa_t *newbie_clone = NULL, *scratch;
3178 	ipsap_t *ipsapp = NULL;
3179 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3180 	sadb_address_t *srcext =
3181 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3182 	sadb_address_t *dstext =
3183 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3184 	sadb_address_t *isrcext =
3185 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3186 	sadb_address_t *idstext =
3187 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3188 	sadb_x_kmc_t *kmcext =
3189 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
3190 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3191 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3192 	sadb_x_pair_t *pair_ext =
3193 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
3194 	sadb_x_replay_ctr_t *replayext =
3195 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
	/* Any SA type other than AH is treated as ESP here. */
3196 	uint8_t protocol =
3197 	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
3198 #if 0
3199 	/*
3200 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
3201 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
3202 	 */
3203 	sadb_sens_t *sens = (sadb_sens_t *);
3204 #endif
3205 	struct sockaddr_in *src, *dst, *isrc, *idst;
3206 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
3207 	sadb_lifetime_t *soft =
3208 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3209 	sadb_lifetime_t *hard =
3210 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3211 	sadb_lifetime_t	*idle =
3212 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3213 	sa_family_t af;
3214 	int error = 0;
	/* A caller-supplied SA means this is an update of an existing SA. */
3215 	boolean_t isupdate = (newbie != NULL);
3216 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
3217 	mblk_t *ctl_mp = NULL;
3218 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3219 	int		rcode;
3220 
	/*
	 * The outer source, outer destination and SA extensions are
	 * mandatory.  These returns happen before anything is allocated
	 * or locked.
	 */
3221 	if (srcext == NULL) {
3222 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3223 		return (EINVAL);
3224 	}
3225 	if (dstext == NULL) {
3226 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3227 		return (EINVAL);
3228 	}
3229 	if (assoc == NULL) {
3230 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3231 		return (EINVAL);
3232 	}
3233 
	/*
	 * The sockaddr follows each address extension header.  Both the
	 * IPv4 and IPv6 views are set up; which one is used depends on the
	 * address family checked below.
	 */
3234 	src = (struct sockaddr_in *)(srcext + 1);
3235 	src6 = (struct sockaddr_in6 *)(srcext + 1);
3236 	dst = (struct sockaddr_in *)(dstext + 1);
3237 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
3238 	if (isrcext != NULL) {
3239 		isrc = (struct sockaddr_in *)(isrcext + 1);
3240 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
		/*
		 * NOTE(review): only an ASSERT guards idstext here;
		 * presumably callers guarantee inner src and dst arrive
		 * together -- confirm.
		 */
3241 		ASSERT(idstext != NULL);
3242 		idst = (struct sockaddr_in *)(idstext + 1);
3243 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
3244 	} else {
		/*
		 * idst/idst6 are left unset on this path; they are only
		 * read below when isrc != NULL.
		 */
3245 		isrc = NULL;
3246 		isrc6 = NULL;
3247 	}
3248 
	/* The outer source address decides the SA's address family. */
3249 	af = src->sin_family;
3250 
3251 	if (af == AF_INET) {
3252 		src_addr_ptr = (uint32_t *)&src->sin_addr;
3253 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
3254 	} else {
3255 		ASSERT(af == AF_INET6);
3256 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
3257 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
3258 	}
3259 
	/*
	 * cl_inet_checkspi is an optional hook (it is tested for non-NULL).
	 * It is consulted only for new SAs that are cloned or inbound and
	 * not in the ACTIVE_ELSEWHERE state; a result of -1 is reported to
	 * the caller as EEXIST.
	 */
3260 	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
3261 	    cl_inet_checkspi &&
3262 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3263 		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
3264 		    assoc->sadb_sa_spi, NULL);
3265 		if (rcode == -1) {
3266 			return (EEXIST);
3267 		}
3268 	}
3269 
3270 	/*
3271 	 * Check to see if the new SA will be cloned AND paired. The
3272 	 * reason a SA will be cloned is the source or destination addresses
3273 	 * are not specific enough to determine if the SA goes in the outbound
3274 	 * or the inbound hash table, so its cloned and put in both. If
3275 	 * the SA is paired, it's soft linked to another SA for the other
3276 	 * direction. Keeping track and looking up SA's that are direction
3277 	 * unspecific and linked is too hard.
3278 	 */
3279 	if (clone && (pair_ext != NULL)) {
3280 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3281 		return (EINVAL);
3282 	}
3283 
	/* For a plain add, allocate the SA that will be filled in below. */
3284 	if (!isupdate) {
3285 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3286 		    src_addr_ptr, dst_addr_ptr, af, ns);
3287 		if (newbie == NULL)
3288 			return (ENOMEM);
3289 	}
3290 
	/*
	 * From here on the SA's lock is held while its fields are filled
	 * in; every failure path drops it before jumping to "error".
	 */
3291 	mutex_enter(&newbie->ipsa_lock);
3292 
	/* Inner addresses present: record the tunnel-mode selectors. */
3293 	if (isrc != NULL) {
3294 		if (isrc->sin_family == AF_INET) {
3295 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3296 				if (srcext->sadb_address_proto != 0) {
3297 					/*
3298 					 * Mismatched outer-packet protocol
3299 					 * and inner-packet address family.
3300 					 */
3301 					mutex_exit(&newbie->ipsa_lock);
3302 					error = EPROTOTYPE;
3303 					goto error;
3304 				} else {
3305 					/* Fill in with explicit protocol. */
3306 					srcext->sadb_address_proto =
3307 					    IPPROTO_ENCAP;
3308 					dstext->sadb_address_proto =
3309 					    IPPROTO_ENCAP;
3310 				}
3311 			}
3312 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3313 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3314 		} else {
3315 			ASSERT(isrc->sin_family == AF_INET6);
3316 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3317 				if (srcext->sadb_address_proto != 0) {
3318 					/*
3319 					 * Mismatched outer-packet protocol
3320 					 * and inner-packet address family.
3321 					 */
3322 					mutex_exit(&newbie->ipsa_lock);
3323 					error = EPROTOTYPE;
3324 					goto error;
3325 				} else {
3326 					/* Fill in with explicit protocol. */
3327 					srcext->sadb_address_proto =
3328 					    IPPROTO_IPV6;
3329 					dstext->sadb_address_proto =
3330 					    IPPROTO_IPV6;
3331 				}
3332 			}
3333 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3334 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3335 		}
3336 		newbie->ipsa_innerfam = isrc->sin_family;
3337 
3338 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3339 		    newbie->ipsa_innerfam);
3340 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3341 		    newbie->ipsa_innerfam);
3342 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3343 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3344 
3345 		/* Unique value uses inner-ports for Tunnel Mode... */
3346 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3347 		    idst->sin_port, dstext->sadb_address_proto,
3348 		    idstext->sadb_address_proto);
3349 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3350 		    idst->sin_port, dstext->sadb_address_proto,
3351 		    idstext->sadb_address_proto);
3352 	} else {
3353 		/* ... and outer-ports for Transport Mode. */
3354 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3355 		    dst->sin_port, dstext->sadb_address_proto, 0);
3356 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3357 		    dst->sin_port, dstext->sadb_address_proto, 0);
3358 	}
	/* A non-zero mask marks the SA as unique. */
3359 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3360 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3361 
	/*
	 * Runs before the NATT flag/extension consistency check below;
	 * sadb_nat_calculations() itself accepts NULL extensions.
	 */
3362 	sadb_nat_calculations(newbie,
3363 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3364 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3365 	    src_addr_ptr, dst_addr_ptr);
3366 
3367 	newbie->ipsa_type = samsg->sadb_msg_satype;
3368 	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3369 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3370 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3371 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3372 
	/*
	 * Merge the requested flags, then reject any NATT_LOC, NATT_REM or
	 * TUNNEL flag whose corresponding address extension is absent.
	 */
3373 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3374 	if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3375 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) ||
3376 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3377 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) ||
3378 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3379 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) {
3380 		mutex_exit(&newbie->ipsa_lock);
3381 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3382 		error = EINVAL;
3383 		goto error;
3384 	}
3385 	/*
3386 	 * If unspecified source address, force replay_wsize to 0.
3387 	 * This is because an SA that has multiple sources of secure
3388 	 * traffic cannot enforce a replay counter w/o synchronizing the
3389 	 * senders.
3390 	 */
3391 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3392 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3393 	else
3394 		newbie->ipsa_replay_wsize = 0;
3395 
3396 	newbie->ipsa_addtime = gethrestime_sec();
3397 
3398 	if (kmcext != NULL) {
3399 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3400 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3401 	}
3402 
3403 	/*
3404 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3405 	 * The spec says that one can update current lifetimes, but
3406 	 * that seems impractical, especially in the larval-to-mature
3407 	 * update that this function performs.
3408 	 */
3409 	if (soft != NULL) {
3410 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3411 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3412 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3413 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3414 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3415 	}
3416 	if (hard != NULL) {
3417 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3418 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3419 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3420 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3421 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3422 	}
	/*
	 * The idle lifetime is computed directly from the add time rather
	 * than through SET_EXPIRE().
	 */
3423 	if (idle != NULL) {
3424 		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3425 		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3426 		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3427 		    newbie->ipsa_idleaddlt;
3428 		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3429 	}
3430 
3431 	newbie->ipsa_authtmpl = NULL;
3432 	newbie->ipsa_encrtmpl = NULL;
3433 
	/*
	 * Authentication key: the byte length is the bit count converted
	 * by SADB_1TO8(), plus one byte when the bit count is not a
	 * multiple of 8.
	 */
3434 	if (akey != NULL) {
3435 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3436 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3437 		/* In case we have to round up to the next byte... */
3438 		if ((akey->sadb_key_bits & 0x7) != 0)
3439 			newbie->ipsa_authkeylen++;
3440 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3441 		    KM_NOSLEEP);
3442 		if (newbie->ipsa_authkey == NULL) {
3443 			error = ENOMEM;
3444 			mutex_exit(&newbie->ipsa_lock);
3445 			goto error;
3446 		}
		/*
		 * Copy the key bytes that follow the extension header, then
		 * zero them in the incoming message.
		 */
3447 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3448 		bzero(akey + 1, newbie->ipsa_authkeylen);
3449 
3450 		/*
3451 		 * Pre-initialize the kernel crypto framework key
3452 		 * structure.
3453 		 */
3454 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3455 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3456 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3457 
		/* The template is created with ipsec_alg_lock held. */
3458 		mutex_enter(&ipss->ipsec_alg_lock);
3459 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3460 		mutex_exit(&ipss->ipsec_alg_lock);
3461 		if (error != 0) {
3462 			mutex_exit(&newbie->ipsa_lock);
3463 			goto error;
3464 		}
3465 	}
3466 
	/* Encryption key: same handling as the authentication key above. */
3467 	if (ekey != NULL) {
3468 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3469 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3470 		/* In case we have to round up to the next byte... */
3471 		if ((ekey->sadb_key_bits & 0x7) != 0)
3472 			newbie->ipsa_encrkeylen++;
3473 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3474 		    KM_NOSLEEP);
3475 		if (newbie->ipsa_encrkey == NULL) {
3476 			error = ENOMEM;
3477 			mutex_exit(&newbie->ipsa_lock);
3478 			goto error;
3479 		}
3480 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3481 		/* XXX is this safe w.r.t db_ref, etc? */
3482 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3483 
3484 		/*
3485 		 * Pre-initialize the kernel crypto framework key
3486 		 * structure.
3487 		 */
3488 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3489 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3490 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3491 
3492 		mutex_enter(&ipss->ipsec_alg_lock);
3493 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3494 		mutex_exit(&ipss->ipsec_alg_lock);
3495 		if (error != 0) {
3496 			mutex_exit(&newbie->ipsa_lock);
3497 			goto error;
3498 		}
3499 	}
3500 
	/* Cache mechanism type and lengths for whichever keys were set. */
3501 	sadb_init_alginfo(newbie);
3502 
3503 	/*
3504 	 * Ptrs to processing functions.
3505 	 */
3506 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3507 		ipsecesp_init_funcs(newbie);
3508 	else
3509 		ipsecah_init_funcs(newbie);
3510 	ASSERT(newbie->ipsa_output_func != NULL &&
3511 	    newbie->ipsa_input_func != NULL);
3512 
3513 	/*
3514 	 * Certificate ID stuff.
3515 	 */
3516 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3517 		sadb_ident_t *id =
3518 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3519 
3520 		/*
3521 		 * Can assume strlen() will return okay because ext_check() in
3522 		 * keysock.c prepares the string for us.
3523 		 */
3524 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3525 		    (char *)(id+1), ns);
		/* A NULL result from ipsid_lookup() is reported as ENOMEM. */
3526 		if (newbie->ipsa_src_cid == NULL) {
3527 			error = ENOMEM;
3528 			mutex_exit(&newbie->ipsa_lock);
3529 			goto error;
3530 		}
3531 	}
3532 
3533 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3534 		sadb_ident_t *id =
3535 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3536 
3537 		/*
3538 		 * Can assume strlen() will return okay because ext_check() in
3539 		 * keysock.c prepares the string for us.
3540 		 */
3541 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3542 		    (char *)(id+1), ns);
3543 		if (newbie->ipsa_dst_cid == NULL) {
3544 			error = ENOMEM;
3545 			mutex_exit(&newbie->ipsa_lock);
3546 			goto error;
3547 		}
3548 	}
3549 
3550 #if 0
3551 	/* XXXMLS  SENSITIVITY handling code. */
3552 	if (sens != NULL) {
3553 		int i;
3554 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3555 
3556 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3557 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3558 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3559 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3560 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3561 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3562 		    KM_NOSLEEP);
3563 		if (newbie->ipsa_integ == NULL) {
3564 			error = ENOMEM;
3565 			mutex_exit(&newbie->ipsa_lock);
3566 			goto error;
3567 		}
3568 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3569 		    KM_NOSLEEP);
3570 		if (newbie->ipsa_sens == NULL) {
3571 			error = ENOMEM;
3572 			mutex_exit(&newbie->ipsa_lock);
3573 			goto error;
3574 		}
3575 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3576 			newbie->ipsa_sens[i] = *bitmap;
3577 			bitmap++;
3578 		}
3579 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3580 			newbie->ipsa_integ[i] = *bitmap;
3581 			bitmap++;
3582 		}
3583 	}
3584 
3585 #endif
3586 
	/*
	 * Only the 32-bit replay value is stored.  A request whose 32-bit
	 * value is zero but whose 64-bit value is not is refused with
	 * EOPNOTSUPP.
	 */
3587 	if (replayext != NULL) {
3588 		if ((replayext->sadb_x_rc_replay32 == 0) &&
3589 		    (replayext->sadb_x_rc_replay64 != 0)) {
3590 			error = EOPNOTSUPP;
3591 			mutex_exit(&newbie->ipsa_lock);
3592 			goto error;
3593 		}
3594 		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3595 	}
3596 
3597 	/* now that the SA has been updated, set its new state */
3598 	newbie->ipsa_state = assoc->sadb_sa_state;
3599 
	/*
	 * A cloned SA is flagged as having a peer; otherwise lifetimes are
	 * fuzzed, but only for SAs that are not inbound.
	 */
3600 	if (clone) {
3601 		newbie->ipsa_haspeer = B_TRUE;
3602 	} else {
3603 		if (!is_inbound) {
3604 			lifetime_fuzz(newbie);
3605 		}
3606 	}
3607 	/*
3608 	 * The less locks I hold when doing an insertion and possible cloning,
3609 	 * the better!
3610 	 */
3611 	mutex_exit(&newbie->ipsa_lock);
3612 
	/* The clone is made after ipsa_haspeer was set on newbie above. */
3613 	if (clone) {
3614 		newbie_clone = sadb_cloneassoc(newbie);
3615 
3616 		if (newbie_clone == NULL) {
3617 			error = ENOMEM;
3618 			goto error;
3619 		}
3620 	}
3621 
3622 	/*
3623 	 * Enter the bucket locks.  The order of entry is outbound,
3624 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3625 	 * based on the destination address type.  If the destination address
3626 	 * type is for a node that isn't mine (or potentially mine), the
3627 	 * "primary" bucket is the outbound one.
3628 	 */
3629 	if (!is_inbound) {
3630 		/* primary == outbound */
3631 		mutex_enter(&primary->isaf_lock);
3632 		mutex_enter(&secondary->isaf_lock);
3633 	} else {
3634 		/* primary == inbound */
3635 		mutex_enter(&secondary->isaf_lock);
3636 		mutex_enter(&primary->isaf_lock);
3637 	}
3638 
3639 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3640 	    newbie->ipsa_spi));
3641 
3642 	/*
3643 	 * sadb_insertassoc() doesn't increment the reference
3644 	 * count.  We therefore have to increment the
3645 	 * reference count one more time to reflect the
3646 	 * pointers of the table that reference this SA.
3647 	 */
3648 	IPSA_REFHOLD(newbie);
3649 
3650 	if (isupdate) {
3651 		/*
3652 		 * Unlink from larval holding cell in the "inbound" fanout.
3653 		 */
3654 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3655 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3656 		sadb_unlinkassoc(newbie);
3657 	}
3658 
	/*
	 * Insert into the primary bucket.  The control message for ip_q is
	 * built while the SA lock is held, but is only sent (or freed)
	 * further down.
	 */
3659 	mutex_enter(&newbie->ipsa_lock);
3660 	error = sadb_insertassoc(newbie, primary);
3661 	if (error == 0) {
3662 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3663 		    is_inbound);
3664 	}
3665 	mutex_exit(&newbie->ipsa_lock);
3666 
3667 	if (error != 0) {
3668 		/*
3669 		 * Since sadb_insertassoc() failed, we must decrement the
3670 		 * refcount again so the cleanup code will actually free
3671 		 * the offending SA.
3672 		 */
3673 		IPSA_REFRELE(newbie);
3674 		goto error_unlock;
3675 	}
3676 
	/*
	 * Secondary bucket: either insert the clone, or (when there is no
	 * clone) just make sure no SA with this SPI and destination is
	 * already there.  Either kind of collision undoes the primary
	 * insert.
	 */
3677 	if (newbie_clone != NULL) {
3678 		mutex_enter(&newbie_clone->ipsa_lock);
3679 		error = sadb_insertassoc(newbie_clone, secondary);
3680 		mutex_exit(&newbie_clone->ipsa_lock);
3681 		if (error != 0) {
3682 			/* Collision in secondary table. */
3683 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3684 			goto error_unlock;
3685 		}
		/* Account for the table's reference to the clone. */
3686 		IPSA_REFHOLD(newbie_clone);
3687 	} else {
3688 		ASSERT(primary != secondary);
3689 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3690 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3691 		if (scratch != NULL) {
3692 			/* Collision in secondary table. */
3693 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3694 			/* Set the error, since ipsec_getassocbyspi() can't. */
3695 			error = EEXIST;
3696 			goto error_unlock;
3697 		}
3698 	}
3699 
3700 	/* OKAY!  So let's do some reality check assertions. */
3701 
3702 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3703 	ASSERT(newbie_clone == NULL ||
3704 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3705 	/*
3706 	 * If hardware acceleration could happen, send it.
3707 	 */
3708 	if (ctl_mp != NULL) {
3709 		putnext(ip_q, ctl_mp);
		/* Cleared so the cleanup code below does not free it. */
3710 		ctl_mp = NULL;
3711 	}
3712 
3713 error_unlock:
3714 
3715 	/*
3716 	 * We can exit the locks in any order.	Only entrance needs to
3717 	 * follow any protocol.
3718 	 */
3719 	mutex_exit(&secondary->isaf_lock);
3720 	mutex_exit(&primary->isaf_lock);
3721 
	/*
	 * Pairing is attempted only after a successful insert, and only
	 * once both bucket locks have been released.  Any failure here
	 * leaves error != 0, so the cleanup below marks the just-inserted
	 * SA dead as well.
	 */
3722 	if (pair_ext != NULL && error == 0) {
3723 		/* update pair_spi if it exists. */
3724 		ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
3725 		if (ipsapp == NULL) {
3726 			error = ESRCH;
3727 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
3728 		} else if (ipsapp->ipsap_psa_ptr != NULL) {
3729 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3730 			error = EINVAL;
3731 		} else {
3732 			/* update_pairing() sets diagnostic */
3733 			error = update_pairing(ipsapp, ksi, diagnostic, spp);
3734 		}
3735 	}
3736 	/* Common error point for this routine. */
3737 error:
3738 	if (newbie != NULL) {
3739 		if (error != 0) {
3740 			/* This SA is broken, let the reaper clean up. */
3741 			mutex_enter(&newbie->ipsa_lock);
3742 			newbie->ipsa_state = IPSA_STATE_DEAD;
3743 			newbie->ipsa_hardexpiretime = 1;
3744 			mutex_exit(&newbie->ipsa_lock);
3745 		}
		/* Dropped on success and failure alike. */
3746 		IPSA_REFRELE(newbie);
3747 	}
3748 	if (newbie_clone != NULL) {
3749 		IPSA_REFRELE(newbie_clone);
3750 	}
	/* Still set only if the control message was built but never sent. */
3751 	if (ctl_mp != NULL)
3752 		freemsg(ctl_mp);
3753 
3754 	if (error == 0) {
3755 		/*
3756 		 * Construct favorable PF_KEY return message and send to
3757 		 * keysock. Update the flags in the original keysock message
3758 		 * to reflect the actual flags in the new SA.
3759 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3760 		 * make sure to REFHOLD, call, then REFRELE.)
3761 		 */
3762 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3763 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3764 	}
3765 
	/* ipsapp is still NULL here unless the pairing step above ran. */
3766 	destroy_ipsa_pair(ipsapp);
3767 	return (error);
3768 }
3769 
3770 /*
3771  * Set the time of first use for a security association.  Update any
3772  * expiration times as a result.
3773  */
3774 void
3775 sadb_set_usetime(ipsa_t *assoc)
3776 {
3777 	time_t snapshot = gethrestime_sec();
3778 
3779 	mutex_enter(&assoc->ipsa_lock);
3780 	assoc->ipsa_lastuse = snapshot;
3781 	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3782 
3783 	/*
3784 	 * Caller does check usetime before calling me usually, and
3785 	 * double-checking is better than a mutex_enter/exit hit.
3786 	 */
3787 	if (assoc->ipsa_usetime == 0) {
3788 		/*
3789 		 * This is redundant for outbound SA's, as
3790 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3791 		 * Inbound SAs, however, have no such protection.
3792 		 */
3793 		assoc->ipsa_flags |= IPSA_F_USED;
3794 		assoc->ipsa_usetime = snapshot;
3795 
3796 		/*
3797 		 * After setting the use time, see if we have a use lifetime
3798 		 * that would cause the actual SA expiration time to shorten.
3799 		 */
3800 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3801 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3802 	}
3803 	mutex_exit(&assoc->ipsa_lock);
3804 }
3805 
3806 /*
3807  * Send up a PF_KEY expire message for this association.
3808  */
3809 static void
3810 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3811 {
3812 	mblk_t *mp, *mp1;
3813 	int alloclen, af;
3814 	sadb_msg_t *samsg;
3815 	sadb_lifetime_t *current, *expire;
3816 	sadb_sa_t *saext;
3817 	uint8_t *end;
3818 	boolean_t tunnel_mode;
3819 
3820 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3821 
3822 	/* Don't bother sending if there's no queue. */
3823 	if (pfkey_q == NULL)
3824 		return;
3825 
3826 	/* If the SA is one of a pair, only SOFT expire the OUTBOUND SA */
3827 	if (assoc->ipsa_state == IPSA_STATE_DYING &&
3828 	    (assoc->ipsa_flags & IPSA_F_PAIRED) &&
3829 	    !(assoc->ipsa_flags & IPSA_F_OUTBOUND)) {
3830 		return;
3831 	}
3832 
3833 	mp = sadb_keysock_out(0);
3834 	if (mp == NULL) {
3835 		/* cmn_err(CE_WARN, */
3836 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3837 		return;
3838 	}
3839 
3840 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3841 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3842 
3843 	af = assoc->ipsa_addrfam;
3844 	switch (af) {
3845 	case AF_INET:
3846 		alloclen += 2 * sizeof (struct sockaddr_in);
3847 		break;
3848 	case AF_INET6:
3849 		alloclen += 2 * sizeof (struct sockaddr_in6);
3850 		break;
3851 	default:
3852 		/* Won't happen unless there's a kernel bug. */
3853 		freeb(mp);
3854 		cmn_err(CE_WARN,
3855 		    "sadb_expire_assoc: Unknown address length.\n");
3856 		return;
3857 	}
3858 
3859 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3860 	if (tunnel_mode) {
3861 		alloclen += 2 * sizeof (sadb_address_t);
3862 		switch (assoc->ipsa_innerfam) {
3863 		case AF_INET:
3864 			alloclen += 2 * sizeof (struct sockaddr_in);
3865 			break;
3866 		case AF_INET6:
3867 			alloclen += 2 * sizeof (struct sockaddr_in6);
3868 			break;
3869 		default:
3870 			/* Won't happen unless there's a kernel bug. */
3871 			freeb(mp);
3872 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3873 			    "Unknown inner address length.\n");
3874 			return;
3875 		}
3876 	}
3877 
3878 	mp->b_cont = allocb(alloclen, BPRI_HI);
3879 	if (mp->b_cont == NULL) {
3880 		freeb(mp);
3881 		/* cmn_err(CE_WARN, */
3882 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3883 		return;
3884 	}
3885 
3886 	mp1 = mp;
3887 	mp = mp->b_cont;
3888 	end = mp->b_wptr + alloclen;
3889 
3890 	samsg = (sadb_msg_t *)mp->b_wptr;
3891 	mp->b_wptr += sizeof (*samsg);
3892 	samsg->sadb_msg_version = PF_KEY_V2;
3893 	samsg->sadb_msg_type = SADB_EXPIRE;
3894 	samsg->sadb_msg_errno = 0;
3895 	samsg->sadb_msg_satype = assoc->ipsa_type;
3896 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3897 	samsg->sadb_msg_reserved = 0;
3898 	samsg->sadb_msg_seq = 0;
3899 	samsg->sadb_msg_pid = 0;
3900 
3901 	saext = (sadb_sa_t *)mp->b_wptr;
3902 	mp->b_wptr += sizeof (*saext);
3903 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3904 	saext->sadb_sa_exttype = SADB_EXT_SA;
3905 	saext->sadb_sa_spi = assoc->ipsa_spi;
3906 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3907 	saext->sadb_sa_state = assoc->ipsa_state;
3908 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3909 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3910 	saext->sadb_sa_flags = assoc->ipsa_flags;
3911 
3912 	current = (sadb_lifetime_t *)mp->b_wptr;
3913 	mp->b_wptr += sizeof (sadb_lifetime_t);
3914 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3915 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3916 	/* We do not support the concept. */
3917 	current->sadb_lifetime_allocations = 0;
3918 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3919 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3920 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3921 
3922 	expire = (sadb_lifetime_t *)mp->b_wptr;
3923 	mp->b_wptr += sizeof (*expire);
3924 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3925 
3926 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3927 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3928 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3929 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3930 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3931 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3932 	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3933 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3934 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3935 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3936 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3937 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3938 	} else {
3939 		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3940 		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3941 		expire->sadb_lifetime_allocations = 0;
3942 		expire->sadb_lifetime_bytes = 0;
3943 		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3944 		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3945 	}
3946 
3947 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3948 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3949 	    SA_PROTO(assoc), 0);
3950 	ASSERT(mp->b_wptr != NULL);
3951 
3952 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3953 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3954 	    SA_PROTO(assoc), 0);
3955 	ASSERT(mp->b_wptr != NULL);
3956 
3957 	if (tunnel_mode) {
3958 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3959 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3960 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3961 		    assoc->ipsa_innersrcpfx);
3962 		ASSERT(mp->b_wptr != NULL);
3963 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3964 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3965 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3966 		    assoc->ipsa_innerdstpfx);
3967 		ASSERT(mp->b_wptr != NULL);
3968 	}
3969 
3970 	/* Can just putnext, we're ready to go! */
3971 	putnext(pfkey_q, mp1);
3972 }
3973 
3974 /*
3975  * "Age" the SA with the number of bytes that was used to protect traffic.
3976  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3977  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3978  * otherwise.  (If B_FALSE is returned, the association either was, or became
3979  * DEAD.)
3980  */
3981 boolean_t
3982 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3983     boolean_t sendmsg)
3984 {
3985 	boolean_t rc = B_TRUE;
3986 	uint64_t newtotal;
3987 
3988 	mutex_enter(&assoc->ipsa_lock);
3989 	newtotal = assoc->ipsa_bytes + bytes;
3990 	if (assoc->ipsa_hardbyteslt != 0 &&
3991 	    newtotal >= assoc->ipsa_hardbyteslt) {
3992 		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3993 			sadb_delete_cluster(assoc);
3994 			/*
3995 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3996 			 * this off on another non-interrupt thread.  Also
3997 			 * unlink this SA immediately.
3998 			 */
3999 			assoc->ipsa_state = IPSA_STATE_DEAD;
4000 			if (sendmsg)
4001 				sadb_expire_assoc(pfkey_q, assoc);
4002 			/*
4003 			 * Set non-zero expiration time so sadb_age_assoc()
4004 			 * will work when reaping.
4005 			 */
4006 			assoc->ipsa_hardexpiretime = (time_t)1;
4007 		} /* Else someone beat me to it! */
4008 		rc = B_FALSE;
4009 	} else if (assoc->ipsa_softbyteslt != 0 &&
4010 	    (newtotal >= assoc->ipsa_softbyteslt)) {
4011 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
4012 			/*
4013 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
4014 			 * this off on another non-interrupt thread.
4015 			 */
4016 			assoc->ipsa_state = IPSA_STATE_DYING;
4017 			assoc->ipsa_bytes = newtotal;
4018 			if (sendmsg)
4019 				sadb_expire_assoc(pfkey_q, assoc);
4020 		} /* Else someone beat me to it! */
4021 	}
4022 	if (rc == B_TRUE)
4023 		assoc->ipsa_bytes = newtotal;
4024 	mutex_exit(&assoc->ipsa_lock);
4025 	return (rc);
4026 }
4027 
4028 /*
4029  * Push one or more DL_CO_DELETE messages queued up by
4030  * sadb_torch_assoc down to the underlying driver now that it's a
4031  * convenient time for it (i.e., ipsa bucket locks not held).
4032  */
4033 static void
4034 sadb_drain_torchq(queue_t *q, mblk_t *mp)
4035 {
4036 	while (mp != NULL) {
4037 		mblk_t *next = mp->b_next;
4038 		mp->b_next = NULL;
4039 		if (q != NULL)
4040 			putnext(q, mp);
4041 		else
4042 			freemsg(mp);
4043 		mp = next;
4044 	}
4045 }
4046 
4047 /*
4048  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
4049  *     sadb_age_assoc().
4050  *
4051  * If SA is hardware-accelerated, and we can't allocate the mblk
4052  * containing the DL_CO_DELETE, just return; it will remain in the
4053  * table and be swept up by sadb_ager() in a subsequent pass.
4054  */
4055 static ipsa_t *
4056 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
4057 {
4058 	mblk_t *mp;
4059 
4060 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4061 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
4062 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
4063 
4064 	/*
4065 	 * Force cached SAs to be revalidated..
4066 	 */
4067 	head->isaf_gen++;
4068 
4069 	if (sa->ipsa_flags & IPSA_F_HW) {
4070 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
4071 		if (mp == NULL) {
4072 			mutex_exit(&sa->ipsa_lock);
4073 			return (NULL);
4074 		}
4075 		mp->b_next = *mq;
4076 		*mq = mp;
4077 	}
4078 	mutex_exit(&sa->ipsa_lock);
4079 	sadb_unlinkassoc(sa);
4080 
4081 	return (NULL);
4082 }
4083 
4084 /*
4085  * Do various SA-is-idle activities depending on delta (the number of idle
4086  * seconds on the SA) and/or other properties of the SA.
4087  *
4088  * Return B_TRUE if I've sent a packet, because I have to drop the
4089  * association's mutex before sending a packet out the wire.
4090  */
4091 /* ARGSUSED */
4092 static boolean_t
4093 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
4094 {
4095 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
4096 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
4097 
4098 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
4099 
4100 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
4101 	    delta >= nat_t_interval &&
4102 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
4103 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
4104 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
4105 		mutex_exit(&assoc->ipsa_lock);
4106 		ipsecesp_send_keepalive(assoc);
4107 		return (B_TRUE);
4108 	}
4109 	return (B_FALSE);
4110 }
4111 
4112 /*
4113  * Return "assoc" if haspeer is true and I send an expire.  This allows
4114  * the consumers' aging functions to tidy up an expired SA's peer.
4115  */
4116 static ipsa_t *
4117 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
4118     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
4119 {
4120 	ipsa_t *retval = NULL;
4121 	boolean_t dropped_mutex = B_FALSE;
4122 
4123 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4124 
4125 	mutex_enter(&assoc->ipsa_lock);
4126 
4127 	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4128 	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4129 	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4130 	    (assoc->ipsa_hardexpiretime != 0))) &&
4131 	    (assoc->ipsa_hardexpiretime <= current)) {
4132 		assoc->ipsa_state = IPSA_STATE_DEAD;
4133 		return (sadb_torch_assoc(head, assoc, inbound, mq));
4134 	}
4135 
4136 	/*
4137 	 * Check lifetimes.  Fortunately, SA setup is done
4138 	 * such that there are only two times to look at,
4139 	 * softexpiretime, and hardexpiretime.
4140 	 *
4141 	 * Check hard first.
4142 	 */
4143 
4144 	if (assoc->ipsa_hardexpiretime != 0 &&
4145 	    assoc->ipsa_hardexpiretime <= current) {
4146 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4147 			return (sadb_torch_assoc(head, assoc, inbound, mq));
4148 
4149 		if (inbound) {
4150 			sadb_delete_cluster(assoc);
4151 		}
4152 
4153 		/*
4154 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4155 		 */
4156 		assoc->ipsa_state = IPSA_STATE_DEAD;
4157 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4158 			/*
4159 			 * If the SA is paired or peered with another, put
4160 			 * a copy on a list which can be processed later, the
4161 			 * pair/peer SA needs to be updated so the both die
4162 			 * at the same time.
4163 			 *
4164 			 * If I return assoc, I have to bump up its reference
4165 			 * count to keep with the ipsa_t reference count
4166 			 * semantics.
4167 			 */
4168 			IPSA_REFHOLD(assoc);
4169 			retval = assoc;
4170 		}
4171 		sadb_expire_assoc(pfkey_q, assoc);
4172 		assoc->ipsa_hardexpiretime = current + reap_delay;
4173 	} else if (assoc->ipsa_softexpiretime != 0 &&
4174 	    assoc->ipsa_softexpiretime <= current &&
4175 	    assoc->ipsa_state < IPSA_STATE_DYING) {
4176 		/*
4177 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4178 		 * this off on another non-interrupt thread.
4179 		 */
4180 		assoc->ipsa_state = IPSA_STATE_DYING;
4181 		if (assoc->ipsa_haspeer) {
4182 			/*
4183 			 * If the SA has a peer, update the peer's state
4184 			 * on SOFT_EXPIRE, this is mostly to prevent two
4185 			 * expire messages from effectively the same SA.
4186 			 *
4187 			 * Don't care about paired SA's, then can (and should)
4188 			 * be able to soft expire at different times.
4189 			 *
4190 			 * If I return assoc, I have to bump up its
4191 			 * reference count to keep with the ipsa_t reference
4192 			 * count semantics.
4193 			 */
4194 			IPSA_REFHOLD(assoc);
4195 			retval = assoc;
4196 		}
4197 		sadb_expire_assoc(pfkey_q, assoc);
4198 	} else if (assoc->ipsa_idletime != 0 &&
4199 	    assoc->ipsa_idleexpiretime <= current) {
4200 		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4201 			assoc->ipsa_state = IPSA_STATE_IDLE;
4202 		}
4203 
4204 		/*
4205 		 * Need to handle Mature case
4206 		 */
4207 		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4208 			sadb_expire_assoc(pfkey_q, assoc);
4209 		}
4210 	} else {
4211 		/* Check idle time activities. */
4212 		dropped_mutex = sadb_idle_activities(assoc,
4213 		    current - assoc->ipsa_lastuse, inbound);
4214 	}
4215 
4216 	if (!dropped_mutex)
4217 		mutex_exit(&assoc->ipsa_lock);
4218 	return (retval);
4219 }
4220 
4221 /*
4222  * Called by a consumer protocol to do ther dirty work of reaping dead
4223  * Security Associations.
4224  *
4225  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4226  * SA's that are already marked DEAD, so expired SA's are only reaped
4227  * the second time sadb_ager() runs.
4228  */
4229 void
4230 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
4231     netstack_t *ns)
4232 {
4233 	int i;
4234 	isaf_t *bucket;
4235 	ipsa_t *assoc, *spare;
4236 	iacqf_t *acqlist;
4237 	ipsacq_t *acqrec, *spareacq;
4238 	templist_t *haspeerlist, *newbie;
4239 	/* Snapshot current time now. */
4240 	time_t current = gethrestime_sec();
4241 	mblk_t *mq = NULL;
4242 	haspeerlist = NULL;
4243 
4244 	/*
4245 	 * Do my dirty work.  This includes aging real entries, aging
4246 	 * larvals, and aging outstanding ACQUIREs.
4247 	 *
4248 	 * I hope I don't tie up resources for too long.
4249 	 */
4250 
4251 	/* Age acquires. */
4252 
4253 	for (i = 0; i < sp->sdb_hashsize; i++) {
4254 		acqlist = &sp->sdb_acq[i];
4255 		mutex_enter(&acqlist->iacqf_lock);
4256 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4257 		    acqrec = spareacq) {
4258 			spareacq = acqrec->ipsacq_next;
4259 			if (current > acqrec->ipsacq_expire)
4260 				sadb_destroy_acquire(acqrec, ns);
4261 		}
4262 		mutex_exit(&acqlist->iacqf_lock);
4263 	}
4264 
4265 	/* Age inbound associations. */
4266 	for (i = 0; i < sp->sdb_hashsize; i++) {
4267 		bucket = &(sp->sdb_if[i]);
4268 		mutex_enter(&bucket->isaf_lock);
4269 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4270 		    assoc = spare) {
4271 			spare = assoc->ipsa_next;
4272 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4273 			    reap_delay, B_TRUE, &mq) != NULL) {
4274 				/*
4275 				 * Put SA's which have a peer or SA's which
4276 				 * are paired on a list for processing after
4277 				 * all the hash tables have been walked.
4278 				 *
4279 				 * sadb_age_assoc() increments the refcnt,
4280 				 * effectively doing an IPSA_REFHOLD().
4281 				 */
4282 				newbie = kmem_alloc(sizeof (*newbie),
4283 				    KM_NOSLEEP);
4284 				if (newbie == NULL) {
4285 					/*
4286 					 * Don't forget to REFRELE().
4287 					 */
4288 					IPSA_REFRELE(assoc);
4289 					continue;	/* for loop... */
4290 				}
4291 				newbie->next = haspeerlist;
4292 				newbie->ipsa = assoc;
4293 				haspeerlist = newbie;
4294 			}
4295 		}
4296 		mutex_exit(&bucket->isaf_lock);
4297 	}
4298 
4299 	if (mq != NULL) {
4300 		sadb_drain_torchq(ip_q, mq);
4301 		mq = NULL;
4302 	}
4303 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4304 	haspeerlist = NULL;
4305 
4306 	/* Age outbound associations. */
4307 	for (i = 0; i < sp->sdb_hashsize; i++) {
4308 		bucket = &(sp->sdb_of[i]);
4309 		mutex_enter(&bucket->isaf_lock);
4310 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4311 		    assoc = spare) {
4312 			spare = assoc->ipsa_next;
4313 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4314 			    reap_delay, B_FALSE, &mq) != NULL) {
4315 				/*
4316 				 * sadb_age_assoc() increments the refcnt,
4317 				 * effectively doing an IPSA_REFHOLD().
4318 				 */
4319 				newbie = kmem_alloc(sizeof (*newbie),
4320 				    KM_NOSLEEP);
4321 				if (newbie == NULL) {
4322 					/*
4323 					 * Don't forget to REFRELE().
4324 					 */
4325 					IPSA_REFRELE(assoc);
4326 					continue;	/* for loop... */
4327 				}
4328 				newbie->next = haspeerlist;
4329 				newbie->ipsa = assoc;
4330 				haspeerlist = newbie;
4331 			}
4332 		}
4333 		mutex_exit(&bucket->isaf_lock);
4334 	}
4335 	if (mq != NULL) {
4336 		sadb_drain_torchq(ip_q, mq);
4337 		mq = NULL;
4338 	}
4339 
4340 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4341 
4342 	/*
4343 	 * Run a GC pass to clean out dead identities.
4344 	 */
4345 	ipsid_gc(ns);
4346 }
4347 
4348 /*
4349  * Figure out when to reschedule the ager.
4350  */
4351 timeout_id_t
4352 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4353     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4354 {
4355 	hrtime_t end = gethrtime();
4356 	uint_t interval = *intp;
4357 
4358 	/*
4359 	 * See how long this took.  If it took too long, increase the
4360 	 * aging interval.
4361 	 */
4362 	if ((end - begin) > interval * 1000000) {
4363 		if (interval >= intmax) {
4364 			/* XXX Rate limit this?  Or recommend flush? */
4365 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4366 			    "Too many SA's to age out in %d msec.\n",
4367 			    intmax);
4368 		} else {
4369 			/* Double by shifting by one bit. */
4370 			interval <<= 1;
4371 			interval = min(interval, intmax);
4372 		}
4373 	} else if ((end - begin) <= interval * 500000 &&
4374 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4375 		/*
4376 		 * If I took less than half of the interval, then I should
4377 		 * ratchet the interval back down.  Never automatically
4378 		 * shift below the default aging interval.
4379 		 *
4380 		 * NOTE:This even overrides manual setting of the age
4381 		 *	interval using NDD.
4382 		 */
4383 		/* Halve by shifting one bit. */
4384 		interval >>= 1;
4385 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4386 	}
4387 	*intp = interval;
4388 	return (qtimeout(pfkey_q, ager, agerarg,
4389 	    interval * drv_usectohz(1000)));
4390 }
4391 
4392 
4393 /*
4394  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4395  * message takes when updating a MATURE or DYING SA.
4396  */
4397 static void
4398 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4399     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4400 {
4401 	mutex_enter(&assoc->ipsa_lock);
4402 
4403 	/*
4404 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4405 	 * passed in during an update message.	We currently don't handle
4406 	 * these.
4407 	 */
4408 
4409 	if (hard != NULL) {
4410 		if (hard->sadb_lifetime_bytes != 0)
4411 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4412 		if (hard->sadb_lifetime_usetime != 0)
4413 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4414 		if (hard->sadb_lifetime_addtime != 0)
4415 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4416 		if (assoc->ipsa_hardaddlt != 0) {
4417 			assoc->ipsa_hardexpiretime =
4418 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4419 		}
4420 		if (assoc->ipsa_harduselt != 0 &&
4421 		    assoc->ipsa_flags & IPSA_F_USED) {
4422 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4423 		}
4424 		if (hard->sadb_lifetime_allocations != 0)
4425 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4426 	}
4427 
4428 	if (soft != NULL) {
4429 		if (soft->sadb_lifetime_bytes != 0) {
4430 			if (soft->sadb_lifetime_bytes >
4431 			    assoc->ipsa_hardbyteslt) {
4432 				assoc->ipsa_softbyteslt =
4433 				    assoc->ipsa_hardbyteslt;
4434 			} else {
4435 				assoc->ipsa_softbyteslt =
4436 				    soft->sadb_lifetime_bytes;
4437 			}
4438 		}
4439 		if (soft->sadb_lifetime_usetime != 0) {
4440 			if (soft->sadb_lifetime_usetime >
4441 			    assoc->ipsa_harduselt) {
4442 				assoc->ipsa_softuselt =
4443 				    assoc->ipsa_harduselt;
4444 			} else {
4445 				assoc->ipsa_softuselt =
4446 				    soft->sadb_lifetime_usetime;
4447 			}
4448 		}
4449 		if (soft->sadb_lifetime_addtime != 0) {
4450 			if (soft->sadb_lifetime_addtime >
4451 			    assoc->ipsa_hardexpiretime) {
4452 				assoc->ipsa_softexpiretime =
4453 				    assoc->ipsa_hardexpiretime;
4454 			} else {
4455 				assoc->ipsa_softaddlt =
4456 				    soft->sadb_lifetime_addtime;
4457 			}
4458 		}
4459 		if (assoc->ipsa_softaddlt != 0) {
4460 			assoc->ipsa_softexpiretime =
4461 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4462 		}
4463 		if (assoc->ipsa_softuselt != 0 &&
4464 		    assoc->ipsa_flags & IPSA_F_USED) {
4465 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4466 		}
4467 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4468 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4469 				lifetime_fuzz(assoc);
4470 		}
4471 
4472 		if (soft->sadb_lifetime_allocations != 0)
4473 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4474 	}
4475 
4476 	if (idle != NULL) {
4477 		time_t current = gethrestime_sec();
4478 		if ((assoc->ipsa_idleexpiretime <= current) &&
4479 		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4480 			assoc->ipsa_idleexpiretime =
4481 			    current + assoc->ipsa_idleaddlt;
4482 		}
4483 		if (idle->sadb_lifetime_addtime != 0)
4484 			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4485 		if (idle->sadb_lifetime_usetime != 0)
4486 			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4487 		if (assoc->ipsa_idleaddlt != 0) {
4488 			assoc->ipsa_idleexpiretime =
4489 			    current + idle->sadb_lifetime_addtime;
4490 			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4491 		}
4492 		if (assoc->ipsa_idleuselt != 0) {
4493 			if (assoc->ipsa_idletime != 0) {
4494 				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4495 				    assoc->ipsa_idleuselt);
4496 			assoc->ipsa_idleexpiretime =
4497 			    current + assoc->ipsa_idletime;
4498 			} else {
4499 				assoc->ipsa_idleexpiretime =
4500 				    current + assoc->ipsa_idleuselt;
4501 				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4502 			}
4503 		}
4504 	}
4505 	mutex_exit(&assoc->ipsa_lock);
4506 }
4507 
4508 static int
4509 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4510 {
4511 	int rcode = 0;
4512 	time_t current = gethrestime_sec();
4513 
4514 	mutex_enter(&assoc->ipsa_lock);
4515 
4516 	switch (new_state) {
4517 	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4518 		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4519 			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4520 			assoc->ipsa_idleexpiretime =
4521 			    current + assoc->ipsa_idletime;
4522 		}
4523 		break;
4524 	case SADB_X_SASTATE_IDLE:
4525 		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4526 			assoc->ipsa_state = IPSA_STATE_IDLE;
4527 			assoc->ipsa_idleexpiretime =
4528 			    current + assoc->ipsa_idletime;
4529 		} else {
4530 			rcode = EINVAL;
4531 		}
4532 		break;
4533 
4534 	case SADB_X_SASTATE_ACTIVE:
4535 		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4536 			rcode = EINVAL;
4537 			break;
4538 		}
4539 		assoc->ipsa_state = IPSA_STATE_MATURE;
4540 		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4541 
4542 		if (ipkt_lst == NULL) {
4543 			break;
4544 		}
4545 
4546 		if (assoc->ipsa_bpkt_head != NULL) {
4547 			*ipkt_lst = assoc->ipsa_bpkt_head;
4548 			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4549 			assoc->ipsa_mblkcnt = 0;
4550 		} else {
4551 			*ipkt_lst = NULL;
4552 		}
4553 		break;
4554 	default:
4555 		rcode = EINVAL;
4556 		break;
4557 	}
4558 
4559 	mutex_exit(&assoc->ipsa_lock);
4560 	return (rcode);
4561 }
4562 
4563 /*
4564  * Common code to update an SA.
4565  */
4566 
4567 int
4568 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4569     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4570     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4571     netstack_t *ns, uint8_t sadb_msg_type)
4572 {
4573 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4574 	sadb_address_t *srcext =
4575 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4576 	sadb_address_t *dstext =
4577 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4578 	sadb_x_kmc_t *kmcext =
4579 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4580 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4581 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4582 	sadb_x_replay_ctr_t *replext =
4583 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4584 	sadb_lifetime_t *soft =
4585 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4586 	sadb_lifetime_t *hard =
4587 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4588 	sadb_lifetime_t *idle =
4589 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4590 	sadb_x_pair_t *pair_ext =
4591 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4592 	ipsa_t *echo_target = NULL;
4593 	int error = 0;
4594 	ipsap_t *ipsapp = NULL;
4595 	uint32_t kmp = 0, kmc = 0;
4596 	time_t current = gethrestime_sec();
4597 
4598 
4599 	/* I need certain extensions present for either UPDATE message. */
4600 	if (srcext == NULL) {
4601 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4602 		return (EINVAL);
4603 	}
4604 	if (dstext == NULL) {
4605 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4606 		return (EINVAL);
4607 	}
4608 	if (assoc == NULL) {
4609 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4610 		return (EINVAL);
4611 	}
4612 
4613 	if (kmcext != NULL) {
4614 		kmp = kmcext->sadb_x_kmc_proto;
4615 		kmc = kmcext->sadb_x_kmc_cookie;
4616 	}
4617 
4618 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4619 	if (ipsapp == NULL) {
4620 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
4621 		return (ESRCH);
4622 	}
4623 
4624 	if (ipsapp->ipsap_psa_ptr == NULL && ipsapp->ipsap_sa_ptr != NULL) {
4625 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4626 			/*
4627 			 * REFRELE the target and let the add_sa_func()
4628 			 * deal with updating a larval SA.
4629 			 */
4630 			destroy_ipsa_pair(ipsapp);
4631 			return (add_sa_func(mp, ksi, diagnostic, ns));
4632 		}
4633 	}
4634 
4635 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4636 		if (ipsapp->ipsap_sa_ptr != NULL &&
4637 		    ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4638 			if ((error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4639 			    assoc->sadb_sa_state, NULL)) != 0) {
4640 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4641 				goto bail;
4642 			}
4643 		}
4644 		if (ipsapp->ipsap_psa_ptr != NULL &&
4645 		    ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4646 			if ((error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4647 			    assoc->sadb_sa_state, NULL)) != 0) {
4648 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4649 				goto bail;
4650 			}
4651 		}
4652 	}
4653 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4654 		if (ipsapp->ipsap_sa_ptr != NULL) {
4655 			error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4656 			    assoc->sadb_sa_state,
4657 			    (ipsapp->ipsap_sa_ptr->ipsa_flags &
4658 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4659 			if (error) {
4660 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4661 				goto bail;
4662 			}
4663 		}
4664 		if (ipsapp->ipsap_psa_ptr != NULL) {
4665 			error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4666 			    assoc->sadb_sa_state,
4667 			    (ipsapp->ipsap_psa_ptr->ipsa_flags &
4668 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4669 			if (error) {
4670 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4671 				goto bail;
4672 			}
4673 		}
4674 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4675 		    ksi, echo_target);
4676 		goto bail;
4677 	}
4678 
4679 	/*
4680 	 * Reality checks for updates of active associations.
4681 	 * Sundry first-pass UPDATE-specific reality checks.
4682 	 * Have to do the checks here, because it's after the add_sa code.
4683 	 * XXX STATS : logging/stats here?
4684 	 */
4685 
4686 	if (!((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4687 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4688 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4689 		error = EINVAL;
4690 		goto bail;
4691 	}
4692 
4693 	if (assoc->sadb_sa_flags & ~spp->s_updateflags) {
4694 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4695 		error = EINVAL;
4696 		goto bail;
4697 	}
4698 
4699 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4700 		error = EOPNOTSUPP;
4701 		goto bail;
4702 	}
4703 
4704 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4705 		error = EINVAL;
4706 		goto bail;
4707 	}
4708 	if (akey != NULL) {
4709 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4710 		error = EINVAL;
4711 		goto bail;
4712 	}
4713 	if (ekey != NULL) {
4714 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4715 		error = EINVAL;
4716 		goto bail;
4717 	}
4718 
4719 	if (ipsapp->ipsap_sa_ptr != NULL) {
4720 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4721 			error = ESRCH;	/* DEAD == Not there, in this case. */
4722 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4723 			goto bail;
4724 		}
4725 		if ((kmp != 0) &&
4726 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmp != 0) ||
4727 		    (ipsapp->ipsap_sa_ptr->ipsa_kmp != kmp))) {
4728 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4729 			error = EINVAL;
4730 			goto bail;
4731 		}
4732 		if ((kmc != 0) &&
4733 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmc != 0) ||
4734 		    (ipsapp->ipsap_sa_ptr->ipsa_kmc != kmc))) {
4735 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4736 			error = EINVAL;
4737 			goto bail;
4738 		}
4739 		/*
4740 		 * Do not allow replay value change for MATURE or LARVAL SA.
4741 		 */
4742 
4743 		if ((replext != NULL) &&
4744 		    ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4745 		    (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4746 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4747 			error = EINVAL;
4748 			goto bail;
4749 		}
4750 	}
4751 
4752 	if (ipsapp->ipsap_psa_ptr != NULL) {
4753 		if (ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4754 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4755 			error = ESRCH;	/* DEAD == Not there, in this case. */
4756 			goto bail;
4757 		}
4758 		if ((kmp != 0) &&
4759 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmp != 0) ||
4760 		    (ipsapp->ipsap_psa_ptr->ipsa_kmp != kmp))) {
4761 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4762 			error = EINVAL;
4763 			goto bail;
4764 		}
4765 		if ((kmc != 0) &&
4766 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmc != 0) ||
4767 		    (ipsapp->ipsap_psa_ptr->ipsa_kmc != kmc))) {
4768 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4769 			error = EINVAL;
4770 			goto bail;
4771 		}
4772 	}
4773 
4774 	if (ipsapp->ipsap_sa_ptr != NULL) {
4775 		sadb_update_lifetimes(ipsapp->ipsap_sa_ptr, hard, soft,
4776 		    idle, B_TRUE);
4777 		if (kmp != 0)
4778 			ipsapp->ipsap_sa_ptr->ipsa_kmp = kmp;
4779 		if (kmc != 0)
4780 			ipsapp->ipsap_sa_ptr->ipsa_kmc = kmc;
4781 		if ((replext != NULL) &&
4782 		    (ipsapp->ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4783 			/*
4784 			 * If an inbound SA, update the replay counter
4785 			 * and check off all the other sequence number
4786 			 */
4787 			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4788 				if (!sadb_replay_check(ipsapp->ipsap_sa_ptr,
4789 				    replext->sadb_x_rc_replay32)) {
4790 					error = EINVAL;
4791 					goto bail;
4792 				}
4793 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4794 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4795 				    current +
4796 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4797 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4798 			} else {
4799 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4800 				ipsapp->ipsap_sa_ptr->ipsa_replay =
4801 				    replext->sadb_x_rc_replay32;
4802 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4803 				    current +
4804 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4805 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4806 			}
4807 		}
4808 	}
4809 
4810 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4811 		if (ipsapp->ipsap_psa_ptr != NULL) {
4812 			sadb_update_lifetimes(ipsapp->ipsap_psa_ptr, hard, soft,
4813 			    idle, B_FALSE);
4814 			if (kmp != 0)
4815 				ipsapp->ipsap_psa_ptr->ipsa_kmp = kmp;
4816 			if (kmc != 0)
4817 				ipsapp->ipsap_psa_ptr->ipsa_kmc = kmc;
4818 		} else {
4819 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4820 			error = ESRCH;
4821 			goto bail;
4822 		}
4823 	}
4824 
4825 	if (pair_ext != NULL)
4826 		error = update_pairing(ipsapp, ksi, diagnostic, spp);
4827 
4828 	if (error == 0)
4829 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4830 		    ksi, echo_target);
4831 bail:
4832 
4833 	destroy_ipsa_pair(ipsapp);
4834 
4835 	return (error);
4836 }
4837 
4838 
4839 int
4840 update_pairing(ipsap_t *ipsapp, keysock_in_t *ksi, int *diagnostic,
4841     sadbp_t *spp)
4842 {
4843 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4844 	sadb_address_t *srcext =
4845 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4846 	sadb_address_t *dstext =
4847 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4848 	sadb_x_pair_t *pair_ext =
4849 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4850 	int error = 0;
4851 	ipsap_t *oipsapp = NULL;
4852 	boolean_t undo_pair = B_FALSE;
4853 	uint32_t ipsa_flags;
4854 
4855 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4856 	    assoc->sadb_sa_spi) {
4857 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4858 		return (EINVAL);
4859 	}
4860 
4861 	/*
4862 	 * Assume for now that the spi value provided in the SADB_UPDATE
4863 	 * message was valid, update the SA with its pair spi value.
4864 	 * If the spi turns out to be bogus or the SA no longer exists
4865 	 * then this will be detected when the reverse update is made
4866 	 * below.
4867 	 */
4868 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4869 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4870 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4871 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4872 
4873 	/*
4874 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4875 	 * should now return pointers to the SA *AND* its pair, if this is not
4876 	 * the case, the "otherspi" either did not exist or was deleted. Also
4877 	 * check that "otherspi" is not already paired. If everything looks
4878 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4879 	 * after this update to ensure its not deleted until we are done.
4880 	 */
4881 	oipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4882 	if (oipsapp == NULL) {
4883 		/*
4884 		 * This should never happen, calling function still has
4885 		 * IPSA_REFHELD on the SA we just updated.
4886 		 */
4887 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4888 		return (EINVAL);
4889 	}
4890 
4891 	if (oipsapp->ipsap_psa_ptr == NULL) {
4892 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4893 		undo_pair = B_TRUE;
4894 	} else {
4895 		ipsa_flags = oipsapp->ipsap_psa_ptr->ipsa_flags;
4896 		if ((oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4897 		    (oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4898 			/* Its dead Jim! */
4899 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4900 			undo_pair = B_TRUE;
4901 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4902 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4903 			/* This SA is in both hashtables. */
4904 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4905 			undo_pair = B_TRUE;
4906 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4907 			/* This SA is already paired with another. */
4908 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4909 			undo_pair = B_TRUE;
4910 		}
4911 	}
4912 
4913 	if (undo_pair) {
4914 		/* The pair SA does not exist. */
4915 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4916 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4917 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4918 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4919 		error = EINVAL;
4920 	} else {
4921 		mutex_enter(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4922 		oipsapp->ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4923 		oipsapp->ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4924 		mutex_exit(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4925 	}
4926 
4927 	destroy_ipsa_pair(oipsapp);
4928 	return (error);
4929 }
4930 
4931 /*
4932  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4933  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4934  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4935  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4936  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4937  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4938  * other direction's SA.
4939  */
4940 
4941 /*
4942  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4943  * grab it, lock it, and return it.  Otherwise return NULL.
4944  */
4945 static ipsacq_t *
4946 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4947     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4948     uint64_t unique_id)
4949 {
4950 	ipsacq_t *walker;
4951 	sa_family_t fam;
4952 	uint32_t blank_address[4] = {0, 0, 0, 0};
4953 
4954 	if (isrc == NULL) {
4955 		ASSERT(idst == NULL);
4956 		isrc = idst = blank_address;
4957 	}
4958 
4959 	/*
4960 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4961 	 *
4962 	 * XXX May need search for duplicates based on other things too!
4963 	 */
4964 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4965 	    walker = walker->ipsacq_next) {
4966 		mutex_enter(&walker->ipsacq_lock);
4967 		fam = walker->ipsacq_addrfam;
4968 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4969 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4970 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4971 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4972 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4973 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4974 		    (ap == walker->ipsacq_act) &&
4975 		    (pp == walker->ipsacq_policy) &&
4976 		    /* XXX do deep compares of ap/pp? */
4977 		    (unique_id == walker->ipsacq_unique_id))
4978 			break;			/* everything matched */
4979 		mutex_exit(&walker->ipsacq_lock);
4980 	}
4981 
4982 	return (walker);
4983 }
4984 
4985 /*
4986  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4987  * of all of the same length.  Give up (and drop) if memory
4988  * cannot be allocated for a new one; otherwise, invoke callback to
4989  * send the acquire up..
4990  *
4991  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4992  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
4993  * and handle this case specially.
4994  */
4995 void
4996 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
4997 {
4998 	sadbp_t *spp;
4999 	sadb_t *sp;
5000 	ipsacq_t *newbie;
5001 	iacqf_t *bucket;
5002 	mblk_t *datamp = mp->b_cont;
5003 	mblk_t *extended;
5004 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
5005 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
5006 	uint32_t *src, *dst, *isrc, *idst;
5007 	ipsec_policy_t *pp = io->ipsec_out_policy;
5008 	ipsec_action_t *ap = io->ipsec_out_act;
5009 	sa_family_t af;
5010 	int hashoffset;
5011 	uint32_t seq;
5012 	uint64_t unique_id = 0;
5013 	ipsec_selector_t sel;
5014 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
5015 	netstack_t	*ns = io->ipsec_out_ns;
5016 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5017 
5018 	ASSERT((pp != NULL) || (ap != NULL));
5019 
5020 	ASSERT(need_ah != NULL || need_esp != NULL);
5021 	/* Assign sadb pointers */
5022 	if (need_esp) { /* ESP for AH+ESP */
5023 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5024 
5025 		spp = &espstack->esp_sadb;
5026 	} else {
5027 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
5028 
5029 		spp = &ahstack->ah_sadb;
5030 	}
5031 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
5032 
5033 	if (ap == NULL)
5034 		ap = pp->ipsp_act;
5035 
5036 	ASSERT(ap != NULL);
5037 
5038 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
5039 		unique_id = SA_FORM_UNIQUE_ID(io);
5040 
5041 	/*
5042 	 * Set up an ACQUIRE record.
5043 	 *
5044 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
5045 	 * below the lowest point allowed in the kernel.  (In other words,
5046 	 * make sure the high bit on the sequence number is set.)
5047 	 */
5048 
5049 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
5050 
5051 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
5052 		src = (uint32_t *)&ipha->ipha_src;
5053 		dst = (uint32_t *)&ipha->ipha_dst;
5054 		af = AF_INET;
5055 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
5056 		ASSERT(io->ipsec_out_v4 == B_TRUE);
5057 	} else {
5058 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
5059 		src = (uint32_t *)&ip6h->ip6_src;
5060 		dst = (uint32_t *)&ip6h->ip6_dst;
5061 		af = AF_INET6;
5062 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
5063 		ASSERT(io->ipsec_out_v4 == B_FALSE);
5064 	}
5065 
5066 	if (tunnel_mode) {
5067 		/* Snag inner addresses. */
5068 		isrc = io->ipsec_out_insrc;
5069 		idst = io->ipsec_out_indst;
5070 	} else {
5071 		isrc = idst = NULL;
5072 	}
5073 
5074 	/*
5075 	 * Check buckets to see if there is an existing entry.  If so,
5076 	 * grab it.  sadb_checkacquire locks newbie if found.
5077 	 */
5078 	bucket = &(sp->sdb_acq[hashoffset]);
5079 	mutex_enter(&bucket->iacqf_lock);
5080 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
5081 	    unique_id);
5082 
5083 	if (newbie == NULL) {
5084 		/*
5085 		 * Otherwise, allocate a new one.
5086 		 */
5087 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
5088 		if (newbie == NULL) {
5089 			mutex_exit(&bucket->iacqf_lock);
5090 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
5091 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
5092 			    &ipss->ipsec_sadb_dropper);
5093 			return;
5094 		}
5095 		newbie->ipsacq_policy = pp;
5096 		if (pp != NULL) {
5097 			IPPOL_REFHOLD(pp);
5098 		}
5099 		IPACT_REFHOLD(ap);
5100 		newbie->ipsacq_act = ap;
5101 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
5102 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
5103 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
5104 		if (newbie->ipsacq_next != NULL)
5105 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
5106 		bucket->iacqf_ipsacq = newbie;
5107 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
5108 		mutex_enter(&newbie->ipsacq_lock);
5109 	}
5110 
5111 	mutex_exit(&bucket->iacqf_lock);
5112 
5113 	/*
5114 	 * This assert looks silly for now, but we may need to enter newbie's
5115 	 * mutex during a search.
5116 	 */
5117 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5118 
5119 	mp->b_next = NULL;
5120 	/* Queue up packet.  Use b_next. */
5121 	if (newbie->ipsacq_numpackets == 0) {
5122 		/* First one. */
5123 		newbie->ipsacq_mp = mp;
5124 		newbie->ipsacq_numpackets = 1;
5125 		newbie->ipsacq_expire = gethrestime_sec();
5126 		/*
5127 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5128 		 * value.
5129 		 */
5130 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5131 		newbie->ipsacq_seq = seq;
5132 		newbie->ipsacq_addrfam = af;
5133 
5134 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
5135 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
5136 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
5137 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
5138 		if (tunnel_mode) {
5139 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
5140 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
5141 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5142 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
5143 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
5144 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5145 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
5146 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5147 			    io->ipsec_out_indst, io->ipsec_out_inaf);
5148 		} else {
5149 			newbie->ipsacq_proto = io->ipsec_out_proto;
5150 		}
5151 		newbie->ipsacq_unique_id = unique_id;
5152 	} else {
5153 		/* Scan to the end of the list & insert. */
5154 		mblk_t *lastone = newbie->ipsacq_mp;
5155 
5156 		while (lastone->b_next != NULL)
5157 			lastone = lastone->b_next;
5158 		lastone->b_next = mp;
5159 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5160 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5161 			lastone = newbie->ipsacq_mp;
5162 			newbie->ipsacq_mp = lastone->b_next;
5163 			lastone->b_next = NULL;
5164 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
5165 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5166 			    &ipss->ipsec_sadb_dropper);
5167 		} else {
5168 			IP_ACQUIRE_STAT(ipss, qhiwater,
5169 			    newbie->ipsacq_numpackets);
5170 		}
5171 	}
5172 
5173 	/*
5174 	 * Reset addresses.  Set them to the most recently added mblk chain,
5175 	 * so that the address pointers in the acquire record will point
5176 	 * at an mblk still attached to the acquire list.
5177 	 */
5178 
5179 	newbie->ipsacq_srcaddr = src;
5180 	newbie->ipsacq_dstaddr = dst;
5181 
5182 	/*
5183 	 * If the acquire record has more than one queued packet, we've
5184 	 * already sent an ACQUIRE, and don't need to repeat ourself.
5185 	 */
5186 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5187 		/* I have an acquire outstanding already! */
5188 		mutex_exit(&newbie->ipsacq_lock);
5189 		return;
5190 	}
5191 
5192 	if (keysock_extended_reg(ns)) {
5193 		/*
5194 		 * Construct an extended ACQUIRE.  There are logging
5195 		 * opportunities here in failure cases.
5196 		 */
5197 
5198 		(void) memset(&sel, 0, sizeof (sel));
5199 		sel.ips_isv4 = io->ipsec_out_v4;
5200 		if (tunnel_mode) {
5201 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
5202 			    IPPROTO_ENCAP : IPPROTO_IPV6;
5203 		} else {
5204 			sel.ips_protocol = io->ipsec_out_proto;
5205 			sel.ips_local_port = io->ipsec_out_src_port;
5206 			sel.ips_remote_port = io->ipsec_out_dst_port;
5207 		}
5208 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
5209 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
5210 		sel.ips_is_icmp_inv_acq = 0;
5211 		if (af == AF_INET) {
5212 			sel.ips_local_addr_v4 = ipha->ipha_src;
5213 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
5214 		} else {
5215 			sel.ips_local_addr_v6 = ip6h->ip6_src;
5216 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5217 		}
5218 
5219 		extended = sadb_keysock_out(0);
5220 		if (extended != NULL) {
5221 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
5222 			    tunnel_mode, seq, 0, ns);
5223 			if (extended->b_cont == NULL) {
5224 				freeb(extended);
5225 				extended = NULL;
5226 			}
5227 		}
5228 	} else
5229 		extended = NULL;
5230 
5231 	/*
5232 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
5233 	 * this new record.  The send-acquire callback assumes that acqrec is
5234 	 * already locked.
5235 	 */
5236 	(*spp->s_acqfn)(newbie, extended, ns);
5237 }
5238 
5239 /*
5240  * Unlink and free an acquire record.
5241  */
5242 void
5243 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5244 {
5245 	mblk_t *mp;
5246 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5247 
5248 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5249 
5250 	if (acqrec->ipsacq_policy != NULL) {
5251 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
5252 	}
5253 	if (acqrec->ipsacq_act != NULL) {
5254 		IPACT_REFRELE(acqrec->ipsacq_act);
5255 	}
5256 
5257 	/* Unlink */
5258 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5259 	if (acqrec->ipsacq_next != NULL)
5260 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5261 
5262 	/*
5263 	 * Free hanging mp's.
5264 	 *
5265 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5266 	 */
5267 
5268 	mutex_enter(&acqrec->ipsacq_lock);
5269 	while (acqrec->ipsacq_mp != NULL) {
5270 		mp = acqrec->ipsacq_mp;
5271 		acqrec->ipsacq_mp = mp->b_next;
5272 		mp->b_next = NULL;
5273 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
5274 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5275 		    &ipss->ipsec_sadb_dropper);
5276 	}
5277 	mutex_exit(&acqrec->ipsacq_lock);
5278 
5279 	/* Free */
5280 	mutex_destroy(&acqrec->ipsacq_lock);
5281 	kmem_free(acqrec, sizeof (*acqrec));
5282 }
5283 
5284 /*
5285  * Destroy an acquire list fanout.
5286  */
5287 static void
5288 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5289     netstack_t *ns)
5290 {
5291 	int i;
5292 	iacqf_t *list = *listp;
5293 
5294 	if (list == NULL)
5295 		return;
5296 
5297 	for (i = 0; i < numentries; i++) {
5298 		mutex_enter(&(list[i].iacqf_lock));
5299 		while (list[i].iacqf_ipsacq != NULL)
5300 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5301 		mutex_exit(&(list[i].iacqf_lock));
5302 		if (forever)
5303 			mutex_destroy(&(list[i].iacqf_lock));
5304 	}
5305 
5306 	if (forever) {
5307 		*listp = NULL;
5308 		kmem_free(list, numentries * sizeof (*list));
5309 	}
5310 }
5311 
5312 /*
5313  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5314  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5315  */
5316 static uint8_t *
5317 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5318     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5319     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5320 {
5321 	uint8_t *cur = start;
5322 	ipsec_alginfo_t *algp;
5323 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5324 
5325 	cur += sizeof (*algdesc);
5326 	if (cur >= limit)
5327 		return (NULL);
5328 
5329 	ecomb->sadb_x_ecomb_numalgs++;
5330 
5331 	/*
5332 	 * Normalize vs. crypto framework's limits.  This way, you can specify
5333 	 * a stronger policy, and when the framework loads a stronger version,
5334 	 * you can just keep plowing w/o rewhacking your SPD.
5335 	 */
5336 	mutex_enter(&ipss->ipsec_alg_lock);
5337 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5338 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5339 	if (algp == NULL) {
5340 		mutex_exit(&ipss->ipsec_alg_lock);
5341 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5342 	}
5343 	if (minbits < algp->alg_ef_minbits)
5344 		minbits = algp->alg_ef_minbits;
5345 	if (maxbits > algp->alg_ef_maxbits)
5346 		maxbits = algp->alg_ef_maxbits;
5347 	mutex_exit(&ipss->ipsec_alg_lock);
5348 
5349 	algdesc->sadb_x_algdesc_satype = satype;
5350 	algdesc->sadb_x_algdesc_algtype = algtype;
5351 	algdesc->sadb_x_algdesc_alg = alg;
5352 	algdesc->sadb_x_algdesc_minbits = minbits;
5353 	algdesc->sadb_x_algdesc_maxbits = maxbits;
5354 	algdesc->sadb_x_algdesc_reserved = 0;
5355 	return (cur);
5356 }
5357 
5358 /*
5359  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5360  * which must fit before *limit
5361  *
5362  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5363  */
5364 static uint8_t *
5365 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5366     netstack_t *ns)
5367 {
5368 	uint8_t *cur = start;
5369 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5370 	ipsec_prot_t *ipp;
5371 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5372 
5373 	cur += sizeof (*ecomb);
5374 	if (cur >= limit)
5375 		return (NULL);
5376 
5377 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5378 
5379 	ipp = &act->ipa_act.ipa_apply;
5380 
5381 	ecomb->sadb_x_ecomb_numalgs = 0;
5382 	ecomb->sadb_x_ecomb_reserved = 0;
5383 	ecomb->sadb_x_ecomb_reserved2 = 0;
5384 	/*
5385 	 * No limits on allocations, since we really don't support that
5386 	 * concept currently.
5387 	 */
5388 	ecomb->sadb_x_ecomb_soft_allocations = 0;
5389 	ecomb->sadb_x_ecomb_hard_allocations = 0;
5390 
5391 	/*
5392 	 * XXX TBD: Policy or global parameters will eventually be
5393 	 * able to fill in some of these.
5394 	 */
5395 	ecomb->sadb_x_ecomb_flags = 0;
5396 	ecomb->sadb_x_ecomb_soft_bytes = 0;
5397 	ecomb->sadb_x_ecomb_hard_bytes = 0;
5398 	ecomb->sadb_x_ecomb_soft_addtime = 0;
5399 	ecomb->sadb_x_ecomb_hard_addtime = 0;
5400 	ecomb->sadb_x_ecomb_soft_usetime = 0;
5401 	ecomb->sadb_x_ecomb_hard_usetime = 0;
5402 
5403 	if (ipp->ipp_use_ah) {
5404 		cur = sadb_new_algdesc(cur, limit, ecomb,
5405 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5406 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5407 		if (cur == NULL)
5408 			return (NULL);
5409 		ipsecah_fill_defs(ecomb, ns);
5410 	}
5411 
5412 	if (ipp->ipp_use_esp) {
5413 		if (ipp->ipp_use_espa) {
5414 			cur = sadb_new_algdesc(cur, limit, ecomb,
5415 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5416 			    ipp->ipp_esp_auth_alg,
5417 			    ipp->ipp_espa_minbits,
5418 			    ipp->ipp_espa_maxbits, ipss);
5419 			if (cur == NULL)
5420 				return (NULL);
5421 		}
5422 
5423 		cur = sadb_new_algdesc(cur, limit, ecomb,
5424 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5425 		    ipp->ipp_encr_alg,
5426 		    ipp->ipp_espe_minbits,
5427 		    ipp->ipp_espe_maxbits, ipss);
5428 		if (cur == NULL)
5429 			return (NULL);
5430 		/* Fill in lifetimes if and only if AH didn't already... */
5431 		if (!ipp->ipp_use_ah)
5432 			ipsecesp_fill_defs(ecomb, ns);
5433 	}
5434 
5435 	return (cur);
5436 }
5437 
5438 /*
5439  * Construct an extended ACQUIRE message based on a selector and the resulting
5440  * IPsec action.
5441  *
5442  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5443  * generation. As a consequence, expect this function to evolve
5444  * rapidly.
5445  */
5446 static mblk_t *
5447 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5448     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5449     netstack_t *ns)
5450 {
5451 	mblk_t *mp;
5452 	sadb_msg_t *samsg;
5453 	uint8_t *start, *cur, *end;
5454 	uint32_t *saddrptr, *daddrptr;
5455 	sa_family_t af;
5456 	sadb_prop_t *eprop;
5457 	ipsec_action_t *ap, *an;
5458 	ipsec_selkey_t *ipsl;
5459 	uint8_t proto, pfxlen;
5460 	uint16_t lport, rport;
5461 	uint32_t kmp, kmc;
5462 
5463 	/*
5464 	 * Find the action we want sooner rather than later..
5465 	 */
5466 	an = NULL;
5467 	if (pol == NULL) {
5468 		ap = act;
5469 	} else {
5470 		ap = pol->ipsp_act;
5471 
5472 		if (ap != NULL)
5473 			an = ap->ipa_next;
5474 	}
5475 
5476 	/*
5477 	 * Just take a swag for the allocation for now.	 We can always
5478 	 * alter it later.
5479 	 */
5480 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5481 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5482 	if (mp == NULL)
5483 		return (NULL);
5484 
5485 	start = mp->b_rptr;
5486 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5487 
5488 	cur = start;
5489 
5490 	samsg = (sadb_msg_t *)cur;
5491 	cur += sizeof (*samsg);
5492 
5493 	samsg->sadb_msg_version = PF_KEY_V2;
5494 	samsg->sadb_msg_type = SADB_ACQUIRE;
5495 	samsg->sadb_msg_errno = 0;
5496 	samsg->sadb_msg_reserved = 0;
5497 	samsg->sadb_msg_satype = 0;
5498 	samsg->sadb_msg_seq = seq;
5499 	samsg->sadb_msg_pid = pid;
5500 
5501 	if (tunnel_mode) {
5502 		/*
5503 		 * Form inner address extensions based NOT on the inner
5504 		 * selectors (i.e. the packet data), but on the policy's
5505 		 * selector key (i.e. the policy's selector information).
5506 		 *
5507 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5508 		 * same in ipsec_selkey_t (unless the compiler does very
5509 		 * strange things with unions, consult your local C language
5510 		 * lawyer for details).
5511 		 */
5512 		ipsl = &(pol->ipsp_sel->ipsl_key);
5513 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5514 			af = AF_INET;
5515 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5516 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5517 		} else {
5518 			af = AF_INET6;
5519 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5520 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5521 		}
5522 
5523 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5524 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5525 			pfxlen = ipsl->ipsl_local_pfxlen;
5526 		} else {
5527 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5528 			pfxlen = 0;
5529 		}
5530 		/* XXX What about ICMP type/code? */
5531 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5532 		    ipsl->ipsl_lport : 0;
5533 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5534 		    ipsl->ipsl_proto : 0;
5535 
5536 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5537 		    af, saddrptr, lport, proto, pfxlen);
5538 		if (cur == NULL) {
5539 			freeb(mp);
5540 			return (NULL);
5541 		}
5542 
5543 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5544 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5545 			pfxlen = ipsl->ipsl_remote_pfxlen;
5546 		} else {
5547 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5548 			pfxlen = 0;
5549 		}
5550 		/* XXX What about ICMP type/code? */
5551 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5552 		    ipsl->ipsl_rport : 0;
5553 
5554 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5555 		    af, daddrptr, rport, proto, pfxlen);
5556 		if (cur == NULL) {
5557 			freeb(mp);
5558 			return (NULL);
5559 		}
5560 		/*
5561 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5562 		 * _with_ inner-packet address selectors, we'll need to further
5563 		 * distinguish tunnel mode here.  For now, having inner
5564 		 * addresses and/or ports is sufficient.
5565 		 *
5566 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5567 		 * outer addresses.
5568 		 */
5569 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5570 		lport = rport = 0;
5571 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5572 		proto = 0;
5573 		lport = 0;
5574 		rport = 0;
5575 		if (pol != NULL) {
5576 			ipsl = &(pol->ipsp_sel->ipsl_key);
5577 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5578 				proto = ipsl->ipsl_proto;
5579 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5580 				rport = ipsl->ipsl_rport;
5581 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5582 				lport = ipsl->ipsl_lport;
5583 		}
5584 	} else {
5585 		proto = sel->ips_protocol;
5586 		lport = sel->ips_local_port;
5587 		rport = sel->ips_remote_port;
5588 	}
5589 
5590 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5591 
5592 	/*
5593 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5594 	 * ipsec_selector_t.
5595 	 */
5596 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5597 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5598 
5599 	if (cur == NULL) {
5600 		freeb(mp);
5601 		return (NULL);
5602 	}
5603 
5604 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5605 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5606 
5607 	if (cur == NULL) {
5608 		freeb(mp);
5609 		return (NULL);
5610 	}
5611 
5612 	/*
5613 	 * This section will change a lot as policy evolves.
5614 	 * For now, it'll be relatively simple.
5615 	 */
5616 	eprop = (sadb_prop_t *)cur;
5617 	cur += sizeof (*eprop);
5618 	if (cur > end) {
5619 		/* no space left */
5620 		freeb(mp);
5621 		return (NULL);
5622 	}
5623 
5624 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5625 	eprop->sadb_x_prop_ereserved = 0;
5626 	eprop->sadb_x_prop_numecombs = 0;
5627 	eprop->sadb_prop_replay = 32;	/* default */
5628 
5629 	kmc = kmp = 0;
5630 
5631 	for (; ap != NULL; ap = an) {
5632 		an = (pol != NULL) ? ap->ipa_next : NULL;
5633 
5634 		/*
5635 		 * Skip non-IPsec policies
5636 		 */
5637 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5638 			continue;
5639 
5640 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5641 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5642 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5643 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5644 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5645 			eprop->sadb_prop_replay =
5646 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5647 		}
5648 
5649 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5650 		if (cur == NULL) { /* no space */
5651 			freeb(mp);
5652 			return (NULL);
5653 		}
5654 		eprop->sadb_x_prop_numecombs++;
5655 	}
5656 
5657 	if (eprop->sadb_x_prop_numecombs == 0) {
5658 		/*
5659 		 * This will happen if we fail to find a policy
5660 		 * allowing for IPsec processing.
5661 		 * Construct an error message.
5662 		 */
5663 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5664 		samsg->sadb_msg_errno = ENOENT;
5665 		samsg->sadb_x_msg_diagnostic = 0;
5666 		return (mp);
5667 	}
5668 
5669 	if ((kmp != 0) || (kmc != 0)) {
5670 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5671 		if (cur == NULL) {
5672 			freeb(mp);
5673 			return (NULL);
5674 		}
5675 	}
5676 
5677 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5678 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5679 	mp->b_wptr = cur;
5680 
5681 	return (mp);
5682 }
5683 
5684 /*
5685  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5686  *
5687  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5688  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5689  * maximize code consolidation while preventing algorithm changes from messing
5690  * with the callers finishing touches on the ACQUIRE itself.
5691  */
5692 mblk_t *
5693 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5694 {
5695 	uint_t allocsize;
5696 	mblk_t *pfkeymp, *msgmp;
5697 	sa_family_t af;
5698 	uint8_t *cur, *end;
5699 	sadb_msg_t *samsg;
5700 	uint16_t sport_typecode;
5701 	uint16_t dport_typecode;
5702 	uint8_t check_proto;
5703 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5704 
5705 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5706 
5707 	pfkeymp = sadb_keysock_out(0);
5708 	if (pfkeymp == NULL)
5709 		return (NULL);
5710 
5711 	/*
5712 	 * First, allocate a basic ACQUIRE message
5713 	 */
5714 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5715 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5716 
5717 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5718 	allocsize += 2 * sizeof (struct sockaddr_in6);
5719 
5720 	mutex_enter(&ipss->ipsec_alg_lock);
5721 	/* NOTE:  The lock is now held through to this function's return. */
5722 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5723 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5724 
5725 	if (tunnel_mode) {
5726 		/* Tunnel mode! */
5727 		allocsize += 2 * sizeof (sadb_address_t);
5728 		/* Enough to cover both AF_INET and AF_INET6. */
5729 		allocsize += 2 * sizeof (struct sockaddr_in6);
5730 	}
5731 
5732 	msgmp = allocb(allocsize, BPRI_HI);
5733 	if (msgmp == NULL) {
5734 		freeb(pfkeymp);
5735 		mutex_exit(&ipss->ipsec_alg_lock);
5736 		return (NULL);
5737 	}
5738 
5739 	pfkeymp->b_cont = msgmp;
5740 	cur = msgmp->b_rptr;
5741 	end = cur + allocsize;
5742 	samsg = (sadb_msg_t *)cur;
5743 	cur += sizeof (sadb_msg_t);
5744 
5745 	af = acqrec->ipsacq_addrfam;
5746 	switch (af) {
5747 	case AF_INET:
5748 		check_proto = IPPROTO_ICMP;
5749 		break;
5750 	case AF_INET6:
5751 		check_proto = IPPROTO_ICMPV6;
5752 		break;
5753 	default:
5754 		/* This should never happen unless we have kernel bugs. */
5755 		cmn_err(CE_WARN,
5756 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5757 		ASSERT(0);
5758 		mutex_exit(&ipss->ipsec_alg_lock);
5759 		return (NULL);
5760 	}
5761 
5762 	samsg->sadb_msg_version = PF_KEY_V2;
5763 	samsg->sadb_msg_type = SADB_ACQUIRE;
5764 	samsg->sadb_msg_satype = satype;
5765 	samsg->sadb_msg_errno = 0;
5766 	samsg->sadb_msg_pid = 0;
5767 	samsg->sadb_msg_reserved = 0;
5768 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5769 
5770 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5771 
5772 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5773 		sport_typecode = dport_typecode = 0;
5774 	} else {
5775 		sport_typecode = acqrec->ipsacq_srcport;
5776 		dport_typecode = acqrec->ipsacq_dstport;
5777 	}
5778 
5779 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5780 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5781 
5782 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5783 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5784 
5785 	if (tunnel_mode) {
5786 		sport_typecode = acqrec->ipsacq_srcport;
5787 		dport_typecode = acqrec->ipsacq_dstport;
5788 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5789 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5790 		    sport_typecode, acqrec->ipsacq_inner_proto,
5791 		    acqrec->ipsacq_innersrcpfx);
5792 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5793 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5794 		    dport_typecode, acqrec->ipsacq_inner_proto,
5795 		    acqrec->ipsacq_innerdstpfx);
5796 	}
5797 
5798 	/* XXX Insert identity information here. */
5799 
5800 	/* XXXMLS Insert sensitivity information here. */
5801 
5802 	if (cur != NULL)
5803 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5804 	else
5805 		mutex_exit(&ipss->ipsec_alg_lock);
5806 
5807 	return (pfkeymp);
5808 }
5809 
5810 /*
5811  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5812  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5813  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5814  * (ipsa_t *)-1).
5815  *
5816  * master_spi is passed in host order.
5817  */
5818 ipsa_t *
5819 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5820     netstack_t *ns, uint_t sa_type)
5821 {
5822 	sadb_address_t *src =
5823 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5824 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5825 	sadb_spirange_t *range =
5826 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5827 	struct sockaddr_in *ssa, *dsa;
5828 	struct sockaddr_in6 *ssa6, *dsa6;
5829 	uint32_t *srcaddr, *dstaddr;
5830 	sa_family_t af;
5831 	uint32_t add, min, max;
5832 	uint8_t protocol =
5833 	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
5834 
5835 	if (src == NULL) {
5836 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5837 		return ((ipsa_t *)-1);
5838 	}
5839 	if (dst == NULL) {
5840 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5841 		return ((ipsa_t *)-1);
5842 	}
5843 	if (range == NULL) {
5844 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5845 		return ((ipsa_t *)-1);
5846 	}
5847 
5848 	min = ntohl(range->sadb_spirange_min);
5849 	max = ntohl(range->sadb_spirange_max);
5850 	dsa = (struct sockaddr_in *)(dst + 1);
5851 	dsa6 = (struct sockaddr_in6 *)dsa;
5852 
5853 	ssa = (struct sockaddr_in *)(src + 1);
5854 	ssa6 = (struct sockaddr_in6 *)ssa;
5855 	ASSERT(dsa->sin_family == ssa->sin_family);
5856 
5857 	srcaddr = ALL_ZEROES_PTR;
5858 	af = dsa->sin_family;
5859 	switch (af) {
5860 	case AF_INET:
5861 		if (src != NULL)
5862 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5863 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5864 		break;
5865 	case AF_INET6:
5866 		if (src != NULL)
5867 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5868 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5869 		break;
5870 	default:
5871 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5872 		return ((ipsa_t *)-1);
5873 	}
5874 
5875 	if (master_spi < min || master_spi > max) {
5876 		/* Return a random value in the range. */
5877 		if (cl_inet_getspi) {
5878 			cl_inet_getspi(ns->netstack_stackid, protocol,
5879 			    (uint8_t *)&add, sizeof (add), NULL);
5880 		} else {
5881 			(void) random_get_pseudo_bytes((uint8_t *)&add,
5882 			    sizeof (add));
5883 		}
5884 		master_spi = min + (add % (max - min + 1));
5885 	}
5886 
5887 	/*
5888 	 * Since master_spi is passed in host order, we need to htonl() it
5889 	 * for the purposes of creating a new SA.
5890 	 */
5891 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5892 	    ns));
5893 }
5894 
5895 /*
5896  *
5897  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5898  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5899  * and scan for the sequence number in question.  I may wish to accept an
5900  * address pair with it, for easier searching.
5901  *
5902  * Caller frees the message, so we don't have to here.
5903  *
5904  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5905  *		failures.
5906  */
5907 /* ARGSUSED */
5908 void
5909 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
5910 {
5911 	int i;
5912 	ipsacq_t *acqrec;
5913 	iacqf_t *bucket;
5914 
5915 	/*
5916 	 * I only accept the base header for this!
5917 	 * Though to be honest, requiring the dst address would help
5918 	 * immensely.
5919 	 *
5920 	 * XXX	There are already cases where I can get the dst address.
5921 	 */
5922 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5923 		return;
5924 
5925 	/*
5926 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5927 	 * (and in the future send a message to IP with the appropriate error
5928 	 * number).
5929 	 *
5930 	 * Q: Do I want to reject if pid != 0?
5931 	 */
5932 
5933 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5934 		bucket = &sp->s_v4.sdb_acq[i];
5935 		mutex_enter(&bucket->iacqf_lock);
5936 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5937 		    acqrec = acqrec->ipsacq_next) {
5938 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5939 				break;	/* for acqrec... loop. */
5940 		}
5941 		if (acqrec != NULL)
5942 			break;	/* for i = 0... loop. */
5943 
5944 		mutex_exit(&bucket->iacqf_lock);
5945 	}
5946 
5947 	if (acqrec == NULL) {
5948 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
5949 			bucket = &sp->s_v6.sdb_acq[i];
5950 			mutex_enter(&bucket->iacqf_lock);
5951 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5952 			    acqrec = acqrec->ipsacq_next) {
5953 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5954 					break;	/* for acqrec... loop. */
5955 			}
5956 			if (acqrec != NULL)
5957 				break;	/* for i = 0... loop. */
5958 
5959 			mutex_exit(&bucket->iacqf_lock);
5960 		}
5961 	}
5962 
5963 
5964 	if (acqrec == NULL)
5965 		return;
5966 
5967 	/*
5968 	 * What do I do with the errno and IP?	I may need mp's services a
5969 	 * little more.	 See sadb_destroy_acquire() for future directions
5970 	 * beyond free the mblk chain on the acquire record.
5971 	 */
5972 
5973 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
5974 	sadb_destroy_acquire(acqrec, ns);
5975 	/* Have to exit mutex here, because of breaking out of for loop. */
5976 	mutex_exit(&bucket->iacqf_lock);
5977 }
5978 
5979 /*
5980  * The following functions work with the replay windows of an SA.  They assume
5981  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
5982  * represents the highest sequence number packet received, and back
5983  * (ipsa->ipsa_replay_wsize) packets.
5984  */
5985 
5986 /*
5987  * Is the replay bit set?
5988  */
5989 static boolean_t
5990 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
5991 {
5992 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5993 
5994 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
5995 }
5996 
5997 /*
5998  * Shift the bits of the replay window over.
5999  */
6000 static void
6001 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6002 {
6003 	int i;
6004 	int jump = ((shift - 1) >> 6) + 1;
6005 
6006 	if (shift == 0)
6007 		return;
6008 
6009 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6010 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6011 			ipsa->ipsa_replay_arr[i + jump] |=
6012 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6013 		}
6014 		ipsa->ipsa_replay_arr[i] <<= shift;
6015 	}
6016 }
6017 
6018 /*
6019  * Set a bit in the bit vector.
6020  */
6021 static void
6022 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6023 {
6024 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6025 
6026 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6027 }
6028 
6029 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
6030 
6031 /*
6032  * Assume caller has NOT done ntohl() already on seq.  Check to see
6033  * if replay sequence number "seq" has been seen already.
6034  */
6035 boolean_t
6036 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6037 {
6038 	boolean_t rc;
6039 	uint32_t diff;
6040 
6041 	if (ipsa->ipsa_replay_wsize == 0)
6042 		return (B_TRUE);
6043 
6044 	/*
6045 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6046 	 */
6047 
6048 	/* Convert sequence number into host order before holding the mutex. */
6049 	seq = ntohl(seq);
6050 
6051 	mutex_enter(&ipsa->ipsa_lock);
6052 
6053 	/* Initialize inbound SA's ipsa_replay field to last one received. */
6054 	if (ipsa->ipsa_replay == 0)
6055 		ipsa->ipsa_replay = 1;
6056 
6057 	if (seq > ipsa->ipsa_replay) {
6058 		/*
6059 		 * I have received a new "highest value received".  Shift
6060 		 * the replay window over.
6061 		 */
6062 		diff = seq - ipsa->ipsa_replay;
6063 		if (diff < ipsa->ipsa_replay_wsize) {
6064 			/* In replay window, shift bits over. */
6065 			ipsa_shift_replay(ipsa, diff);
6066 		} else {
6067 			/* WAY FAR AHEAD, clear bits and start again. */
6068 			bzero(ipsa->ipsa_replay_arr,
6069 			    sizeof (ipsa->ipsa_replay_arr));
6070 		}
6071 		ipsa_set_replay(ipsa, 0);
6072 		ipsa->ipsa_replay = seq;
6073 		rc = B_TRUE;
6074 		goto done;
6075 	}
6076 	diff = ipsa->ipsa_replay - seq;
6077 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6078 		rc = B_FALSE;
6079 		goto done;
6080 	}
6081 	/* Set this packet as seen. */
6082 	ipsa_set_replay(ipsa, diff);
6083 
6084 	rc = B_TRUE;
6085 done:
6086 	mutex_exit(&ipsa->ipsa_lock);
6087 	return (rc);
6088 }
6089 
6090 /*
6091  * "Peek" and see if we should even bother going through the effort of
6092  * running an authentication check on the sequence number passed in.
6093  * this takes into account packets that are below the replay window,
6094  * and collisions with already replayed packets.  Return B_TRUE if it
6095  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6096  * Assume same byte-ordering as sadb_replay_check.
6097  */
6098 boolean_t
6099 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6100 {
6101 	boolean_t rc = B_FALSE;
6102 	uint32_t diff;
6103 
6104 	if (ipsa->ipsa_replay_wsize == 0)
6105 		return (B_TRUE);
6106 
6107 	/*
6108 	 * 0 is 0, regardless of byte order... :)
6109 	 *
6110 	 * If I get 0 on the wire (and there is a replay window) then the
6111 	 * sender most likely wrapped.	This ipsa may need to be marked or
6112 	 * something.
6113 	 */
6114 	if (seq == 0)
6115 		return (B_FALSE);
6116 
6117 	seq = ntohl(seq);
6118 	mutex_enter(&ipsa->ipsa_lock);
6119 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6120 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6121 		goto done;
6122 
6123 	/*
6124 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6125 	 * bother with formalities.  I'm not accepting any more packets
6126 	 * on this SA.
6127 	 */
6128 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6129 		/*
6130 		 * Since we're already holding the lock, update the
6131 		 * expire time ala. sadb_replay_delete() and return.
6132 		 */
6133 		ipsa->ipsa_hardexpiretime = (time_t)1;
6134 		goto done;
6135 	}
6136 
6137 	if (seq <= ipsa->ipsa_replay) {
6138 		/*
6139 		 * This seq is in the replay window.  I'm not below it,
6140 		 * because I already checked for that above!
6141 		 */
6142 		diff = ipsa->ipsa_replay - seq;
6143 		if (ipsa_is_replay_set(ipsa, diff))
6144 			goto done;
6145 	}
6146 	/* Else return B_TRUE, I'm going to advance the window. */
6147 
6148 	rc = B_TRUE;
6149 done:
6150 	mutex_exit(&ipsa->ipsa_lock);
6151 	return (rc);
6152 }
6153 
6154 /*
6155  * Delete a single SA.
6156  *
6157  * For now, use the quick-and-dirty trick of making the association's
6158  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6159  */
6160 void
6161 sadb_replay_delete(ipsa_t *assoc)
6162 {
6163 	mutex_enter(&assoc->ipsa_lock);
6164 	assoc->ipsa_hardexpiretime = (time_t)1;
6165 	mutex_exit(&assoc->ipsa_lock);
6166 }
6167 
6168 /*
6169  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
6170  * down.  The caller will handle the T_BIND_ACK locally.
6171  */
6172 boolean_t
6173 sadb_t_bind_req(queue_t *q, int proto)
6174 {
6175 	struct T_bind_req *tbr;
6176 	mblk_t *mp;
6177 
6178 	mp = allocb(sizeof (struct T_bind_req) + 1, BPRI_HI);
6179 	if (mp == NULL) {
6180 		/* cmn_err(CE_WARN, */
6181 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
6182 		return (B_FALSE);
6183 	}
6184 	mp->b_datap->db_type = M_PCPROTO;
6185 	tbr = (struct T_bind_req *)mp->b_rptr;
6186 	mp->b_wptr += sizeof (struct T_bind_req);
6187 	tbr->PRIM_type = T_BIND_REQ;
6188 	tbr->ADDR_length = 0;
6189 	tbr->ADDR_offset = 0;
6190 	tbr->CONIND_number = 0;
6191 	*mp->b_wptr = (uint8_t)proto;
6192 	mp->b_wptr++;
6193 
6194 	putnext(q, mp);
6195 	return (B_TRUE);
6196 }
6197 
6198 /*
6199  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
6200  * this is designed to take only a format string with "* %x * %s *", so
6201  * that "spi" is printed first, then "addr" is converted using inet_pton().
6202  *
6203  * This is abstracted out to save the stack space for only when inet_pton()
6204  * is called.  Make sure "spi" is in network order; it usually is when this
6205  * would get called.
6206  */
6207 void
6208 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6209     uint32_t spi, void *addr, int af, netstack_t *ns)
6210 {
6211 	char buf[INET6_ADDRSTRLEN];
6212 
6213 	ASSERT(af == AF_INET6 || af == AF_INET);
6214 
6215 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6216 	    inet_ntop(af, addr, buf, sizeof (buf)));
6217 }
6218 
6219 /*
6220  * Fills in a reference to the policy, if any, from the conn, in *ppp
6221  * Releases a reference to the passed conn_t.
6222  */
6223 static void
6224 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6225 {
6226 	ipsec_policy_t	*pp;
6227 	ipsec_latch_t	*ipl = connp->conn_latch;
6228 
6229 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
6230 		pp = ipl->ipl_out_policy;
6231 		IPPOL_REFHOLD(pp);
6232 	} else {
6233 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
6234 		    connp->conn_netstack);
6235 	}
6236 	*ppp = pp;
6237 	CONN_DEC_REF(connp);
6238 }
6239 
6240 /*
6241  * The following functions scan through active conn_t structures
6242  * and return a reference to the best-matching policy it can find.
6243  * Caller must release the reference.
6244  */
6245 static void
6246 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6247 {
6248 	connf_t *connfp;
6249 	conn_t *connp = NULL;
6250 	ipsec_selector_t portonly;
6251 
6252 	bzero((void *)&portonly, sizeof (portonly));
6253 
6254 	if (sel->ips_local_port == 0)
6255 		return;
6256 
6257 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6258 	    ipst)];
6259 	mutex_enter(&connfp->connf_lock);
6260 
6261 	if (sel->ips_isv4) {
6262 		connp = connfp->connf_head;
6263 		while (connp != NULL) {
6264 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6265 			    sel->ips_local_addr_v4, sel->ips_remote_port,
6266 			    sel->ips_remote_addr_v4))
6267 				break;
6268 			connp = connp->conn_next;
6269 		}
6270 
6271 		if (connp == NULL) {
6272 			/* Try port-only match in IPv6. */
6273 			portonly.ips_local_port = sel->ips_local_port;
6274 			sel = &portonly;
6275 		}
6276 	}
6277 
6278 	if (connp == NULL) {
6279 		connp = connfp->connf_head;
6280 		while (connp != NULL) {
6281 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6282 			    sel->ips_local_addr_v6, sel->ips_remote_port,
6283 			    sel->ips_remote_addr_v6))
6284 				break;
6285 			connp = connp->conn_next;
6286 		}
6287 
6288 		if (connp == NULL) {
6289 			mutex_exit(&connfp->connf_lock);
6290 			return;
6291 		}
6292 	}
6293 
6294 	CONN_INC_REF(connp);
6295 	mutex_exit(&connfp->connf_lock);
6296 
6297 	ipsec_conn_pol(sel, connp, ppp);
6298 }
6299 
6300 static conn_t *
6301 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6302 {
6303 	connf_t *connfp;
6304 	conn_t *connp = NULL;
6305 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6306 
6307 	if (sel->ips_local_port == 0)
6308 		return (NULL);
6309 
6310 	connfp = &ipst->ips_ipcl_bind_fanout[
6311 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6312 	mutex_enter(&connfp->connf_lock);
6313 
6314 	if (sel->ips_isv4) {
6315 		connp = connfp->connf_head;
6316 		while (connp != NULL) {
6317 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6318 			    sel->ips_local_addr_v4, pptr[1]))
6319 				break;
6320 			connp = connp->conn_next;
6321 		}
6322 
6323 		if (connp == NULL) {
6324 			/* Match to all-zeroes. */
6325 			v6addrmatch = &ipv6_all_zeros;
6326 		}
6327 	}
6328 
6329 	if (connp == NULL) {
6330 		connp = connfp->connf_head;
6331 		while (connp != NULL) {
6332 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6333 			    *v6addrmatch, pptr[1]))
6334 				break;
6335 			connp = connp->conn_next;
6336 		}
6337 
6338 		if (connp == NULL) {
6339 			mutex_exit(&connfp->connf_lock);
6340 			return (NULL);
6341 		}
6342 	}
6343 
6344 	CONN_INC_REF(connp);
6345 	mutex_exit(&connfp->connf_lock);
6346 	return (connp);
6347 }
6348 
6349 static void
6350 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6351 {
6352 	connf_t 	*connfp;
6353 	conn_t		*connp;
6354 	uint32_t	ports;
6355 	uint16_t	*pptr = (uint16_t *)&ports;
6356 
6357 	/*
6358 	 * Find TCP state in the following order:
6359 	 * 1.) Connected conns.
6360 	 * 2.) Listeners.
6361 	 *
6362 	 * Even though #2 will be the common case for inbound traffic, only
6363 	 * following this order insures correctness.
6364 	 */
6365 
6366 	if (sel->ips_local_port == 0)
6367 		return;
6368 
6369 	/*
6370 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6371 	 * See ipsec_construct_inverse_acquire() for details.
6372 	 */
6373 	pptr[0] = sel->ips_remote_port;
6374 	pptr[1] = sel->ips_local_port;
6375 
6376 	connfp = &ipst->ips_ipcl_conn_fanout[
6377 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6378 	mutex_enter(&connfp->connf_lock);
6379 	connp = connfp->connf_head;
6380 
6381 	if (sel->ips_isv4) {
6382 		while (connp != NULL) {
6383 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6384 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6385 			    ports))
6386 				break;
6387 			connp = connp->conn_next;
6388 		}
6389 	} else {
6390 		while (connp != NULL) {
6391 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6392 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6393 			    ports))
6394 				break;
6395 			connp = connp->conn_next;
6396 		}
6397 	}
6398 
6399 	if (connp != NULL) {
6400 		CONN_INC_REF(connp);
6401 		mutex_exit(&connfp->connf_lock);
6402 	} else {
6403 		mutex_exit(&connfp->connf_lock);
6404 
6405 		/* Try the listen hash. */
6406 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6407 			return;
6408 	}
6409 
6410 	ipsec_conn_pol(sel, connp, ppp);
6411 }
6412 
6413 static void
6414 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6415     ip_stack_t *ipst)
6416 {
6417 	conn_t		*connp;
6418 	uint32_t	ports;
6419 	uint16_t	*pptr = (uint16_t *)&ports;
6420 
6421 	/*
6422 	 * Find SCP state in the following order:
6423 	 * 1.) Connected conns.
6424 	 * 2.) Listeners.
6425 	 *
6426 	 * Even though #2 will be the common case for inbound traffic, only
6427 	 * following this order insures correctness.
6428 	 */
6429 
6430 	if (sel->ips_local_port == 0)
6431 		return;
6432 
6433 	/*
6434 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6435 	 * See ipsec_construct_inverse_acquire() for details.
6436 	 */
6437 	pptr[0] = sel->ips_remote_port;
6438 	pptr[1] = sel->ips_local_port;
6439 
6440 	if (sel->ips_isv4) {
6441 		in6_addr_t	src, dst;
6442 
6443 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6444 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6445 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6446 		    ipst->ips_netstack->netstack_sctp);
6447 	} else {
6448 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6449 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6450 		    ipst->ips_netstack->netstack_sctp);
6451 	}
6452 	if (connp == NULL)
6453 		return;
6454 	ipsec_conn_pol(sel, connp, ppp);
6455 }
6456 
6457 /*
6458  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6459  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6460  * to PF_KEY.
6461  *
6462  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6463  * ignore prefix lengths in the address extension.  Since we match on first-
6464  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6465  * set addresses to mask out the lower bits, we should get a suitable search
6466  * key for the SPD anyway.  This is the function to change if the assumption
6467  * about suitable search keys is wrong.
6468  */
6469 static int
6470 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6471     sadb_address_t *dstext, int *diagnostic)
6472 {
6473 	struct sockaddr_in *src, *dst;
6474 	struct sockaddr_in6 *src6, *dst6;
6475 
6476 	*diagnostic = 0;
6477 
6478 	bzero(sel, sizeof (*sel));
6479 	sel->ips_protocol = srcext->sadb_address_proto;
6480 	dst = (struct sockaddr_in *)(dstext + 1);
6481 	if (dst->sin_family == AF_INET6) {
6482 		dst6 = (struct sockaddr_in6 *)dst;
6483 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6484 		if (src6->sin6_family != AF_INET6) {
6485 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6486 			return (EINVAL);
6487 		}
6488 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6489 		sel->ips_local_addr_v6 = src6->sin6_addr;
6490 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6491 			sel->ips_is_icmp_inv_acq = 1;
6492 		} else {
6493 			sel->ips_remote_port = dst6->sin6_port;
6494 			sel->ips_local_port = src6->sin6_port;
6495 		}
6496 		sel->ips_isv4 = B_FALSE;
6497 	} else {
6498 		src = (struct sockaddr_in *)(srcext + 1);
6499 		if (src->sin_family != AF_INET) {
6500 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6501 			return (EINVAL);
6502 		}
6503 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6504 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6505 		if (sel->ips_protocol == IPPROTO_ICMP) {
6506 			sel->ips_is_icmp_inv_acq = 1;
6507 		} else {
6508 			sel->ips_remote_port = dst->sin_port;
6509 			sel->ips_local_port = src->sin_port;
6510 		}
6511 		sel->ips_isv4 = B_TRUE;
6512 	}
6513 	return (0);
6514 }
6515 
6516 /*
6517  * We have encapsulation.
6518  * - Lookup tun_t by address and look for an associated
6519  *   tunnel policy
6520  * - If there are inner selectors
6521  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6522  *   - Look up tunnel policy based on selectors
6523  * - Else
6524  *   - Sanity check the negotation
6525  *   - If appropriate, fall through to global policy
6526  */
6527 static int
6528 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6529     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6530     int *diagnostic, netstack_t *ns)
6531 {
6532 	int err;
6533 	ipsec_policy_head_t *polhead;
6534 
6535 	/* Check for inner selectors and act appropriately */
6536 
6537 	if (innsrcext != NULL) {
6538 		/* Inner selectors present */
6539 		ASSERT(inndstext != NULL);
6540 		if ((itp == NULL) ||
6541 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6542 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6543 			/*
6544 			 * If inner packet selectors, we must have negotiate
6545 			 * tunnel and active policy.  If the tunnel has
6546 			 * transport-mode policy set on it, or has no policy,
6547 			 * fail.
6548 			 */
6549 			return (ENOENT);
6550 		} else {
6551 			/*
6552 			 * Reset "sel" to indicate inner selectors.  Pass
6553 			 * inner PF_KEY address extensions for this to happen.
6554 			 */
6555 			err = ipsec_get_inverse_acquire_sel(sel,
6556 			    innsrcext, inndstext, diagnostic);
6557 			if (err != 0) {
6558 				ITP_REFRELE(itp, ns);
6559 				return (err);
6560 			}
6561 			/*
6562 			 * Now look for a tunnel policy based on those inner
6563 			 * selectors.  (Common code is below.)
6564 			 */
6565 		}
6566 	} else {
6567 		/* No inner selectors present */
6568 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6569 			/*
6570 			 * Transport mode negotiation with no tunnel policy
6571 			 * configured - return to indicate a global policy
6572 			 * check is needed.
6573 			 */
6574 			if (itp != NULL) {
6575 				ITP_REFRELE(itp, ns);
6576 			}
6577 			return (0);
6578 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6579 			/* Tunnel mode set with no inner selectors. */
6580 			ITP_REFRELE(itp, ns);
6581 			return (ENOENT);
6582 		}
6583 		/*
6584 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6585 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6586 		 * itp with policy set based on any match, so don't bother
6587 		 * changing fields in "sel".
6588 		 */
6589 	}
6590 
6591 	ASSERT(itp != NULL);
6592 	polhead = itp->itp_policy;
6593 	ASSERT(polhead != NULL);
6594 	rw_enter(&polhead->iph_lock, RW_READER);
6595 	*ppp = ipsec_find_policy_head(NULL, polhead,
6596 	    IPSEC_TYPE_INBOUND, sel, ns);
6597 	rw_exit(&polhead->iph_lock);
6598 	ITP_REFRELE(itp, ns);
6599 
6600 	/*
6601 	 * Don't default to global if we didn't find a matching policy entry.
6602 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6603 	 */
6604 	if (*ppp == NULL)
6605 		return (ENOENT);
6606 
6607 	return (0);
6608 }
6609 
6610 static void
6611 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6612     ip_stack_t *ipst)
6613 {
6614 	boolean_t	isv4 = sel->ips_isv4;
6615 	connf_t		*connfp;
6616 	conn_t		*connp;
6617 
6618 	if (isv4) {
6619 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
6620 	} else {
6621 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6622 	}
6623 
6624 	mutex_enter(&connfp->connf_lock);
6625 	for (connp = connfp->connf_head; connp != NULL;
6626 	    connp = connp->conn_next) {
6627 		if (!((isv4 && !((connp->conn_src == 0 ||
6628 		    connp->conn_src == sel->ips_local_addr_v4) &&
6629 		    (connp->conn_rem == 0 ||
6630 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
6631 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
6632 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
6633 		    &sel->ips_local_addr_v6)) &&
6634 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
6635 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
6636 		    &sel->ips_remote_addr_v6)))))) {
6637 			break;
6638 		}
6639 	}
6640 	if (connp == NULL) {
6641 		mutex_exit(&connfp->connf_lock);
6642 		return;
6643 	}
6644 
6645 	CONN_INC_REF(connp);
6646 	mutex_exit(&connfp->connf_lock);
6647 
6648 	ipsec_conn_pol(sel, connp, ppp);
6649 }
6650 
6651 /*
6652  * Construct an inverse ACQUIRE reply based on:
6653  *
6654  * 1.) Current global policy.
 * 2.) A conn_t match, depending on what was passed in the extv[].
 * 3.) A tunnel's policy head.
 * ...
 * N.) Other stuff TBD (e.g. identities)
 *
 * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
 * in this function so the caller can extract them where appropriate.
6662  *
6663  * The SRC address is the local one - just like an outbound ACQUIRE message.
6664  */
6665 mblk_t *
6666 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6667     netstack_t *ns)
6668 {
6669 	int err;
6670 	int diagnostic;
6671 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6672 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6673 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6674 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6675 	struct sockaddr_in6 *src, *dst;
6676 	struct sockaddr_in6 *isrc, *idst;
6677 	ipsec_tun_pol_t *itp = NULL;
6678 	ipsec_policy_t *pp = NULL;
6679 	ipsec_selector_t sel, isel;
6680 	mblk_t *retmp;
6681 	ip_stack_t	*ipst = ns->netstack_ip;
6682 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6683 
6684 	/* Normalize addresses */
6685 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6686 	    == KS_IN_ADDR_UNKNOWN) {
6687 		err = EINVAL;
6688 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6689 		goto bail;
6690 	}
6691 	src = (struct sockaddr_in6 *)(srcext + 1);
6692 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6693 	    == KS_IN_ADDR_UNKNOWN) {
6694 		err = EINVAL;
6695 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6696 		goto bail;
6697 	}
6698 	dst = (struct sockaddr_in6 *)(dstext + 1);
6699 	if (src->sin6_family != dst->sin6_family) {
6700 		err = EINVAL;
6701 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6702 		goto bail;
6703 	}
6704 
6705 	/* Check for tunnel mode and act appropriately */
6706 	if (innsrcext != NULL) {
6707 		if (inndstext == NULL) {
6708 			err = EINVAL;
6709 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6710 			goto bail;
6711 		}
6712 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6713 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6714 			err = EINVAL;
6715 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6716 			goto bail;
6717 		}
6718 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6719 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6720 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6721 			err = EINVAL;
6722 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6723 			goto bail;
6724 		}
6725 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6726 		if (isrc->sin6_family != idst->sin6_family) {
6727 			err = EINVAL;
6728 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6729 			goto bail;
6730 		}
6731 		if (isrc->sin6_family != AF_INET &&
6732 		    isrc->sin6_family != AF_INET6) {
6733 			err = EINVAL;
6734 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6735 			goto bail;
6736 		}
6737 	} else if (inndstext != NULL) {
6738 		err = EINVAL;
6739 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6740 		goto bail;
6741 	}
6742 
6743 	/* Get selectors first, based on outer addresses */
6744 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6745 	if (err != 0)
6746 		goto bail;
6747 
6748 	/* Check for tunnel mode mismatches. */
6749 	if (innsrcext != NULL &&
6750 	    ((isrc->sin6_family == AF_INET &&
6751 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6752 	    (isrc->sin6_family == AF_INET6 &&
6753 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6754 		err = EPROTOTYPE;
6755 		goto bail;
6756 	}
6757 
6758 	/*
6759 	 * Okay, we have the addresses and other selector information.
6760 	 * Let's first find a conn...
6761 	 */
6762 	pp = NULL;
6763 	switch (sel.ips_protocol) {
6764 	case IPPROTO_TCP:
6765 		ipsec_tcp_pol(&sel, &pp, ipst);
6766 		break;
6767 	case IPPROTO_UDP:
6768 		ipsec_udp_pol(&sel, &pp, ipst);
6769 		break;
6770 	case IPPROTO_SCTP:
6771 		ipsec_sctp_pol(&sel, &pp, ipst);
6772 		break;
6773 	case IPPROTO_ENCAP:
6774 	case IPPROTO_IPV6:
6775 		rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER);
6776 		/*
6777 		 * Assume sel.ips_remote_addr_* has the right address at
6778 		 * that exact position.
6779 		 */
6780 		itp = ipss->ipsec_itp_get_byaddr(
6781 		    (uint32_t *)(&sel.ips_local_addr_v6),
6782 		    (uint32_t *)(&sel.ips_remote_addr_v6),
6783 		    src->sin6_family, ns);
6784 		rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
6785 		if (innsrcext == NULL) {
6786 			/*
6787 			 * Transport-mode tunnel, make sure we fake out isel
6788 			 * to contain something based on the outer protocol.
6789 			 */
6790 			bzero(&isel, sizeof (isel));
6791 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6792 		} /* Else isel is initialized by ipsec_tun_pol(). */
6793 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6794 		    &diagnostic, ns);
6795 		/*
6796 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6797 		 * may be.
6798 		 */
6799 		if (err != 0)
6800 			goto bail;
6801 		break;
6802 	default:
6803 		ipsec_oth_pol(&sel, &pp, ipst);
6804 		break;
6805 	}
6806 
6807 	/*
6808 	 * If we didn't find a matching conn_t or other policy head, take a
6809 	 * look in the global policy.
6810 	 */
6811 	if (pp == NULL) {
6812 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6813 		    ns);
6814 		if (pp == NULL) {
6815 			/* There's no global policy. */
6816 			err = ENOENT;
6817 			diagnostic = 0;
6818 			goto bail;
6819 		}
6820 	}
6821 
6822 	/*
6823 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6824 	 * message based on that, fix fields where appropriate,
6825 	 * and return the message.
6826 	 */
6827 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6828 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6829 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6830 	if (pp != NULL) {
6831 		IPPOL_REFRELE(pp, ns);
6832 	}
6833 	if (retmp != NULL) {
6834 		return (retmp);
6835 	} else {
6836 		err = ENOMEM;
6837 		diagnostic = 0;
6838 	}
6839 bail:
6840 	samsg->sadb_msg_errno = (uint8_t)err;
6841 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6842 	return (NULL);
6843 }
6844 
6845 /*
6846  * ipsa_lpkt is a one-element queue, only manipulated by the next two
6847  * functions.  They have to hold the ipsa_lock because of potential races
6848  * between key management using SADB_UPDATE, and inbound packets that may
6849  * queue up on the larval SA (hence the 'l' in "lpkt").
6850  */
6851 
6852 /*
6853  * sadb_set_lpkt: Return TRUE if we can swap in a value to ipsa->ipsa_lpkt and
6854  * freemsg the previous value.  Return FALSE if we lost the race and the SA is
6855  * in a non-LARVAL state.  free clue: ip_drop_packet(NULL) is safe.
6856  */
6857 boolean_t
6858 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
6859 {
6860 	mblk_t *opkt;
6861 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6862 	boolean_t is_larval;
6863 
6864 	/*
6865 	 * Check the packet's netstack id in case we go asynch with a
6866 	 * taskq_dispatch.
6867 	 */
6868 	ASSERT(((ipsec_in_t *)npkt->b_rptr)->ipsec_in_type == IPSEC_IN);
6869 	ASSERT(((ipsec_in_t *)npkt->b_rptr)->ipsec_in_stackid ==
6870 	    ns->netstack_stackid);
6871 
6872 	mutex_enter(&ipsa->ipsa_lock);
6873 	is_larval = (ipsa->ipsa_state == IPSA_STATE_LARVAL);
6874 	if (is_larval) {
6875 		opkt = ipsa->ipsa_lpkt;
6876 		ipsa->ipsa_lpkt = npkt;
6877 	} else {
6878 		/* We lost the race. */
6879 		opkt = NULL;
6880 		ASSERT(ipsa->ipsa_lpkt == NULL);
6881 	}
6882 	mutex_exit(&ipsa->ipsa_lock);
6883 
6884 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
6885 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
6886 	    &ipss->ipsec_sadb_dropper);
6887 	return (is_larval);
6888 }
6889 
6890 /*
6891  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6892  * previous value.
6893  */
6894 mblk_t *
6895 sadb_clear_lpkt(ipsa_t *ipsa)
6896 {
6897 	mblk_t *opkt;
6898 
6899 	mutex_enter(&ipsa->ipsa_lock);
6900 	opkt = ipsa->ipsa_lpkt;
6901 	ipsa->ipsa_lpkt = NULL;
6902 	mutex_exit(&ipsa->ipsa_lock);
6903 
6904 	return (opkt);
6905 }
6906 
6907 /*
6908  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
6909  */
6910 void
6911 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, netstack_t *ns)
6912 {
6913 	ipsec_stack_t   *ipss = ns->netstack_ipsec;
6914 	extern void (*cl_inet_idlesa)(netstackid_t, uint8_t, uint32_t,
6915 	    sa_family_t, in6_addr_t, in6_addr_t, void *);
6916 	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
6917 	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
6918 
6919 	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
6920 
6921 	if (cl_inet_idlesa == NULL) {
6922 		ip_drop_packet(bpkt, B_TRUE, NULL, NULL,
6923 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6924 		    &ipss->ipsec_sadb_dropper);
6925 		return;
6926 	}
6927 
6928 	cl_inet_idlesa(ns->netstack_stackid,
6929 	    (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
6930 	    ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
6931 
6932 	/*
6933 	 * Check the packet's netstack id in case we go asynch with a
6934 	 * taskq_dispatch.
6935 	 */
6936 	ASSERT(((ipsec_in_t *)bpkt->b_rptr)->ipsec_in_type == IPSEC_IN);
6937 	ASSERT(((ipsec_in_t *)bpkt->b_rptr)->ipsec_in_stackid ==
6938 	    ns->netstack_stackid);
6939 
6940 	mutex_enter(&ipsa->ipsa_lock);
6941 	ipsa->ipsa_mblkcnt++;
6942 	if (ipsa->ipsa_bpkt_head == NULL) {
6943 		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
6944 	} else {
6945 		ipsa->ipsa_bpkt_tail->b_next = bpkt;
6946 		ipsa->ipsa_bpkt_tail = bpkt;
6947 		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
6948 			mblk_t *tmp;
6949 			tmp = ipsa->ipsa_bpkt_head;
6950 			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
6951 			ip_drop_packet(tmp, B_TRUE, NULL, NULL,
6952 			    DROPPER(ipss, ipds_sadb_inidle_overflow),
6953 			    &ipss->ipsec_sadb_dropper);
6954 			ipsa->ipsa_mblkcnt --;
6955 		}
6956 	}
6957 	mutex_exit(&ipsa->ipsa_lock);
6958 
6959 }
6960 
6961 /*
6962  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
6963  * and put into STREAMS again.
6964  */
6965 void
6966 sadb_clear_buf_pkt(void *ipkt)
6967 {
6968 	mblk_t	*tmp, *buf_pkt;
6969 	netstack_t *ns;
6970 	ipsec_in_t *ii;
6971 
6972 	buf_pkt = (mblk_t *)ipkt;
6973 
6974 	ii = (ipsec_in_t *)buf_pkt->b_rptr;
6975 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
6976 	ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
6977 	if (ns != NULL && ns != ii->ipsec_in_ns) {
6978 		netstack_rele(ns);
6979 		ns = NULL;  /* For while-loop below. */
6980 	}
6981 
6982 	while (buf_pkt != NULL) {
6983 		tmp = buf_pkt->b_next;
6984 		buf_pkt->b_next = NULL;
6985 		if (ns != NULL)
6986 			ip_fanout_proto_again(buf_pkt, NULL, NULL, NULL);
6987 		else
6988 			freemsg(buf_pkt);
6989 		buf_pkt = tmp;
6990 	}
6991 	if (ns != NULL)
6992 		netstack_rele(ns);
6993 }
6994 /*
6995  * Walker callback used by sadb_alg_update() to free/create crypto
6996  * context template when a crypto software provider is removed or
6997  * added.
6998  */
6999 
7000 struct sadb_update_alg_state {
7001 	ipsec_algtype_t alg_type;
7002 	uint8_t alg_id;
7003 	boolean_t is_added;
7004 };
7005 
7006 static void
7007 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7008 {
7009 	struct sadb_update_alg_state *update_state =
7010 	    (struct sadb_update_alg_state *)cookie;
7011 	crypto_ctx_template_t *ctx_tmpl = NULL;
7012 
7013 	ASSERT(MUTEX_HELD(&head->isaf_lock));
7014 
7015 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
7016 		return;
7017 
7018 	mutex_enter(&entry->ipsa_lock);
7019 
7020 	switch (update_state->alg_type) {
7021 	case IPSEC_ALG_AUTH:
7022 		if (entry->ipsa_auth_alg == update_state->alg_id)
7023 			ctx_tmpl = &entry->ipsa_authtmpl;
7024 		break;
7025 	case IPSEC_ALG_ENCR:
7026 		if (entry->ipsa_encr_alg == update_state->alg_id)
7027 			ctx_tmpl = &entry->ipsa_encrtmpl;
7028 		break;
7029 	default:
7030 		ctx_tmpl = NULL;
7031 	}
7032 
7033 	if (ctx_tmpl == NULL) {
7034 		mutex_exit(&entry->ipsa_lock);
7035 		return;
7036 	}
7037 
7038 	/*
7039 	 * The context template of the SA may be affected by the change
7040 	 * of crypto provider.
7041 	 */
7042 	if (update_state->is_added) {
7043 		/* create the context template if not already done */
7044 		if (*ctx_tmpl == NULL) {
7045 			(void) ipsec_create_ctx_tmpl(entry,
7046 			    update_state->alg_type);
7047 		}
7048 	} else {
7049 		/*
7050 		 * The crypto provider was removed. If the context template
7051 		 * exists but it is no longer valid, free it.
7052 		 */
7053 		if (*ctx_tmpl != NULL)
7054 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7055 	}
7056 
7057 	mutex_exit(&entry->ipsa_lock);
7058 }
7059 
7060 /*
7061  * Invoked by IP when an software crypto provider has been updated.
7062  * The type and id of the corresponding algorithm is passed as argument.
7063  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7064  * removed. The function updates the SADB and free/creates the
7065  * context templates associated with SAs if needed.
7066  */
7067 
7068 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
7069     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7070 	&update_state)
7071 
7072 void
7073 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7074     netstack_t *ns)
7075 {
7076 	struct sadb_update_alg_state update_state;
7077 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7078 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7079 
7080 	update_state.alg_type = alg_type;
7081 	update_state.alg_id = alg_id;
7082 	update_state.is_added = is_added;
7083 
7084 	if (alg_type == IPSEC_ALG_AUTH) {
7085 		/* walk the AH tables only for auth. algorithm changes */
7086 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7087 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7088 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7089 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7090 	}
7091 
7092 	/* walk the ESP tables */
7093 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7094 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7095 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7096 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7097 }
7098 
7099 /*
7100  * Creates a context template for the specified SA. This function
7101  * is called when an SA is created and when a context template needs
7102  * to be created due to a change of software provider.
7103  */
7104 int
7105 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7106 {
7107 	ipsec_alginfo_t *alg;
7108 	crypto_mechanism_t mech;
7109 	crypto_key_t *key;
7110 	crypto_ctx_template_t *sa_tmpl;
7111 	int rv;
7112 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7113 
7114 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
7115 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7116 
7117 	/* get pointers to the algorithm info, context template, and key */
7118 	switch (alg_type) {
7119 	case IPSEC_ALG_AUTH:
7120 		key = &sa->ipsa_kcfauthkey;
7121 		sa_tmpl = &sa->ipsa_authtmpl;
7122 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7123 		break;
7124 	case IPSEC_ALG_ENCR:
7125 		key = &sa->ipsa_kcfencrkey;
7126 		sa_tmpl = &sa->ipsa_encrtmpl;
7127 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7128 		break;
7129 	default:
7130 		alg = NULL;
7131 	}
7132 
7133 	if (alg == NULL || !ALG_VALID(alg))
7134 		return (EINVAL);
7135 
7136 	/* initialize the mech info structure for the framework */
7137 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7138 	mech.cm_type = alg->alg_mech_type;
7139 	mech.cm_param = NULL;
7140 	mech.cm_param_len = 0;
7141 
7142 	/* create a new context template */
7143 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7144 
7145 	/*
7146 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7147 	 * providers are available for that mechanism. In that case
7148 	 * we don't fail, and will generate the context template from
7149 	 * the framework callback when a software provider for that
7150 	 * mechanism registers.
7151 	 *
7152 	 * The context template is assigned the special value
7153 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7154 	 * lack of memory. No attempt will be made to use
7155 	 * the context template if it is set to this value.
7156 	 */
7157 	if (rv == CRYPTO_HOST_MEMORY) {
7158 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7159 	} else if (rv != CRYPTO_SUCCESS) {
7160 		*sa_tmpl = NULL;
7161 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7162 			return (EINVAL);
7163 	}
7164 
7165 	return (0);
7166 }
7167 
7168 /*
7169  * Destroy the context template of the specified algorithm type
7170  * of the specified SA. Must be called while holding the SA lock.
7171  */
7172 void
7173 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7174 {
7175 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7176 
7177 	if (alg_type == IPSEC_ALG_AUTH) {
7178 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7179 			sa->ipsa_authtmpl = NULL;
7180 		else if (sa->ipsa_authtmpl != NULL) {
7181 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7182 			sa->ipsa_authtmpl = NULL;
7183 		}
7184 	} else {
7185 		ASSERT(alg_type == IPSEC_ALG_ENCR);
7186 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7187 			sa->ipsa_encrtmpl = NULL;
7188 		else if (sa->ipsa_encrtmpl != NULL) {
7189 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7190 			sa->ipsa_encrtmpl = NULL;
7191 		}
7192 	}
7193 }
7194 
7195 /*
7196  * Use the kernel crypto framework to check the validity of a key received
7197  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7198  */
7199 int
7200 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7201     boolean_t is_auth, int *diag)
7202 {
7203 	crypto_mechanism_t mech;
7204 	crypto_key_t crypto_key;
7205 	int crypto_rc;
7206 
7207 	mech.cm_type = mech_type;
7208 	mech.cm_param = NULL;
7209 	mech.cm_param_len = 0;
7210 
7211 	crypto_key.ck_format = CRYPTO_KEY_RAW;
7212 	crypto_key.ck_data = sadb_key + 1;
7213 	crypto_key.ck_length = sadb_key->sadb_key_bits;
7214 
7215 	crypto_rc = crypto_key_check(&mech, &crypto_key);
7216 
7217 	switch (crypto_rc) {
7218 	case CRYPTO_SUCCESS:
7219 		return (0);
7220 	case CRYPTO_MECHANISM_INVALID:
7221 	case CRYPTO_MECH_NOT_SUPPORTED:
7222 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7223 		    SADB_X_DIAGNOSTIC_BAD_EALG;
7224 		break;
7225 	case CRYPTO_KEY_SIZE_RANGE:
7226 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7227 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7228 		break;
7229 	case CRYPTO_WEAK_KEY:
7230 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7231 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7232 		break;
7233 	}
7234 
7235 	return (-1);
7236 }
7237 /*
7238  * If this is an outgoing SA then add some fuzz to the
7239  * SOFT EXPIRE time. The reason for this is to stop
7240  * peers trying to renegotiate SOFT expiring SA's at
7241  * the same time. The amount of fuzz needs to be at
7242  * least 10 seconds which is the typical interval
7243  * sadb_ager(), although this is only a guide as it
7244  * selftunes.
7245  */
7246 void
7247 lifetime_fuzz(ipsa_t *assoc)
7248 {
7249 	uint8_t rnd;
7250 
7251 	if (assoc->ipsa_softaddlt == 0)
7252 		return;
7253 
7254 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7255 	rnd = (rnd & 0xF) + 10;
7256 	assoc->ipsa_softexpiretime -= rnd;
7257 	assoc->ipsa_softaddlt -= rnd;
7258 }
7259 void
7260 destroy_ipsa_pair(ipsap_t *ipsapp)
7261 {
7262 	if (ipsapp == NULL)
7263 		return;
7264 
7265 	/*
7266 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7267 	 * them in { }.
7268 	 */
7269 	if (ipsapp->ipsap_sa_ptr != NULL) {
7270 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7271 	}
7272 	if (ipsapp->ipsap_psa_ptr != NULL) {
7273 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7274 	}
7275 
7276 	kmem_free(ipsapp, sizeof (*ipsapp));
7277 }
7278 
7279 /*
7280  * The sadb_ager() function walks through the hash tables of SA's and ages
7281  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7282  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7283  * SA appears in both the inbound and outbound tables because its not possible
7284  * to determine its direction) are placed on a list when they expire. This is
7285  * to ensure that pair/peer SA's are reaped at the same time, even if they
7286  * expire at different times.
7287  *
7288  * This function is called twice by sadb_ager(), one after processing the
7289  * inbound table, then again after processing the outbound table.
7290  */
7291 void
7292 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7293 {
7294 	templist_t *listptr;
7295 	int outhash;
7296 	isaf_t *bucket;
7297 	boolean_t haspeer;
7298 	ipsa_t *peer_assoc, *dying;
7299 	/*
7300 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7301 	 * is address independent.
7302 	 */
7303 	while (haspeerlist != NULL) {
7304 		/* "dying" contains the SA that has a peer. */
7305 		dying = haspeerlist->ipsa;
7306 		haspeer = (dying->ipsa_haspeer);
7307 		listptr = haspeerlist;
7308 		haspeerlist = listptr->next;
7309 		kmem_free(listptr, sizeof (*listptr));
7310 		/*
7311 		 * Pick peer bucket based on addrfam.
7312 		 */
7313 		if (outbound) {
7314 			if (haspeer)
7315 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7316 			else
7317 				bucket = INBOUND_BUCKET(sp,
7318 				    dying->ipsa_otherspi);
7319 		} else { /* inbound */
7320 			if (haspeer) {
7321 				if (dying->ipsa_addrfam == AF_INET6) {
7322 					outhash = OUTBOUND_HASH_V6(sp,
7323 					    *((in6_addr_t *)&dying->
7324 					    ipsa_dstaddr));
7325 				} else {
7326 					outhash = OUTBOUND_HASH_V4(sp,
7327 					    *((ipaddr_t *)&dying->
7328 					    ipsa_dstaddr));
7329 				}
7330 			} else if (dying->ipsa_addrfam == AF_INET6) {
7331 				outhash = OUTBOUND_HASH_V6(sp,
7332 				    *((in6_addr_t *)&dying->
7333 				    ipsa_srcaddr));
7334 			} else {
7335 				outhash = OUTBOUND_HASH_V4(sp,
7336 				    *((ipaddr_t *)&dying->
7337 				    ipsa_srcaddr));
7338 			}
7339 		bucket = &(sp->sdb_of[outhash]);
7340 		}
7341 
7342 		mutex_enter(&bucket->isaf_lock);
7343 		/*
7344 		 * "haspeer" SA's have the same src/dst address ordering,
7345 		 * "paired" SA's have the src/dst addresses reversed.
7346 		 */
7347 		if (haspeer) {
7348 			peer_assoc = ipsec_getassocbyspi(bucket,
7349 			    dying->ipsa_spi, dying->ipsa_srcaddr,
7350 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7351 		} else {
7352 			peer_assoc = ipsec_getassocbyspi(bucket,
7353 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7354 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7355 		}
7356 
7357 		mutex_exit(&bucket->isaf_lock);
7358 		if (peer_assoc != NULL) {
7359 			mutex_enter(&peer_assoc->ipsa_lock);
7360 			mutex_enter(&dying->ipsa_lock);
7361 			if (!haspeer) {
7362 				/*
7363 				 * Only SA's which have a "peer" or are
7364 				 * "paired" end up on this list, so this
7365 				 * must be a "paired" SA, update the flags
7366 				 * to break the pair.
7367 				 */
7368 				peer_assoc->ipsa_otherspi = 0;
7369 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7370 				dying->ipsa_otherspi = 0;
7371 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7372 			}
7373 			if (haspeer || outbound) {
7374 				/*
7375 				 * Update the state of the "inbound" SA when
7376 				 * the "outbound" SA has expired. Don't update
7377 				 * the "outbound" SA when the "inbound" SA
7378 				 * SA expires because setting the hard_addtime
7379 				 * below will cause this to happen.
7380 				 */
7381 				peer_assoc->ipsa_state = dying->ipsa_state;
7382 			}
7383 			if (dying->ipsa_state == IPSA_STATE_DEAD)
7384 				peer_assoc->ipsa_hardexpiretime = 1;
7385 
7386 			mutex_exit(&dying->ipsa_lock);
7387 			mutex_exit(&peer_assoc->ipsa_lock);
7388 			IPSA_REFRELE(peer_assoc);
7389 		}
7390 		IPSA_REFRELE(dying);
7391 	}
7392 }
7393