xref: /titanic_41/usr/src/uts/common/inet/ip/sadb.c (revision 09cb82ca24006b806e9f17e2135eef96364facfe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/stropts.h>
31 #include <sys/errno.h>
32 #include <sys/ddi.h>
33 #include <sys/debug.h>
34 #include <sys/cmn_err.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/kmem.h>
38 #include <sys/sunddi.h>
39 #include <sys/tihdr.h>
40 #include <sys/atomic.h>
41 #include <sys/socket.h>
42 #include <sys/sysmacros.h>
43 #include <sys/crypto/common.h>
44 #include <sys/crypto/api.h>
45 #include <sys/zone.h>
46 #include <netinet/in.h>
47 #include <net/if.h>
48 #include <net/pfkeyv2.h>
49 #include <inet/common.h>
50 #include <netinet/ip6.h>
51 #include <inet/ip.h>
52 #include <inet/ip_ire.h>
53 #include <inet/ip6.h>
54 #include <inet/ipsec_info.h>
55 #include <inet/tcp.h>
56 #include <inet/sadb.h>
57 #include <inet/ipsec_impl.h>
58 #include <inet/ipsecah.h>
59 #include <inet/ipsecesp.h>
60 #include <sys/random.h>
61 #include <sys/dlpi.h>
62 #include <sys/iphada.h>
63 #include <inet/ip_if.h>
64 #include <inet/ipdrop.h>
65 #include <inet/ipclassifier.h>
66 #include <inet/sctp_ip.h>
67 #include <inet/tun.h>
68 
69 /*
70  * This source file contains Security Association Database (SADB) common
71  * routines.  They are linked in with the AH module.  Since AH has no chance
72  * of falling under export control, it was safe to link it in there.
73  */
74 
75 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
76     ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
77 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
78 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
79 static void sadb_drain_torchq(queue_t *, mblk_t *);
80 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
81 			    netstack_t *);
82 static void sadb_destroy(sadb_t *, netstack_t *);
83 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
84 
85 static time_t sadb_add_time(time_t, uint64_t);
86 static void lifetime_fuzz(ipsa_t *);
87 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
88 
89 /*
90  * ipsacq_maxpackets is defined here to make it tunable
91  * from /etc/system.
92  */
93 extern uint64_t ipsacq_maxpackets;
94 
/*
 * If the SA's ipsa_<delta> lifetime is nonzero, set ipsa_<exp> to the SA's
 * add time plus that lifetime, as computed by sadb_add_time().  A zero
 * lifetime leaves ipsa_<exp> untouched.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if (0 != ((sa)->ipsa_ ## delta)) {				\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
	}								\
}
101 
/*
 * If the SA's ipsa_<delta> lifetime is nonzero, compute the SA's use time
 * plus that lifetime (via sadb_add_time()) and store the result in
 * ipsa_<exp> when ipsa_<exp> is still zero or is later than the newly
 * computed value.  In other words, ipsa_<exp> only ever moves earlier.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t use_exp = sadb_add_time((sa)->ipsa_usetime,	\
		    (sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0 ||			\
		    use_exp < ((sa)->ipsa_ ## exp))			\
			(sa)->ipsa_ ## exp = use_exp;			\
	}								\
}
113 
114 
115 /* wrap the macro so we can pass it as a function pointer */
116 void
117 sadb_sa_refrele(void *target)
118 {
119 	IPSA_REFRELE(((ipsa_t *)target));
120 }
121 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * Returns base + delta, or TIME_MAX if that sum cannot be represented
 * in a time_t.
 *
 * The overflow test is done BEFORE the addition.  Adding first and then
 * comparing the result against the base relies on the conversion of an
 * out-of-range unsigned value to a signed type, which is
 * implementation-defined in C.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.  After this, delta always fits in a time_t.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = (uint64_t)TIME_MAX;

	/*
	 * Only a positive base can push the sum past TIME_MAX; for
	 * base <= 0 the clipped delta cannot overflow.  (TIME_MAX - base)
	 * is itself only safe to compute when base is positive.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
172 
173 /*
174  * Callers of this function have already created a working security
175  * association, and have found the appropriate table & hash chain.  All this
176  * function does is check duplicates, and insert the SA.  The caller needs to
177  * hold the hash bucket lock and increment the refcnt before insertion.
178  *
179  * Return 0 if success, EEXIST if collision.
180  */
181 #define	SA_UNIQUE_MATCH(sa1, sa2) \
182 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
183 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
184 
185 int
186 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
187 {
188 	ipsa_t **ptpn = NULL;
189 	ipsa_t *walker;
190 	boolean_t unspecsrc;
191 
192 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
193 
194 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
195 
196 	walker = bucket->isaf_ipsa;
197 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
198 
199 	/*
200 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
201 	 * of the list unless there's an unspecified source address, then
202 	 * insert it after the last SA with a specified source address.
203 	 *
204 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
205 	 * checking for collisions.
206 	 */
207 
208 	while (walker != NULL) {
209 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
210 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
211 			if (walker->ipsa_spi == ipsa->ipsa_spi)
212 				return (EEXIST);
213 
214 			mutex_enter(&walker->ipsa_lock);
215 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
216 			    (walker->ipsa_flags & IPSA_F_USED) &&
217 			    SA_UNIQUE_MATCH(walker, ipsa)) {
218 				walker->ipsa_flags |= IPSA_F_CINVALID;
219 			}
220 			mutex_exit(&walker->ipsa_lock);
221 		}
222 
223 		if (ptpn == NULL && unspecsrc) {
224 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
225 			    walker->ipsa_addrfam))
226 				ptpn = walker->ipsa_ptpn;
227 			else if (walker->ipsa_next == NULL)
228 				ptpn = &walker->ipsa_next;
229 		}
230 
231 		walker = walker->ipsa_next;
232 	}
233 
234 	if (ptpn == NULL)
235 		ptpn = &bucket->isaf_ipsa;
236 	ipsa->ipsa_next = *ptpn;
237 	ipsa->ipsa_ptpn = ptpn;
238 	if (ipsa->ipsa_next != NULL)
239 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
240 	*ptpn = ipsa;
241 	ipsa->ipsa_linklock = &bucket->isaf_lock;
242 
243 	return (0);
244 }
245 #undef SA_UNIQUE_MATCH
246 
247 /*
248  * Free a security association.  Its reference count is 0, which means
249  * I must free it.  The SA must be unlocked and must not be linked into
250  * any fanout list.
251  */
252 static void
253 sadb_freeassoc(ipsa_t *ipsa)
254 {
255 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
256 
257 	ASSERT(ipss != NULL);
258 	ASSERT(!MUTEX_HELD(&ipsa->ipsa_lock));
259 	ASSERT(ipsa->ipsa_refcnt == 0);
260 	ASSERT(ipsa->ipsa_next == NULL);
261 	ASSERT(ipsa->ipsa_ptpn == NULL);
262 
263 	ip_drop_packet(sadb_clear_lpkt(ipsa), B_TRUE, NULL, NULL,
264 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
265 	    &ipss->ipsec_sadb_dropper);
266 
267 	mutex_enter(&ipsa->ipsa_lock);
268 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
269 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
270 	mutex_exit(&ipsa->ipsa_lock);
271 
272 	/* bzero() these fields for paranoia's sake. */
273 	if (ipsa->ipsa_authkey != NULL) {
274 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
275 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
276 	}
277 	if (ipsa->ipsa_encrkey != NULL) {
278 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
279 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
280 	}
281 	if (ipsa->ipsa_src_cid != NULL) {
282 		IPSID_REFRELE(ipsa->ipsa_src_cid);
283 	}
284 	if (ipsa->ipsa_dst_cid != NULL) {
285 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
286 	}
287 	if (ipsa->ipsa_integ != NULL)
288 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
289 	if (ipsa->ipsa_sens != NULL)
290 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
291 
292 	mutex_destroy(&ipsa->ipsa_lock);
293 	kmem_free(ipsa, sizeof (*ipsa));
294 }
295 
296 /*
297  * Unlink a security association from a hash bucket.  Assume the hash bucket
298  * lock is held, but the association's lock is not.
299  *
300  * Note that we do not bump the bucket's generation number here because
301  * we might not be making a visible change to the set of visible SA's.
302  * All callers MUST bump the bucket's generation number before they unlock
303  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
304  * was present in the bucket at the time it was locked.
305  */
306 void
307 sadb_unlinkassoc(ipsa_t *ipsa)
308 {
309 	ASSERT(ipsa->ipsa_linklock != NULL);
310 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
311 
312 	/* These fields are protected by the link lock. */
313 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
314 	if (ipsa->ipsa_next != NULL) {
315 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
316 		ipsa->ipsa_next = NULL;
317 	}
318 
319 	ipsa->ipsa_ptpn = NULL;
320 
321 	/* This may destroy the SA. */
322 	IPSA_REFRELE(ipsa);
323 }
324 
325 /*
326  * Create a larval security association with the specified SPI.	 All other
327  * fields are zeroed.
328  */
329 static ipsa_t *
330 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
331     netstack_t *ns)
332 {
333 	ipsa_t *newbie;
334 
335 	/*
336 	 * Allocate...
337 	 */
338 
339 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
340 	if (newbie == NULL) {
341 		/* Can't make new larval SA. */
342 		return (NULL);
343 	}
344 
345 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
346 	newbie->ipsa_spi = spi;
347 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
348 
349 	/*
350 	 * Copy addresses...
351 	 */
352 
353 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
354 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
355 
356 	newbie->ipsa_addrfam = addrfam;
357 
358 	/*
359 	 * Set common initialization values, including refcnt.
360 	 */
361 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
362 	newbie->ipsa_state = IPSA_STATE_LARVAL;
363 	newbie->ipsa_refcnt = 1;
364 	newbie->ipsa_freefunc = sadb_freeassoc;
365 
366 	/*
367 	 * There aren't a lot of other common initialization values, as
368 	 * they are copied in from the PF_KEY message.
369 	 */
370 
371 	return (newbie);
372 }
373 
374 /*
375  * Call me to initialize a security association fanout.
376  */
377 static int
378 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
379 {
380 	isaf_t *table;
381 	int i;
382 
383 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
384 	*tablep = table;
385 
386 	if (table == NULL)
387 		return (ENOMEM);
388 
389 	for (i = 0; i < size; i++) {
390 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
391 		table[i].isaf_ipsa = NULL;
392 		table[i].isaf_gen = 0;
393 	}
394 
395 	return (0);
396 }
397 
398 /*
399  * Call me to initialize an acquire fanout
400  */
401 static int
402 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
403 {
404 	iacqf_t *table;
405 	int i;
406 
407 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
408 	*tablep = table;
409 
410 	if (table == NULL)
411 		return (ENOMEM);
412 
413 	for (i = 0; i < size; i++) {
414 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
415 		table[i].iacqf_ipsacq = NULL;
416 	}
417 
418 	return (0);
419 }
420 
421 /*
422  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
423  * caller must clean up partial allocations.
424  */
425 static int
426 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
427 {
428 	ASSERT(sp->sdb_of == NULL);
429 	ASSERT(sp->sdb_if == NULL);
430 	ASSERT(sp->sdb_acq == NULL);
431 
432 	sp->sdb_hashsize = size;
433 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
434 		return (ENOMEM);
435 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
436 		return (ENOMEM);
437 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
438 		return (ENOMEM);
439 
440 	return (0);
441 }
442 
443 /*
444  * Call me to initialize an SADB instance; fall back to default size on failure.
445  */
446 static void
447 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
448     netstack_t *ns)
449 {
450 	ASSERT(sp->sdb_of == NULL);
451 	ASSERT(sp->sdb_if == NULL);
452 	ASSERT(sp->sdb_acq == NULL);
453 
454 	if (size < IPSEC_DEFAULT_HASH_SIZE)
455 		size = IPSEC_DEFAULT_HASH_SIZE;
456 
457 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
458 
459 		cmn_err(CE_WARN,
460 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
461 		    size, ver, name);
462 
463 		sadb_destroy(sp, ns);
464 		size = IPSEC_DEFAULT_HASH_SIZE;
465 		cmn_err(CE_WARN, "Falling back to %d entries", size);
466 		(void) sadb_init_trial(sp, size, KM_SLEEP);
467 	}
468 }
469 
470 
471 /*
472  * Initialize an SADB-pair.
473  */
474 void
475 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
476 {
477 	sadb_init(name, &sp->s_v4, size, 4, ns);
478 	sadb_init(name, &sp->s_v6, size, 6, ns);
479 
480 	sp->s_satype = type;
481 
482 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
483 	if (type == SADB_SATYPE_AH) {
484 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
485 
486 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
487 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
488 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
489 	} else {
490 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
491 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
492 	}
493 }
494 
495 /*
496  * Deliver a single SADB_DUMP message representing a single SA.  This is
497  * called many times by sadb_dump().
498  *
499  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
500  * the caller should take that as a hint that dupb() on the "original answer"
501  * failed, and that perhaps the caller should try again with a copyb()ed
502  * "original answer".
503  */
504 static int
505 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
506     sadb_msg_t *samsg)
507 {
508 	mblk_t *answer;
509 
510 	answer = dupb(original_answer);
511 	if (answer == NULL)
512 		return (ENOBUFS);
513 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
514 	if (answer->b_cont == NULL) {
515 		freeb(answer);
516 		return (ENOMEM);
517 	}
518 
519 	/* Just do a putnext, and let keysock deal with flow control. */
520 	putnext(pfkey_q, answer);
521 	return (0);
522 }
523 
524 /*
525  * Common function to allocate and prepare a keysock_out_t M_CTL message.
526  */
527 mblk_t *
528 sadb_keysock_out(minor_t serial)
529 {
530 	mblk_t *mp;
531 	keysock_out_t *kso;
532 
533 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
534 	if (mp != NULL) {
535 		mp->b_datap->db_type = M_CTL;
536 		mp->b_wptr += sizeof (ipsec_info_t);
537 		kso = (keysock_out_t *)mp->b_rptr;
538 		kso->ks_out_type = KEYSOCK_OUT;
539 		kso->ks_out_len = sizeof (*kso);
540 		kso->ks_out_serial = serial;
541 	}
542 
543 	return (mp);
544 }
545 
546 /*
547  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
548  * to keysock.
549  */
550 static int
551 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
552     int num_entries, boolean_t do_peers)
553 {
554 	int i, error = 0;
555 	mblk_t *original_answer;
556 	ipsa_t *walker;
557 	sadb_msg_t *samsg;
558 
559 	/*
560 	 * For each IPSA hash bucket do:
561 	 *	- Hold the mutex
562 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
563 	 */
564 	ASSERT(mp->b_cont != NULL);
565 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
566 
567 	original_answer = sadb_keysock_out(serial);
568 	if (original_answer == NULL)
569 		return (ENOMEM);
570 
571 	for (i = 0; i < num_entries; i++) {
572 		mutex_enter(&fanout[i].isaf_lock);
573 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
574 		    walker = walker->ipsa_next) {
575 			if (!do_peers && walker->ipsa_haspeer)
576 				continue;
577 			error = sadb_dump_deliver(pfkey_q, original_answer,
578 			    walker, samsg);
579 			if (error == ENOBUFS) {
580 				mblk_t *new_original_answer;
581 
582 				/* Ran out of dupb's.  Try a copyb. */
583 				new_original_answer = copyb(original_answer);
584 				if (new_original_answer == NULL) {
585 					error = ENOMEM;
586 				} else {
587 					freeb(original_answer);
588 					original_answer = new_original_answer;
589 					error = sadb_dump_deliver(pfkey_q,
590 					    original_answer, walker, samsg);
591 				}
592 			}
593 			if (error != 0)
594 				break;	/* out of for loop. */
595 		}
596 		mutex_exit(&fanout[i].isaf_lock);
597 		if (error != 0)
598 			break;	/* out of for loop. */
599 	}
600 
601 	freeb(original_answer);
602 	return (error);
603 }
604 
605 /*
606  * Dump an entire SADB; outbound first, then inbound.
607  */
608 
609 int
610 sadb_dump(queue_t *pfkey_q, mblk_t *mp, minor_t serial, sadb_t *sp)
611 {
612 	int error;
613 
614 	/* Dump outbound */
615 	error = sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_of,
616 	    sp->sdb_hashsize, B_TRUE);
617 	if (error)
618 		return (error);
619 
620 	/* Dump inbound */
621 	return sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_if,
622 	    sp->sdb_hashsize, B_FALSE);
623 }
624 
625 /*
626  * Generic sadb table walker.
627  *
628  * Call "walkfn" for each SA in each bucket in "table"; pass the
629  * bucket, the entry and "cookie" to the callback function.
630  * Take care to ensure that walkfn can delete the SA without screwing
631  * up our traverse.
632  *
633  * The bucket is locked for the duration of the callback, both so that the
634  * callback can just call sadb_unlinkassoc() when it wants to delete something,
635  * and so that no new entries are added while we're walking the list.
636  */
637 static void
638 sadb_walker(isaf_t *table, uint_t numentries,
639     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
640     void *cookie)
641 {
642 	int i;
643 	for (i = 0; i < numentries; i++) {
644 		ipsa_t *entry, *next;
645 
646 		mutex_enter(&table[i].isaf_lock);
647 
648 		for (entry = table[i].isaf_ipsa; entry != NULL;
649 		    entry = next) {
650 			next = entry->ipsa_next;
651 			(*walkfn)(&table[i], entry, cookie);
652 		}
653 		mutex_exit(&table[i].isaf_lock);
654 	}
655 }
656 
657 /*
658  * From the given SA, construct a dl_ct_ipsec_key and
659  * a dl_ct_ipsec structures to be sent to the adapter as part
660  * of a DL_CONTROL_REQ.
661  *
662  * ct_sa must point to the storage allocated for the key
663  * structure and must be followed by storage allocated
664  * for the SA information that must be sent to the driver
665  * as part of the DL_CONTROL_REQ request.
666  *
667  * The is_inbound boolean indicates whether the specified
668  * SA is part of an inbound SA table.
669  *
670  * Returns B_TRUE if the corresponding SA must be passed to
671  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
672  */
673 static boolean_t
674 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
675 {
676 	dl_ct_ipsec_key_t *keyp;
677 	dl_ct_ipsec_t *sap;
678 	void *ct_sa = mp->b_wptr;
679 
680 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
681 
682 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
683 	sap = (dl_ct_ipsec_t *)(keyp + 1);
684 
685 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
686 	    "is_inbound = %d\n", is_inbound));
687 
688 	/* initialize flag */
689 	sap->sadb_sa_flags = 0;
690 	if (is_inbound) {
691 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
692 		/*
693 		 * If an inbound SA has a peer, then mark it has being
694 		 * an outbound SA as well.
695 		 */
696 		if (sa->ipsa_haspeer)
697 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
698 	} else {
699 		/*
700 		 * If an outbound SA has a peer, then don't send it,
701 		 * since we will send the copy from the inbound table.
702 		 */
703 		if (sa->ipsa_haspeer) {
704 			freemsg(mp);
705 			return (B_FALSE);
706 		}
707 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
708 	}
709 
710 	keyp->dl_key_spi = sa->ipsa_spi;
711 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
712 	    DL_CTL_IPSEC_ADDR_LEN);
713 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
714 
715 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
716 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
717 
718 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
719 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
720 	bcopy(sa->ipsa_authkey,
721 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
722 
723 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
724 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
725 	bcopy(sa->ipsa_encrkey,
726 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
727 
728 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
729 	return (B_TRUE);
730 }
731 
732 /*
733  * Called from AH or ESP to format a message which will be used to inform
734  * IPsec-acceleration-capable ills of a SADB change.
735  * (It is not possible to send the message to IP directly from this function
736  * since the SA, if any, is locked during the call).
737  *
738  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
739  * sa_type: identifies whether the operation applies to AH or ESP
740  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
741  * sa: Pointer to an SA.  Must be non-NULL and locked
742  *	for ADD, DELETE, GET, and UPDATE operations.
743  * This function returns an mblk chain that must be passed to IP
744  * for forwarding to the IPsec capable providers.
745  */
746 mblk_t *
747 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
748     boolean_t is_inbound)
749 {
750 	mblk_t *mp;
751 	dl_control_req_t *ctrl;
752 	boolean_t need_key = B_FALSE;
753 	mblk_t *ctl_mp = NULL;
754 	ipsec_ctl_t *ctl;
755 
756 	/*
757 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
758 	 * 2 if a key is needed for the operation
759 	 *    2.1 initialize key
760 	 *    2.2 if a full SA is needed for the operation
761 	 *	2.2.1 initialize full SA info
762 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
763 	 * to send the resulting message to IPsec capable ills.
764 	 */
765 
766 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
767 
768 	/*
769 	 * Allocate DL_CONTROL_REQ M_PROTO
770 	 * We allocate room for the SA even if it's not needed
771 	 * by some of the operations (for example flush)
772 	 */
773 	mp = allocb(sizeof (dl_control_req_t) +
774 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
775 	if (mp == NULL)
776 		return (NULL);
777 	mp->b_datap->db_type = M_PROTO;
778 
779 	/* initialize dl_control_req_t */
780 	ctrl = (dl_control_req_t *)mp->b_wptr;
781 	ctrl->dl_primitive = DL_CONTROL_REQ;
782 	ctrl->dl_operation = dl_operation;
783 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
784 	    DL_CT_IPSEC_ESP;
785 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
786 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
787 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
788 	    sizeof (dl_ct_ipsec_key_t);
789 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
790 	mp->b_wptr += sizeof (dl_control_req_t);
791 
792 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
793 		ASSERT(sa != NULL);
794 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
795 
796 		need_key = B_TRUE;
797 
798 		/*
799 		 * Initialize key and SA data. Note that for some
800 		 * operations the SA data is ignored by the provider
801 		 * (delete, etc.)
802 		 */
803 		if (!sadb_req_from_sa(sa, mp, is_inbound))
804 			return (NULL);
805 	}
806 
807 	/* construct control message */
808 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
809 	if (ctl_mp == NULL) {
810 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
811 		freemsg(mp);
812 		return (NULL);
813 	}
814 
815 	ctl_mp->b_datap->db_type = M_CTL;
816 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
817 	ctl_mp->b_cont = mp;
818 
819 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
820 	ctl->ipsec_ctl_type = IPSEC_CTL;
821 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
822 	ctl->ipsec_ctl_sa_type = sa_type;
823 
824 	if (need_key) {
825 		/*
826 		 * Keep an additional reference on SA, since it will be
827 		 * needed by IP to send control messages corresponding
828 		 * to that SA from its perimeter. IP will do a
829 		 * IPSA_REFRELE when done with the request.
830 		 */
831 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
832 		IPSA_REFHOLD(sa);
833 		ctl->ipsec_ctl_sa = sa;
834 	} else
835 		ctl->ipsec_ctl_sa = NULL;
836 
837 	return (ctl_mp);
838 }
839 
840 
841 /*
842  * Called by sadb_ill_download() to dump the entries for a specific
843  * fanout table.  For each SA entry in the table passed as argument,
844  * use mp as a template and constructs a full DL_CONTROL message, and
845  * call ill_dlpi_send(), provided by IP, to send the resulting
846  * messages to the ill.
847  */
848 static void
849 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
850     boolean_t is_inbound)
851 {
852 	ipsa_t *walker;
853 	mblk_t *nmp, *salist;
854 	int i, error = 0;
855 	ip_stack_t	*ipst = ill->ill_ipst;
856 	netstack_t	*ns = ipst->ips_netstack;
857 
858 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
859 	    (void *)fanout, num_entries));
860 	/*
861 	 * For each IPSA hash bucket do:
862 	 *	- Hold the mutex
863 	 *	- Walk each entry, sending a corresponding request to IP
864 	 *	  for it.
865 	 */
866 	ASSERT(mp->b_datap->db_type == M_PROTO);
867 
868 	for (i = 0; i < num_entries; i++) {
869 		mutex_enter(&fanout[i].isaf_lock);
870 		salist = NULL;
871 
872 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
873 		    walker = walker->ipsa_next) {
874 			IPSECHW_DEBUG(IPSECHW_SADB,
875 			    ("sadb_ill_df: sending SA to ill via IP \n"));
876 			/*
877 			 * Duplicate the template mp passed and
878 			 * complete DL_CONTROL_REQ data.
879 			 * To be more memory efficient, we could use
880 			 * dupb() for the M_CTL and copyb() for the M_PROTO
881 			 * as the M_CTL, since the M_CTL is the same for
882 			 * every SA entry passed down to IP for the same ill.
883 			 *
884 			 * Note that copymsg/copyb ensure that the new mblk
885 			 * is at least as large as the source mblk even if it's
886 			 * not using all its storage -- therefore, nmp
887 			 * has trailing space for sadb_req_from_sa to add
888 			 * the SA-specific bits.
889 			 */
890 			mutex_enter(&walker->ipsa_lock);
891 			if (ipsec_capab_match(ill,
892 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
893 			    walker, ns)) {
894 				nmp = copymsg(mp);
895 				if (nmp == NULL) {
896 					IPSECHW_DEBUG(IPSECHW_SADB,
897 					    ("sadb_ill_df: alloc error\n"));
898 					error = ENOMEM;
899 					mutex_exit(&walker->ipsa_lock);
900 					break;
901 				}
902 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
903 					nmp->b_next = salist;
904 					salist = nmp;
905 				}
906 			}
907 			mutex_exit(&walker->ipsa_lock);
908 		}
909 		mutex_exit(&fanout[i].isaf_lock);
910 		while (salist != NULL) {
911 			nmp = salist;
912 			salist = nmp->b_next;
913 			nmp->b_next = NULL;
914 			ill_dlpi_send(ill, nmp);
915 		}
916 		if (error != 0)
917 			break;	/* out of for loop. */
918 	}
919 }
920 
921 /*
922  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
923  * the type specified by sa_type to the specified ill.
924  *
925  * We call for each fanout table defined by the SADB (one per
926  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
927  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
928  * message to the ill.
929  */
930 void
931 sadb_ill_download(ill_t *ill, uint_t sa_type)
932 {
933 	mblk_t *protomp;	/* prototype message */
934 	dl_control_req_t *ctrl;
935 	sadbp_t *spp;
936 	sadb_t *sp;
937 	int dlt;
938 	ip_stack_t	*ipst = ill->ill_ipst;
939 	netstack_t	*ns = ipst->ips_netstack;
940 
941 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
942 
943 	/*
944 	 * Allocate and initialize prototype answer. A duplicate for
945 	 * each SA is sent down to the interface.
946 	 */
947 
948 	/* DL_CONTROL_REQ M_PROTO mblk_t */
949 	protomp = allocb(sizeof (dl_control_req_t) +
950 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
951 	if (protomp == NULL)
952 		return;
953 	protomp->b_datap->db_type = M_PROTO;
954 
955 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
956 	if (sa_type == SADB_SATYPE_ESP) {
957 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
958 
959 		spp = &espstack->esp_sadb;
960 	} else {
961 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
962 
963 		spp = &ahstack->ah_sadb;
964 	}
965 
966 	ctrl = (dl_control_req_t *)protomp->b_wptr;
967 	ctrl->dl_primitive = DL_CONTROL_REQ;
968 	ctrl->dl_operation = DL_CO_SET;
969 	ctrl->dl_type = dlt;
970 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
971 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
972 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
973 	    sizeof (dl_ct_ipsec_key_t);
974 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
975 	protomp->b_wptr += sizeof (dl_control_req_t);
976 
977 	/*
978 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
979 	 * and dl_ct_ipsec_t
980 	 */
981 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
982 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
983 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
984 	freemsg(protomp);
985 }
986 
987 /*
988  * Call me to free up a security association fanout.  Use the forever
989  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
990  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
991  * when a module is unloaded).
992  */
993 static void
994 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever)
995 {
996 	int i;
997 	isaf_t *table = *tablep;
998 
999 	if (table == NULL)
1000 		return;
1001 
1002 	for (i = 0; i < numentries; i++) {
1003 		mutex_enter(&table[i].isaf_lock);
1004 		while (table[i].isaf_ipsa != NULL)
1005 			sadb_unlinkassoc(table[i].isaf_ipsa);
1006 		table[i].isaf_gen++;
1007 		mutex_exit(&table[i].isaf_lock);
1008 		if (forever)
1009 			mutex_destroy(&(table[i].isaf_lock));
1010 	}
1011 
1012 	if (forever) {
1013 		*tablep = NULL;
1014 		kmem_free(table, numentries * sizeof (*table));
1015 	}
1016 }
1017 
1018 /*
1019  * Entry points to sadb_destroyer().
1020  */
1021 static void
1022 sadb_flush(sadb_t *sp, netstack_t *ns)
1023 {
1024 	/*
1025 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1026 	 * enforcement, there would be a subtlety where I could add on the
1027 	 * heels of a flush.  With keysock's enforcement, however, this
1028 	 * makes ESP's job easy.
1029 	 */
1030 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1031 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE);
1032 
1033 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1034 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
1035 }
1036 
1037 static void
1038 sadb_destroy(sadb_t *sp, netstack_t *ns)
1039 {
1040 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE);
1041 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1042 
1043 	/* For each acquire, destroy it, including the bucket mutex. */
1044 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1045 
1046 	ASSERT(sp->sdb_of == NULL);
1047 	ASSERT(sp->sdb_if == NULL);
1048 	ASSERT(sp->sdb_acq == NULL);
1049 }
1050 
1051 static void
1052 sadb_send_flush_req(sadbp_t *spp)
1053 {
1054 	mblk_t *ctl_mp;
1055 
1056 	/*
1057 	 * we've been unplumbed, or never were plumbed; don't go there.
1058 	 */
1059 	if (spp->s_ip_q == NULL)
1060 		return;
1061 
1062 	/* have IP send a flush msg to the IPsec accelerators */
1063 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1064 	if (ctl_mp != NULL)
1065 		putnext(spp->s_ip_q, ctl_mp);
1066 }
1067 
1068 void
1069 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1070 {
1071 	sadb_flush(&spp->s_v4, ns);
1072 	sadb_flush(&spp->s_v6, ns);
1073 
1074 	sadb_send_flush_req(spp);
1075 }
1076 
1077 void
1078 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1079 {
1080 	sadb_destroy(&spp->s_v4, ns);
1081 	sadb_destroy(&spp->s_v6, ns);
1082 
1083 	sadb_send_flush_req(spp);
1084 	if (spp->s_satype == SADB_SATYPE_AH) {
1085 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1086 
1087 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1088 	}
1089 }
1090 
1091 
1092 /*
1093  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1094  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1095  * EINVAL.
1096  */
1097 int
1098 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft)
1099 {
1100 	if (hard == NULL || soft == NULL)
1101 		return (0);
1102 
1103 	if (hard->sadb_lifetime_allocations != 0 &&
1104 	    soft->sadb_lifetime_allocations != 0 &&
1105 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1106 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1107 
1108 	if (hard->sadb_lifetime_bytes != 0 &&
1109 	    soft->sadb_lifetime_bytes != 0 &&
1110 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1111 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1112 
1113 	if (hard->sadb_lifetime_addtime != 0 &&
1114 	    soft->sadb_lifetime_addtime != 0 &&
1115 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1116 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1117 
1118 	if (hard->sadb_lifetime_usetime != 0 &&
1119 	    soft->sadb_lifetime_usetime != 0 &&
1120 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1121 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1122 
1123 	return (0);
1124 }
1125 
1126 /*
1127  * Clone a security association for the purposes of inserting a single SA
1128  * into inbound and outbound tables respectively. This function should only
1129  * be called from sadb_common_add().
1130  */
1131 static ipsa_t *
1132 sadb_cloneassoc(ipsa_t *ipsa)
1133 {
1134 	ipsa_t *newbie;
1135 	boolean_t error = B_FALSE;
1136 
1137 	ASSERT(!MUTEX_HELD(&(ipsa->ipsa_lock)));
1138 
1139 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1140 	if (newbie == NULL)
1141 		return (NULL);
1142 
1143 	/* Copy over what we can. */
1144 	*newbie = *ipsa;
1145 
1146 	/* bzero and initialize locks, in case *_init() allocates... */
1147 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1148 
1149 	/*
1150 	 * While somewhat dain-bramaged, the most graceful way to
1151 	 * recover from errors is to keep plowing through the
1152 	 * allocations, and getting what I can.  It's easier to call
1153 	 * sadb_freeassoc() on the stillborn clone when all the
1154 	 * pointers aren't pointing to the parent's data.
1155 	 */
1156 
1157 	if (ipsa->ipsa_authkey != NULL) {
1158 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1159 		    KM_NOSLEEP);
1160 		if (newbie->ipsa_authkey == NULL) {
1161 			error = B_TRUE;
1162 		} else {
1163 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1164 			    newbie->ipsa_authkeylen);
1165 
1166 			newbie->ipsa_kcfauthkey.ck_data =
1167 			    newbie->ipsa_authkey;
1168 		}
1169 
1170 		if (newbie->ipsa_amech.cm_param != NULL) {
1171 			newbie->ipsa_amech.cm_param =
1172 			    (char *)&newbie->ipsa_mac_len;
1173 		}
1174 	}
1175 
1176 	if (ipsa->ipsa_encrkey != NULL) {
1177 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1178 		    KM_NOSLEEP);
1179 		if (newbie->ipsa_encrkey == NULL) {
1180 			error = B_TRUE;
1181 		} else {
1182 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1183 			    newbie->ipsa_encrkeylen);
1184 
1185 			newbie->ipsa_kcfencrkey.ck_data =
1186 			    newbie->ipsa_encrkey;
1187 		}
1188 	}
1189 
1190 	newbie->ipsa_authtmpl = NULL;
1191 	newbie->ipsa_encrtmpl = NULL;
1192 	newbie->ipsa_haspeer = B_TRUE;
1193 
1194 	if (ipsa->ipsa_integ != NULL) {
1195 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1196 		    KM_NOSLEEP);
1197 		if (newbie->ipsa_integ == NULL) {
1198 			error = B_TRUE;
1199 		} else {
1200 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1201 			    newbie->ipsa_integlen);
1202 		}
1203 	}
1204 
1205 	if (ipsa->ipsa_sens != NULL) {
1206 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1207 		    KM_NOSLEEP);
1208 		if (newbie->ipsa_sens == NULL) {
1209 			error = B_TRUE;
1210 		} else {
1211 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1212 			    newbie->ipsa_senslen);
1213 		}
1214 	}
1215 
1216 	if (ipsa->ipsa_src_cid != NULL) {
1217 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1218 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1219 	}
1220 
1221 	if (ipsa->ipsa_dst_cid != NULL) {
1222 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1223 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1224 	}
1225 
1226 	if (error) {
1227 		sadb_freeassoc(newbie);
1228 		return (NULL);
1229 	}
1230 
1231 	return (newbie);
1232 }
1233 
1234 /*
1235  * Initialize a SADB address extension at the address specified by addrext.
1236  * Return a pointer to the end of the new address extension.
1237  */
1238 static uint8_t *
1239 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1240     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1241 {
1242 	struct sockaddr_in *sin;
1243 	struct sockaddr_in6 *sin6;
1244 	uint8_t *cur = start;
1245 	int addrext_len;
1246 	int sin_len;
1247 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1248 
1249 	if (cur == NULL)
1250 		return (NULL);
1251 
1252 	cur += sizeof (*addrext);
1253 	if (cur > end)
1254 		return (NULL);
1255 
1256 	addrext->sadb_address_proto = proto;
1257 	addrext->sadb_address_prefixlen = prefix;
1258 	addrext->sadb_address_reserved = 0;
1259 	addrext->sadb_address_exttype = exttype;
1260 
1261 	switch (af) {
1262 	case AF_INET:
1263 		sin = (struct sockaddr_in *)cur;
1264 		sin_len = sizeof (*sin);
1265 		cur += sin_len;
1266 		if (cur > end)
1267 			return (NULL);
1268 
1269 		sin->sin_family = af;
1270 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1271 		sin->sin_port = port;
1272 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1273 		break;
1274 	case AF_INET6:
1275 		sin6 = (struct sockaddr_in6 *)cur;
1276 		sin_len = sizeof (*sin6);
1277 		cur += sin_len;
1278 		if (cur > end)
1279 			return (NULL);
1280 
1281 		bzero(sin6, sizeof (*sin6));
1282 		sin6->sin6_family = af;
1283 		sin6->sin6_port = port;
1284 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1285 		break;
1286 	}
1287 
1288 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1289 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1290 
1291 	cur = start + addrext_len;
1292 	if (cur > end)
1293 		cur = NULL;
1294 
1295 	return (cur);
1296 }
1297 
1298 /*
1299  * Construct a key management cookie extension.
1300  */
1301 
1302 static uint8_t *
1303 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1304 {
1305 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1306 
1307 	if (cur == NULL)
1308 		return (NULL);
1309 
1310 	cur += sizeof (*kmcext);
1311 
1312 	if (cur > end)
1313 		return (NULL);
1314 
1315 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1316 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1317 	kmcext->sadb_x_kmc_proto = kmp;
1318 	kmcext->sadb_x_kmc_cookie = kmc;
1319 	kmcext->sadb_x_kmc_reserved = 0;
1320 
1321 	return (cur);
1322 }
1323 
1324 /*
1325  * Given an original message header with sufficient space following it, and an
1326  * SA, construct a full PF_KEY message with all of the relevant extensions.
1327  * This is mostly used for SADB_GET, and SADB_DUMP.
1328  */
1329 static mblk_t *
1330 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1331 {
1332 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1333 	int srcidsize, dstidsize;
1334 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1335 				/* src/dst and proxy sockaddrs. */
1336 	/*
1337 	 * The following are pointers into the PF_KEY message this PF_KEY
1338 	 * message creates.
1339 	 */
1340 	sadb_msg_t *newsamsg;
1341 	sadb_sa_t *assoc;
1342 	sadb_lifetime_t *lt;
1343 	sadb_key_t *key;
1344 	sadb_ident_t *ident;
1345 	sadb_sens_t *sens;
1346 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1347 	sadb_x_pair_t *pair_ext;
1348 
1349 	mblk_t *mp;
1350 	uint64_t *bitmap;
1351 	uint8_t *cur, *end;
1352 	/* These indicate the presence of the above extension fields. */
1353 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1354 	boolean_t paired;
1355 	uint32_t otherspi;
1356 
1357 	/* First off, figure out the allocation length for this message. */
1358 
1359 	/*
1360 	 * Constant stuff.  This includes base, SA, address (src, dst),
1361 	 * and lifetime (current).
1362 	 */
1363 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1364 	    sizeof (sadb_lifetime_t);
1365 
1366 	fam = ipsa->ipsa_addrfam;
1367 	switch (fam) {
1368 	case AF_INET:
1369 		addrsize = roundup(sizeof (struct sockaddr_in) +
1370 		    sizeof (sadb_address_t), sizeof (uint64_t));
1371 		break;
1372 	case AF_INET6:
1373 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1374 		    sizeof (sadb_address_t), sizeof (uint64_t));
1375 		break;
1376 	default:
1377 		return (NULL);
1378 	}
1379 	/*
1380 	 * Allocate TWO address extensions, for source and destination.
1381 	 * (Thus, the * 2.)
1382 	 */
1383 	alloclen += addrsize * 2;
1384 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1385 		alloclen += addrsize;
1386 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1387 		alloclen += addrsize;
1388 
1389 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1390 		paired = B_TRUE;
1391 		alloclen += sizeof (sadb_x_pair_t);
1392 		otherspi = ipsa->ipsa_otherspi;
1393 	} else {
1394 		paired = B_FALSE;
1395 	}
1396 
1397 	/* How 'bout other lifetimes? */
1398 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1399 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1400 		alloclen += sizeof (sadb_lifetime_t);
1401 		soft = B_TRUE;
1402 	} else {
1403 		soft = B_FALSE;
1404 	}
1405 
1406 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1407 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1408 		alloclen += sizeof (sadb_lifetime_t);
1409 		hard = B_TRUE;
1410 	} else {
1411 		hard = B_FALSE;
1412 	}
1413 
1414 	/* Inner addresses. */
1415 	if (ipsa->ipsa_innerfam == 0) {
1416 		isrc = B_FALSE;
1417 		idst = B_FALSE;
1418 	} else {
1419 		pfam = ipsa->ipsa_innerfam;
1420 		switch (pfam) {
1421 		case AF_INET6:
1422 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1423 			    sizeof (sadb_address_t), sizeof (uint64_t));
1424 			break;
1425 		case AF_INET:
1426 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1427 			    sizeof (sadb_address_t), sizeof (uint64_t));
1428 			break;
1429 		default:
1430 			cmn_err(CE_PANIC,
1431 			    "IPsec SADB: Proxy length failure.\n");
1432 			break;
1433 		}
1434 		isrc = B_TRUE;
1435 		idst = B_TRUE;
1436 		alloclen += 2 * paddrsize;
1437 	}
1438 
1439 	/* For the following fields, assume that length != 0 ==> stuff */
1440 	if (ipsa->ipsa_authkeylen != 0) {
1441 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1442 		    sizeof (uint64_t));
1443 		alloclen += authsize;
1444 		auth = B_TRUE;
1445 	} else {
1446 		auth = B_FALSE;
1447 	}
1448 
1449 	if (ipsa->ipsa_encrkeylen != 0) {
1450 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1451 		    sizeof (uint64_t));
1452 		alloclen += encrsize;
1453 		encr = B_TRUE;
1454 	} else {
1455 		encr = B_FALSE;
1456 	}
1457 
1458 	/* No need for roundup on sens and integ. */
1459 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1460 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1461 		    ipsa->ipsa_senslen;
1462 		sensinteg = B_TRUE;
1463 	} else {
1464 		sensinteg = B_FALSE;
1465 	}
1466 
1467 	/*
1468 	 * Must use strlen() here for lengths.	Identities use NULL
1469 	 * pointers to indicate their nonexistence.
1470 	 */
1471 	if (ipsa->ipsa_src_cid != NULL) {
1472 		srcidsize = roundup(sizeof (sadb_ident_t) +
1473 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1474 		    sizeof (uint64_t));
1475 		alloclen += srcidsize;
1476 		srcid = B_TRUE;
1477 	} else {
1478 		srcid = B_FALSE;
1479 	}
1480 
1481 	if (ipsa->ipsa_dst_cid != NULL) {
1482 		dstidsize = roundup(sizeof (sadb_ident_t) +
1483 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1484 		    sizeof (uint64_t));
1485 		alloclen += dstidsize;
1486 		dstid = B_TRUE;
1487 	} else {
1488 		dstid = B_FALSE;
1489 	}
1490 
1491 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1492 		alloclen += sizeof (sadb_x_kmc_t);
1493 
1494 	/* Make sure the allocation length is a multiple of 8 bytes. */
1495 	ASSERT((alloclen & 0x7) == 0);
1496 
1497 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1498 	mp = allocb(alloclen, BPRI_HI);
1499 	if (mp == NULL)
1500 		return (NULL);
1501 
1502 	mp->b_wptr += alloclen;
1503 	end = mp->b_wptr;
1504 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1505 	*newsamsg = *samsg;
1506 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1507 
1508 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1509 
1510 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1511 
1512 	assoc = (sadb_sa_t *)(newsamsg + 1);
1513 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1514 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1515 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1516 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1517 	assoc->sadb_sa_state = ipsa->ipsa_state;
1518 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1519 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1520 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1521 
1522 	lt = (sadb_lifetime_t *)(assoc + 1);
1523 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1524 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1525 	/* We do not support the concept. */
1526 	lt->sadb_lifetime_allocations = 0;
1527 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1528 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1529 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1530 
1531 	if (hard) {
1532 		lt++;
1533 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1534 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1535 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1536 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1537 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1538 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1539 	}
1540 
1541 	if (soft) {
1542 		lt++;
1543 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1544 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1545 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1546 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1547 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1548 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1549 	}
1550 
1551 	cur = (uint8_t *)(lt + 1);
1552 
1553 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1554 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1555 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1556 	    SA_PROTO(ipsa), 0);
1557 	if (cur == NULL) {
1558 		freemsg(mp);
1559 		mp = NULL;
1560 		goto bail;
1561 	}
1562 
1563 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1564 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1565 	    SA_PROTO(ipsa), 0);
1566 	if (cur == NULL) {
1567 		freemsg(mp);
1568 		mp = NULL;
1569 		goto bail;
1570 	}
1571 
1572 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1573 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1574 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1575 		    IPPROTO_UDP, 0);
1576 		if (cur == NULL) {
1577 			freemsg(mp);
1578 			mp = NULL;
1579 			goto bail;
1580 		}
1581 	}
1582 
1583 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1584 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1585 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1586 		    IPPROTO_UDP, 0);
1587 		if (cur == NULL) {
1588 			freemsg(mp);
1589 			mp = NULL;
1590 			goto bail;
1591 		}
1592 	}
1593 
1594 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1595 	if (isrc) {
1596 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1597 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1598 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1599 		if (cur == NULL) {
1600 			freemsg(mp);
1601 			mp = NULL;
1602 			goto bail;
1603 		}
1604 	}
1605 
1606 	if (idst) {
1607 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1608 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1609 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1610 		if (cur == NULL) {
1611 			freemsg(mp);
1612 			mp = NULL;
1613 			goto bail;
1614 		}
1615 	}
1616 
1617 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1618 		cur = sadb_make_kmc_ext(cur, end,
1619 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1620 		if (cur == NULL) {
1621 			freemsg(mp);
1622 			mp = NULL;
1623 			goto bail;
1624 		}
1625 	}
1626 
1627 	walker = (sadb_ext_t *)cur;
1628 	if (auth) {
1629 		key = (sadb_key_t *)walker;
1630 		key->sadb_key_len = SADB_8TO64(authsize);
1631 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1632 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1633 		key->sadb_key_reserved = 0;
1634 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1635 		walker = (sadb_ext_t *)((uint64_t *)walker +
1636 		    walker->sadb_ext_len);
1637 	}
1638 
1639 	if (encr) {
1640 		key = (sadb_key_t *)walker;
1641 		key->sadb_key_len = SADB_8TO64(encrsize);
1642 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1643 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1644 		key->sadb_key_reserved = 0;
1645 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1646 		walker = (sadb_ext_t *)((uint64_t *)walker +
1647 		    walker->sadb_ext_len);
1648 	}
1649 
1650 	if (srcid) {
1651 		ident = (sadb_ident_t *)walker;
1652 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1653 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1654 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1655 		ident->sadb_ident_id = 0;
1656 		ident->sadb_ident_reserved = 0;
1657 		(void) strcpy((char *)(ident + 1),
1658 		    ipsa->ipsa_src_cid->ipsid_cid);
1659 		walker = (sadb_ext_t *)((uint64_t *)walker +
1660 		    walker->sadb_ext_len);
1661 	}
1662 
1663 	if (dstid) {
1664 		ident = (sadb_ident_t *)walker;
1665 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1666 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1667 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1668 		ident->sadb_ident_id = 0;
1669 		ident->sadb_ident_reserved = 0;
1670 		(void) strcpy((char *)(ident + 1),
1671 		    ipsa->ipsa_dst_cid->ipsid_cid);
1672 		walker = (sadb_ext_t *)((uint64_t *)walker +
1673 		    walker->sadb_ext_len);
1674 	}
1675 
1676 	if (sensinteg) {
1677 		sens = (sadb_sens_t *)walker;
1678 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1679 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1680 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1681 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1682 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1683 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1684 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1685 		sens->sadb_sens_reserved = 0;
1686 		bitmap = (uint64_t *)(sens + 1);
1687 		if (ipsa->ipsa_sens != NULL) {
1688 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1689 			bitmap += sens->sadb_sens_sens_len;
1690 		}
1691 		if (ipsa->ipsa_integ != NULL)
1692 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1693 		walker = (sadb_ext_t *)((uint64_t *)walker +
1694 		    walker->sadb_ext_len);
1695 	}
1696 
1697 	if (paired) {
1698 		pair_ext = (sadb_x_pair_t *)walker;
1699 
1700 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1701 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1702 		pair_ext->sadb_x_pair_spi = otherspi;
1703 
1704 		walker = (sadb_ext_t *)((uint64_t *)walker +
1705 		    walker->sadb_ext_len);
1706 	}
1707 
1708 bail:
1709 	/* Pardon any delays... */
1710 	mutex_exit(&ipsa->ipsa_lock);
1711 
1712 	return (mp);
1713 }
1714 
1715 /*
1716  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1717  * and adjust base message accordingly.
1718  *
1719  * Assume message is pulled up in one piece of contiguous memory.
1720  *
1721  * Say if we start off with:
1722  *
1723  * +------+----+-------------+-----------+---------------+---------------+
1724  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1725  * +------+----+-------------+-----------+---------------+---------------+
1726  *
1727  * we will end up with
1728  *
1729  * +------+----+-------------+-----------+---------------+
1730  * | base | SA | source addr | dest addr | soft lifetime |
1731  * +------+----+-------------+-----------+---------------+
1732  */
1733 static void
1734 sadb_strip(sadb_msg_t *samsg)
1735 {
1736 	sadb_ext_t *ext;
1737 	uint8_t *target = NULL;
1738 	uint8_t *msgend;
1739 	int sofar = SADB_8TO64(sizeof (*samsg));
1740 	int copylen;
1741 
1742 	ext = (sadb_ext_t *)(samsg + 1);
1743 	msgend = (uint8_t *)samsg;
1744 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1745 	while ((uint8_t *)ext < msgend) {
1746 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1747 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1748 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1749 			/*
1750 			 * Aha!	 I found a header to be erased.
1751 			 */
1752 
1753 			if (target != NULL) {
1754 				/*
1755 				 * If I had a previous header to be erased,
1756 				 * copy over it.  I can get away with just
1757 				 * copying backwards because the target will
1758 				 * always be 8 bytes behind the source.
1759 				 */
1760 				copylen = ((uint8_t *)ext) - (target +
1761 				    SADB_64TO8(
1762 				    ((sadb_ext_t *)target)->sadb_ext_len));
1763 				ovbcopy(((uint8_t *)ext - copylen), target,
1764 				    copylen);
1765 				target += copylen;
1766 				((sadb_ext_t *)target)->sadb_ext_len =
1767 				    SADB_8TO64(((uint8_t *)ext) - target +
1768 				    SADB_64TO8(ext->sadb_ext_len));
1769 			} else {
1770 				target = (uint8_t *)ext;
1771 			}
1772 		} else {
1773 			sofar += ext->sadb_ext_len;
1774 		}
1775 
1776 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1777 	}
1778 
1779 	ASSERT((uint8_t *)ext == msgend);
1780 
1781 	if (target != NULL) {
1782 		copylen = ((uint8_t *)ext) - (target +
1783 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1784 		if (copylen != 0)
1785 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1786 	}
1787 
1788 	/* Adjust samsg. */
1789 	samsg->sadb_msg_len = (uint16_t)sofar;
1790 
1791 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1792 }
1793 
1794 /*
1795  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1796  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1797  * the sending keysock instance is included.
1798  */
1799 void
1800 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1801     uint_t serial)
1802 {
1803 	mblk_t *msg = mp->b_cont;
1804 	sadb_msg_t *samsg;
1805 	keysock_out_t *kso;
1806 
1807 	/*
1808 	 * Enough functions call this to merit a NULL queue check.
1809 	 */
1810 	if (pfkey_q == NULL) {
1811 		freemsg(mp);
1812 		return;
1813 	}
1814 
1815 	ASSERT(msg != NULL);
1816 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1817 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1818 	samsg = (sadb_msg_t *)msg->b_rptr;
1819 	kso = (keysock_out_t *)mp->b_rptr;
1820 
1821 	kso->ks_out_type = KEYSOCK_OUT;
1822 	kso->ks_out_len = sizeof (*kso);
1823 	kso->ks_out_serial = serial;
1824 
1825 	/*
1826 	 * Only send the base message up in the event of an error.
1827 	 * Don't worry about bzero()-ing, because it was probably bogus
1828 	 * anyway.
1829 	 */
1830 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1831 	samsg = (sadb_msg_t *)msg->b_rptr;
1832 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1833 	samsg->sadb_msg_errno = (uint8_t)error;
1834 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1835 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1836 
1837 	putnext(pfkey_q, mp);
1838 }
1839 
1840 /*
1841  * Send a successful return packet back to keysock via the queue in pfkey_q.
1842  *
1843  * Often, an SA is associated with the reply message, it's passed in if needed,
1844  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1845  * and the caller will release said refcnt.
1846  */
1847 void
1848 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1849     keysock_in_t *ksi, ipsa_t *ipsa)
1850 {
1851 	keysock_out_t *kso;
1852 	mblk_t *mp1;
1853 	sadb_msg_t *newsamsg;
1854 	uint8_t *oldend;
1855 
1856 	ASSERT((mp->b_cont != NULL) &&
1857 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1858 	    ((void *)mp->b_rptr == (void *)ksi));
1859 
1860 	switch (samsg->sadb_msg_type) {
1861 	case SADB_ADD:
1862 	case SADB_UPDATE:
1863 	case SADB_X_UPDATEPAIR:
1864 	case SADB_FLUSH:
1865 	case SADB_DUMP:
1866 		/*
1867 		 * I have all of the message already.  I just need to strip
1868 		 * out the keying material and echo the message back.
1869 		 *
1870 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1871 		 * work.  When DUMP reaches here, it should only be a base
1872 		 * message.
1873 		 */
1874 	justecho:
1875 		ASSERT(samsg->sadb_msg_type != SADB_DUMP ||
1876 		    samsg->sadb_msg_len == SADB_8TO64(sizeof (sadb_msg_t)));
1877 
1878 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1879 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1880 			sadb_strip(samsg);
1881 			/* Assume PF_KEY message is contiguous. */
1882 			ASSERT(mp->b_cont->b_cont == NULL);
1883 			oldend = mp->b_cont->b_wptr;
1884 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1885 			    SADB_64TO8(samsg->sadb_msg_len);
1886 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1887 		}
1888 		break;
1889 	case SADB_GET:
1890 		/*
1891 		 * Do a lot of work here, because of the ipsa I just found.
1892 		 * First construct the new PF_KEY message, then abandon
1893 		 * the old one.
1894 		 */
1895 		mp1 = sadb_sa2msg(ipsa, samsg);
1896 		if (mp1 == NULL) {
1897 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1898 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1899 			return;
1900 		}
1901 		freemsg(mp->b_cont);
1902 		mp->b_cont = mp1;
1903 		break;
1904 	case SADB_DELETE:
1905 	case SADB_X_DELPAIR:
1906 		if (ipsa == NULL)
1907 			goto justecho;
1908 		/*
1909 		 * Because listening KMds may require more info, treat
1910 		 * DELETE like a special case of GET.
1911 		 */
1912 		mp1 = sadb_sa2msg(ipsa, samsg);
1913 		if (mp1 == NULL) {
1914 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1915 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1916 			return;
1917 		}
1918 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1919 		sadb_strip(newsamsg);
1920 		oldend = mp1->b_wptr;
1921 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1922 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1923 		freemsg(mp->b_cont);
1924 		mp->b_cont = mp1;
1925 		break;
1926 	default:
1927 		if (mp != NULL)
1928 			freemsg(mp);
1929 		return;
1930 	}
1931 
1932 	/* ksi is now null and void. */
1933 	kso = (keysock_out_t *)ksi;
1934 	kso->ks_out_type = KEYSOCK_OUT;
1935 	kso->ks_out_len = sizeof (*kso);
1936 	kso->ks_out_serial = ksi->ks_in_serial;
1937 	/* We're ready to send... */
1938 	putnext(pfkey_q, mp);
1939 }
1940 
1941 /*
1942  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1943  */
1944 void
1945 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1946     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1947 {
1948 	keysock_hello_ack_t *kha;
1949 	queue_t *oldq;
1950 
1951 	ASSERT(OTHERQ(q) != NULL);
1952 
1953 	/*
1954 	 * First, check atomically that I'm the first and only keysock
1955 	 * instance.
1956 	 *
1957 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1958 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1959 	 * messages.
1960 	 */
1961 
1962 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
1963 	if (oldq != NULL) {
1964 		ASSERT(oldq != q);
1965 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1966 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1967 		freemsg(mp);
1968 		return;
1969 	}
1970 
1971 	kha = (keysock_hello_ack_t *)mp->b_rptr;
1972 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1973 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1974 	kha->ks_hello_satype = (uint8_t)satype;
1975 
1976 	/*
1977 	 * If we made it past the casptr, then we have "exclusive" access
1978 	 * to the timeout handle.  Fire it off in 4 seconds, because it
1979 	 * just seems like a good interval.
1980 	 */
1981 	*top = qtimeout(*pfkey_qp, ager, agerarg, drv_usectohz(4000000));
1982 
1983 	putnext(*pfkey_qp, mp);
1984 }
1985 
1986 /*
1987  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1988  *
1989  * Check addresses themselves for wildcard or multicast.
1990  * Check ire table for local/non-local/broadcast.
1991  */
1992 int
1993 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1994     netstack_t *ns)
1995 {
1996 	sadb_address_t *addr = (sadb_address_t *)ext;
1997 	struct sockaddr_in *sin;
1998 	struct sockaddr_in6 *sin6;
1999 	ire_t *ire;
2000 	int diagnostic, type;
2001 	boolean_t normalized = B_FALSE;
2002 
2003 	ASSERT(ext != NULL);
2004 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
2005 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
2006 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
2007 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
2008 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
2009 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
2010 
2011 	/* Assign both sockaddrs, the compiler will do the right thing. */
2012 	sin = (struct sockaddr_in *)(addr + 1);
2013 	sin6 = (struct sockaddr_in6 *)(addr + 1);
2014 
2015 	if (sin6->sin6_family == AF_INET6) {
2016 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2017 			/*
2018 			 * Convert to an AF_INET sockaddr.  This means the
2019 			 * return messages will have the extra space, but have
2020 			 * AF_INET sockaddrs instead of AF_INET6.
2021 			 *
2022 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
2023 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
2024 			 * equal to AF_INET <v4>, it shouldnt be a huge
2025 			 * problem.
2026 			 */
2027 			sin->sin_family = AF_INET;
2028 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
2029 			    &sin->sin_addr);
2030 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
2031 			normalized = B_TRUE;
2032 		}
2033 	} else if (sin->sin_family != AF_INET) {
2034 		switch (ext->sadb_ext_type) {
2035 		case SADB_EXT_ADDRESS_SRC:
2036 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2037 			break;
2038 		case SADB_EXT_ADDRESS_DST:
2039 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2040 			break;
2041 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2042 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2043 			break;
2044 		case SADB_X_EXT_ADDRESS_INNER_DST:
2045 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2046 			break;
2047 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2048 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2049 			break;
2050 		case SADB_X_EXT_ADDRESS_NATT_REM:
2051 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2052 			break;
2053 			/* There is no default, see above ASSERT. */
2054 		}
2055 bail:
2056 		if (pfkey_q != NULL) {
2057 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2058 			    serial);
2059 		} else {
2060 			/*
2061 			 * Scribble in sadb_msg that we got passed in.
2062 			 * Overload "mp" to be an sadb_msg pointer.
2063 			 */
2064 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2065 
2066 			samsg->sadb_msg_errno = EINVAL;
2067 			samsg->sadb_x_msg_diagnostic = diagnostic;
2068 		}
2069 		return (KS_IN_ADDR_UNKNOWN);
2070 	}
2071 
2072 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2073 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2074 		/*
2075 		 * We need only check for prefix issues.
2076 		 */
2077 
2078 		/* Set diagnostic now, in case we need it later. */
2079 		diagnostic =
2080 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2081 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2082 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2083 
2084 		if (normalized)
2085 			addr->sadb_address_prefixlen -= 96;
2086 
2087 		/*
2088 		 * Verify and mask out inner-addresses based on prefix length.
2089 		 */
2090 		if (sin->sin_family == AF_INET) {
2091 			if (addr->sadb_address_prefixlen > 32)
2092 				goto bail;
2093 			sin->sin_addr.s_addr &=
2094 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2095 		} else {
2096 			in6_addr_t mask;
2097 
2098 			ASSERT(sin->sin_family == AF_INET6);
2099 			/*
2100 			 * ip_plen_to_mask_v6() returns NULL if the value in
2101 			 * question is out of range.
2102 			 */
2103 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2104 			    &mask) == NULL)
2105 				goto bail;
2106 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2107 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2108 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2109 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2110 		}
2111 
2112 		/* We don't care in these cases. */
2113 		return (KS_IN_ADDR_DONTCARE);
2114 	}
2115 
2116 	if (sin->sin_family == AF_INET6) {
2117 		/* Check the easy ones now. */
2118 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2119 			return (KS_IN_ADDR_MBCAST);
2120 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2121 			return (KS_IN_ADDR_UNSPEC);
2122 		/*
2123 		 * At this point, we're a unicast IPv6 address.
2124 		 *
2125 		 * A ctable lookup for local is sufficient here.  If we're
2126 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2127 		 *
2128 		 * XXX Zones alert -> me/notme decision needs to be tempered
2129 		 * by what zone we're in when we go to zone-aware IPsec.
2130 		 */
2131 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2132 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2133 		    ns->netstack_ip);
2134 		if (ire != NULL) {
2135 			/* Hey hey, it's local. */
2136 			IRE_REFRELE(ire);
2137 			return (KS_IN_ADDR_ME);
2138 		}
2139 	} else {
2140 		ASSERT(sin->sin_family == AF_INET);
2141 		if (sin->sin_addr.s_addr == INADDR_ANY)
2142 			return (KS_IN_ADDR_UNSPEC);
2143 		if (CLASSD(sin->sin_addr.s_addr))
2144 			return (KS_IN_ADDR_MBCAST);
2145 		/*
2146 		 * At this point we're a unicast or broadcast IPv4 address.
2147 		 *
2148 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2149 		 * A NULL return value is NOTME, otherwise, look at the
2150 		 * returned ire for broadcast or not and return accordingly.
2151 		 *
2152 		 * XXX Zones alert -> me/notme decision needs to be tempered
2153 		 * by what zone we're in when we go to zone-aware IPsec.
2154 		 */
2155 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2156 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2157 		    MATCH_IRE_TYPE, ns->netstack_ip);
2158 		if (ire != NULL) {
2159 			/* Check for local or broadcast */
2160 			type = ire->ire_type;
2161 			IRE_REFRELE(ire);
2162 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2163 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2164 			    KS_IN_ADDR_MBCAST);
2165 		}
2166 	}
2167 
2168 	return (KS_IN_ADDR_NOTME);
2169 }
2170 
2171 /*
2172  * Address normalizations and reality checks for inbound PF_KEY messages.
2173  *
2174  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2175  * the source to AF_INET.  Do the same for the inner sources.
2176  */
2177 boolean_t
2178 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2179 {
2180 	struct sockaddr_in *src, *isrc;
2181 	struct sockaddr_in6 *dst, *idst;
2182 	sadb_address_t *srcext, *dstext;
2183 	uint16_t sport;
2184 	sadb_ext_t **extv = ksi->ks_in_extv;
2185 	int rc;
2186 
2187 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2188 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2189 		    ksi->ks_in_serial, ns);
2190 		if (rc == KS_IN_ADDR_UNKNOWN)
2191 			return (B_FALSE);
2192 		if (rc == KS_IN_ADDR_MBCAST) {
2193 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2194 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2195 			return (B_FALSE);
2196 		}
2197 		ksi->ks_in_srctype = rc;
2198 	}
2199 
2200 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2201 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2202 		    ksi->ks_in_serial, ns);
2203 		if (rc == KS_IN_ADDR_UNKNOWN)
2204 			return (B_FALSE);
2205 		if (rc == KS_IN_ADDR_UNSPEC) {
2206 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2207 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2208 			return (B_FALSE);
2209 		}
2210 		ksi->ks_in_dsttype = rc;
2211 	}
2212 
2213 	/*
2214 	 * NAT-Traversal addrs are simple enough to not require all of
2215 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2216 	 * AF_INET.
2217 	 */
2218 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2219 		rc = sadb_addrcheck(pfkey_q, mp,
2220 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2221 
2222 		/*
2223 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2224 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2225 		 * AND local-port flexibility).
2226 		 */
2227 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2228 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2229 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2230 			    ksi->ks_in_serial);
2231 			return (B_FALSE);
2232 		}
2233 		src = (struct sockaddr_in *)
2234 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2235 		if (src->sin_family != AF_INET) {
2236 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2237 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2238 			    ksi->ks_in_serial);
2239 			return (B_FALSE);
2240 		}
2241 	}
2242 
2243 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2244 		rc = sadb_addrcheck(pfkey_q, mp,
2245 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2246 
2247 		/*
2248 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2249 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2250 		 */
2251 		if (rc != KS_IN_ADDR_NOTME &&
2252 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2253 		    rc == KS_IN_ADDR_UNSPEC)) {
2254 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2255 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2256 			    ksi->ks_in_serial);
2257 			return (B_FALSE);
2258 		}
2259 		src = (struct sockaddr_in *)
2260 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2261 		if (src->sin_family != AF_INET) {
2262 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2263 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2264 			    ksi->ks_in_serial);
2265 			return (B_FALSE);
2266 		}
2267 	}
2268 
2269 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2270 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2271 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2272 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2273 			    ksi->ks_in_serial);
2274 			return (B_FALSE);
2275 		}
2276 
2277 		if (sadb_addrcheck(pfkey_q, mp,
2278 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2279 		    == KS_IN_ADDR_UNKNOWN ||
2280 		    sadb_addrcheck(pfkey_q, mp,
2281 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2282 		    == KS_IN_ADDR_UNKNOWN)
2283 			return (B_FALSE);
2284 
2285 		isrc = (struct sockaddr_in *)
2286 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2287 		    1);
2288 		idst = (struct sockaddr_in6 *)
2289 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2290 		    1);
2291 		if (isrc->sin_family != idst->sin6_family) {
2292 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2293 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2294 			    ksi->ks_in_serial);
2295 			return (B_FALSE);
2296 		}
2297 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2298 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2299 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2300 			    ksi->ks_in_serial);
2301 			return (B_FALSE);
2302 	} else {
2303 		isrc = NULL;	/* For inner/outer port check below. */
2304 	}
2305 
2306 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2307 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2308 
2309 	if (dstext == NULL || srcext == NULL)
2310 		return (B_TRUE);
2311 
2312 	dst = (struct sockaddr_in6 *)(dstext + 1);
2313 	src = (struct sockaddr_in *)(srcext + 1);
2314 
2315 	if (isrc != NULL &&
2316 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2317 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2318 		/* Can't set inner and outer ports in one SA. */
2319 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2320 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2321 		    ksi->ks_in_serial);
2322 		return (B_FALSE);
2323 	}
2324 
2325 	if (dst->sin6_family == src->sin_family)
2326 		return (B_TRUE);
2327 
2328 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2329 		if (srcext->sadb_address_proto == 0) {
2330 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2331 		} else if (dstext->sadb_address_proto == 0) {
2332 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2333 		} else {
2334 			/* Inequal protocols, neither were 0.  Report error. */
2335 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2336 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2337 			    ksi->ks_in_serial);
2338 			return (B_FALSE);
2339 		}
2340 	}
2341 
2342 	/*
2343 	 * With the exception of an unspec IPv6 source and an IPv4
2344 	 * destination, address families MUST me matched.
2345 	 */
2346 	if (src->sin_family == AF_INET ||
2347 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2348 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2349 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2350 		return (B_FALSE);
2351 	}
2352 
2353 	/*
2354 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2355 	 * in the same place for sockaddr_in and sockaddr_in6.
2356 	 */
2357 	sport = src->sin_port;
2358 	bzero(src, sizeof (*src));
2359 	src->sin_family = AF_INET;
2360 	src->sin_port = sport;
2361 
2362 	return (B_TRUE);
2363 }
2364 
2365 /*
2366  * Set the results in "addrtype", given an IRE as requested by
2367  * sadb_addrcheck().
2368  */
2369 int
2370 sadb_addrset(ire_t *ire)
2371 {
2372 	if ((ire->ire_type & IRE_BROADCAST) ||
2373 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2374 	    (ire->ire_ipversion == IPV6_VERSION &&
2375 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2376 		return (KS_IN_ADDR_MBCAST);
2377 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2378 		return (KS_IN_ADDR_ME);
2379 	return (KS_IN_ADDR_NOTME);
2380 }
2381 
2382 
2383 /*
2384  * Walker callback function to delete sa's based on src/dst address.
2385  * Assumes that we're called with *head locked, no other locks held;
2386  * Conveniently, and not coincidentally, this is both what sadb_walker
2387  * gives us and also what sadb_unlinkassoc expects.
2388  */
2389 
2390 struct sadb_purge_state
2391 {
2392 	uint32_t *src;
2393 	uint32_t *dst;
2394 	sa_family_t af;
2395 	boolean_t inbnd;
2396 	char *sidstr;
2397 	char *didstr;
2398 	uint16_t sidtype;
2399 	uint16_t didtype;
2400 	uint32_t kmproto;
2401 	mblk_t *mq;
2402 };
2403 
2404 static void
2405 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2406 {
2407 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2408 
2409 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2410 
2411 	mutex_enter(&entry->ipsa_lock);
2412 
2413 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2414 	    (ps->src != NULL &&
2415 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2416 	    (ps->dst != NULL &&
2417 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2418 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2419 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2420 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2421 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2422 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2423 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2424 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2425 		mutex_exit(&entry->ipsa_lock);
2426 		return;
2427 	}
2428 
2429 	entry->ipsa_state = IPSA_STATE_DEAD;
2430 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2431 }
2432 
2433 /*
2434  * Common code to purge an SA with a matching src or dst address.
2435  * Don't kill larval SA's in such a purge.
2436  */
2437 int
2438 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2439     queue_t *ip_q)
2440 {
2441 	sadb_address_t *dstext =
2442 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2443 	sadb_address_t *srcext =
2444 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2445 	sadb_ident_t *dstid =
2446 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2447 	sadb_ident_t *srcid =
2448 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2449 	sadb_x_kmc_t *kmc =
2450 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2451 	struct sockaddr_in *src, *dst;
2452 	struct sockaddr_in6 *src6, *dst6;
2453 	struct sadb_purge_state ps;
2454 
2455 	/*
2456 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2457 	 * takes care of them.
2458 	 */
2459 
2460 	/* enforced by caller */
2461 	ASSERT((dstext != NULL) || (srcext != NULL));
2462 
2463 	ps.src = NULL;
2464 	ps.dst = NULL;
2465 #ifdef DEBUG
2466 	ps.af = (sa_family_t)-1;
2467 #endif
2468 	ps.mq = NULL;
2469 	ps.sidstr = NULL;
2470 	ps.didstr = NULL;
2471 	ps.kmproto = SADB_X_KMP_MAX + 1;
2472 
2473 	if (dstext != NULL) {
2474 		dst = (struct sockaddr_in *)(dstext + 1);
2475 		ps.af = dst->sin_family;
2476 		if (dst->sin_family == AF_INET6) {
2477 			dst6 = (struct sockaddr_in6 *)dst;
2478 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2479 		} else {
2480 			ps.dst = (uint32_t *)&dst->sin_addr;
2481 		}
2482 	}
2483 
2484 	if (srcext != NULL) {
2485 		src = (struct sockaddr_in *)(srcext + 1);
2486 		ps.af = src->sin_family;
2487 		if (src->sin_family == AF_INET6) {
2488 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2489 			ps.src = (uint32_t *)&src6->sin6_addr;
2490 		} else {
2491 			ps.src = (uint32_t *)&src->sin_addr;
2492 		}
2493 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2494 	}
2495 
2496 	ASSERT(ps.af != (sa_family_t)-1);
2497 
2498 	if (dstid != NULL) {
2499 		/*
2500 		 * NOTE:  May need to copy string in the future
2501 		 * if the inbound keysock message disappears for some strange
2502 		 * reason.
2503 		 */
2504 		ps.didstr = (char *)(dstid + 1);
2505 		ps.didtype = dstid->sadb_ident_type;
2506 	}
2507 
2508 	if (srcid != NULL) {
2509 		/*
2510 		 * NOTE:  May need to copy string in the future
2511 		 * if the inbound keysock message disappears for some strange
2512 		 * reason.
2513 		 */
2514 		ps.sidstr = (char *)(srcid + 1);
2515 		ps.sidtype = srcid->sadb_ident_type;
2516 	}
2517 
2518 	if (kmc != NULL)
2519 		ps.kmproto = kmc->sadb_x_kmc_proto;
2520 
2521 	/*
2522 	 * This is simple, crude, and effective.
2523 	 * Unimplemented optimizations (TBD):
2524 	 * - we can limit how many places we search based on where we
2525 	 * think the SA is filed.
2526 	 * - if we get a dst address, we can hash based on dst addr to find
2527 	 * the correct bucket in the outbound table.
2528 	 */
2529 	ps.inbnd = B_TRUE;
2530 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2531 	ps.inbnd = B_FALSE;
2532 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2533 
2534 	if (ps.mq != NULL)
2535 		sadb_drain_torchq(ip_q, ps.mq);
2536 
2537 	ASSERT(mp->b_cont != NULL);
2538 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2539 	    NULL);
2540 	return (0);
2541 }
2542 
2543 /*
2544  * Common code to delete/get an SA.
2545  */
2546 int
2547 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2548     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2549 {
2550 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2551 	sadb_address_t *srcext =
2552 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2553 	sadb_address_t *dstext =
2554 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2555 	ipsa_t *echo_target = NULL;
2556 	ipsap_t *ipsapp;
2557 	mblk_t *torchq = NULL;
2558 	uint_t	error = 0;
2559 
2560 	if (dstext == NULL) {
2561 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2562 		return (EINVAL);
2563 	}
2564 	if (assoc == NULL) {
2565 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2566 		return (EINVAL);
2567 	}
2568 
2569 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
2570 	if (ipsapp == NULL) {
2571 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2572 		return (ESRCH);
2573 	}
2574 
2575 	echo_target = ipsapp->ipsap_sa_ptr;
2576 	if (echo_target == NULL)
2577 		echo_target = ipsapp->ipsap_psa_ptr;
2578 
2579 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2580 		/*
2581 		 * Bucket locks will be required if SA is actually unlinked.
2582 		 * get_ipsa_pair() returns valid hash bucket pointers even
2583 		 * if it can't find a pair SA pointer.
2584 		 */
2585 		mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2586 		mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2587 
2588 		if (ipsapp->ipsap_sa_ptr != NULL) {
2589 			mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2590 			ipsapp->ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2591 			(void) sadb_torch_assoc(ipsapp->ipsap_bucket,
2592 			    ipsapp->ipsap_sa_ptr, B_FALSE, &torchq);
2593 			/*
2594 			 * sadb_torch_assoc() releases the ipsa_lock
2595 			 * and calls sadb_unlinkassoc() which does a
2596 			 * IPSA_REFRELE.
2597 			 */
2598 		}
2599 		if (ipsapp->ipsap_psa_ptr != NULL) {
2600 			mutex_enter(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2601 			if (sadb_msg_type == SADB_X_DELPAIR) {
2602 				ipsapp->ipsap_psa_ptr->ipsa_state =
2603 				    IPSA_STATE_DEAD;
2604 				(void) sadb_torch_assoc(ipsapp->ipsap_pbucket,
2605 				    ipsapp->ipsap_psa_ptr, B_FALSE, &torchq);
2606 			} else {
2607 				/*
2608 				 * Only half of the "pair" has been deleted.
2609 				 * Update the remaining SA and remove references
2610 				 * to its pair SA, which is now gone.
2611 				 */
2612 				ipsapp->ipsap_psa_ptr->ipsa_otherspi = 0;
2613 				ipsapp->ipsap_psa_ptr->ipsa_flags &=
2614 				    ~IPSA_F_PAIRED;
2615 				mutex_exit(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2616 			}
2617 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2618 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2619 			error = ESRCH;
2620 		}
2621 		mutex_exit(&ipsapp->ipsap_bucket->isaf_lock);
2622 		mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2623 	}
2624 
2625 	if (torchq != NULL)
2626 		sadb_drain_torchq(spp->s_ip_q, torchq);
2627 
2628 	ASSERT(mp->b_cont != NULL);
2629 
2630 	if (error == 0)
2631 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2632 		    mp->b_cont->b_rptr, ksi, echo_target);
2633 
2634 	destroy_ipsa_pair(ipsapp);
2635 
2636 	return (error);
2637 }
2638 
2639 /*
2640  * This function takes a sadb_sa_t and finds the ipsa_t structure
2641  * and the isaf_t (hash bucket) that its stored under. If the security
2642  * association has a peer, the ipsa_t structure and bucket for that security
2643  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2644  * are returned as a ipsap_t.
2645  *
2646  * Note that a "pair" is defined as one (but not both) of the following:
2647  *
2648  * A security association which has a soft reference to another security
2649  * association via its SPI.
2650  *
2651  * A security association that is not obviously "inbound" or "outbound" so
2652  * it appears in both hash tables, the "peer" being the same security
2653  * association in the other hash table.
2654  *
2655  * This function will return NULL if the ipsa_t can't be found in the
2656  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2657  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2658  * provided at least one ipsa_t is found.
2659  */
2660 ipsap_t *
2661 get_ipsa_pair(sadb_sa_t *assoc, sadb_address_t *srcext, sadb_address_t *dstext,
2662     sadbp_t *spp)
2663 {
2664 	struct sockaddr_in *src, *dst;
2665 	struct sockaddr_in6 *src6, *dst6;
2666 	sadb_t *sp;
2667 	uint32_t *srcaddr, *dstaddr;
2668 	isaf_t *outbound_bucket, *inbound_bucket;
2669 	boolean_t in_inbound_table = B_FALSE;
2670 	ipsap_t *ipsapp;
2671 	sa_family_t af;
2672 
2673 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2674 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2675 	uint32_t pair_spi;
2676 
2677 	ipsapp = kmem_zalloc(sizeof (*ipsapp), KM_NOSLEEP);
2678 	if (ipsapp == NULL)
2679 		return (NULL);
2680 
2681 	/*
2682 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2683 	 * takes care of them.
2684 	 */
2685 
2686 	dst = (struct sockaddr_in *)(dstext + 1);
2687 	af = dst->sin_family;
2688 	if (af == AF_INET6) {
2689 		sp = &spp->s_v6;
2690 		dst6 = (struct sockaddr_in6 *)dst;
2691 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2692 		if (srcext != NULL) {
2693 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2694 			srcaddr = (uint32_t *)&src6->sin6_addr;
2695 			ASSERT(src6->sin6_family == af);
2696 			ASSERT(src6->sin6_family == AF_INET6);
2697 		} else {
2698 			srcaddr = ALL_ZEROES_PTR;
2699 		}
2700 		outbound_bucket = OUTBOUND_BUCKET_V6(sp,
2701 		    *(uint32_t *)dstaddr);
2702 	} else {
2703 		sp = &spp->s_v4;
2704 		dstaddr = (uint32_t *)&dst->sin_addr;
2705 		if (srcext != NULL) {
2706 			src = (struct sockaddr_in *)(srcext + 1);
2707 			srcaddr = (uint32_t *)&src->sin_addr;
2708 			ASSERT(src->sin_family == af);
2709 			ASSERT(src->sin_family == AF_INET);
2710 		} else {
2711 			srcaddr = ALL_ZEROES_PTR;
2712 		}
2713 		outbound_bucket = OUTBOUND_BUCKET_V4(sp,
2714 		    *(uint32_t *)dstaddr);
2715 	}
2716 
2717 	inbound_bucket = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2718 
2719 	/* Lock down both buckets. */
2720 	mutex_enter(&outbound_bucket->isaf_lock);
2721 	mutex_enter(&inbound_bucket->isaf_lock);
2722 
2723 	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2724 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(inbound_bucket,
2725 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2726 		if (ipsapp->ipsap_sa_ptr != NULL) {
2727 			ipsapp->ipsap_bucket = inbound_bucket;
2728 			ipsapp->ipsap_pbucket = outbound_bucket;
2729 			in_inbound_table = B_TRUE;
2730 		} else {
2731 			ipsapp->ipsap_sa_ptr =
2732 			    ipsec_getassocbyspi(outbound_bucket,
2733 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2734 			ipsapp->ipsap_bucket = outbound_bucket;
2735 			ipsapp->ipsap_pbucket = inbound_bucket;
2736 		}
2737 	} else {
2738 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2739 		ipsapp->ipsap_sa_ptr =
2740 		    ipsec_getassocbyspi(outbound_bucket,
2741 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2742 		if (ipsapp->ipsap_sa_ptr != NULL) {
2743 			ipsapp->ipsap_bucket = outbound_bucket;
2744 			ipsapp->ipsap_pbucket = inbound_bucket;
2745 		} else {
2746 			ipsapp->ipsap_sa_ptr =
2747 			    ipsec_getassocbyspi(inbound_bucket,
2748 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2749 			ipsapp->ipsap_bucket = inbound_bucket;
2750 			ipsapp->ipsap_pbucket = outbound_bucket;
2751 			if (ipsapp->ipsap_sa_ptr != NULL)
2752 				in_inbound_table = B_TRUE;
2753 		}
2754 	}
2755 
2756 	if (ipsapp->ipsap_sa_ptr == NULL) {
2757 		mutex_exit(&outbound_bucket->isaf_lock);
2758 		mutex_exit(&inbound_bucket->isaf_lock);
2759 		kmem_free(ipsapp, sizeof (*ipsapp));
2760 		return (NULL);
2761 	}
2762 
2763 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2764 	    in_inbound_table) {
2765 		mutex_exit(&outbound_bucket->isaf_lock);
2766 		mutex_exit(&inbound_bucket->isaf_lock);
2767 		return (ipsapp);
2768 	}
2769 
2770 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2771 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2772 		/*
2773 		 * haspeer implies no sa_pairing, look for same spi
2774 		 * in other hashtable.
2775 		 */
2776 		ipsapp->ipsap_psa_ptr =
2777 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2778 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2779 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2780 		mutex_exit(&outbound_bucket->isaf_lock);
2781 		mutex_exit(&inbound_bucket->isaf_lock);
2782 		return (ipsapp);
2783 	}
2784 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2785 	IPSA_COPY_ADDR(&pair_srcaddr,
2786 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, af);
2787 	IPSA_COPY_ADDR(&pair_dstaddr,
2788 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, af);
2789 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2790 	mutex_exit(&outbound_bucket->isaf_lock);
2791 	mutex_exit(&inbound_bucket->isaf_lock);
2792 
2793 	if (pair_spi == 0) {
2794 		ASSERT(ipsapp->ipsap_bucket != NULL);
2795 		ASSERT(ipsapp->ipsap_pbucket != NULL);
2796 		return (ipsapp);
2797 	}
2798 
2799 	/* found sa in outbound sadb, peer should be inbound */
2800 
2801 	if (in_inbound_table) {
2802 		/* Found SA in inbound table, pair will be in outbound. */
2803 		if (af == AF_INET6) {
2804 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sp,
2805 			    *(uint32_t *)pair_srcaddr);
2806 		} else {
2807 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sp,
2808 			    *(uint32_t *)pair_srcaddr);
2809 		}
2810 	} else {
2811 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sp, pair_spi);
2812 	}
2813 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2814 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2815 	    pair_spi, pair_dstaddr, pair_srcaddr, af);
2816 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2817 
2818 	ASSERT(ipsapp->ipsap_bucket != NULL);
2819 	ASSERT(ipsapp->ipsap_pbucket != NULL);
2820 	return (ipsapp);
2821 }
2822 
2823 /*
2824  * Initialize the mechanism parameters associated with an SA.
2825  * These parameters can be shared by multiple packets, which saves
2826  * us from the overhead of consulting the algorithm table for
2827  * each packet.
2828  */
2829 static void
2830 sadb_init_alginfo(ipsa_t *sa)
2831 {
2832 	ipsec_alginfo_t *alg;
2833 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
2834 
2835 	mutex_enter(&ipss->ipsec_alg_lock);
2836 
2837 	if (sa->ipsa_encrkey != NULL) {
2838 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
2839 		if (alg != NULL && ALG_VALID(alg)) {
2840 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
2841 			sa->ipsa_emech.cm_param = NULL;
2842 			sa->ipsa_emech.cm_param_len = 0;
2843 			sa->ipsa_iv_len = alg->alg_datalen;
2844 		} else
2845 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
2846 	}
2847 
2848 	if (sa->ipsa_authkey != NULL) {
2849 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
2850 		if (alg != NULL && ALG_VALID(alg)) {
2851 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
2852 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
2853 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
2854 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
2855 		} else
2856 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
2857 	}
2858 
2859 	mutex_exit(&ipss->ipsec_alg_lock);
2860 }
2861 
2862 /*
2863  * Perform NAT-traversal cached checksum offset calculations here.
2864  */
2865 static void
2866 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2867     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2868     uint32_t *dst_addr_ptr)
2869 {
2870 	struct sockaddr_in *natt_loc, *natt_rem;
2871 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2872 	uint32_t running_sum = 0;
2873 
2874 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2875 
2876 	if (natt_rem_ext != NULL) {
2877 		uint32_t l_src;
2878 		uint32_t l_rem;
2879 
2880 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2881 
2882 		/* Ensured by sadb_addrfix(). */
2883 		ASSERT(natt_rem->sin_family == AF_INET);
2884 
2885 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2886 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2887 		l_src = *src_addr_ptr;
2888 		l_rem = *natt_rem_ptr;
2889 
2890 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2891 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2892 
2893 		l_src = ntohl(l_src);
2894 		DOWN_SUM(l_src);
2895 		DOWN_SUM(l_src);
2896 		l_rem = ntohl(l_rem);
2897 		DOWN_SUM(l_rem);
2898 		DOWN_SUM(l_rem);
2899 
2900 		/*
2901 		 * We're 1's complement for checksums, so check for wraparound
2902 		 * here.
2903 		 */
2904 		if (l_rem > l_src)
2905 			l_src--;
2906 
2907 		running_sum += l_src - l_rem;
2908 
2909 		DOWN_SUM(running_sum);
2910 		DOWN_SUM(running_sum);
2911 	}
2912 
2913 	if (natt_loc_ext != NULL) {
2914 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2915 
2916 		/* Ensured by sadb_addrfix(). */
2917 		ASSERT(natt_loc->sin_family == AF_INET);
2918 
2919 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2920 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2921 
2922 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2923 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2924 
2925 		/*
2926 		 * NAT-T port agility means we may have natt_loc_ext, but
2927 		 * only for a local-port change.
2928 		 */
2929 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2930 			uint32_t l_dst = ntohl(*dst_addr_ptr);
2931 			uint32_t l_loc = ntohl(*natt_loc_ptr);
2932 
2933 			DOWN_SUM(l_loc);
2934 			DOWN_SUM(l_loc);
2935 			DOWN_SUM(l_dst);
2936 			DOWN_SUM(l_dst);
2937 
2938 			/*
2939 			 * We're 1's complement for checksums, so check for
2940 			 * wraparound here.
2941 			 */
2942 			if (l_loc > l_dst)
2943 				l_dst--;
2944 
2945 			running_sum += l_dst - l_loc;
2946 			DOWN_SUM(running_sum);
2947 			DOWN_SUM(running_sum);
2948 		}
2949 	}
2950 
2951 	newbie->ipsa_inbound_cksum = running_sum;
2952 #undef DOWN_SUM
2953 }
2954 
2955 /*
2956  * This function is called from consumers that need to insert a fully-grown
2957  * security association into its tables.  This function takes into account that
2958  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2959  * hash bucket parameters are set in order of what the SA will be most of the
2960  * time.  (For example, an SA with an unspecified source, and a multicast
2961  * destination will primarily be an outbound SA.  OTOH, if that destination
2962  * is unicast for this node, then the SA will primarily be inbound.)
2963  *
2964  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2965  * to check both buckets for purposes of collision.
2966  *
2967  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2968  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2969  * with additional diagnostic information because there is at least one EINVAL
2970  * case here.
2971  */
2972 int
2973 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2974     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2975     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2976     netstack_t *ns, sadbp_t *spp)
2977 {
2978 	ipsa_t *newbie_clone = NULL, *scratch;
2979 	ipsap_t *ipsapp = NULL;
2980 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2981 	sadb_address_t *srcext =
2982 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2983 	sadb_address_t *dstext =
2984 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2985 	sadb_address_t *isrcext =
2986 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2987 	sadb_address_t *idstext =
2988 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2989 	sadb_x_kmc_t *kmcext =
2990 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2991 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2992 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2993 	sadb_x_pair_t *pair_ext =
2994 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2995 #if 0
2996 	/*
2997 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
2998 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
2999 	 */
3000 	sadb_sens_t *sens = (sadb_sens_t *);
3001 #endif
3002 	struct sockaddr_in *src, *dst, *isrc, *idst;
3003 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
3004 	sadb_lifetime_t *soft =
3005 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3006 	sadb_lifetime_t *hard =
3007 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3008 	sa_family_t af;
3009 	int error = 0;
3010 	boolean_t isupdate = (newbie != NULL);
3011 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
3012 	mblk_t *ctl_mp = NULL;
3013 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3014 
3015 	if (srcext == NULL) {
3016 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3017 		return (EINVAL);
3018 	}
3019 	if (dstext == NULL) {
3020 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3021 		return (EINVAL);
3022 	}
3023 	if (assoc == NULL) {
3024 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3025 		return (EINVAL);
3026 	}
3027 
3028 	src = (struct sockaddr_in *)(srcext + 1);
3029 	src6 = (struct sockaddr_in6 *)(srcext + 1);
3030 	dst = (struct sockaddr_in *)(dstext + 1);
3031 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
3032 	if (isrcext != NULL) {
3033 		isrc = (struct sockaddr_in *)(isrcext + 1);
3034 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
3035 		ASSERT(idstext != NULL);
3036 		idst = (struct sockaddr_in *)(idstext + 1);
3037 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
3038 	} else {
3039 		isrc = NULL;
3040 		isrc6 = NULL;
3041 	}
3042 
3043 	af = src->sin_family;
3044 
3045 	if (af == AF_INET) {
3046 		src_addr_ptr = (uint32_t *)&src->sin_addr;
3047 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
3048 	} else {
3049 		ASSERT(af == AF_INET6);
3050 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
3051 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
3052 	}
3053 
3054 	/*
3055 	 * Check to see if the new SA will be cloned AND paired. The
3056 	 * reason a SA will be cloned is the source or destination addresses
3057 	 * are not specific enough to determine if the SA goes in the outbound
3058 	 * or the inbound hash table, so its cloned and put in both. If
3059 	 * the SA is paired, it's soft linked to another SA for the other
3060 	 * direction. Keeping track and looking up SA's that are direction
3061 	 * unspecific and linked is too hard.
3062 	 */
3063 	if (clone && (pair_ext != NULL)) {
3064 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3065 		return (EINVAL);
3066 	}
3067 
3068 	if (!isupdate) {
3069 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3070 		    src_addr_ptr, dst_addr_ptr, af, ns);
3071 		if (newbie == NULL)
3072 			return (ENOMEM);
3073 	}
3074 
3075 	mutex_enter(&newbie->ipsa_lock);
3076 
3077 	if (isrc != NULL) {
3078 		if (isrc->sin_family == AF_INET) {
3079 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3080 				if (srcext->sadb_address_proto != 0) {
3081 					/*
3082 					 * Mismatched outer-packet protocol
3083 					 * and inner-packet address family.
3084 					 */
3085 					mutex_exit(&newbie->ipsa_lock);
3086 					error = EPROTOTYPE;
3087 					goto error;
3088 				} else {
3089 					/* Fill in with explicit protocol. */
3090 					srcext->sadb_address_proto =
3091 					    IPPROTO_ENCAP;
3092 					dstext->sadb_address_proto =
3093 					    IPPROTO_ENCAP;
3094 				}
3095 			}
3096 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3097 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3098 		} else {
3099 			ASSERT(isrc->sin_family == AF_INET6);
3100 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3101 				if (srcext->sadb_address_proto != 0) {
3102 					/*
3103 					 * Mismatched outer-packet protocol
3104 					 * and inner-packet address family.
3105 					 */
3106 					mutex_exit(&newbie->ipsa_lock);
3107 					error = EPROTOTYPE;
3108 					goto error;
3109 				} else {
3110 					/* Fill in with explicit protocol. */
3111 					srcext->sadb_address_proto =
3112 					    IPPROTO_IPV6;
3113 					dstext->sadb_address_proto =
3114 					    IPPROTO_IPV6;
3115 				}
3116 			}
3117 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3118 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3119 		}
3120 		newbie->ipsa_innerfam = isrc->sin_family;
3121 
3122 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3123 		    newbie->ipsa_innerfam);
3124 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3125 		    newbie->ipsa_innerfam);
3126 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3127 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3128 
3129 		/* Unique value uses inner-ports for Tunnel Mode... */
3130 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3131 		    idst->sin_port, dstext->sadb_address_proto,
3132 		    idstext->sadb_address_proto);
3133 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3134 		    idst->sin_port, dstext->sadb_address_proto,
3135 		    idstext->sadb_address_proto);
3136 	} else {
3137 		/* ... and outer-ports for Transport Mode. */
3138 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3139 		    dst->sin_port, dstext->sadb_address_proto, 0);
3140 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3141 		    dst->sin_port, dstext->sadb_address_proto, 0);
3142 	}
3143 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3144 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3145 
3146 	sadb_nat_calculations(newbie,
3147 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3148 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3149 	    src_addr_ptr, dst_addr_ptr);
3150 
3151 	newbie->ipsa_type = samsg->sadb_msg_satype;
3152 	ASSERT(assoc->sadb_sa_state == SADB_SASTATE_MATURE);
3153 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3154 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3155 
3156 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3157 	if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3158 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) ||
3159 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3160 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) ||
3161 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3162 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) {
3163 		mutex_exit(&newbie->ipsa_lock);
3164 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3165 		error = EINVAL;
3166 		goto error;
3167 	}
3168 	/*
3169 	 * If unspecified source address, force replay_wsize to 0.
3170 	 * This is because an SA that has multiple sources of secure
3171 	 * traffic cannot enforce a replay counter w/o synchronizing the
3172 	 * senders.
3173 	 */
3174 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3175 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3176 	else
3177 		newbie->ipsa_replay_wsize = 0;
3178 
3179 	newbie->ipsa_addtime = gethrestime_sec();
3180 
3181 	if (kmcext != NULL) {
3182 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3183 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3184 	}
3185 
3186 	/*
3187 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3188 	 * The spec says that one can update current lifetimes, but
3189 	 * that seems impractical, especially in the larval-to-mature
3190 	 * update that this function performs.
3191 	 */
3192 	if (soft != NULL) {
3193 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3194 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3195 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3196 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3197 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3198 	}
3199 	if (hard != NULL) {
3200 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3201 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3202 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3203 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3204 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3205 	}
3206 
3207 	newbie->ipsa_authtmpl = NULL;
3208 	newbie->ipsa_encrtmpl = NULL;
3209 
3210 	if (akey != NULL) {
3211 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3212 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3213 		/* In case we have to round up to the next byte... */
3214 		if ((akey->sadb_key_bits & 0x7) != 0)
3215 			newbie->ipsa_authkeylen++;
3216 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3217 		    KM_NOSLEEP);
3218 		if (newbie->ipsa_authkey == NULL) {
3219 			error = ENOMEM;
3220 			mutex_exit(&newbie->ipsa_lock);
3221 			goto error;
3222 		}
3223 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3224 		bzero(akey + 1, newbie->ipsa_authkeylen);
3225 
3226 		/*
3227 		 * Pre-initialize the kernel crypto framework key
3228 		 * structure.
3229 		 */
3230 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3231 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3232 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3233 
3234 		mutex_enter(&ipss->ipsec_alg_lock);
3235 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3236 		mutex_exit(&ipss->ipsec_alg_lock);
3237 		if (error != 0) {
3238 			mutex_exit(&newbie->ipsa_lock);
3239 			goto error;
3240 		}
3241 	}
3242 
3243 	if (ekey != NULL) {
3244 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3245 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3246 		/* In case we have to round up to the next byte... */
3247 		if ((ekey->sadb_key_bits & 0x7) != 0)
3248 			newbie->ipsa_encrkeylen++;
3249 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3250 		    KM_NOSLEEP);
3251 		if (newbie->ipsa_encrkey == NULL) {
3252 			error = ENOMEM;
3253 			mutex_exit(&newbie->ipsa_lock);
3254 			goto error;
3255 		}
3256 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3257 		/* XXX is this safe w.r.t db_ref, etc? */
3258 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3259 
3260 		/*
3261 		 * Pre-initialize the kernel crypto framework key
3262 		 * structure.
3263 		 */
3264 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3265 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3266 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3267 
3268 		mutex_enter(&ipss->ipsec_alg_lock);
3269 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3270 		mutex_exit(&ipss->ipsec_alg_lock);
3271 		if (error != 0) {
3272 			mutex_exit(&newbie->ipsa_lock);
3273 			goto error;
3274 		}
3275 	}
3276 
3277 	sadb_init_alginfo(newbie);
3278 
3279 	/*
3280 	 * Ptrs to processing functions.
3281 	 */
3282 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3283 		ipsecesp_init_funcs(newbie);
3284 	else
3285 		ipsecah_init_funcs(newbie);
3286 	ASSERT(newbie->ipsa_output_func != NULL &&
3287 	    newbie->ipsa_input_func != NULL);
3288 
3289 	/*
3290 	 * Certificate ID stuff.
3291 	 */
3292 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3293 		sadb_ident_t *id =
3294 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3295 
3296 		/*
3297 		 * Can assume strlen() will return okay because ext_check() in
3298 		 * keysock.c prepares the string for us.
3299 		 */
3300 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3301 		    (char *)(id+1), ns);
3302 		if (newbie->ipsa_src_cid == NULL) {
3303 			error = ENOMEM;
3304 			mutex_exit(&newbie->ipsa_lock);
3305 			goto error;
3306 		}
3307 	}
3308 
3309 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3310 		sadb_ident_t *id =
3311 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3312 
3313 		/*
3314 		 * Can assume strlen() will return okay because ext_check() in
3315 		 * keysock.c prepares the string for us.
3316 		 */
3317 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3318 		    (char *)(id+1), ns);
3319 		if (newbie->ipsa_dst_cid == NULL) {
3320 			error = ENOMEM;
3321 			mutex_exit(&newbie->ipsa_lock);
3322 			goto error;
3323 		}
3324 	}
3325 
3326 #if 0
3327 	/* XXXMLS  SENSITIVITY handling code. */
3328 	if (sens != NULL) {
3329 		int i;
3330 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3331 
3332 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3333 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3334 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3335 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3336 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3337 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3338 		    KM_NOSLEEP);
3339 		if (newbie->ipsa_integ == NULL) {
3340 			error = ENOMEM;
3341 			mutex_exit(&newbie->ipsa_lock);
3342 			goto error;
3343 		}
3344 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3345 		    KM_NOSLEEP);
3346 		if (newbie->ipsa_sens == NULL) {
3347 			error = ENOMEM;
3348 			mutex_exit(&newbie->ipsa_lock);
3349 			goto error;
3350 		}
3351 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3352 			newbie->ipsa_sens[i] = *bitmap;
3353 			bitmap++;
3354 		}
3355 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3356 			newbie->ipsa_integ[i] = *bitmap;
3357 			bitmap++;
3358 		}
3359 	}
3360 
3361 #endif
3362 
3363 	/* now that the SA has been updated, set its new state */
3364 	newbie->ipsa_state = assoc->sadb_sa_state;
3365 
3366 	if (clone) {
3367 		newbie->ipsa_haspeer = B_TRUE;
3368 	} else {
3369 		if (!is_inbound) {
3370 			lifetime_fuzz(newbie);
3371 		}
3372 	}
3373 	/*
3374 	 * The less locks I hold when doing an insertion and possible cloning,
3375 	 * the better!
3376 	 */
3377 	mutex_exit(&newbie->ipsa_lock);
3378 
3379 	if (clone) {
3380 		newbie_clone = sadb_cloneassoc(newbie);
3381 
3382 		if (newbie_clone == NULL) {
3383 			error = ENOMEM;
3384 			goto error;
3385 		}
3386 	}
3387 
3388 	/*
3389 	 * Enter the bucket locks.  The order of entry is outbound,
3390 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3391 	 * based on the destination address type.  If the destination address
3392 	 * type is for a node that isn't mine (or potentially mine), the
3393 	 * "primary" bucket is the outbound one.
3394 	 */
3395 	if (!is_inbound) {
3396 		/* primary == outbound */
3397 		mutex_enter(&primary->isaf_lock);
3398 		mutex_enter(&secondary->isaf_lock);
3399 	} else {
3400 		/* primary == inbound */
3401 		mutex_enter(&secondary->isaf_lock);
3402 		mutex_enter(&primary->isaf_lock);
3403 	}
3404 
3405 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3406 	    newbie->ipsa_spi));
3407 
3408 	/*
3409 	 * sadb_insertassoc() doesn't increment the reference
3410 	 * count.  We therefore have to increment the
3411 	 * reference count one more time to reflect the
3412 	 * pointers of the table that reference this SA.
3413 	 */
3414 	IPSA_REFHOLD(newbie);
3415 
3416 	if (isupdate) {
3417 		/*
3418 		 * Unlink from larval holding cell in the "inbound" fanout.
3419 		 */
3420 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3421 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3422 		sadb_unlinkassoc(newbie);
3423 	}
3424 
3425 	mutex_enter(&newbie->ipsa_lock);
3426 	error = sadb_insertassoc(newbie, primary);
3427 	if (error == 0) {
3428 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3429 		    is_inbound);
3430 	}
3431 	mutex_exit(&newbie->ipsa_lock);
3432 
3433 	if (error != 0) {
3434 		/*
3435 		 * Since sadb_insertassoc() failed, we must decrement the
3436 		 * refcount again so the cleanup code will actually free
3437 		 * the offending SA.
3438 		 */
3439 		IPSA_REFRELE(newbie);
3440 		goto error_unlock;
3441 	}
3442 
3443 	if (newbie_clone != NULL) {
3444 		mutex_enter(&newbie_clone->ipsa_lock);
3445 		error = sadb_insertassoc(newbie_clone, secondary);
3446 		mutex_exit(&newbie_clone->ipsa_lock);
3447 		if (error != 0) {
3448 			/* Collision in secondary table. */
3449 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3450 			goto error_unlock;
3451 		}
3452 		IPSA_REFHOLD(newbie_clone);
3453 	} else {
3454 		ASSERT(primary != secondary);
3455 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3456 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3457 		if (scratch != NULL) {
3458 			/* Collision in secondary table. */
3459 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3460 			/* Set the error, since ipsec_getassocbyspi() can't. */
3461 			error = EEXIST;
3462 			goto error_unlock;
3463 		}
3464 	}
3465 
3466 	/* OKAY!  So let's do some reality check assertions. */
3467 
3468 	ASSERT(!MUTEX_HELD(&newbie->ipsa_lock));
3469 	ASSERT(newbie_clone == NULL || (!MUTEX_HELD(&newbie_clone->ipsa_lock)));
3470 	/*
3471 	 * If hardware acceleration could happen, send it.
3472 	 */
3473 	if (ctl_mp != NULL) {
3474 		putnext(ip_q, ctl_mp);
3475 		ctl_mp = NULL;
3476 	}
3477 
3478 error_unlock:
3479 
3480 	/*
3481 	 * We can exit the locks in any order.	Only entrance needs to
3482 	 * follow any protocol.
3483 	 */
3484 	mutex_exit(&secondary->isaf_lock);
3485 	mutex_exit(&primary->isaf_lock);
3486 
3487 	if (pair_ext != NULL && error == 0) {
3488 		/* update pair_spi if it exists. */
3489 		ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
3490 		if (ipsapp == NULL) {
3491 			error = ESRCH;
3492 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
3493 		} else if (ipsapp->ipsap_psa_ptr != NULL) {
3494 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3495 			error = EINVAL;
3496 		} else {
3497 			/* update_pairing() sets diagnostic */
3498 			error = update_pairing(ipsapp, ksi, diagnostic, spp);
3499 		}
3500 	}
3501 	/* Common error point for this routine. */
3502 error:
3503 	if (newbie != NULL) {
3504 		if (error != 0) {
3505 			/* This SA is broken, let the reaper clean up. */
3506 			mutex_enter(&newbie->ipsa_lock);
3507 			newbie->ipsa_state = IPSA_STATE_DEAD;
3508 			newbie->ipsa_hardexpiretime = 1;
3509 			mutex_exit(&newbie->ipsa_lock);
3510 		}
3511 		IPSA_REFRELE(newbie);
3512 	}
3513 	if (newbie_clone != NULL) {
3514 		IPSA_REFRELE(newbie_clone);
3515 	}
3516 	if (ctl_mp != NULL)
3517 		freemsg(ctl_mp);
3518 
3519 	if (error == 0) {
3520 		/*
3521 		 * Construct favorable PF_KEY return message and send to
3522 		 * keysock. Update the flags in the original keysock message
3523 		 * to reflect the actual flags in the new SA.
3524 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3525 		 * make sure to REFHOLD, call, then REFRELE.)
3526 		 */
3527 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3528 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3529 	}
3530 
3531 	destroy_ipsa_pair(ipsapp);
3532 	return (error);
3533 }
3534 
3535 /*
3536  * Set the time of first use for a security association.  Update any
3537  * expiration times as a result.
3538  */
3539 void
3540 sadb_set_usetime(ipsa_t *assoc)
3541 {
3542 	time_t snapshot = gethrestime_sec();
3543 
3544 	mutex_enter(&assoc->ipsa_lock);
3545 	assoc->ipsa_lastuse = snapshot;
3546 	/*
3547 	 * Caller does check usetime before calling me usually, and
3548 	 * double-checking is better than a mutex_enter/exit hit.
3549 	 */
3550 	if (assoc->ipsa_usetime == 0) {
3551 		/*
3552 		 * This is redundant for outbound SA's, as
3553 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3554 		 * Inbound SAs, however, have no such protection.
3555 		 */
3556 		assoc->ipsa_flags |= IPSA_F_USED;
3557 		assoc->ipsa_usetime = snapshot;
3558 
3559 		/*
3560 		 * After setting the use time, see if we have a use lifetime
3561 		 * that would cause the actual SA expiration time to shorten.
3562 		 */
3563 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3564 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3565 	}
3566 	mutex_exit(&assoc->ipsa_lock);
3567 }
3568 
3569 /*
3570  * Send up a PF_KEY expire message for this association.
3571  */
3572 static void
3573 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3574 {
3575 	mblk_t *mp, *mp1;
3576 	int alloclen, af;
3577 	sadb_msg_t *samsg;
3578 	sadb_lifetime_t *current, *expire;
3579 	sadb_sa_t *saext;
3580 	uint8_t *end;
3581 	boolean_t tunnel_mode;
3582 
3583 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3584 
3585 	/* Don't bother sending if there's no queue. */
3586 	if (pfkey_q == NULL)
3587 		return;
3588 
3589 	/* If the SA is one of a pair, only SOFT expire the OUTBOUND SA */
3590 	if (assoc->ipsa_state == IPSA_STATE_DYING &&
3591 	    (assoc->ipsa_flags & IPSA_F_PAIRED) &&
3592 	    !(assoc->ipsa_flags & IPSA_F_OUTBOUND)) {
3593 		return;
3594 	}
3595 
3596 	mp = sadb_keysock_out(0);
3597 	if (mp == NULL) {
3598 		/* cmn_err(CE_WARN, */
3599 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3600 		return;
3601 	}
3602 
3603 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3604 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3605 
3606 	af = assoc->ipsa_addrfam;
3607 	switch (af) {
3608 	case AF_INET:
3609 		alloclen += 2 * sizeof (struct sockaddr_in);
3610 		break;
3611 	case AF_INET6:
3612 		alloclen += 2 * sizeof (struct sockaddr_in6);
3613 		break;
3614 	default:
3615 		/* Won't happen unless there's a kernel bug. */
3616 		freeb(mp);
3617 		cmn_err(CE_WARN,
3618 		    "sadb_expire_assoc: Unknown address length.\n");
3619 		return;
3620 	}
3621 
3622 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3623 	if (tunnel_mode) {
3624 		alloclen += 2 * sizeof (sadb_address_t);
3625 		switch (assoc->ipsa_innerfam) {
3626 		case AF_INET:
3627 			alloclen += 2 * sizeof (struct sockaddr_in);
3628 			break;
3629 		case AF_INET6:
3630 			alloclen += 2 * sizeof (struct sockaddr_in6);
3631 			break;
3632 		default:
3633 			/* Won't happen unless there's a kernel bug. */
3634 			freeb(mp);
3635 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3636 			    "Unknown inner address length.\n");
3637 			return;
3638 		}
3639 	}
3640 
3641 	mp->b_cont = allocb(alloclen, BPRI_HI);
3642 	if (mp->b_cont == NULL) {
3643 		freeb(mp);
3644 		/* cmn_err(CE_WARN, */
3645 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3646 		return;
3647 	}
3648 
3649 	mp1 = mp;
3650 	mp = mp->b_cont;
3651 	end = mp->b_wptr + alloclen;
3652 
3653 	samsg = (sadb_msg_t *)mp->b_wptr;
3654 	mp->b_wptr += sizeof (*samsg);
3655 	samsg->sadb_msg_version = PF_KEY_V2;
3656 	samsg->sadb_msg_type = SADB_EXPIRE;
3657 	samsg->sadb_msg_errno = 0;
3658 	samsg->sadb_msg_satype = assoc->ipsa_type;
3659 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3660 	samsg->sadb_msg_reserved = 0;
3661 	samsg->sadb_msg_seq = 0;
3662 	samsg->sadb_msg_pid = 0;
3663 
3664 	saext = (sadb_sa_t *)mp->b_wptr;
3665 	mp->b_wptr += sizeof (*saext);
3666 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3667 	saext->sadb_sa_exttype = SADB_EXT_SA;
3668 	saext->sadb_sa_spi = assoc->ipsa_spi;
3669 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3670 	saext->sadb_sa_state = assoc->ipsa_state;
3671 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3672 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3673 	saext->sadb_sa_flags = assoc->ipsa_flags;
3674 
3675 	current = (sadb_lifetime_t *)mp->b_wptr;
3676 	mp->b_wptr += sizeof (sadb_lifetime_t);
3677 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3678 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3679 	/* We do not support the concept. */
3680 	current->sadb_lifetime_allocations = 0;
3681 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3682 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3683 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3684 
3685 	expire = (sadb_lifetime_t *)mp->b_wptr;
3686 	mp->b_wptr += sizeof (*expire);
3687 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3688 
3689 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3690 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3691 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3692 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3693 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3694 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3695 	} else {
3696 		ASSERT(assoc->ipsa_state == IPSA_STATE_DYING);
3697 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3698 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3699 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3700 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3701 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3702 	}
3703 
3704 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3705 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3706 	    SA_PROTO(assoc), 0);
3707 	ASSERT(mp->b_wptr != NULL);
3708 
3709 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3710 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3711 	    SA_PROTO(assoc), 0);
3712 	ASSERT(mp->b_wptr != NULL);
3713 
3714 	if (tunnel_mode) {
3715 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3716 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3717 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3718 		    assoc->ipsa_innersrcpfx);
3719 		ASSERT(mp->b_wptr != NULL);
3720 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3721 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3722 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3723 		    assoc->ipsa_innerdstpfx);
3724 		ASSERT(mp->b_wptr != NULL);
3725 	}
3726 
3727 	/* Can just putnext, we're ready to go! */
3728 	putnext(pfkey_q, mp1);
3729 }
3730 
3731 /*
3732  * "Age" the SA with the number of bytes that was used to protect traffic.
3733  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3734  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3735  * otherwise.  (If B_FALSE is returned, the association either was, or became
3736  * DEAD.)
3737  */
3738 boolean_t
3739 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3740     boolean_t sendmsg)
3741 {
3742 	boolean_t rc = B_TRUE;
3743 	uint64_t newtotal;
3744 
3745 	mutex_enter(&assoc->ipsa_lock);
3746 	newtotal = assoc->ipsa_bytes + bytes;
3747 	if (assoc->ipsa_hardbyteslt != 0 &&
3748 	    newtotal >= assoc->ipsa_hardbyteslt) {
3749 		if (assoc->ipsa_state < IPSA_STATE_DEAD) {
3750 			/*
3751 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3752 			 * this off on another non-interrupt thread.  Also
3753 			 * unlink this SA immediately.
3754 			 */
3755 			assoc->ipsa_state = IPSA_STATE_DEAD;
3756 			if (sendmsg)
3757 				sadb_expire_assoc(pfkey_q, assoc);
3758 			/*
3759 			 * Set non-zero expiration time so sadb_age_assoc()
3760 			 * will work when reaping.
3761 			 */
3762 			assoc->ipsa_hardexpiretime = (time_t)1;
3763 		} /* Else someone beat me to it! */
3764 		rc = B_FALSE;
3765 	} else if (assoc->ipsa_softbyteslt != 0 &&
3766 	    (newtotal >= assoc->ipsa_softbyteslt)) {
3767 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3768 			/*
3769 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3770 			 * this off on another non-interrupt thread.
3771 			 */
3772 			assoc->ipsa_state = IPSA_STATE_DYING;
3773 			assoc->ipsa_bytes = newtotal;
3774 			if (sendmsg)
3775 				sadb_expire_assoc(pfkey_q, assoc);
3776 		} /* Else someone beat me to it! */
3777 	}
3778 	if (rc == B_TRUE)
3779 		assoc->ipsa_bytes = newtotal;
3780 	mutex_exit(&assoc->ipsa_lock);
3781 	return (rc);
3782 }
3783 
3784 /*
3785  * Push one or more DL_CO_DELETE messages queued up by
3786  * sadb_torch_assoc down to the underlying driver now that it's a
3787  * convenient time for it (i.e., ipsa bucket locks not held).
3788  */
3789 static void
3790 sadb_drain_torchq(queue_t *q, mblk_t *mp)
3791 {
3792 	while (mp != NULL) {
3793 		mblk_t *next = mp->b_next;
3794 		mp->b_next = NULL;
3795 		if (q != NULL)
3796 			putnext(q, mp);
3797 		else
3798 			freemsg(mp);
3799 		mp = next;
3800 	}
3801 }
3802 
3803 /*
3804  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3805  *     sadb_age_assoc().
3806  *
3807  * If SA is hardware-accelerated, and we can't allocate the mblk
3808  * containing the DL_CO_DELETE, just return; it will remain in the
3809  * table and be swept up by sadb_ager() in a subsequent pass.
3810  */
3811 static ipsa_t *
3812 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
3813 {
3814 	mblk_t *mp;
3815 
3816 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3817 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3818 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3819 
3820 	/*
3821 	 * Force cached SAs to be revalidated..
3822 	 */
3823 	head->isaf_gen++;
3824 
3825 	if (sa->ipsa_flags & IPSA_F_HW) {
3826 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
3827 		if (mp == NULL) {
3828 			mutex_exit(&sa->ipsa_lock);
3829 			return (NULL);
3830 		}
3831 		mp->b_next = *mq;
3832 		*mq = mp;
3833 	}
3834 	mutex_exit(&sa->ipsa_lock);
3835 	sadb_unlinkassoc(sa);
3836 
3837 	return (NULL);
3838 }
3839 
3840 /*
3841  * Do various SA-is-idle activities depending on delta (the number of idle
3842  * seconds on the SA) and/or other properties of the SA.
3843  *
3844  * Return B_TRUE if I've sent a packet, because I have to drop the
3845  * association's mutex before sending a packet out the wire.
3846  */
3847 /* ARGSUSED */
3848 static boolean_t
3849 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3850 {
3851 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3852 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3853 
3854 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3855 
3856 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3857 	    delta >= nat_t_interval &&
3858 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3859 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3860 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3861 		mutex_exit(&assoc->ipsa_lock);
3862 		ipsecesp_send_keepalive(assoc);
3863 		return (B_TRUE);
3864 	}
3865 	return (B_FALSE);
3866 }
3867 
3868 /*
3869  * Return "assoc" if haspeer is true and I send an expire.  This allows
3870  * the consumers' aging functions to tidy up an expired SA's peer.
3871  */
3872 static ipsa_t *
3873 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3874     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
3875 {
3876 	ipsa_t *retval = NULL;
3877 	boolean_t dropped_mutex = B_FALSE;
3878 
3879 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3880 
3881 	mutex_enter(&assoc->ipsa_lock);
3882 
3883 	if ((assoc->ipsa_state == IPSA_STATE_LARVAL) &&
3884 	    (assoc->ipsa_hardexpiretime <= current)) {
3885 		assoc->ipsa_state = IPSA_STATE_DEAD;
3886 		return (sadb_torch_assoc(head, assoc, inbound, mq));
3887 	}
3888 
3889 	/*
3890 	 * Check lifetimes.  Fortunately, SA setup is done
3891 	 * such that there are only two times to look at,
3892 	 * softexpiretime, and hardexpiretime.
3893 	 *
3894 	 * Check hard first.
3895 	 */
3896 
3897 	if (assoc->ipsa_hardexpiretime != 0 &&
3898 	    assoc->ipsa_hardexpiretime <= current) {
3899 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
3900 			return (sadb_torch_assoc(head, assoc, inbound, mq));
3901 
3902 		/*
3903 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
3904 		 */
3905 		assoc->ipsa_state = IPSA_STATE_DEAD;
3906 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
3907 			/*
3908 			 * If the SA is paired or peered with another, put
3909 			 * a copy on a list which can be processed later, the
3910 			 * pair/peer SA needs to be updated so the both die
3911 			 * at the same time.
3912 			 *
3913 			 * If I return assoc, I have to bump up its reference
3914 			 * count to keep with the ipsa_t reference count
3915 			 * semantics.
3916 			 */
3917 			IPSA_REFHOLD(assoc);
3918 			retval = assoc;
3919 		}
3920 		sadb_expire_assoc(pfkey_q, assoc);
3921 		assoc->ipsa_hardexpiretime = current + reap_delay;
3922 	} else if (assoc->ipsa_softexpiretime != 0 &&
3923 	    assoc->ipsa_softexpiretime <= current &&
3924 	    assoc->ipsa_state < IPSA_STATE_DYING) {
3925 		/*
3926 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
3927 		 * this off on another non-interrupt thread.
3928 		 */
3929 		assoc->ipsa_state = IPSA_STATE_DYING;
3930 		if (assoc->ipsa_haspeer) {
3931 			/*
3932 			 * If the SA has a peer, update the peer's state
3933 			 * on SOFT_EXPIRE, this is mostly to prevent two
3934 			 * expire messages from effectively the same SA.
3935 			 *
3936 			 * Don't care about paired SA's, then can (and should)
3937 			 * be able to soft expire at different times.
3938 			 *
3939 			 * If I return assoc, I have to bump up its
3940 			 * reference count to keep with the ipsa_t reference
3941 			 * count semantics.
3942 			 */
3943 			IPSA_REFHOLD(assoc);
3944 			retval = assoc;
3945 		}
3946 		sadb_expire_assoc(pfkey_q, assoc);
3947 	} else {
3948 		/* Check idle time activities. */
3949 		dropped_mutex = sadb_idle_activities(assoc,
3950 		    current - assoc->ipsa_lastuse, inbound);
3951 	}
3952 
3953 	if (!dropped_mutex)
3954 		mutex_exit(&assoc->ipsa_lock);
3955 	return (retval);
3956 }
3957 
3958 /*
3959  * Called by a consumer protocol to do ther dirty work of reaping dead
3960  * Security Associations.
3961  *
3962  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
3963  * SA's that are already marked DEAD, so expired SA's are only reaped
3964  * the second time sadb_ager() runs.
3965  */
3966 void
3967 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
3968     netstack_t *ns)
3969 {
3970 	int i;
3971 	isaf_t *bucket;
3972 	ipsa_t *assoc, *spare;
3973 	iacqf_t *acqlist;
3974 	ipsacq_t *acqrec, *spareacq;
3975 	templist_t *haspeerlist, *newbie;
3976 	/* Snapshot current time now. */
3977 	time_t current = gethrestime_sec();
3978 	mblk_t *mq = NULL;
3979 	haspeerlist = NULL;
3980 
3981 	/*
3982 	 * Do my dirty work.  This includes aging real entries, aging
3983 	 * larvals, and aging outstanding ACQUIREs.
3984 	 *
3985 	 * I hope I don't tie up resources for too long.
3986 	 */
3987 
3988 	/* Age acquires. */
3989 
3990 	for (i = 0; i < sp->sdb_hashsize; i++) {
3991 		acqlist = &sp->sdb_acq[i];
3992 		mutex_enter(&acqlist->iacqf_lock);
3993 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
3994 		    acqrec = spareacq) {
3995 			spareacq = acqrec->ipsacq_next;
3996 			if (current > acqrec->ipsacq_expire)
3997 				sadb_destroy_acquire(acqrec, ns);
3998 		}
3999 		mutex_exit(&acqlist->iacqf_lock);
4000 	}
4001 
4002 	/* Age inbound associations. */
4003 	for (i = 0; i < sp->sdb_hashsize; i++) {
4004 		bucket = &(sp->sdb_if[i]);
4005 		mutex_enter(&bucket->isaf_lock);
4006 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4007 		    assoc = spare) {
4008 			spare = assoc->ipsa_next;
4009 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4010 			    reap_delay, B_TRUE, &mq) != NULL) {
4011 				/*
4012 				 * Put SA's which have a peer or SA's which
4013 				 * are paired on a list for processing after
4014 				 * all the hash tables have been walked.
4015 				 *
4016 				 * sadb_age_assoc() increments the refcnt,
4017 				 * effectively doing an IPSA_REFHOLD().
4018 				 */
4019 				newbie = kmem_alloc(sizeof (*newbie),
4020 				    KM_NOSLEEP);
4021 				if (newbie == NULL) {
4022 					/*
4023 					 * Don't forget to REFRELE().
4024 					 */
4025 					IPSA_REFRELE(assoc);
4026 					continue;	/* for loop... */
4027 				}
4028 				newbie->next = haspeerlist;
4029 				newbie->ipsa = assoc;
4030 				haspeerlist = newbie;
4031 			}
4032 		}
4033 		mutex_exit(&bucket->isaf_lock);
4034 	}
4035 
4036 	if (mq != NULL) {
4037 		sadb_drain_torchq(ip_q, mq);
4038 		mq = NULL;
4039 	}
4040 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4041 	haspeerlist = NULL;
4042 
4043 	/* Age outbound associations. */
4044 	for (i = 0; i < sp->sdb_hashsize; i++) {
4045 		bucket = &(sp->sdb_of[i]);
4046 		mutex_enter(&bucket->isaf_lock);
4047 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4048 		    assoc = spare) {
4049 			spare = assoc->ipsa_next;
4050 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4051 			    reap_delay, B_FALSE, &mq) != NULL) {
4052 				/*
4053 				 * sadb_age_assoc() increments the refcnt,
4054 				 * effectively doing an IPSA_REFHOLD().
4055 				 */
4056 				newbie = kmem_alloc(sizeof (*newbie),
4057 				    KM_NOSLEEP);
4058 				if (newbie == NULL) {
4059 					/*
4060 					 * Don't forget to REFRELE().
4061 					 */
4062 					IPSA_REFRELE(assoc);
4063 					continue;	/* for loop... */
4064 				}
4065 				newbie->next = haspeerlist;
4066 				newbie->ipsa = assoc;
4067 				haspeerlist = newbie;
4068 			}
4069 		}
4070 		mutex_exit(&bucket->isaf_lock);
4071 	}
4072 	if (mq != NULL) {
4073 		sadb_drain_torchq(ip_q, mq);
4074 		mq = NULL;
4075 	}
4076 
4077 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4078 
4079 	/*
4080 	 * Run a GC pass to clean out dead identities.
4081 	 */
4082 	ipsid_gc(ns);
4083 }
4084 
4085 /*
4086  * Figure out when to reschedule the ager.
4087  */
4088 timeout_id_t
4089 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4090     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4091 {
4092 	hrtime_t end = gethrtime();
4093 	uint_t interval = *intp;
4094 
4095 	/*
4096 	 * See how long this took.  If it took too long, increase the
4097 	 * aging interval.
4098 	 */
4099 	if ((end - begin) > interval * 1000000) {
4100 		if (interval >= intmax) {
4101 			/* XXX Rate limit this?  Or recommend flush? */
4102 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4103 			    "Too many SA's to age out in %d msec.\n",
4104 			    intmax);
4105 		} else {
4106 			/* Double by shifting by one bit. */
4107 			interval <<= 1;
4108 			interval = min(interval, intmax);
4109 		}
4110 	} else if ((end - begin) <= interval * 500000 &&
4111 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4112 		/*
4113 		 * If I took less than half of the interval, then I should
4114 		 * ratchet the interval back down.  Never automatically
4115 		 * shift below the default aging interval.
4116 		 *
4117 		 * NOTE:This even overrides manual setting of the age
4118 		 *	interval using NDD.
4119 		 */
4120 		/* Halve by shifting one bit. */
4121 		interval >>= 1;
4122 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4123 	}
4124 	*intp = interval;
4125 	return (qtimeout(pfkey_q, ager, agerarg,
4126 	    interval * drv_usectohz(1000)));
4127 }
4128 
4129 
4130 /*
4131  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4132  * message takes when updating a MATURE or DYING SA.
4133  */
4134 static void
4135 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4136     sadb_lifetime_t *soft, boolean_t outbound)
4137 {
4138 	mutex_enter(&assoc->ipsa_lock);
4139 
4140 	/*
4141 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4142 	 * passed in during an update message.	We currently don't handle
4143 	 * these.
4144 	 */
4145 
4146 	if (hard != NULL) {
4147 		if (hard->sadb_lifetime_bytes != 0)
4148 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4149 		if (hard->sadb_lifetime_usetime != 0)
4150 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4151 		if (hard->sadb_lifetime_addtime != 0)
4152 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4153 		if (assoc->ipsa_hardaddlt != 0) {
4154 			assoc->ipsa_hardexpiretime =
4155 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4156 		}
4157 		if (assoc->ipsa_harduselt != 0 &&
4158 		    assoc->ipsa_flags & IPSA_F_USED) {
4159 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4160 		}
4161 		if (hard->sadb_lifetime_allocations != 0)
4162 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4163 	}
4164 
4165 	if (soft != NULL) {
4166 		if (soft->sadb_lifetime_bytes != 0) {
4167 			if (soft->sadb_lifetime_bytes >
4168 			    assoc->ipsa_hardbyteslt) {
4169 				assoc->ipsa_softbyteslt =
4170 				    assoc->ipsa_hardbyteslt;
4171 			} else {
4172 				assoc->ipsa_softbyteslt =
4173 				    soft->sadb_lifetime_bytes;
4174 			}
4175 		}
4176 		if (soft->sadb_lifetime_usetime != 0) {
4177 			if (soft->sadb_lifetime_usetime >
4178 			    assoc->ipsa_harduselt) {
4179 				assoc->ipsa_softuselt =
4180 				    assoc->ipsa_harduselt;
4181 			} else {
4182 				assoc->ipsa_softuselt =
4183 				    soft->sadb_lifetime_usetime;
4184 			}
4185 		}
4186 		if (soft->sadb_lifetime_addtime != 0) {
4187 			if (soft->sadb_lifetime_addtime >
4188 			    assoc->ipsa_hardexpiretime) {
4189 				assoc->ipsa_softexpiretime =
4190 				    assoc->ipsa_hardexpiretime;
4191 			} else {
4192 				assoc->ipsa_softaddlt =
4193 				    soft->sadb_lifetime_addtime;
4194 			}
4195 		}
4196 		if (assoc->ipsa_softaddlt != 0) {
4197 			assoc->ipsa_softexpiretime =
4198 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4199 		}
4200 		if (assoc->ipsa_softuselt != 0 &&
4201 		    assoc->ipsa_flags & IPSA_F_USED) {
4202 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4203 		}
4204 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4205 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4206 				lifetime_fuzz(assoc);
4207 		}
4208 
4209 		if (soft->sadb_lifetime_allocations != 0)
4210 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4211 	}
4212 	mutex_exit(&assoc->ipsa_lock);
4213 }
4214 
4215 /*
4216  * Common code to update an SA.
4217  */
4218 
4219 int
4220 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi,
4221     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4222     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4223     netstack_t *ns, uint8_t sadb_msg_type)
4224 {
4225 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4226 	sadb_address_t *srcext =
4227 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4228 	sadb_address_t *dstext =
4229 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4230 	sadb_x_kmc_t *kmcext =
4231 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4232 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4233 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4234 	sadb_lifetime_t *soft =
4235 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4236 	sadb_lifetime_t *hard =
4237 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4238 	sadb_x_pair_t *pair_ext =
4239 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4240 	ipsa_t *echo_target = NULL;
4241 	int error = 0;
4242 	ipsap_t *ipsapp = NULL;
4243 	uint32_t kmp = 0, kmc = 0;
4244 
4245 
4246 	/* I need certain extensions present for either UPDATE message. */
4247 	if (srcext == NULL) {
4248 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4249 		return (EINVAL);
4250 	}
4251 	if (dstext == NULL) {
4252 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4253 		return (EINVAL);
4254 	}
4255 	if (assoc == NULL) {
4256 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4257 		return (EINVAL);
4258 	}
4259 
4260 	if (kmcext != NULL) {
4261 		kmp = kmcext->sadb_x_kmc_proto;
4262 		kmc = kmcext->sadb_x_kmc_cookie;
4263 	}
4264 
4265 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4266 	if (ipsapp == NULL) {
4267 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
4268 		return (ESRCH);
4269 	}
4270 
4271 	if (ipsapp->ipsap_psa_ptr == NULL && ipsapp->ipsap_sa_ptr != NULL) {
4272 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4273 			/*
4274 			 * REFRELE the target and let the add_sa_func()
4275 			 * deal with updating a larval SA.
4276 			 */
4277 			destroy_ipsa_pair(ipsapp);
4278 			return (add_sa_func(mp, ksi, diagnostic, ns));
4279 		}
4280 	}
4281 
4282 	/*
4283 	 * Reality checks for updates of active associations.
4284 	 * Sundry first-pass UPDATE-specific reality checks.
4285 	 * Have to do the checks here, because it's after the add_sa code.
4286 	 * XXX STATS : logging/stats here?
4287 	 */
4288 
4289 	if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
4290 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4291 		error = EINVAL;
4292 		goto bail;
4293 	}
4294 
4295 	if (assoc->sadb_sa_flags & ~spp->s_updateflags) {
4296 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4297 		error = EINVAL;
4298 		goto bail;
4299 	}
4300 
4301 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4302 		error = EOPNOTSUPP;
4303 		goto bail;
4304 	}
4305 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) {
4306 		error = EINVAL;
4307 		goto bail;
4308 	}
4309 	if (akey != NULL) {
4310 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4311 		error = EINVAL;
4312 		goto bail;
4313 	}
4314 	if (ekey != NULL) {
4315 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4316 		error = EINVAL;
4317 		goto bail;
4318 	}
4319 
4320 	if (ipsapp->ipsap_sa_ptr != NULL) {
4321 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4322 			error = ESRCH;	/* DEAD == Not there, in this case. */
4323 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4324 			goto bail;
4325 		}
4326 		if ((kmp != 0) &&
4327 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmp != 0) ||
4328 		    (ipsapp->ipsap_sa_ptr->ipsa_kmp != kmp))) {
4329 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4330 			error = EINVAL;
4331 			goto bail;
4332 		}
4333 		if ((kmc != 0) &&
4334 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmc != 0) ||
4335 		    (ipsapp->ipsap_sa_ptr->ipsa_kmc != kmc))) {
4336 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4337 			error = EINVAL;
4338 			goto bail;
4339 		}
4340 	}
4341 
4342 	if (ipsapp->ipsap_psa_ptr != NULL) {
4343 		if (ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4344 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4345 			error = ESRCH;	/* DEAD == Not there, in this case. */
4346 			goto bail;
4347 		}
4348 		if ((kmp != 0) &&
4349 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmp != 0) ||
4350 		    (ipsapp->ipsap_psa_ptr->ipsa_kmp != kmp))) {
4351 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4352 			error = EINVAL;
4353 			goto bail;
4354 		}
4355 		if ((kmc != 0) &&
4356 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmc != 0) ||
4357 		    (ipsapp->ipsap_psa_ptr->ipsa_kmc != kmc))) {
4358 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4359 			error = EINVAL;
4360 			goto bail;
4361 		}
4362 	}
4363 
4364 	if (ipsapp->ipsap_sa_ptr != NULL) {
4365 		sadb_update_lifetimes(ipsapp->ipsap_sa_ptr, hard, soft, B_TRUE);
4366 		if (kmp != 0)
4367 			ipsapp->ipsap_sa_ptr->ipsa_kmp = kmp;
4368 		if (kmc != 0)
4369 			ipsapp->ipsap_sa_ptr->ipsa_kmc = kmc;
4370 	}
4371 
4372 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4373 		if (ipsapp->ipsap_psa_ptr != NULL) {
4374 			sadb_update_lifetimes(ipsapp->ipsap_psa_ptr, hard, soft,
4375 			    B_FALSE);
4376 			if (kmp != 0)
4377 				ipsapp->ipsap_psa_ptr->ipsa_kmp = kmp;
4378 			if (kmc != 0)
4379 				ipsapp->ipsap_psa_ptr->ipsa_kmc = kmc;
4380 		} else {
4381 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4382 			error = ESRCH;
4383 			goto bail;
4384 		}
4385 	}
4386 
4387 	if (pair_ext != NULL)
4388 		error = update_pairing(ipsapp, ksi, diagnostic, spp);
4389 
4390 	if (error == 0)
4391 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4392 		    ksi, echo_target);
4393 bail:
4394 
4395 	destroy_ipsa_pair(ipsapp);
4396 
4397 	return (error);
4398 }
4399 
4400 
4401 int
4402 update_pairing(ipsap_t *ipsapp, keysock_in_t *ksi, int *diagnostic,
4403     sadbp_t *spp)
4404 {
4405 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4406 	sadb_address_t *srcext =
4407 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4408 	sadb_address_t *dstext =
4409 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4410 	sadb_x_pair_t *pair_ext =
4411 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4412 	int error = 0;
4413 	ipsap_t *oipsapp = NULL;
4414 	boolean_t undo_pair = B_FALSE;
4415 	uint32_t ipsa_flags;
4416 
4417 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4418 	    assoc->sadb_sa_spi) {
4419 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4420 		return (EINVAL);
4421 	}
4422 
4423 	/*
4424 	 * Assume for now that the spi value provided in the SADB_UPDATE
4425 	 * message was valid, update the SA with its pair spi value.
4426 	 * If the spi turns out to be bogus or the SA no longer exists
4427 	 * then this will be detected when the reverse update is made
4428 	 * below.
4429 	 */
4430 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4431 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4432 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4433 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4434 
4435 	/*
4436 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4437 	 * should now return pointers to the SA *AND* its pair, if this is not
4438 	 * the case, the "otherspi" either did not exist or was deleted. Also
4439 	 * check that "otherspi" is not already paired. If everything looks
4440 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4441 	 * after this update to ensure its not deleted until we are done.
4442 	 */
4443 	oipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4444 	if (oipsapp == NULL) {
4445 		/*
4446 		 * This should never happen, calling function still has
4447 		 * IPSA_REFHELD on the SA we just updated.
4448 		 */
4449 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4450 		return (EINVAL);
4451 	}
4452 
4453 	if (oipsapp->ipsap_psa_ptr == NULL) {
4454 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4455 		undo_pair = B_TRUE;
4456 	} else {
4457 		ipsa_flags = oipsapp->ipsap_psa_ptr->ipsa_flags;
4458 		if (oipsapp->ipsap_psa_ptr->ipsa_state > IPSA_STATE_MATURE) {
4459 			/* Its dead Jim! */
4460 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4461 			undo_pair = B_TRUE;
4462 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4463 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4464 			/* This SA is in both hashtables. */
4465 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4466 			undo_pair = B_TRUE;
4467 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4468 			/* This SA is already paired with another. */
4469 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4470 			undo_pair = B_TRUE;
4471 		}
4472 	}
4473 
4474 	if (undo_pair) {
4475 		/* The pair SA does not exist. */
4476 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4477 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4478 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4479 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4480 		error = EINVAL;
4481 	} else {
4482 		mutex_enter(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4483 		oipsapp->ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4484 		oipsapp->ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4485 		mutex_exit(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4486 	}
4487 
4488 	destroy_ipsa_pair(oipsapp);
4489 	return (error);
4490 }
4491 
4492 /*
4493  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4494  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4495  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4496  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4497  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4498  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4499  * other direction's SA.
4500  */
4501 
4502 /*
4503  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4504  * grab it, lock it, and return it.  Otherwise return NULL.
4505  */
4506 static ipsacq_t *
4507 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4508     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4509     uint64_t unique_id)
4510 {
4511 	ipsacq_t *walker;
4512 	sa_family_t fam;
4513 	uint32_t blank_address[4] = {0, 0, 0, 0};
4514 
4515 	if (isrc == NULL) {
4516 		ASSERT(idst == NULL);
4517 		isrc = idst = blank_address;
4518 	}
4519 
4520 	/*
4521 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4522 	 *
4523 	 * XXX May need search for duplicates based on other things too!
4524 	 */
4525 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4526 	    walker = walker->ipsacq_next) {
4527 		mutex_enter(&walker->ipsacq_lock);
4528 		fam = walker->ipsacq_addrfam;
4529 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4530 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4531 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4532 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4533 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4534 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4535 		    (ap == walker->ipsacq_act) &&
4536 		    (pp == walker->ipsacq_policy) &&
4537 		    /* XXX do deep compares of ap/pp? */
4538 		    (unique_id == walker->ipsacq_unique_id))
4539 			break;			/* everything matched */
4540 		mutex_exit(&walker->ipsacq_lock);
4541 	}
4542 
4543 	return (walker);
4544 }
4545 
4546 /*
4547  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4548  * of all of the same length.  Give up (and drop) if memory
4549  * cannot be allocated for a new one; otherwise, invoke callback to
4550  * send the acquire up..
4551  *
4552  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4553  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
4554  * and handle this case specially.
4555  */
4556 void
4557 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
4558 {
4559 	sadbp_t *spp;
4560 	sadb_t *sp;
4561 	ipsacq_t *newbie;
4562 	iacqf_t *bucket;
4563 	mblk_t *datamp = mp->b_cont;
4564 	mblk_t *extended;
4565 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4566 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4567 	uint32_t *src, *dst, *isrc, *idst;
4568 	ipsec_policy_t *pp = io->ipsec_out_policy;
4569 	ipsec_action_t *ap = io->ipsec_out_act;
4570 	sa_family_t af;
4571 	int hashoffset;
4572 	uint32_t seq;
4573 	uint64_t unique_id = 0;
4574 	ipsec_selector_t sel;
4575 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
4576 	netstack_t	*ns = io->ipsec_out_ns;
4577 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4578 
4579 	ASSERT((pp != NULL) || (ap != NULL));
4580 
4581 	ASSERT(need_ah != NULL || need_esp != NULL);
4582 	/* Assign sadb pointers */
4583 	if (need_esp) { /* ESP for AH+ESP */
4584 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4585 
4586 		spp = &espstack->esp_sadb;
4587 	} else {
4588 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
4589 
4590 		spp = &ahstack->ah_sadb;
4591 	}
4592 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
4593 
4594 	if (ap == NULL)
4595 		ap = pp->ipsp_act;
4596 
4597 	ASSERT(ap != NULL);
4598 
4599 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4600 		unique_id = SA_FORM_UNIQUE_ID(io);
4601 
4602 	/*
4603 	 * Set up an ACQUIRE record.
4604 	 *
4605 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
4606 	 * below the lowest point allowed in the kernel.  (In other words,
4607 	 * make sure the high bit on the sequence number is set.)
4608 	 */
4609 
4610 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4611 
4612 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4613 		src = (uint32_t *)&ipha->ipha_src;
4614 		dst = (uint32_t *)&ipha->ipha_dst;
4615 		af = AF_INET;
4616 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4617 		ASSERT(io->ipsec_out_v4 == B_TRUE);
4618 	} else {
4619 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4620 		src = (uint32_t *)&ip6h->ip6_src;
4621 		dst = (uint32_t *)&ip6h->ip6_dst;
4622 		af = AF_INET6;
4623 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4624 		ASSERT(io->ipsec_out_v4 == B_FALSE);
4625 	}
4626 
4627 	if (tunnel_mode) {
4628 		/* Snag inner addresses. */
4629 		isrc = io->ipsec_out_insrc;
4630 		idst = io->ipsec_out_indst;
4631 	} else {
4632 		isrc = idst = NULL;
4633 	}
4634 
4635 	/*
4636 	 * Check buckets to see if there is an existing entry.  If so,
4637 	 * grab it.  sadb_checkacquire locks newbie if found.
4638 	 */
4639 	bucket = &(sp->sdb_acq[hashoffset]);
4640 	mutex_enter(&bucket->iacqf_lock);
4641 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4642 	    unique_id);
4643 
4644 	if (newbie == NULL) {
4645 		/*
4646 		 * Otherwise, allocate a new one.
4647 		 */
4648 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4649 		if (newbie == NULL) {
4650 			mutex_exit(&bucket->iacqf_lock);
4651 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
4652 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
4653 			    &ipss->ipsec_sadb_dropper);
4654 			return;
4655 		}
4656 		newbie->ipsacq_policy = pp;
4657 		if (pp != NULL) {
4658 			IPPOL_REFHOLD(pp);
4659 		}
4660 		IPACT_REFHOLD(ap);
4661 		newbie->ipsacq_act = ap;
4662 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
4663 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
4664 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4665 		if (newbie->ipsacq_next != NULL)
4666 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4667 		bucket->iacqf_ipsacq = newbie;
4668 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4669 		mutex_enter(&newbie->ipsacq_lock);
4670 	}
4671 
4672 	mutex_exit(&bucket->iacqf_lock);
4673 
4674 	/*
4675 	 * This assert looks silly for now, but we may need to enter newbie's
4676 	 * mutex during a search.
4677 	 */
4678 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
4679 
4680 	mp->b_next = NULL;
4681 	/* Queue up packet.  Use b_next. */
4682 	if (newbie->ipsacq_numpackets == 0) {
4683 		/* First one. */
4684 		newbie->ipsacq_mp = mp;
4685 		newbie->ipsacq_numpackets = 1;
4686 		newbie->ipsacq_expire = gethrestime_sec();
4687 		/*
4688 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
4689 		 * value.
4690 		 */
4691 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
4692 		newbie->ipsacq_seq = seq;
4693 		newbie->ipsacq_addrfam = af;
4694 
4695 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
4696 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
4697 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
4698 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
4699 		if (tunnel_mode) {
4700 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
4701 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
4702 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
4703 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
4704 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
4705 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
4706 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
4707 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
4708 			    io->ipsec_out_indst, io->ipsec_out_inaf);
4709 		} else {
4710 			newbie->ipsacq_proto = io->ipsec_out_proto;
4711 		}
4712 		newbie->ipsacq_unique_id = unique_id;
4713 	} else {
4714 		/* Scan to the end of the list & insert. */
4715 		mblk_t *lastone = newbie->ipsacq_mp;
4716 
4717 		while (lastone->b_next != NULL)
4718 			lastone = lastone->b_next;
4719 		lastone->b_next = mp;
4720 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
4721 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
4722 			lastone = newbie->ipsacq_mp;
4723 			newbie->ipsacq_mp = lastone->b_next;
4724 			lastone->b_next = NULL;
4725 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
4726 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
4727 			    &ipss->ipsec_sadb_dropper);
4728 		} else {
4729 			IP_ACQUIRE_STAT(ipss, qhiwater,
4730 			    newbie->ipsacq_numpackets);
4731 		}
4732 	}
4733 
4734 	/*
4735 	 * Reset addresses.  Set them to the most recently added mblk chain,
4736 	 * so that the address pointers in the acquire record will point
4737 	 * at an mblk still attached to the acquire list.
4738 	 */
4739 
4740 	newbie->ipsacq_srcaddr = src;
4741 	newbie->ipsacq_dstaddr = dst;
4742 
4743 	/*
4744 	 * If the acquire record has more than one queued packet, we've
4745 	 * already sent an ACQUIRE, and don't need to repeat ourself.
4746 	 */
4747 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
4748 		/* I have an acquire outstanding already! */
4749 		mutex_exit(&newbie->ipsacq_lock);
4750 		return;
4751 	}
4752 
4753 	if (keysock_extended_reg(ns)) {
4754 		/*
4755 		 * Construct an extended ACQUIRE.  There are logging
4756 		 * opportunities here in failure cases.
4757 		 */
4758 
4759 		(void) memset(&sel, 0, sizeof (sel));
4760 		sel.ips_isv4 = io->ipsec_out_v4;
4761 		if (tunnel_mode) {
4762 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
4763 			    IPPROTO_ENCAP : IPPROTO_IPV6;
4764 		} else {
4765 			sel.ips_protocol = io->ipsec_out_proto;
4766 			sel.ips_local_port = io->ipsec_out_src_port;
4767 			sel.ips_remote_port = io->ipsec_out_dst_port;
4768 		}
4769 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
4770 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
4771 		sel.ips_is_icmp_inv_acq = 0;
4772 		if (af == AF_INET) {
4773 			sel.ips_local_addr_v4 = ipha->ipha_src;
4774 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
4775 		} else {
4776 			sel.ips_local_addr_v6 = ip6h->ip6_src;
4777 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
4778 		}
4779 
4780 		extended = sadb_keysock_out(0);
4781 		if (extended != NULL) {
4782 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
4783 			    tunnel_mode, seq, 0, ns);
4784 			if (extended->b_cont == NULL) {
4785 				freeb(extended);
4786 				extended = NULL;
4787 			}
4788 		}
4789 	} else
4790 		extended = NULL;
4791 
4792 	/*
4793 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
4794 	 * this new record.  The send-acquire callback assumes that acqrec is
4795 	 * already locked.
4796 	 */
4797 	(*spp->s_acqfn)(newbie, extended, ns);
4798 }
4799 
4800 /*
4801  * Unlink and free an acquire record.
4802  */
4803 void
4804 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
4805 {
4806 	mblk_t *mp;
4807 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4808 
4809 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
4810 
4811 	if (acqrec->ipsacq_policy != NULL) {
4812 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
4813 	}
4814 	if (acqrec->ipsacq_act != NULL) {
4815 		IPACT_REFRELE(acqrec->ipsacq_act);
4816 	}
4817 
4818 	/* Unlink */
4819 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
4820 	if (acqrec->ipsacq_next != NULL)
4821 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
4822 
4823 	/*
4824 	 * Free hanging mp's.
4825 	 *
4826 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
4827 	 */
4828 
4829 	mutex_enter(&acqrec->ipsacq_lock);
4830 	while (acqrec->ipsacq_mp != NULL) {
4831 		mp = acqrec->ipsacq_mp;
4832 		acqrec->ipsacq_mp = mp->b_next;
4833 		mp->b_next = NULL;
4834 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
4835 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
4836 		    &ipss->ipsec_sadb_dropper);
4837 	}
4838 	mutex_exit(&acqrec->ipsacq_lock);
4839 
4840 	/* Free */
4841 	mutex_destroy(&acqrec->ipsacq_lock);
4842 	kmem_free(acqrec, sizeof (*acqrec));
4843 }
4844 
4845 /*
4846  * Destroy an acquire list fanout.
4847  */
4848 static void
4849 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
4850     netstack_t *ns)
4851 {
4852 	int i;
4853 	iacqf_t *list = *listp;
4854 
4855 	if (list == NULL)
4856 		return;
4857 
4858 	for (i = 0; i < numentries; i++) {
4859 		mutex_enter(&(list[i].iacqf_lock));
4860 		while (list[i].iacqf_ipsacq != NULL)
4861 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
4862 		mutex_exit(&(list[i].iacqf_lock));
4863 		if (forever)
4864 			mutex_destroy(&(list[i].iacqf_lock));
4865 	}
4866 
4867 	if (forever) {
4868 		*listp = NULL;
4869 		kmem_free(list, numentries * sizeof (*list));
4870 	}
4871 }
4872 
4873 /*
4874  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
4875  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
4876  */
4877 static uint8_t *
4878 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
4879     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
4880     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
4881 {
4882 	uint8_t *cur = start;
4883 	ipsec_alginfo_t *algp;
4884 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
4885 
4886 	cur += sizeof (*algdesc);
4887 	if (cur >= limit)
4888 		return (NULL);
4889 
4890 	ecomb->sadb_x_ecomb_numalgs++;
4891 
4892 	/*
4893 	 * Normalize vs. crypto framework's limits.  This way, you can specify
4894 	 * a stronger policy, and when the framework loads a stronger version,
4895 	 * you can just keep plowing w/o rewhacking your SPD.
4896 	 */
4897 	mutex_enter(&ipss->ipsec_alg_lock);
4898 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
4899 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
4900 	if (algp == NULL) {
4901 		mutex_exit(&ipss->ipsec_alg_lock);
4902 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
4903 	}
4904 	if (minbits < algp->alg_ef_minbits)
4905 		minbits = algp->alg_ef_minbits;
4906 	if (maxbits > algp->alg_ef_maxbits)
4907 		maxbits = algp->alg_ef_maxbits;
4908 	mutex_exit(&ipss->ipsec_alg_lock);
4909 
4910 	algdesc->sadb_x_algdesc_satype = satype;
4911 	algdesc->sadb_x_algdesc_algtype = algtype;
4912 	algdesc->sadb_x_algdesc_alg = alg;
4913 	algdesc->sadb_x_algdesc_minbits = minbits;
4914 	algdesc->sadb_x_algdesc_maxbits = maxbits;
4915 	algdesc->sadb_x_algdesc_reserved = 0;
4916 	return (cur);
4917 }
4918 
4919 /*
4920  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
4921  * which must fit before *limit
4922  *
4923  * return NULL if we ran out of room or a pointer to the end of the ecomb.
4924  */
4925 static uint8_t *
4926 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
4927     netstack_t *ns)
4928 {
4929 	uint8_t *cur = start;
4930 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
4931 	ipsec_prot_t *ipp;
4932 	ipsec_stack_t *ipss = ns->netstack_ipsec;
4933 
4934 	cur += sizeof (*ecomb);
4935 	if (cur >= limit)
4936 		return (NULL);
4937 
4938 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
4939 
4940 	ipp = &act->ipa_act.ipa_apply;
4941 
4942 	ecomb->sadb_x_ecomb_numalgs = 0;
4943 	ecomb->sadb_x_ecomb_reserved = 0;
4944 	ecomb->sadb_x_ecomb_reserved2 = 0;
4945 	/*
4946 	 * No limits on allocations, since we really don't support that
4947 	 * concept currently.
4948 	 */
4949 	ecomb->sadb_x_ecomb_soft_allocations = 0;
4950 	ecomb->sadb_x_ecomb_hard_allocations = 0;
4951 
4952 	/*
4953 	 * XXX TBD: Policy or global parameters will eventually be
4954 	 * able to fill in some of these.
4955 	 */
4956 	ecomb->sadb_x_ecomb_flags = 0;
4957 	ecomb->sadb_x_ecomb_soft_bytes = 0;
4958 	ecomb->sadb_x_ecomb_hard_bytes = 0;
4959 	ecomb->sadb_x_ecomb_soft_addtime = 0;
4960 	ecomb->sadb_x_ecomb_hard_addtime = 0;
4961 	ecomb->sadb_x_ecomb_soft_usetime = 0;
4962 	ecomb->sadb_x_ecomb_hard_usetime = 0;
4963 
4964 	if (ipp->ipp_use_ah) {
4965 		cur = sadb_new_algdesc(cur, limit, ecomb,
4966 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
4967 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
4968 		if (cur == NULL)
4969 			return (NULL);
4970 		ipsecah_fill_defs(ecomb, ns);
4971 	}
4972 
4973 	if (ipp->ipp_use_esp) {
4974 		if (ipp->ipp_use_espa) {
4975 			cur = sadb_new_algdesc(cur, limit, ecomb,
4976 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
4977 			    ipp->ipp_esp_auth_alg,
4978 			    ipp->ipp_espa_minbits,
4979 			    ipp->ipp_espa_maxbits, ipss);
4980 			if (cur == NULL)
4981 				return (NULL);
4982 		}
4983 
4984 		cur = sadb_new_algdesc(cur, limit, ecomb,
4985 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
4986 		    ipp->ipp_encr_alg,
4987 		    ipp->ipp_espe_minbits,
4988 		    ipp->ipp_espe_maxbits, ipss);
4989 		if (cur == NULL)
4990 			return (NULL);
4991 		/* Fill in lifetimes if and only if AH didn't already... */
4992 		if (!ipp->ipp_use_ah)
4993 			ipsecesp_fill_defs(ecomb, ns);
4994 	}
4995 
4996 	return (cur);
4997 }
4998 
4999 /*
5000  * Construct an extended ACQUIRE message based on a selector and the resulting
5001  * IPsec action.
5002  *
5003  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5004  * generation. As a consequence, expect this function to evolve
5005  * rapidly.
5006  */
5007 static mblk_t *
5008 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5009     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5010     netstack_t *ns)
5011 {
5012 	mblk_t *mp;
5013 	sadb_msg_t *samsg;
5014 	uint8_t *start, *cur, *end;
5015 	uint32_t *saddrptr, *daddrptr;
5016 	sa_family_t af;
5017 	sadb_prop_t *eprop;
5018 	ipsec_action_t *ap, *an;
5019 	ipsec_selkey_t *ipsl;
5020 	uint8_t proto, pfxlen;
5021 	uint16_t lport, rport;
5022 	uint32_t kmp, kmc;
5023 
5024 	/*
5025 	 * Find the action we want sooner rather than later..
5026 	 */
5027 	an = NULL;
5028 	if (pol == NULL) {
5029 		ap = act;
5030 	} else {
5031 		ap = pol->ipsp_act;
5032 
5033 		if (ap != NULL)
5034 			an = ap->ipa_next;
5035 	}
5036 
5037 	/*
5038 	 * Just take a swag for the allocation for now.	 We can always
5039 	 * alter it later.
5040 	 */
5041 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5042 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5043 	if (mp == NULL)
5044 		return (NULL);
5045 
5046 	start = mp->b_rptr;
5047 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5048 
5049 	cur = start;
5050 
5051 	samsg = (sadb_msg_t *)cur;
5052 	cur += sizeof (*samsg);
5053 
5054 	samsg->sadb_msg_version = PF_KEY_V2;
5055 	samsg->sadb_msg_type = SADB_ACQUIRE;
5056 	samsg->sadb_msg_errno = 0;
5057 	samsg->sadb_msg_reserved = 0;
5058 	samsg->sadb_msg_satype = 0;
5059 	samsg->sadb_msg_seq = seq;
5060 	samsg->sadb_msg_pid = pid;
5061 
5062 	if (tunnel_mode) {
5063 		/*
5064 		 * Form inner address extensions based NOT on the inner
5065 		 * selectors (i.e. the packet data), but on the policy's
5066 		 * selector key (i.e. the policy's selector information).
5067 		 *
5068 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5069 		 * same in ipsec_selkey_t (unless the compiler does very
5070 		 * strange things with unions, consult your local C language
5071 		 * lawyer for details).
5072 		 */
5073 		ipsl = &(pol->ipsp_sel->ipsl_key);
5074 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5075 			af = AF_INET;
5076 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5077 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5078 		} else {
5079 			af = AF_INET6;
5080 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5081 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5082 		}
5083 
5084 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5085 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5086 			pfxlen = ipsl->ipsl_local_pfxlen;
5087 		} else {
5088 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5089 			pfxlen = 0;
5090 		}
5091 		/* XXX What about ICMP type/code? */
5092 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5093 		    ipsl->ipsl_lport : 0;
5094 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5095 		    ipsl->ipsl_proto : 0;
5096 
5097 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5098 		    af, saddrptr, lport, proto, pfxlen);
5099 		if (cur == NULL) {
5100 			freeb(mp);
5101 			return (NULL);
5102 		}
5103 
5104 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5105 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5106 			pfxlen = ipsl->ipsl_remote_pfxlen;
5107 		} else {
5108 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5109 			pfxlen = 0;
5110 		}
5111 		/* XXX What about ICMP type/code? */
5112 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5113 		    ipsl->ipsl_rport : 0;
5114 
5115 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5116 		    af, daddrptr, rport, proto, pfxlen);
5117 		if (cur == NULL) {
5118 			freeb(mp);
5119 			return (NULL);
5120 		}
5121 		/*
5122 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5123 		 * _with_ inner-packet address selectors, we'll need to further
5124 		 * distinguish tunnel mode here.  For now, having inner
5125 		 * addresses and/or ports is sufficient.
5126 		 *
5127 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5128 		 * outer addresses.
5129 		 */
5130 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5131 		lport = rport = 0;
5132 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5133 		proto = 0;
5134 		lport = 0;
5135 		rport = 0;
5136 		if (pol != NULL) {
5137 			ipsl = &(pol->ipsp_sel->ipsl_key);
5138 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5139 				proto = ipsl->ipsl_proto;
5140 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5141 				rport = ipsl->ipsl_rport;
5142 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5143 				lport = ipsl->ipsl_lport;
5144 		}
5145 	} else {
5146 		proto = sel->ips_protocol;
5147 		lport = sel->ips_local_port;
5148 		rport = sel->ips_remote_port;
5149 	}
5150 
5151 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5152 
5153 	/*
5154 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5155 	 * ipsec_selector_t.
5156 	 */
5157 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5158 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5159 
5160 	if (cur == NULL) {
5161 		freeb(mp);
5162 		return (NULL);
5163 	}
5164 
5165 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5166 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5167 
5168 	if (cur == NULL) {
5169 		freeb(mp);
5170 		return (NULL);
5171 	}
5172 
5173 	/*
5174 	 * This section will change a lot as policy evolves.
5175 	 * For now, it'll be relatively simple.
5176 	 */
5177 	eprop = (sadb_prop_t *)cur;
5178 	cur += sizeof (*eprop);
5179 	if (cur > end) {
5180 		/* no space left */
5181 		freeb(mp);
5182 		return (NULL);
5183 	}
5184 
5185 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5186 	eprop->sadb_x_prop_ereserved = 0;
5187 	eprop->sadb_x_prop_numecombs = 0;
5188 	eprop->sadb_prop_replay = 32;	/* default */
5189 
5190 	kmc = kmp = 0;
5191 
5192 	for (; ap != NULL; ap = an) {
5193 		an = (pol != NULL) ? ap->ipa_next : NULL;
5194 
5195 		/*
5196 		 * Skip non-IPsec policies
5197 		 */
5198 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5199 			continue;
5200 
5201 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5202 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5203 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5204 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5205 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5206 			eprop->sadb_prop_replay =
5207 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5208 		}
5209 
5210 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5211 		if (cur == NULL) { /* no space */
5212 			freeb(mp);
5213 			return (NULL);
5214 		}
5215 		eprop->sadb_x_prop_numecombs++;
5216 	}
5217 
5218 	if (eprop->sadb_x_prop_numecombs == 0) {
5219 		/*
5220 		 * This will happen if we fail to find a policy
5221 		 * allowing for IPsec processing.
5222 		 * Construct an error message.
5223 		 */
5224 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5225 		samsg->sadb_msg_errno = ENOENT;
5226 		samsg->sadb_x_msg_diagnostic = 0;
5227 		return (mp);
5228 	}
5229 
5230 	if ((kmp != 0) || (kmc != 0)) {
5231 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5232 		if (cur == NULL) {
5233 			freeb(mp);
5234 			return (NULL);
5235 		}
5236 	}
5237 
5238 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5239 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5240 	mp->b_wptr = cur;
5241 
5242 	return (mp);
5243 }
5244 
5245 /*
5246  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5247  *
5248  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5249  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5250  * maximize code consolidation while preventing algorithm changes from messing
5251  * with the callers finishing touches on the ACQUIRE itself.
5252  */
5253 mblk_t *
5254 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5255 {
5256 	uint_t allocsize;
5257 	mblk_t *pfkeymp, *msgmp;
5258 	sa_family_t af;
5259 	uint8_t *cur, *end;
5260 	sadb_msg_t *samsg;
5261 	uint16_t sport_typecode;
5262 	uint16_t dport_typecode;
5263 	uint8_t check_proto;
5264 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5265 
5266 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5267 
5268 	pfkeymp = sadb_keysock_out(0);
5269 	if (pfkeymp == NULL)
5270 		return (NULL);
5271 
5272 	/*
5273 	 * First, allocate a basic ACQUIRE message
5274 	 */
5275 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5276 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5277 
5278 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5279 	allocsize += 2 * sizeof (struct sockaddr_in6);
5280 
5281 	mutex_enter(&ipss->ipsec_alg_lock);
5282 	/* NOTE:  The lock is now held through to this function's return. */
5283 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5284 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5285 
5286 	if (tunnel_mode) {
5287 		/* Tunnel mode! */
5288 		allocsize += 2 * sizeof (sadb_address_t);
5289 		/* Enough to cover both AF_INET and AF_INET6. */
5290 		allocsize += 2 * sizeof (struct sockaddr_in6);
5291 	}
5292 
5293 	msgmp = allocb(allocsize, BPRI_HI);
5294 	if (msgmp == NULL) {
5295 		freeb(pfkeymp);
5296 		mutex_exit(&ipss->ipsec_alg_lock);
5297 		return (NULL);
5298 	}
5299 
5300 	pfkeymp->b_cont = msgmp;
5301 	cur = msgmp->b_rptr;
5302 	end = cur + allocsize;
5303 	samsg = (sadb_msg_t *)cur;
5304 	cur += sizeof (sadb_msg_t);
5305 
5306 	af = acqrec->ipsacq_addrfam;
5307 	switch (af) {
5308 	case AF_INET:
5309 		check_proto = IPPROTO_ICMP;
5310 		break;
5311 	case AF_INET6:
5312 		check_proto = IPPROTO_ICMPV6;
5313 		break;
5314 	default:
5315 		/* This should never happen unless we have kernel bugs. */
5316 		cmn_err(CE_WARN,
5317 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5318 		ASSERT(0);
5319 		mutex_exit(&ipss->ipsec_alg_lock);
5320 		return (NULL);
5321 	}
5322 
5323 	samsg->sadb_msg_version = PF_KEY_V2;
5324 	samsg->sadb_msg_type = SADB_ACQUIRE;
5325 	samsg->sadb_msg_satype = satype;
5326 	samsg->sadb_msg_errno = 0;
5327 	samsg->sadb_msg_pid = 0;
5328 	samsg->sadb_msg_reserved = 0;
5329 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5330 
5331 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5332 
5333 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5334 		sport_typecode = dport_typecode = 0;
5335 	} else {
5336 		sport_typecode = acqrec->ipsacq_srcport;
5337 		dport_typecode = acqrec->ipsacq_dstport;
5338 	}
5339 
5340 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5341 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5342 
5343 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5344 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5345 
5346 	if (tunnel_mode) {
5347 		sport_typecode = acqrec->ipsacq_srcport;
5348 		dport_typecode = acqrec->ipsacq_dstport;
5349 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5350 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5351 		    sport_typecode, acqrec->ipsacq_inner_proto,
5352 		    acqrec->ipsacq_innersrcpfx);
5353 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5354 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5355 		    dport_typecode, acqrec->ipsacq_inner_proto,
5356 		    acqrec->ipsacq_innerdstpfx);
5357 	}
5358 
5359 	/* XXX Insert identity information here. */
5360 
5361 	/* XXXMLS Insert sensitivity information here. */
5362 
5363 	if (cur != NULL)
5364 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5365 	else
5366 		mutex_exit(&ipss->ipsec_alg_lock);
5367 
5368 	return (pfkeymp);
5369 }
5370 
5371 /*
5372  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5373  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5374  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5375  * (ipsa_t *)-1).
5376  *
5377  * master_spi is passed in host order.
5378  */
5379 ipsa_t *
5380 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5381     netstack_t *ns)
5382 {
5383 	sadb_address_t *src =
5384 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5385 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5386 	sadb_spirange_t *range =
5387 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5388 	struct sockaddr_in *ssa, *dsa;
5389 	struct sockaddr_in6 *ssa6, *dsa6;
5390 	uint32_t *srcaddr, *dstaddr;
5391 	sa_family_t af;
5392 	uint32_t add, min, max;
5393 
5394 	if (src == NULL) {
5395 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5396 		return ((ipsa_t *)-1);
5397 	}
5398 	if (dst == NULL) {
5399 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5400 		return ((ipsa_t *)-1);
5401 	}
5402 	if (range == NULL) {
5403 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5404 		return ((ipsa_t *)-1);
5405 	}
5406 
5407 	min = ntohl(range->sadb_spirange_min);
5408 	max = ntohl(range->sadb_spirange_max);
5409 	dsa = (struct sockaddr_in *)(dst + 1);
5410 	dsa6 = (struct sockaddr_in6 *)dsa;
5411 
5412 	ssa = (struct sockaddr_in *)(src + 1);
5413 	ssa6 = (struct sockaddr_in6 *)ssa;
5414 	ASSERT(dsa->sin_family == ssa->sin_family);
5415 
5416 	srcaddr = ALL_ZEROES_PTR;
5417 	af = dsa->sin_family;
5418 	switch (af) {
5419 	case AF_INET:
5420 		if (src != NULL)
5421 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5422 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5423 		break;
5424 	case AF_INET6:
5425 		if (src != NULL)
5426 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5427 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5428 		break;
5429 	default:
5430 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5431 		return ((ipsa_t *)-1);
5432 	}
5433 
5434 	if (master_spi < min || master_spi > max) {
5435 		/* Return a random value in the range. */
5436 		(void) random_get_pseudo_bytes((uint8_t *)&add, sizeof (add));
5437 		master_spi = min + (add % (max - min + 1));
5438 	}
5439 
5440 	/*
5441 	 * Since master_spi is passed in host order, we need to htonl() it
5442 	 * for the purposes of creating a new SA.
5443 	 */
5444 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5445 	    ns));
5446 }
5447 
5448 /*
5449  *
5450  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5451  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5452  * and scan for the sequence number in question.  I may wish to accept an
5453  * address pair with it, for easier searching.
5454  *
5455  * Caller frees the message, so we don't have to here.
5456  *
5457  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5458  *		failures.
5459  */
5460 /* ARGSUSED */
5461 void
5462 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
5463 {
5464 	int i;
5465 	ipsacq_t *acqrec;
5466 	iacqf_t *bucket;
5467 
5468 	/*
5469 	 * I only accept the base header for this!
5470 	 * Though to be honest, requiring the dst address would help
5471 	 * immensely.
5472 	 *
5473 	 * XXX	There are already cases where I can get the dst address.
5474 	 */
5475 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5476 		return;
5477 
5478 	/*
5479 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5480 	 * (and in the future send a message to IP with the appropriate error
5481 	 * number).
5482 	 *
5483 	 * Q: Do I want to reject if pid != 0?
5484 	 */
5485 
5486 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5487 		bucket = &sp->s_v4.sdb_acq[i];
5488 		mutex_enter(&bucket->iacqf_lock);
5489 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5490 		    acqrec = acqrec->ipsacq_next) {
5491 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5492 				break;	/* for acqrec... loop. */
5493 		}
5494 		if (acqrec != NULL)
5495 			break;	/* for i = 0... loop. */
5496 
5497 		mutex_exit(&bucket->iacqf_lock);
5498 	}
5499 
5500 	if (acqrec == NULL) {
5501 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
5502 			bucket = &sp->s_v6.sdb_acq[i];
5503 			mutex_enter(&bucket->iacqf_lock);
5504 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5505 			    acqrec = acqrec->ipsacq_next) {
5506 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5507 					break;	/* for acqrec... loop. */
5508 			}
5509 			if (acqrec != NULL)
5510 				break;	/* for i = 0... loop. */
5511 
5512 			mutex_exit(&bucket->iacqf_lock);
5513 		}
5514 	}
5515 
5516 
5517 	if (acqrec == NULL)
5518 		return;
5519 
5520 	/*
5521 	 * What do I do with the errno and IP?	I may need mp's services a
5522 	 * little more.	 See sadb_destroy_acquire() for future directions
5523 	 * beyond free the mblk chain on the acquire record.
5524 	 */
5525 
5526 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
5527 	sadb_destroy_acquire(acqrec, ns);
5528 	/* Have to exit mutex here, because of breaking out of for loop. */
5529 	mutex_exit(&bucket->iacqf_lock);
5530 }
5531 
5532 /*
5533  * The following functions work with the replay windows of an SA.  They assume
5534  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
5535  * represents the highest sequence number packet received, and back
5536  * (ipsa->ipsa_replay_wsize) packets.
5537  */
5538 
5539 /*
5540  * Is the replay bit set?
5541  */
5542 static boolean_t
5543 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
5544 {
5545 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5546 
5547 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
5548 }
5549 
5550 /*
5551  * Shift the bits of the replay window over.
5552  */
5553 static void
5554 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
5555 {
5556 	int i;
5557 	int jump = ((shift - 1) >> 6) + 1;
5558 
5559 	if (shift == 0)
5560 		return;
5561 
5562 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
5563 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
5564 			ipsa->ipsa_replay_arr[i + jump] |=
5565 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
5566 		}
5567 		ipsa->ipsa_replay_arr[i] <<= shift;
5568 	}
5569 }
5570 
5571 /*
5572  * Set a bit in the bit vector.
5573  */
5574 static void
5575 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
5576 {
5577 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5578 
5579 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
5580 }
5581 
5582 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
5583 
5584 /*
5585  * Assume caller has NOT done ntohl() already on seq.  Check to see
5586  * if replay sequence number "seq" has been seen already.
5587  */
5588 boolean_t
5589 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
5590 {
5591 	boolean_t rc;
5592 	uint32_t diff;
5593 
5594 	if (ipsa->ipsa_replay_wsize == 0)
5595 		return (B_TRUE);
5596 
5597 	/*
5598 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
5599 	 */
5600 
5601 	/* Convert sequence number into host order before holding the mutex. */
5602 	seq = ntohl(seq);
5603 
5604 	mutex_enter(&ipsa->ipsa_lock);
5605 
5606 	/* Initialize inbound SA's ipsa_replay field to last one received. */
5607 	if (ipsa->ipsa_replay == 0)
5608 		ipsa->ipsa_replay = 1;
5609 
5610 	if (seq > ipsa->ipsa_replay) {
5611 		/*
5612 		 * I have received a new "highest value received".  Shift
5613 		 * the replay window over.
5614 		 */
5615 		diff = seq - ipsa->ipsa_replay;
5616 		if (diff < ipsa->ipsa_replay_wsize) {
5617 			/* In replay window, shift bits over. */
5618 			ipsa_shift_replay(ipsa, diff);
5619 		} else {
5620 			/* WAY FAR AHEAD, clear bits and start again. */
5621 			bzero(ipsa->ipsa_replay_arr,
5622 			    sizeof (ipsa->ipsa_replay_arr));
5623 		}
5624 		ipsa_set_replay(ipsa, 0);
5625 		ipsa->ipsa_replay = seq;
5626 		rc = B_TRUE;
5627 		goto done;
5628 	}
5629 	diff = ipsa->ipsa_replay - seq;
5630 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
5631 		rc = B_FALSE;
5632 		goto done;
5633 	}
5634 	/* Set this packet as seen. */
5635 	ipsa_set_replay(ipsa, diff);
5636 
5637 	rc = B_TRUE;
5638 done:
5639 	mutex_exit(&ipsa->ipsa_lock);
5640 	return (rc);
5641 }
5642 
5643 /*
5644  * "Peek" and see if we should even bother going through the effort of
5645  * running an authentication check on the sequence number passed in.
5646  * this takes into account packets that are below the replay window,
5647  * and collisions with already replayed packets.  Return B_TRUE if it
5648  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
5649  * Assume same byte-ordering as sadb_replay_check.
5650  */
5651 boolean_t
5652 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
5653 {
5654 	boolean_t rc = B_FALSE;
5655 	uint32_t diff;
5656 
5657 	if (ipsa->ipsa_replay_wsize == 0)
5658 		return (B_TRUE);
5659 
5660 	/*
5661 	 * 0 is 0, regardless of byte order... :)
5662 	 *
5663 	 * If I get 0 on the wire (and there is a replay window) then the
5664 	 * sender most likely wrapped.	This ipsa may need to be marked or
5665 	 * something.
5666 	 */
5667 	if (seq == 0)
5668 		return (B_FALSE);
5669 
5670 	seq = ntohl(seq);
5671 	mutex_enter(&ipsa->ipsa_lock);
5672 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
5673 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
5674 		goto done;
5675 
5676 	/*
5677 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
5678 	 * bother with formalities.  I'm not accepting any more packets
5679 	 * on this SA.
5680 	 */
5681 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
5682 		/*
5683 		 * Since we're already holding the lock, update the
5684 		 * expire time ala. sadb_replay_delete() and return.
5685 		 */
5686 		ipsa->ipsa_hardexpiretime = (time_t)1;
5687 		goto done;
5688 	}
5689 
5690 	if (seq <= ipsa->ipsa_replay) {
5691 		/*
5692 		 * This seq is in the replay window.  I'm not below it,
5693 		 * because I already checked for that above!
5694 		 */
5695 		diff = ipsa->ipsa_replay - seq;
5696 		if (ipsa_is_replay_set(ipsa, diff))
5697 			goto done;
5698 	}
5699 	/* Else return B_TRUE, I'm going to advance the window. */
5700 
5701 	rc = B_TRUE;
5702 done:
5703 	mutex_exit(&ipsa->ipsa_lock);
5704 	return (rc);
5705 }
5706 
5707 /*
5708  * Delete a single SA.
5709  *
5710  * For now, use the quick-and-dirty trick of making the association's
5711  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
5712  */
5713 void
5714 sadb_replay_delete(ipsa_t *assoc)
5715 {
5716 	mutex_enter(&assoc->ipsa_lock);
5717 	assoc->ipsa_hardexpiretime = (time_t)1;
5718 	mutex_exit(&assoc->ipsa_lock);
5719 }
5720 
5721 /*
5722  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
5723  * down.  The caller will handle the T_BIND_ACK locally.
5724  */
5725 boolean_t
5726 sadb_t_bind_req(queue_t *q, int proto)
5727 {
5728 	struct T_bind_req *tbr;
5729 	mblk_t *mp;
5730 
5731 	mp = allocb(sizeof (struct T_bind_req) + 1, BPRI_HI);
5732 	if (mp == NULL) {
5733 		/* cmn_err(CE_WARN, */
5734 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
5735 		return (B_FALSE);
5736 	}
5737 	mp->b_datap->db_type = M_PCPROTO;
5738 	tbr = (struct T_bind_req *)mp->b_rptr;
5739 	mp->b_wptr += sizeof (struct T_bind_req);
5740 	tbr->PRIM_type = T_BIND_REQ;
5741 	tbr->ADDR_length = 0;
5742 	tbr->ADDR_offset = 0;
5743 	tbr->CONIND_number = 0;
5744 	*mp->b_wptr = (uint8_t)proto;
5745 	mp->b_wptr++;
5746 
5747 	putnext(q, mp);
5748 	return (B_TRUE);
5749 }
5750 
5751 /*
5752  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
5753  * this is designed to take only a format string with "* %x * %s *", so
5754  * that "spi" is printed first, then "addr" is converted using inet_pton().
5755  *
5756  * This is abstracted out to save the stack space for only when inet_pton()
5757  * is called.  Make sure "spi" is in network order; it usually is when this
5758  * would get called.
5759  */
5760 void
5761 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
5762     uint32_t spi, void *addr, int af, netstack_t *ns)
5763 {
5764 	char buf[INET6_ADDRSTRLEN];
5765 
5766 	ASSERT(af == AF_INET6 || af == AF_INET);
5767 
5768 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
5769 	    inet_ntop(af, addr, buf, sizeof (buf)));
5770 }
5771 
5772 /*
5773  * Fills in a reference to the policy, if any, from the conn, in *ppp
5774  * Releases a reference to the passed conn_t.
5775  */
5776 static void
5777 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
5778 {
5779 	ipsec_policy_t	*pp;
5780 	ipsec_latch_t	*ipl = connp->conn_latch;
5781 
5782 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
5783 		pp = ipl->ipl_out_policy;
5784 		IPPOL_REFHOLD(pp);
5785 	} else {
5786 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
5787 		    connp->conn_netstack);
5788 	}
5789 	*ppp = pp;
5790 	CONN_DEC_REF(connp);
5791 }
5792 
5793 /*
5794  * The following functions scan through active conn_t structures
5795  * and return a reference to the best-matching policy it can find.
5796  * Caller must release the reference.
5797  */
5798 static void
5799 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5800 {
5801 	connf_t *connfp;
5802 	conn_t *connp = NULL;
5803 	ipsec_selector_t portonly;
5804 
5805 	bzero((void*)&portonly, sizeof (portonly));
5806 
5807 	if (sel->ips_local_port == 0)
5808 		return;
5809 
5810 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
5811 	    ipst)];
5812 	mutex_enter(&connfp->connf_lock);
5813 
5814 	if (sel->ips_isv4) {
5815 		connp = connfp->connf_head;
5816 		while (connp != NULL) {
5817 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
5818 			    sel->ips_local_addr_v4, sel->ips_remote_port,
5819 			    sel->ips_remote_addr_v4))
5820 				break;
5821 			connp = connp->conn_next;
5822 		}
5823 
5824 		if (connp == NULL) {
5825 			/* Try port-only match in IPv6. */
5826 			portonly.ips_local_port = sel->ips_local_port;
5827 			sel = &portonly;
5828 		}
5829 	}
5830 
5831 	if (connp == NULL) {
5832 		connp = connfp->connf_head;
5833 		while (connp != NULL) {
5834 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
5835 			    sel->ips_local_addr_v6, sel->ips_remote_port,
5836 			    sel->ips_remote_addr_v6))
5837 				break;
5838 			connp = connp->conn_next;
5839 		}
5840 
5841 		if (connp == NULL) {
5842 			mutex_exit(&connfp->connf_lock);
5843 			return;
5844 		}
5845 	}
5846 
5847 	CONN_INC_REF(connp);
5848 	mutex_exit(&connfp->connf_lock);
5849 
5850 	ipsec_conn_pol(sel, connp, ppp);
5851 }
5852 
5853 static conn_t *
5854 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
5855 {
5856 	connf_t *connfp;
5857 	conn_t *connp = NULL;
5858 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
5859 
5860 	if (sel->ips_local_port == 0)
5861 		return (NULL);
5862 
5863 	connfp = &ipst->ips_ipcl_bind_fanout[
5864 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
5865 	mutex_enter(&connfp->connf_lock);
5866 
5867 	if (sel->ips_isv4) {
5868 		connp = connfp->connf_head;
5869 		while (connp != NULL) {
5870 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
5871 			    sel->ips_local_addr_v4, pptr[1]))
5872 				break;
5873 			connp = connp->conn_next;
5874 		}
5875 
5876 		if (connp == NULL) {
5877 			/* Match to all-zeroes. */
5878 			v6addrmatch = &ipv6_all_zeros;
5879 		}
5880 	}
5881 
5882 	if (connp == NULL) {
5883 		connp = connfp->connf_head;
5884 		while (connp != NULL) {
5885 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
5886 			    *v6addrmatch, pptr[1]))
5887 				break;
5888 			connp = connp->conn_next;
5889 		}
5890 
5891 		if (connp == NULL) {
5892 			mutex_exit(&connfp->connf_lock);
5893 			return (NULL);
5894 		}
5895 	}
5896 
5897 	CONN_INC_REF(connp);
5898 	mutex_exit(&connfp->connf_lock);
5899 	return (connp);
5900 }
5901 
5902 static void
5903 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5904 {
5905 	connf_t 	*connfp;
5906 	conn_t		*connp;
5907 	uint32_t	ports;
5908 	uint16_t	*pptr = (uint16_t *)&ports;
5909 
5910 	/*
5911 	 * Find TCP state in the following order:
5912 	 * 1.) Connected conns.
5913 	 * 2.) Listeners.
5914 	 *
5915 	 * Even though #2 will be the common case for inbound traffic, only
5916 	 * following this order insures correctness.
5917 	 */
5918 
5919 	if (sel->ips_local_port == 0)
5920 		return;
5921 
5922 	/*
5923 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5924 	 * See ipsec_construct_inverse_acquire() for details.
5925 	 */
5926 	pptr[0] = sel->ips_remote_port;
5927 	pptr[1] = sel->ips_local_port;
5928 
5929 	connfp = &ipst->ips_ipcl_conn_fanout[
5930 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
5931 	mutex_enter(&connfp->connf_lock);
5932 	connp = connfp->connf_head;
5933 
5934 	if (sel->ips_isv4) {
5935 		while (connp != NULL) {
5936 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
5937 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
5938 			    ports))
5939 				break;
5940 			connp = connp->conn_next;
5941 		}
5942 	} else {
5943 		while (connp != NULL) {
5944 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
5945 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
5946 			    ports))
5947 				break;
5948 			connp = connp->conn_next;
5949 		}
5950 	}
5951 
5952 	if (connp != NULL) {
5953 		CONN_INC_REF(connp);
5954 		mutex_exit(&connfp->connf_lock);
5955 	} else {
5956 		mutex_exit(&connfp->connf_lock);
5957 
5958 		/* Try the listen hash. */
5959 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
5960 			return;
5961 	}
5962 
5963 	ipsec_conn_pol(sel, connp, ppp);
5964 }
5965 
5966 static void
5967 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
5968     ip_stack_t *ipst)
5969 {
5970 	conn_t		*connp;
5971 	uint32_t	ports;
5972 	uint16_t	*pptr = (uint16_t *)&ports;
5973 
5974 	/*
5975 	 * Find SCP state in the following order:
5976 	 * 1.) Connected conns.
5977 	 * 2.) Listeners.
5978 	 *
5979 	 * Even though #2 will be the common case for inbound traffic, only
5980 	 * following this order insures correctness.
5981 	 */
5982 
5983 	if (sel->ips_local_port == 0)
5984 		return;
5985 
5986 	/*
5987 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5988 	 * See ipsec_construct_inverse_acquire() for details.
5989 	 */
5990 	pptr[0] = sel->ips_remote_port;
5991 	pptr[1] = sel->ips_local_port;
5992 
5993 	if (sel->ips_isv4) {
5994 		in6_addr_t	src, dst;
5995 
5996 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
5997 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
5998 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
5999 		    ipst->ips_netstack->netstack_sctp);
6000 	} else {
6001 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6002 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6003 		    ipst->ips_netstack->netstack_sctp);
6004 	}
6005 	if (connp == NULL)
6006 		return;
6007 	ipsec_conn_pol(sel, connp, ppp);
6008 }
6009 
6010 /*
6011  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6012  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6013  * to PF_KEY.
6014  *
6015  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6016  * ignore prefix lengths in the address extension.  Since we match on first-
6017  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6018  * set addresses to mask out the lower bits, we should get a suitable search
6019  * key for the SPD anyway.  This is the function to change if the assumption
6020  * about suitable search keys is wrong.
6021  */
6022 static int
6023 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6024     sadb_address_t *dstext, int *diagnostic)
6025 {
6026 	struct sockaddr_in *src, *dst;
6027 	struct sockaddr_in6 *src6, *dst6;
6028 
6029 	*diagnostic = 0;
6030 
6031 	bzero(sel, sizeof (*sel));
6032 	sel->ips_protocol = srcext->sadb_address_proto;
6033 	dst = (struct sockaddr_in *)(dstext + 1);
6034 	if (dst->sin_family == AF_INET6) {
6035 		dst6 = (struct sockaddr_in6 *)dst;
6036 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6037 		if (src6->sin6_family != AF_INET6) {
6038 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6039 			return (EINVAL);
6040 		}
6041 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6042 		sel->ips_local_addr_v6 = src6->sin6_addr;
6043 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6044 			sel->ips_is_icmp_inv_acq = 1;
6045 		} else {
6046 			sel->ips_remote_port = dst6->sin6_port;
6047 			sel->ips_local_port = src6->sin6_port;
6048 		}
6049 		sel->ips_isv4 = B_FALSE;
6050 	} else {
6051 		src = (struct sockaddr_in *)(srcext + 1);
6052 		if (src->sin_family != AF_INET) {
6053 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6054 			return (EINVAL);
6055 		}
6056 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6057 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6058 		if (sel->ips_protocol == IPPROTO_ICMP) {
6059 			sel->ips_is_icmp_inv_acq = 1;
6060 		} else {
6061 			sel->ips_remote_port = dst->sin_port;
6062 			sel->ips_local_port = src->sin_port;
6063 		}
6064 		sel->ips_isv4 = B_TRUE;
6065 	}
6066 	return (0);
6067 }
6068 
6069 /*
6070  * We have encapsulation.
6071  * - Lookup tun_t by address and look for an associated
6072  *   tunnel policy
6073  * - If there are inner selectors
6074  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6075  *   - Look up tunnel policy based on selectors
6076  * - Else
6077  *   - Sanity check the negotation
6078  *   - If appropriate, fall through to global policy
6079  */
6080 static int
6081 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6082     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6083     int *diagnostic, netstack_t *ns)
6084 {
6085 	int err;
6086 	ipsec_policy_head_t *polhead;
6087 
6088 	/* Check for inner selectors and act appropriately */
6089 
6090 	if (innsrcext != NULL) {
6091 		/* Inner selectors present */
6092 		ASSERT(inndstext != NULL);
6093 		if ((itp == NULL) ||
6094 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6095 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6096 			/*
6097 			 * If inner packet selectors, we must have negotiate
6098 			 * tunnel and active policy.  If the tunnel has
6099 			 * transport-mode policy set on it, or has no policy,
6100 			 * fail.
6101 			 */
6102 			return (ENOENT);
6103 		} else {
6104 			/*
6105 			 * Reset "sel" to indicate inner selectors.  Pass
6106 			 * inner PF_KEY address extensions for this to happen.
6107 			 */
6108 			err = ipsec_get_inverse_acquire_sel(sel,
6109 			    innsrcext, inndstext, diagnostic);
6110 			if (err != 0) {
6111 				ITP_REFRELE(itp, ns);
6112 				return (err);
6113 			}
6114 			/*
6115 			 * Now look for a tunnel policy based on those inner
6116 			 * selectors.  (Common code is below.)
6117 			 */
6118 		}
6119 	} else {
6120 		/* No inner selectors present */
6121 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6122 			/*
6123 			 * Transport mode negotiation with no tunnel policy
6124 			 * configured - return to indicate a global policy
6125 			 * check is needed.
6126 			 */
6127 			if (itp != NULL) {
6128 				ITP_REFRELE(itp, ns);
6129 			}
6130 			return (0);
6131 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6132 			/* Tunnel mode set with no inner selectors. */
6133 			ITP_REFRELE(itp, ns);
6134 			return (ENOENT);
6135 		}
6136 		/*
6137 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6138 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6139 		 * itp with policy set based on any match, so don't bother
6140 		 * changing fields in "sel".
6141 		 */
6142 	}
6143 
6144 	ASSERT(itp != NULL);
6145 	polhead = itp->itp_policy;
6146 	ASSERT(polhead != NULL);
6147 	rw_enter(&polhead->iph_lock, RW_READER);
6148 	*ppp = ipsec_find_policy_head(NULL, polhead,
6149 	    IPSEC_TYPE_INBOUND, sel, ns);
6150 	rw_exit(&polhead->iph_lock);
6151 	ITP_REFRELE(itp, ns);
6152 
6153 	/*
6154 	 * Don't default to global if we didn't find a matching policy entry.
6155 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6156 	 */
6157 	if (*ppp == NULL)
6158 		return (ENOENT);
6159 
6160 	return (0);
6161 }
6162 
6163 static void
6164 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6165     ip_stack_t *ipst)
6166 {
6167 	boolean_t	isv4 = sel->ips_isv4;
6168 	connf_t		*connfp;
6169 	conn_t		*connp;
6170 
6171 	if (isv4) {
6172 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
6173 	} else {
6174 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6175 	}
6176 
6177 	mutex_enter(&connfp->connf_lock);
6178 	for (connp = connfp->connf_head; connp != NULL;
6179 	    connp = connp->conn_next) {
6180 		if (!((isv4 && !((connp->conn_src == 0 ||
6181 		    connp->conn_src == sel->ips_local_addr_v4) &&
6182 		    (connp->conn_rem == 0 ||
6183 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
6184 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
6185 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
6186 		    &sel->ips_local_addr_v6)) &&
6187 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
6188 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
6189 		    &sel->ips_remote_addr_v6)))))) {
6190 			break;
6191 		}
6192 	}
6193 	if (connp == NULL) {
6194 		mutex_exit(&connfp->connf_lock);
6195 		return;
6196 	}
6197 
6198 	CONN_INC_REF(connp);
6199 	mutex_exit(&connfp->connf_lock);
6200 
6201 	ipsec_conn_pol(sel, connp, ppp);
6202 }
6203 
6204 /*
6205  * Construct an inverse ACQUIRE reply based on:
6206  *
6207  * 1.) Current global policy.
6208  * 2.) An conn_t match depending on what all was passed in the extv[].
6209  * 3.) A tunnel's policy head.
6210  * ...
6211  * N.) Other stuff TBD (e.g. identities)
6212  *
6213  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6214  * in this function so the caller can extract them where appropriately.
6215  *
6216  * The SRC address is the local one - just like an outbound ACQUIRE message.
6217  */
6218 mblk_t *
6219 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6220     netstack_t *ns)
6221 {
6222 	int err;
6223 	int diagnostic;
6224 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6225 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6226 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6227 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6228 	struct sockaddr_in6 *src, *dst;
6229 	struct sockaddr_in6 *isrc, *idst;
6230 	ipsec_tun_pol_t *itp = NULL;
6231 	ipsec_policy_t *pp = NULL;
6232 	ipsec_selector_t sel, isel;
6233 	mblk_t *retmp;
6234 	ip_stack_t	*ipst = ns->netstack_ip;
6235 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6236 
6237 	/* Normalize addresses */
6238 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6239 	    == KS_IN_ADDR_UNKNOWN) {
6240 		err = EINVAL;
6241 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6242 		goto bail;
6243 	}
6244 	src = (struct sockaddr_in6 *)(srcext + 1);
6245 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6246 	    == KS_IN_ADDR_UNKNOWN) {
6247 		err = EINVAL;
6248 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6249 		goto bail;
6250 	}
6251 	dst = (struct sockaddr_in6 *)(dstext + 1);
6252 	if (src->sin6_family != dst->sin6_family) {
6253 		err = EINVAL;
6254 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6255 		goto bail;
6256 	}
6257 
6258 	/* Check for tunnel mode and act appropriately */
6259 	if (innsrcext != NULL) {
6260 		if (inndstext == NULL) {
6261 			err = EINVAL;
6262 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6263 			goto bail;
6264 		}
6265 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6266 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6267 			err = EINVAL;
6268 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6269 			goto bail;
6270 		}
6271 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6272 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6273 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6274 			err = EINVAL;
6275 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6276 			goto bail;
6277 		}
6278 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6279 		if (isrc->sin6_family != idst->sin6_family) {
6280 			err = EINVAL;
6281 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6282 			goto bail;
6283 		}
6284 		if (isrc->sin6_family != AF_INET &&
6285 		    isrc->sin6_family != AF_INET6) {
6286 			err = EINVAL;
6287 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6288 			goto bail;
6289 		}
6290 	} else if (inndstext != NULL) {
6291 		err = EINVAL;
6292 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6293 		goto bail;
6294 	}
6295 
6296 	/* Get selectors first, based on outer addresses */
6297 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6298 	if (err != 0)
6299 		goto bail;
6300 
6301 	/* Check for tunnel mode mismatches. */
6302 	if (innsrcext != NULL &&
6303 	    ((isrc->sin6_family == AF_INET &&
6304 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6305 	    (isrc->sin6_family == AF_INET6 &&
6306 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6307 		err = EPROTOTYPE;
6308 		goto bail;
6309 	}
6310 
6311 	/*
6312 	 * Okay, we have the addresses and other selector information.
6313 	 * Let's first find a conn...
6314 	 */
6315 	pp = NULL;
6316 	switch (sel.ips_protocol) {
6317 	case IPPROTO_TCP:
6318 		ipsec_tcp_pol(&sel, &pp, ipst);
6319 		break;
6320 	case IPPROTO_UDP:
6321 		ipsec_udp_pol(&sel, &pp, ipst);
6322 		break;
6323 	case IPPROTO_SCTP:
6324 		ipsec_sctp_pol(&sel, &pp, ipst);
6325 		break;
6326 	case IPPROTO_ENCAP:
6327 	case IPPROTO_IPV6:
6328 		rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER);
6329 		/*
6330 		 * Assume sel.ips_remote_addr_* has the right address at
6331 		 * that exact position.
6332 		 */
6333 		itp = ipss->ipsec_itp_get_byaddr(
6334 		    (uint32_t *)(&sel.ips_local_addr_v6),
6335 		    (uint32_t *)(&sel.ips_remote_addr_v6),
6336 		    src->sin6_family, ns);
6337 		rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
6338 		if (innsrcext == NULL) {
6339 			/*
6340 			 * Transport-mode tunnel, make sure we fake out isel
6341 			 * to contain something based on the outer protocol.
6342 			 */
6343 			bzero(&isel, sizeof (isel));
6344 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6345 		} /* Else isel is initialized by ipsec_tun_pol(). */
6346 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6347 		    &diagnostic, ns);
6348 		/*
6349 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6350 		 * may be.
6351 		 */
6352 		if (err != 0)
6353 			goto bail;
6354 		break;
6355 	default:
6356 		ipsec_oth_pol(&sel, &pp, ipst);
6357 		break;
6358 	}
6359 
6360 	/*
6361 	 * If we didn't find a matching conn_t or other policy head, take a
6362 	 * look in the global policy.
6363 	 */
6364 	if (pp == NULL) {
6365 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6366 		    ns);
6367 		if (pp == NULL) {
6368 			/* There's no global policy. */
6369 			err = ENOENT;
6370 			diagnostic = 0;
6371 			goto bail;
6372 		}
6373 	}
6374 
6375 	/*
6376 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6377 	 * message based on that, fix fields where appropriate,
6378 	 * and return the message.
6379 	 */
6380 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6381 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6382 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6383 	if (pp != NULL) {
6384 		IPPOL_REFRELE(pp, ns);
6385 	}
6386 	if (retmp != NULL) {
6387 		return (retmp);
6388 	} else {
6389 		err = ENOMEM;
6390 		diagnostic = 0;
6391 	}
6392 bail:
6393 	samsg->sadb_msg_errno = (uint8_t)err;
6394 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6395 	return (NULL);
6396 }
6397 
6398 /*
6399  * ipsa_lpkt is a one-element queue, only manipulated by casptr within
6400  * the next two functions.
6401  *
6402  * These functions loop calling casptr() until the swap "happens",
6403  * turning a compare-and-swap op into an atomic swap operation.
6404  */
6405 
6406 /*
6407  * sadb_set_lpkt: Atomically swap in a value to ipsa->ipsa_lpkt and
6408  * freemsg the previous value.  free clue: freemsg(NULL) is safe.
6409  */
6410 
6411 void
6412 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
6413 {
6414 	mblk_t *opkt;
6415 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6416 
6417 	membar_producer();
6418 	do {
6419 		opkt = ipsa->ipsa_lpkt;
6420 	} while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt);
6421 
6422 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
6423 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
6424 	    &ipss->ipsec_sadb_dropper);
6425 }
6426 
6427 /*
6428  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6429  * previous value.
6430  */
6431 
6432 mblk_t *
6433 sadb_clear_lpkt(ipsa_t *ipsa)
6434 {
6435 	mblk_t *opkt;
6436 
6437 	do {
6438 		opkt = ipsa->ipsa_lpkt;
6439 	} while (casptr(&ipsa->ipsa_lpkt, opkt, NULL) != opkt);
6440 
6441 	return (opkt);
6442 }
6443 
6444 /*
6445  * Walker callback used by sadb_alg_update() to free/create crypto
6446  * context template when a crypto software provider is removed or
6447  * added.
6448  */
6449 
/* Cookie handed to sadb_alg_update_cb() for each SA visited by the walk. */
struct sadb_update_alg_state {
	ipsec_algtype_t alg_type;	/* kind of algorithm that changed */
	uint8_t alg_id;			/* id compared with the SA's algorithm */
	boolean_t is_added;		/* true: create tmpl; false: destroy */
};
6455 
6456 static void
6457 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
6458 {
6459 	struct sadb_update_alg_state *update_state =
6460 	    (struct sadb_update_alg_state *)cookie;
6461 	crypto_ctx_template_t *ctx_tmpl = NULL;
6462 
6463 	ASSERT(MUTEX_HELD(&head->isaf_lock));
6464 
6465 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
6466 		return;
6467 
6468 	mutex_enter(&entry->ipsa_lock);
6469 
6470 	switch (update_state->alg_type) {
6471 	case IPSEC_ALG_AUTH:
6472 		if (entry->ipsa_auth_alg == update_state->alg_id)
6473 			ctx_tmpl = &entry->ipsa_authtmpl;
6474 		break;
6475 	case IPSEC_ALG_ENCR:
6476 		if (entry->ipsa_encr_alg == update_state->alg_id)
6477 			ctx_tmpl = &entry->ipsa_encrtmpl;
6478 		break;
6479 	default:
6480 		ctx_tmpl = NULL;
6481 	}
6482 
6483 	if (ctx_tmpl == NULL) {
6484 		mutex_exit(&entry->ipsa_lock);
6485 		return;
6486 	}
6487 
6488 	/*
6489 	 * The context template of the SA may be affected by the change
6490 	 * of crypto provider.
6491 	 */
6492 	if (update_state->is_added) {
6493 		/* create the context template if not already done */
6494 		if (*ctx_tmpl == NULL) {
6495 			(void) ipsec_create_ctx_tmpl(entry,
6496 			    update_state->alg_type);
6497 		}
6498 	} else {
6499 		/*
6500 		 * The crypto provider was removed. If the context template
6501 		 * exists but it is no longer valid, free it.
6502 		 */
6503 		if (*ctx_tmpl != NULL)
6504 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
6505 	}
6506 
6507 	mutex_exit(&entry->ipsa_lock);
6508 }
6509 
6510 /*
6511  * Invoked by IP when an software crypto provider has been updated.
6512  * The type and id of the corresponding algorithm is passed as argument.
6513  * is_added is B_TRUE if the provider was added, B_FALSE if it was
6514  * removed. The function updates the SADB and free/creates the
6515  * context templates associated with SAs if needed.
6516  */
6517 
6518 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
6519     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
6520 	&update_state)
6521 
6522 void
6523 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
6524     netstack_t *ns)
6525 {
6526 	struct sadb_update_alg_state update_state;
6527 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
6528 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
6529 
6530 	update_state.alg_type = alg_type;
6531 	update_state.alg_id = alg_id;
6532 	update_state.is_added = is_added;
6533 
6534 	if (alg_type == IPSEC_ALG_AUTH) {
6535 		/* walk the AH tables only for auth. algorithm changes */
6536 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
6537 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
6538 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
6539 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
6540 	}
6541 
6542 	/* walk the ESP tables */
6543 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
6544 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
6545 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
6546 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
6547 }
6548 
6549 /*
6550  * Creates a context template for the specified SA. This function
6551  * is called when an SA is created and when a context template needs
6552  * to be created due to a change of software provider.
6553  */
6554 int
6555 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6556 {
6557 	ipsec_alginfo_t *alg;
6558 	crypto_mechanism_t mech;
6559 	crypto_key_t *key;
6560 	crypto_ctx_template_t *sa_tmpl;
6561 	int rv;
6562 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
6563 
6564 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
6565 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6566 
6567 	/* get pointers to the algorithm info, context template, and key */
6568 	switch (alg_type) {
6569 	case IPSEC_ALG_AUTH:
6570 		key = &sa->ipsa_kcfauthkey;
6571 		sa_tmpl = &sa->ipsa_authtmpl;
6572 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
6573 		break;
6574 	case IPSEC_ALG_ENCR:
6575 		key = &sa->ipsa_kcfencrkey;
6576 		sa_tmpl = &sa->ipsa_encrtmpl;
6577 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
6578 		break;
6579 	default:
6580 		alg = NULL;
6581 	}
6582 
6583 	if (alg == NULL || !ALG_VALID(alg))
6584 		return (EINVAL);
6585 
6586 	/* initialize the mech info structure for the framework */
6587 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
6588 	mech.cm_type = alg->alg_mech_type;
6589 	mech.cm_param = NULL;
6590 	mech.cm_param_len = 0;
6591 
6592 	/* create a new context template */
6593 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
6594 
6595 	/*
6596 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
6597 	 * providers are available for that mechanism. In that case
6598 	 * we don't fail, and will generate the context template from
6599 	 * the framework callback when a software provider for that
6600 	 * mechanism registers.
6601 	 *
6602 	 * The context template is assigned the special value
6603 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
6604 	 * lack of memory. No attempt will be made to use
6605 	 * the context template if it is set to this value.
6606 	 */
6607 	if (rv == CRYPTO_HOST_MEMORY) {
6608 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
6609 	} else if (rv != CRYPTO_SUCCESS) {
6610 		*sa_tmpl = NULL;
6611 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
6612 			return (EINVAL);
6613 	}
6614 
6615 	return (0);
6616 }
6617 
6618 /*
6619  * Destroy the context template of the specified algorithm type
6620  * of the specified SA. Must be called while holding the SA lock.
6621  */
6622 void
6623 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6624 {
6625 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6626 
6627 	if (alg_type == IPSEC_ALG_AUTH) {
6628 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
6629 			sa->ipsa_authtmpl = NULL;
6630 		else if (sa->ipsa_authtmpl != NULL) {
6631 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
6632 			sa->ipsa_authtmpl = NULL;
6633 		}
6634 	} else {
6635 		ASSERT(alg_type == IPSEC_ALG_ENCR);
6636 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
6637 			sa->ipsa_encrtmpl = NULL;
6638 		else if (sa->ipsa_encrtmpl != NULL) {
6639 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
6640 			sa->ipsa_encrtmpl = NULL;
6641 		}
6642 	}
6643 }
6644 
6645 /*
6646  * Use the kernel crypto framework to check the validity of a key received
6647  * via keysock. Returns 0 if the key is OK, -1 otherwise.
6648  */
6649 int
6650 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
6651     boolean_t is_auth, int *diag)
6652 {
6653 	crypto_mechanism_t mech;
6654 	crypto_key_t crypto_key;
6655 	int crypto_rc;
6656 
6657 	mech.cm_type = mech_type;
6658 	mech.cm_param = NULL;
6659 	mech.cm_param_len = 0;
6660 
6661 	crypto_key.ck_format = CRYPTO_KEY_RAW;
6662 	crypto_key.ck_data = sadb_key + 1;
6663 	crypto_key.ck_length = sadb_key->sadb_key_bits;
6664 
6665 	crypto_rc = crypto_key_check(&mech, &crypto_key);
6666 
6667 	switch (crypto_rc) {
6668 	case CRYPTO_SUCCESS:
6669 		return (0);
6670 	case CRYPTO_MECHANISM_INVALID:
6671 	case CRYPTO_MECH_NOT_SUPPORTED:
6672 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
6673 		    SADB_X_DIAGNOSTIC_BAD_EALG;
6674 		break;
6675 	case CRYPTO_KEY_SIZE_RANGE:
6676 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
6677 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
6678 		break;
6679 	case CRYPTO_WEAK_KEY:
6680 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
6681 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
6682 		break;
6683 	}
6684 
6685 	return (-1);
6686 }
6687 /*
6688  * If this is an outgoing SA then add some fuzz to the
6689  * SOFT EXPIRE time. The reason for this is to stop
6690  * peers trying to renegotiate SOFT expiring SA's at
6691  * the same time. The amount of fuzz needs to be at
6692  * least 10 seconds which is the typical interval
6693  * sadb_ager(), although this is only a guide as it
6694  * selftunes.
6695  */
6696 void
6697 lifetime_fuzz(ipsa_t *assoc)
6698 {
6699 	uint8_t rnd;
6700 
6701 	if (assoc->ipsa_softaddlt == 0)
6702 		return;
6703 
6704 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
6705 	rnd = (rnd & 0xF) + 10;
6706 	assoc->ipsa_softexpiretime -= rnd;
6707 	assoc->ipsa_softaddlt -= rnd;
6708 }
6709 void
6710 destroy_ipsa_pair(ipsap_t *ipsapp)
6711 {
6712 	if (ipsapp == NULL)
6713 		return;
6714 
6715 	/*
6716 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
6717 	 * them in { }.
6718 	 */
6719 	if (ipsapp->ipsap_sa_ptr != NULL) {
6720 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
6721 	}
6722 	if (ipsapp->ipsap_psa_ptr != NULL) {
6723 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
6724 	}
6725 
6726 	kmem_free(ipsapp, sizeof (*ipsapp));
6727 }
6728 
6729 /*
6730  * The sadb_ager() function walks through the hash tables of SA's and ages
6731  * them, if the SA expires as a result, its marked as DEAD and will be reaped
6732  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
6733  * SA appears in both the inbound and outbound tables because its not possible
6734  * to determine its direction) are placed on a list when they expire. This is
6735  * to ensure that pair/peer SA's are reaped at the same time, even if they
6736  * expire at different times.
6737  *
6738  * This function is called twice by sadb_ager(), one after processing the
6739  * inbound table, then again after processing the outbound table.
6740  */
6741 void
6742 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
6743 {
6744 	templist_t *listptr;
6745 	int outhash;
6746 	isaf_t *bucket;
6747 	boolean_t haspeer;
6748 	ipsa_t *peer_assoc, *dying;
6749 	/*
6750 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
6751 	 * is address independent.
6752 	 */
6753 	while (haspeerlist != NULL) {
6754 		/* "dying" contains the SA that has a peer. */
6755 		dying = haspeerlist->ipsa;
6756 		haspeer = (dying->ipsa_haspeer);
6757 		listptr = haspeerlist;
6758 		haspeerlist = listptr->next;
6759 		kmem_free(listptr, sizeof (*listptr));
6760 		/*
6761 		 * Pick peer bucket based on addrfam.
6762 		 */
6763 		if (outbound) {
6764 			if (haspeer)
6765 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
6766 			else
6767 				bucket = INBOUND_BUCKET(sp,
6768 				    dying->ipsa_otherspi);
6769 		} else { /* inbound */
6770 			if (haspeer) {
6771 				if (dying->ipsa_addrfam == AF_INET6) {
6772 					outhash = OUTBOUND_HASH_V6(sp,
6773 					    *((in6_addr_t *)&dying->
6774 					    ipsa_dstaddr));
6775 				} else {
6776 					outhash = OUTBOUND_HASH_V4(sp,
6777 					    *((ipaddr_t *)&dying->
6778 					    ipsa_dstaddr));
6779 				}
6780 			} else if (dying->ipsa_addrfam == AF_INET6) {
6781 				outhash = OUTBOUND_HASH_V6(sp,
6782 				    *((in6_addr_t *)&dying->
6783 				    ipsa_srcaddr));
6784 			} else {
6785 				outhash = OUTBOUND_HASH_V4(sp,
6786 				    *((ipaddr_t *)&dying->
6787 				    ipsa_srcaddr));
6788 			}
6789 		bucket = &(sp->sdb_of[outhash]);
6790 		}
6791 
6792 		mutex_enter(&bucket->isaf_lock);
6793 		/*
6794 		 * "haspeer" SA's have the same src/dst address ordering,
6795 		 * "paired" SA's have the src/dst addresses reversed.
6796 		 */
6797 		if (haspeer) {
6798 			peer_assoc = ipsec_getassocbyspi(bucket,
6799 			    dying->ipsa_spi, dying->ipsa_srcaddr,
6800 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
6801 		} else {
6802 			peer_assoc = ipsec_getassocbyspi(bucket,
6803 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
6804 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
6805 		}
6806 
6807 		mutex_exit(&bucket->isaf_lock);
6808 		if (peer_assoc != NULL) {
6809 			mutex_enter(&peer_assoc->ipsa_lock);
6810 			mutex_enter(&dying->ipsa_lock);
6811 			if (!haspeer) {
6812 				/*
6813 				 * Only SA's which have a "peer" or are
6814 				 * "paired" end up on this list, so this
6815 				 * must be a "paired" SA, update the flags
6816 				 * to break the pair.
6817 				 */
6818 				peer_assoc->ipsa_otherspi = 0;
6819 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
6820 				dying->ipsa_otherspi = 0;
6821 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
6822 			}
6823 			if (haspeer || outbound) {
6824 				/*
6825 				 * Update the state of the "inbound" SA when
6826 				 * the "outbound" SA has expired. Don't update
6827 				 * the "outbound" SA when the "inbound" SA
6828 				 * SA expires because setting the hard_addtime
6829 				 * below will cause this to happen.
6830 				 */
6831 				peer_assoc->ipsa_state = dying->ipsa_state;
6832 			}
6833 			if (dying->ipsa_state == IPSA_STATE_DEAD)
6834 				peer_assoc->ipsa_hardexpiretime = 1;
6835 
6836 			mutex_exit(&dying->ipsa_lock);
6837 			mutex_exit(&peer_assoc->ipsa_lock);
6838 			IPSA_REFRELE(peer_assoc);
6839 		}
6840 		IPSA_REFRELE(dying);
6841 	}
6842 }
6843