xref: /illumos-gate/usr/src/uts/common/inet/ip/sadb.c (revision 74e7dc986c89efca1f2e4451c7a572e05e4a6e4f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/errno.h>
30 #include <sys/ddi.h>
31 #include <sys/debug.h>
32 #include <sys/cmn_err.h>
33 #include <sys/stream.h>
34 #include <sys/strlog.h>
35 #include <sys/kmem.h>
36 #include <sys/sunddi.h>
37 #include <sys/tihdr.h>
38 #include <sys/atomic.h>
39 #include <sys/socket.h>
40 #include <sys/sysmacros.h>
41 #include <sys/crypto/common.h>
42 #include <sys/crypto/api.h>
43 #include <sys/zone.h>
44 #include <netinet/in.h>
45 #include <net/if.h>
46 #include <net/pfkeyv2.h>
47 #include <inet/common.h>
48 #include <netinet/ip6.h>
49 #include <inet/ip.h>
50 #include <inet/ip_ire.h>
51 #include <inet/ip6.h>
52 #include <inet/ipsec_info.h>
53 #include <inet/tcp.h>
54 #include <inet/sadb.h>
55 #include <inet/ipsec_impl.h>
56 #include <inet/ipsecah.h>
57 #include <inet/ipsecesp.h>
58 #include <sys/random.h>
59 #include <sys/dlpi.h>
60 #include <sys/iphada.h>
61 #include <inet/ip_if.h>
62 #include <inet/ipdrop.h>
63 #include <inet/ipclassifier.h>
64 #include <inet/sctp_ip.h>
65 #include <inet/tun.h>
66 
67 /*
68  * This source file contains Security Association Database (SADB) common
69  * routines.  They are linked in with the AH module.  Since AH has no chance
70  * of falling under export control, it was safe to link it in there.
71  */
72 
/*
 * Forward declarations of file-local (static) routines, so that they can be
 * referenced before the point at which they are defined.
 */
static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
    ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
static void sadb_drain_torchq(queue_t *, mblk_t *);
static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
			    netstack_t *);
static void sadb_destroy(sadb_t *, netstack_t *);
static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);

/* Lifetime/expiration helpers. */
static time_t sadb_add_time(time_t, uint64_t);
static void lifetime_fuzz(ipsa_t *);
static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
86 
/*
 * ipsacq_maxpackets is defined here to make it tunable
 * from /etc/system.
 *
 * NOTE(review): the declaration below is "extern", so this line only makes
 * the variable visible; the storage definition itself is not on this line.
 * Confirm where the definition lives and adjust the wording above if needed.
 */
extern uint64_t ipsacq_maxpackets;
92 
/*
 * SET_EXPIRE(sa, delta, exp)
 *
 * "delta" and "exp" are field-name suffixes that get pasted onto "ipsa_".
 * If (sa)->ipsa_<delta> is non-zero, (sa)->ipsa_<exp> is set to
 * (sa)->ipsa_addtime plus that delta, computed with the saturating
 * sadb_add_time().  If the delta is zero, ipsa_<exp> is left untouched.
 *
 * The expansion is a bare { } block and "sa" is evaluated more than once.
 */
#define	SET_EXPIRE(sa, delta, exp) {				\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,	\
			(sa)->ipsa_ ## delta);				\
	}								\
}
99 
/*
 * UPDATE_EXPIRE(sa, delta, exp)
 *
 * "delta" and "exp" are field-name suffixes that get pasted onto "ipsa_".
 * If (sa)->ipsa_<delta> is non-zero, a candidate expiration is computed as
 * (sa)->ipsa_usetime plus that delta (saturating, via sadb_add_time()).
 * When ipsa_<exp> is currently 0 it is set to the candidate; otherwise it
 * becomes the smaller of its current value and the candidate.
 *
 * The expansion is a bare { } block that declares a local named "tmp", and
 * "sa" is evaluated more than once.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
			(sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0)				\
			(sa)->ipsa_ ## exp = tmp;			\
		else							\
			(sa)->ipsa_ ## exp = 				\
			    MIN((sa)->ipsa_ ## exp, tmp); 		\
	}								\
}
111 
112 
113 /* wrap the macro so we can pass it as a function pointer */
114 void
115 sadb_sa_refrele(void *target)
116 {
117 	IPSA_REFRELE(((ipsa_t *)target));
118 }
119 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * We presume that ILP32 systems will be past end of support life when
 * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 *
 * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 * will hopefully have figured out clever ways to avoid the use of
 * fixed-sized integers in computation.
 *
 * Returns base + delta, or TIME_MAX if that sum would not fit in a time_t.
 *
 * The overflow test is done BEFORE the addition, by comparing delta with
 * the headroom left above base.  This avoids depending on how an
 * out-of-range value is converted to a signed type (which the language
 * leaves implementation-defined) in order to notice the overflow afterwards.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.  After this, delta always fits in a time_t.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = (uint64_t)TIME_MAX;

	/*
	 * Only a positive base can push the sum past TIME_MAX (with
	 * base <= 0 the sum is at most delta, which was clipped above).
	 * TIME_MAX - base cannot overflow when base is positive.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
170 
171 /*
172  * Callers of this function have already created a working security
173  * association, and have found the appropriate table & hash chain.  All this
174  * function does is check duplicates, and insert the SA.  The caller needs to
175  * hold the hash bucket lock and increment the refcnt before insertion.
176  *
177  * Return 0 if success, EEXIST if collision.
178  */
179 #define	SA_UNIQUE_MATCH(sa1, sa2) \
180 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
181 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
182 
183 int
184 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
185 {
186 	ipsa_t **ptpn = NULL;
187 	ipsa_t *walker;
188 	boolean_t unspecsrc;
189 
190 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
191 
192 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
193 
194 	walker = bucket->isaf_ipsa;
195 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
196 
197 	/*
198 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
199 	 * of the list unless there's an unspecified source address, then
200 	 * insert it after the last SA with a specified source address.
201 	 *
202 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
203 	 * checking for collisions.
204 	 */
205 
206 	while (walker != NULL) {
207 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
208 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
209 			if (walker->ipsa_spi == ipsa->ipsa_spi)
210 				return (EEXIST);
211 
212 			mutex_enter(&walker->ipsa_lock);
213 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
214 			    (walker->ipsa_flags & IPSA_F_USED) &&
215 			    SA_UNIQUE_MATCH(walker, ipsa)) {
216 				walker->ipsa_flags |= IPSA_F_CINVALID;
217 			}
218 			mutex_exit(&walker->ipsa_lock);
219 		}
220 
221 		if (ptpn == NULL && unspecsrc) {
222 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
223 			    walker->ipsa_addrfam))
224 				ptpn = walker->ipsa_ptpn;
225 			else if (walker->ipsa_next == NULL)
226 				ptpn = &walker->ipsa_next;
227 		}
228 
229 		walker = walker->ipsa_next;
230 	}
231 
232 	if (ptpn == NULL)
233 		ptpn = &bucket->isaf_ipsa;
234 	ipsa->ipsa_next = *ptpn;
235 	ipsa->ipsa_ptpn = ptpn;
236 	if (ipsa->ipsa_next != NULL)
237 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
238 	*ptpn = ipsa;
239 	ipsa->ipsa_linklock = &bucket->isaf_lock;
240 
241 	return (0);
242 }
243 #undef SA_UNIQUE_MATCH
244 
245 /*
246  * Free a security association.  Its reference count is 0, which means
247  * I must free it.  The SA must be unlocked and must not be linked into
248  * any fanout list.
249  */
250 static void
251 sadb_freeassoc(ipsa_t *ipsa)
252 {
253 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
254 
255 	ASSERT(ipss != NULL);
256 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
257 	ASSERT(ipsa->ipsa_refcnt == 0);
258 	ASSERT(ipsa->ipsa_next == NULL);
259 	ASSERT(ipsa->ipsa_ptpn == NULL);
260 
261 	ip_drop_packet(sadb_clear_lpkt(ipsa), B_TRUE, NULL, NULL,
262 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
263 	    &ipss->ipsec_sadb_dropper);
264 
265 	mutex_enter(&ipsa->ipsa_lock);
266 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
267 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
268 	mutex_exit(&ipsa->ipsa_lock);
269 
270 	/* bzero() these fields for paranoia's sake. */
271 	if (ipsa->ipsa_authkey != NULL) {
272 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
273 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
274 	}
275 	if (ipsa->ipsa_encrkey != NULL) {
276 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
277 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
278 	}
279 	if (ipsa->ipsa_src_cid != NULL) {
280 		IPSID_REFRELE(ipsa->ipsa_src_cid);
281 	}
282 	if (ipsa->ipsa_dst_cid != NULL) {
283 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
284 	}
285 	if (ipsa->ipsa_integ != NULL)
286 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
287 	if (ipsa->ipsa_sens != NULL)
288 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
289 
290 	mutex_destroy(&ipsa->ipsa_lock);
291 	kmem_free(ipsa, sizeof (*ipsa));
292 }
293 
294 /*
295  * Unlink a security association from a hash bucket.  Assume the hash bucket
296  * lock is held, but the association's lock is not.
297  *
298  * Note that we do not bump the bucket's generation number here because
299  * we might not be making a visible change to the set of visible SA's.
300  * All callers MUST bump the bucket's generation number before they unlock
301  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
302  * was present in the bucket at the time it was locked.
303  */
304 void
305 sadb_unlinkassoc(ipsa_t *ipsa)
306 {
307 	ASSERT(ipsa->ipsa_linklock != NULL);
308 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
309 
310 	/* These fields are protected by the link lock. */
311 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
312 	if (ipsa->ipsa_next != NULL) {
313 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
314 		ipsa->ipsa_next = NULL;
315 	}
316 
317 	ipsa->ipsa_ptpn = NULL;
318 
319 	/* This may destroy the SA. */
320 	IPSA_REFRELE(ipsa);
321 }
322 
323 /*
324  * Create a larval security association with the specified SPI.	 All other
325  * fields are zeroed.
326  */
327 static ipsa_t *
328 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
329     netstack_t *ns)
330 {
331 	ipsa_t *newbie;
332 
333 	/*
334 	 * Allocate...
335 	 */
336 
337 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
338 	if (newbie == NULL) {
339 		/* Can't make new larval SA. */
340 		return (NULL);
341 	}
342 
343 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
344 	newbie->ipsa_spi = spi;
345 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
346 
347 	/*
348 	 * Copy addresses...
349 	 */
350 
351 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
352 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
353 
354 	newbie->ipsa_addrfam = addrfam;
355 
356 	/*
357 	 * Set common initialization values, including refcnt.
358 	 */
359 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
360 	newbie->ipsa_state = IPSA_STATE_LARVAL;
361 	newbie->ipsa_refcnt = 1;
362 	newbie->ipsa_freefunc = sadb_freeassoc;
363 
364 	/*
365 	 * There aren't a lot of other common initialization values, as
366 	 * they are copied in from the PF_KEY message.
367 	 */
368 
369 	return (newbie);
370 }
371 
372 /*
373  * Call me to initialize a security association fanout.
374  */
375 static int
376 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
377 {
378 	isaf_t *table;
379 	int i;
380 
381 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
382 	*tablep = table;
383 
384 	if (table == NULL)
385 		return (ENOMEM);
386 
387 	for (i = 0; i < size; i++) {
388 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
389 		table[i].isaf_ipsa = NULL;
390 		table[i].isaf_gen = 0;
391 	}
392 
393 	return (0);
394 }
395 
396 /*
397  * Call me to initialize an acquire fanout
398  */
399 static int
400 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
401 {
402 	iacqf_t *table;
403 	int i;
404 
405 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
406 	*tablep = table;
407 
408 	if (table == NULL)
409 		return (ENOMEM);
410 
411 	for (i = 0; i < size; i++) {
412 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
413 		table[i].iacqf_ipsacq = NULL;
414 	}
415 
416 	return (0);
417 }
418 
419 /*
420  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
421  * caller must clean up partial allocations.
422  */
423 static int
424 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
425 {
426 	ASSERT(sp->sdb_of == NULL);
427 	ASSERT(sp->sdb_if == NULL);
428 	ASSERT(sp->sdb_acq == NULL);
429 
430 	sp->sdb_hashsize = size;
431 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
432 		return (ENOMEM);
433 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
434 		return (ENOMEM);
435 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
436 		return (ENOMEM);
437 
438 	return (0);
439 }
440 
441 /*
442  * Call me to initialize an SADB instance; fall back to default size on failure.
443  */
444 static void
445 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
446     netstack_t *ns)
447 {
448 	ASSERT(sp->sdb_of == NULL);
449 	ASSERT(sp->sdb_if == NULL);
450 	ASSERT(sp->sdb_acq == NULL);
451 
452 	if (size < IPSEC_DEFAULT_HASH_SIZE)
453 		size = IPSEC_DEFAULT_HASH_SIZE;
454 
455 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
456 
457 		cmn_err(CE_WARN,
458 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
459 		    size, ver, name);
460 
461 		sadb_destroy(sp, ns);
462 		size = IPSEC_DEFAULT_HASH_SIZE;
463 		cmn_err(CE_WARN, "Falling back to %d entries", size);
464 		(void) sadb_init_trial(sp, size, KM_SLEEP);
465 	}
466 }
467 
468 
469 /*
470  * Initialize an SADB-pair.
471  */
472 void
473 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
474 {
475 	sadb_init(name, &sp->s_v4, size, 4, ns);
476 	sadb_init(name, &sp->s_v6, size, 6, ns);
477 
478 	sp->s_satype = type;
479 
480 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
481 	if (type == SADB_SATYPE_AH) {
482 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
483 
484 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
485 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
486 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
487 	} else {
488 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
489 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
490 	}
491 }
492 
493 /*
494  * Deliver a single SADB_DUMP message representing a single SA.  This is
495  * called many times by sadb_dump().
496  *
497  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
498  * the caller should take that as a hint that dupb() on the "original answer"
499  * failed, and that perhaps the caller should try again with a copyb()ed
500  * "original answer".
501  */
502 static int
503 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
504     sadb_msg_t *samsg)
505 {
506 	mblk_t *answer;
507 
508 	answer = dupb(original_answer);
509 	if (answer == NULL)
510 		return (ENOBUFS);
511 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
512 	if (answer->b_cont == NULL) {
513 		freeb(answer);
514 		return (ENOMEM);
515 	}
516 
517 	/* Just do a putnext, and let keysock deal with flow control. */
518 	putnext(pfkey_q, answer);
519 	return (0);
520 }
521 
522 /*
523  * Common function to allocate and prepare a keysock_out_t M_CTL message.
524  */
525 mblk_t *
526 sadb_keysock_out(minor_t serial)
527 {
528 	mblk_t *mp;
529 	keysock_out_t *kso;
530 
531 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
532 	if (mp != NULL) {
533 		mp->b_datap->db_type = M_CTL;
534 		mp->b_wptr += sizeof (ipsec_info_t);
535 		kso = (keysock_out_t *)mp->b_rptr;
536 		kso->ks_out_type = KEYSOCK_OUT;
537 		kso->ks_out_len = sizeof (*kso);
538 		kso->ks_out_serial = serial;
539 	}
540 
541 	return (mp);
542 }
543 
544 /*
545  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
546  * to keysock.
547  */
548 static int
549 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
550     int num_entries, boolean_t do_peers)
551 {
552 	int i, error = 0;
553 	mblk_t *original_answer;
554 	ipsa_t *walker;
555 	sadb_msg_t *samsg;
556 
557 	/*
558 	 * For each IPSA hash bucket do:
559 	 *	- Hold the mutex
560 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
561 	 */
562 	ASSERT(mp->b_cont != NULL);
563 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
564 
565 	original_answer = sadb_keysock_out(serial);
566 	if (original_answer == NULL)
567 		return (ENOMEM);
568 
569 	for (i = 0; i < num_entries; i++) {
570 		mutex_enter(&fanout[i].isaf_lock);
571 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
572 		    walker = walker->ipsa_next) {
573 			if (!do_peers && walker->ipsa_haspeer)
574 				continue;
575 			error = sadb_dump_deliver(pfkey_q, original_answer,
576 			    walker, samsg);
577 			if (error == ENOBUFS) {
578 				mblk_t *new_original_answer;
579 
580 				/* Ran out of dupb's.  Try a copyb. */
581 				new_original_answer = copyb(original_answer);
582 				if (new_original_answer == NULL) {
583 					error = ENOMEM;
584 				} else {
585 					freeb(original_answer);
586 					original_answer = new_original_answer;
587 					error = sadb_dump_deliver(pfkey_q,
588 					    original_answer, walker, samsg);
589 				}
590 			}
591 			if (error != 0)
592 				break;	/* out of for loop. */
593 		}
594 		mutex_exit(&fanout[i].isaf_lock);
595 		if (error != 0)
596 			break;	/* out of for loop. */
597 	}
598 
599 	freeb(original_answer);
600 	return (error);
601 }
602 
603 /*
604  * Dump an entire SADB; outbound first, then inbound.
605  */
606 
607 int
608 sadb_dump(queue_t *pfkey_q, mblk_t *mp, minor_t serial, sadb_t *sp)
609 {
610 	int error;
611 
612 	/* Dump outbound */
613 	error = sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_of,
614 	    sp->sdb_hashsize, B_TRUE);
615 	if (error)
616 		return (error);
617 
618 	/* Dump inbound */
619 	return sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_if,
620 	    sp->sdb_hashsize, B_FALSE);
621 }
622 
623 /*
624  * Generic sadb table walker.
625  *
626  * Call "walkfn" for each SA in each bucket in "table"; pass the
627  * bucket, the entry and "cookie" to the callback function.
628  * Take care to ensure that walkfn can delete the SA without screwing
629  * up our traverse.
630  *
631  * The bucket is locked for the duration of the callback, both so that the
632  * callback can just call sadb_unlinkassoc() when it wants to delete something,
633  * and so that no new entries are added while we're walking the list.
634  */
635 static void
636 sadb_walker(isaf_t *table, uint_t numentries,
637     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
638     void *cookie)
639 {
640 	int i;
641 	for (i = 0; i < numentries; i++) {
642 		ipsa_t *entry, *next;
643 
644 		mutex_enter(&table[i].isaf_lock);
645 
646 		for (entry = table[i].isaf_ipsa; entry != NULL;
647 		    entry = next) {
648 			next = entry->ipsa_next;
649 			(*walkfn)(&table[i], entry, cookie);
650 		}
651 		mutex_exit(&table[i].isaf_lock);
652 	}
653 }
654 
655 /*
656  * From the given SA, construct a dl_ct_ipsec_key and
657  * a dl_ct_ipsec structures to be sent to the adapter as part
658  * of a DL_CONTROL_REQ.
659  *
660  * ct_sa must point to the storage allocated for the key
661  * structure and must be followed by storage allocated
662  * for the SA information that must be sent to the driver
663  * as part of the DL_CONTROL_REQ request.
664  *
665  * The is_inbound boolean indicates whether the specified
666  * SA is part of an inbound SA table.
667  *
668  * Returns B_TRUE if the corresponding SA must be passed to
669  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
670  */
671 static boolean_t
672 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
673 {
674 	dl_ct_ipsec_key_t *keyp;
675 	dl_ct_ipsec_t *sap;
676 	void *ct_sa = mp->b_wptr;
677 
678 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
679 
680 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
681 	sap = (dl_ct_ipsec_t *)(keyp + 1);
682 
683 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
684 	    "is_inbound = %d\n", is_inbound));
685 
686 	/* initialize flag */
687 	sap->sadb_sa_flags = 0;
688 	if (is_inbound) {
689 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
690 		/*
691 		 * If an inbound SA has a peer, then mark it has being
692 		 * an outbound SA as well.
693 		 */
694 		if (sa->ipsa_haspeer)
695 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
696 	} else {
697 		/*
698 		 * If an outbound SA has a peer, then don't send it,
699 		 * since we will send the copy from the inbound table.
700 		 */
701 		if (sa->ipsa_haspeer) {
702 			freemsg(mp);
703 			return (B_FALSE);
704 		}
705 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
706 	}
707 
708 	keyp->dl_key_spi = sa->ipsa_spi;
709 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
710 	    DL_CTL_IPSEC_ADDR_LEN);
711 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
712 
713 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
714 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
715 
716 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
717 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
718 	bcopy(sa->ipsa_authkey,
719 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
720 
721 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
722 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
723 	bcopy(sa->ipsa_encrkey,
724 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
725 
726 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
727 	return (B_TRUE);
728 }
729 
730 /*
731  * Called from AH or ESP to format a message which will be used to inform
732  * IPsec-acceleration-capable ills of a SADB change.
733  * (It is not possible to send the message to IP directly from this function
734  * since the SA, if any, is locked during the call).
735  *
736  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
737  * sa_type: identifies whether the operation applies to AH or ESP
738  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
739  * sa: Pointer to an SA.  Must be non-NULL and locked
740  *	for ADD, DELETE, GET, and UPDATE operations.
741  * This function returns an mblk chain that must be passed to IP
742  * for forwarding to the IPsec capable providers.
743  */
744 mblk_t *
745 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
746     boolean_t is_inbound)
747 {
748 	mblk_t *mp;
749 	dl_control_req_t *ctrl;
750 	boolean_t need_key = B_FALSE;
751 	mblk_t *ctl_mp = NULL;
752 	ipsec_ctl_t *ctl;
753 
754 	/*
755 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
756 	 * 2 if a key is needed for the operation
757 	 *    2.1 initialize key
758 	 *    2.2 if a full SA is needed for the operation
759 	 *	2.2.1 initialize full SA info
760 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
761 	 * to send the resulting message to IPsec capable ills.
762 	 */
763 
764 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
765 
766 	/*
767 	 * Allocate DL_CONTROL_REQ M_PROTO
768 	 * We allocate room for the SA even if it's not needed
769 	 * by some of the operations (for example flush)
770 	 */
771 	mp = allocb(sizeof (dl_control_req_t) +
772 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
773 	if (mp == NULL)
774 		return (NULL);
775 	mp->b_datap->db_type = M_PROTO;
776 
777 	/* initialize dl_control_req_t */
778 	ctrl = (dl_control_req_t *)mp->b_wptr;
779 	ctrl->dl_primitive = DL_CONTROL_REQ;
780 	ctrl->dl_operation = dl_operation;
781 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
782 	    DL_CT_IPSEC_ESP;
783 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
784 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
785 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
786 	    sizeof (dl_ct_ipsec_key_t);
787 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
788 	mp->b_wptr += sizeof (dl_control_req_t);
789 
790 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
791 		ASSERT(sa != NULL);
792 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
793 
794 		need_key = B_TRUE;
795 
796 		/*
797 		 * Initialize key and SA data. Note that for some
798 		 * operations the SA data is ignored by the provider
799 		 * (delete, etc.)
800 		 */
801 		if (!sadb_req_from_sa(sa, mp, is_inbound))
802 			return (NULL);
803 	}
804 
805 	/* construct control message */
806 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
807 	if (ctl_mp == NULL) {
808 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
809 		freemsg(mp);
810 		return (NULL);
811 	}
812 
813 	ctl_mp->b_datap->db_type = M_CTL;
814 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
815 	ctl_mp->b_cont = mp;
816 
817 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
818 	ctl->ipsec_ctl_type = IPSEC_CTL;
819 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
820 	ctl->ipsec_ctl_sa_type = sa_type;
821 
822 	if (need_key) {
823 		/*
824 		 * Keep an additional reference on SA, since it will be
825 		 * needed by IP to send control messages corresponding
826 		 * to that SA from its perimeter. IP will do a
827 		 * IPSA_REFRELE when done with the request.
828 		 */
829 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
830 		IPSA_REFHOLD(sa);
831 		ctl->ipsec_ctl_sa = sa;
832 	} else
833 		ctl->ipsec_ctl_sa = NULL;
834 
835 	return (ctl_mp);
836 }
837 
838 
839 /*
840  * Called by sadb_ill_download() to dump the entries for a specific
841  * fanout table.  For each SA entry in the table passed as argument,
842  * use mp as a template and constructs a full DL_CONTROL message, and
843  * call ill_dlpi_send(), provided by IP, to send the resulting
844  * messages to the ill.
845  */
846 static void
847 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
848     boolean_t is_inbound)
849 {
850 	ipsa_t *walker;
851 	mblk_t *nmp, *salist;
852 	int i, error = 0;
853 	ip_stack_t	*ipst = ill->ill_ipst;
854 	netstack_t	*ns = ipst->ips_netstack;
855 
856 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
857 	    (void *)fanout, num_entries));
858 	/*
859 	 * For each IPSA hash bucket do:
860 	 *	- Hold the mutex
861 	 *	- Walk each entry, sending a corresponding request to IP
862 	 *	  for it.
863 	 */
864 	ASSERT(mp->b_datap->db_type == M_PROTO);
865 
866 	for (i = 0; i < num_entries; i++) {
867 		mutex_enter(&fanout[i].isaf_lock);
868 		salist = NULL;
869 
870 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
871 		    walker = walker->ipsa_next) {
872 			IPSECHW_DEBUG(IPSECHW_SADB,
873 			    ("sadb_ill_df: sending SA to ill via IP \n"));
874 			/*
875 			 * Duplicate the template mp passed and
876 			 * complete DL_CONTROL_REQ data.
877 			 * To be more memory efficient, we could use
878 			 * dupb() for the M_CTL and copyb() for the M_PROTO
879 			 * as the M_CTL, since the M_CTL is the same for
880 			 * every SA entry passed down to IP for the same ill.
881 			 *
882 			 * Note that copymsg/copyb ensure that the new mblk
883 			 * is at least as large as the source mblk even if it's
884 			 * not using all its storage -- therefore, nmp
885 			 * has trailing space for sadb_req_from_sa to add
886 			 * the SA-specific bits.
887 			 */
888 			mutex_enter(&walker->ipsa_lock);
889 			if (ipsec_capab_match(ill,
890 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
891 			    walker, ns)) {
892 				nmp = copymsg(mp);
893 				if (nmp == NULL) {
894 					IPSECHW_DEBUG(IPSECHW_SADB,
895 					    ("sadb_ill_df: alloc error\n"));
896 					error = ENOMEM;
897 					mutex_exit(&walker->ipsa_lock);
898 					break;
899 				}
900 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
901 					nmp->b_next = salist;
902 					salist = nmp;
903 				}
904 			}
905 			mutex_exit(&walker->ipsa_lock);
906 		}
907 		mutex_exit(&fanout[i].isaf_lock);
908 		while (salist != NULL) {
909 			nmp = salist;
910 			salist = nmp->b_next;
911 			nmp->b_next = NULL;
912 			ill_dlpi_send(ill, nmp);
913 		}
914 		if (error != 0)
915 			break;	/* out of for loop. */
916 	}
917 }
918 
919 /*
920  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
921  * the type specified by sa_type to the specified ill.
922  *
923  * We call for each fanout table defined by the SADB (one per
924  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
925  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
926  * message to the ill.
927  */
928 void
929 sadb_ill_download(ill_t *ill, uint_t sa_type)
930 {
931 	mblk_t *protomp;	/* prototype message */
932 	dl_control_req_t *ctrl;
933 	sadbp_t *spp;
934 	sadb_t *sp;
935 	int dlt;
936 	ip_stack_t	*ipst = ill->ill_ipst;
937 	netstack_t	*ns = ipst->ips_netstack;
938 
939 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
940 
941 	/*
942 	 * Allocate and initialize prototype answer. A duplicate for
943 	 * each SA is sent down to the interface.
944 	 */
945 
946 	/* DL_CONTROL_REQ M_PROTO mblk_t */
947 	protomp = allocb(sizeof (dl_control_req_t) +
948 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
949 	if (protomp == NULL)
950 		return;
951 	protomp->b_datap->db_type = M_PROTO;
952 
953 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
954 	if (sa_type == SADB_SATYPE_ESP) {
955 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
956 
957 		spp = &espstack->esp_sadb;
958 	} else {
959 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
960 
961 		spp = &ahstack->ah_sadb;
962 	}
963 
964 	ctrl = (dl_control_req_t *)protomp->b_wptr;
965 	ctrl->dl_primitive = DL_CONTROL_REQ;
966 	ctrl->dl_operation = DL_CO_SET;
967 	ctrl->dl_type = dlt;
968 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
969 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
970 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
971 	    sizeof (dl_ct_ipsec_key_t);
972 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
973 	protomp->b_wptr += sizeof (dl_control_req_t);
974 
975 	/*
976 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
977 	 * and dl_ct_ipsec_t
978 	 */
979 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
980 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
981 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
982 	freemsg(protomp);
983 }
984 
985 /*
986  * Call me to free up a security association fanout.  Use the forever
987  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
988  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
989  * when a module is unloaded).
990  */
991 static void
992 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever)
993 {
994 	int i;
995 	isaf_t *table = *tablep;
996 
997 	if (table == NULL)
998 		return;
999 
1000 	for (i = 0; i < numentries; i++) {
1001 		mutex_enter(&table[i].isaf_lock);
1002 		while (table[i].isaf_ipsa != NULL)
1003 			sadb_unlinkassoc(table[i].isaf_ipsa);
1004 		table[i].isaf_gen++;
1005 		mutex_exit(&table[i].isaf_lock);
1006 		if (forever)
1007 			mutex_destroy(&(table[i].isaf_lock));
1008 	}
1009 
1010 	if (forever) {
1011 		*tablep = NULL;
1012 		kmem_free(table, numentries * sizeof (*table));
1013 	}
1014 }
1015 
1016 /*
1017  * Entry points to sadb_destroyer().
1018  */
1019 static void
1020 sadb_flush(sadb_t *sp, netstack_t *ns)
1021 {
1022 	/*
1023 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1024 	 * enforcement, there would be a subtlety where I could add on the
1025 	 * heels of a flush.  With keysock's enforcement, however, this
1026 	 * makes ESP's job easy.
1027 	 */
1028 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1029 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE);
1030 
1031 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1032 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
1033 }
1034 
1035 static void
1036 sadb_destroy(sadb_t *sp, netstack_t *ns)
1037 {
1038 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE);
1039 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1040 
1041 	/* For each acquire, destroy it, including the bucket mutex. */
1042 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1043 
1044 	ASSERT(sp->sdb_of == NULL);
1045 	ASSERT(sp->sdb_if == NULL);
1046 	ASSERT(sp->sdb_acq == NULL);
1047 }
1048 
1049 static void
1050 sadb_send_flush_req(sadbp_t *spp)
1051 {
1052 	mblk_t *ctl_mp;
1053 
1054 	/*
1055 	 * we've been unplumbed, or never were plumbed; don't go there.
1056 	 */
1057 	if (spp->s_ip_q == NULL)
1058 		return;
1059 
1060 	/* have IP send a flush msg to the IPsec accelerators */
1061 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1062 	if (ctl_mp != NULL)
1063 		putnext(spp->s_ip_q, ctl_mp);
1064 }
1065 
1066 void
1067 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1068 {
1069 	sadb_flush(&spp->s_v4, ns);
1070 	sadb_flush(&spp->s_v6, ns);
1071 
1072 	sadb_send_flush_req(spp);
1073 }
1074 
1075 void
1076 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1077 {
1078 	sadb_destroy(&spp->s_v4, ns);
1079 	sadb_destroy(&spp->s_v6, ns);
1080 
1081 	sadb_send_flush_req(spp);
1082 	if (spp->s_satype == SADB_SATYPE_AH) {
1083 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1084 
1085 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1086 	}
1087 }
1088 
1089 
1090 /*
1091  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1092  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1093  * EINVAL.
1094  */
1095 int
1096 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft)
1097 {
1098 	if (hard == NULL || soft == NULL)
1099 		return (0);
1100 
1101 	if (hard->sadb_lifetime_allocations != 0 &&
1102 	    soft->sadb_lifetime_allocations != 0 &&
1103 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1104 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1105 
1106 	if (hard->sadb_lifetime_bytes != 0 &&
1107 	    soft->sadb_lifetime_bytes != 0 &&
1108 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1109 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1110 
1111 	if (hard->sadb_lifetime_addtime != 0 &&
1112 	    soft->sadb_lifetime_addtime != 0 &&
1113 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1114 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1115 
1116 	if (hard->sadb_lifetime_usetime != 0 &&
1117 	    soft->sadb_lifetime_usetime != 0 &&
1118 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1119 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1120 
1121 	return (0);
1122 }
1123 
1124 /*
1125  * Clone a security association for the purposes of inserting a single SA
1126  * into inbound and outbound tables respectively. This function should only
1127  * be called from sadb_common_add().
1128  */
1129 static ipsa_t *
1130 sadb_cloneassoc(ipsa_t *ipsa)
1131 {
1132 	ipsa_t *newbie;
1133 	boolean_t error = B_FALSE;
1134 
1135 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
1136 
1137 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1138 	if (newbie == NULL)
1139 		return (NULL);
1140 
1141 	/* Copy over what we can. */
1142 	*newbie = *ipsa;
1143 
1144 	/* bzero and initialize locks, in case *_init() allocates... */
1145 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1146 
1147 	/*
1148 	 * While somewhat dain-bramaged, the most graceful way to
1149 	 * recover from errors is to keep plowing through the
1150 	 * allocations, and getting what I can.  It's easier to call
1151 	 * sadb_freeassoc() on the stillborn clone when all the
1152 	 * pointers aren't pointing to the parent's data.
1153 	 */
1154 
1155 	if (ipsa->ipsa_authkey != NULL) {
1156 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1157 		    KM_NOSLEEP);
1158 		if (newbie->ipsa_authkey == NULL) {
1159 			error = B_TRUE;
1160 		} else {
1161 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1162 			    newbie->ipsa_authkeylen);
1163 
1164 			newbie->ipsa_kcfauthkey.ck_data =
1165 			    newbie->ipsa_authkey;
1166 		}
1167 
1168 		if (newbie->ipsa_amech.cm_param != NULL) {
1169 			newbie->ipsa_amech.cm_param =
1170 			    (char *)&newbie->ipsa_mac_len;
1171 		}
1172 	}
1173 
1174 	if (ipsa->ipsa_encrkey != NULL) {
1175 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1176 		    KM_NOSLEEP);
1177 		if (newbie->ipsa_encrkey == NULL) {
1178 			error = B_TRUE;
1179 		} else {
1180 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1181 			    newbie->ipsa_encrkeylen);
1182 
1183 			newbie->ipsa_kcfencrkey.ck_data =
1184 			    newbie->ipsa_encrkey;
1185 		}
1186 	}
1187 
1188 	newbie->ipsa_authtmpl = NULL;
1189 	newbie->ipsa_encrtmpl = NULL;
1190 	newbie->ipsa_haspeer = B_TRUE;
1191 
1192 	if (ipsa->ipsa_integ != NULL) {
1193 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1194 		    KM_NOSLEEP);
1195 		if (newbie->ipsa_integ == NULL) {
1196 			error = B_TRUE;
1197 		} else {
1198 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1199 			    newbie->ipsa_integlen);
1200 		}
1201 	}
1202 
1203 	if (ipsa->ipsa_sens != NULL) {
1204 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1205 		    KM_NOSLEEP);
1206 		if (newbie->ipsa_sens == NULL) {
1207 			error = B_TRUE;
1208 		} else {
1209 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1210 			    newbie->ipsa_senslen);
1211 		}
1212 	}
1213 
1214 	if (ipsa->ipsa_src_cid != NULL) {
1215 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1216 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1217 	}
1218 
1219 	if (ipsa->ipsa_dst_cid != NULL) {
1220 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1221 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1222 	}
1223 
1224 	if (error) {
1225 		sadb_freeassoc(newbie);
1226 		return (NULL);
1227 	}
1228 
1229 	return (newbie);
1230 }
1231 
1232 /*
1233  * Initialize a SADB address extension at the address specified by addrext.
1234  * Return a pointer to the end of the new address extension.
1235  */
1236 static uint8_t *
1237 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1238     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1239 {
1240 	struct sockaddr_in *sin;
1241 	struct sockaddr_in6 *sin6;
1242 	uint8_t *cur = start;
1243 	int addrext_len;
1244 	int sin_len;
1245 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1246 
1247 	if (cur == NULL)
1248 		return (NULL);
1249 
1250 	cur += sizeof (*addrext);
1251 	if (cur > end)
1252 		return (NULL);
1253 
1254 	addrext->sadb_address_proto = proto;
1255 	addrext->sadb_address_prefixlen = prefix;
1256 	addrext->sadb_address_reserved = 0;
1257 	addrext->sadb_address_exttype = exttype;
1258 
1259 	switch (af) {
1260 	case AF_INET:
1261 		sin = (struct sockaddr_in *)cur;
1262 		sin_len = sizeof (*sin);
1263 		cur += sin_len;
1264 		if (cur > end)
1265 			return (NULL);
1266 
1267 		sin->sin_family = af;
1268 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1269 		sin->sin_port = port;
1270 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1271 		break;
1272 	case AF_INET6:
1273 		sin6 = (struct sockaddr_in6 *)cur;
1274 		sin_len = sizeof (*sin6);
1275 		cur += sin_len;
1276 		if (cur > end)
1277 			return (NULL);
1278 
1279 		bzero(sin6, sizeof (*sin6));
1280 		sin6->sin6_family = af;
1281 		sin6->sin6_port = port;
1282 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1283 		break;
1284 	}
1285 
1286 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1287 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1288 
1289 	cur = start + addrext_len;
1290 	if (cur > end)
1291 		cur = NULL;
1292 
1293 	return (cur);
1294 }
1295 
1296 /*
1297  * Construct a key management cookie extension.
1298  */
1299 
1300 static uint8_t *
1301 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1302 {
1303 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1304 
1305 	if (cur == NULL)
1306 		return (NULL);
1307 
1308 	cur += sizeof (*kmcext);
1309 
1310 	if (cur > end)
1311 		return (NULL);
1312 
1313 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1314 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1315 	kmcext->sadb_x_kmc_proto = kmp;
1316 	kmcext->sadb_x_kmc_cookie = kmc;
1317 	kmcext->sadb_x_kmc_reserved = 0;
1318 
1319 	return (cur);
1320 }
1321 
1322 /*
1323  * Given an original message header with sufficient space following it, and an
1324  * SA, construct a full PF_KEY message with all of the relevant extensions.
1325  * This is mostly used for SADB_GET, and SADB_DUMP.
1326  */
1327 static mblk_t *
1328 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1329 {
1330 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1331 	int srcidsize, dstidsize;
1332 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1333 				/* src/dst and proxy sockaddrs. */
1334 	/*
1335 	 * The following are pointers into the PF_KEY message this PF_KEY
1336 	 * message creates.
1337 	 */
1338 	sadb_msg_t *newsamsg;
1339 	sadb_sa_t *assoc;
1340 	sadb_lifetime_t *lt;
1341 	sadb_key_t *key;
1342 	sadb_ident_t *ident;
1343 	sadb_sens_t *sens;
1344 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1345 	sadb_x_pair_t *pair_ext;
1346 
1347 	mblk_t *mp;
1348 	uint64_t *bitmap;
1349 	uint8_t *cur, *end;
1350 	/* These indicate the presence of the above extension fields. */
1351 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1352 	boolean_t paired;
1353 	uint32_t otherspi;
1354 
1355 	/* First off, figure out the allocation length for this message. */
1356 
1357 	/*
1358 	 * Constant stuff.  This includes base, SA, address (src, dst),
1359 	 * and lifetime (current).
1360 	 */
1361 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1362 	    sizeof (sadb_lifetime_t);
1363 
1364 	fam = ipsa->ipsa_addrfam;
1365 	switch (fam) {
1366 	case AF_INET:
1367 		addrsize = roundup(sizeof (struct sockaddr_in) +
1368 		    sizeof (sadb_address_t), sizeof (uint64_t));
1369 		break;
1370 	case AF_INET6:
1371 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1372 		    sizeof (sadb_address_t), sizeof (uint64_t));
1373 		break;
1374 	default:
1375 		return (NULL);
1376 	}
1377 	/*
1378 	 * Allocate TWO address extensions, for source and destination.
1379 	 * (Thus, the * 2.)
1380 	 */
1381 	alloclen += addrsize * 2;
1382 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1383 		alloclen += addrsize;
1384 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1385 		alloclen += addrsize;
1386 
1387 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1388 		paired = B_TRUE;
1389 		alloclen += sizeof (sadb_x_pair_t);
1390 		otherspi = ipsa->ipsa_otherspi;
1391 	} else {
1392 		paired = B_FALSE;
1393 	}
1394 
1395 	/* How 'bout other lifetimes? */
1396 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1397 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1398 		alloclen += sizeof (sadb_lifetime_t);
1399 		soft = B_TRUE;
1400 	} else {
1401 		soft = B_FALSE;
1402 	}
1403 
1404 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1405 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1406 		alloclen += sizeof (sadb_lifetime_t);
1407 		hard = B_TRUE;
1408 	} else {
1409 		hard = B_FALSE;
1410 	}
1411 
1412 	/* Inner addresses. */
1413 	if (ipsa->ipsa_innerfam == 0) {
1414 		isrc = B_FALSE;
1415 		idst = B_FALSE;
1416 	} else {
1417 		pfam = ipsa->ipsa_innerfam;
1418 		switch (pfam) {
1419 		case AF_INET6:
1420 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1421 			    sizeof (sadb_address_t), sizeof (uint64_t));
1422 			break;
1423 		case AF_INET:
1424 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1425 			    sizeof (sadb_address_t), sizeof (uint64_t));
1426 			break;
1427 		default:
1428 			cmn_err(CE_PANIC,
1429 			    "IPsec SADB: Proxy length failure.\n");
1430 			break;
1431 		}
1432 		isrc = B_TRUE;
1433 		idst = B_TRUE;
1434 		alloclen += 2 * paddrsize;
1435 	}
1436 
1437 	/* For the following fields, assume that length != 0 ==> stuff */
1438 	if (ipsa->ipsa_authkeylen != 0) {
1439 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1440 		    sizeof (uint64_t));
1441 		alloclen += authsize;
1442 		auth = B_TRUE;
1443 	} else {
1444 		auth = B_FALSE;
1445 	}
1446 
1447 	if (ipsa->ipsa_encrkeylen != 0) {
1448 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1449 		    sizeof (uint64_t));
1450 		alloclen += encrsize;
1451 		encr = B_TRUE;
1452 	} else {
1453 		encr = B_FALSE;
1454 	}
1455 
1456 	/* No need for roundup on sens and integ. */
1457 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1458 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1459 		    ipsa->ipsa_senslen;
1460 		sensinteg = B_TRUE;
1461 	} else {
1462 		sensinteg = B_FALSE;
1463 	}
1464 
1465 	/*
1466 	 * Must use strlen() here for lengths.	Identities use NULL
1467 	 * pointers to indicate their nonexistence.
1468 	 */
1469 	if (ipsa->ipsa_src_cid != NULL) {
1470 		srcidsize = roundup(sizeof (sadb_ident_t) +
1471 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1472 		    sizeof (uint64_t));
1473 		alloclen += srcidsize;
1474 		srcid = B_TRUE;
1475 	} else {
1476 		srcid = B_FALSE;
1477 	}
1478 
1479 	if (ipsa->ipsa_dst_cid != NULL) {
1480 		dstidsize = roundup(sizeof (sadb_ident_t) +
1481 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1482 		    sizeof (uint64_t));
1483 		alloclen += dstidsize;
1484 		dstid = B_TRUE;
1485 	} else {
1486 		dstid = B_FALSE;
1487 	}
1488 
1489 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1490 		alloclen += sizeof (sadb_x_kmc_t);
1491 
1492 	/* Make sure the allocation length is a multiple of 8 bytes. */
1493 	ASSERT((alloclen & 0x7) == 0);
1494 
1495 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1496 	mp = allocb(alloclen, BPRI_HI);
1497 	if (mp == NULL)
1498 		return (NULL);
1499 
1500 	mp->b_wptr += alloclen;
1501 	end = mp->b_wptr;
1502 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1503 	*newsamsg = *samsg;
1504 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1505 
1506 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1507 
1508 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1509 
1510 	assoc = (sadb_sa_t *)(newsamsg + 1);
1511 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1512 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1513 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1514 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1515 	assoc->sadb_sa_state = ipsa->ipsa_state;
1516 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1517 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1518 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1519 
1520 	lt = (sadb_lifetime_t *)(assoc + 1);
1521 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1522 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1523 	/* We do not support the concept. */
1524 	lt->sadb_lifetime_allocations = 0;
1525 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1526 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1527 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1528 
1529 	if (hard) {
1530 		lt++;
1531 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1532 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1533 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1534 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1535 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1536 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1537 	}
1538 
1539 	if (soft) {
1540 		lt++;
1541 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1542 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1543 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1544 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1545 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1546 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1547 	}
1548 
1549 	cur = (uint8_t *)(lt + 1);
1550 
1551 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1552 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1553 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1554 	    SA_PROTO(ipsa), 0);
1555 	if (cur == NULL) {
1556 		freemsg(mp);
1557 		mp = NULL;
1558 		goto bail;
1559 	}
1560 
1561 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1562 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1563 	    SA_PROTO(ipsa), 0);
1564 	if (cur == NULL) {
1565 		freemsg(mp);
1566 		mp = NULL;
1567 		goto bail;
1568 	}
1569 
1570 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1571 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1572 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1573 		    IPPROTO_UDP, 0);
1574 		if (cur == NULL) {
1575 			freemsg(mp);
1576 			mp = NULL;
1577 			goto bail;
1578 		}
1579 	}
1580 
1581 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1582 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1583 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1584 		    IPPROTO_UDP, 0);
1585 		if (cur == NULL) {
1586 			freemsg(mp);
1587 			mp = NULL;
1588 			goto bail;
1589 		}
1590 	}
1591 
1592 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1593 	if (isrc) {
1594 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1595 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1596 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1597 		if (cur == NULL) {
1598 			freemsg(mp);
1599 			mp = NULL;
1600 			goto bail;
1601 		}
1602 	}
1603 
1604 	if (idst) {
1605 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1606 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1607 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1608 		if (cur == NULL) {
1609 			freemsg(mp);
1610 			mp = NULL;
1611 			goto bail;
1612 		}
1613 	}
1614 
1615 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1616 		cur = sadb_make_kmc_ext(cur, end,
1617 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1618 		if (cur == NULL) {
1619 			freemsg(mp);
1620 			mp = NULL;
1621 			goto bail;
1622 		}
1623 	}
1624 
1625 	walker = (sadb_ext_t *)cur;
1626 	if (auth) {
1627 		key = (sadb_key_t *)walker;
1628 		key->sadb_key_len = SADB_8TO64(authsize);
1629 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1630 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1631 		key->sadb_key_reserved = 0;
1632 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1633 		walker = (sadb_ext_t *)((uint64_t *)walker +
1634 		    walker->sadb_ext_len);
1635 	}
1636 
1637 	if (encr) {
1638 		key = (sadb_key_t *)walker;
1639 		key->sadb_key_len = SADB_8TO64(encrsize);
1640 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1641 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1642 		key->sadb_key_reserved = 0;
1643 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1644 		walker = (sadb_ext_t *)((uint64_t *)walker +
1645 		    walker->sadb_ext_len);
1646 	}
1647 
1648 	if (srcid) {
1649 		ident = (sadb_ident_t *)walker;
1650 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1651 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1652 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1653 		ident->sadb_ident_id = 0;
1654 		ident->sadb_ident_reserved = 0;
1655 		(void) strcpy((char *)(ident + 1),
1656 		    ipsa->ipsa_src_cid->ipsid_cid);
1657 		walker = (sadb_ext_t *)((uint64_t *)walker +
1658 		    walker->sadb_ext_len);
1659 	}
1660 
1661 	if (dstid) {
1662 		ident = (sadb_ident_t *)walker;
1663 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1664 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1665 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1666 		ident->sadb_ident_id = 0;
1667 		ident->sadb_ident_reserved = 0;
1668 		(void) strcpy((char *)(ident + 1),
1669 		    ipsa->ipsa_dst_cid->ipsid_cid);
1670 		walker = (sadb_ext_t *)((uint64_t *)walker +
1671 		    walker->sadb_ext_len);
1672 	}
1673 
1674 	if (sensinteg) {
1675 		sens = (sadb_sens_t *)walker;
1676 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1677 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1678 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1679 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1680 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1681 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1682 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1683 		sens->sadb_sens_reserved = 0;
1684 		bitmap = (uint64_t *)(sens + 1);
1685 		if (ipsa->ipsa_sens != NULL) {
1686 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1687 			bitmap += sens->sadb_sens_sens_len;
1688 		}
1689 		if (ipsa->ipsa_integ != NULL)
1690 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1691 		walker = (sadb_ext_t *)((uint64_t *)walker +
1692 		    walker->sadb_ext_len);
1693 	}
1694 
1695 	if (paired) {
1696 		pair_ext = (sadb_x_pair_t *)walker;
1697 
1698 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1699 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1700 		pair_ext->sadb_x_pair_spi = otherspi;
1701 
1702 		walker = (sadb_ext_t *)((uint64_t *)walker +
1703 		    walker->sadb_ext_len);
1704 	}
1705 
1706 bail:
1707 	/* Pardon any delays... */
1708 	mutex_exit(&ipsa->ipsa_lock);
1709 
1710 	return (mp);
1711 }
1712 
1713 /*
1714  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1715  * and adjust base message accordingly.
1716  *
1717  * Assume message is pulled up in one piece of contiguous memory.
1718  *
1719  * Say if we start off with:
1720  *
1721  * +------+----+-------------+-----------+---------------+---------------+
1722  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1723  * +------+----+-------------+-----------+---------------+---------------+
1724  *
1725  * we will end up with
1726  *
1727  * +------+----+-------------+-----------+---------------+
1728  * | base | SA | source addr | dest addr | soft lifetime |
1729  * +------+----+-------------+-----------+---------------+
1730  */
1731 static void
1732 sadb_strip(sadb_msg_t *samsg)
1733 {
1734 	sadb_ext_t *ext;
1735 	uint8_t *target = NULL;
1736 	uint8_t *msgend;
1737 	int sofar = SADB_8TO64(sizeof (*samsg));
1738 	int copylen;
1739 
1740 	ext = (sadb_ext_t *)(samsg + 1);
1741 	msgend = (uint8_t *)samsg;
1742 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1743 	while ((uint8_t *)ext < msgend) {
1744 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1745 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1746 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1747 			/*
1748 			 * Aha!	 I found a header to be erased.
1749 			 */
1750 
1751 			if (target != NULL) {
1752 				/*
1753 				 * If I had a previous header to be erased,
1754 				 * copy over it.  I can get away with just
1755 				 * copying backwards because the target will
1756 				 * always be 8 bytes behind the source.
1757 				 */
1758 				copylen = ((uint8_t *)ext) - (target +
1759 				    SADB_64TO8(
1760 				    ((sadb_ext_t *)target)->sadb_ext_len));
1761 				ovbcopy(((uint8_t *)ext - copylen), target,
1762 				    copylen);
1763 				target += copylen;
1764 				((sadb_ext_t *)target)->sadb_ext_len =
1765 				    SADB_8TO64(((uint8_t *)ext) - target +
1766 				    SADB_64TO8(ext->sadb_ext_len));
1767 			} else {
1768 				target = (uint8_t *)ext;
1769 			}
1770 		} else {
1771 			sofar += ext->sadb_ext_len;
1772 		}
1773 
1774 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1775 	}
1776 
1777 	ASSERT((uint8_t *)ext == msgend);
1778 
1779 	if (target != NULL) {
1780 		copylen = ((uint8_t *)ext) - (target +
1781 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1782 		if (copylen != 0)
1783 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1784 	}
1785 
1786 	/* Adjust samsg. */
1787 	samsg->sadb_msg_len = (uint16_t)sofar;
1788 
1789 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1790 }
1791 
1792 /*
1793  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1794  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1795  * the sending keysock instance is included.
1796  */
1797 void
1798 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1799     uint_t serial)
1800 {
1801 	mblk_t *msg = mp->b_cont;
1802 	sadb_msg_t *samsg;
1803 	keysock_out_t *kso;
1804 
1805 	/*
1806 	 * Enough functions call this to merit a NULL queue check.
1807 	 */
1808 	if (pfkey_q == NULL) {
1809 		freemsg(mp);
1810 		return;
1811 	}
1812 
1813 	ASSERT(msg != NULL);
1814 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1815 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1816 	samsg = (sadb_msg_t *)msg->b_rptr;
1817 	kso = (keysock_out_t *)mp->b_rptr;
1818 
1819 	kso->ks_out_type = KEYSOCK_OUT;
1820 	kso->ks_out_len = sizeof (*kso);
1821 	kso->ks_out_serial = serial;
1822 
1823 	/*
1824 	 * Only send the base message up in the event of an error.
1825 	 * Don't worry about bzero()-ing, because it was probably bogus
1826 	 * anyway.
1827 	 */
1828 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1829 	samsg = (sadb_msg_t *)msg->b_rptr;
1830 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1831 	samsg->sadb_msg_errno = (uint8_t)error;
1832 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1833 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1834 
1835 	putnext(pfkey_q, mp);
1836 }
1837 
1838 /*
1839  * Send a successful return packet back to keysock via the queue in pfkey_q.
1840  *
1841  * Often, an SA is associated with the reply message, it's passed in if needed,
1842  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1843  * and the caller will release said refcnt.
1844  */
1845 void
1846 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1847     keysock_in_t *ksi, ipsa_t *ipsa)
1848 {
1849 	keysock_out_t *kso;
1850 	mblk_t *mp1;
1851 	sadb_msg_t *newsamsg;
1852 	uint8_t *oldend;
1853 
1854 	ASSERT((mp->b_cont != NULL) &&
1855 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1856 	    ((void *)mp->b_rptr == (void *)ksi));
1857 
1858 	switch (samsg->sadb_msg_type) {
1859 	case SADB_ADD:
1860 	case SADB_UPDATE:
1861 	case SADB_X_UPDATEPAIR:
1862 	case SADB_FLUSH:
1863 	case SADB_DUMP:
1864 		/*
1865 		 * I have all of the message already.  I just need to strip
1866 		 * out the keying material and echo the message back.
1867 		 *
1868 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1869 		 * work.  When DUMP reaches here, it should only be a base
1870 		 * message.
1871 		 */
1872 	justecho:
1873 		ASSERT(samsg->sadb_msg_type != SADB_DUMP ||
1874 		    samsg->sadb_msg_len == SADB_8TO64(sizeof (sadb_msg_t)));
1875 
1876 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1877 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1878 			sadb_strip(samsg);
1879 			/* Assume PF_KEY message is contiguous. */
1880 			ASSERT(mp->b_cont->b_cont == NULL);
1881 			oldend = mp->b_cont->b_wptr;
1882 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1883 			    SADB_64TO8(samsg->sadb_msg_len);
1884 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1885 		}
1886 		break;
1887 	case SADB_GET:
1888 		/*
1889 		 * Do a lot of work here, because of the ipsa I just found.
1890 		 * First construct the new PF_KEY message, then abandon
1891 		 * the old one.
1892 		 */
1893 		mp1 = sadb_sa2msg(ipsa, samsg);
1894 		if (mp1 == NULL) {
1895 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1896 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1897 			return;
1898 		}
1899 		freemsg(mp->b_cont);
1900 		mp->b_cont = mp1;
1901 		break;
1902 	case SADB_DELETE:
1903 	case SADB_X_DELPAIR:
1904 		if (ipsa == NULL)
1905 			goto justecho;
1906 		/*
1907 		 * Because listening KMds may require more info, treat
1908 		 * DELETE like a special case of GET.
1909 		 */
1910 		mp1 = sadb_sa2msg(ipsa, samsg);
1911 		if (mp1 == NULL) {
1912 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1913 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1914 			return;
1915 		}
1916 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1917 		sadb_strip(newsamsg);
1918 		oldend = mp1->b_wptr;
1919 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1920 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1921 		freemsg(mp->b_cont);
1922 		mp->b_cont = mp1;
1923 		break;
1924 	default:
1925 		if (mp != NULL)
1926 			freemsg(mp);
1927 		return;
1928 	}
1929 
1930 	/* ksi is now null and void. */
1931 	kso = (keysock_out_t *)ksi;
1932 	kso->ks_out_type = KEYSOCK_OUT;
1933 	kso->ks_out_len = sizeof (*kso);
1934 	kso->ks_out_serial = ksi->ks_in_serial;
1935 	/* We're ready to send... */
1936 	putnext(pfkey_q, mp);
1937 }
1938 
1939 /*
1940  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1941  */
1942 void
1943 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1944     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1945 {
1946 	keysock_hello_ack_t *kha;
1947 	queue_t *oldq;
1948 
1949 	ASSERT(OTHERQ(q) != NULL);
1950 
1951 	/*
1952 	 * First, check atomically that I'm the first and only keysock
1953 	 * instance.
1954 	 *
1955 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1956 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1957 	 * messages.
1958 	 */
1959 
1960 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
1961 	if (oldq != NULL) {
1962 		ASSERT(oldq != q);
1963 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1964 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1965 		freemsg(mp);
1966 		return;
1967 	}
1968 
1969 	kha = (keysock_hello_ack_t *)mp->b_rptr;
1970 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1971 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1972 	kha->ks_hello_satype = (uint8_t)satype;
1973 
1974 	/*
1975 	 * If we made it past the casptr, then we have "exclusive" access
1976 	 * to the timeout handle.  Fire it off in 4 seconds, because it
1977 	 * just seems like a good interval.
1978 	 */
1979 	*top = qtimeout(*pfkey_qp, ager, agerarg, drv_usectohz(4000000));
1980 
1981 	putnext(*pfkey_qp, mp);
1982 }
1983 
1984 /*
1985  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1986  *
1987  * Check addresses themselves for wildcard or multicast.
1988  * Check ire table for local/non-local/broadcast.
1989  */
1990 int
1991 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1992     netstack_t *ns)
1993 {
1994 	sadb_address_t *addr = (sadb_address_t *)ext;
1995 	struct sockaddr_in *sin;
1996 	struct sockaddr_in6 *sin6;
1997 	ire_t *ire;
1998 	int diagnostic, type;
1999 	boolean_t normalized = B_FALSE;
2000 
2001 	ASSERT(ext != NULL);
2002 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
2003 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
2004 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
2005 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
2006 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
2007 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
2008 
2009 	/* Assign both sockaddrs, the compiler will do the right thing. */
2010 	sin = (struct sockaddr_in *)(addr + 1);
2011 	sin6 = (struct sockaddr_in6 *)(addr + 1);
2012 
2013 	if (sin6->sin6_family == AF_INET6) {
2014 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2015 			/*
2016 			 * Convert to an AF_INET sockaddr.  This means the
2017 			 * return messages will have the extra space, but have
2018 			 * AF_INET sockaddrs instead of AF_INET6.
2019 			 *
2020 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
2021 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
2022 			 * equal to AF_INET <v4>, it shouldnt be a huge
2023 			 * problem.
2024 			 */
2025 			sin->sin_family = AF_INET;
2026 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
2027 			    &sin->sin_addr);
2028 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
2029 			normalized = B_TRUE;
2030 		}
2031 	} else if (sin->sin_family != AF_INET) {
2032 		switch (ext->sadb_ext_type) {
2033 		case SADB_EXT_ADDRESS_SRC:
2034 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2035 			break;
2036 		case SADB_EXT_ADDRESS_DST:
2037 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2038 			break;
2039 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2040 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2041 			break;
2042 		case SADB_X_EXT_ADDRESS_INNER_DST:
2043 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2044 			break;
2045 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2046 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2047 			break;
2048 		case SADB_X_EXT_ADDRESS_NATT_REM:
2049 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2050 			break;
2051 			/* There is no default, see above ASSERT. */
2052 		}
2053 bail:
2054 		if (pfkey_q != NULL) {
2055 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2056 			    serial);
2057 		} else {
2058 			/*
2059 			 * Scribble in sadb_msg that we got passed in.
2060 			 * Overload "mp" to be an sadb_msg pointer.
2061 			 */
2062 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2063 
2064 			samsg->sadb_msg_errno = EINVAL;
2065 			samsg->sadb_x_msg_diagnostic = diagnostic;
2066 		}
2067 		return (KS_IN_ADDR_UNKNOWN);
2068 	}
2069 
2070 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2071 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2072 		/*
2073 		 * We need only check for prefix issues.
2074 		 */
2075 
2076 		/* Set diagnostic now, in case we need it later. */
2077 		diagnostic =
2078 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2079 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2080 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2081 
2082 		if (normalized)
2083 			addr->sadb_address_prefixlen -= 96;
2084 
2085 		/*
2086 		 * Verify and mask out inner-addresses based on prefix length.
2087 		 */
2088 		if (sin->sin_family == AF_INET) {
2089 			if (addr->sadb_address_prefixlen > 32)
2090 				goto bail;
2091 			sin->sin_addr.s_addr &=
2092 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2093 		} else {
2094 			in6_addr_t mask;
2095 
2096 			ASSERT(sin->sin_family == AF_INET6);
2097 			/*
2098 			 * ip_plen_to_mask_v6() returns NULL if the value in
2099 			 * question is out of range.
2100 			 */
2101 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2102 			    &mask) == NULL)
2103 				goto bail;
2104 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2105 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2106 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2107 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2108 		}
2109 
2110 		/* We don't care in these cases. */
2111 		return (KS_IN_ADDR_DONTCARE);
2112 	}
2113 
2114 	if (sin->sin_family == AF_INET6) {
2115 		/* Check the easy ones now. */
2116 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2117 			return (KS_IN_ADDR_MBCAST);
2118 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2119 			return (KS_IN_ADDR_UNSPEC);
2120 		/*
2121 		 * At this point, we're a unicast IPv6 address.
2122 		 *
2123 		 * A ctable lookup for local is sufficient here.  If we're
2124 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2125 		 *
2126 		 * XXX Zones alert -> me/notme decision needs to be tempered
2127 		 * by what zone we're in when we go to zone-aware IPsec.
2128 		 */
2129 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2130 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2131 		    ns->netstack_ip);
2132 		if (ire != NULL) {
2133 			/* Hey hey, it's local. */
2134 			IRE_REFRELE(ire);
2135 			return (KS_IN_ADDR_ME);
2136 		}
2137 	} else {
2138 		ASSERT(sin->sin_family == AF_INET);
2139 		if (sin->sin_addr.s_addr == INADDR_ANY)
2140 			return (KS_IN_ADDR_UNSPEC);
2141 		if (CLASSD(sin->sin_addr.s_addr))
2142 			return (KS_IN_ADDR_MBCAST);
2143 		/*
2144 		 * At this point we're a unicast or broadcast IPv4 address.
2145 		 *
2146 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2147 		 * A NULL return value is NOTME, otherwise, look at the
2148 		 * returned ire for broadcast or not and return accordingly.
2149 		 *
2150 		 * XXX Zones alert -> me/notme decision needs to be tempered
2151 		 * by what zone we're in when we go to zone-aware IPsec.
2152 		 */
2153 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2154 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2155 		    MATCH_IRE_TYPE, ns->netstack_ip);
2156 		if (ire != NULL) {
2157 			/* Check for local or broadcast */
2158 			type = ire->ire_type;
2159 			IRE_REFRELE(ire);
2160 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2161 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2162 			    KS_IN_ADDR_MBCAST);
2163 		}
2164 	}
2165 
2166 	return (KS_IN_ADDR_NOTME);
2167 }
2168 
2169 /*
2170  * Address normalizations and reality checks for inbound PF_KEY messages.
2171  *
2172  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2173  * the source to AF_INET.  Do the same for the inner sources.
2174  */
2175 boolean_t
2176 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2177 {
2178 	struct sockaddr_in *src, *isrc;
2179 	struct sockaddr_in6 *dst, *idst;
2180 	sadb_address_t *srcext, *dstext;
2181 	uint16_t sport;
2182 	sadb_ext_t **extv = ksi->ks_in_extv;
2183 	int rc;
2184 
2185 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2186 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2187 		    ksi->ks_in_serial, ns);
2188 		if (rc == KS_IN_ADDR_UNKNOWN)
2189 			return (B_FALSE);
2190 		if (rc == KS_IN_ADDR_MBCAST) {
2191 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2192 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2193 			return (B_FALSE);
2194 		}
2195 		ksi->ks_in_srctype = rc;
2196 	}
2197 
2198 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2199 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2200 		    ksi->ks_in_serial, ns);
2201 		if (rc == KS_IN_ADDR_UNKNOWN)
2202 			return (B_FALSE);
2203 		if (rc == KS_IN_ADDR_UNSPEC) {
2204 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2205 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2206 			return (B_FALSE);
2207 		}
2208 		ksi->ks_in_dsttype = rc;
2209 	}
2210 
2211 	/*
2212 	 * NAT-Traversal addrs are simple enough to not require all of
2213 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2214 	 * AF_INET.
2215 	 */
2216 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2217 		rc = sadb_addrcheck(pfkey_q, mp,
2218 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2219 
2220 		/*
2221 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2222 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2223 		 * AND local-port flexibility).
2224 		 */
2225 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2226 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2227 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2228 			    ksi->ks_in_serial);
2229 			return (B_FALSE);
2230 		}
2231 		src = (struct sockaddr_in *)
2232 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2233 		if (src->sin_family != AF_INET) {
2234 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2235 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2236 			    ksi->ks_in_serial);
2237 			return (B_FALSE);
2238 		}
2239 	}
2240 
2241 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2242 		rc = sadb_addrcheck(pfkey_q, mp,
2243 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2244 
2245 		/*
2246 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2247 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2248 		 */
2249 		if (rc != KS_IN_ADDR_NOTME &&
2250 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2251 		    rc == KS_IN_ADDR_UNSPEC)) {
2252 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2253 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2254 			    ksi->ks_in_serial);
2255 			return (B_FALSE);
2256 		}
2257 		src = (struct sockaddr_in *)
2258 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2259 		if (src->sin_family != AF_INET) {
2260 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2261 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2262 			    ksi->ks_in_serial);
2263 			return (B_FALSE);
2264 		}
2265 	}
2266 
2267 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2268 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2269 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2270 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2271 			    ksi->ks_in_serial);
2272 			return (B_FALSE);
2273 		}
2274 
2275 		if (sadb_addrcheck(pfkey_q, mp,
2276 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2277 		    == KS_IN_ADDR_UNKNOWN ||
2278 		    sadb_addrcheck(pfkey_q, mp,
2279 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2280 		    == KS_IN_ADDR_UNKNOWN)
2281 			return (B_FALSE);
2282 
2283 		isrc = (struct sockaddr_in *)
2284 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2285 		    1);
2286 		idst = (struct sockaddr_in6 *)
2287 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2288 		    1);
2289 		if (isrc->sin_family != idst->sin6_family) {
2290 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2291 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2292 			    ksi->ks_in_serial);
2293 			return (B_FALSE);
2294 		}
2295 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2296 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2297 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2298 			    ksi->ks_in_serial);
2299 			return (B_FALSE);
2300 	} else {
2301 		isrc = NULL;	/* For inner/outer port check below. */
2302 	}
2303 
2304 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2305 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2306 
2307 	if (dstext == NULL || srcext == NULL)
2308 		return (B_TRUE);
2309 
2310 	dst = (struct sockaddr_in6 *)(dstext + 1);
2311 	src = (struct sockaddr_in *)(srcext + 1);
2312 
2313 	if (isrc != NULL &&
2314 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2315 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2316 		/* Can't set inner and outer ports in one SA. */
2317 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2318 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2319 		    ksi->ks_in_serial);
2320 		return (B_FALSE);
2321 	}
2322 
2323 	if (dst->sin6_family == src->sin_family)
2324 		return (B_TRUE);
2325 
2326 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2327 		if (srcext->sadb_address_proto == 0) {
2328 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2329 		} else if (dstext->sadb_address_proto == 0) {
2330 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2331 		} else {
2332 			/* Inequal protocols, neither were 0.  Report error. */
2333 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2334 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2335 			    ksi->ks_in_serial);
2336 			return (B_FALSE);
2337 		}
2338 	}
2339 
2340 	/*
2341 	 * With the exception of an unspec IPv6 source and an IPv4
2342 	 * destination, address families MUST me matched.
2343 	 */
2344 	if (src->sin_family == AF_INET ||
2345 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2346 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2347 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2348 		return (B_FALSE);
2349 	}
2350 
2351 	/*
2352 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2353 	 * in the same place for sockaddr_in and sockaddr_in6.
2354 	 */
2355 	sport = src->sin_port;
2356 	bzero(src, sizeof (*src));
2357 	src->sin_family = AF_INET;
2358 	src->sin_port = sport;
2359 
2360 	return (B_TRUE);
2361 }
2362 
2363 /*
2364  * Set the results in "addrtype", given an IRE as requested by
2365  * sadb_addrcheck().
2366  */
2367 int
2368 sadb_addrset(ire_t *ire)
2369 {
2370 	if ((ire->ire_type & IRE_BROADCAST) ||
2371 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2372 	    (ire->ire_ipversion == IPV6_VERSION &&
2373 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2374 		return (KS_IN_ADDR_MBCAST);
2375 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2376 		return (KS_IN_ADDR_ME);
2377 	return (KS_IN_ADDR_NOTME);
2378 }
2379 
2380 
2381 /*
2382  * Walker callback function to delete sa's based on src/dst address.
2383  * Assumes that we're called with *head locked, no other locks held;
2384  * Conveniently, and not coincidentally, this is both what sadb_walker
2385  * gives us and also what sadb_unlinkassoc expects.
2386  */
2387 
2388 struct sadb_purge_state
2389 {
2390 	uint32_t *src;
2391 	uint32_t *dst;
2392 	sa_family_t af;
2393 	boolean_t inbnd;
2394 	char *sidstr;
2395 	char *didstr;
2396 	uint16_t sidtype;
2397 	uint16_t didtype;
2398 	uint32_t kmproto;
2399 	mblk_t *mq;
2400 };
2401 
2402 static void
2403 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2404 {
2405 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2406 
2407 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2408 
2409 	mutex_enter(&entry->ipsa_lock);
2410 
2411 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2412 	    (ps->src != NULL &&
2413 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2414 	    (ps->dst != NULL &&
2415 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2416 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2417 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2418 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2419 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2420 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2421 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2422 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2423 		mutex_exit(&entry->ipsa_lock);
2424 		return;
2425 	}
2426 
2427 	entry->ipsa_state = IPSA_STATE_DEAD;
2428 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2429 }
2430 
2431 /*
2432  * Common code to purge an SA with a matching src or dst address.
2433  * Don't kill larval SA's in such a purge.
2434  */
2435 int
2436 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2437     queue_t *ip_q)
2438 {
2439 	sadb_address_t *dstext =
2440 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2441 	sadb_address_t *srcext =
2442 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2443 	sadb_ident_t *dstid =
2444 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2445 	sadb_ident_t *srcid =
2446 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2447 	sadb_x_kmc_t *kmc =
2448 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2449 	struct sockaddr_in *src, *dst;
2450 	struct sockaddr_in6 *src6, *dst6;
2451 	struct sadb_purge_state ps;
2452 
2453 	/*
2454 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2455 	 * takes care of them.
2456 	 */
2457 
2458 	/* enforced by caller */
2459 	ASSERT((dstext != NULL) || (srcext != NULL));
2460 
2461 	ps.src = NULL;
2462 	ps.dst = NULL;
2463 #ifdef DEBUG
2464 	ps.af = (sa_family_t)-1;
2465 #endif
2466 	ps.mq = NULL;
2467 	ps.sidstr = NULL;
2468 	ps.didstr = NULL;
2469 	ps.kmproto = SADB_X_KMP_MAX + 1;
2470 
2471 	if (dstext != NULL) {
2472 		dst = (struct sockaddr_in *)(dstext + 1);
2473 		ps.af = dst->sin_family;
2474 		if (dst->sin_family == AF_INET6) {
2475 			dst6 = (struct sockaddr_in6 *)dst;
2476 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2477 		} else {
2478 			ps.dst = (uint32_t *)&dst->sin_addr;
2479 		}
2480 	}
2481 
2482 	if (srcext != NULL) {
2483 		src = (struct sockaddr_in *)(srcext + 1);
2484 		ps.af = src->sin_family;
2485 		if (src->sin_family == AF_INET6) {
2486 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2487 			ps.src = (uint32_t *)&src6->sin6_addr;
2488 		} else {
2489 			ps.src = (uint32_t *)&src->sin_addr;
2490 		}
2491 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2492 	}
2493 
2494 	ASSERT(ps.af != (sa_family_t)-1);
2495 
2496 	if (dstid != NULL) {
2497 		/*
2498 		 * NOTE:  May need to copy string in the future
2499 		 * if the inbound keysock message disappears for some strange
2500 		 * reason.
2501 		 */
2502 		ps.didstr = (char *)(dstid + 1);
2503 		ps.didtype = dstid->sadb_ident_type;
2504 	}
2505 
2506 	if (srcid != NULL) {
2507 		/*
2508 		 * NOTE:  May need to copy string in the future
2509 		 * if the inbound keysock message disappears for some strange
2510 		 * reason.
2511 		 */
2512 		ps.sidstr = (char *)(srcid + 1);
2513 		ps.sidtype = srcid->sadb_ident_type;
2514 	}
2515 
2516 	if (kmc != NULL)
2517 		ps.kmproto = kmc->sadb_x_kmc_proto;
2518 
2519 	/*
2520 	 * This is simple, crude, and effective.
2521 	 * Unimplemented optimizations (TBD):
2522 	 * - we can limit how many places we search based on where we
2523 	 * think the SA is filed.
2524 	 * - if we get a dst address, we can hash based on dst addr to find
2525 	 * the correct bucket in the outbound table.
2526 	 */
2527 	ps.inbnd = B_TRUE;
2528 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2529 	ps.inbnd = B_FALSE;
2530 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2531 
2532 	if (ps.mq != NULL)
2533 		sadb_drain_torchq(ip_q, ps.mq);
2534 
2535 	ASSERT(mp->b_cont != NULL);
2536 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2537 	    NULL);
2538 	return (0);
2539 }
2540 
2541 /*
2542  * Common code to delete/get an SA.
2543  */
2544 int
2545 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2546     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2547 {
2548 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2549 	sadb_address_t *srcext =
2550 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2551 	sadb_address_t *dstext =
2552 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2553 	ipsa_t *echo_target = NULL;
2554 	ipsap_t *ipsapp;
2555 	mblk_t *torchq = NULL;
2556 	uint_t	error = 0;
2557 
2558 	if (dstext == NULL) {
2559 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2560 		return (EINVAL);
2561 	}
2562 	if (assoc == NULL) {
2563 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2564 		return (EINVAL);
2565 	}
2566 
2567 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
2568 	if (ipsapp == NULL) {
2569 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2570 		return (ESRCH);
2571 	}
2572 
2573 	echo_target = ipsapp->ipsap_sa_ptr;
2574 	if (echo_target == NULL)
2575 		echo_target = ipsapp->ipsap_psa_ptr;
2576 
2577 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2578 		/*
2579 		 * Bucket locks will be required if SA is actually unlinked.
2580 		 * get_ipsa_pair() returns valid hash bucket pointers even
2581 		 * if it can't find a pair SA pointer.
2582 		 */
2583 		mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2584 		mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2585 
2586 		if (ipsapp->ipsap_sa_ptr != NULL) {
2587 			mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2588 			ipsapp->ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2589 			(void) sadb_torch_assoc(ipsapp->ipsap_bucket,
2590 			    ipsapp->ipsap_sa_ptr, B_FALSE, &torchq);
2591 			/*
2592 			 * sadb_torch_assoc() releases the ipsa_lock
2593 			 * and calls sadb_unlinkassoc() which does a
2594 			 * IPSA_REFRELE.
2595 			 */
2596 		}
2597 		if (ipsapp->ipsap_psa_ptr != NULL) {
2598 			mutex_enter(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2599 			if (sadb_msg_type == SADB_X_DELPAIR) {
2600 				ipsapp->ipsap_psa_ptr->ipsa_state =
2601 				    IPSA_STATE_DEAD;
2602 				(void) sadb_torch_assoc(ipsapp->ipsap_pbucket,
2603 				    ipsapp->ipsap_psa_ptr, B_FALSE, &torchq);
2604 			} else {
2605 				/*
2606 				 * Only half of the "pair" has been deleted.
2607 				 * Update the remaining SA and remove references
2608 				 * to its pair SA, which is now gone.
2609 				 */
2610 				ipsapp->ipsap_psa_ptr->ipsa_otherspi = 0;
2611 				ipsapp->ipsap_psa_ptr->ipsa_flags &=
2612 				    ~IPSA_F_PAIRED;
2613 				mutex_exit(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2614 			}
2615 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2616 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2617 			error = ESRCH;
2618 		}
2619 		mutex_exit(&ipsapp->ipsap_bucket->isaf_lock);
2620 		mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2621 	}
2622 
2623 	if (torchq != NULL)
2624 		sadb_drain_torchq(spp->s_ip_q, torchq);
2625 
2626 	ASSERT(mp->b_cont != NULL);
2627 
2628 	if (error == 0)
2629 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2630 		    mp->b_cont->b_rptr, ksi, echo_target);
2631 
2632 	destroy_ipsa_pair(ipsapp);
2633 
2634 	return (error);
2635 }
2636 
2637 /*
2638  * This function takes a sadb_sa_t and finds the ipsa_t structure
2639  * and the isaf_t (hash bucket) that its stored under. If the security
2640  * association has a peer, the ipsa_t structure and bucket for that security
2641  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2642  * are returned as a ipsap_t.
2643  *
2644  * Note that a "pair" is defined as one (but not both) of the following:
2645  *
2646  * A security association which has a soft reference to another security
2647  * association via its SPI.
2648  *
2649  * A security association that is not obviously "inbound" or "outbound" so
2650  * it appears in both hash tables, the "peer" being the same security
2651  * association in the other hash table.
2652  *
2653  * This function will return NULL if the ipsa_t can't be found in the
2654  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2655  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2656  * provided at least one ipsa_t is found.
2657  */
2658 ipsap_t *
2659 get_ipsa_pair(sadb_sa_t *assoc, sadb_address_t *srcext, sadb_address_t *dstext,
2660     sadbp_t *spp)
2661 {
2662 	struct sockaddr_in *src, *dst;
2663 	struct sockaddr_in6 *src6, *dst6;
2664 	sadb_t *sp;
2665 	uint32_t *srcaddr, *dstaddr;
2666 	isaf_t *outbound_bucket, *inbound_bucket;
2667 	boolean_t in_inbound_table = B_FALSE;
2668 	ipsap_t *ipsapp;
2669 	sa_family_t af;
2670 
2671 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2672 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2673 	uint32_t pair_spi;
2674 
2675 	ipsapp = kmem_zalloc(sizeof (*ipsapp), KM_NOSLEEP);
2676 	if (ipsapp == NULL)
2677 		return (NULL);
2678 
2679 	/*
2680 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2681 	 * takes care of them.
2682 	 */
2683 
2684 	dst = (struct sockaddr_in *)(dstext + 1);
2685 	af = dst->sin_family;
2686 	if (af == AF_INET6) {
2687 		sp = &spp->s_v6;
2688 		dst6 = (struct sockaddr_in6 *)dst;
2689 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2690 		if (srcext != NULL) {
2691 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2692 			srcaddr = (uint32_t *)&src6->sin6_addr;
2693 			ASSERT(src6->sin6_family == af);
2694 			ASSERT(src6->sin6_family == AF_INET6);
2695 		} else {
2696 			srcaddr = ALL_ZEROES_PTR;
2697 		}
2698 		outbound_bucket = OUTBOUND_BUCKET_V6(sp,
2699 		    *(uint32_t *)dstaddr);
2700 	} else {
2701 		sp = &spp->s_v4;
2702 		dstaddr = (uint32_t *)&dst->sin_addr;
2703 		if (srcext != NULL) {
2704 			src = (struct sockaddr_in *)(srcext + 1);
2705 			srcaddr = (uint32_t *)&src->sin_addr;
2706 			ASSERT(src->sin_family == af);
2707 			ASSERT(src->sin_family == AF_INET);
2708 		} else {
2709 			srcaddr = ALL_ZEROES_PTR;
2710 		}
2711 		outbound_bucket = OUTBOUND_BUCKET_V4(sp,
2712 		    *(uint32_t *)dstaddr);
2713 	}
2714 
2715 	inbound_bucket = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2716 
2717 	/* Lock down both buckets. */
2718 	mutex_enter(&outbound_bucket->isaf_lock);
2719 	mutex_enter(&inbound_bucket->isaf_lock);
2720 
2721 	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2722 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(inbound_bucket,
2723 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2724 		if (ipsapp->ipsap_sa_ptr != NULL) {
2725 			ipsapp->ipsap_bucket = inbound_bucket;
2726 			ipsapp->ipsap_pbucket = outbound_bucket;
2727 			in_inbound_table = B_TRUE;
2728 		} else {
2729 			ipsapp->ipsap_sa_ptr =
2730 			    ipsec_getassocbyspi(outbound_bucket,
2731 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2732 			ipsapp->ipsap_bucket = outbound_bucket;
2733 			ipsapp->ipsap_pbucket = inbound_bucket;
2734 		}
2735 	} else {
2736 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2737 		ipsapp->ipsap_sa_ptr =
2738 		    ipsec_getassocbyspi(outbound_bucket,
2739 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2740 		if (ipsapp->ipsap_sa_ptr != NULL) {
2741 			ipsapp->ipsap_bucket = outbound_bucket;
2742 			ipsapp->ipsap_pbucket = inbound_bucket;
2743 		} else {
2744 			ipsapp->ipsap_sa_ptr =
2745 			    ipsec_getassocbyspi(inbound_bucket,
2746 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2747 			ipsapp->ipsap_bucket = inbound_bucket;
2748 			ipsapp->ipsap_pbucket = outbound_bucket;
2749 			if (ipsapp->ipsap_sa_ptr != NULL)
2750 				in_inbound_table = B_TRUE;
2751 		}
2752 	}
2753 
2754 	if (ipsapp->ipsap_sa_ptr == NULL) {
2755 		mutex_exit(&outbound_bucket->isaf_lock);
2756 		mutex_exit(&inbound_bucket->isaf_lock);
2757 		kmem_free(ipsapp, sizeof (*ipsapp));
2758 		return (NULL);
2759 	}
2760 
2761 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2762 	    in_inbound_table) {
2763 		mutex_exit(&outbound_bucket->isaf_lock);
2764 		mutex_exit(&inbound_bucket->isaf_lock);
2765 		return (ipsapp);
2766 	}
2767 
2768 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2769 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2770 		/*
2771 		 * haspeer implies no sa_pairing, look for same spi
2772 		 * in other hashtable.
2773 		 */
2774 		ipsapp->ipsap_psa_ptr =
2775 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2776 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2777 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2778 		mutex_exit(&outbound_bucket->isaf_lock);
2779 		mutex_exit(&inbound_bucket->isaf_lock);
2780 		return (ipsapp);
2781 	}
2782 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2783 	IPSA_COPY_ADDR(&pair_srcaddr,
2784 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, af);
2785 	IPSA_COPY_ADDR(&pair_dstaddr,
2786 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, af);
2787 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2788 	mutex_exit(&outbound_bucket->isaf_lock);
2789 	mutex_exit(&inbound_bucket->isaf_lock);
2790 
2791 	if (pair_spi == 0) {
2792 		ASSERT(ipsapp->ipsap_bucket != NULL);
2793 		ASSERT(ipsapp->ipsap_pbucket != NULL);
2794 		return (ipsapp);
2795 	}
2796 
2797 	/* found sa in outbound sadb, peer should be inbound */
2798 
2799 	if (in_inbound_table) {
2800 		/* Found SA in inbound table, pair will be in outbound. */
2801 		if (af == AF_INET6) {
2802 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sp,
2803 			    *(uint32_t *)pair_srcaddr);
2804 		} else {
2805 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sp,
2806 			    *(uint32_t *)pair_srcaddr);
2807 		}
2808 	} else {
2809 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sp, pair_spi);
2810 	}
2811 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2812 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2813 	    pair_spi, pair_dstaddr, pair_srcaddr, af);
2814 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2815 
2816 	ASSERT(ipsapp->ipsap_bucket != NULL);
2817 	ASSERT(ipsapp->ipsap_pbucket != NULL);
2818 	return (ipsapp);
2819 }
2820 
2821 /*
2822  * Initialize the mechanism parameters associated with an SA.
2823  * These parameters can be shared by multiple packets, which saves
2824  * us from the overhead of consulting the algorithm table for
2825  * each packet.
2826  */
2827 static void
2828 sadb_init_alginfo(ipsa_t *sa)
2829 {
2830 	ipsec_alginfo_t *alg;
2831 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
2832 
2833 	mutex_enter(&ipss->ipsec_alg_lock);
2834 
2835 	if (sa->ipsa_encrkey != NULL) {
2836 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
2837 		if (alg != NULL && ALG_VALID(alg)) {
2838 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
2839 			sa->ipsa_emech.cm_param = NULL;
2840 			sa->ipsa_emech.cm_param_len = 0;
2841 			sa->ipsa_iv_len = alg->alg_datalen;
2842 		} else
2843 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
2844 	}
2845 
2846 	if (sa->ipsa_authkey != NULL) {
2847 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
2848 		if (alg != NULL && ALG_VALID(alg)) {
2849 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
2850 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
2851 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
2852 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
2853 		} else
2854 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
2855 	}
2856 
2857 	mutex_exit(&ipss->ipsec_alg_lock);
2858 }
2859 
2860 /*
2861  * Perform NAT-traversal cached checksum offset calculations here.
2862  */
2863 static void
2864 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2865     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2866     uint32_t *dst_addr_ptr)
2867 {
2868 	struct sockaddr_in *natt_loc, *natt_rem;
2869 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2870 	uint32_t running_sum = 0;
2871 
2872 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2873 
2874 	if (natt_rem_ext != NULL) {
2875 		uint32_t l_src;
2876 		uint32_t l_rem;
2877 
2878 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2879 
2880 		/* Ensured by sadb_addrfix(). */
2881 		ASSERT(natt_rem->sin_family == AF_INET);
2882 
2883 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2884 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2885 		l_src = *src_addr_ptr;
2886 		l_rem = *natt_rem_ptr;
2887 
2888 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2889 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2890 
2891 		l_src = ntohl(l_src);
2892 		DOWN_SUM(l_src);
2893 		DOWN_SUM(l_src);
2894 		l_rem = ntohl(l_rem);
2895 		DOWN_SUM(l_rem);
2896 		DOWN_SUM(l_rem);
2897 
2898 		/*
2899 		 * We're 1's complement for checksums, so check for wraparound
2900 		 * here.
2901 		 */
2902 		if (l_rem > l_src)
2903 			l_src--;
2904 
2905 		running_sum += l_src - l_rem;
2906 
2907 		DOWN_SUM(running_sum);
2908 		DOWN_SUM(running_sum);
2909 	}
2910 
2911 	if (natt_loc_ext != NULL) {
2912 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2913 
2914 		/* Ensured by sadb_addrfix(). */
2915 		ASSERT(natt_loc->sin_family == AF_INET);
2916 
2917 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2918 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2919 
2920 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2921 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2922 
2923 		/*
2924 		 * NAT-T port agility means we may have natt_loc_ext, but
2925 		 * only for a local-port change.
2926 		 */
2927 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2928 			uint32_t l_dst = ntohl(*dst_addr_ptr);
2929 			uint32_t l_loc = ntohl(*natt_loc_ptr);
2930 
2931 			DOWN_SUM(l_loc);
2932 			DOWN_SUM(l_loc);
2933 			DOWN_SUM(l_dst);
2934 			DOWN_SUM(l_dst);
2935 
2936 			/*
2937 			 * We're 1's complement for checksums, so check for
2938 			 * wraparound here.
2939 			 */
2940 			if (l_loc > l_dst)
2941 				l_dst--;
2942 
2943 			running_sum += l_dst - l_loc;
2944 			DOWN_SUM(running_sum);
2945 			DOWN_SUM(running_sum);
2946 		}
2947 	}
2948 
2949 	newbie->ipsa_inbound_cksum = running_sum;
2950 #undef DOWN_SUM
2951 }
2952 
2953 /*
2954  * This function is called from consumers that need to insert a fully-grown
2955  * security association into its tables.  This function takes into account that
2956  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2957  * hash bucket parameters are set in order of what the SA will be most of the
2958  * time.  (For example, an SA with an unspecified source, and a multicast
2959  * destination will primarily be an outbound SA.  OTOH, if that destination
2960  * is unicast for this node, then the SA will primarily be inbound.)
2961  *
2962  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2963  * to check both buckets for purposes of collision.
2964  *
2965  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2966  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2967  * with additional diagnostic information because there is at least one EINVAL
2968  * case here.
2969  */
2970 int
2971 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2972     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2973     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2974     netstack_t *ns, sadbp_t *spp)
2975 {
2976 	ipsa_t *newbie_clone = NULL, *scratch;
2977 	ipsap_t *ipsapp = NULL;
2978 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2979 	sadb_address_t *srcext =
2980 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2981 	sadb_address_t *dstext =
2982 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2983 	sadb_address_t *isrcext =
2984 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2985 	sadb_address_t *idstext =
2986 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2987 	sadb_x_kmc_t *kmcext =
2988 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2989 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2990 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2991 	sadb_x_pair_t *pair_ext =
2992 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2993 #if 0
2994 	/*
2995 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
2996 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
2997 	 */
2998 	sadb_sens_t *sens = (sadb_sens_t *);
2999 #endif
3000 	struct sockaddr_in *src, *dst, *isrc, *idst;
3001 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
3002 	sadb_lifetime_t *soft =
3003 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3004 	sadb_lifetime_t *hard =
3005 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3006 	sa_family_t af;
3007 	int error = 0;
3008 	boolean_t isupdate = (newbie != NULL);
3009 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
3010 	mblk_t *ctl_mp = NULL;
3011 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3012 
3013 	if (srcext == NULL) {
3014 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3015 		return (EINVAL);
3016 	}
3017 	if (dstext == NULL) {
3018 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3019 		return (EINVAL);
3020 	}
3021 	if (assoc == NULL) {
3022 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3023 		return (EINVAL);
3024 	}
3025 
3026 	src = (struct sockaddr_in *)(srcext + 1);
3027 	src6 = (struct sockaddr_in6 *)(srcext + 1);
3028 	dst = (struct sockaddr_in *)(dstext + 1);
3029 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
3030 	if (isrcext != NULL) {
3031 		isrc = (struct sockaddr_in *)(isrcext + 1);
3032 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
3033 		ASSERT(idstext != NULL);
3034 		idst = (struct sockaddr_in *)(idstext + 1);
3035 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
3036 	} else {
3037 		isrc = NULL;
3038 		isrc6 = NULL;
3039 	}
3040 
3041 	af = src->sin_family;
3042 
3043 	if (af == AF_INET) {
3044 		src_addr_ptr = (uint32_t *)&src->sin_addr;
3045 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
3046 	} else {
3047 		ASSERT(af == AF_INET6);
3048 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
3049 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
3050 	}
3051 
3052 	/*
3053 	 * Check to see if the new SA will be cloned AND paired. The
3054 	 * reason a SA will be cloned is the source or destination addresses
3055 	 * are not specific enough to determine if the SA goes in the outbound
3056 	 * or the inbound hash table, so its cloned and put in both. If
3057 	 * the SA is paired, it's soft linked to another SA for the other
3058 	 * direction. Keeping track and looking up SA's that are direction
3059 	 * unspecific and linked is too hard.
3060 	 */
3061 	if (clone && (pair_ext != NULL)) {
3062 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3063 		return (EINVAL);
3064 	}
3065 
3066 	if (!isupdate) {
3067 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3068 		    src_addr_ptr, dst_addr_ptr, af, ns);
3069 		if (newbie == NULL)
3070 			return (ENOMEM);
3071 	}
3072 
3073 	mutex_enter(&newbie->ipsa_lock);
3074 
3075 	if (isrc != NULL) {
3076 		if (isrc->sin_family == AF_INET) {
3077 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3078 				if (srcext->sadb_address_proto != 0) {
3079 					/*
3080 					 * Mismatched outer-packet protocol
3081 					 * and inner-packet address family.
3082 					 */
3083 					mutex_exit(&newbie->ipsa_lock);
3084 					error = EPROTOTYPE;
3085 					goto error;
3086 				} else {
3087 					/* Fill in with explicit protocol. */
3088 					srcext->sadb_address_proto =
3089 					    IPPROTO_ENCAP;
3090 					dstext->sadb_address_proto =
3091 					    IPPROTO_ENCAP;
3092 				}
3093 			}
3094 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3095 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3096 		} else {
3097 			ASSERT(isrc->sin_family == AF_INET6);
3098 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3099 				if (srcext->sadb_address_proto != 0) {
3100 					/*
3101 					 * Mismatched outer-packet protocol
3102 					 * and inner-packet address family.
3103 					 */
3104 					mutex_exit(&newbie->ipsa_lock);
3105 					error = EPROTOTYPE;
3106 					goto error;
3107 				} else {
3108 					/* Fill in with explicit protocol. */
3109 					srcext->sadb_address_proto =
3110 					    IPPROTO_IPV6;
3111 					dstext->sadb_address_proto =
3112 					    IPPROTO_IPV6;
3113 				}
3114 			}
3115 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3116 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3117 		}
3118 		newbie->ipsa_innerfam = isrc->sin_family;
3119 
3120 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3121 		    newbie->ipsa_innerfam);
3122 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3123 		    newbie->ipsa_innerfam);
3124 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3125 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3126 
3127 		/* Unique value uses inner-ports for Tunnel Mode... */
3128 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3129 		    idst->sin_port, dstext->sadb_address_proto,
3130 		    idstext->sadb_address_proto);
3131 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3132 		    idst->sin_port, dstext->sadb_address_proto,
3133 		    idstext->sadb_address_proto);
3134 	} else {
3135 		/* ... and outer-ports for Transport Mode. */
3136 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3137 		    dst->sin_port, dstext->sadb_address_proto, 0);
3138 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3139 		    dst->sin_port, dstext->sadb_address_proto, 0);
3140 	}
3141 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3142 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3143 
3144 	sadb_nat_calculations(newbie,
3145 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3146 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3147 	    src_addr_ptr, dst_addr_ptr);
3148 
3149 	newbie->ipsa_type = samsg->sadb_msg_satype;
3150 	ASSERT(assoc->sadb_sa_state == SADB_SASTATE_MATURE);
3151 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3152 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3153 
3154 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3155 	if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3156 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) ||
3157 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3158 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) ||
3159 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3160 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) {
3161 		mutex_exit(&newbie->ipsa_lock);
3162 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3163 		error = EINVAL;
3164 		goto error;
3165 	}
3166 	/*
3167 	 * If unspecified source address, force replay_wsize to 0.
3168 	 * This is because an SA that has multiple sources of secure
3169 	 * traffic cannot enforce a replay counter w/o synchronizing the
3170 	 * senders.
3171 	 */
3172 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3173 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3174 	else
3175 		newbie->ipsa_replay_wsize = 0;
3176 
3177 	newbie->ipsa_addtime = gethrestime_sec();
3178 
3179 	if (kmcext != NULL) {
3180 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3181 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3182 	}
3183 
3184 	/*
3185 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3186 	 * The spec says that one can update current lifetimes, but
3187 	 * that seems impractical, especially in the larval-to-mature
3188 	 * update that this function performs.
3189 	 */
3190 	if (soft != NULL) {
3191 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3192 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3193 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3194 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3195 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3196 	}
3197 	if (hard != NULL) {
3198 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3199 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3200 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3201 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3202 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3203 	}
3204 
3205 	newbie->ipsa_authtmpl = NULL;
3206 	newbie->ipsa_encrtmpl = NULL;
3207 
3208 	if (akey != NULL) {
3209 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3210 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3211 		/* In case we have to round up to the next byte... */
3212 		if ((akey->sadb_key_bits & 0x7) != 0)
3213 			newbie->ipsa_authkeylen++;
3214 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3215 		    KM_NOSLEEP);
3216 		if (newbie->ipsa_authkey == NULL) {
3217 			error = ENOMEM;
3218 			mutex_exit(&newbie->ipsa_lock);
3219 			goto error;
3220 		}
3221 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3222 		bzero(akey + 1, newbie->ipsa_authkeylen);
3223 
3224 		/*
3225 		 * Pre-initialize the kernel crypto framework key
3226 		 * structure.
3227 		 */
3228 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3229 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3230 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3231 
3232 		mutex_enter(&ipss->ipsec_alg_lock);
3233 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3234 		mutex_exit(&ipss->ipsec_alg_lock);
3235 		if (error != 0) {
3236 			mutex_exit(&newbie->ipsa_lock);
3237 			goto error;
3238 		}
3239 	}
3240 
3241 	if (ekey != NULL) {
3242 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3243 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3244 		/* In case we have to round up to the next byte... */
3245 		if ((ekey->sadb_key_bits & 0x7) != 0)
3246 			newbie->ipsa_encrkeylen++;
3247 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3248 		    KM_NOSLEEP);
3249 		if (newbie->ipsa_encrkey == NULL) {
3250 			error = ENOMEM;
3251 			mutex_exit(&newbie->ipsa_lock);
3252 			goto error;
3253 		}
3254 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3255 		/* XXX is this safe w.r.t db_ref, etc? */
3256 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3257 
3258 		/*
3259 		 * Pre-initialize the kernel crypto framework key
3260 		 * structure.
3261 		 */
3262 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3263 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3264 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3265 
3266 		mutex_enter(&ipss->ipsec_alg_lock);
3267 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3268 		mutex_exit(&ipss->ipsec_alg_lock);
3269 		if (error != 0) {
3270 			mutex_exit(&newbie->ipsa_lock);
3271 			goto error;
3272 		}
3273 	}
3274 
3275 	sadb_init_alginfo(newbie);
3276 
3277 	/*
3278 	 * Ptrs to processing functions.
3279 	 */
3280 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3281 		ipsecesp_init_funcs(newbie);
3282 	else
3283 		ipsecah_init_funcs(newbie);
3284 	ASSERT(newbie->ipsa_output_func != NULL &&
3285 	    newbie->ipsa_input_func != NULL);
3286 
3287 	/*
3288 	 * Certificate ID stuff.
3289 	 */
3290 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3291 		sadb_ident_t *id =
3292 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3293 
3294 		/*
3295 		 * Can assume strlen() will return okay because ext_check() in
3296 		 * keysock.c prepares the string for us.
3297 		 */
3298 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3299 		    (char *)(id+1), ns);
3300 		if (newbie->ipsa_src_cid == NULL) {
3301 			error = ENOMEM;
3302 			mutex_exit(&newbie->ipsa_lock);
3303 			goto error;
3304 		}
3305 	}
3306 
3307 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3308 		sadb_ident_t *id =
3309 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3310 
3311 		/*
3312 		 * Can assume strlen() will return okay because ext_check() in
3313 		 * keysock.c prepares the string for us.
3314 		 */
3315 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3316 		    (char *)(id+1), ns);
3317 		if (newbie->ipsa_dst_cid == NULL) {
3318 			error = ENOMEM;
3319 			mutex_exit(&newbie->ipsa_lock);
3320 			goto error;
3321 		}
3322 	}
3323 
3324 #if 0
3325 	/* XXXMLS  SENSITIVITY handling code. */
3326 	if (sens != NULL) {
3327 		int i;
3328 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3329 
3330 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3331 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3332 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3333 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3334 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3335 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3336 		    KM_NOSLEEP);
3337 		if (newbie->ipsa_integ == NULL) {
3338 			error = ENOMEM;
3339 			mutex_exit(&newbie->ipsa_lock);
3340 			goto error;
3341 		}
3342 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3343 		    KM_NOSLEEP);
3344 		if (newbie->ipsa_sens == NULL) {
3345 			error = ENOMEM;
3346 			mutex_exit(&newbie->ipsa_lock);
3347 			goto error;
3348 		}
3349 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3350 			newbie->ipsa_sens[i] = *bitmap;
3351 			bitmap++;
3352 		}
3353 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3354 			newbie->ipsa_integ[i] = *bitmap;
3355 			bitmap++;
3356 		}
3357 	}
3358 
3359 #endif
3360 
3361 	/* now that the SA has been updated, set its new state */
3362 	newbie->ipsa_state = assoc->sadb_sa_state;
3363 
3364 	if (clone) {
3365 		newbie->ipsa_haspeer = B_TRUE;
3366 	} else {
3367 		if (!is_inbound) {
3368 			lifetime_fuzz(newbie);
3369 		}
3370 	}
3371 	/*
3372 	 * The less locks I hold when doing an insertion and possible cloning,
3373 	 * the better!
3374 	 */
3375 	mutex_exit(&newbie->ipsa_lock);
3376 
3377 	if (clone) {
3378 		newbie_clone = sadb_cloneassoc(newbie);
3379 
3380 		if (newbie_clone == NULL) {
3381 			error = ENOMEM;
3382 			goto error;
3383 		}
3384 	}
3385 
3386 	/*
3387 	 * Enter the bucket locks.  The order of entry is outbound,
3388 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3389 	 * based on the destination address type.  If the destination address
3390 	 * type is for a node that isn't mine (or potentially mine), the
3391 	 * "primary" bucket is the outbound one.
3392 	 */
3393 	if (!is_inbound) {
3394 		/* primary == outbound */
3395 		mutex_enter(&primary->isaf_lock);
3396 		mutex_enter(&secondary->isaf_lock);
3397 	} else {
3398 		/* primary == inbound */
3399 		mutex_enter(&secondary->isaf_lock);
3400 		mutex_enter(&primary->isaf_lock);
3401 	}
3402 
3403 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3404 	    newbie->ipsa_spi));
3405 
3406 	/*
3407 	 * sadb_insertassoc() doesn't increment the reference
3408 	 * count.  We therefore have to increment the
3409 	 * reference count one more time to reflect the
3410 	 * pointers of the table that reference this SA.
3411 	 */
3412 	IPSA_REFHOLD(newbie);
3413 
3414 	if (isupdate) {
3415 		/*
3416 		 * Unlink from larval holding cell in the "inbound" fanout.
3417 		 */
3418 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3419 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3420 		sadb_unlinkassoc(newbie);
3421 	}
3422 
3423 	mutex_enter(&newbie->ipsa_lock);
3424 	error = sadb_insertassoc(newbie, primary);
3425 	if (error == 0) {
3426 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3427 		    is_inbound);
3428 	}
3429 	mutex_exit(&newbie->ipsa_lock);
3430 
3431 	if (error != 0) {
3432 		/*
3433 		 * Since sadb_insertassoc() failed, we must decrement the
3434 		 * refcount again so the cleanup code will actually free
3435 		 * the offending SA.
3436 		 */
3437 		IPSA_REFRELE(newbie);
3438 		goto error_unlock;
3439 	}
3440 
3441 	if (newbie_clone != NULL) {
3442 		mutex_enter(&newbie_clone->ipsa_lock);
3443 		error = sadb_insertassoc(newbie_clone, secondary);
3444 		mutex_exit(&newbie_clone->ipsa_lock);
3445 		if (error != 0) {
3446 			/* Collision in secondary table. */
3447 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3448 			goto error_unlock;
3449 		}
3450 		IPSA_REFHOLD(newbie_clone);
3451 	} else {
3452 		ASSERT(primary != secondary);
3453 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3454 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3455 		if (scratch != NULL) {
3456 			/* Collision in secondary table. */
3457 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3458 			/* Set the error, since ipsec_getassocbyspi() can't. */
3459 			error = EEXIST;
3460 			goto error_unlock;
3461 		}
3462 	}
3463 
3464 	/* OKAY!  So let's do some reality check assertions. */
3465 
3466 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3467 	ASSERT(newbie_clone == NULL ||
3468 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3469 	/*
3470 	 * If hardware acceleration could happen, send it.
3471 	 */
3472 	if (ctl_mp != NULL) {
3473 		putnext(ip_q, ctl_mp);
3474 		ctl_mp = NULL;
3475 	}
3476 
3477 error_unlock:
3478 
3479 	/*
3480 	 * We can exit the locks in any order.	Only entrance needs to
3481 	 * follow any protocol.
3482 	 */
3483 	mutex_exit(&secondary->isaf_lock);
3484 	mutex_exit(&primary->isaf_lock);
3485 
3486 	if (pair_ext != NULL && error == 0) {
3487 		/* update pair_spi if it exists. */
3488 		ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
3489 		if (ipsapp == NULL) {
3490 			error = ESRCH;
3491 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
3492 		} else if (ipsapp->ipsap_psa_ptr != NULL) {
3493 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3494 			error = EINVAL;
3495 		} else {
3496 			/* update_pairing() sets diagnostic */
3497 			error = update_pairing(ipsapp, ksi, diagnostic, spp);
3498 		}
3499 	}
3500 	/* Common error point for this routine. */
3501 error:
3502 	if (newbie != NULL) {
3503 		if (error != 0) {
3504 			/* This SA is broken, let the reaper clean up. */
3505 			mutex_enter(&newbie->ipsa_lock);
3506 			newbie->ipsa_state = IPSA_STATE_DEAD;
3507 			newbie->ipsa_hardexpiretime = 1;
3508 			mutex_exit(&newbie->ipsa_lock);
3509 		}
3510 		IPSA_REFRELE(newbie);
3511 	}
3512 	if (newbie_clone != NULL) {
3513 		IPSA_REFRELE(newbie_clone);
3514 	}
3515 	if (ctl_mp != NULL)
3516 		freemsg(ctl_mp);
3517 
3518 	if (error == 0) {
3519 		/*
3520 		 * Construct favorable PF_KEY return message and send to
3521 		 * keysock. Update the flags in the original keysock message
3522 		 * to reflect the actual flags in the new SA.
3523 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3524 		 * make sure to REFHOLD, call, then REFRELE.)
3525 		 */
3526 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3527 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3528 	}
3529 
3530 	destroy_ipsa_pair(ipsapp);
3531 	return (error);
3532 }
3533 
3534 /*
3535  * Set the time of first use for a security association.  Update any
3536  * expiration times as a result.
3537  */
3538 void
3539 sadb_set_usetime(ipsa_t *assoc)
3540 {
3541 	time_t snapshot = gethrestime_sec();
3542 
3543 	mutex_enter(&assoc->ipsa_lock);
3544 	assoc->ipsa_lastuse = snapshot;
3545 	/*
3546 	 * Caller does check usetime before calling me usually, and
3547 	 * double-checking is better than a mutex_enter/exit hit.
3548 	 */
3549 	if (assoc->ipsa_usetime == 0) {
3550 		/*
3551 		 * This is redundant for outbound SA's, as
3552 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3553 		 * Inbound SAs, however, have no such protection.
3554 		 */
3555 		assoc->ipsa_flags |= IPSA_F_USED;
3556 		assoc->ipsa_usetime = snapshot;
3557 
3558 		/*
3559 		 * After setting the use time, see if we have a use lifetime
3560 		 * that would cause the actual SA expiration time to shorten.
3561 		 */
3562 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3563 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3564 	}
3565 	mutex_exit(&assoc->ipsa_lock);
3566 }
3567 
3568 /*
3569  * Send up a PF_KEY expire message for this association.
3570  */
3571 static void
3572 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3573 {
3574 	mblk_t *mp, *mp1;
3575 	int alloclen, af;
3576 	sadb_msg_t *samsg;
3577 	sadb_lifetime_t *current, *expire;
3578 	sadb_sa_t *saext;
3579 	uint8_t *end;
3580 	boolean_t tunnel_mode;
3581 
3582 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3583 
3584 	/* Don't bother sending if there's no queue. */
3585 	if (pfkey_q == NULL)
3586 		return;
3587 
3588 	/* If the SA is one of a pair, only SOFT expire the OUTBOUND SA */
3589 	if (assoc->ipsa_state == IPSA_STATE_DYING &&
3590 	    (assoc->ipsa_flags & IPSA_F_PAIRED) &&
3591 	    !(assoc->ipsa_flags & IPSA_F_OUTBOUND)) {
3592 		return;
3593 	}
3594 
3595 	mp = sadb_keysock_out(0);
3596 	if (mp == NULL) {
3597 		/* cmn_err(CE_WARN, */
3598 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3599 		return;
3600 	}
3601 
3602 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3603 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3604 
3605 	af = assoc->ipsa_addrfam;
3606 	switch (af) {
3607 	case AF_INET:
3608 		alloclen += 2 * sizeof (struct sockaddr_in);
3609 		break;
3610 	case AF_INET6:
3611 		alloclen += 2 * sizeof (struct sockaddr_in6);
3612 		break;
3613 	default:
3614 		/* Won't happen unless there's a kernel bug. */
3615 		freeb(mp);
3616 		cmn_err(CE_WARN,
3617 		    "sadb_expire_assoc: Unknown address length.\n");
3618 		return;
3619 	}
3620 
3621 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3622 	if (tunnel_mode) {
3623 		alloclen += 2 * sizeof (sadb_address_t);
3624 		switch (assoc->ipsa_innerfam) {
3625 		case AF_INET:
3626 			alloclen += 2 * sizeof (struct sockaddr_in);
3627 			break;
3628 		case AF_INET6:
3629 			alloclen += 2 * sizeof (struct sockaddr_in6);
3630 			break;
3631 		default:
3632 			/* Won't happen unless there's a kernel bug. */
3633 			freeb(mp);
3634 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3635 			    "Unknown inner address length.\n");
3636 			return;
3637 		}
3638 	}
3639 
3640 	mp->b_cont = allocb(alloclen, BPRI_HI);
3641 	if (mp->b_cont == NULL) {
3642 		freeb(mp);
3643 		/* cmn_err(CE_WARN, */
3644 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3645 		return;
3646 	}
3647 
3648 	mp1 = mp;
3649 	mp = mp->b_cont;
3650 	end = mp->b_wptr + alloclen;
3651 
3652 	samsg = (sadb_msg_t *)mp->b_wptr;
3653 	mp->b_wptr += sizeof (*samsg);
3654 	samsg->sadb_msg_version = PF_KEY_V2;
3655 	samsg->sadb_msg_type = SADB_EXPIRE;
3656 	samsg->sadb_msg_errno = 0;
3657 	samsg->sadb_msg_satype = assoc->ipsa_type;
3658 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3659 	samsg->sadb_msg_reserved = 0;
3660 	samsg->sadb_msg_seq = 0;
3661 	samsg->sadb_msg_pid = 0;
3662 
3663 	saext = (sadb_sa_t *)mp->b_wptr;
3664 	mp->b_wptr += sizeof (*saext);
3665 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3666 	saext->sadb_sa_exttype = SADB_EXT_SA;
3667 	saext->sadb_sa_spi = assoc->ipsa_spi;
3668 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3669 	saext->sadb_sa_state = assoc->ipsa_state;
3670 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3671 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3672 	saext->sadb_sa_flags = assoc->ipsa_flags;
3673 
3674 	current = (sadb_lifetime_t *)mp->b_wptr;
3675 	mp->b_wptr += sizeof (sadb_lifetime_t);
3676 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3677 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3678 	/* We do not support the concept. */
3679 	current->sadb_lifetime_allocations = 0;
3680 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3681 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3682 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3683 
3684 	expire = (sadb_lifetime_t *)mp->b_wptr;
3685 	mp->b_wptr += sizeof (*expire);
3686 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3687 
3688 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3689 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3690 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3691 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3692 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3693 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3694 	} else {
3695 		ASSERT(assoc->ipsa_state == IPSA_STATE_DYING);
3696 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3697 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3698 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3699 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3700 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3701 	}
3702 
3703 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3704 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3705 	    SA_PROTO(assoc), 0);
3706 	ASSERT(mp->b_wptr != NULL);
3707 
3708 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3709 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3710 	    SA_PROTO(assoc), 0);
3711 	ASSERT(mp->b_wptr != NULL);
3712 
3713 	if (tunnel_mode) {
3714 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3715 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3716 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3717 		    assoc->ipsa_innersrcpfx);
3718 		ASSERT(mp->b_wptr != NULL);
3719 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3720 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3721 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3722 		    assoc->ipsa_innerdstpfx);
3723 		ASSERT(mp->b_wptr != NULL);
3724 	}
3725 
3726 	/* Can just putnext, we're ready to go! */
3727 	putnext(pfkey_q, mp1);
3728 }
3729 
3730 /*
3731  * "Age" the SA with the number of bytes that was used to protect traffic.
3732  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3733  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3734  * otherwise.  (If B_FALSE is returned, the association either was, or became
3735  * DEAD.)
3736  */
3737 boolean_t
3738 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3739     boolean_t sendmsg)
3740 {
3741 	boolean_t rc = B_TRUE;
3742 	uint64_t newtotal;
3743 
3744 	mutex_enter(&assoc->ipsa_lock);
3745 	newtotal = assoc->ipsa_bytes + bytes;
3746 	if (assoc->ipsa_hardbyteslt != 0 &&
3747 	    newtotal >= assoc->ipsa_hardbyteslt) {
3748 		if (assoc->ipsa_state < IPSA_STATE_DEAD) {
3749 			/*
3750 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3751 			 * this off on another non-interrupt thread.  Also
3752 			 * unlink this SA immediately.
3753 			 */
3754 			assoc->ipsa_state = IPSA_STATE_DEAD;
3755 			if (sendmsg)
3756 				sadb_expire_assoc(pfkey_q, assoc);
3757 			/*
3758 			 * Set non-zero expiration time so sadb_age_assoc()
3759 			 * will work when reaping.
3760 			 */
3761 			assoc->ipsa_hardexpiretime = (time_t)1;
3762 		} /* Else someone beat me to it! */
3763 		rc = B_FALSE;
3764 	} else if (assoc->ipsa_softbyteslt != 0 &&
3765 	    (newtotal >= assoc->ipsa_softbyteslt)) {
3766 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3767 			/*
3768 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3769 			 * this off on another non-interrupt thread.
3770 			 */
3771 			assoc->ipsa_state = IPSA_STATE_DYING;
3772 			assoc->ipsa_bytes = newtotal;
3773 			if (sendmsg)
3774 				sadb_expire_assoc(pfkey_q, assoc);
3775 		} /* Else someone beat me to it! */
3776 	}
3777 	if (rc == B_TRUE)
3778 		assoc->ipsa_bytes = newtotal;
3779 	mutex_exit(&assoc->ipsa_lock);
3780 	return (rc);
3781 }
3782 
3783 /*
3784  * Push one or more DL_CO_DELETE messages queued up by
3785  * sadb_torch_assoc down to the underlying driver now that it's a
3786  * convenient time for it (i.e., ipsa bucket locks not held).
3787  */
3788 static void
3789 sadb_drain_torchq(queue_t *q, mblk_t *mp)
3790 {
3791 	while (mp != NULL) {
3792 		mblk_t *next = mp->b_next;
3793 		mp->b_next = NULL;
3794 		if (q != NULL)
3795 			putnext(q, mp);
3796 		else
3797 			freemsg(mp);
3798 		mp = next;
3799 	}
3800 }
3801 
3802 /*
3803  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3804  *     sadb_age_assoc().
3805  *
3806  * If SA is hardware-accelerated, and we can't allocate the mblk
3807  * containing the DL_CO_DELETE, just return; it will remain in the
3808  * table and be swept up by sadb_ager() in a subsequent pass.
3809  */
3810 static ipsa_t *
3811 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
3812 {
3813 	mblk_t *mp;
3814 
3815 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3816 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3817 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3818 
3819 	/*
3820 	 * Force cached SAs to be revalidated..
3821 	 */
3822 	head->isaf_gen++;
3823 
3824 	if (sa->ipsa_flags & IPSA_F_HW) {
3825 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
3826 		if (mp == NULL) {
3827 			mutex_exit(&sa->ipsa_lock);
3828 			return (NULL);
3829 		}
3830 		mp->b_next = *mq;
3831 		*mq = mp;
3832 	}
3833 	mutex_exit(&sa->ipsa_lock);
3834 	sadb_unlinkassoc(sa);
3835 
3836 	return (NULL);
3837 }
3838 
3839 /*
3840  * Do various SA-is-idle activities depending on delta (the number of idle
3841  * seconds on the SA) and/or other properties of the SA.
3842  *
3843  * Return B_TRUE if I've sent a packet, because I have to drop the
3844  * association's mutex before sending a packet out the wire.
3845  */
3846 /* ARGSUSED */
3847 static boolean_t
3848 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3849 {
3850 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3851 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3852 
3853 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3854 
3855 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3856 	    delta >= nat_t_interval &&
3857 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3858 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3859 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3860 		mutex_exit(&assoc->ipsa_lock);
3861 		ipsecesp_send_keepalive(assoc);
3862 		return (B_TRUE);
3863 	}
3864 	return (B_FALSE);
3865 }
3866 
3867 /*
3868  * Return "assoc" if haspeer is true and I send an expire.  This allows
3869  * the consumers' aging functions to tidy up an expired SA's peer.
3870  */
3871 static ipsa_t *
3872 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3873     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
3874 {
3875 	ipsa_t *retval = NULL;
3876 	boolean_t dropped_mutex = B_FALSE;
3877 
3878 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3879 
3880 	mutex_enter(&assoc->ipsa_lock);
3881 
3882 	if ((assoc->ipsa_state == IPSA_STATE_LARVAL) &&
3883 	    (assoc->ipsa_hardexpiretime <= current)) {
3884 		assoc->ipsa_state = IPSA_STATE_DEAD;
3885 		return (sadb_torch_assoc(head, assoc, inbound, mq));
3886 	}
3887 
3888 	/*
3889 	 * Check lifetimes.  Fortunately, SA setup is done
3890 	 * such that there are only two times to look at,
3891 	 * softexpiretime, and hardexpiretime.
3892 	 *
3893 	 * Check hard first.
3894 	 */
3895 
3896 	if (assoc->ipsa_hardexpiretime != 0 &&
3897 	    assoc->ipsa_hardexpiretime <= current) {
3898 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
3899 			return (sadb_torch_assoc(head, assoc, inbound, mq));
3900 
3901 		/*
3902 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
3903 		 */
3904 		assoc->ipsa_state = IPSA_STATE_DEAD;
3905 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
3906 			/*
3907 			 * If the SA is paired or peered with another, put
3908 			 * a copy on a list which can be processed later, the
3909 			 * pair/peer SA needs to be updated so the both die
3910 			 * at the same time.
3911 			 *
3912 			 * If I return assoc, I have to bump up its reference
3913 			 * count to keep with the ipsa_t reference count
3914 			 * semantics.
3915 			 */
3916 			IPSA_REFHOLD(assoc);
3917 			retval = assoc;
3918 		}
3919 		sadb_expire_assoc(pfkey_q, assoc);
3920 		assoc->ipsa_hardexpiretime = current + reap_delay;
3921 	} else if (assoc->ipsa_softexpiretime != 0 &&
3922 	    assoc->ipsa_softexpiretime <= current &&
3923 	    assoc->ipsa_state < IPSA_STATE_DYING) {
3924 		/*
3925 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
3926 		 * this off on another non-interrupt thread.
3927 		 */
3928 		assoc->ipsa_state = IPSA_STATE_DYING;
3929 		if (assoc->ipsa_haspeer) {
3930 			/*
3931 			 * If the SA has a peer, update the peer's state
3932 			 * on SOFT_EXPIRE, this is mostly to prevent two
3933 			 * expire messages from effectively the same SA.
3934 			 *
3935 			 * Don't care about paired SA's, then can (and should)
3936 			 * be able to soft expire at different times.
3937 			 *
3938 			 * If I return assoc, I have to bump up its
3939 			 * reference count to keep with the ipsa_t reference
3940 			 * count semantics.
3941 			 */
3942 			IPSA_REFHOLD(assoc);
3943 			retval = assoc;
3944 		}
3945 		sadb_expire_assoc(pfkey_q, assoc);
3946 	} else {
3947 		/* Check idle time activities. */
3948 		dropped_mutex = sadb_idle_activities(assoc,
3949 		    current - assoc->ipsa_lastuse, inbound);
3950 	}
3951 
3952 	if (!dropped_mutex)
3953 		mutex_exit(&assoc->ipsa_lock);
3954 	return (retval);
3955 }
3956 
3957 /*
3958  * Called by a consumer protocol to do ther dirty work of reaping dead
3959  * Security Associations.
3960  *
3961  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
3962  * SA's that are already marked DEAD, so expired SA's are only reaped
3963  * the second time sadb_ager() runs.
3964  */
3965 void
3966 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
3967     netstack_t *ns)
3968 {
3969 	int i;
3970 	isaf_t *bucket;
3971 	ipsa_t *assoc, *spare;
3972 	iacqf_t *acqlist;
3973 	ipsacq_t *acqrec, *spareacq;
3974 	templist_t *haspeerlist, *newbie;
3975 	/* Snapshot current time now. */
3976 	time_t current = gethrestime_sec();
3977 	mblk_t *mq = NULL;
3978 	haspeerlist = NULL;
3979 
3980 	/*
3981 	 * Do my dirty work.  This includes aging real entries, aging
3982 	 * larvals, and aging outstanding ACQUIREs.
3983 	 *
3984 	 * I hope I don't tie up resources for too long.
3985 	 */
3986 
3987 	/* Age acquires. */
3988 
3989 	for (i = 0; i < sp->sdb_hashsize; i++) {
3990 		acqlist = &sp->sdb_acq[i];
3991 		mutex_enter(&acqlist->iacqf_lock);
3992 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
3993 		    acqrec = spareacq) {
3994 			spareacq = acqrec->ipsacq_next;
3995 			if (current > acqrec->ipsacq_expire)
3996 				sadb_destroy_acquire(acqrec, ns);
3997 		}
3998 		mutex_exit(&acqlist->iacqf_lock);
3999 	}
4000 
4001 	/* Age inbound associations. */
4002 	for (i = 0; i < sp->sdb_hashsize; i++) {
4003 		bucket = &(sp->sdb_if[i]);
4004 		mutex_enter(&bucket->isaf_lock);
4005 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4006 		    assoc = spare) {
4007 			spare = assoc->ipsa_next;
4008 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4009 			    reap_delay, B_TRUE, &mq) != NULL) {
4010 				/*
4011 				 * Put SA's which have a peer or SA's which
4012 				 * are paired on a list for processing after
4013 				 * all the hash tables have been walked.
4014 				 *
4015 				 * sadb_age_assoc() increments the refcnt,
4016 				 * effectively doing an IPSA_REFHOLD().
4017 				 */
4018 				newbie = kmem_alloc(sizeof (*newbie),
4019 				    KM_NOSLEEP);
4020 				if (newbie == NULL) {
4021 					/*
4022 					 * Don't forget to REFRELE().
4023 					 */
4024 					IPSA_REFRELE(assoc);
4025 					continue;	/* for loop... */
4026 				}
4027 				newbie->next = haspeerlist;
4028 				newbie->ipsa = assoc;
4029 				haspeerlist = newbie;
4030 			}
4031 		}
4032 		mutex_exit(&bucket->isaf_lock);
4033 	}
4034 
4035 	if (mq != NULL) {
4036 		sadb_drain_torchq(ip_q, mq);
4037 		mq = NULL;
4038 	}
4039 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4040 	haspeerlist = NULL;
4041 
4042 	/* Age outbound associations. */
4043 	for (i = 0; i < sp->sdb_hashsize; i++) {
4044 		bucket = &(sp->sdb_of[i]);
4045 		mutex_enter(&bucket->isaf_lock);
4046 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4047 		    assoc = spare) {
4048 			spare = assoc->ipsa_next;
4049 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4050 			    reap_delay, B_FALSE, &mq) != NULL) {
4051 				/*
4052 				 * sadb_age_assoc() increments the refcnt,
4053 				 * effectively doing an IPSA_REFHOLD().
4054 				 */
4055 				newbie = kmem_alloc(sizeof (*newbie),
4056 				    KM_NOSLEEP);
4057 				if (newbie == NULL) {
4058 					/*
4059 					 * Don't forget to REFRELE().
4060 					 */
4061 					IPSA_REFRELE(assoc);
4062 					continue;	/* for loop... */
4063 				}
4064 				newbie->next = haspeerlist;
4065 				newbie->ipsa = assoc;
4066 				haspeerlist = newbie;
4067 			}
4068 		}
4069 		mutex_exit(&bucket->isaf_lock);
4070 	}
4071 	if (mq != NULL) {
4072 		sadb_drain_torchq(ip_q, mq);
4073 		mq = NULL;
4074 	}
4075 
4076 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4077 
4078 	/*
4079 	 * Run a GC pass to clean out dead identities.
4080 	 */
4081 	ipsid_gc(ns);
4082 }
4083 
4084 /*
4085  * Figure out when to reschedule the ager.
4086  */
4087 timeout_id_t
4088 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4089     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4090 {
4091 	hrtime_t end = gethrtime();
4092 	uint_t interval = *intp;
4093 
4094 	/*
4095 	 * See how long this took.  If it took too long, increase the
4096 	 * aging interval.
4097 	 */
4098 	if ((end - begin) > interval * 1000000) {
4099 		if (interval >= intmax) {
4100 			/* XXX Rate limit this?  Or recommend flush? */
4101 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4102 			    "Too many SA's to age out in %d msec.\n",
4103 			    intmax);
4104 		} else {
4105 			/* Double by shifting by one bit. */
4106 			interval <<= 1;
4107 			interval = min(interval, intmax);
4108 		}
4109 	} else if ((end - begin) <= interval * 500000 &&
4110 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4111 		/*
4112 		 * If I took less than half of the interval, then I should
4113 		 * ratchet the interval back down.  Never automatically
4114 		 * shift below the default aging interval.
4115 		 *
4116 		 * NOTE:This even overrides manual setting of the age
4117 		 *	interval using NDD.
4118 		 */
4119 		/* Halve by shifting one bit. */
4120 		interval >>= 1;
4121 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4122 	}
4123 	*intp = interval;
4124 	return (qtimeout(pfkey_q, ager, agerarg,
4125 	    interval * drv_usectohz(1000)));
4126 }
4127 
4128 
4129 /*
4130  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4131  * message takes when updating a MATURE or DYING SA.
4132  */
4133 static void
4134 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4135     sadb_lifetime_t *soft, boolean_t outbound)
4136 {
4137 	mutex_enter(&assoc->ipsa_lock);
4138 
4139 	/*
4140 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4141 	 * passed in during an update message.	We currently don't handle
4142 	 * these.
4143 	 */
4144 
4145 	if (hard != NULL) {
4146 		if (hard->sadb_lifetime_bytes != 0)
4147 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4148 		if (hard->sadb_lifetime_usetime != 0)
4149 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4150 		if (hard->sadb_lifetime_addtime != 0)
4151 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4152 		if (assoc->ipsa_hardaddlt != 0) {
4153 			assoc->ipsa_hardexpiretime =
4154 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4155 		}
4156 		if (assoc->ipsa_harduselt != 0 &&
4157 		    assoc->ipsa_flags & IPSA_F_USED) {
4158 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4159 		}
4160 		if (hard->sadb_lifetime_allocations != 0)
4161 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4162 	}
4163 
4164 	if (soft != NULL) {
4165 		if (soft->sadb_lifetime_bytes != 0) {
4166 			if (soft->sadb_lifetime_bytes >
4167 			    assoc->ipsa_hardbyteslt) {
4168 				assoc->ipsa_softbyteslt =
4169 				    assoc->ipsa_hardbyteslt;
4170 			} else {
4171 				assoc->ipsa_softbyteslt =
4172 				    soft->sadb_lifetime_bytes;
4173 			}
4174 		}
4175 		if (soft->sadb_lifetime_usetime != 0) {
4176 			if (soft->sadb_lifetime_usetime >
4177 			    assoc->ipsa_harduselt) {
4178 				assoc->ipsa_softuselt =
4179 				    assoc->ipsa_harduselt;
4180 			} else {
4181 				assoc->ipsa_softuselt =
4182 				    soft->sadb_lifetime_usetime;
4183 			}
4184 		}
4185 		if (soft->sadb_lifetime_addtime != 0) {
4186 			if (soft->sadb_lifetime_addtime >
4187 			    assoc->ipsa_hardexpiretime) {
4188 				assoc->ipsa_softexpiretime =
4189 				    assoc->ipsa_hardexpiretime;
4190 			} else {
4191 				assoc->ipsa_softaddlt =
4192 				    soft->sadb_lifetime_addtime;
4193 			}
4194 		}
4195 		if (assoc->ipsa_softaddlt != 0) {
4196 			assoc->ipsa_softexpiretime =
4197 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4198 		}
4199 		if (assoc->ipsa_softuselt != 0 &&
4200 		    assoc->ipsa_flags & IPSA_F_USED) {
4201 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4202 		}
4203 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4204 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4205 				lifetime_fuzz(assoc);
4206 		}
4207 
4208 		if (soft->sadb_lifetime_allocations != 0)
4209 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4210 	}
4211 	mutex_exit(&assoc->ipsa_lock);
4212 }
4213 
4214 /*
4215  * Common code to update an SA.
4216  */
4217 
4218 int
4219 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi,
4220     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4221     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4222     netstack_t *ns, uint8_t sadb_msg_type)
4223 {
4224 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4225 	sadb_address_t *srcext =
4226 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4227 	sadb_address_t *dstext =
4228 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4229 	sadb_x_kmc_t *kmcext =
4230 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4231 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4232 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4233 	sadb_lifetime_t *soft =
4234 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4235 	sadb_lifetime_t *hard =
4236 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4237 	sadb_x_pair_t *pair_ext =
4238 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4239 	ipsa_t *echo_target = NULL;
4240 	int error = 0;
4241 	ipsap_t *ipsapp = NULL;
4242 	uint32_t kmp = 0, kmc = 0;
4243 
4244 
4245 	/* I need certain extensions present for either UPDATE message. */
4246 	if (srcext == NULL) {
4247 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4248 		return (EINVAL);
4249 	}
4250 	if (dstext == NULL) {
4251 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4252 		return (EINVAL);
4253 	}
4254 	if (assoc == NULL) {
4255 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4256 		return (EINVAL);
4257 	}
4258 
4259 	if (kmcext != NULL) {
4260 		kmp = kmcext->sadb_x_kmc_proto;
4261 		kmc = kmcext->sadb_x_kmc_cookie;
4262 	}
4263 
4264 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4265 	if (ipsapp == NULL) {
4266 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
4267 		return (ESRCH);
4268 	}
4269 
4270 	if (ipsapp->ipsap_psa_ptr == NULL && ipsapp->ipsap_sa_ptr != NULL) {
4271 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4272 			/*
4273 			 * REFRELE the target and let the add_sa_func()
4274 			 * deal with updating a larval SA.
4275 			 */
4276 			destroy_ipsa_pair(ipsapp);
4277 			return (add_sa_func(mp, ksi, diagnostic, ns));
4278 		}
4279 	}
4280 
4281 	/*
4282 	 * Reality checks for updates of active associations.
4283 	 * Sundry first-pass UPDATE-specific reality checks.
4284 	 * Have to do the checks here, because it's after the add_sa code.
4285 	 * XXX STATS : logging/stats here?
4286 	 */
4287 
4288 	if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
4289 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4290 		error = EINVAL;
4291 		goto bail;
4292 	}
4293 
4294 	if (assoc->sadb_sa_flags & ~spp->s_updateflags) {
4295 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4296 		error = EINVAL;
4297 		goto bail;
4298 	}
4299 
4300 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4301 		error = EOPNOTSUPP;
4302 		goto bail;
4303 	}
4304 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) {
4305 		error = EINVAL;
4306 		goto bail;
4307 	}
4308 	if (akey != NULL) {
4309 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4310 		error = EINVAL;
4311 		goto bail;
4312 	}
4313 	if (ekey != NULL) {
4314 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4315 		error = EINVAL;
4316 		goto bail;
4317 	}
4318 
4319 	if (ipsapp->ipsap_sa_ptr != NULL) {
4320 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4321 			error = ESRCH;	/* DEAD == Not there, in this case. */
4322 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4323 			goto bail;
4324 		}
4325 		if ((kmp != 0) &&
4326 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmp != 0) ||
4327 		    (ipsapp->ipsap_sa_ptr->ipsa_kmp != kmp))) {
4328 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4329 			error = EINVAL;
4330 			goto bail;
4331 		}
4332 		if ((kmc != 0) &&
4333 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmc != 0) ||
4334 		    (ipsapp->ipsap_sa_ptr->ipsa_kmc != kmc))) {
4335 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4336 			error = EINVAL;
4337 			goto bail;
4338 		}
4339 	}
4340 
4341 	if (ipsapp->ipsap_psa_ptr != NULL) {
4342 		if (ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4343 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4344 			error = ESRCH;	/* DEAD == Not there, in this case. */
4345 			goto bail;
4346 		}
4347 		if ((kmp != 0) &&
4348 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmp != 0) ||
4349 		    (ipsapp->ipsap_psa_ptr->ipsa_kmp != kmp))) {
4350 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4351 			error = EINVAL;
4352 			goto bail;
4353 		}
4354 		if ((kmc != 0) &&
4355 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmc != 0) ||
4356 		    (ipsapp->ipsap_psa_ptr->ipsa_kmc != kmc))) {
4357 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4358 			error = EINVAL;
4359 			goto bail;
4360 		}
4361 	}
4362 
4363 	if (ipsapp->ipsap_sa_ptr != NULL) {
4364 		sadb_update_lifetimes(ipsapp->ipsap_sa_ptr, hard, soft, B_TRUE);
4365 		if (kmp != 0)
4366 			ipsapp->ipsap_sa_ptr->ipsa_kmp = kmp;
4367 		if (kmc != 0)
4368 			ipsapp->ipsap_sa_ptr->ipsa_kmc = kmc;
4369 	}
4370 
4371 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4372 		if (ipsapp->ipsap_psa_ptr != NULL) {
4373 			sadb_update_lifetimes(ipsapp->ipsap_psa_ptr, hard, soft,
4374 			    B_FALSE);
4375 			if (kmp != 0)
4376 				ipsapp->ipsap_psa_ptr->ipsa_kmp = kmp;
4377 			if (kmc != 0)
4378 				ipsapp->ipsap_psa_ptr->ipsa_kmc = kmc;
4379 		} else {
4380 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4381 			error = ESRCH;
4382 			goto bail;
4383 		}
4384 	}
4385 
4386 	if (pair_ext != NULL)
4387 		error = update_pairing(ipsapp, ksi, diagnostic, spp);
4388 
4389 	if (error == 0)
4390 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4391 		    ksi, echo_target);
4392 bail:
4393 
4394 	destroy_ipsa_pair(ipsapp);
4395 
4396 	return (error);
4397 }
4398 
4399 
4400 int
4401 update_pairing(ipsap_t *ipsapp, keysock_in_t *ksi, int *diagnostic,
4402     sadbp_t *spp)
4403 {
4404 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4405 	sadb_address_t *srcext =
4406 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4407 	sadb_address_t *dstext =
4408 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4409 	sadb_x_pair_t *pair_ext =
4410 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4411 	int error = 0;
4412 	ipsap_t *oipsapp = NULL;
4413 	boolean_t undo_pair = B_FALSE;
4414 	uint32_t ipsa_flags;
4415 
4416 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4417 	    assoc->sadb_sa_spi) {
4418 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4419 		return (EINVAL);
4420 	}
4421 
4422 	/*
4423 	 * Assume for now that the spi value provided in the SADB_UPDATE
4424 	 * message was valid, update the SA with its pair spi value.
4425 	 * If the spi turns out to be bogus or the SA no longer exists
4426 	 * then this will be detected when the reverse update is made
4427 	 * below.
4428 	 */
4429 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4430 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4431 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4432 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4433 
4434 	/*
4435 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4436 	 * should now return pointers to the SA *AND* its pair, if this is not
4437 	 * the case, the "otherspi" either did not exist or was deleted. Also
4438 	 * check that "otherspi" is not already paired. If everything looks
4439 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4440 	 * after this update to ensure its not deleted until we are done.
4441 	 */
4442 	oipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4443 	if (oipsapp == NULL) {
4444 		/*
4445 		 * This should never happen, calling function still has
4446 		 * IPSA_REFHELD on the SA we just updated.
4447 		 */
4448 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4449 		return (EINVAL);
4450 	}
4451 
4452 	if (oipsapp->ipsap_psa_ptr == NULL) {
4453 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4454 		undo_pair = B_TRUE;
4455 	} else {
4456 		ipsa_flags = oipsapp->ipsap_psa_ptr->ipsa_flags;
4457 		if (oipsapp->ipsap_psa_ptr->ipsa_state > IPSA_STATE_MATURE) {
4458 			/* Its dead Jim! */
4459 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4460 			undo_pair = B_TRUE;
4461 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4462 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4463 			/* This SA is in both hashtables. */
4464 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4465 			undo_pair = B_TRUE;
4466 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4467 			/* This SA is already paired with another. */
4468 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4469 			undo_pair = B_TRUE;
4470 		}
4471 	}
4472 
4473 	if (undo_pair) {
4474 		/* The pair SA does not exist. */
4475 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4476 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4477 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4478 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4479 		error = EINVAL;
4480 	} else {
4481 		mutex_enter(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4482 		oipsapp->ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4483 		oipsapp->ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4484 		mutex_exit(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4485 	}
4486 
4487 	destroy_ipsa_pair(oipsapp);
4488 	return (error);
4489 }
4490 
4491 /*
4492  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4493  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4494  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4495  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4496  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4497  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4498  * other direction's SA.
4499  */
4500 
4501 /*
4502  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4503  * grab it, lock it, and return it.  Otherwise return NULL.
4504  */
4505 static ipsacq_t *
4506 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4507     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4508     uint64_t unique_id)
4509 {
4510 	ipsacq_t *walker;
4511 	sa_family_t fam;
4512 	uint32_t blank_address[4] = {0, 0, 0, 0};
4513 
4514 	if (isrc == NULL) {
4515 		ASSERT(idst == NULL);
4516 		isrc = idst = blank_address;
4517 	}
4518 
4519 	/*
4520 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4521 	 *
4522 	 * XXX May need search for duplicates based on other things too!
4523 	 */
4524 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4525 	    walker = walker->ipsacq_next) {
4526 		mutex_enter(&walker->ipsacq_lock);
4527 		fam = walker->ipsacq_addrfam;
4528 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4529 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4530 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4531 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4532 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4533 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4534 		    (ap == walker->ipsacq_act) &&
4535 		    (pp == walker->ipsacq_policy) &&
4536 		    /* XXX do deep compares of ap/pp? */
4537 		    (unique_id == walker->ipsacq_unique_id))
4538 			break;			/* everything matched */
4539 		mutex_exit(&walker->ipsacq_lock);
4540 	}
4541 
4542 	return (walker);
4543 }
4544 
4545 /*
4546  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4547  * of all of the same length.  Give up (and drop) if memory
4548  * cannot be allocated for a new one; otherwise, invoke callback to
4549  * send the acquire up..
4550  *
4551  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4552  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
4553  * and handle this case specially.
4554  */
4555 void
4556 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
4557 {
4558 	sadbp_t *spp;
4559 	sadb_t *sp;
4560 	ipsacq_t *newbie;
4561 	iacqf_t *bucket;
4562 	mblk_t *datamp = mp->b_cont;
4563 	mblk_t *extended;
4564 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4565 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4566 	uint32_t *src, *dst, *isrc, *idst;
4567 	ipsec_policy_t *pp = io->ipsec_out_policy;
4568 	ipsec_action_t *ap = io->ipsec_out_act;
4569 	sa_family_t af;
4570 	int hashoffset;
4571 	uint32_t seq;
4572 	uint64_t unique_id = 0;
4573 	ipsec_selector_t sel;
4574 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
4575 	netstack_t	*ns = io->ipsec_out_ns;
4576 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4577 
4578 	ASSERT((pp != NULL) || (ap != NULL));
4579 
4580 	ASSERT(need_ah != NULL || need_esp != NULL);
4581 	/* Assign sadb pointers */
4582 	if (need_esp) { /* ESP for AH+ESP */
4583 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4584 
4585 		spp = &espstack->esp_sadb;
4586 	} else {
4587 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
4588 
4589 		spp = &ahstack->ah_sadb;
4590 	}
4591 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
4592 
4593 	if (ap == NULL)
4594 		ap = pp->ipsp_act;
4595 
4596 	ASSERT(ap != NULL);
4597 
4598 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4599 		unique_id = SA_FORM_UNIQUE_ID(io);
4600 
4601 	/*
4602 	 * Set up an ACQUIRE record.
4603 	 *
4604 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
4605 	 * below the lowest point allowed in the kernel.  (In other words,
4606 	 * make sure the high bit on the sequence number is set.)
4607 	 */
4608 
4609 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4610 
4611 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4612 		src = (uint32_t *)&ipha->ipha_src;
4613 		dst = (uint32_t *)&ipha->ipha_dst;
4614 		af = AF_INET;
4615 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4616 		ASSERT(io->ipsec_out_v4 == B_TRUE);
4617 	} else {
4618 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4619 		src = (uint32_t *)&ip6h->ip6_src;
4620 		dst = (uint32_t *)&ip6h->ip6_dst;
4621 		af = AF_INET6;
4622 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4623 		ASSERT(io->ipsec_out_v4 == B_FALSE);
4624 	}
4625 
4626 	if (tunnel_mode) {
4627 		/* Snag inner addresses. */
4628 		isrc = io->ipsec_out_insrc;
4629 		idst = io->ipsec_out_indst;
4630 	} else {
4631 		isrc = idst = NULL;
4632 	}
4633 
4634 	/*
4635 	 * Check buckets to see if there is an existing entry.  If so,
4636 	 * grab it.  sadb_checkacquire locks newbie if found.
4637 	 */
4638 	bucket = &(sp->sdb_acq[hashoffset]);
4639 	mutex_enter(&bucket->iacqf_lock);
4640 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4641 	    unique_id);
4642 
4643 	if (newbie == NULL) {
4644 		/*
4645 		 * Otherwise, allocate a new one.
4646 		 */
4647 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4648 		if (newbie == NULL) {
4649 			mutex_exit(&bucket->iacqf_lock);
4650 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
4651 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
4652 			    &ipss->ipsec_sadb_dropper);
4653 			return;
4654 		}
4655 		newbie->ipsacq_policy = pp;
4656 		if (pp != NULL) {
4657 			IPPOL_REFHOLD(pp);
4658 		}
4659 		IPACT_REFHOLD(ap);
4660 		newbie->ipsacq_act = ap;
4661 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
4662 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
4663 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4664 		if (newbie->ipsacq_next != NULL)
4665 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4666 		bucket->iacqf_ipsacq = newbie;
4667 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4668 		mutex_enter(&newbie->ipsacq_lock);
4669 	}
4670 
4671 	mutex_exit(&bucket->iacqf_lock);
4672 
4673 	/*
4674 	 * This assert looks silly for now, but we may need to enter newbie's
4675 	 * mutex during a search.
4676 	 */
4677 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
4678 
4679 	mp->b_next = NULL;
4680 	/* Queue up packet.  Use b_next. */
4681 	if (newbie->ipsacq_numpackets == 0) {
4682 		/* First one. */
4683 		newbie->ipsacq_mp = mp;
4684 		newbie->ipsacq_numpackets = 1;
4685 		newbie->ipsacq_expire = gethrestime_sec();
4686 		/*
4687 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
4688 		 * value.
4689 		 */
4690 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
4691 		newbie->ipsacq_seq = seq;
4692 		newbie->ipsacq_addrfam = af;
4693 
4694 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
4695 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
4696 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
4697 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
4698 		if (tunnel_mode) {
4699 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
4700 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
4701 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
4702 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
4703 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
4704 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
4705 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
4706 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
4707 			    io->ipsec_out_indst, io->ipsec_out_inaf);
4708 		} else {
4709 			newbie->ipsacq_proto = io->ipsec_out_proto;
4710 		}
4711 		newbie->ipsacq_unique_id = unique_id;
4712 	} else {
4713 		/* Scan to the end of the list & insert. */
4714 		mblk_t *lastone = newbie->ipsacq_mp;
4715 
4716 		while (lastone->b_next != NULL)
4717 			lastone = lastone->b_next;
4718 		lastone->b_next = mp;
4719 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
4720 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
4721 			lastone = newbie->ipsacq_mp;
4722 			newbie->ipsacq_mp = lastone->b_next;
4723 			lastone->b_next = NULL;
4724 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
4725 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
4726 			    &ipss->ipsec_sadb_dropper);
4727 		} else {
4728 			IP_ACQUIRE_STAT(ipss, qhiwater,
4729 			    newbie->ipsacq_numpackets);
4730 		}
4731 	}
4732 
4733 	/*
4734 	 * Reset addresses.  Set them to the most recently added mblk chain,
4735 	 * so that the address pointers in the acquire record will point
4736 	 * at an mblk still attached to the acquire list.
4737 	 */
4738 
4739 	newbie->ipsacq_srcaddr = src;
4740 	newbie->ipsacq_dstaddr = dst;
4741 
4742 	/*
4743 	 * If the acquire record has more than one queued packet, we've
4744 	 * already sent an ACQUIRE, and don't need to repeat ourself.
4745 	 */
4746 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
4747 		/* I have an acquire outstanding already! */
4748 		mutex_exit(&newbie->ipsacq_lock);
4749 		return;
4750 	}
4751 
4752 	if (keysock_extended_reg(ns)) {
4753 		/*
4754 		 * Construct an extended ACQUIRE.  There are logging
4755 		 * opportunities here in failure cases.
4756 		 */
4757 
4758 		(void) memset(&sel, 0, sizeof (sel));
4759 		sel.ips_isv4 = io->ipsec_out_v4;
4760 		if (tunnel_mode) {
4761 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
4762 			    IPPROTO_ENCAP : IPPROTO_IPV6;
4763 		} else {
4764 			sel.ips_protocol = io->ipsec_out_proto;
4765 			sel.ips_local_port = io->ipsec_out_src_port;
4766 			sel.ips_remote_port = io->ipsec_out_dst_port;
4767 		}
4768 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
4769 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
4770 		sel.ips_is_icmp_inv_acq = 0;
4771 		if (af == AF_INET) {
4772 			sel.ips_local_addr_v4 = ipha->ipha_src;
4773 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
4774 		} else {
4775 			sel.ips_local_addr_v6 = ip6h->ip6_src;
4776 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
4777 		}
4778 
4779 		extended = sadb_keysock_out(0);
4780 		if (extended != NULL) {
4781 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
4782 			    tunnel_mode, seq, 0, ns);
4783 			if (extended->b_cont == NULL) {
4784 				freeb(extended);
4785 				extended = NULL;
4786 			}
4787 		}
4788 	} else
4789 		extended = NULL;
4790 
4791 	/*
4792 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
4793 	 * this new record.  The send-acquire callback assumes that acqrec is
4794 	 * already locked.
4795 	 */
4796 	(*spp->s_acqfn)(newbie, extended, ns);
4797 }
4798 
4799 /*
4800  * Unlink and free an acquire record.
4801  */
4802 void
4803 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
4804 {
4805 	mblk_t *mp;
4806 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4807 
4808 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
4809 
4810 	if (acqrec->ipsacq_policy != NULL) {
4811 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
4812 	}
4813 	if (acqrec->ipsacq_act != NULL) {
4814 		IPACT_REFRELE(acqrec->ipsacq_act);
4815 	}
4816 
4817 	/* Unlink */
4818 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
4819 	if (acqrec->ipsacq_next != NULL)
4820 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
4821 
4822 	/*
4823 	 * Free hanging mp's.
4824 	 *
4825 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
4826 	 */
4827 
4828 	mutex_enter(&acqrec->ipsacq_lock);
4829 	while (acqrec->ipsacq_mp != NULL) {
4830 		mp = acqrec->ipsacq_mp;
4831 		acqrec->ipsacq_mp = mp->b_next;
4832 		mp->b_next = NULL;
4833 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
4834 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
4835 		    &ipss->ipsec_sadb_dropper);
4836 	}
4837 	mutex_exit(&acqrec->ipsacq_lock);
4838 
4839 	/* Free */
4840 	mutex_destroy(&acqrec->ipsacq_lock);
4841 	kmem_free(acqrec, sizeof (*acqrec));
4842 }
4843 
4844 /*
4845  * Destroy an acquire list fanout.
4846  */
4847 static void
4848 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
4849     netstack_t *ns)
4850 {
4851 	int i;
4852 	iacqf_t *list = *listp;
4853 
4854 	if (list == NULL)
4855 		return;
4856 
4857 	for (i = 0; i < numentries; i++) {
4858 		mutex_enter(&(list[i].iacqf_lock));
4859 		while (list[i].iacqf_ipsacq != NULL)
4860 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
4861 		mutex_exit(&(list[i].iacqf_lock));
4862 		if (forever)
4863 			mutex_destroy(&(list[i].iacqf_lock));
4864 	}
4865 
4866 	if (forever) {
4867 		*listp = NULL;
4868 		kmem_free(list, numentries * sizeof (*list));
4869 	}
4870 }
4871 
4872 /*
4873  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
4874  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
4875  */
4876 static uint8_t *
4877 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
4878     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
4879     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
4880 {
4881 	uint8_t *cur = start;
4882 	ipsec_alginfo_t *algp;
4883 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
4884 
4885 	cur += sizeof (*algdesc);
4886 	if (cur >= limit)
4887 		return (NULL);
4888 
4889 	ecomb->sadb_x_ecomb_numalgs++;
4890 
4891 	/*
4892 	 * Normalize vs. crypto framework's limits.  This way, you can specify
4893 	 * a stronger policy, and when the framework loads a stronger version,
4894 	 * you can just keep plowing w/o rewhacking your SPD.
4895 	 */
4896 	mutex_enter(&ipss->ipsec_alg_lock);
4897 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
4898 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
4899 	if (algp == NULL) {
4900 		mutex_exit(&ipss->ipsec_alg_lock);
4901 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
4902 	}
4903 	if (minbits < algp->alg_ef_minbits)
4904 		minbits = algp->alg_ef_minbits;
4905 	if (maxbits > algp->alg_ef_maxbits)
4906 		maxbits = algp->alg_ef_maxbits;
4907 	mutex_exit(&ipss->ipsec_alg_lock);
4908 
4909 	algdesc->sadb_x_algdesc_satype = satype;
4910 	algdesc->sadb_x_algdesc_algtype = algtype;
4911 	algdesc->sadb_x_algdesc_alg = alg;
4912 	algdesc->sadb_x_algdesc_minbits = minbits;
4913 	algdesc->sadb_x_algdesc_maxbits = maxbits;
4914 	algdesc->sadb_x_algdesc_reserved = 0;
4915 	return (cur);
4916 }
4917 
4918 /*
4919  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
4920  * which must fit before *limit
4921  *
4922  * return NULL if we ran out of room or a pointer to the end of the ecomb.
4923  */
4924 static uint8_t *
4925 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
4926     netstack_t *ns)
4927 {
4928 	uint8_t *cur = start;
4929 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
4930 	ipsec_prot_t *ipp;
4931 	ipsec_stack_t *ipss = ns->netstack_ipsec;
4932 
4933 	cur += sizeof (*ecomb);
4934 	if (cur >= limit)
4935 		return (NULL);
4936 
4937 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
4938 
4939 	ipp = &act->ipa_act.ipa_apply;
4940 
4941 	ecomb->sadb_x_ecomb_numalgs = 0;
4942 	ecomb->sadb_x_ecomb_reserved = 0;
4943 	ecomb->sadb_x_ecomb_reserved2 = 0;
4944 	/*
4945 	 * No limits on allocations, since we really don't support that
4946 	 * concept currently.
4947 	 */
4948 	ecomb->sadb_x_ecomb_soft_allocations = 0;
4949 	ecomb->sadb_x_ecomb_hard_allocations = 0;
4950 
4951 	/*
4952 	 * XXX TBD: Policy or global parameters will eventually be
4953 	 * able to fill in some of these.
4954 	 */
4955 	ecomb->sadb_x_ecomb_flags = 0;
4956 	ecomb->sadb_x_ecomb_soft_bytes = 0;
4957 	ecomb->sadb_x_ecomb_hard_bytes = 0;
4958 	ecomb->sadb_x_ecomb_soft_addtime = 0;
4959 	ecomb->sadb_x_ecomb_hard_addtime = 0;
4960 	ecomb->sadb_x_ecomb_soft_usetime = 0;
4961 	ecomb->sadb_x_ecomb_hard_usetime = 0;
4962 
4963 	if (ipp->ipp_use_ah) {
4964 		cur = sadb_new_algdesc(cur, limit, ecomb,
4965 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
4966 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
4967 		if (cur == NULL)
4968 			return (NULL);
4969 		ipsecah_fill_defs(ecomb, ns);
4970 	}
4971 
4972 	if (ipp->ipp_use_esp) {
4973 		if (ipp->ipp_use_espa) {
4974 			cur = sadb_new_algdesc(cur, limit, ecomb,
4975 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
4976 			    ipp->ipp_esp_auth_alg,
4977 			    ipp->ipp_espa_minbits,
4978 			    ipp->ipp_espa_maxbits, ipss);
4979 			if (cur == NULL)
4980 				return (NULL);
4981 		}
4982 
4983 		cur = sadb_new_algdesc(cur, limit, ecomb,
4984 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
4985 		    ipp->ipp_encr_alg,
4986 		    ipp->ipp_espe_minbits,
4987 		    ipp->ipp_espe_maxbits, ipss);
4988 		if (cur == NULL)
4989 			return (NULL);
4990 		/* Fill in lifetimes if and only if AH didn't already... */
4991 		if (!ipp->ipp_use_ah)
4992 			ipsecesp_fill_defs(ecomb, ns);
4993 	}
4994 
4995 	return (cur);
4996 }
4997 
4998 /*
4999  * Construct an extended ACQUIRE message based on a selector and the resulting
5000  * IPsec action.
5001  *
5002  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5003  * generation. As a consequence, expect this function to evolve
5004  * rapidly.
5005  */
5006 static mblk_t *
5007 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5008     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5009     netstack_t *ns)
5010 {
5011 	mblk_t *mp;
5012 	sadb_msg_t *samsg;
5013 	uint8_t *start, *cur, *end;
5014 	uint32_t *saddrptr, *daddrptr;
5015 	sa_family_t af;
5016 	sadb_prop_t *eprop;
5017 	ipsec_action_t *ap, *an;
5018 	ipsec_selkey_t *ipsl;
5019 	uint8_t proto, pfxlen;
5020 	uint16_t lport, rport;
5021 	uint32_t kmp, kmc;
5022 
5023 	/*
5024 	 * Find the action we want sooner rather than later..
5025 	 */
5026 	an = NULL;
5027 	if (pol == NULL) {
5028 		ap = act;
5029 	} else {
5030 		ap = pol->ipsp_act;
5031 
5032 		if (ap != NULL)
5033 			an = ap->ipa_next;
5034 	}
5035 
5036 	/*
5037 	 * Just take a swag for the allocation for now.	 We can always
5038 	 * alter it later.
5039 	 */
5040 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5041 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5042 	if (mp == NULL)
5043 		return (NULL);
5044 
5045 	start = mp->b_rptr;
5046 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5047 
5048 	cur = start;
5049 
5050 	samsg = (sadb_msg_t *)cur;
5051 	cur += sizeof (*samsg);
5052 
5053 	samsg->sadb_msg_version = PF_KEY_V2;
5054 	samsg->sadb_msg_type = SADB_ACQUIRE;
5055 	samsg->sadb_msg_errno = 0;
5056 	samsg->sadb_msg_reserved = 0;
5057 	samsg->sadb_msg_satype = 0;
5058 	samsg->sadb_msg_seq = seq;
5059 	samsg->sadb_msg_pid = pid;
5060 
5061 	if (tunnel_mode) {
5062 		/*
5063 		 * Form inner address extensions based NOT on the inner
5064 		 * selectors (i.e. the packet data), but on the policy's
5065 		 * selector key (i.e. the policy's selector information).
5066 		 *
5067 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5068 		 * same in ipsec_selkey_t (unless the compiler does very
5069 		 * strange things with unions, consult your local C language
5070 		 * lawyer for details).
5071 		 */
5072 		ipsl = &(pol->ipsp_sel->ipsl_key);
5073 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5074 			af = AF_INET;
5075 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5076 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5077 		} else {
5078 			af = AF_INET6;
5079 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5080 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5081 		}
5082 
5083 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5084 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5085 			pfxlen = ipsl->ipsl_local_pfxlen;
5086 		} else {
5087 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5088 			pfxlen = 0;
5089 		}
5090 		/* XXX What about ICMP type/code? */
5091 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5092 		    ipsl->ipsl_lport : 0;
5093 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5094 		    ipsl->ipsl_proto : 0;
5095 
5096 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5097 		    af, saddrptr, lport, proto, pfxlen);
5098 		if (cur == NULL) {
5099 			freeb(mp);
5100 			return (NULL);
5101 		}
5102 
5103 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5104 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5105 			pfxlen = ipsl->ipsl_remote_pfxlen;
5106 		} else {
5107 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5108 			pfxlen = 0;
5109 		}
5110 		/* XXX What about ICMP type/code? */
5111 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5112 		    ipsl->ipsl_rport : 0;
5113 
5114 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5115 		    af, daddrptr, rport, proto, pfxlen);
5116 		if (cur == NULL) {
5117 			freeb(mp);
5118 			return (NULL);
5119 		}
5120 		/*
5121 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5122 		 * _with_ inner-packet address selectors, we'll need to further
5123 		 * distinguish tunnel mode here.  For now, having inner
5124 		 * addresses and/or ports is sufficient.
5125 		 *
5126 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5127 		 * outer addresses.
5128 		 */
5129 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5130 		lport = rport = 0;
5131 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5132 		proto = 0;
5133 		lport = 0;
5134 		rport = 0;
5135 		if (pol != NULL) {
5136 			ipsl = &(pol->ipsp_sel->ipsl_key);
5137 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5138 				proto = ipsl->ipsl_proto;
5139 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5140 				rport = ipsl->ipsl_rport;
5141 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5142 				lport = ipsl->ipsl_lport;
5143 		}
5144 	} else {
5145 		proto = sel->ips_protocol;
5146 		lport = sel->ips_local_port;
5147 		rport = sel->ips_remote_port;
5148 	}
5149 
5150 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5151 
5152 	/*
5153 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5154 	 * ipsec_selector_t.
5155 	 */
5156 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5157 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5158 
5159 	if (cur == NULL) {
5160 		freeb(mp);
5161 		return (NULL);
5162 	}
5163 
5164 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5165 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5166 
5167 	if (cur == NULL) {
5168 		freeb(mp);
5169 		return (NULL);
5170 	}
5171 
5172 	/*
5173 	 * This section will change a lot as policy evolves.
5174 	 * For now, it'll be relatively simple.
5175 	 */
5176 	eprop = (sadb_prop_t *)cur;
5177 	cur += sizeof (*eprop);
5178 	if (cur > end) {
5179 		/* no space left */
5180 		freeb(mp);
5181 		return (NULL);
5182 	}
5183 
5184 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5185 	eprop->sadb_x_prop_ereserved = 0;
5186 	eprop->sadb_x_prop_numecombs = 0;
5187 	eprop->sadb_prop_replay = 32;	/* default */
5188 
5189 	kmc = kmp = 0;
5190 
5191 	for (; ap != NULL; ap = an) {
5192 		an = (pol != NULL) ? ap->ipa_next : NULL;
5193 
5194 		/*
5195 		 * Skip non-IPsec policies
5196 		 */
5197 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5198 			continue;
5199 
5200 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5201 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5202 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5203 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5204 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5205 			eprop->sadb_prop_replay =
5206 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5207 		}
5208 
5209 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5210 		if (cur == NULL) { /* no space */
5211 			freeb(mp);
5212 			return (NULL);
5213 		}
5214 		eprop->sadb_x_prop_numecombs++;
5215 	}
5216 
5217 	if (eprop->sadb_x_prop_numecombs == 0) {
5218 		/*
5219 		 * This will happen if we fail to find a policy
5220 		 * allowing for IPsec processing.
5221 		 * Construct an error message.
5222 		 */
5223 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5224 		samsg->sadb_msg_errno = ENOENT;
5225 		samsg->sadb_x_msg_diagnostic = 0;
5226 		return (mp);
5227 	}
5228 
5229 	if ((kmp != 0) || (kmc != 0)) {
5230 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5231 		if (cur == NULL) {
5232 			freeb(mp);
5233 			return (NULL);
5234 		}
5235 	}
5236 
5237 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5238 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5239 	mp->b_wptr = cur;
5240 
5241 	return (mp);
5242 }
5243 
5244 /*
5245  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5246  *
5247  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5248  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5249  * maximize code consolidation while preventing algorithm changes from messing
5250  * with the callers finishing touches on the ACQUIRE itself.
5251  */
5252 mblk_t *
5253 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5254 {
5255 	uint_t allocsize;
5256 	mblk_t *pfkeymp, *msgmp;
5257 	sa_family_t af;
5258 	uint8_t *cur, *end;
5259 	sadb_msg_t *samsg;
5260 	uint16_t sport_typecode;
5261 	uint16_t dport_typecode;
5262 	uint8_t check_proto;
5263 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5264 
5265 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5266 
5267 	pfkeymp = sadb_keysock_out(0);
5268 	if (pfkeymp == NULL)
5269 		return (NULL);
5270 
5271 	/*
5272 	 * First, allocate a basic ACQUIRE message
5273 	 */
5274 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5275 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5276 
5277 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5278 	allocsize += 2 * sizeof (struct sockaddr_in6);
5279 
5280 	mutex_enter(&ipss->ipsec_alg_lock);
5281 	/* NOTE:  The lock is now held through to this function's return. */
5282 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5283 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5284 
5285 	if (tunnel_mode) {
5286 		/* Tunnel mode! */
5287 		allocsize += 2 * sizeof (sadb_address_t);
5288 		/* Enough to cover both AF_INET and AF_INET6. */
5289 		allocsize += 2 * sizeof (struct sockaddr_in6);
5290 	}
5291 
5292 	msgmp = allocb(allocsize, BPRI_HI);
5293 	if (msgmp == NULL) {
5294 		freeb(pfkeymp);
5295 		mutex_exit(&ipss->ipsec_alg_lock);
5296 		return (NULL);
5297 	}
5298 
5299 	pfkeymp->b_cont = msgmp;
5300 	cur = msgmp->b_rptr;
5301 	end = cur + allocsize;
5302 	samsg = (sadb_msg_t *)cur;
5303 	cur += sizeof (sadb_msg_t);
5304 
5305 	af = acqrec->ipsacq_addrfam;
5306 	switch (af) {
5307 	case AF_INET:
5308 		check_proto = IPPROTO_ICMP;
5309 		break;
5310 	case AF_INET6:
5311 		check_proto = IPPROTO_ICMPV6;
5312 		break;
5313 	default:
5314 		/* This should never happen unless we have kernel bugs. */
5315 		cmn_err(CE_WARN,
5316 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5317 		ASSERT(0);
5318 		mutex_exit(&ipss->ipsec_alg_lock);
5319 		return (NULL);
5320 	}
5321 
5322 	samsg->sadb_msg_version = PF_KEY_V2;
5323 	samsg->sadb_msg_type = SADB_ACQUIRE;
5324 	samsg->sadb_msg_satype = satype;
5325 	samsg->sadb_msg_errno = 0;
5326 	samsg->sadb_msg_pid = 0;
5327 	samsg->sadb_msg_reserved = 0;
5328 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5329 
5330 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5331 
5332 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5333 		sport_typecode = dport_typecode = 0;
5334 	} else {
5335 		sport_typecode = acqrec->ipsacq_srcport;
5336 		dport_typecode = acqrec->ipsacq_dstport;
5337 	}
5338 
5339 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5340 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5341 
5342 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5343 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5344 
5345 	if (tunnel_mode) {
5346 		sport_typecode = acqrec->ipsacq_srcport;
5347 		dport_typecode = acqrec->ipsacq_dstport;
5348 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5349 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5350 		    sport_typecode, acqrec->ipsacq_inner_proto,
5351 		    acqrec->ipsacq_innersrcpfx);
5352 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5353 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5354 		    dport_typecode, acqrec->ipsacq_inner_proto,
5355 		    acqrec->ipsacq_innerdstpfx);
5356 	}
5357 
5358 	/* XXX Insert identity information here. */
5359 
5360 	/* XXXMLS Insert sensitivity information here. */
5361 
5362 	if (cur != NULL)
5363 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5364 	else
5365 		mutex_exit(&ipss->ipsec_alg_lock);
5366 
5367 	return (pfkeymp);
5368 }
5369 
5370 /*
5371  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5372  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5373  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5374  * (ipsa_t *)-1).
5375  *
5376  * master_spi is passed in host order.
5377  */
5378 ipsa_t *
5379 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5380     netstack_t *ns)
5381 {
5382 	sadb_address_t *src =
5383 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5384 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5385 	sadb_spirange_t *range =
5386 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5387 	struct sockaddr_in *ssa, *dsa;
5388 	struct sockaddr_in6 *ssa6, *dsa6;
5389 	uint32_t *srcaddr, *dstaddr;
5390 	sa_family_t af;
5391 	uint32_t add, min, max;
5392 
5393 	if (src == NULL) {
5394 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5395 		return ((ipsa_t *)-1);
5396 	}
5397 	if (dst == NULL) {
5398 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5399 		return ((ipsa_t *)-1);
5400 	}
5401 	if (range == NULL) {
5402 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5403 		return ((ipsa_t *)-1);
5404 	}
5405 
5406 	min = ntohl(range->sadb_spirange_min);
5407 	max = ntohl(range->sadb_spirange_max);
5408 	dsa = (struct sockaddr_in *)(dst + 1);
5409 	dsa6 = (struct sockaddr_in6 *)dsa;
5410 
5411 	ssa = (struct sockaddr_in *)(src + 1);
5412 	ssa6 = (struct sockaddr_in6 *)ssa;
5413 	ASSERT(dsa->sin_family == ssa->sin_family);
5414 
5415 	srcaddr = ALL_ZEROES_PTR;
5416 	af = dsa->sin_family;
5417 	switch (af) {
5418 	case AF_INET:
5419 		if (src != NULL)
5420 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5421 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5422 		break;
5423 	case AF_INET6:
5424 		if (src != NULL)
5425 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5426 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5427 		break;
5428 	default:
5429 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5430 		return ((ipsa_t *)-1);
5431 	}
5432 
5433 	if (master_spi < min || master_spi > max) {
5434 		/* Return a random value in the range. */
5435 		(void) random_get_pseudo_bytes((uint8_t *)&add, sizeof (add));
5436 		master_spi = min + (add % (max - min + 1));
5437 	}
5438 
5439 	/*
5440 	 * Since master_spi is passed in host order, we need to htonl() it
5441 	 * for the purposes of creating a new SA.
5442 	 */
5443 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5444 	    ns));
5445 }
5446 
5447 /*
5448  *
5449  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5450  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5451  * and scan for the sequence number in question.  I may wish to accept an
5452  * address pair with it, for easier searching.
5453  *
5454  * Caller frees the message, so we don't have to here.
5455  *
5456  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5457  *		failures.
5458  */
5459 /* ARGSUSED */
5460 void
5461 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
5462 {
5463 	int i;
5464 	ipsacq_t *acqrec;
5465 	iacqf_t *bucket;
5466 
5467 	/*
5468 	 * I only accept the base header for this!
5469 	 * Though to be honest, requiring the dst address would help
5470 	 * immensely.
5471 	 *
5472 	 * XXX	There are already cases where I can get the dst address.
5473 	 */
5474 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5475 		return;
5476 
5477 	/*
5478 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5479 	 * (and in the future send a message to IP with the appropriate error
5480 	 * number).
5481 	 *
5482 	 * Q: Do I want to reject if pid != 0?
5483 	 */
5484 
5485 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5486 		bucket = &sp->s_v4.sdb_acq[i];
5487 		mutex_enter(&bucket->iacqf_lock);
5488 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5489 		    acqrec = acqrec->ipsacq_next) {
5490 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5491 				break;	/* for acqrec... loop. */
5492 		}
5493 		if (acqrec != NULL)
5494 			break;	/* for i = 0... loop. */
5495 
5496 		mutex_exit(&bucket->iacqf_lock);
5497 	}
5498 
5499 	if (acqrec == NULL) {
5500 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
5501 			bucket = &sp->s_v6.sdb_acq[i];
5502 			mutex_enter(&bucket->iacqf_lock);
5503 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5504 			    acqrec = acqrec->ipsacq_next) {
5505 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5506 					break;	/* for acqrec... loop. */
5507 			}
5508 			if (acqrec != NULL)
5509 				break;	/* for i = 0... loop. */
5510 
5511 			mutex_exit(&bucket->iacqf_lock);
5512 		}
5513 	}
5514 
5515 
5516 	if (acqrec == NULL)
5517 		return;
5518 
5519 	/*
5520 	 * What do I do with the errno and IP?	I may need mp's services a
5521 	 * little more.	 See sadb_destroy_acquire() for future directions
5522 	 * beyond free the mblk chain on the acquire record.
5523 	 */
5524 
5525 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
5526 	sadb_destroy_acquire(acqrec, ns);
5527 	/* Have to exit mutex here, because of breaking out of for loop. */
5528 	mutex_exit(&bucket->iacqf_lock);
5529 }
5530 
5531 /*
5532  * The following functions work with the replay windows of an SA.  They assume
5533  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
5534  * represents the highest sequence number packet received, and back
5535  * (ipsa->ipsa_replay_wsize) packets.
5536  */
5537 
5538 /*
5539  * Is the replay bit set?
5540  */
5541 static boolean_t
5542 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
5543 {
5544 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5545 
5546 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
5547 }
5548 
5549 /*
5550  * Shift the bits of the replay window over.
5551  */
5552 static void
5553 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
5554 {
5555 	int i;
5556 	int jump = ((shift - 1) >> 6) + 1;
5557 
5558 	if (shift == 0)
5559 		return;
5560 
5561 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
5562 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
5563 			ipsa->ipsa_replay_arr[i + jump] |=
5564 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
5565 		}
5566 		ipsa->ipsa_replay_arr[i] <<= shift;
5567 	}
5568 }
5569 
5570 /*
5571  * Set a bit in the bit vector.
5572  */
5573 static void
5574 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
5575 {
5576 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5577 
5578 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
5579 }
5580 
5581 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
5582 
5583 /*
5584  * Assume caller has NOT done ntohl() already on seq.  Check to see
5585  * if replay sequence number "seq" has been seen already.
5586  */
5587 boolean_t
5588 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
5589 {
5590 	boolean_t rc;
5591 	uint32_t diff;
5592 
5593 	if (ipsa->ipsa_replay_wsize == 0)
5594 		return (B_TRUE);
5595 
5596 	/*
5597 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
5598 	 */
5599 
5600 	/* Convert sequence number into host order before holding the mutex. */
5601 	seq = ntohl(seq);
5602 
5603 	mutex_enter(&ipsa->ipsa_lock);
5604 
5605 	/* Initialize inbound SA's ipsa_replay field to last one received. */
5606 	if (ipsa->ipsa_replay == 0)
5607 		ipsa->ipsa_replay = 1;
5608 
5609 	if (seq > ipsa->ipsa_replay) {
5610 		/*
5611 		 * I have received a new "highest value received".  Shift
5612 		 * the replay window over.
5613 		 */
5614 		diff = seq - ipsa->ipsa_replay;
5615 		if (diff < ipsa->ipsa_replay_wsize) {
5616 			/* In replay window, shift bits over. */
5617 			ipsa_shift_replay(ipsa, diff);
5618 		} else {
5619 			/* WAY FAR AHEAD, clear bits and start again. */
5620 			bzero(ipsa->ipsa_replay_arr,
5621 			    sizeof (ipsa->ipsa_replay_arr));
5622 		}
5623 		ipsa_set_replay(ipsa, 0);
5624 		ipsa->ipsa_replay = seq;
5625 		rc = B_TRUE;
5626 		goto done;
5627 	}
5628 	diff = ipsa->ipsa_replay - seq;
5629 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
5630 		rc = B_FALSE;
5631 		goto done;
5632 	}
5633 	/* Set this packet as seen. */
5634 	ipsa_set_replay(ipsa, diff);
5635 
5636 	rc = B_TRUE;
5637 done:
5638 	mutex_exit(&ipsa->ipsa_lock);
5639 	return (rc);
5640 }
5641 
5642 /*
5643  * "Peek" and see if we should even bother going through the effort of
5644  * running an authentication check on the sequence number passed in.
5645  * this takes into account packets that are below the replay window,
5646  * and collisions with already replayed packets.  Return B_TRUE if it
5647  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
5648  * Assume same byte-ordering as sadb_replay_check.
5649  */
5650 boolean_t
5651 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
5652 {
5653 	boolean_t rc = B_FALSE;
5654 	uint32_t diff;
5655 
5656 	if (ipsa->ipsa_replay_wsize == 0)
5657 		return (B_TRUE);
5658 
5659 	/*
5660 	 * 0 is 0, regardless of byte order... :)
5661 	 *
5662 	 * If I get 0 on the wire (and there is a replay window) then the
5663 	 * sender most likely wrapped.	This ipsa may need to be marked or
5664 	 * something.
5665 	 */
5666 	if (seq == 0)
5667 		return (B_FALSE);
5668 
5669 	seq = ntohl(seq);
5670 	mutex_enter(&ipsa->ipsa_lock);
5671 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
5672 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
5673 		goto done;
5674 
5675 	/*
5676 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
5677 	 * bother with formalities.  I'm not accepting any more packets
5678 	 * on this SA.
5679 	 */
5680 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
5681 		/*
5682 		 * Since we're already holding the lock, update the
5683 		 * expire time ala. sadb_replay_delete() and return.
5684 		 */
5685 		ipsa->ipsa_hardexpiretime = (time_t)1;
5686 		goto done;
5687 	}
5688 
5689 	if (seq <= ipsa->ipsa_replay) {
5690 		/*
5691 		 * This seq is in the replay window.  I'm not below it,
5692 		 * because I already checked for that above!
5693 		 */
5694 		diff = ipsa->ipsa_replay - seq;
5695 		if (ipsa_is_replay_set(ipsa, diff))
5696 			goto done;
5697 	}
5698 	/* Else return B_TRUE, I'm going to advance the window. */
5699 
5700 	rc = B_TRUE;
5701 done:
5702 	mutex_exit(&ipsa->ipsa_lock);
5703 	return (rc);
5704 }
5705 
5706 /*
5707  * Delete a single SA.
5708  *
5709  * For now, use the quick-and-dirty trick of making the association's
5710  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
5711  */
5712 void
5713 sadb_replay_delete(ipsa_t *assoc)
5714 {
5715 	mutex_enter(&assoc->ipsa_lock);
5716 	assoc->ipsa_hardexpiretime = (time_t)1;
5717 	mutex_exit(&assoc->ipsa_lock);
5718 }
5719 
5720 /*
5721  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
5722  * down.  The caller will handle the T_BIND_ACK locally.
5723  */
5724 boolean_t
5725 sadb_t_bind_req(queue_t *q, int proto)
5726 {
5727 	struct T_bind_req *tbr;
5728 	mblk_t *mp;
5729 
5730 	mp = allocb(sizeof (struct T_bind_req) + 1, BPRI_HI);
5731 	if (mp == NULL) {
5732 		/* cmn_err(CE_WARN, */
5733 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
5734 		return (B_FALSE);
5735 	}
5736 	mp->b_datap->db_type = M_PCPROTO;
5737 	tbr = (struct T_bind_req *)mp->b_rptr;
5738 	mp->b_wptr += sizeof (struct T_bind_req);
5739 	tbr->PRIM_type = T_BIND_REQ;
5740 	tbr->ADDR_length = 0;
5741 	tbr->ADDR_offset = 0;
5742 	tbr->CONIND_number = 0;
5743 	*mp->b_wptr = (uint8_t)proto;
5744 	mp->b_wptr++;
5745 
5746 	putnext(q, mp);
5747 	return (B_TRUE);
5748 }
5749 
5750 /*
5751  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
5752  * this is designed to take only a format string with "* %x * %s *", so
5753  * that "spi" is printed first, then "addr" is converted using inet_pton().
5754  *
5755  * This is abstracted out to save the stack space for only when inet_pton()
5756  * is called.  Make sure "spi" is in network order; it usually is when this
5757  * would get called.
5758  */
5759 void
5760 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
5761     uint32_t spi, void *addr, int af, netstack_t *ns)
5762 {
5763 	char buf[INET6_ADDRSTRLEN];
5764 
5765 	ASSERT(af == AF_INET6 || af == AF_INET);
5766 
5767 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
5768 	    inet_ntop(af, addr, buf, sizeof (buf)));
5769 }
5770 
5771 /*
5772  * Fills in a reference to the policy, if any, from the conn, in *ppp
5773  * Releases a reference to the passed conn_t.
5774  */
5775 static void
5776 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
5777 {
5778 	ipsec_policy_t	*pp;
5779 	ipsec_latch_t	*ipl = connp->conn_latch;
5780 
5781 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
5782 		pp = ipl->ipl_out_policy;
5783 		IPPOL_REFHOLD(pp);
5784 	} else {
5785 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
5786 		    connp->conn_netstack);
5787 	}
5788 	*ppp = pp;
5789 	CONN_DEC_REF(connp);
5790 }
5791 
5792 /*
5793  * The following functions scan through active conn_t structures
5794  * and return a reference to the best-matching policy it can find.
5795  * Caller must release the reference.
5796  */
5797 static void
5798 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5799 {
5800 	connf_t *connfp;
5801 	conn_t *connp = NULL;
5802 	ipsec_selector_t portonly;
5803 
5804 	bzero((void*)&portonly, sizeof (portonly));
5805 
5806 	if (sel->ips_local_port == 0)
5807 		return;
5808 
5809 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
5810 	    ipst)];
5811 	mutex_enter(&connfp->connf_lock);
5812 
5813 	if (sel->ips_isv4) {
5814 		connp = connfp->connf_head;
5815 		while (connp != NULL) {
5816 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
5817 			    sel->ips_local_addr_v4, sel->ips_remote_port,
5818 			    sel->ips_remote_addr_v4))
5819 				break;
5820 			connp = connp->conn_next;
5821 		}
5822 
5823 		if (connp == NULL) {
5824 			/* Try port-only match in IPv6. */
5825 			portonly.ips_local_port = sel->ips_local_port;
5826 			sel = &portonly;
5827 		}
5828 	}
5829 
5830 	if (connp == NULL) {
5831 		connp = connfp->connf_head;
5832 		while (connp != NULL) {
5833 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
5834 			    sel->ips_local_addr_v6, sel->ips_remote_port,
5835 			    sel->ips_remote_addr_v6))
5836 				break;
5837 			connp = connp->conn_next;
5838 		}
5839 
5840 		if (connp == NULL) {
5841 			mutex_exit(&connfp->connf_lock);
5842 			return;
5843 		}
5844 	}
5845 
5846 	CONN_INC_REF(connp);
5847 	mutex_exit(&connfp->connf_lock);
5848 
5849 	ipsec_conn_pol(sel, connp, ppp);
5850 }
5851 
5852 static conn_t *
5853 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
5854 {
5855 	connf_t *connfp;
5856 	conn_t *connp = NULL;
5857 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
5858 
5859 	if (sel->ips_local_port == 0)
5860 		return (NULL);
5861 
5862 	connfp = &ipst->ips_ipcl_bind_fanout[
5863 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
5864 	mutex_enter(&connfp->connf_lock);
5865 
5866 	if (sel->ips_isv4) {
5867 		connp = connfp->connf_head;
5868 		while (connp != NULL) {
5869 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
5870 			    sel->ips_local_addr_v4, pptr[1]))
5871 				break;
5872 			connp = connp->conn_next;
5873 		}
5874 
5875 		if (connp == NULL) {
5876 			/* Match to all-zeroes. */
5877 			v6addrmatch = &ipv6_all_zeros;
5878 		}
5879 	}
5880 
5881 	if (connp == NULL) {
5882 		connp = connfp->connf_head;
5883 		while (connp != NULL) {
5884 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
5885 			    *v6addrmatch, pptr[1]))
5886 				break;
5887 			connp = connp->conn_next;
5888 		}
5889 
5890 		if (connp == NULL) {
5891 			mutex_exit(&connfp->connf_lock);
5892 			return (NULL);
5893 		}
5894 	}
5895 
5896 	CONN_INC_REF(connp);
5897 	mutex_exit(&connfp->connf_lock);
5898 	return (connp);
5899 }
5900 
5901 static void
5902 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5903 {
5904 	connf_t 	*connfp;
5905 	conn_t		*connp;
5906 	uint32_t	ports;
5907 	uint16_t	*pptr = (uint16_t *)&ports;
5908 
5909 	/*
5910 	 * Find TCP state in the following order:
5911 	 * 1.) Connected conns.
5912 	 * 2.) Listeners.
5913 	 *
5914 	 * Even though #2 will be the common case for inbound traffic, only
5915 	 * following this order insures correctness.
5916 	 */
5917 
5918 	if (sel->ips_local_port == 0)
5919 		return;
5920 
5921 	/*
5922 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5923 	 * See ipsec_construct_inverse_acquire() for details.
5924 	 */
5925 	pptr[0] = sel->ips_remote_port;
5926 	pptr[1] = sel->ips_local_port;
5927 
5928 	connfp = &ipst->ips_ipcl_conn_fanout[
5929 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
5930 	mutex_enter(&connfp->connf_lock);
5931 	connp = connfp->connf_head;
5932 
5933 	if (sel->ips_isv4) {
5934 		while (connp != NULL) {
5935 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
5936 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
5937 			    ports))
5938 				break;
5939 			connp = connp->conn_next;
5940 		}
5941 	} else {
5942 		while (connp != NULL) {
5943 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
5944 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
5945 			    ports))
5946 				break;
5947 			connp = connp->conn_next;
5948 		}
5949 	}
5950 
5951 	if (connp != NULL) {
5952 		CONN_INC_REF(connp);
5953 		mutex_exit(&connfp->connf_lock);
5954 	} else {
5955 		mutex_exit(&connfp->connf_lock);
5956 
5957 		/* Try the listen hash. */
5958 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
5959 			return;
5960 	}
5961 
5962 	ipsec_conn_pol(sel, connp, ppp);
5963 }
5964 
5965 static void
5966 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
5967     ip_stack_t *ipst)
5968 {
5969 	conn_t		*connp;
5970 	uint32_t	ports;
5971 	uint16_t	*pptr = (uint16_t *)&ports;
5972 
5973 	/*
5974 	 * Find SCP state in the following order:
5975 	 * 1.) Connected conns.
5976 	 * 2.) Listeners.
5977 	 *
5978 	 * Even though #2 will be the common case for inbound traffic, only
5979 	 * following this order insures correctness.
5980 	 */
5981 
5982 	if (sel->ips_local_port == 0)
5983 		return;
5984 
5985 	/*
5986 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5987 	 * See ipsec_construct_inverse_acquire() for details.
5988 	 */
5989 	pptr[0] = sel->ips_remote_port;
5990 	pptr[1] = sel->ips_local_port;
5991 
5992 	if (sel->ips_isv4) {
5993 		in6_addr_t	src, dst;
5994 
5995 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
5996 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
5997 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
5998 		    ipst->ips_netstack->netstack_sctp);
5999 	} else {
6000 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6001 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6002 		    ipst->ips_netstack->netstack_sctp);
6003 	}
6004 	if (connp == NULL)
6005 		return;
6006 	ipsec_conn_pol(sel, connp, ppp);
6007 }
6008 
6009 /*
6010  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6011  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6012  * to PF_KEY.
6013  *
6014  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6015  * ignore prefix lengths in the address extension.  Since we match on first-
6016  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6017  * set addresses to mask out the lower bits, we should get a suitable search
6018  * key for the SPD anyway.  This is the function to change if the assumption
6019  * about suitable search keys is wrong.
6020  */
6021 static int
6022 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6023     sadb_address_t *dstext, int *diagnostic)
6024 {
6025 	struct sockaddr_in *src, *dst;
6026 	struct sockaddr_in6 *src6, *dst6;
6027 
6028 	*diagnostic = 0;
6029 
6030 	bzero(sel, sizeof (*sel));
6031 	sel->ips_protocol = srcext->sadb_address_proto;
6032 	dst = (struct sockaddr_in *)(dstext + 1);
6033 	if (dst->sin_family == AF_INET6) {
6034 		dst6 = (struct sockaddr_in6 *)dst;
6035 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6036 		if (src6->sin6_family != AF_INET6) {
6037 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6038 			return (EINVAL);
6039 		}
6040 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6041 		sel->ips_local_addr_v6 = src6->sin6_addr;
6042 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6043 			sel->ips_is_icmp_inv_acq = 1;
6044 		} else {
6045 			sel->ips_remote_port = dst6->sin6_port;
6046 			sel->ips_local_port = src6->sin6_port;
6047 		}
6048 		sel->ips_isv4 = B_FALSE;
6049 	} else {
6050 		src = (struct sockaddr_in *)(srcext + 1);
6051 		if (src->sin_family != AF_INET) {
6052 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6053 			return (EINVAL);
6054 		}
6055 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6056 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6057 		if (sel->ips_protocol == IPPROTO_ICMP) {
6058 			sel->ips_is_icmp_inv_acq = 1;
6059 		} else {
6060 			sel->ips_remote_port = dst->sin_port;
6061 			sel->ips_local_port = src->sin_port;
6062 		}
6063 		sel->ips_isv4 = B_TRUE;
6064 	}
6065 	return (0);
6066 }
6067 
6068 /*
6069  * We have encapsulation.
6070  * - Lookup tun_t by address and look for an associated
6071  *   tunnel policy
6072  * - If there are inner selectors
6073  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6074  *   - Look up tunnel policy based on selectors
6075  * - Else
6076  *   - Sanity check the negotation
6077  *   - If appropriate, fall through to global policy
6078  */
6079 static int
6080 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6081     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6082     int *diagnostic, netstack_t *ns)
6083 {
6084 	int err;
6085 	ipsec_policy_head_t *polhead;
6086 
6087 	/* Check for inner selectors and act appropriately */
6088 
6089 	if (innsrcext != NULL) {
6090 		/* Inner selectors present */
6091 		ASSERT(inndstext != NULL);
6092 		if ((itp == NULL) ||
6093 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6094 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6095 			/*
6096 			 * If inner packet selectors, we must have negotiate
6097 			 * tunnel and active policy.  If the tunnel has
6098 			 * transport-mode policy set on it, or has no policy,
6099 			 * fail.
6100 			 */
6101 			return (ENOENT);
6102 		} else {
6103 			/*
6104 			 * Reset "sel" to indicate inner selectors.  Pass
6105 			 * inner PF_KEY address extensions for this to happen.
6106 			 */
6107 			err = ipsec_get_inverse_acquire_sel(sel,
6108 			    innsrcext, inndstext, diagnostic);
6109 			if (err != 0) {
6110 				ITP_REFRELE(itp, ns);
6111 				return (err);
6112 			}
6113 			/*
6114 			 * Now look for a tunnel policy based on those inner
6115 			 * selectors.  (Common code is below.)
6116 			 */
6117 		}
6118 	} else {
6119 		/* No inner selectors present */
6120 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6121 			/*
6122 			 * Transport mode negotiation with no tunnel policy
6123 			 * configured - return to indicate a global policy
6124 			 * check is needed.
6125 			 */
6126 			if (itp != NULL) {
6127 				ITP_REFRELE(itp, ns);
6128 			}
6129 			return (0);
6130 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6131 			/* Tunnel mode set with no inner selectors. */
6132 			ITP_REFRELE(itp, ns);
6133 			return (ENOENT);
6134 		}
6135 		/*
6136 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6137 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6138 		 * itp with policy set based on any match, so don't bother
6139 		 * changing fields in "sel".
6140 		 */
6141 	}
6142 
6143 	ASSERT(itp != NULL);
6144 	polhead = itp->itp_policy;
6145 	ASSERT(polhead != NULL);
6146 	rw_enter(&polhead->iph_lock, RW_READER);
6147 	*ppp = ipsec_find_policy_head(NULL, polhead,
6148 	    IPSEC_TYPE_INBOUND, sel, ns);
6149 	rw_exit(&polhead->iph_lock);
6150 	ITP_REFRELE(itp, ns);
6151 
6152 	/*
6153 	 * Don't default to global if we didn't find a matching policy entry.
6154 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6155 	 */
6156 	if (*ppp == NULL)
6157 		return (ENOENT);
6158 
6159 	return (0);
6160 }
6161 
6162 static void
6163 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6164     ip_stack_t *ipst)
6165 {
6166 	boolean_t	isv4 = sel->ips_isv4;
6167 	connf_t		*connfp;
6168 	conn_t		*connp;
6169 
6170 	if (isv4) {
6171 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
6172 	} else {
6173 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6174 	}
6175 
6176 	mutex_enter(&connfp->connf_lock);
6177 	for (connp = connfp->connf_head; connp != NULL;
6178 	    connp = connp->conn_next) {
6179 		if (!((isv4 && !((connp->conn_src == 0 ||
6180 		    connp->conn_src == sel->ips_local_addr_v4) &&
6181 		    (connp->conn_rem == 0 ||
6182 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
6183 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
6184 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
6185 		    &sel->ips_local_addr_v6)) &&
6186 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
6187 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
6188 		    &sel->ips_remote_addr_v6)))))) {
6189 			break;
6190 		}
6191 	}
6192 	if (connp == NULL) {
6193 		mutex_exit(&connfp->connf_lock);
6194 		return;
6195 	}
6196 
6197 	CONN_INC_REF(connp);
6198 	mutex_exit(&connfp->connf_lock);
6199 
6200 	ipsec_conn_pol(sel, connp, ppp);
6201 }
6202 
6203 /*
6204  * Construct an inverse ACQUIRE reply based on:
6205  *
6206  * 1.) Current global policy.
6207  * 2.) An conn_t match depending on what all was passed in the extv[].
6208  * 3.) A tunnel's policy head.
6209  * ...
6210  * N.) Other stuff TBD (e.g. identities)
6211  *
6212  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6213  * in this function so the caller can extract them where appropriately.
6214  *
6215  * The SRC address is the local one - just like an outbound ACQUIRE message.
6216  */
6217 mblk_t *
6218 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6219     netstack_t *ns)
6220 {
6221 	int err;
6222 	int diagnostic;
6223 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6224 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6225 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6226 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6227 	struct sockaddr_in6 *src, *dst;
6228 	struct sockaddr_in6 *isrc, *idst;
6229 	ipsec_tun_pol_t *itp = NULL;
6230 	ipsec_policy_t *pp = NULL;
6231 	ipsec_selector_t sel, isel;
6232 	mblk_t *retmp;
6233 	ip_stack_t	*ipst = ns->netstack_ip;
6234 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6235 
6236 	/* Normalize addresses */
6237 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6238 	    == KS_IN_ADDR_UNKNOWN) {
6239 		err = EINVAL;
6240 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6241 		goto bail;
6242 	}
6243 	src = (struct sockaddr_in6 *)(srcext + 1);
6244 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6245 	    == KS_IN_ADDR_UNKNOWN) {
6246 		err = EINVAL;
6247 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6248 		goto bail;
6249 	}
6250 	dst = (struct sockaddr_in6 *)(dstext + 1);
6251 	if (src->sin6_family != dst->sin6_family) {
6252 		err = EINVAL;
6253 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6254 		goto bail;
6255 	}
6256 
6257 	/* Check for tunnel mode and act appropriately */
6258 	if (innsrcext != NULL) {
6259 		if (inndstext == NULL) {
6260 			err = EINVAL;
6261 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6262 			goto bail;
6263 		}
6264 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6265 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6266 			err = EINVAL;
6267 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6268 			goto bail;
6269 		}
6270 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6271 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6272 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6273 			err = EINVAL;
6274 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6275 			goto bail;
6276 		}
6277 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6278 		if (isrc->sin6_family != idst->sin6_family) {
6279 			err = EINVAL;
6280 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6281 			goto bail;
6282 		}
6283 		if (isrc->sin6_family != AF_INET &&
6284 		    isrc->sin6_family != AF_INET6) {
6285 			err = EINVAL;
6286 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6287 			goto bail;
6288 		}
6289 	} else if (inndstext != NULL) {
6290 		err = EINVAL;
6291 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6292 		goto bail;
6293 	}
6294 
6295 	/* Get selectors first, based on outer addresses */
6296 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6297 	if (err != 0)
6298 		goto bail;
6299 
6300 	/* Check for tunnel mode mismatches. */
6301 	if (innsrcext != NULL &&
6302 	    ((isrc->sin6_family == AF_INET &&
6303 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6304 	    (isrc->sin6_family == AF_INET6 &&
6305 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6306 		err = EPROTOTYPE;
6307 		goto bail;
6308 	}
6309 
6310 	/*
6311 	 * Okay, we have the addresses and other selector information.
6312 	 * Let's first find a conn...
6313 	 */
6314 	pp = NULL;
6315 	switch (sel.ips_protocol) {
6316 	case IPPROTO_TCP:
6317 		ipsec_tcp_pol(&sel, &pp, ipst);
6318 		break;
6319 	case IPPROTO_UDP:
6320 		ipsec_udp_pol(&sel, &pp, ipst);
6321 		break;
6322 	case IPPROTO_SCTP:
6323 		ipsec_sctp_pol(&sel, &pp, ipst);
6324 		break;
6325 	case IPPROTO_ENCAP:
6326 	case IPPROTO_IPV6:
6327 		rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER);
6328 		/*
6329 		 * Assume sel.ips_remote_addr_* has the right address at
6330 		 * that exact position.
6331 		 */
6332 		itp = ipss->ipsec_itp_get_byaddr(
6333 		    (uint32_t *)(&sel.ips_local_addr_v6),
6334 		    (uint32_t *)(&sel.ips_remote_addr_v6),
6335 		    src->sin6_family, ns);
6336 		rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
6337 		if (innsrcext == NULL) {
6338 			/*
6339 			 * Transport-mode tunnel, make sure we fake out isel
6340 			 * to contain something based on the outer protocol.
6341 			 */
6342 			bzero(&isel, sizeof (isel));
6343 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6344 		} /* Else isel is initialized by ipsec_tun_pol(). */
6345 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6346 		    &diagnostic, ns);
6347 		/*
6348 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6349 		 * may be.
6350 		 */
6351 		if (err != 0)
6352 			goto bail;
6353 		break;
6354 	default:
6355 		ipsec_oth_pol(&sel, &pp, ipst);
6356 		break;
6357 	}
6358 
6359 	/*
6360 	 * If we didn't find a matching conn_t or other policy head, take a
6361 	 * look in the global policy.
6362 	 */
6363 	if (pp == NULL) {
6364 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6365 		    ns);
6366 		if (pp == NULL) {
6367 			/* There's no global policy. */
6368 			err = ENOENT;
6369 			diagnostic = 0;
6370 			goto bail;
6371 		}
6372 	}
6373 
6374 	/*
6375 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6376 	 * message based on that, fix fields where appropriate,
6377 	 * and return the message.
6378 	 */
6379 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6380 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6381 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6382 	if (pp != NULL) {
6383 		IPPOL_REFRELE(pp, ns);
6384 	}
6385 	if (retmp != NULL) {
6386 		return (retmp);
6387 	} else {
6388 		err = ENOMEM;
6389 		diagnostic = 0;
6390 	}
6391 bail:
6392 	samsg->sadb_msg_errno = (uint8_t)err;
6393 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6394 	return (NULL);
6395 }
6396 
6397 /*
6398  * ipsa_lpkt is a one-element queue, only manipulated by casptr within
6399  * the next two functions.
6400  *
6401  * These functions loop calling casptr() until the swap "happens",
6402  * turning a compare-and-swap op into an atomic swap operation.
6403  */
6404 
6405 /*
6406  * sadb_set_lpkt: Atomically swap in a value to ipsa->ipsa_lpkt and
6407  * freemsg the previous value.  free clue: freemsg(NULL) is safe.
6408  */
6409 
6410 void
6411 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
6412 {
6413 	mblk_t *opkt;
6414 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6415 
6416 	membar_producer();
6417 	do {
6418 		opkt = ipsa->ipsa_lpkt;
6419 	} while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt);
6420 
6421 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
6422 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
6423 	    &ipss->ipsec_sadb_dropper);
6424 }
6425 
6426 /*
6427  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6428  * previous value.
6429  */
6430 
6431 mblk_t *
6432 sadb_clear_lpkt(ipsa_t *ipsa)
6433 {
6434 	mblk_t *opkt;
6435 
6436 	do {
6437 		opkt = ipsa->ipsa_lpkt;
6438 	} while (casptr(&ipsa->ipsa_lpkt, opkt, NULL) != opkt);
6439 
6440 	return (opkt);
6441 }
6442 
6443 /*
6444  * Walker callback used by sadb_alg_update() to free/create crypto
6445  * context template when a crypto software provider is removed or
6446  * added.
6447  */
6448 
6449 struct sadb_update_alg_state {
6450 	ipsec_algtype_t alg_type;
6451 	uint8_t alg_id;
6452 	boolean_t is_added;
6453 };
6454 
6455 static void
6456 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
6457 {
6458 	struct sadb_update_alg_state *update_state =
6459 	    (struct sadb_update_alg_state *)cookie;
6460 	crypto_ctx_template_t *ctx_tmpl = NULL;
6461 
6462 	ASSERT(MUTEX_HELD(&head->isaf_lock));
6463 
6464 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
6465 		return;
6466 
6467 	mutex_enter(&entry->ipsa_lock);
6468 
6469 	switch (update_state->alg_type) {
6470 	case IPSEC_ALG_AUTH:
6471 		if (entry->ipsa_auth_alg == update_state->alg_id)
6472 			ctx_tmpl = &entry->ipsa_authtmpl;
6473 		break;
6474 	case IPSEC_ALG_ENCR:
6475 		if (entry->ipsa_encr_alg == update_state->alg_id)
6476 			ctx_tmpl = &entry->ipsa_encrtmpl;
6477 		break;
6478 	default:
6479 		ctx_tmpl = NULL;
6480 	}
6481 
6482 	if (ctx_tmpl == NULL) {
6483 		mutex_exit(&entry->ipsa_lock);
6484 		return;
6485 	}
6486 
6487 	/*
6488 	 * The context template of the SA may be affected by the change
6489 	 * of crypto provider.
6490 	 */
6491 	if (update_state->is_added) {
6492 		/* create the context template if not already done */
6493 		if (*ctx_tmpl == NULL) {
6494 			(void) ipsec_create_ctx_tmpl(entry,
6495 			    update_state->alg_type);
6496 		}
6497 	} else {
6498 		/*
6499 		 * The crypto provider was removed. If the context template
6500 		 * exists but it is no longer valid, free it.
6501 		 */
6502 		if (*ctx_tmpl != NULL)
6503 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
6504 	}
6505 
6506 	mutex_exit(&entry->ipsa_lock);
6507 }
6508 
6509 /*
6510  * Invoked by IP when an software crypto provider has been updated.
6511  * The type and id of the corresponding algorithm is passed as argument.
6512  * is_added is B_TRUE if the provider was added, B_FALSE if it was
6513  * removed. The function updates the SADB and free/creates the
6514  * context templates associated with SAs if needed.
6515  */
6516 
6517 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
6518     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
6519 	&update_state)
6520 
6521 void
6522 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
6523     netstack_t *ns)
6524 {
6525 	struct sadb_update_alg_state update_state;
6526 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
6527 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
6528 
6529 	update_state.alg_type = alg_type;
6530 	update_state.alg_id = alg_id;
6531 	update_state.is_added = is_added;
6532 
6533 	if (alg_type == IPSEC_ALG_AUTH) {
6534 		/* walk the AH tables only for auth. algorithm changes */
6535 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
6536 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
6537 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
6538 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
6539 	}
6540 
6541 	/* walk the ESP tables */
6542 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
6543 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
6544 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
6545 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
6546 }
6547 
6548 /*
6549  * Creates a context template for the specified SA. This function
6550  * is called when an SA is created and when a context template needs
6551  * to be created due to a change of software provider.
6552  */
6553 int
6554 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6555 {
6556 	ipsec_alginfo_t *alg;
6557 	crypto_mechanism_t mech;
6558 	crypto_key_t *key;
6559 	crypto_ctx_template_t *sa_tmpl;
6560 	int rv;
6561 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
6562 
6563 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
6564 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6565 
6566 	/* get pointers to the algorithm info, context template, and key */
6567 	switch (alg_type) {
6568 	case IPSEC_ALG_AUTH:
6569 		key = &sa->ipsa_kcfauthkey;
6570 		sa_tmpl = &sa->ipsa_authtmpl;
6571 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
6572 		break;
6573 	case IPSEC_ALG_ENCR:
6574 		key = &sa->ipsa_kcfencrkey;
6575 		sa_tmpl = &sa->ipsa_encrtmpl;
6576 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
6577 		break;
6578 	default:
6579 		alg = NULL;
6580 	}
6581 
6582 	if (alg == NULL || !ALG_VALID(alg))
6583 		return (EINVAL);
6584 
6585 	/* initialize the mech info structure for the framework */
6586 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
6587 	mech.cm_type = alg->alg_mech_type;
6588 	mech.cm_param = NULL;
6589 	mech.cm_param_len = 0;
6590 
6591 	/* create a new context template */
6592 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
6593 
6594 	/*
6595 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
6596 	 * providers are available for that mechanism. In that case
6597 	 * we don't fail, and will generate the context template from
6598 	 * the framework callback when a software provider for that
6599 	 * mechanism registers.
6600 	 *
6601 	 * The context template is assigned the special value
6602 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
6603 	 * lack of memory. No attempt will be made to use
6604 	 * the context template if it is set to this value.
6605 	 */
6606 	if (rv == CRYPTO_HOST_MEMORY) {
6607 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
6608 	} else if (rv != CRYPTO_SUCCESS) {
6609 		*sa_tmpl = NULL;
6610 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
6611 			return (EINVAL);
6612 	}
6613 
6614 	return (0);
6615 }
6616 
6617 /*
6618  * Destroy the context template of the specified algorithm type
6619  * of the specified SA. Must be called while holding the SA lock.
6620  */
6621 void
6622 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6623 {
6624 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6625 
6626 	if (alg_type == IPSEC_ALG_AUTH) {
6627 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
6628 			sa->ipsa_authtmpl = NULL;
6629 		else if (sa->ipsa_authtmpl != NULL) {
6630 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
6631 			sa->ipsa_authtmpl = NULL;
6632 		}
6633 	} else {
6634 		ASSERT(alg_type == IPSEC_ALG_ENCR);
6635 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
6636 			sa->ipsa_encrtmpl = NULL;
6637 		else if (sa->ipsa_encrtmpl != NULL) {
6638 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
6639 			sa->ipsa_encrtmpl = NULL;
6640 		}
6641 	}
6642 }
6643 
6644 /*
6645  * Use the kernel crypto framework to check the validity of a key received
6646  * via keysock. Returns 0 if the key is OK, -1 otherwise.
6647  */
6648 int
6649 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
6650     boolean_t is_auth, int *diag)
6651 {
6652 	crypto_mechanism_t mech;
6653 	crypto_key_t crypto_key;
6654 	int crypto_rc;
6655 
6656 	mech.cm_type = mech_type;
6657 	mech.cm_param = NULL;
6658 	mech.cm_param_len = 0;
6659 
6660 	crypto_key.ck_format = CRYPTO_KEY_RAW;
6661 	crypto_key.ck_data = sadb_key + 1;
6662 	crypto_key.ck_length = sadb_key->sadb_key_bits;
6663 
6664 	crypto_rc = crypto_key_check(&mech, &crypto_key);
6665 
6666 	switch (crypto_rc) {
6667 	case CRYPTO_SUCCESS:
6668 		return (0);
6669 	case CRYPTO_MECHANISM_INVALID:
6670 	case CRYPTO_MECH_NOT_SUPPORTED:
6671 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
6672 		    SADB_X_DIAGNOSTIC_BAD_EALG;
6673 		break;
6674 	case CRYPTO_KEY_SIZE_RANGE:
6675 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
6676 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
6677 		break;
6678 	case CRYPTO_WEAK_KEY:
6679 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
6680 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
6681 		break;
6682 	}
6683 
6684 	return (-1);
6685 }
6686 /*
6687  * If this is an outgoing SA then add some fuzz to the
6688  * SOFT EXPIRE time. The reason for this is to stop
6689  * peers trying to renegotiate SOFT expiring SA's at
6690  * the same time. The amount of fuzz needs to be at
6691  * least 10 seconds which is the typical interval
6692  * sadb_ager(), although this is only a guide as it
6693  * selftunes.
6694  */
6695 void
6696 lifetime_fuzz(ipsa_t *assoc)
6697 {
6698 	uint8_t rnd;
6699 
6700 	if (assoc->ipsa_softaddlt == 0)
6701 		return;
6702 
6703 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
6704 	rnd = (rnd & 0xF) + 10;
6705 	assoc->ipsa_softexpiretime -= rnd;
6706 	assoc->ipsa_softaddlt -= rnd;
6707 }
6708 void
6709 destroy_ipsa_pair(ipsap_t *ipsapp)
6710 {
6711 	if (ipsapp == NULL)
6712 		return;
6713 
6714 	/*
6715 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
6716 	 * them in { }.
6717 	 */
6718 	if (ipsapp->ipsap_sa_ptr != NULL) {
6719 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
6720 	}
6721 	if (ipsapp->ipsap_psa_ptr != NULL) {
6722 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
6723 	}
6724 
6725 	kmem_free(ipsapp, sizeof (*ipsapp));
6726 }
6727 
6728 /*
6729  * The sadb_ager() function walks through the hash tables of SA's and ages
6730  * them, if the SA expires as a result, its marked as DEAD and will be reaped
6731  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
6732  * SA appears in both the inbound and outbound tables because its not possible
6733  * to determine its direction) are placed on a list when they expire. This is
6734  * to ensure that pair/peer SA's are reaped at the same time, even if they
6735  * expire at different times.
6736  *
6737  * This function is called twice by sadb_ager(), one after processing the
6738  * inbound table, then again after processing the outbound table.
6739  */
6740 void
6741 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
6742 {
6743 	templist_t *listptr;
6744 	int outhash;
6745 	isaf_t *bucket;
6746 	boolean_t haspeer;
6747 	ipsa_t *peer_assoc, *dying;
6748 	/*
6749 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
6750 	 * is address independent.
6751 	 */
6752 	while (haspeerlist != NULL) {
6753 		/* "dying" contains the SA that has a peer. */
6754 		dying = haspeerlist->ipsa;
6755 		haspeer = (dying->ipsa_haspeer);
6756 		listptr = haspeerlist;
6757 		haspeerlist = listptr->next;
6758 		kmem_free(listptr, sizeof (*listptr));
6759 		/*
6760 		 * Pick peer bucket based on addrfam.
6761 		 */
6762 		if (outbound) {
6763 			if (haspeer)
6764 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
6765 			else
6766 				bucket = INBOUND_BUCKET(sp,
6767 				    dying->ipsa_otherspi);
6768 		} else { /* inbound */
6769 			if (haspeer) {
6770 				if (dying->ipsa_addrfam == AF_INET6) {
6771 					outhash = OUTBOUND_HASH_V6(sp,
6772 					    *((in6_addr_t *)&dying->
6773 					    ipsa_dstaddr));
6774 				} else {
6775 					outhash = OUTBOUND_HASH_V4(sp,
6776 					    *((ipaddr_t *)&dying->
6777 					    ipsa_dstaddr));
6778 				}
6779 			} else if (dying->ipsa_addrfam == AF_INET6) {
6780 				outhash = OUTBOUND_HASH_V6(sp,
6781 				    *((in6_addr_t *)&dying->
6782 				    ipsa_srcaddr));
6783 			} else {
6784 				outhash = OUTBOUND_HASH_V4(sp,
6785 				    *((ipaddr_t *)&dying->
6786 				    ipsa_srcaddr));
6787 			}
6788 		bucket = &(sp->sdb_of[outhash]);
6789 		}
6790 
6791 		mutex_enter(&bucket->isaf_lock);
6792 		/*
6793 		 * "haspeer" SA's have the same src/dst address ordering,
6794 		 * "paired" SA's have the src/dst addresses reversed.
6795 		 */
6796 		if (haspeer) {
6797 			peer_assoc = ipsec_getassocbyspi(bucket,
6798 			    dying->ipsa_spi, dying->ipsa_srcaddr,
6799 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
6800 		} else {
6801 			peer_assoc = ipsec_getassocbyspi(bucket,
6802 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
6803 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
6804 		}
6805 
6806 		mutex_exit(&bucket->isaf_lock);
6807 		if (peer_assoc != NULL) {
6808 			mutex_enter(&peer_assoc->ipsa_lock);
6809 			mutex_enter(&dying->ipsa_lock);
6810 			if (!haspeer) {
6811 				/*
6812 				 * Only SA's which have a "peer" or are
6813 				 * "paired" end up on this list, so this
6814 				 * must be a "paired" SA, update the flags
6815 				 * to break the pair.
6816 				 */
6817 				peer_assoc->ipsa_otherspi = 0;
6818 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
6819 				dying->ipsa_otherspi = 0;
6820 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
6821 			}
6822 			if (haspeer || outbound) {
6823 				/*
6824 				 * Update the state of the "inbound" SA when
6825 				 * the "outbound" SA has expired. Don't update
6826 				 * the "outbound" SA when the "inbound" SA
6827 				 * SA expires because setting the hard_addtime
6828 				 * below will cause this to happen.
6829 				 */
6830 				peer_assoc->ipsa_state = dying->ipsa_state;
6831 			}
6832 			if (dying->ipsa_state == IPSA_STATE_DEAD)
6833 				peer_assoc->ipsa_hardexpiretime = 1;
6834 
6835 			mutex_exit(&dying->ipsa_lock);
6836 			mutex_exit(&peer_assoc->ipsa_lock);
6837 			IPSA_REFRELE(peer_assoc);
6838 		}
6839 		IPSA_REFRELE(dying);
6840 	}
6841 }
6842