xref: /illumos-gate/usr/src/uts/common/inet/ipsec_impl.h (revision 2a6e99a0f1f7d22c0396e8b2ce9b9babbd1056cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
25  * Copyright 2017 Joyent, Inc.
26  */
27 
28 #ifndef _INET_IPSEC_IMPL_H
29 #define	_INET_IPSEC_IMPL_H
30 
31 #include <inet/ip.h>
32 #include <inet/ipdrop.h>
33 
34 #ifdef	__cplusplus
35 extern "C" {
36 #endif
37 
/* IPSEC_CONF_* identifiers; numbered consecutively starting at zero. */
#define	IPSEC_CONF_SRC_ADDRESS		0	/* Source address */
#define	IPSEC_CONF_SRC_PORT		1	/* Source port */
#define	IPSEC_CONF_DST_ADDRESS		2	/* Destination address */
#define	IPSEC_CONF_DST_PORT		3	/* Destination port */
#define	IPSEC_CONF_SRC_MASK		4	/* Source address mask */
#define	IPSEC_CONF_DST_MASK		5	/* Destination address mask */
#define	IPSEC_CONF_ULP			6	/* Upper layer Port */
#define	IPSEC_CONF_IPSEC_PROT		7	/* AH, ESP, or AH_ESP */
#define	IPSEC_CONF_IPSEC_AALGS		8	/* Auth algorithms - MD5 etc. */
#define	IPSEC_CONF_IPSEC_EALGS		9	/* Encr algorithms - DES etc. */
/* NOTE(review): presumably the ESP authentication algorithms; confirm. */
#define	IPSEC_CONF_IPSEC_EAALGS		10	/* ESP auth algs - MD5 etc. */
#define	IPSEC_CONF_IPSEC_SA		11	/* Shared or unique SA */
#define	IPSEC_CONF_IPSEC_DIR		12	/* Direction of traffic */
#define	IPSEC_CONF_ICMP_TYPE		13	/* ICMP type */
#define	IPSEC_CONF_ICMP_CODE		14	/* ICMP code */
#define	IPSEC_CONF_NEGOTIATE		15	/* Negotiation */
#define	IPSEC_CONF_TUNNEL		16	/* Tunnel */
55 
/* Entry types; IPSEC_NTYPES is the number of distinct types. */
#define	IPSEC_NTYPES		0x02
#define	IPSEC_TYPE_OUTBOUND	0x00
#define	IPSEC_TYPE_INBOUND	0x01

/* Policy dispositions. */
#define	IPSEC_POLICY_APPLY	0x01
#define	IPSEC_POLICY_DISCARD	0x02
#define	IPSEC_POLICY_BYPASS	0x03

/* Whether an SA is shared or unique. */
#define	IPSEC_SHARED_SA		0x01
#define	IPSEC_UNIQUE_SA		0x02

/* IPsec protocols; IPSEC_AH_ESP is the OR of the two single-protocol values. */
#define	IPSEC_AH_ONLY		0x01
#define	IPSEC_ESP_ONLY		0x02
#define	IPSEC_AH_ESP		0x03

/*
 * "Any" authentication algorithm.  This value is defined internally;
 * move it to PF_KEY v3 once that RFC is released.
 */
#define	SADB_AALG_ANY		255
81 
82 #ifdef _KERNEL
83 
84 #include <inet/common.h>
85 #include <netinet/ip6.h>
86 #include <netinet/icmp6.h>
87 #include <net/pfkeyv2.h>
88 #include <inet/ip.h>
89 #include <inet/sadb.h>
90 #include <inet/ipsecah.h>
91 #include <inet/ipsecesp.h>
92 #include <sys/crypto/common.h>
93 #include <sys/crypto/api.h>
94 #include <sys/avl.h>
95 
/*
 * Maximum number of authentication algorithms (they can be indexed by one
 * byte per PF_KEY and the IKE IPsec DOI).
 */
#define	MAX_AALGS		256

/*
 * Bounds used for the IPsec task queue.
 */
#define	IPSEC_TASKQ_MIN		10
#define	IPSEC_TASKQ_MAX		20
107 
108 /*
109  * So we can access IPsec global variables that live in keysock.c.
110  */
111 extern boolean_t keysock_extended_reg(netstack_t *);
112 extern uint32_t keysock_next_seq(netstack_t *);
113 
114 /* Common-code for spdsock and keysock. */
115 extern void keysock_spdsock_wput_iocdata(queue_t *, mblk_t *, sa_family_t);
116 
117 /*
118  * Locking for ipsec policy rules:
119  *
120  * policy heads: system policy is static; per-conn polheads are dynamic,
121  * and refcounted (and inherited); use atomic refcounts and "don't let
122  * go with both hands".
123  *
124  * policy: refcounted; references from polhead, ipsec_out
125  *
126  * actions: refcounted; referenced from: action hash table, policy, ipsec_out
127  * selectors: refcounted; referenced from: selector hash table, policy.
128  */
129 
130 /*
131  * the following are inspired by, but not directly based on,
132  * some of the sys/queue.h type-safe pseudo-polymorphic macros
133  * found in BSD.
134  *
135  * XXX If we use these more generally, we'll have to make the names
136  * less generic (HASH_* will probably clobber other namespaces).
137  */
138 
/*
 * Bucket locking.  `table' is an array of HASH_HEAD() buckets and `hash'
 * is the index of the bucket whose hash_lock is taken, dropped or tested.
 */
#define	HASH_LOCK(table, hash) \
	mutex_enter(&(table)[hash].hash_lock)
#define	HASH_UNLOCK(table, hash) \
	mutex_exit(&(table)[hash].hash_lock)

#define	HASH_LOCKED(table, hash) \
	MUTEX_HELD(&(table)[hash].hash_lock)

/*
 * Expands to the three clauses of a for statement that walks the chain
 * in bucket `hash'; use as: for (HASH_ITERATE(var, field, table, hash)).
 * `var' must be an lvalue of the element pointer type.
 */
#define	HASH_ITERATE(var, field, table, hash)			\
	(var) = (table)[hash].hash_head; (var) != NULL;		\
	(var) = (var)->field.hash_next

/* The element following `var' on its chain (NULL at the end). */
#define	HASH_NEXT(var, field)					\
	(var)->field.hash_next

/*
 * Push `var' onto the front of bucket `hash'.  The bucket lock must be
 * held (ASSERTed).  hash_pp always points at whichever pointer refers to
 * the element, so the old head's back pointer is redirected to var's
 * hash_next.
 */
#define	HASH_INSERT(var, field, table, hash)			\
{								\
	ASSERT(HASH_LOCKED(table, hash));			\
	(var)->field.hash_next = (table)[hash].hash_head;	\
	(var)->field.hash_pp = &(table)[hash].hash_head;	\
	(table)[hash].hash_head = (var);			\
	if ((var)->field.hash_next != NULL)			\
		(var)->field.hash_next->field.hash_pp =		\
			&((var)->field.hash_next);		\
}


/* Remove `var' from bucket `hash'; the bucket lock must be held. */
#define	HASH_UNCHAIN(var, field, table, hash)			\
{								\
	ASSERT(MUTEX_HELD(&(table)[hash].hash_lock));		\
	HASHLIST_UNCHAIN(var, field);				\
}

/*
 * Push `var' onto the front of the list whose head pointer is `head'.
 * `head' must be an lvalue; its address is stored in the element.
 * No locking is asserted here.
 */
#define	HASHLIST_INSERT(var, field, head)			\
{								\
	(var)->field.hash_next = (head);			\
	(var)->field.hash_pp = &(head);				\
	(head) = (var);						\
	if ((var)->field.hash_next != NULL)			\
		(var)->field.hash_next->field.hash_pp =		\
			&((var)->field.hash_next);		\
}

/*
 * Unlink `var' from whatever list it is on by storing its successor
 * through its back pointer, then clear both of its link fields.
 * No locking is asserted here.
 */
#define	HASHLIST_UNCHAIN(var, field)				\
{								\
	*(var)->field.hash_pp = (var)->field.hash_next;		\
	if ((var)->field.hash_next != NULL)			\
		(var)->field.hash_next->field.hash_pp =		\
			(var)->field.hash_pp;			\
	HASH_NULL(var, field);					\
}


/* Reset the link fields of `var' to the "not on any list" state. */
#define	HASH_NULL(var, field)					\
{								\
	(var)->field.hash_next = NULL;				\
	(var)->field.hash_pp = NULL;				\
}

/*
 * Declares the per-element linkage member `fieldname' for elements of
 * type `type': a forward pointer plus a pointer to the pointer that
 * references this element.
 */
#define	HASH_LINK(fieldname, type)				\
	struct {						\
		type *hash_next;				\
		type **hash_pp;					\
	} fieldname


/*
 * Anonymous bucket type for chains of `struct tag': the chain head and
 * the mutex used by HASH_LOCK()/HASH_UNLOCK().
 */
#define	HASH_HEAD(tag)						\
	struct {						\
		struct tag *hash_head;				\
		kmutex_t hash_lock;				\
	}
210 
211 typedef struct ipsec_policy_s ipsec_policy_t;
212 
213 typedef HASH_HEAD(ipsec_policy_s) ipsec_policy_hash_t;
214 
215 /*
216  * When adding new fields to ipsec_prot_t, make sure to update
217  * ipsec_in_to_out_action() as well as other code in spd.c
218  */
219 
typedef struct ipsec_prot
{
	unsigned int
		ipp_use_ah : 1,
		ipp_use_esp : 1,
		ipp_use_se : 1,
		ipp_use_unique : 1,
		ipp_use_espa : 1,
		ipp_pad : 27;
	uint8_t		ipp_auth_alg;		 /* DOI number */
	uint8_t		ipp_encr_alg;		 /* DOI number */
	uint8_t		ipp_esp_auth_alg;	 /* DOI number */
	uint16_t	ipp_ah_minbits;		 /* AH: min keylen */
	uint16_t	ipp_ah_maxbits;		 /* AH: max keylen */
	uint16_t	ipp_espe_minbits;	 /* ESP encr: min keylen */
	uint16_t	ipp_espe_maxbits;	 /* ESP encr: max keylen */
	uint16_t	ipp_espa_minbits;	 /* ESP auth: min keylen */
	uint16_t	ipp_espa_maxbits;	 /* ESP auth: max keylen */
	uint32_t	ipp_km_proto;		 /* key mgmt protocol */
	uint64_t	ipp_km_cookie;		 /* key mgmt cookie */
	uint32_t	ipp_replay_depth;	 /* replay window */
	/* XXX add lifetimes */
} ipsec_prot_t;

/* Largest value the 16-bit *_minbits / *_maxbits fields can hold. */
#define	IPSEC_MAX_KEYBITS (0xffff)

/*
 * An individual policy action, possibly a member of a chain.
 *
 * Action chains may be shared between multiple policy rules.
 *
 * With one exception (IPSEC_POLICY_LOG), a chain consists of an
 * ordered list of alternative ways to handle a packet.
 *
 * All actions are also "interned" into a hash table (to allow
 * multiple rules with the same action chain to share one copy in
 * memory).
 *
 * The ipa_* macros below are shorthand for the members of ipa_u.
 */

typedef struct ipsec_act
{
	uint8_t		ipa_type;
	uint8_t		ipa_log;
	union
	{
		ipsec_prot_t	ipau_apply;
		uint8_t		ipau_reject_type;
		uint32_t	ipau_resolve_id; /* magic cookie */
		uint8_t		ipau_log_type;
	} ipa_u;
#define	ipa_apply ipa_u.ipau_apply
#define	ipa_reject_type ipa_u.ipau_reject_type
#define	ipa_log_type ipa_u.ipau_log_type
#define	ipa_resolve_id ipa_u.ipau_resolve_id
/*
 * Historical spelling.  It used to expand to ipa_u.ipau_resolve_type,
 * which is not a member of the union and so could never compile; it now
 * names the same member as ipa_resolve_id.
 */
#define	ipa_resolve_type ipa_u.ipau_resolve_id
} ipsec_act_t;
275 
/*
 * Action types.  The first three are kept numerically equal to the
 * corresponding IPSEC_POLICY_* values.
 */
#define	IPSEC_ACT_APPLY		0x01	/* == IPSEC_POLICY_APPLY */
#define	IPSEC_ACT_DISCARD	0x02	/* == IPSEC_POLICY_DISCARD */
#define	IPSEC_ACT_BYPASS	0x03	/* == IPSEC_POLICY_BYPASS */
#define	IPSEC_ACT_REJECT	0x04
#define	IPSEC_ACT_CLEAR		0x05
281 
282 typedef struct ipsec_action_s
283 {
284 	HASH_LINK(ipa_hash, struct ipsec_action_s);
285 	struct ipsec_action_s	*ipa_next;	/* next alternative */
286 	uint32_t		ipa_refs;		/* refcount */
287 	ipsec_act_t		ipa_act;
288 	/*
289 	 * The following bits are equivalent to an OR of bits included in the
290 	 * ipau_apply fields of this and subsequent actions in an
291 	 * action chain; this is an optimization for the sake of
292 	 * ipsec_out_process() in ip.c and a few other places.
293 	 */
294 	unsigned int
295 		ipa_hval: 8,
296 		ipa_allow_clear:1,		/* rule allows cleartext? */
297 		ipa_want_ah:1,			/* an action wants ah */
298 		ipa_want_esp:1,			/* an action wants esp */
299 		ipa_want_se:1,			/* an action wants se */
300 		ipa_want_unique:1,		/* want unique sa's */
301 		ipa_pad:19;
302 	uint32_t		ipa_ovhd;	/* per-packet encap ovhd */
303 } ipsec_action_t;
304 
/*
 * Take a reference on an action; the ASSERT trips if the 32-bit count
 * wrapped around to zero.
 */
#define	IPACT_REFHOLD(ipa)					\
{								\
	atomic_inc_32(&(ipa)->ipa_refs);			\
	ASSERT((ipa)->ipa_refs != 0);				\
}

/*
 * Drop a reference on an action, calling ipsec_action_free() when the
 * count reaches zero.  `ipa' must be an lvalue: it is cleared afterwards
 * whether or not the action was freed.
 */
#define	IPACT_REFRELE(ipa)					\
{								\
	ASSERT((ipa)->ipa_refs != 0);				\
	membar_exit();						\
	if (atomic_dec_32_nv(&(ipa)->ipa_refs) == 0)		\
		ipsec_action_free(ipa);				\
	(ipa) = NULL;						\
}
316 
/*
 * The action hash table is trivially sized for now.  Making the hash
 * function behave correctly requires structure canonicalization, which
 * can be added in the future.
 */
#define	IPSEC_ACTION_HASH_SIZE	1
323 
324 /*
325  * Merged address structure, for cheezy address-family independent
326  * matches in policy code.
327  */
328 
329 typedef union ipsec_addr
330 {
331 	in6_addr_t	ipsad_v6;
332 	in_addr_t	ipsad_v4;
333 } ipsec_addr_t;
334 
335 /*
336  * ipsec selector set, as used by the kernel policy structures.
 * Note that we specify "local" and "remote"
338  * rather than "source" and "destination", which allows the selectors
339  * for symmetric policy rules to be shared between inbound and
340  * outbound rules.
341  *
342  * "local" means "destination" on inbound, and "source" on outbound.
343  * "remote" means "source" on inbound, and "destination" on outbound.
344  * XXX if we add a fifth policy enforcement point for forwarded packets,
345  * what do we do?
346  *
347  * The ipsl_valid mask is not done as a bitfield; this is so we
348  * can use "ffs()" to find the "most interesting" valid tag.
349  *
350  * XXX should we have multiple types for space-conservation reasons?
351  * (v4 vs v6?  prefix vs. range)?
352  */
353 
354 typedef struct ipsec_selkey
355 {
356 	uint32_t	ipsl_valid;		/* bitmask of valid entries */
357 #define	IPSL_REMOTE_ADDR		0x00000001
358 #define	IPSL_LOCAL_ADDR			0x00000002
359 #define	IPSL_REMOTE_PORT		0x00000004
360 #define	IPSL_LOCAL_PORT			0x00000008
361 #define	IPSL_PROTOCOL			0x00000010
362 #define	IPSL_ICMP_TYPE			0x00000020
363 #define	IPSL_ICMP_CODE			0x00000040
364 #define	IPSL_IPV6			0x00000080
365 #define	IPSL_IPV4			0x00000100
366 
367 #define	IPSL_WILDCARD			0x0000007f
368 
369 	ipsec_addr_t	ipsl_local;
370 	ipsec_addr_t	ipsl_remote;
371 	uint16_t	ipsl_lport;
372 	uint16_t	ipsl_rport;
373 	/*
374 	 * ICMP type and code selectors. Both have an end value to
375 	 * specify ranges, or * and *_end are equal for a single
376 	 * value
377 	 */
378 	uint8_t		ipsl_icmp_type;
379 	uint8_t		ipsl_icmp_type_end;
380 	uint8_t		ipsl_icmp_code;
381 	uint8_t		ipsl_icmp_code_end;
382 
383 	uint8_t		ipsl_proto;		/* ip payload type */
384 	uint8_t		ipsl_local_pfxlen;	/* #bits of prefix */
385 	uint8_t		ipsl_remote_pfxlen;	/* #bits of prefix */
386 	uint8_t		ipsl_mbz;
387 
388 	/* Insert new elements above this line */
389 	uint32_t	ipsl_pol_hval;
390 	uint32_t	ipsl_sel_hval;
391 } ipsec_selkey_t;
392 
393 typedef struct ipsec_sel
394 {
395 	HASH_LINK(ipsl_hash, struct ipsec_sel);
396 	uint32_t	ipsl_refs;		/* # refs to this sel */
397 	ipsec_selkey_t	ipsl_key;		/* actual selector guts */
398 } ipsec_sel_t;
399 
400 /*
401  * One policy rule.  This will be linked into a single hash chain bucket in
402  * the parent rule structure.  If the selector is simple enough to
403  * allow hashing, it gets filed under ipsec_policy_root_t->ipr_hash.
404  * Otherwise it goes onto a linked list in ipsec_policy_root_t->ipr_nonhash[af]
405  *
406  * In addition, we file the rule into an avl tree keyed by the rule index.
407  * (Duplicate rules are permitted; the comparison function breaks ties).
408  */
409 struct ipsec_policy_s
410 {
411 	HASH_LINK(ipsp_hash, struct ipsec_policy_s);
412 	avl_node_t		ipsp_byid;
413 	uint64_t		ipsp_index;	/* unique id */
414 	uint32_t		ipsp_prio; 	/* rule priority */
415 	uint32_t		ipsp_refs;
416 	ipsec_sel_t		*ipsp_sel;	/* selector set (shared) */
417 	ipsec_action_t		*ipsp_act; 	/* action (may be shared) */
418 	netstack_t		*ipsp_netstack;	/* No netstack_hold */
419 };
420 
/*
 * Take a reference on a policy rule; the ASSERT trips if the 32-bit
 * count wrapped around to zero.
 */
#define	IPPOL_REFHOLD(ipp)					\
{								\
	atomic_inc_32(&(ipp)->ipsp_refs);			\
	ASSERT((ipp)->ipsp_refs != 0);				\
}

/*
 * Drop a reference on a policy rule, calling ipsec_policy_free() when
 * the count reaches zero.  `ipp' must be an lvalue: it is cleared
 * afterwards whether or not the rule was freed.
 */
#define	IPPOL_REFRELE(ipp)					\
{								\
	ASSERT((ipp)->ipsp_refs != 0);				\
	membar_exit();						\
	if (atomic_dec_32_nv(&(ipp)->ipsp_refs) == 0)		\
		ipsec_policy_free(ipp);				\
	(ipp) = NULL;						\
}
432 
/*
 * Detach rule `ip' from policy head `php': unlink it from its hash list,
 * remove it from the head's iph_rulebyid AVL tree, and drop one
 * reference.  `ip' must be an lvalue, because IPPOL_REFRELE() clears it.
 *
 * The three steps are wrapped in a single compound statement (the same
 * convention as the other multi-statement macros in this file) so that
 * the whole sequence, not just its first statement, is governed by an
 * enclosing if or loop.
 */
#define	IPPOL_UNCHAIN(php, ip)					\
{								\
	HASHLIST_UNCHAIN((ip), ipsp_hash);			\
	avl_remove(&(php)->iph_rulebyid, (ip));			\
	IPPOL_REFRELE(ip);					\
}
437 
438 /*
439  * Policy ruleset.  One per (protocol * direction) for system policy.
440  */
441 
442 #define	IPSEC_AF_V4	0
443 #define	IPSEC_AF_V6	1
444 #define	IPSEC_NAF	2
445 
446 typedef struct ipsec_policy_root_s
447 {
448 	ipsec_policy_t		*ipr_nonhash[IPSEC_NAF];
449 	int			ipr_nchains;
450 	ipsec_policy_hash_t 	*ipr_hash;
451 } ipsec_policy_root_t;
452 
453 /*
454  * Policy head.  One for system policy; there may also be one present
455  * on ill_t's with interface-specific policy, as well as one present
456  * for sockets with per-socket policy allocated.
457  */
458 
459 typedef struct ipsec_policy_head_s
460 {
461 	uint32_t	iph_refs;
462 	krwlock_t	iph_lock;
463 	uint64_t	iph_gen; /* generation number */
464 	ipsec_policy_root_t iph_root[IPSEC_NTYPES];
465 	avl_tree_t	iph_rulebyid;
466 } ipsec_policy_head_t;
467 
/*
 * Take a reference on a policy head; the ASSERT trips if the 32-bit
 * count wrapped around to zero.
 */
#define	IPPH_REFHOLD(iph)					\
{								\
	atomic_inc_32(&(iph)->iph_refs);			\
	ASSERT((iph)->iph_refs != 0);				\
}

/*
 * Drop a reference on a policy head, calling ipsec_polhead_free(iph, ns)
 * when the count reaches zero.  `iph' must be an lvalue: it is cleared
 * afterwards whether or not the head was freed.
 */
#define	IPPH_REFRELE(iph, ns)					\
{								\
	ASSERT((iph)->iph_refs != 0);				\
	membar_exit();						\
	if (atomic_dec_32_nv(&(iph)->iph_refs) == 0)		\
		ipsec_polhead_free(iph, ns);			\
	(iph) = NULL;						\
}
479 
480 /*
481  * IPsec fragment related structures
482  */
483 
484 typedef struct ipsec_fragcache_entry {
485 	struct ipsec_fragcache_entry *itpfe_next;	/* hash list chain */
486 	mblk_t *itpfe_fraglist;			/* list of fragments */
487 	time_t itpfe_exp;			/* time when entry is stale */
488 	int itpfe_depth;			/* # of fragments in list */
489 	ipsec_addr_t itpfe_frag_src;
490 	ipsec_addr_t itpfe_frag_dst;
491 #define	itpfe_src itpfe_frag_src.ipsad_v4
492 #define	itpfe_src6 itpfe_frag_src.ipsad_v6
493 #define	itpfe_dst itpfe_frag_dst.ipsad_v4
494 #define	itpfe_dst6 itpfe_frag_dst.ipsad_v6
495 	uint32_t itpfe_id;			/* IP datagram ID */
496 	uint8_t itpfe_proto;			/* IP Protocol */
497 	uint8_t itpfe_last;			/* Last packet */
498 } ipsec_fragcache_entry_t;
499 
500 typedef struct ipsec_fragcache {
501 	kmutex_t itpf_lock;
502 	struct ipsec_fragcache_entry **itpf_ptr;
503 	struct ipsec_fragcache_entry *itpf_freelist;
504 	time_t itpf_expire_hint;	/* time when oldest entry is stale */
505 } ipsec_fragcache_t;
506 
507 /*
 * Tunnel policies.  We keep a miniature of the transport-mode/global policy
509  * per each tunnel instance.
510  *
511  * People who need both an itp held down AND one of its polheads need to
512  * first lock the itp, THEN the polhead, otherwise deadlock WILL occur.
513  */
514 typedef struct ipsec_tun_pol_s {
515 	avl_node_t itp_node;
516 	kmutex_t itp_lock;
517 	uint64_t itp_next_policy_index;
518 	ipsec_policy_head_t *itp_policy;
519 	ipsec_policy_head_t *itp_inactive;
520 	uint32_t itp_flags;
521 	uint32_t itp_refcnt;
522 	char itp_name[LIFNAMSIZ];
523 	ipsec_fragcache_t itp_fragcache;
524 } ipsec_tun_pol_t;
/* NOTE - Callers (tun code) synchronize their own instances for these flags. */
#define	ITPF_P_ACTIVE 0x1	/* Are we using IPsec right now? */
#define	ITPF_P_TUNNEL 0x2	/* Negotiate tunnel-mode */
/* Optimization -> Do we have per-port security entries in this polhead? */
#define	ITPF_P_PER_PORT_SECURITY 0x4
#define	ITPF_PFLAGS 0x7		/* mask of all ITPF_P_* bits */
#define	ITPF_SHIFT 3		/* ITPF_I_x == ITPF_P_x << ITPF_SHIFT */

#define	ITPF_I_ACTIVE 0x8	/* Is the inactive using IPsec right now? */
#define	ITPF_I_TUNNEL 0x10	/* Negotiate tunnel-mode (on inactive) */
/* Optimization -> Do we have per-port security entries in this polhead? */
#define	ITPF_I_PER_PORT_SECURITY 0x20
#define	ITPF_IFLAGS 0x38	/* mask of all ITPF_I_* bits */

/*
 * ITPF_CLONE(f): copy the ITPF_P_* bits of f into the ITPF_I_* positions,
 * keeping the ITPF_P_* bits and discarding whatever ITPF_I_* bits were set.
 * ITPF_SWAP(f): exchange the ITPF_P_* and ITPF_I_* bit groups of f.
 *
 * Both assign to f and evaluate it more than once, so f must be a plain
 * lvalue without side effects.  Neither expansion ends in a semicolon;
 * the caller supplies it, which keeps them usable in if/else bodies.
 */
#define	ITPF_CLONE(f) ((f) = (((f) & ITPF_PFLAGS) | \
	    (((f) & ITPF_PFLAGS) << ITPF_SHIFT)))
#define	ITPF_SWAP(f) ((f) = ((((f) & ITPF_PFLAGS) << ITPF_SHIFT) | \
	    (((f) & ITPF_IFLAGS) >> ITPF_SHIFT)))

/*
 * Flag tests for one of a tunnel policy's two polheads.  If iph is the
 * itp's itp_policy pointer the ITPF_P_* bit is tested, otherwise the
 * ITPF_I_* bit.  The result is the masked flag word (non-zero when set),
 * not a normalized boolean.
 */
#define	ITP_P_ISACTIVE(itp, iph) ((itp)->itp_flags & \
	(((itp)->itp_policy == (iph)) ? ITPF_P_ACTIVE : ITPF_I_ACTIVE))

#define	ITP_P_ISTUNNEL(itp, iph) ((itp)->itp_flags & \
	(((itp)->itp_policy == (iph)) ? ITPF_P_TUNNEL : ITPF_I_TUNNEL))

#define	ITP_P_ISPERPORT(itp, iph) ((itp)->itp_flags & \
	(((itp)->itp_policy == (iph)) ? ITPF_P_PER_PORT_SECURITY : \
	ITPF_I_PER_PORT_SECURITY))
554 
/*
 * Take a reference on a tunnel policy; the ASSERT trips if the 32-bit
 * count wrapped around to zero.
 */
#define	ITP_REFHOLD(itp)					\
{								\
	atomic_inc_32(&((itp)->itp_refcnt));			\
	ASSERT((itp)->itp_refcnt != 0);				\
}

/*
 * Drop a reference on a tunnel policy, calling itp_free(itp, ns) when
 * the count reaches zero.  Unlike the other *_REFRELE macros in this
 * file, the caller's pointer is left untouched.
 */
#define	ITP_REFRELE(itp, ns)					\
{								\
	ASSERT((itp)->itp_refcnt != 0);				\
	membar_exit();						\
	if (atomic_dec_32_nv(&((itp)->itp_refcnt)) == 0)	\
		itp_free(itp, ns);				\
}
566 
567 /*
568  * Certificate identity.
569  */
570 
typedef struct ipsid_s
{
	struct ipsid_s	*ipsid_next;	/* next identity in the bucket */
	struct ipsid_s	**ipsid_ptpn;	/* pointer to the pointer to us */
	uint32_t	ipsid_refcnt;	/* see IPSID_REFHOLD/REFRELE */
	int		ipsid_type;	/* id type */
	char		*ipsid_cid;	/* certificate id string */
} ipsid_t;
579 
580 /*
581  * ipsid_t reference hold/release macros, just like ipsa versions.
582  */
583 
/*
 * Take a reference on an identity; the ASSERT trips if the 32-bit count
 * wrapped around to zero.
 */
#define	IPSID_REFHOLD(ipsid)					\
{								\
	atomic_inc_32(&(ipsid)->ipsid_refcnt);			\
	ASSERT((ipsid)->ipsid_refcnt != 0);			\
}

/*
 * Drop a reference on an identity.  Only the count is decremented;
 * nothing is freed here, because someone else cleans up after us later.
 */

#define	IPSID_REFRELE(ipsid)					\
{								\
	membar_exit();						\
	atomic_dec_32(&(ipsid)->ipsid_refcnt);			\
}
598 
/*
 * Estimates of the largest AH and ESP header sizes.  They let the upper
 * layer choose an MSS without consulting AH/ESP.  When the real size
 * differs from the estimate, the ULP learns the right value through
 * locally generated ICMP_FRAGMENTATION_NEEDED messages.
 *
 * AH: 12 bytes of constant header plus 32 bytes of ICV checksum (SHA-512).
 */
#define	IPSEC_MAX_AH_HDR_SIZE	(44)
609 
610 /*
611  * ESP : Is a bit more complex...
612  *
613  * A system of one inequality and one equation MUST be solved for proper ESP
614  * overhead.  The inequality is:
615  *
616  *    MTU - sizeof (IP header + options) >=
617  *		sizeof (esph_t) + sizeof (IV or ctr) + data-size + 2 + ICV
618  *
619  * IV or counter is almost always the cipher's block size.  The equation is:
620  *
621  *    data-size % block-size = (block-size - 2)
622  *
623  * so we can put as much data into the datagram as possible.  If we are
624  * pessimistic and include our largest overhead cipher (AES) and hash
625  * (SHA-512), and assume 1500-byte MTU minus IPv4 overhead of 20 bytes, we get:
626  *
627  *    1480 >= 8 + 16 + data-size + 2 + 32
628  *    1480 >= 58 + data-size
629  *    1422 >= data-size,      1422 % 16 = 14, so 58 is the overhead!
630  *
631  * But, let's re-run the numbers with the same algorithms, but with an IPv6
632  * header:
633  *
634  *    1460 >= 58 + data-size
635  *    1402 >= data-size,     1402 % 16 = 10, meaning shrink to 1390 to get 14,
636  *
637  * which means the overhead is now 70.
638  *
639  * Hmmm... IPv4 headers can never be anything other than multiples of 4-bytes,
640  * and IPv6 ones can never be anything other than multiples of 8-bytes.  We've
641  * seen overheads of 58 and 70.  58 % 16 == 10, and 70 % 16 == 6.  IPv4 could
642  * force us to have 62 ( % 16 == 14) or 66 ( % 16 == 2), or IPv6 could force us
643  * to have 78 ( % 16 = 14).  Let's compute IPv6 + 8-bytes of options:
644  *
645  *    1452 >= 58 + data-size
646  *    1394 >= data-size,     1394 % 16 = 2, meaning shrink to 1390 to get 14,
647  *
648  * Aha!  The "ESP overhead" shrinks to 62 (70 - 8).  This is good.  Let's try
649  * IPv4 + 8 bytes of IPv4 options:
650  *
651  *    1472 >= 58 + data-size
652  *    1414 >= data-size,      1414 % 16 = 6, meaning shrink to 1406,
653  *
654  * meaning 66 is the overhead.  Let's try 12 bytes:
655  *
656  *    1468 >= 58 + data-size
657  *    1410 >= data-size,      1410 % 16 = 2, meaning also shrink to 1406,
658  *
659  * meaning 62 is the overhead.  How about 16 bytes?
660  *
661  *    1464 >= 58 + data-size
 *    1406 >= data-size,      1406 % 16 = 14, which is great!
663  *
664  * this means 58 is the overhead.  If I wrap and add 20 bytes, it looks just
665  * like IPv6's 70 bytes.  If I add 24, we go back to 66 bytes.
666  *
667  * So picking 70 is a sensible, conservative default.  Optimal calculations
668  * will depend on knowing pre-ESP header length (called "divpoint" in the ESP
669  * code), which could be cached in the conn_t for connected endpoints, or
670  * which must be computed on every datagram otherwise.
671  */
#define	IPSEC_MAX_ESP_HDR_SIZE	(70)	/* conservative default, in bytes */
673 
674 /*
675  * Alternate, when we know the crypto block size via the SA.  Assume an ICV on
676  * the SA.  Use:
677  *
678  * sizeof (esph_t) + 2 * (sizeof (IV/counter)) - 2 + sizeof (ICV).  The "-2"
679  * discounts the overhead of the pad + padlen that gets swallowed up by the
680  * second (theoretically all-pad) cipher-block.  If you use our examples of
681  * AES and SHA512, you get:
682  *
683  *    8 + 32 - 2 + 32 == 70.
684  *
685  * Which is our pre-computed maximum above.
686  */
687 #include <inet/ipsecesp.h>
#define	IPSEC_BASE_ESP_HDR_SIZE(sa)				\
	(sizeof (esph_t) + (2 * (sa)->ipsa_iv_len) - 2 +	\
	    (sa)->ipsa_mac_len)
690 
691 /*
692  * Identity hash table.
693  *
694  * Identities are refcounted and "interned" into the hash table.
695  * Only references coming from other objects (SA's, latching state)
696  * are counted in ipsid_refcnt.
697  *
698  * Locking: IPSID_REFHOLD is safe only when (a) the object's hash bucket
699  * is locked, (b) we know that the refcount must be > 0.
700  *
701  * The ipsid_next and ipsid_ptpn fields are only to be referenced or
702  * modified when the bucket lock is held; in particular, we only
703  * delete objects while holding the bucket lock, and we only increase
704  * the refcount from 0 to 1 while the bucket lock is held.
705  */
706 
707 #define	IPSID_HASHSIZE 64
708 
709 typedef struct ipsif_s
710 {
711 	ipsid_t *ipsif_head;
712 	kmutex_t ipsif_lock;
713 } ipsif_t;
714 
715 /*
716  * For call to the kernel crypto framework. State needed during
717  * the execution of a crypto request.
718  */
719 typedef struct ipsec_crypto_s {
720 	size_t		ic_skip_len;		/* len to skip for AH auth */
721 	crypto_data_t	ic_crypto_data;		/* single op crypto data */
722 	crypto_dual_data_t ic_crypto_dual_data; /* for dual ops */
723 	crypto_data_t	ic_crypto_mac;		/* to store the MAC */
724 	ipsa_cm_mech_t	ic_cmm;
725 } ipsec_crypto_t;
726 
727 /*
728  * IPsec stack instances
729  */
730 struct ipsec_stack {
731 	netstack_t		*ipsec_netstack;	/* Common netstack */
732 
733 	/* Packet dropper for IP IPsec processing failures */
734 	ipdropper_t		ipsec_dropper;
735 
736 /* From spd.c */
737 	/*
738 	 * Policy rule index generator.  We assume this won't wrap in the
739 	 * lifetime of a system.  If we make 2^20 policy changes per second,
740 	 * this will last 2^44 seconds, or roughly 500,000 years, so we don't
741 	 * have to worry about reusing policy index values.
742 	 */
743 	uint64_t		ipsec_next_policy_index;
744 
745 	HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE];
746 	HASH_HEAD(ipsec_sel)	  *ipsec_sel_hash;
747 	uint32_t		ipsec_spd_hashsize;
748 
749 	ipsif_t			ipsec_ipsid_buckets[IPSID_HASHSIZE];
750 
751 	/*
752 	 * Active & Inactive system policy roots
753 	 */
754 	ipsec_policy_head_t	ipsec_system_policy;
755 	ipsec_policy_head_t	ipsec_inactive_policy;
756 
757 	/* Packet dropper for generic SPD drops. */
758 	ipdropper_t		ipsec_spd_dropper;
759 
760 /* ipdrop.c */
761 	kstat_t			*ipsec_ip_drop_kstat;
762 	struct ip_dropstats	*ipsec_ip_drop_types;
763 
764 /* spd.c */
765 	/*
766 	 * Have a counter for every possible policy message in
767 	 * ipsec_policy_failure_msgs
768 	 */
769 	uint32_t		ipsec_policy_failure_count[IPSEC_POLICY_MAX];
770 	/* Time since last ipsec policy failure that printed a message. */
771 	hrtime_t		ipsec_policy_failure_last;
772 
773 /* ip_spd.c */
774 	/* stats */
775 	kstat_t			*ipsec_ksp;
776 	struct ipsec_kstats_s	*ipsec_kstats;
777 
778 /* sadb.c */
779 	/* Packet dropper for generic SADB drops. */
780 	ipdropper_t		ipsec_sadb_dropper;
781 
782 /* spd.c */
783 	boolean_t		ipsec_inbound_v4_policy_present;
784 	boolean_t		ipsec_outbound_v4_policy_present;
785 	boolean_t		ipsec_inbound_v6_policy_present;
786 	boolean_t		ipsec_outbound_v6_policy_present;
787 
788 /* spd.c */
789 	/*
790 	 * Because policy needs to know what algorithms are supported, keep the
791 	 * lists of algorithms here.
792 	 */
793 	krwlock_t 		ipsec_alg_lock;
794 
795 	uint8_t			ipsec_nalgs[IPSEC_NALGTYPES];
796 	ipsec_alginfo_t	*ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
797 
798 	uint8_t		ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
799 
800 	int		ipsec_algs_exec_mode[IPSEC_NALGTYPES];
801 
802 	uint32_t 	ipsec_tun_spd_hashsize;
803 	/*
804 	 * Tunnel policies - AVL tree indexed by tunnel name.
805 	 */
806 	krwlock_t 	ipsec_tunnel_policy_lock;
807 	uint64_t	ipsec_tunnel_policy_gen;
808 	avl_tree_t	ipsec_tunnel_policies;
809 
810 /* ipsec_loader.c */
811 	kmutex_t	ipsec_loader_lock;
812 	int		ipsec_loader_state;
813 	int		ipsec_loader_sig;
814 	kt_did_t	ipsec_loader_tid;
815 	kcondvar_t	ipsec_loader_sig_cv;	/* For loader_sig conditions. */
816 
817 };
818 typedef struct ipsec_stack ipsec_stack_t;
819 
820 /* Handle the kstat_create in ip_drop_init() failing */
#define	DROPPER(_ipss, _dropper)				\
	(((_ipss)->ipsec_ip_drop_types != NULL) ?		\
	&((_ipss)->ipsec_ip_drop_types->_dropper) : NULL)
824 
/*
 * Loader states.
 */
#define	IPSEC_LOADER_WAIT	0
#define	IPSEC_LOADER_FAILED	(-1)
#define	IPSEC_LOADER_SUCCEEDED	1
831 
832 /*
833  * ipsec_loader entrypoints.
834  */
835 extern void ipsec_loader_init(ipsec_stack_t *);
836 extern void ipsec_loader_start(ipsec_stack_t *);
837 extern void ipsec_loader_destroy(ipsec_stack_t *);
838 extern void ipsec_loader_loadnow(ipsec_stack_t *);
839 extern boolean_t ipsec_loader_wait(queue_t *q, ipsec_stack_t *);
840 extern boolean_t ipsec_loaded(ipsec_stack_t *);
841 extern boolean_t ipsec_failed(ipsec_stack_t *);
842 
843 /*
844  * ipsec policy entrypoints (spd.c)
845  */
846 
847 extern void ipsec_policy_g_destroy(void);
848 extern void ipsec_policy_g_init(void);
849 
850 extern mblk_t	*ipsec_add_crypto_data(mblk_t *, ipsec_crypto_t **);
851 extern mblk_t	*ipsec_remove_crypto_data(mblk_t *, ipsec_crypto_t **);
852 extern mblk_t	*ipsec_free_crypto_data(mblk_t *);
853 extern int ipsec_alloc_table(ipsec_policy_head_t *, int, int, boolean_t,
854     netstack_t *);
855 extern void ipsec_polhead_init(ipsec_policy_head_t *, int);
856 extern void ipsec_polhead_destroy(ipsec_policy_head_t *);
857 extern void ipsec_polhead_free_table(ipsec_policy_head_t *);
858 extern mblk_t *ipsec_check_global_policy(mblk_t *, conn_t *, ipha_t *,
859     ip6_t *, ip_recv_attr_t *, netstack_t *ns);
860 extern mblk_t *ipsec_check_inbound_policy(mblk_t *, conn_t *, ipha_t *, ip6_t *,
861     ip_recv_attr_t *);
862 
863 extern boolean_t ipsec_in_to_out(ip_recv_attr_t *, ip_xmit_attr_t *,
864     mblk_t *, ipha_t *, ip6_t *);
865 extern void ipsec_in_release_refs(ip_recv_attr_t *);
866 extern void ipsec_out_release_refs(ip_xmit_attr_t *);
867 extern void ipsec_log_policy_failure(int, char *, ipha_t *, ip6_t *, boolean_t,
868     netstack_t *);
869 extern boolean_t ipsec_inbound_accept_clear(mblk_t *, ipha_t *, ip6_t *);
870 extern int ipsec_conn_cache_policy(conn_t *, boolean_t);
871 extern void ipsec_cache_outbound_policy(const conn_t *, const in6_addr_t *,
872     const in6_addr_t *, in_port_t, ip_xmit_attr_t *);
873 extern boolean_t ipsec_outbound_policy_current(ip_xmit_attr_t *);
874 extern ipsec_action_t *ipsec_in_to_out_action(ip_recv_attr_t *);
875 extern void ipsec_latch_inbound(conn_t *connp, ip_recv_attr_t *ira);
876 
877 extern void ipsec_policy_free(ipsec_policy_t *);
878 extern void ipsec_action_free(ipsec_action_t *);
879 extern void ipsec_polhead_free(ipsec_policy_head_t *, netstack_t *);
880 extern ipsec_policy_head_t *ipsec_polhead_split(ipsec_policy_head_t *,
881     netstack_t *);
882 extern ipsec_policy_head_t *ipsec_polhead_create(void);
883 extern ipsec_policy_head_t *ipsec_system_policy(netstack_t *);
884 extern ipsec_policy_head_t *ipsec_inactive_policy(netstack_t *);
885 extern void ipsec_swap_policy(ipsec_policy_head_t *, ipsec_policy_head_t *,
886     netstack_t *);
887 extern void ipsec_swap_global_policy(netstack_t *);
888 
/*
 * Policy construction, insertion, removal and lookup, plus the ipsid_t
 * identity helpers.  Prototypes only.  Points the signatures establish:
 *  - ipsec_policy_create() takes its ipsec_act_t argument as const and
 *    also takes a uint64_t *.  NOTE(review): presumably a policy index,
 *    given that ipsec_policy_delete_index() takes a uint64_t by value --
 *    confirm.
 *  - ipsec_policy_delete() and ipsec_polhead_insert() report boolean_t,
 *    whereas ipsec_clone_system_policy(), ipsec_policy_delete_index(),
 *    ipsec_copy_polhead() and ipsec_req_from_head() report int.
 *  - ipsec_actvec_from_req() returns void and takes non-const
 *    ipsec_act_t ** and uint_t * arguments; ipsec_actvec_free() takes an
 *    (ipsec_act_t *, uint_t) pair.  NOTE(review): presumably the former
 *    produces a vector and element count that the latter releases --
 *    confirm.
 *  - The bare int arguments are not described in this header; check the
 *    definitions for their meaning before use.
 */
889 extern int ipsec_clone_system_policy(netstack_t *);
890 extern ipsec_policy_t *ipsec_policy_create(ipsec_selkey_t *,
891     const ipsec_act_t *, int, int, uint64_t *, netstack_t *);
892 extern boolean_t ipsec_policy_delete(ipsec_policy_head_t *,
893     ipsec_selkey_t *, int, netstack_t *);
894 extern int ipsec_policy_delete_index(ipsec_policy_head_t *, uint64_t,
895     netstack_t *);
896 extern boolean_t ipsec_polhead_insert(ipsec_policy_head_t *, ipsec_act_t *,
897     uint_t, int, int, netstack_t *);
898 extern void ipsec_polhead_flush(ipsec_policy_head_t *, netstack_t *);
899 extern int ipsec_copy_polhead(ipsec_policy_head_t *, ipsec_policy_head_t *,
900     netstack_t *);
901 extern void ipsec_actvec_from_req(const ipsec_req_t *, ipsec_act_t **, uint_t *,
902     netstack_t *);
903 extern void ipsec_actvec_free(ipsec_act_t *, uint_t);
904 extern int ipsec_req_from_head(ipsec_policy_head_t *, ipsec_req_t *, int);
905 extern mblk_t *ipsec_construct_inverse_acquire(sadb_msg_t *, sadb_ext_t **,
906     netstack_t *);
907 extern ipsec_policy_t *ipsec_find_policy(int, const conn_t *,
908     ipsec_selector_t *, netstack_t *);
909 extern ipsid_t *ipsid_lookup(int, char *, netstack_t *);
910 extern boolean_t ipsid_equal(ipsid_t *, ipsid_t *);
911 extern void ipsid_gc(netstack_t *);
912 extern void ipsec_latch_ids(ipsec_latch_t *, ipsid_t *, ipsid_t *);
913 
/*
 * Configuration flush and policy/action checking.  Prototypes only.
 * ipsec_check_policy() and ipsec_check_action() report boolean_t;
 * ipsec_config_flush() and ipsec_enter_policy() return nothing.
 * NOTE(review): the int * taken by ipsec_check_action() is presumably
 * where a diagnostic code is stored on failure -- confirm.
 */
914 extern void ipsec_config_flush(netstack_t *);
915 extern boolean_t ipsec_check_policy(ipsec_policy_head_t *, ipsec_policy_t *,
916     int);
917 extern void ipsec_enter_policy(ipsec_policy_head_t *, ipsec_policy_t *, int,
918     netstack_t *);
919 extern boolean_t ipsec_check_action(ipsec_act_t *, int *, netstack_t *);
920 
/*
 * Latch allocation/release and the per-connection request setter.
 * iplatch_create() takes no arguments and returns an ipsec_latch_t *;
 * iplatch_free() takes one and returns nothing.  ipsec_set_req() takes
 * a credential, a conn_t and an ipsec_req_t and returns an int.
 * NOTE(review): presumably an errno value -- confirm.
 */
921 extern void iplatch_free(ipsec_latch_t *);
922 extern ipsec_latch_t *iplatch_create(void);
923 extern int ipsec_set_req(cred_t *, conn_t *, ipsec_req_t *);
924 
/*
 * Takes an AVL tree and an untyped node pointer and returns nothing, so
 * the caller gets no success/failure indication and must pass a node of
 * the type the tree expects.
 * NOTE(review): the name suggests the insert happens unconditionally;
 * how an already-present key is handled is not visible here -- confirm.
 */
925 extern void ipsec_insert_always(avl_tree_t *tree, void *new_node);
926 
/*
 * Action overhead, label and IV helpers.  Prototypes only.
 *  - ipsec_act_ovhd() takes the action as const and returns a signed
 *    32-bit value.
 *  - The three sadb_whack_label*() routines take and return an mblk_t *;
 *    the unsuffixed variant additionally takes an ip_xmit_attr_t *.
 *    NOTE(review): whether a NULL return means the packet was dropped
 *    (the kstat_named_t * and ipdropper_t * arguments hint at this) is
 *    not visible here -- confirm.
 *  - update_iv() reports boolean_t.
 */
927 extern int32_t ipsec_act_ovhd(const ipsec_act_t *act);
928 extern mblk_t *sadb_whack_label(mblk_t *, ipsa_t *, ip_xmit_attr_t *,
929     kstat_named_t *, ipdropper_t *);
930 extern mblk_t *sadb_whack_label_v4(mblk_t *, ipsa_t *, kstat_named_t *,
931     ipdropper_t *);
932 extern mblk_t *sadb_whack_label_v6(mblk_t *, ipsa_t *, kstat_named_t *,
933     ipdropper_t *);
934 extern boolean_t update_iv(uint8_t *, queue_t *, ipsa_t *, ipsecesp_stack_t *);
935 
936 /*
937  * Tunnel-support SPD functions and variables.
 *
 * struct iptun_s is only forward-declared below, which is sufficient
 * because it is used solely through a pointer in ipsec_tun_outbound().
 * Both ipsec_tun_inbound() and ipsec_tun_outbound() take two
 * (ipha_t *, ip6_t *) pairs.  NOTE(review): presumably one pair is the
 * outer header and the other the inner header; which comes first is not
 * visible here -- confirm against the definitions.
 * itp_walk() takes a callback receiving (ipsec_tun_pol_t *, void *,
 * netstack_t *) along with a void * and a netstack_t * of its own.
938  */
939 struct iptun_s;	/* Defined in inet/iptun/iptun_impl.h. */
940 extern mblk_t *ipsec_tun_inbound(ip_recv_attr_t *, mblk_t *,  ipsec_tun_pol_t *,
941     ipha_t *, ip6_t *, ipha_t *, ip6_t *, int, netstack_t *);
942 extern mblk_t *ipsec_tun_outbound(mblk_t *, struct iptun_s *, ipha_t *,
943     ip6_t *, ipha_t *, ip6_t *, int, ip_xmit_attr_t *);
944 extern void itp_free(ipsec_tun_pol_t *, netstack_t *);
945 extern ipsec_tun_pol_t *create_tunnel_policy(char *, int *, uint64_t *,
946     netstack_t *);
947 extern ipsec_tun_pol_t *get_tunnel_policy(char *, netstack_t *);
948 extern void itp_unlink(ipsec_tun_pol_t *, netstack_t *);
949 extern void itp_walk(void (*)(ipsec_tun_pol_t *, void *, netstack_t *),
950     void *, netstack_t *);
951 
952 extern ipsec_tun_pol_t *itp_get_byaddr(uint32_t *, uint32_t *, int,
953     ip_stack_t *);
954 
955 /*
956  * IPsec AH/ESP functions called from IP or the common SADB code in AH.
 *
 * The AH and ESP *_in_assocfailure() routines share one parameter list
 * and both return nothing.  ipsecesp_send_keepalive() takes a single
 * ipsa_t * and has no AH counterpart in this header.
 * NOTE(review): the meaning of the unnamed char, ushort_t, char *,
 * uint32_t, void * and int arguments is not documented here -- consult
 * the definitions.
957  */
958 
959 extern void ipsecah_in_assocfailure(mblk_t *, char, ushort_t, char *,
960     uint32_t, void *, int, ip_recv_attr_t *ira);
961 extern void ipsecesp_in_assocfailure(mblk_t *, char, ushort_t, char *,
962     uint32_t, void *, int, ip_recv_attr_t *ira);
963 extern void ipsecesp_send_keepalive(ipsa_t *);
964 
965 /*
966  * Algorithm management helper functions.
 *
 * ipsec_valid_key_size() takes a 16-bit value and an algorithm descriptor
 * and reports a boolean_t.
 * NOTE(review): whether the size is expressed in bits or bytes is not
 * visible here -- confirm before calling.
967  */
968 extern boolean_t ipsec_valid_key_size(uint16_t, ipsec_alginfo_t *);
969 
970 /*
971  * Per-socket policy, for now, takes precedence... this priority value
972  * ensures it.
 *
 * The value 0x1000000 is 1 << 24.
 * NOTE(review): presumably this exceeds every priority assigned to
 * policy from other sources -- confirm against the code that assigns
 * priorities.
973  */
974 #define	IPSEC_PRIO_SOCKET		0x1000000
975 
976 /*
 * DDI initialization and teardown entry points for ESP, AH, keysock and
 * spdsock.  Every *_ddi_init() takes no arguments and reports a
 * boolean_t; every *_ddi_destroy() takes no arguments and returns
 * nothing.
 */
977 extern boolean_t ipsecesp_ddi_init(void);
978 extern boolean_t ipsecah_ddi_init(void);
979 extern boolean_t keysock_ddi_init(void);
980 extern boolean_t spdsock_ddi_init(void);
981 
982 extern void ipsecesp_ddi_destroy(void);
983 extern void ipsecah_ddi_destroy(void);
984 extern void keysock_ddi_destroy(void);
985 extern void spdsock_ddi_destroy(void);
986 
987 /*
988  * AH- and ESP-specific functions that are called directly by other modules.
 *
 * Each routine is declared in matching AH and ESP forms with identical
 * parameter lists.  The *_icmp_error() pair takes and returns an
 * mblk_t *; the remaining routines return nothing.
 * NOTE(review): whether *_icmp_error() may return NULL after consuming
 * the message is not visible here -- confirm.
989  */
990 extern void ipsecah_fill_defs(struct sadb_x_ecomb *, netstack_t *);
991 extern void ipsecesp_fill_defs(struct sadb_x_ecomb *, netstack_t *);
992 extern void ipsecah_algs_changed(netstack_t *);
993 extern void ipsecesp_algs_changed(netstack_t *);
994 extern void ipsecesp_init_funcs(ipsa_t *);
995 extern void ipsecah_init_funcs(ipsa_t *);
996 extern mblk_t *ipsecah_icmp_error(mblk_t *, ip_recv_attr_t *);
997 extern mblk_t *ipsecesp_icmp_error(mblk_t *, ip_recv_attr_t *);
998 
999 /*
1000  * spdsock functions that are called directly by IP.
 *
 * spdsock_update_pending_algs() takes a netstack_t * and returns
 * nothing, so callers get no success/failure indication.
1001  */
1002 extern void spdsock_update_pending_algs(netstack_t *);
1003 
1004 /*
1005  * IP functions that are called from AH and ESP.
 *
 * ipsec_outbound_sa() reports a boolean_t.  The two ipsec_inbound_*_sa()
 * routines take and return an mblk_t * and also take a pointer-to-pointer
 * to the protocol header (esph_t ** / ah_t **).
 * NOTE(review): presumably the header pointer is written back for the
 * caller and a NULL return means the packet was consumed -- confirm.
1006  */
1007 extern boolean_t ipsec_outbound_sa(mblk_t *, ip_xmit_attr_t *, uint_t);
1008 extern mblk_t *ipsec_inbound_esp_sa(mblk_t *, ip_recv_attr_t *, esph_t **);
1009 extern mblk_t *ipsec_inbound_ah_sa(mblk_t *, ip_recv_attr_t *, ah_t **);
1010 extern ipsec_policy_t *ipsec_find_policy_head(ipsec_policy_t *,
1011     ipsec_policy_head_t *, int, ipsec_selector_t *);
1012 
1013 /*
1014  * IP dropper init/destroy.
 *
 * Both routines take an ipsec_stack_t * and return nothing.  They are
 * spelled with an explicit extern like the other prototypes in this
 * header; a file-scope function declaration has external linkage either
 * way, so this is purely a consistency matter.
1015  */
1016 extern void ip_drop_init(ipsec_stack_t *);
1017 extern void ip_drop_destroy(ipsec_stack_t *);
1018 
1019 /*
1020  * Common functions
 *
 * Both routines report a boolean_t.  ip_addr_match() compares raw bytes
 * (uint8_t *) plus an int against an in6_addr_t; ipsec_label_match()
 * takes two ts_label_t pointers.
 * NOTE(review): the int passed to ip_addr_match() is presumably a prefix
 * length -- confirm whether it is counted in bits or bytes.
1021  */
1022 extern boolean_t ip_addr_match(uint8_t *, int, in6_addr_t *);
1023 extern boolean_t ipsec_label_match(ts_label_t *, ts_label_t *);
1024 
1025 /*
1026  * AH and ESP counter types.
 *
 * Both are plain 32-bit unsigned integers, so arithmetic on them wraps
 * modulo 2^32.
1027  */
1028 typedef uint32_t ah_counter;
1029 typedef uint32_t esp_counter;
1030 
1031 #endif /* _KERNEL */
1032 
1033 #ifdef	__cplusplus
1034 }
1035 #endif
1036 
1037 #endif	/* _INET_IPSEC_IMPL_H */
1038