1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *	Mitsuru KANDA @USAGI
6  * 	Kazunori MIYAZAWA @USAGI
7  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  * 		IPv6 support
9  * 	YOSHIFUJI Hideaki @USAGI
10  * 		Split up af-specific functions
11  *	Derek Atkins <derek@ihtfp.com>
12  *		Add UDP Encapsulation
13  *
14  */
15 
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
24 
25 #include "xfrm_hash.h"
26 
27 struct sock *xfrm_nl;
28 EXPORT_SYMBOL(xfrm_nl);
29 
30 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
31 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32 
33 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
34 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35 
36 /* Each xfrm_state may be linked to three tables:
37 
38    1. Hash table by (spi,daddr,proto) to find an SA by SPI. (input,ctl)
39    2. Hash table by (daddr,saddr,reqid) to find the SAs that exist for a
40       given destination/tunnel endpoint. (output)
41    3. Hash table by (daddr,saddr) for address-pair lookups. (ctl,migrate) */
42 
43 static DEFINE_SPINLOCK(xfrm_state_lock);
44 
45 /* Hash table to find the appropriate SA towards a given target (endpoint
46  * of a tunnel or destination of transport mode) allowed by a selector.
47  *
48  * The main use is finding an SA after the policy selected a tunnel or
49  * transport mode. It is also used by the ah/esp icmp error handlers to
50  * find an offending SA. */
51 static struct hlist_head *xfrm_state_bydst __read_mostly;
52 static struct hlist_head *xfrm_state_bysrc __read_mostly;
53 static struct hlist_head *xfrm_state_byspi __read_mostly;
54 static unsigned int xfrm_state_hmask __read_mostly;
55 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
56 static unsigned int xfrm_state_num;
57 static unsigned int xfrm_state_genid;
58 
59 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
60 					 xfrm_address_t *saddr,
61 					 u32 reqid,
62 					 unsigned short family)
63 {
64 	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
65 }
66 
67 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
68 					 xfrm_address_t *saddr,
69 					 unsigned short family)
70 {
71 	return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
72 }
73 
74 static inline unsigned int
75 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
76 {
77 	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
78 }
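
/*
 * All three tables are sized as a power of two, so a bucket index is just
 * the full hash value masked with xfrm_state_hmask. A minimal sketch of
 * that reduction (the actual mixing lives in the __xfrm_*_hash() helpers):
 */
static inline unsigned int demo_bucket(unsigned int full_hash,
				       unsigned int hmask)
{
	/* hmask == table_size - 1, with table_size a power of two */
	return full_hash & hmask;
}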
79 
80 static void xfrm_hash_transfer(struct hlist_head *list,
81 			       struct hlist_head *ndsttable,
82 			       struct hlist_head *nsrctable,
83 			       struct hlist_head *nspitable,
84 			       unsigned int nhashmask)
85 {
86 	struct hlist_node *entry, *tmp;
87 	struct xfrm_state *x;
88 
89 	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
90 		unsigned int h;
91 
92 		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
93 				    x->props.reqid, x->props.family,
94 				    nhashmask);
95 		hlist_add_head(&x->bydst, ndsttable+h);
96 
97 		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
98 				    x->props.family,
99 				    nhashmask);
100 		hlist_add_head(&x->bysrc, nsrctable+h);
101 
102 		if (x->id.spi) {
103 			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
104 					    x->id.proto, x->props.family,
105 					    nhashmask);
106 			hlist_add_head(&x->byspi, nspitable+h);
107 		}
108 	}
109 }
110 
111 static unsigned long xfrm_hash_new_size(void)
112 {
113 	return ((xfrm_state_hmask + 1) << 1) *
114 		sizeof(struct hlist_head);
115 }
116 
117 static DEFINE_MUTEX(hash_resize_mutex);
118 
119 static void xfrm_hash_resize(struct work_struct *__unused)
120 {
121 	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
122 	unsigned long nsize, osize;
123 	unsigned int nhashmask, ohashmask;
124 	int i;
125 
126 	mutex_lock(&hash_resize_mutex);
127 
128 	nsize = xfrm_hash_new_size();
129 	ndst = xfrm_hash_alloc(nsize);
130 	if (!ndst)
131 		goto out_unlock;
132 	nsrc = xfrm_hash_alloc(nsize);
133 	if (!nsrc) {
134 		xfrm_hash_free(ndst, nsize);
135 		goto out_unlock;
136 	}
137 	nspi = xfrm_hash_alloc(nsize);
138 	if (!nspi) {
139 		xfrm_hash_free(ndst, nsize);
140 		xfrm_hash_free(nsrc, nsize);
141 		goto out_unlock;
142 	}
143 
144 	spin_lock_bh(&xfrm_state_lock);
145 
146 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
147 	for (i = xfrm_state_hmask; i >= 0; i--)
148 		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
149 				   nhashmask);
150 
151 	odst = xfrm_state_bydst;
152 	osrc = xfrm_state_bysrc;
153 	ospi = xfrm_state_byspi;
154 	ohashmask = xfrm_state_hmask;
155 
156 	xfrm_state_bydst = ndst;
157 	xfrm_state_bysrc = nsrc;
158 	xfrm_state_byspi = nspi;
159 	xfrm_state_hmask = nhashmask;
160 
161 	spin_unlock_bh(&xfrm_state_lock);
162 
163 	osize = (ohashmask + 1) * sizeof(struct hlist_head);
164 	xfrm_hash_free(odst, osize);
165 	xfrm_hash_free(osrc, osize);
166 	xfrm_hash_free(ospi, osize);
167 
168 out_unlock:
169 	mutex_unlock(&hash_resize_mutex);
170 }
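
/*
 * Each resize doubles the table: xfrm_hash_new_size() returns twice the
 * current bucket count (in bytes), and the new mask is recomputed from the
 * allocated size. A sketch of the mask arithmetic, assuming the
 * power-of-two sizing used above:
 */
static inline unsigned int demo_next_hmask(unsigned int hmask)
{
	/* 8 buckets (hmask 7) grow to 16 buckets (hmask 15), and so on */
	return ((hmask + 1) << 1) - 1;
}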
171 
172 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
173 
174 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
175 EXPORT_SYMBOL(km_waitq);
176 
177 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
178 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
179 
180 static struct work_struct xfrm_state_gc_work;
181 static HLIST_HEAD(xfrm_state_gc_list);
182 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
183 
184 int __xfrm_state_delete(struct xfrm_state *x);
185 
186 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
187 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
188 
189 static void xfrm_state_gc_destroy(struct xfrm_state *x)
190 {
191 	del_timer_sync(&x->timer);
192 	del_timer_sync(&x->rtimer);
193 	kfree(x->aalg);
194 	kfree(x->ealg);
195 	kfree(x->calg);
196 	kfree(x->encap);
197 	kfree(x->coaddr);
198 	if (x->mode)
199 		xfrm_put_mode(x->mode);
200 	if (x->type) {
201 		x->type->destructor(x);
202 		xfrm_put_type(x->type);
203 	}
204 	security_xfrm_state_free(x);
205 	kfree(x);
206 }
207 
208 static void xfrm_state_gc_task(struct work_struct *data)
209 {
210 	struct xfrm_state *x;
211 	struct hlist_node *entry, *tmp;
212 	struct hlist_head gc_list;
213 
214 	spin_lock_bh(&xfrm_state_gc_lock);
215 	gc_list.first = xfrm_state_gc_list.first;
216 	INIT_HLIST_HEAD(&xfrm_state_gc_list);
217 	spin_unlock_bh(&xfrm_state_gc_lock);
218 
219 	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
220 		xfrm_state_gc_destroy(x);
221 
222 	wake_up(&km_waitq);
223 }
224 
225 static inline unsigned long make_jiffies(long secs)
226 {
227 	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
228 		return MAX_SCHEDULE_TIMEOUT-1;
229 	else
230 		return secs*HZ;
231 }
232 
233 static void xfrm_timer_handler(unsigned long data)
234 {
235 	struct xfrm_state *x = (struct xfrm_state *)data;
236 	unsigned long now = (unsigned long)xtime.tv_sec;
237 	long next = LONG_MAX;
238 	int warn = 0;
239 	int err = 0;
240 
241 	spin_lock(&x->lock);
242 	if (x->km.state == XFRM_STATE_DEAD)
243 		goto out;
244 	if (x->km.state == XFRM_STATE_EXPIRED)
245 		goto expired;
246 	if (x->lft.hard_add_expires_seconds) {
247 		long tmo = x->lft.hard_add_expires_seconds +
248 			x->curlft.add_time - now;
249 		if (tmo <= 0)
250 			goto expired;
251 		if (tmo < next)
252 			next = tmo;
253 	}
254 	if (x->lft.hard_use_expires_seconds) {
255 		long tmo = x->lft.hard_use_expires_seconds +
256 			(x->curlft.use_time ? : now) - now;
257 		if (tmo <= 0)
258 			goto expired;
259 		if (tmo < next)
260 			next = tmo;
261 	}
262 	if (x->km.dying)
263 		goto resched;
264 	if (x->lft.soft_add_expires_seconds) {
265 		long tmo = x->lft.soft_add_expires_seconds +
266 			x->curlft.add_time - now;
267 		if (tmo <= 0)
268 			warn = 1;
269 		else if (tmo < next)
270 			next = tmo;
271 	}
272 	if (x->lft.soft_use_expires_seconds) {
273 		long tmo = x->lft.soft_use_expires_seconds +
274 			(x->curlft.use_time ? : now) - now;
275 		if (tmo <= 0)
276 			warn = 1;
277 		else if (tmo < next)
278 			next = tmo;
279 	}
280 
281 	x->km.dying = warn;
282 	if (warn)
283 		km_state_expired(x, 0, 0);
284 resched:
285 	if (next != LONG_MAX)
286 		mod_timer(&x->timer, jiffies + make_jiffies(next));
287 
288 	goto out;
289 
290 expired:
291 	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
292 		x->km.state = XFRM_STATE_EXPIRED;
293 		wake_up(&km_waitq);
294 		next = 2;
295 		goto resched;
296 	}
297 
298 	err = __xfrm_state_delete(x);
299 	if (!err && x->id.spi)
300 		km_state_expired(x, 1, 0);
301 
302 	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
303 		       AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
304 
305 out:
306 	spin_unlock(&x->lock);
307 }
308 
309 static void xfrm_replay_timer_handler(unsigned long data);
310 
311 struct xfrm_state *xfrm_state_alloc(void)
312 {
313 	struct xfrm_state *x;
314 
315 	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
316 
317 	if (x) {
318 		atomic_set(&x->refcnt, 1);
319 		atomic_set(&x->tunnel_users, 0);
320 		INIT_HLIST_NODE(&x->bydst);
321 		INIT_HLIST_NODE(&x->bysrc);
322 		INIT_HLIST_NODE(&x->byspi);
323 		init_timer(&x->timer);
324 		x->timer.function = xfrm_timer_handler;
325 		x->timer.data	  = (unsigned long)x;
326 		init_timer(&x->rtimer);
327 		x->rtimer.function = xfrm_replay_timer_handler;
328 		x->rtimer.data     = (unsigned long)x;
329 		x->curlft.add_time = (unsigned long)xtime.tv_sec;
330 		x->lft.soft_byte_limit = XFRM_INF;
331 		x->lft.soft_packet_limit = XFRM_INF;
332 		x->lft.hard_byte_limit = XFRM_INF;
333 		x->lft.hard_packet_limit = XFRM_INF;
334 		x->replay_maxage = 0;
335 		x->replay_maxdiff = 0;
336 		spin_lock_init(&x->lock);
337 	}
338 	return x;
339 }
340 EXPORT_SYMBOL(xfrm_state_alloc);
341 
342 void __xfrm_state_destroy(struct xfrm_state *x)
343 {
344 	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
345 
346 	spin_lock_bh(&xfrm_state_gc_lock);
347 	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
348 	spin_unlock_bh(&xfrm_state_gc_lock);
349 	schedule_work(&xfrm_state_gc_work);
350 }
351 EXPORT_SYMBOL(__xfrm_state_destroy);
352 
353 int __xfrm_state_delete(struct xfrm_state *x)
354 {
355 	int err = -ESRCH;
356 
357 	if (x->km.state != XFRM_STATE_DEAD) {
358 		x->km.state = XFRM_STATE_DEAD;
359 		spin_lock(&xfrm_state_lock);
360 		hlist_del(&x->bydst);
361 		hlist_del(&x->bysrc);
362 		if (x->id.spi)
363 			hlist_del(&x->byspi);
364 		xfrm_state_num--;
365 		spin_unlock(&xfrm_state_lock);
366 
367 		/* All xfrm_state objects are created by xfrm_state_alloc.
368 		 * The xfrm_state_alloc call gives a reference, and that
369 		 * is what we are dropping here.
370 		 */
371 		__xfrm_state_put(x);
372 		err = 0;
373 	}
374 
375 	return err;
376 }
377 EXPORT_SYMBOL(__xfrm_state_delete);
378 
379 int xfrm_state_delete(struct xfrm_state *x)
380 {
381 	int err;
382 
383 	spin_lock_bh(&x->lock);
384 	err = __xfrm_state_delete(x);
385 	spin_unlock_bh(&x->lock);
386 
387 	return err;
388 }
389 EXPORT_SYMBOL(xfrm_state_delete);
390 
391 void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
392 {
393 	int i;
394 	int err = 0;
395 
396 	spin_lock_bh(&xfrm_state_lock);
397 	for (i = 0; i <= xfrm_state_hmask; i++) {
398 		struct hlist_node *entry;
399 		struct xfrm_state *x;
400 restart:
401 		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
402 			if (!xfrm_state_kern(x) &&
403 			    xfrm_id_proto_match(x->id.proto, proto)) {
404 				xfrm_state_hold(x);
405 				spin_unlock_bh(&xfrm_state_lock);
406 
407 				err = xfrm_state_delete(x);
408 				xfrm_audit_log(audit_info->loginuid,
409 					       audit_info->secid,
410 					       AUDIT_MAC_IPSEC_DELSA,
411 					       err ? 0 : 1, NULL, x);
412 				xfrm_state_put(x);
413 
414 				spin_lock_bh(&xfrm_state_lock);
415 				goto restart;
416 			}
417 		}
418 	}
419 	spin_unlock_bh(&xfrm_state_lock);
420 	wake_up(&km_waitq);
421 }
422 EXPORT_SYMBOL(xfrm_state_flush);
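
/*
 * A sketch of the deletion pattern used by xfrm_state_flush() above: take a
 * private reference so the state outlives the table lock, drop the lock
 * (xfrm_state_delete() takes x->lock itself), then drop the reference. The
 * caller must rescan the chain afterwards, as it may have changed while
 * unlocked.
 */
static inline int demo_flush_one(struct xfrm_state *x)
{
	int err;

	xfrm_state_hold(x);
	spin_unlock_bh(&xfrm_state_lock);
	err = xfrm_state_delete(x);
	xfrm_state_put(x);
	spin_lock_bh(&xfrm_state_lock);
	return err;
}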
423 
424 static int
425 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
426 		  struct xfrm_tmpl *tmpl,
427 		  xfrm_address_t *daddr, xfrm_address_t *saddr,
428 		  unsigned short family)
429 {
430 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
431 	if (!afinfo)
432 		return -1;
433 	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
434 	xfrm_state_put_afinfo(afinfo);
435 	return 0;
436 }
437 
438 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
439 {
440 	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
441 	struct xfrm_state *x;
442 	struct hlist_node *entry;
443 
444 	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
445 		if (x->props.family != family ||
446 		    x->id.spi       != spi ||
447 		    x->id.proto     != proto)
448 			continue;
449 
450 		switch (family) {
451 		case AF_INET:
452 			if (x->id.daddr.a4 != daddr->a4)
453 				continue;
454 			break;
455 		case AF_INET6:
456 			if (!ipv6_addr_equal((struct in6_addr *)daddr,
457 					     (struct in6_addr *)
458 					     x->id.daddr.a6))
459 				continue;
460 			break;
461 		}
462 
463 		xfrm_state_hold(x);
464 		return x;
465 	}
466 
467 	return NULL;
468 }
469 
470 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
471 {
472 	unsigned int h = xfrm_src_hash(daddr, saddr, family);
473 	struct xfrm_state *x;
474 	struct hlist_node *entry;
475 
476 	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
477 		if (x->props.family != family ||
478 		    x->id.proto     != proto)
479 			continue;
480 
481 		switch (family) {
482 		case AF_INET:
483 			if (x->id.daddr.a4 != daddr->a4 ||
484 			    x->props.saddr.a4 != saddr->a4)
485 				continue;
486 			break;
487 		case AF_INET6:
488 			if (!ipv6_addr_equal((struct in6_addr *)daddr,
489 					     (struct in6_addr *)
490 					     x->id.daddr.a6) ||
491 			    !ipv6_addr_equal((struct in6_addr *)saddr,
492 					     (struct in6_addr *)
493 					     x->props.saddr.a6))
494 				continue;
495 			break;
496 		}
497 
498 		xfrm_state_hold(x);
499 		return x;
500 	}
501 
502 	return NULL;
503 }
504 
505 static inline struct xfrm_state *
506 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
507 {
508 	if (use_spi)
509 		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
510 					   x->id.proto, family);
511 	else
512 		return __xfrm_state_lookup_byaddr(&x->id.daddr,
513 						  &x->props.saddr,
514 						  x->id.proto, family);
515 }
516 
517 static void xfrm_hash_grow_check(int have_hash_collision)
518 {
519 	if (have_hash_collision &&
520 	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
521 	    xfrm_state_num > xfrm_state_hmask)
522 		schedule_work(&xfrm_hash_work);
523 }
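
/*
 * Growth is triggered lazily from the insert paths: only when a new entry
 * collided, the table is not already at xfrm_state_hashmax, and there are
 * more states than buckets. The predicate above, restated as a sketch:
 */
static inline int demo_should_grow(unsigned int hmask, unsigned int nstates,
				   int have_collision)
{
	return have_collision &&
	       (hmask + 1) < xfrm_state_hashmax &&
	       nstates > hmask;
}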
524 
525 struct xfrm_state *
526 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
527 		struct flowi *fl, struct xfrm_tmpl *tmpl,
528 		struct xfrm_policy *pol, int *err,
529 		unsigned short family)
530 {
531 	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
532 	struct hlist_node *entry;
533 	struct xfrm_state *x, *x0;
534 	int acquire_in_progress = 0;
535 	int error = 0;
536 	struct xfrm_state *best = NULL;
537 
538 	spin_lock_bh(&xfrm_state_lock);
539 	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
540 		if (x->props.family == family &&
541 		    x->props.reqid == tmpl->reqid &&
542 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
543 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
544 		    tmpl->mode == x->props.mode &&
545 		    tmpl->id.proto == x->id.proto &&
546 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
547 			/* Resolution logic:
548 			   1. There is a valid state with a matching selector.
549 			      Done.
550 			   2. Valid state with an inappropriate selector. Skip.
551 
552 			   Entering area of "sysdeps".
553 
554 			   3. If the state is not valid, the selector is
555 			      temporary: it matches only the session that
556 			      triggered the previous resolution. The key
557 			      manager will install a state with a proper
558 			      selector.
559 			 */
560 			if (x->km.state == XFRM_STATE_VALID) {
561 				if (!xfrm_selector_match(&x->sel, fl, family) ||
562 				    !security_xfrm_state_pol_flow_match(x, pol, fl))
563 					continue;
564 				if (!best ||
565 				    best->km.dying > x->km.dying ||
566 				    (best->km.dying == x->km.dying &&
567 				     best->curlft.add_time < x->curlft.add_time))
568 					best = x;
569 			} else if (x->km.state == XFRM_STATE_ACQ) {
570 				acquire_in_progress = 1;
571 			} else if (x->km.state == XFRM_STATE_ERROR ||
572 				   x->km.state == XFRM_STATE_EXPIRED) {
573 				if (xfrm_selector_match(&x->sel, fl, family) &&
574 				    security_xfrm_state_pol_flow_match(x, pol, fl))
575 					error = -ESRCH;
576 			}
577 		}
578 	}
579 
580 	x = best;
581 	if (!x && !error && !acquire_in_progress) {
582 		if (tmpl->id.spi &&
583 		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
584 					      tmpl->id.proto, family)) != NULL) {
585 			xfrm_state_put(x0);
586 			error = -EEXIST;
587 			goto out;
588 		}
589 		x = xfrm_state_alloc();
590 		if (x == NULL) {
591 			error = -ENOMEM;
592 			goto out;
593 		}
594 		/* Initialize a temporary selector that matches
595 		 * only the current session. */
596 		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
597 
598 		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
599 		if (error) {
600 			x->km.state = XFRM_STATE_DEAD;
601 			xfrm_state_put(x);
602 			x = NULL;
603 			goto out;
604 		}
605 
606 		if (km_query(x, tmpl, pol) == 0) {
607 			x->km.state = XFRM_STATE_ACQ;
608 			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
609 			h = xfrm_src_hash(daddr, saddr, family);
610 			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
611 			if (x->id.spi) {
612 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
613 				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
614 			}
615 			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
616 			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
617 			add_timer(&x->timer);
618 			xfrm_state_num++;
619 			xfrm_hash_grow_check(x->bydst.next != NULL);
620 		} else {
621 			x->km.state = XFRM_STATE_DEAD;
622 			xfrm_state_put(x);
623 			x = NULL;
624 			error = -ESRCH;
625 		}
626 	}
627 out:
628 	if (x)
629 		xfrm_state_hold(x);
630 	else
631 		*err = acquire_in_progress ? -EAGAIN : error;
632 	spin_unlock_bh(&xfrm_state_lock);
633 	return x;
634 }
635 
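/*
 * The candidate ordering used by xfrm_state_find() above, restated as a
 * predicate: a valid state that is not dying beats a dying one, and among
 * equals the more recently added state wins. A minimal sketch:
 */
static inline int demo_is_better(struct xfrm_state *cand,
				 struct xfrm_state *best)
{
	return !best ||
	       best->km.dying > cand->km.dying ||
	       (best->km.dying == cand->km.dying &&
		best->curlft.add_time < cand->curlft.add_time);
}
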
636 static void __xfrm_state_insert(struct xfrm_state *x)
637 {
638 	unsigned int h;
639 
640 	x->genid = ++xfrm_state_genid;
641 
642 	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
643 			  x->props.reqid, x->props.family);
644 	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
645 
646 	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
647 	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
648 
649 	if (x->id.spi) {
650 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
651 				  x->props.family);
652 
653 		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
654 	}
655 
656 	mod_timer(&x->timer, jiffies + HZ);
657 	if (x->replay_maxage)
658 		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
659 
660 	wake_up(&km_waitq);
661 
662 	xfrm_state_num++;
663 
664 	xfrm_hash_grow_check(x->bydst.next != NULL);
665 }
666 
667 /* xfrm_state_lock is held */
668 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
669 {
670 	unsigned short family = xnew->props.family;
671 	u32 reqid = xnew->props.reqid;
672 	struct xfrm_state *x;
673 	struct hlist_node *entry;
674 	unsigned int h;
675 
676 	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
677 	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
678 		if (x->props.family	== family &&
679 		    x->props.reqid	== reqid &&
680 		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
681 		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
682 			x->genid = xfrm_state_genid;
683 	}
684 }
685 
686 void xfrm_state_insert(struct xfrm_state *x)
687 {
688 	spin_lock_bh(&xfrm_state_lock);
689 	__xfrm_state_bump_genids(x);
690 	__xfrm_state_insert(x);
691 	spin_unlock_bh(&xfrm_state_lock);
692 }
693 EXPORT_SYMBOL(xfrm_state_insert);
694 
695 /* xfrm_state_lock is held */
696 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
697 {
698 	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
699 	struct hlist_node *entry;
700 	struct xfrm_state *x;
701 
702 	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
703 		if (x->props.reqid  != reqid ||
704 		    x->props.mode   != mode ||
705 		    x->props.family != family ||
706 		    x->km.state     != XFRM_STATE_ACQ ||
707 		    x->id.spi       != 0 ||
708 		    x->id.proto	    != proto)
709 			continue;
710 
711 		switch (family) {
712 		case AF_INET:
713 			if (x->id.daddr.a4    != daddr->a4 ||
714 			    x->props.saddr.a4 != saddr->a4)
715 				continue;
716 			break;
717 		case AF_INET6:
718 			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
719 					     (struct in6_addr *)daddr) ||
720 			    !ipv6_addr_equal((struct in6_addr *)
721 					     x->props.saddr.a6,
722 					     (struct in6_addr *)saddr))
723 				continue;
724 			break;
725 		}
726 
727 		xfrm_state_hold(x);
728 		return x;
729 	}
730 
731 	if (!create)
732 		return NULL;
733 
734 	x = xfrm_state_alloc();
735 	if (likely(x)) {
736 		switch (family) {
737 		case AF_INET:
738 			x->sel.daddr.a4 = daddr->a4;
739 			x->sel.saddr.a4 = saddr->a4;
740 			x->sel.prefixlen_d = 32;
741 			x->sel.prefixlen_s = 32;
742 			x->props.saddr.a4 = saddr->a4;
743 			x->id.daddr.a4 = daddr->a4;
744 			break;
745 
746 		case AF_INET6:
747 			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
748 				       (struct in6_addr *)daddr);
749 			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
750 				       (struct in6_addr *)saddr);
751 			x->sel.prefixlen_d = 128;
752 			x->sel.prefixlen_s = 128;
753 			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
754 				       (struct in6_addr *)saddr);
755 			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
756 				       (struct in6_addr *)daddr);
757 			break;
758 		}
759 
760 		x->km.state = XFRM_STATE_ACQ;
761 		x->id.proto = proto;
762 		x->props.family = family;
763 		x->props.mode = mode;
764 		x->props.reqid = reqid;
765 		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
766 		xfrm_state_hold(x);
767 		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
768 		add_timer(&x->timer);
769 		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
770 		h = xfrm_src_hash(daddr, saddr, family);
771 		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
772 		wake_up(&km_waitq);
773 
774 		xfrm_state_num++;
775 
776 		xfrm_hash_grow_check(x->bydst.next != NULL);
777 	}
778 
779 	return x;
780 }
781 
782 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
783 
784 int xfrm_state_add(struct xfrm_state *x)
785 {
786 	struct xfrm_state *x1;
787 	int family;
788 	int err;
789 	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
790 
791 	family = x->props.family;
792 
793 	spin_lock_bh(&xfrm_state_lock);
794 
795 	x1 = __xfrm_state_locate(x, use_spi, family);
796 	if (x1) {
797 		xfrm_state_put(x1);
798 		x1 = NULL;
799 		err = -EEXIST;
800 		goto out;
801 	}
802 
803 	if (use_spi && x->km.seq) {
804 		x1 = __xfrm_find_acq_byseq(x->km.seq);
805 		if (x1 && ((x1->id.proto != x->id.proto) ||
806 		    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
807 			xfrm_state_put(x1);
808 			x1 = NULL;
809 		}
810 	}
811 
812 	if (use_spi && !x1)
813 		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
814 				     x->id.proto,
815 				     &x->id.daddr, &x->props.saddr, 0);
816 
817 	__xfrm_state_bump_genids(x);
818 	__xfrm_state_insert(x);
819 	err = 0;
820 
821 out:
822 	spin_unlock_bh(&xfrm_state_lock);
823 
824 	if (x1) {
825 		xfrm_state_delete(x1);
826 		xfrm_state_put(x1);
827 	}
828 
829 	return err;
830 }
831 EXPORT_SYMBOL(xfrm_state_add);
832 
833 #ifdef CONFIG_XFRM_MIGRATE
834 struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
835 {
836 	int err = -ENOMEM;
837 	struct xfrm_state *x = xfrm_state_alloc();
838 	if (!x)
839 		goto error;
840 
841 	memcpy(&x->id, &orig->id, sizeof(x->id));
842 	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
843 	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
844 	x->props.mode = orig->props.mode;
845 	x->props.replay_window = orig->props.replay_window;
846 	x->props.reqid = orig->props.reqid;
847 	x->props.family = orig->props.family;
848 	x->props.saddr = orig->props.saddr;
849 
850 	if (orig->aalg) {
851 		x->aalg = xfrm_algo_clone(orig->aalg);
852 		if (!x->aalg)
853 			goto error;
854 	}
855 	x->props.aalgo = orig->props.aalgo;
856 
857 	if (orig->ealg) {
858 		x->ealg = xfrm_algo_clone(orig->ealg);
859 		if (!x->ealg)
860 			goto error;
861 	}
862 	x->props.ealgo = orig->props.ealgo;
863 
864 	if (orig->calg) {
865 		x->calg = xfrm_algo_clone(orig->calg);
866 		if (!x->calg)
867 			goto error;
868 	}
869 	x->props.calgo = orig->props.calgo;
870 
871 	if (orig->encap) {
872 		x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
873 		if (!x->encap)
874 			goto error;
875 	}
876 
877 	if (orig->coaddr) {
878 		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
879 				    GFP_KERNEL);
880 		if (!x->coaddr)
881 			goto error;
882 	}
883 
884 	err = xfrm_init_state(x);
885 	if (err)
886 		goto error;
887 
888 	x->props.flags = orig->props.flags;
889 
890 	x->curlft.add_time = orig->curlft.add_time;
891 	x->km.state = orig->km.state;
892 	x->km.seq = orig->km.seq;
893 
894 	return x;
895 
896  error:
897 	if (errp)
898 		*errp = err;
899 	if (x) {
900 		kfree(x->aalg);
901 		kfree(x->ealg);
902 		kfree(x->calg);
903 		kfree(x->encap);
904 		kfree(x->coaddr);
905 	}
906 	kfree(x);
907 	return NULL;
908 }
909 EXPORT_SYMBOL(xfrm_state_clone);
910 
911 /* xfrm_state_lock is held */
912 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
913 {
914 	unsigned int h;
915 	struct xfrm_state *x;
916 	struct hlist_node *entry;
917 
918 	if (m->reqid) {
919 		h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
920 				  m->reqid, m->old_family);
921 		hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
922 			if (x->props.mode != m->mode ||
923 			    x->id.proto != m->proto)
924 				continue;
925 			if (m->reqid && x->props.reqid != m->reqid)
926 				continue;
927 			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
928 					  m->old_family) ||
929 			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
930 					  m->old_family))
931 				continue;
932 			xfrm_state_hold(x);
933 			return x;
934 		}
935 	} else {
936 		h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
937 				  m->old_family);
938 		hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
939 			if (x->props.mode != m->mode ||
940 			    x->id.proto != m->proto)
941 				continue;
942 			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
943 					  m->old_family) ||
944 			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
945 					  m->old_family))
946 				continue;
947 			xfrm_state_hold(x);
948 			return x;
949 		}
950 	}
951 
952 	return NULL;
953 }
954 EXPORT_SYMBOL(xfrm_migrate_state_find);
955 
956 struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
957 				       struct xfrm_migrate *m)
958 {
959 	struct xfrm_state *xc;
960 	int err;
961 
962 	xc = xfrm_state_clone(x, &err);
963 	if (!xc)
964 		return NULL;
965 
966 	memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
967 	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
968 
969 	/* add state */
970 	if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
971 		/* Care is needed when the destination address of the state
972 		   is updated, as it is part of the (daddr, spi, proto) triplet. */
973 		xfrm_state_insert(xc);
974 	} else {
975 		if ((err = xfrm_state_add(xc)) < 0)
976 			goto error;
977 	}
978 
979 	return xc;
980 error:
981 	kfree(xc);
982 	return NULL;
983 }
984 EXPORT_SYMBOL(xfrm_state_migrate);
985 #endif
986 
987 int xfrm_state_update(struct xfrm_state *x)
988 {
989 	struct xfrm_state *x1;
990 	int err;
991 	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
992 
993 	spin_lock_bh(&xfrm_state_lock);
994 	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
995 
996 	err = -ESRCH;
997 	if (!x1)
998 		goto out;
999 
1000 	if (xfrm_state_kern(x1)) {
1001 		xfrm_state_put(x1);
1002 		err = -EEXIST;
1003 		goto out;
1004 	}
1005 
1006 	if (x1->km.state == XFRM_STATE_ACQ) {
1007 		__xfrm_state_insert(x);
1008 		x = NULL;
1009 	}
1010 	err = 0;
1011 
1012 out:
1013 	spin_unlock_bh(&xfrm_state_lock);
1014 
1015 	if (err)
1016 		return err;
1017 
1018 	if (!x) {
1019 		xfrm_state_delete(x1);
1020 		xfrm_state_put(x1);
1021 		return 0;
1022 	}
1023 
1024 	err = -EINVAL;
1025 	spin_lock_bh(&x1->lock);
1026 	if (likely(x1->km.state == XFRM_STATE_VALID)) {
1027 		if (x->encap && x1->encap)
1028 			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1029 		if (x->coaddr && x1->coaddr) {
1030 			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1031 		}
1032 		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1033 			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1034 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1035 		x1->km.dying = 0;
1036 
1037 		mod_timer(&x1->timer, jiffies + HZ);
1038 		if (x1->curlft.use_time)
1039 			xfrm_state_check_expire(x1);
1040 
1041 		err = 0;
1042 	}
1043 	spin_unlock_bh(&x1->lock);
1044 
1045 	xfrm_state_put(x1);
1046 
1047 	return err;
1048 }
1049 EXPORT_SYMBOL(xfrm_state_update);
1050 
1051 int xfrm_state_check_expire(struct xfrm_state *x)
1052 {
1053 	if (!x->curlft.use_time)
1054 		x->curlft.use_time = (unsigned long)xtime.tv_sec;
1055 
1056 	if (x->km.state != XFRM_STATE_VALID)
1057 		return -EINVAL;
1058 
1059 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1060 	    x->curlft.packets >= x->lft.hard_packet_limit) {
1061 		x->km.state = XFRM_STATE_EXPIRED;
1062 		mod_timer(&x->timer, jiffies);
1063 		return -EINVAL;
1064 	}
1065 
1066 	if (!x->km.dying &&
1067 	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
1068 	     x->curlft.packets >= x->lft.soft_packet_limit)) {
1069 		x->km.dying = 1;
1070 		km_state_expired(x, 0, 0);
1071 	}
1072 	return 0;
1073 }
1074 EXPORT_SYMBOL(xfrm_state_check_expire);
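
/*
 * The two-level lifetime model above in brief: hitting a hard byte/packet
 * limit expires the state immediately, while hitting a soft limit only
 * marks it dying and notifies the key manager. A sketch of the hard check:
 */
static inline int demo_hard_expired(const struct xfrm_lifetime_cur *cur,
				    const struct xfrm_lifetime_cfg *lft)
{
	return cur->bytes >= lft->hard_byte_limit ||
	       cur->packets >= lft->hard_packet_limit;
}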
1075 
1076 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1077 {
1078 	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1079 		- skb_headroom(skb);
1080 
1081 	if (nhead > 0)
1082 		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1083 
1084 	/* Check tail too... */
1085 	return 0;
1086 }
1087 
1088 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
1089 {
1090 	int err = xfrm_state_check_expire(x);
1091 	if (err < 0)
1092 		goto err;
1093 	err = xfrm_state_check_space(x, skb);
1094 err:
1095 	return err;
1096 }
1097 EXPORT_SYMBOL(xfrm_state_check);
1098 
1099 struct xfrm_state *
1100 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
1101 		  unsigned short family)
1102 {
1103 	struct xfrm_state *x;
1104 
1105 	spin_lock_bh(&xfrm_state_lock);
1106 	x = __xfrm_state_lookup(daddr, spi, proto, family);
1107 	spin_unlock_bh(&xfrm_state_lock);
1108 	return x;
1109 }
1110 EXPORT_SYMBOL(xfrm_state_lookup);
1111 
1112 struct xfrm_state *
1113 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1114 			 u8 proto, unsigned short family)
1115 {
1116 	struct xfrm_state *x;
1117 
1118 	spin_lock_bh(&xfrm_state_lock);
1119 	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1120 	spin_unlock_bh(&xfrm_state_lock);
1121 	return x;
1122 }
1123 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1124 
1125 struct xfrm_state *
1126 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1127 	      xfrm_address_t *daddr, xfrm_address_t *saddr,
1128 	      int create, unsigned short family)
1129 {
1130 	struct xfrm_state *x;
1131 
1132 	spin_lock_bh(&xfrm_state_lock);
1133 	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1134 	spin_unlock_bh(&xfrm_state_lock);
1135 
1136 	return x;
1137 }
1138 EXPORT_SYMBOL(xfrm_find_acq);
1139 
1140 #ifdef CONFIG_XFRM_SUB_POLICY
1141 int
1142 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1143 	       unsigned short family)
1144 {
1145 	int err = 0;
1146 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1147 	if (!afinfo)
1148 		return -EAFNOSUPPORT;
1149 
1150 	spin_lock_bh(&xfrm_state_lock);
1151 	if (afinfo->tmpl_sort)
1152 		err = afinfo->tmpl_sort(dst, src, n);
1153 	spin_unlock_bh(&xfrm_state_lock);
1154 	xfrm_state_put_afinfo(afinfo);
1155 	return err;
1156 }
1157 EXPORT_SYMBOL(xfrm_tmpl_sort);
1158 
1159 int
1160 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1161 		unsigned short family)
1162 {
1163 	int err = 0;
1164 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1165 	if (!afinfo)
1166 		return -EAFNOSUPPORT;
1167 
1168 	spin_lock_bh(&xfrm_state_lock);
1169 	if (afinfo->state_sort)
1170 		err = afinfo->state_sort(dst, src, n);
1171 	spin_unlock_bh(&xfrm_state_lock);
1172 	xfrm_state_put_afinfo(afinfo);
1173 	return err;
1174 }
1175 EXPORT_SYMBOL(xfrm_state_sort);
1176 #endif
1177 
1178 /* Silly enough, but I'm too lazy to build a resolution list. */
1179 
1180 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1181 {
1182 	int i;
1183 
1184 	for (i = 0; i <= xfrm_state_hmask; i++) {
1185 		struct hlist_node *entry;
1186 		struct xfrm_state *x;
1187 
1188 		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1189 			if (x->km.seq == seq &&
1190 			    x->km.state == XFRM_STATE_ACQ) {
1191 				xfrm_state_hold(x);
1192 				return x;
1193 			}
1194 		}
1195 	}
1196 	return NULL;
1197 }
1198 
1199 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1200 {
1201 	struct xfrm_state *x;
1202 
1203 	spin_lock_bh(&xfrm_state_lock);
1204 	x = __xfrm_find_acq_byseq(seq);
1205 	spin_unlock_bh(&xfrm_state_lock);
1206 	return x;
1207 }
1208 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1209 
1210 u32 xfrm_get_acqseq(void)
1211 {
1212 	u32 res;
1213 	static u32 acqseq;
1214 	static DEFINE_SPINLOCK(acqseq_lock);
1215 
1216 	spin_lock_bh(&acqseq_lock);
1217 	res = (++acqseq ? : ++acqseq);	/* never hand out zero */
1218 	spin_unlock_bh(&acqseq_lock);
1219 	return res;
1220 }
1221 EXPORT_SYMBOL(xfrm_get_acqseq);
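
/*
 * The '(++acqseq ? : ++acqseq)' idiom above skips zero on wraparound, so an
 * acquire sequence number is never 0. The same logic without the GNU
 * conditional extension:
 */
static inline u32 demo_next_acqseq(u32 prev)
{
	u32 next = prev + 1;

	return next ? next : next + 1;	/* 0xffffffff wraps to 1, not 0 */
}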
1222 
1223 void
1224 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1225 {
1226 	unsigned int h;
1227 	struct xfrm_state *x0;
1228 
1229 	if (x->id.spi)
1230 		return;
1231 
1232 	if (minspi == maxspi) {
1233 		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1234 		if (x0) {
1235 			xfrm_state_put(x0);
1236 			return;
1237 		}
1238 		x->id.spi = minspi;
1239 	} else {
1240 		u32 spi = 0;
1241 		u32 low = ntohl(minspi);
1242 		u32 high = ntohl(maxspi);
1243 		for (h = 0; h < high - low + 1; h++) {
1244 			spi = low + net_random()%(high-low+1);
1245 			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1246 			if (x0 == NULL) {
1247 				x->id.spi = htonl(spi);
1248 				break;
1249 			}
1250 			xfrm_state_put(x0);
1251 		}
1252 	}
1253 	if (x->id.spi) {
1254 		spin_lock_bh(&xfrm_state_lock);
1255 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1256 		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1257 		spin_unlock_bh(&xfrm_state_lock);
1258 		wake_up(&km_waitq);
1259 	}
1260 }
1261 EXPORT_SYMBOL(xfrm_alloc_spi);
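
/*
 * A sketch of how a random SPI candidate is drawn from [minspi, maxspi]
 * above; the randomness is passed in as a parameter here, instead of
 * calling net_random(), purely for illustration.
 */
static inline __be32 demo_spi_candidate(u32 low, u32 high, u32 rnd)
{
	return htonl(low + rnd % (high - low + 1));
}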
1262 
1263 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1264 		    void *data)
1265 {
1266 	int i;
1267 	struct xfrm_state *x, *last = NULL;
1268 	struct hlist_node *entry;
1269 	int count = 0;
1270 	int err = 0;
1271 
1272 	spin_lock_bh(&xfrm_state_lock);
1273 	for (i = 0; i <= xfrm_state_hmask; i++) {
1274 		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1275 			if (!xfrm_id_proto_match(x->id.proto, proto))
1276 				continue;
1277 			if (last) {
1278 				err = func(last, count, data);
1279 				if (err)
1280 					goto out;
1281 			}
1282 			last = x;
1283 			count++;
1284 		}
1285 	}
1286 	if (count == 0) {
1287 		err = -ENOENT;
1288 		goto out;
1289 	}
1290 	err = func(last, 0, data);
1291 out:
1292 	spin_unlock_bh(&xfrm_state_lock);
1293 	return err;
1294 }
1295 EXPORT_SYMBOL(xfrm_state_walk);
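
/*
 * A minimal xfrm_state_walk() usage sketch: count all states. Each state is
 * delivered to the callback exactly once; the final one arrives with
 * count == 0.
 */
static inline int demo_count_state(struct xfrm_state *x, int count, void *data)
{
	(*(int *)data)++;
	return 0;
}
/* usage: int n = 0; xfrm_state_walk(IPSEC_PROTO_ANY, demo_count_state, &n); */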
1296 
1297 
1298 void xfrm_replay_notify(struct xfrm_state *x, int event)
1299 {
1300 	struct km_event c;
1301 	/* We send notify messages in case
1302 	 *  1. we updated one of the sequence numbers, and the seqno difference
1303 	 *     is at least x->replay_maxdiff; in this case we also update the
1304 	 *     timeout of our timer function,
1305 	 *  2. x->replay_maxage has elapsed since the last update,
1306 	 *     and there were changes.
1307 	 *
1308 	 *  The state structure must be locked!
1309 	 */
1310 
1311 	switch (event) {
1312 	case XFRM_REPLAY_UPDATE:
1313 		if (x->replay_maxdiff &&
1314 		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1315 		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1316 			if (x->xflags & XFRM_TIME_DEFER)
1317 				event = XFRM_REPLAY_TIMEOUT;
1318 			else
1319 				return;
1320 		}
1321 
1322 		break;
1323 
1324 	case XFRM_REPLAY_TIMEOUT:
1325 		if ((x->replay.seq == x->preplay.seq) &&
1326 		    (x->replay.bitmap == x->preplay.bitmap) &&
1327 		    (x->replay.oseq == x->preplay.oseq)) {
1328 			x->xflags |= XFRM_TIME_DEFER;
1329 			return;
1330 		}
1331 
1332 		break;
1333 	}
1334 
1335 	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1336 	c.event = XFRM_MSG_NEWAE;
1337 	c.data.aevent = event;
1338 	km_state_notify(x, &c);
1339 
1340 	if (x->replay_maxage &&
1341 	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1342 		x->xflags &= ~XFRM_TIME_DEFER;
1343 }
1344 EXPORT_SYMBOL(xfrm_replay_notify);
1345 
1346 static void xfrm_replay_timer_handler(unsigned long data)
1347 {
1348 	struct xfrm_state *x = (struct xfrm_state *)data;
1349 
1350 	spin_lock(&x->lock);
1351 
1352 	if (x->km.state == XFRM_STATE_VALID) {
1353 		if (xfrm_aevent_is_on())
1354 			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1355 		else
1356 			x->xflags |= XFRM_TIME_DEFER;
1357 	}
1358 
1359 	spin_unlock(&x->lock);
1360 }
1361 
1362 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1363 {
1364 	u32 diff;
1365 	u32 seq = ntohl(net_seq);
1366 
1367 	if (unlikely(seq == 0))
1368 		return -EINVAL;
1369 
1370 	if (likely(seq > x->replay.seq))
1371 		return 0;
1372 
1373 	diff = x->replay.seq - seq;
1374 	if (diff >= x->props.replay_window) {
1375 		x->stats.replay_window++;
1376 		return -EINVAL;
1377 	}
1378 
1379 	if (x->replay.bitmap & (1U << diff)) {
1380 		x->stats.replay++;
1381 		return -EINVAL;
1382 	}
1383 	return 0;
1384 }
1385 EXPORT_SYMBOL(xfrm_replay_check);
1386 
1387 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1388 {
1389 	u32 diff;
1390 	u32 seq = ntohl(net_seq);
1391 
1392 	if (seq > x->replay.seq) {
1393 		diff = seq - x->replay.seq;
1394 		if (diff < x->props.replay_window)
1395 			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1396 		else
1397 			x->replay.bitmap = 1;
1398 		x->replay.seq = seq;
1399 	} else {
1400 		diff = x->replay.seq - seq;
1401 		x->replay.bitmap |= (1U << diff);
1402 	}
1403 
1404 	if (xfrm_aevent_is_on())
1405 		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1406 }
1407 EXPORT_SYMBOL(xfrm_replay_advance);
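
/*
 * The anti-replay window above in miniature: with the highest sequence seen
 * being 8 and a 32-packet window, an arriving seq of 5 maps to bit 3 of the
 * bitmap. A worked sketch of check-then-advance for that one packet:
 */
static inline int demo_replay_one(void)
{
	u32 last = 8, bitmap = 0x1;	/* bit 0: seq 8 already seen */
	u32 seq = 5, window = 32;
	u32 diff = last - seq;		/* 3, still inside the window */

	if (diff >= window || (bitmap & (1U << diff)))
		return -EINVAL;		/* too old, or a replay */
	bitmap |= 1U << diff;		/* accept: bitmap is now 0x9 */
	return 0;
}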
1408 
1409 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1410 static DEFINE_RWLOCK(xfrm_km_lock);
1411 
1412 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1413 {
1414 	struct xfrm_mgr *km;
1415 
1416 	read_lock(&xfrm_km_lock);
1417 	list_for_each_entry(km, &xfrm_km_list, list)
1418 		if (km->notify_policy)
1419 			km->notify_policy(xp, dir, c);
1420 	read_unlock(&xfrm_km_lock);
1421 }
1422 
1423 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1424 {
1425 	struct xfrm_mgr *km;
1426 	read_lock(&xfrm_km_lock);
1427 	list_for_each_entry(km, &xfrm_km_list, list)
1428 		if (km->notify)
1429 			km->notify(x, c);
1430 	read_unlock(&xfrm_km_lock);
1431 }
1432 
1433 EXPORT_SYMBOL(km_policy_notify);
1434 EXPORT_SYMBOL(km_state_notify);
1435 
1436 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1437 {
1438 	struct km_event c;
1439 
1440 	c.data.hard = hard;
1441 	c.pid = pid;
1442 	c.event = XFRM_MSG_EXPIRE;
1443 	km_state_notify(x, &c);
1444 
1445 	if (hard)
1446 		wake_up(&km_waitq);
1447 }
1448 
1449 EXPORT_SYMBOL(km_state_expired);
1450 /*
1451  * We send to all registered managers regardless of failure;
1452  * we are happy with one success.
1453  */
1454 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1455 {
1456 	int err = -EINVAL, acqret;
1457 	struct xfrm_mgr *km;
1458 
1459 	read_lock(&xfrm_km_lock);
1460 	list_for_each_entry(km, &xfrm_km_list, list) {
1461 		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1462 		if (!acqret)
1463 			err = acqret;
1464 	}
1465 	read_unlock(&xfrm_km_lock);
1466 	return err;
1467 }
1468 EXPORT_SYMBOL(km_query);
1469 
1470 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1471 {
1472 	int err = -EINVAL;
1473 	struct xfrm_mgr *km;
1474 
1475 	read_lock(&xfrm_km_lock);
1476 	list_for_each_entry(km, &xfrm_km_list, list) {
1477 		if (km->new_mapping)
1478 			err = km->new_mapping(x, ipaddr, sport);
1479 		if (!err)
1480 			break;
1481 	}
1482 	read_unlock(&xfrm_km_lock);
1483 	return err;
1484 }
1485 EXPORT_SYMBOL(km_new_mapping);
1486 
1487 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1488 {
1489 	struct km_event c;
1490 
1491 	c.data.hard = hard;
1492 	c.pid = pid;
1493 	c.event = XFRM_MSG_POLEXPIRE;
1494 	km_policy_notify(pol, dir, &c);
1495 
1496 	if (hard)
1497 		wake_up(&km_waitq);
1498 }
1499 EXPORT_SYMBOL(km_policy_expired);
1500 
1501 int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1502 	       struct xfrm_migrate *m, int num_migrate)
1503 {
1504 	int err = -EINVAL;
1505 	int ret;
1506 	struct xfrm_mgr *km;
1507 
1508 	read_lock(&xfrm_km_lock);
1509 	list_for_each_entry(km, &xfrm_km_list, list) {
1510 		if (km->migrate) {
1511 			ret = km->migrate(sel, dir, type, m, num_migrate);
1512 			if (!ret)
1513 				err = ret;
1514 		}
1515 	}
1516 	read_unlock(&xfrm_km_lock);
1517 	return err;
1518 }
1519 EXPORT_SYMBOL(km_migrate);
1520 
1521 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1522 {
1523 	int err = -EINVAL;
1524 	int ret;
1525 	struct xfrm_mgr *km;
1526 
1527 	read_lock(&xfrm_km_lock);
1528 	list_for_each_entry(km, &xfrm_km_list, list) {
1529 		if (km->report) {
1530 			ret = km->report(proto, sel, addr);
1531 			if (!ret)
1532 				err = ret;
1533 		}
1534 	}
1535 	read_unlock(&xfrm_km_lock);
1536 	return err;
1537 }
1538 EXPORT_SYMBOL(km_report);
1539 
1540 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1541 {
1542 	int err;
1543 	u8 *data;
1544 	struct xfrm_mgr *km;
1545 	struct xfrm_policy *pol = NULL;
1546 
1547 	if (optlen <= 0 || optlen > PAGE_SIZE)
1548 		return -EMSGSIZE;
1549 
1550 	data = kmalloc(optlen, GFP_KERNEL);
1551 	if (!data)
1552 		return -ENOMEM;
1553 
1554 	err = -EFAULT;
1555 	if (copy_from_user(data, optval, optlen))
1556 		goto out;
1557 
1558 	err = -EINVAL;
1559 	read_lock(&xfrm_km_lock);
1560 	list_for_each_entry(km, &xfrm_km_list, list) {
1561 		pol = km->compile_policy(sk, optname, data,
1562 					 optlen, &err);
1563 		if (err >= 0)
1564 			break;
1565 	}
1566 	read_unlock(&xfrm_km_lock);
1567 
1568 	if (err >= 0) {
1569 		xfrm_sk_policy_insert(sk, err, pol);
1570 		xfrm_pol_put(pol);
1571 		err = 0;
1572 	}
1573 
1574 out:
1575 	kfree(data);
1576 	return err;
1577 }
1578 EXPORT_SYMBOL(xfrm_user_policy);
1579 
1580 int xfrm_register_km(struct xfrm_mgr *km)
1581 {
1582 	write_lock_bh(&xfrm_km_lock);
1583 	list_add_tail(&km->list, &xfrm_km_list);
1584 	write_unlock_bh(&xfrm_km_lock);
1585 	return 0;
1586 }
1587 EXPORT_SYMBOL(xfrm_register_km);
1588 
1589 int xfrm_unregister_km(struct xfrm_mgr *km)
1590 {
1591 	write_lock_bh(&xfrm_km_lock);
1592 	list_del(&km->list);
1593 	write_unlock_bh(&xfrm_km_lock);
1594 	return 0;
1595 }
1596 EXPORT_SYMBOL(xfrm_unregister_km);
1597 
1598 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1599 {
1600 	int err = 0;
1601 	if (unlikely(afinfo == NULL))
1602 		return -EINVAL;
1603 	if (unlikely(afinfo->family >= NPROTO))
1604 		return -EAFNOSUPPORT;
1605 	write_lock_bh(&xfrm_state_afinfo_lock);
1606 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1607 		err = -ENOBUFS;
1608 	else
1609 		xfrm_state_afinfo[afinfo->family] = afinfo;
1610 	write_unlock_bh(&xfrm_state_afinfo_lock);
1611 	return err;
1612 }
1613 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1614 
1615 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1616 {
1617 	int err = 0;
1618 	if (unlikely(afinfo == NULL))
1619 		return -EINVAL;
1620 	if (unlikely(afinfo->family >= NPROTO))
1621 		return -EAFNOSUPPORT;
1622 	write_lock_bh(&xfrm_state_afinfo_lock);
1623 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1624 		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1625 			err = -EINVAL;
1626 		else
1627 			xfrm_state_afinfo[afinfo->family] = NULL;
1628 	}
1629 	write_unlock_bh(&xfrm_state_afinfo_lock);
1630 	return err;
1631 }
1632 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1633 
1634 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1635 {
1636 	struct xfrm_state_afinfo *afinfo;
1637 	if (unlikely(family >= NPROTO))
1638 		return NULL;
1639 	read_lock(&xfrm_state_afinfo_lock);
1640 	afinfo = xfrm_state_afinfo[family];
1641 	if (unlikely(!afinfo))
1642 		read_unlock(&xfrm_state_afinfo_lock);
1643 	return afinfo;
1644 }
1645 
1646 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1647 {
1648 	read_unlock(&xfrm_state_afinfo_lock);
1649 }
1650 
1651 EXPORT_SYMBOL(xfrm_state_get_afinfo);
1652 EXPORT_SYMBOL(xfrm_state_put_afinfo);
1653 
1654 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1655 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1656 {
1657 	if (x->tunnel) {
1658 		struct xfrm_state *t = x->tunnel;
1659 
1660 		if (atomic_read(&t->tunnel_users) == 2)
1661 			xfrm_state_delete(t);
1662 		atomic_dec(&t->tunnel_users);
1663 		xfrm_state_put(t);
1664 		x->tunnel = NULL;
1665 	}
1666 }
1667 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1668 
1669 /*
1670  * This function is NOT optimal.  For example, with ESP it will give an
1671  * MTU that's usually two bytes short of being optimal.  However, it will
1672  * usually give an answer that's a multiple of 4 provided the input is
1673  * also a multiple of 4.
1674  */
1675 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1676 {
1677 	int res = mtu;
1678 
1679 	res -= x->props.header_len;
1680 
1681 	for (;;) {
1682 		int m = res;
1683 
1684 		if (m < 68)
1685 			return 68;	/* IPv4 minimum MTU */
1686 
1687 		spin_lock_bh(&x->lock);
1688 		if (x->km.state == XFRM_STATE_VALID &&
1689 		    x->type && x->type->get_max_size)
1690 			m = x->type->get_max_size(x, m);
1691 		else
1692 			m += x->props.header_len;
1693 		spin_unlock_bh(&x->lock);
1694 
1695 		if (m <= mtu)
1696 			break;
1697 		res -= (m - mtu);
1698 	}
1699 
1700 	return res;
1701 }
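
/*
 * A numeric sketch of the loop above with a toy transform that pads the
 * payload to a multiple of 8 and adds 20 bytes of overhead (made-up values,
 * not real ESP parameters): for mtu = 1499 the estimate starts at 1479 and
 * walks down until the padded result fits, settling at 1472.
 */
static inline int demo_state_mtu(int mtu)
{
	int res = mtu - 20;			/* mirrors res -= header_len */

	for (;;) {
		int m = ((res + 7) & ~7) + 20;	/* toy get_max_size() */

		if (m <= mtu)
			break;
		res -= m - mtu;
	}
	return res;				/* 68-byte floor omitted */
}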
1702 
1703 int xfrm_init_state(struct xfrm_state *x)
1704 {
1705 	struct xfrm_state_afinfo *afinfo;
1706 	int family = x->props.family;
1707 	int err;
1708 
1709 	err = -EAFNOSUPPORT;
1710 	afinfo = xfrm_state_get_afinfo(family);
1711 	if (!afinfo)
1712 		goto error;
1713 
1714 	err = 0;
1715 	if (afinfo->init_flags)
1716 		err = afinfo->init_flags(x);
1717 
1718 	xfrm_state_put_afinfo(afinfo);
1719 
1720 	if (err)
1721 		goto error;
1722 
1723 	err = -EPROTONOSUPPORT;
1724 	x->type = xfrm_get_type(x->id.proto, family);
1725 	if (x->type == NULL)
1726 		goto error;
1727 
1728 	err = x->type->init_state(x);
1729 	if (err)
1730 		goto error;
1731 
1732 	x->mode = xfrm_get_mode(x->props.mode, family);
1733 	if (x->mode == NULL)
1734 		goto error;
1735 
1736 	x->km.state = XFRM_STATE_VALID;
1737 
1738 error:
1739 	return err;
1740 }
1741 
1742 EXPORT_SYMBOL(xfrm_init_state);
1743 
1744 void __init xfrm_state_init(void)
1745 {
1746 	unsigned int sz;
1747 
1748 	sz = sizeof(struct hlist_head) * 8;
1749 
1750 	xfrm_state_bydst = xfrm_hash_alloc(sz);
1751 	xfrm_state_bysrc = xfrm_hash_alloc(sz);
1752 	xfrm_state_byspi = xfrm_hash_alloc(sz);
1753 	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1754 		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1755 	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1756 
1757 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1758 }
1759 
1760