xref: /linux/net/xfrm/xfrm_state.c (revision 9ce7677cfd7cd871adb457c80bea3b581b839641)
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *	Mitsuru KANDA @USAGI
6  * 	Kazunori MIYAZAWA @USAGI
7  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  * 		IPv6 support
9  * 	YOSHIFUJI Hideaki @USAGI
10  * 		Split up af-specific functions
11  *	Derek Atkins <derek@ihtfp.com>
12  *		Add UDP Encapsulation
13  *
14  */
15 
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22 
23 /* Each xfrm_state may be linked to two tables:
24 
25    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
26    2. Hash table by daddr to find what SAs exist for given
27       destination/tunnel endpoint. (output)
28  */
29 
30 static DEFINE_SPINLOCK(xfrm_state_lock);
31 
32 /* Hash table to find appropriate SA towards given target (endpoint
33  * of tunnel or destination of transport mode) allowed by selector.
34  *
35  * Its main use is finding an SA after the policy selects tunnel or transport mode.
36  * The ah/esp ICMP error handlers can also use it to find the offending SA.
37  */
38 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
39 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
40 
41 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
42 EXPORT_SYMBOL(km_waitq);
43 
44 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
45 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
46 
47 static struct work_struct xfrm_state_gc_work;
48 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
49 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
50 
51 static int xfrm_state_gc_flush_bundles;
52 
53 static int __xfrm_state_delete(struct xfrm_state *x);
54 
55 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
56 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
57 
58 static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
59 static void km_state_expired(struct xfrm_state *x, int hard);
60 
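/* States are never freed synchronously.  __xfrm_state_destroy() queues a
 * dead state on xfrm_state_gc_list (reusing its ->bydst list head) and the
 * work item below destroys it in process context.  xfrm_state_gc_destroy()
 * expects the state's timer to be inactive by the time it runs.
 */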
61 static void xfrm_state_gc_destroy(struct xfrm_state *x)
62 {
63 	if (del_timer(&x->timer))
64 		BUG();
65 	kfree(x->aalg);
66 	kfree(x->ealg);
67 	kfree(x->calg);
68 	kfree(x->encap);
69 	if (x->type) {
70 		x->type->destructor(x);
71 		xfrm_put_type(x->type);
72 	}
73 	kfree(x);
74 }
75 
76 static void xfrm_state_gc_task(void *data)
77 {
78 	struct xfrm_state *x;
79 	struct list_head *entry, *tmp;
80 	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
81 
82 	if (xfrm_state_gc_flush_bundles) {
83 		xfrm_state_gc_flush_bundles = 0;
84 		xfrm_flush_bundles();
85 	}
86 
87 	spin_lock_bh(&xfrm_state_gc_lock);
88 	list_splice_init(&xfrm_state_gc_list, &gc_list);
89 	spin_unlock_bh(&xfrm_state_gc_lock);
90 
91 	list_for_each_safe(entry, tmp, &gc_list) {
92 		x = list_entry(entry, struct xfrm_state, bydst);
93 		xfrm_state_gc_destroy(x);
94 	}
95 	wake_up(&km_waitq);
96 }
97 
98 static inline unsigned long make_jiffies(long secs)
99 {
100 	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
101 		return MAX_SCHEDULE_TIMEOUT-1;
102 	else
103 		return secs*HZ;
104 }
105 
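/* Per-state expiry timer.  It computes the nearest add/use lifetime and
 * either reschedules itself, raises a soft-expire notification to the key
 * managers (km.dying, km_state_expired(x, 0)), or deletes the state on a
 * hard expiry.  An XFRM_STATE_ACQ state that never received an SPI is
 * first marked EXPIRED and given a short grace period before deletion.
 * The timer holds a reference on the state, dropped on the way out.
 */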
106 static void xfrm_timer_handler(unsigned long data)
107 {
108 	struct xfrm_state *x = (struct xfrm_state*)data;
109 	unsigned long now = (unsigned long)xtime.tv_sec;
110 	long next = LONG_MAX;
111 	int warn = 0;
112 
113 	spin_lock(&x->lock);
114 	if (x->km.state == XFRM_STATE_DEAD)
115 		goto out;
116 	if (x->km.state == XFRM_STATE_EXPIRED)
117 		goto expired;
118 	if (x->lft.hard_add_expires_seconds) {
119 		long tmo = x->lft.hard_add_expires_seconds +
120 			x->curlft.add_time - now;
121 		if (tmo <= 0)
122 			goto expired;
123 		if (tmo < next)
124 			next = tmo;
125 	}
126 	if (x->lft.hard_use_expires_seconds) {
127 		long tmo = x->lft.hard_use_expires_seconds +
128 			(x->curlft.use_time ? : now) - now;
129 		if (tmo <= 0)
130 			goto expired;
131 		if (tmo < next)
132 			next = tmo;
133 	}
134 	if (x->km.dying)
135 		goto resched;
136 	if (x->lft.soft_add_expires_seconds) {
137 		long tmo = x->lft.soft_add_expires_seconds +
138 			x->curlft.add_time - now;
139 		if (tmo <= 0)
140 			warn = 1;
141 		else if (tmo < next)
142 			next = tmo;
143 	}
144 	if (x->lft.soft_use_expires_seconds) {
145 		long tmo = x->lft.soft_use_expires_seconds +
146 			(x->curlft.use_time ? : now) - now;
147 		if (tmo <= 0)
148 			warn = 1;
149 		else if (tmo < next)
150 			next = tmo;
151 	}
152 
153 	x->km.dying = warn;
154 	if (warn)
155 		km_state_expired(x, 0);
156 resched:
157 	if (next != LONG_MAX &&
158 	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
159 		xfrm_state_hold(x);
160 	goto out;
161 
162 expired:
163 	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
164 		x->km.state = XFRM_STATE_EXPIRED;
165 		wake_up(&km_waitq);
166 		next = 2;
167 		goto resched;
168 	}
169 	if (!__xfrm_state_delete(x) && x->id.spi)
170 		km_state_expired(x, 1);
171 
172 out:
173 	spin_unlock(&x->lock);
174 	xfrm_state_put(x);
175 }
176 
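/* Allocate a new state with a single reference held.  The expiry timer is
 * initialised but not armed, curlft.add_time is set to "now" and all
 * byte/packet lifetime limits default to XFRM_INF.  Returns NULL if the
 * GFP_ATOMIC allocation fails.
 */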
177 struct xfrm_state *xfrm_state_alloc(void)
178 {
179 	struct xfrm_state *x;
180 
181 	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
182 
183 	if (x) {
184 		memset(x, 0, sizeof(struct xfrm_state));
185 		atomic_set(&x->refcnt, 1);
186 		atomic_set(&x->tunnel_users, 0);
187 		INIT_LIST_HEAD(&x->bydst);
188 		INIT_LIST_HEAD(&x->byspi);
189 		init_timer(&x->timer);
190 		x->timer.function = xfrm_timer_handler;
191 		x->timer.data	  = (unsigned long)x;
192 		x->curlft.add_time = (unsigned long)xtime.tv_sec;
193 		x->lft.soft_byte_limit = XFRM_INF;
194 		x->lft.soft_packet_limit = XFRM_INF;
195 		x->lft.hard_byte_limit = XFRM_INF;
196 		x->lft.hard_packet_limit = XFRM_INF;
197 		spin_lock_init(&x->lock);
198 	}
199 	return x;
200 }
201 EXPORT_SYMBOL(xfrm_state_alloc);
202 
203 void __xfrm_state_destroy(struct xfrm_state *x)
204 {
205 	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
206 
207 	spin_lock_bh(&xfrm_state_gc_lock);
208 	list_add(&x->bydst, &xfrm_state_gc_list);
209 	spin_unlock_bh(&xfrm_state_gc_lock);
210 	schedule_work(&xfrm_state_gc_work);
211 }
212 EXPORT_SYMBOL(__xfrm_state_destroy);
213 
214 static int __xfrm_state_delete(struct xfrm_state *x)
215 {
216 	int err = -ESRCH;
217 
218 	if (x->km.state != XFRM_STATE_DEAD) {
219 		x->km.state = XFRM_STATE_DEAD;
220 		spin_lock(&xfrm_state_lock);
221 		list_del(&x->bydst);
222 		atomic_dec(&x->refcnt);
223 		if (x->id.spi) {
224 			list_del(&x->byspi);
225 			atomic_dec(&x->refcnt);
226 		}
227 		spin_unlock(&xfrm_state_lock);
228 		if (del_timer(&x->timer))
229 			atomic_dec(&x->refcnt);
230 
231 		/* The number two in this test is the reference
232 		 * mentioned in the comment below plus the reference
233 		 * our caller holds.  A larger value means that
234 		 * there are DSTs attached to this xfrm_state.
235 		 */
236 		if (atomic_read(&x->refcnt) > 2) {
237 			xfrm_state_gc_flush_bundles = 1;
238 			schedule_work(&xfrm_state_gc_work);
239 		}
240 
241 		/* All xfrm_state objects are created by xfrm_state_alloc.
242 		 * The xfrm_state_alloc call gives a reference, and that
243 		 * is what we are dropping here.
244 		 */
245 		atomic_dec(&x->refcnt);
246 		err = 0;
247 	}
248 
249 	return err;
250 }
251 
252 int xfrm_state_delete(struct xfrm_state *x)
253 {
254 	int err;
255 
256 	spin_lock_bh(&x->lock);
257 	err = __xfrm_state_delete(x);
258 	spin_unlock_bh(&x->lock);
259 
260 	return err;
261 }
262 EXPORT_SYMBOL(xfrm_state_delete);
263 
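/* Delete every SA of the given protocol (IPSEC_PROTO_ANY matches all),
 * skipping states owned by the kernel (xfrm_state_kern).  The hash lock is
 * dropped around each deletion, so the bucket scan restarts after every
 * removed entry.
 */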
264 void xfrm_state_flush(u8 proto)
265 {
266 	int i;
267 	struct xfrm_state *x;
268 
269 	spin_lock_bh(&xfrm_state_lock);
270 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
271 restart:
272 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
273 			if (!xfrm_state_kern(x) &&
274 			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
275 				xfrm_state_hold(x);
276 				spin_unlock_bh(&xfrm_state_lock);
277 
278 				xfrm_state_delete(x);
279 				xfrm_state_put(x);
280 
281 				spin_lock_bh(&xfrm_state_lock);
282 				goto restart;
283 			}
284 		}
285 	}
286 	spin_unlock_bh(&xfrm_state_lock);
287 	wake_up(&km_waitq);
288 }
289 EXPORT_SYMBOL(xfrm_state_flush);
290 
291 static int
292 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
293 		  struct xfrm_tmpl *tmpl,
294 		  xfrm_address_t *daddr, xfrm_address_t *saddr,
295 		  unsigned short family)
296 {
297 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
298 	if (!afinfo)
299 		return -1;
300 	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
301 	xfrm_state_put_afinfo(afinfo);
302 	return 0;
303 }
304 
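/* Main output-path resolver: find the best XFRM_STATE_VALID state matching
 * the template, the addresses and the flow's selector, preferring states
 * that are not dying and, among those, the most recently added.  If only an
 * XFRM_STATE_ACQ entry exists, -EAGAIN is reported so the caller can retry
 * later.  If nothing matches, a temporary ACQ state is created, the key
 * managers are queried via km_query() and the state is kept around for
 * XFRM_ACQ_EXPIRES seconds.  A reference is held on the returned state.
 */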
305 struct xfrm_state *
306 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
307 		struct flowi *fl, struct xfrm_tmpl *tmpl,
308 		struct xfrm_policy *pol, int *err,
309 		unsigned short family)
310 {
311 	unsigned h = xfrm_dst_hash(daddr, family);
312 	struct xfrm_state *x, *x0;
313 	int acquire_in_progress = 0;
314 	int error = 0;
315 	struct xfrm_state *best = NULL;
316 	struct xfrm_state_afinfo *afinfo;
317 
318 	afinfo = xfrm_state_get_afinfo(family);
319 	if (afinfo == NULL) {
320 		*err = -EAFNOSUPPORT;
321 		return NULL;
322 	}
323 
324 	spin_lock_bh(&xfrm_state_lock);
325 	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
326 		if (x->props.family == family &&
327 		    x->props.reqid == tmpl->reqid &&
328 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
329 		    tmpl->mode == x->props.mode &&
330 		    tmpl->id.proto == x->id.proto &&
331 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
332 			/* Resolution logic:
333 			   1. There is a valid state with matching selector.
334 			      Done.
335 			   2. Valid state with inappropriate selector. Skip.
336 
337 			   Entering area of "sysdeps".
338 
339			   3. If the state is not valid, its selector is temporary:
340			      it matches only the session which triggered the
341			      previous resolution.  The key manager is expected
342			      to install a state with the proper selector in
343			      response to the km_query() below.
344 			 */
345 			if (x->km.state == XFRM_STATE_VALID) {
346 				if (!xfrm_selector_match(&x->sel, fl, family))
347 					continue;
348 				if (!best ||
349 				    best->km.dying > x->km.dying ||
350 				    (best->km.dying == x->km.dying &&
351 				     best->curlft.add_time < x->curlft.add_time))
352 					best = x;
353 			} else if (x->km.state == XFRM_STATE_ACQ) {
354 				acquire_in_progress = 1;
355 			} else if (x->km.state == XFRM_STATE_ERROR ||
356 				   x->km.state == XFRM_STATE_EXPIRED) {
357 				if (xfrm_selector_match(&x->sel, fl, family))
358 					error = -ESRCH;
359 			}
360 		}
361 	}
362 
363 	x = best;
364 	if (!x && !error && !acquire_in_progress) {
365 		if (tmpl->id.spi &&
366 		    (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
367 		                               tmpl->id.proto)) != NULL) {
368 			xfrm_state_put(x0);
369 			error = -EEXIST;
370 			goto out;
371 		}
372 		x = xfrm_state_alloc();
373 		if (x == NULL) {
374 			error = -ENOMEM;
375 			goto out;
376 		}
377 		/* Initialize a temporary selector that matches only
378 		 * the current session. */
379 		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
380 
381 		if (km_query(x, tmpl, pol) == 0) {
382 			x->km.state = XFRM_STATE_ACQ;
383 			list_add_tail(&x->bydst, xfrm_state_bydst+h);
384 			xfrm_state_hold(x);
385 			if (x->id.spi) {
386 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
387 				list_add(&x->byspi, xfrm_state_byspi+h);
388 				xfrm_state_hold(x);
389 			}
390 			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
391 			xfrm_state_hold(x);
392 			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
393 			add_timer(&x->timer);
394 		} else {
395 			x->km.state = XFRM_STATE_DEAD;
396 			xfrm_state_put(x);
397 			x = NULL;
398 			error = -ESRCH;
399 		}
400 	}
401 out:
402 	if (x)
403 		xfrm_state_hold(x);
404 	else
405 		*err = acquire_in_progress ? -EAGAIN : error;
406 	spin_unlock_bh(&xfrm_state_lock);
407 	xfrm_state_put_afinfo(afinfo);
408 	return x;
409 }
410 
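/* Link a state into both hash tables (one reference per chain) and arm its
 * timer one second out so lifetimes are enforced.  The caller must hold
 * xfrm_state_lock.
 */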
411 static void __xfrm_state_insert(struct xfrm_state *x)
412 {
413 	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
414 
415 	list_add(&x->bydst, xfrm_state_bydst+h);
416 	xfrm_state_hold(x);
417 
418 	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
419 
420 	list_add(&x->byspi, xfrm_state_byspi+h);
421 	xfrm_state_hold(x);
422 
423 	if (!mod_timer(&x->timer, jiffies + HZ))
424 		xfrm_state_hold(x);
425 
426 	wake_up(&km_waitq);
427 }
428 
429 void xfrm_state_insert(struct xfrm_state *x)
430 {
431 	spin_lock_bh(&xfrm_state_lock);
432 	__xfrm_state_insert(x);
433 	spin_unlock_bh(&xfrm_state_lock);
434 }
435 EXPORT_SYMBOL(xfrm_state_insert);
436 
437 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
438 
439 int xfrm_state_add(struct xfrm_state *x)
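/* Add a fully keyed SA.  Fails with -EEXIST if an SA with the same
 * (daddr, spi, proto) already exists.  Otherwise the new state is inserted
 * and any matching acquire-state placeholder (found by km.seq or by
 * find_acq) is deleted once the lock has been dropped.
 */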
440 {
441 	struct xfrm_state_afinfo *afinfo;
442 	struct xfrm_state *x1;
443 	int family;
444 	int err;
445 
446 	family = x->props.family;
447 	afinfo = xfrm_state_get_afinfo(family);
448 	if (unlikely(afinfo == NULL))
449 		return -EAFNOSUPPORT;
450 
451 	spin_lock_bh(&xfrm_state_lock);
452 
453 	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
454 	if (x1) {
455 		xfrm_state_put(x1);
456 		x1 = NULL;
457 		err = -EEXIST;
458 		goto out;
459 	}
460 
461 	if (x->km.seq) {
462 		x1 = __xfrm_find_acq_byseq(x->km.seq);
463 		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
464 			xfrm_state_put(x1);
465 			x1 = NULL;
466 		}
467 	}
468 
469 	if (!x1)
470 		x1 = afinfo->find_acq(
471 			x->props.mode, x->props.reqid, x->id.proto,
472 			&x->id.daddr, &x->props.saddr, 0);
473 
474 	__xfrm_state_insert(x);
475 	err = 0;
476 
477 out:
478 	spin_unlock_bh(&xfrm_state_lock);
479 	xfrm_state_put_afinfo(afinfo);
480 
481 	if (x1) {
482 		xfrm_state_delete(x1);
483 		xfrm_state_put(x1);
484 	}
485 
486 	return err;
487 }
488 EXPORT_SYMBOL(xfrm_state_add);
489 
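/* Update an existing SA identified by (daddr, spi, proto).  Returns -ESRCH
 * if it does not exist and -EEXIST if it is owned by the kernel.  If the
 * existing entry is only an ACQ placeholder the new state replaces it;
 * otherwise the encapsulation and lifetime configuration are copied into
 * the existing XFRM_STATE_VALID state and its timer is restarted.
 */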
490 int xfrm_state_update(struct xfrm_state *x)
491 {
492 	struct xfrm_state_afinfo *afinfo;
493 	struct xfrm_state *x1;
494 	int err;
495 
496 	afinfo = xfrm_state_get_afinfo(x->props.family);
497 	if (unlikely(afinfo == NULL))
498 		return -EAFNOSUPPORT;
499 
500 	spin_lock_bh(&xfrm_state_lock);
501 	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
502 
503 	err = -ESRCH;
504 	if (!x1)
505 		goto out;
506 
507 	if (xfrm_state_kern(x1)) {
508 		xfrm_state_put(x1);
509 		err = -EEXIST;
510 		goto out;
511 	}
512 
513 	if (x1->km.state == XFRM_STATE_ACQ) {
514 		__xfrm_state_insert(x);
515 		x = NULL;
516 	}
517 	err = 0;
518 
519 out:
520 	spin_unlock_bh(&xfrm_state_lock);
521 	xfrm_state_put_afinfo(afinfo);
522 
523 	if (err)
524 		return err;
525 
526 	if (!x) {
527 		xfrm_state_delete(x1);
528 		xfrm_state_put(x1);
529 		return 0;
530 	}
531 
532 	err = -EINVAL;
533 	spin_lock_bh(&x1->lock);
534 	if (likely(x1->km.state == XFRM_STATE_VALID)) {
535 		if (x->encap && x1->encap)
536 			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
537 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
538 		x1->km.dying = 0;
539 
540 		if (!mod_timer(&x1->timer, jiffies + HZ))
541 			xfrm_state_hold(x1);
542 		if (x1->curlft.use_time)
543 			xfrm_state_check_expire(x1);
544 
545 		err = 0;
546 	}
547 	spin_unlock_bh(&x1->lock);
548 
549 	xfrm_state_put(x1);
550 
551 	return err;
552 }
553 EXPORT_SYMBOL(xfrm_state_update);
554 
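/* Per-packet lifetime check.  Records the first use time, expires the
 * state immediately when a hard byte/packet limit is reached and sends a
 * single soft-expire notification to the key managers when a soft limit
 * is crossed.
 */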
555 int xfrm_state_check_expire(struct xfrm_state *x)
556 {
557 	if (!x->curlft.use_time)
558 		x->curlft.use_time = (unsigned long)xtime.tv_sec;
559 
560 	if (x->km.state != XFRM_STATE_VALID)
561 		return -EINVAL;
562 
563 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
564 	    x->curlft.packets >= x->lft.hard_packet_limit) {
565 		x->km.state = XFRM_STATE_EXPIRED;
566 		if (!mod_timer(&x->timer, jiffies))
567 			xfrm_state_hold(x);
568 		return -EINVAL;
569 	}
570 
571 	if (!x->km.dying &&
572 	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
573 	     x->curlft.packets >= x->lft.soft_packet_limit)) {
574 		x->km.dying = 1;
575 		km_state_expired(x, 0);
576 	}
577 	return 0;
578 }
579 EXPORT_SYMBOL(xfrm_state_check_expire);
580 
581 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
582 {
583 	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
584 		- skb_headroom(skb);
585 
586 	if (nhead > 0)
587 		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
588 
589 	/* Check tail too... */
590 	return 0;
591 }
592 
593 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
594 {
595 	int err = xfrm_state_check_expire(x);
596 	if (err < 0)
597 		goto err;
598 	err = xfrm_state_check_space(x, skb);
599 err:
600 	return err;
601 }
602 EXPORT_SYMBOL(xfrm_state_check);
603 
604 struct xfrm_state *
605 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
606 		  unsigned short family)
607 {
608 	struct xfrm_state *x;
609 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
610 	if (!afinfo)
611 		return NULL;
612 
613 	spin_lock_bh(&xfrm_state_lock);
614 	x = afinfo->state_lookup(daddr, spi, proto);
615 	spin_unlock_bh(&xfrm_state_lock);
616 	xfrm_state_put_afinfo(afinfo);
617 	return x;
618 }
619 EXPORT_SYMBOL(xfrm_state_lookup);
620 
621 struct xfrm_state *
622 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
623 	      xfrm_address_t *daddr, xfrm_address_t *saddr,
624 	      int create, unsigned short family)
625 {
626 	struct xfrm_state *x;
627 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
628 	if (!afinfo)
629 		return NULL;
630 
631 	spin_lock_bh(&xfrm_state_lock);
632 	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
633 	spin_unlock_bh(&xfrm_state_lock);
634 	xfrm_state_put_afinfo(afinfo);
635 	return x;
636 }
637 EXPORT_SYMBOL(xfrm_find_acq);
638 
639 /* Silly enough, but I'm too lazy to build a resolution list. */
640 
641 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
642 {
643 	int i;
644 	struct xfrm_state *x;
645 
646 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
647 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
648 			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
649 				xfrm_state_hold(x);
650 				return x;
651 			}
652 		}
653 	}
654 	return NULL;
655 }
656 
657 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
658 {
659 	struct xfrm_state *x;
660 
661 	spin_lock_bh(&xfrm_state_lock);
662 	x = __xfrm_find_acq_byseq(seq);
663 	spin_unlock_bh(&xfrm_state_lock);
664 	return x;
665 }
666 EXPORT_SYMBOL(xfrm_find_acq_byseq);
667 
668 u32 xfrm_get_acqseq(void)
669 {
670 	u32 res;
671 	static u32 acqseq;
672 	static DEFINE_SPINLOCK(acqseq_lock);
673 
674 	spin_lock_bh(&acqseq_lock);
675 	res = (++acqseq ? : ++acqseq);
676 	spin_unlock_bh(&acqseq_lock);
677 	return res;
678 }
679 EXPORT_SYMBOL(xfrm_get_acqseq);
680 
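/* Assign an SPI to a state that does not have one yet.  minspi/maxspi are
 * in network byte order; if they are equal that exact value is tried,
 * otherwise random values from the range are probed until an unused one is
 * found.  On success the state is hashed by SPI and km_waitq is woken.
 */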
681 void
682 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
683 {
684 	u32 h;
685 	struct xfrm_state *x0;
686 
687 	if (x->id.spi)
688 		return;
689 
690 	if (minspi == maxspi) {
691 		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
692 		if (x0) {
693 			xfrm_state_put(x0);
694 			return;
695 		}
696 		x->id.spi = minspi;
697 	} else {
698 		u32 spi = 0;
699 		minspi = ntohl(minspi);
700 		maxspi = ntohl(maxspi);
701 		for (h=0; h<maxspi-minspi+1; h++) {
702 			spi = minspi + net_random()%(maxspi-minspi+1);
703 			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
704 			if (x0 == NULL) {
705 				x->id.spi = htonl(spi);
706 				break;
707 			}
708 			xfrm_state_put(x0);
709 		}
710 	}
711 	if (x->id.spi) {
712 		spin_lock_bh(&xfrm_state_lock);
713 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
714 		list_add(&x->byspi, xfrm_state_byspi+h);
715 		xfrm_state_hold(x);
716 		spin_unlock_bh(&xfrm_state_lock);
717 		wake_up(&km_waitq);
718 	}
719 }
720 EXPORT_SYMBOL(xfrm_alloc_spi);
721 
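/* Walk all states of the given protocol under xfrm_state_lock.  A first
 * pass counts the matches (so -ENOENT can be returned when there are
 * none), then func() is called with a countdown that reaches zero on the
 * last match; a non-zero return value aborts the walk.  Since the lock is
 * held with BHs disabled, func() must not sleep.
 *
 * A hypothetical caller dumping every ESP SA might look like:
 *
 *	static int dump_one(struct xfrm_state *x, int count, void *data)
 *	{
 *		... emit x; count reaches 0 on the last matching state ...
 *		return 0;
 *	}
 *	...
 *	xfrm_state_walk(IPPROTO_ESP, dump_one, NULL);
 */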
722 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
723 		    void *data)
724 {
725 	int i;
726 	struct xfrm_state *x;
727 	int count = 0;
728 	int err = 0;
729 
730 	spin_lock_bh(&xfrm_state_lock);
731 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
732 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
733 			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
734 				count++;
735 		}
736 	}
737 	if (count == 0) {
738 		err = -ENOENT;
739 		goto out;
740 	}
741 
742 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
743 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
744 			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
745 				continue;
746 			err = func(x, --count, data);
747 			if (err)
748 				goto out;
749 		}
750 	}
751 out:
752 	spin_unlock_bh(&xfrm_state_lock);
753 	return err;
754 }
755 EXPORT_SYMBOL(xfrm_state_walk);
756 
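/* Anti-replay check.  Sequence numbers ahead of replay.seq are always
 * accepted; older ones are accepted only if they fall inside the replay
 * window (a single 32-bit bitmap word) and have not been seen before.
 * For example, with replay.seq == 100 and props.replay_window == 32,
 * sequence numbers 69..100 may still be accepted; anything older bumps
 * stats.replay_window and a duplicate bumps stats.replay.
 * xfrm_replay_advance() below records accepted sequence numbers.
 */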
757 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
758 {
759 	u32 diff;
760 
761 	seq = ntohl(seq);
762 
763 	if (unlikely(seq == 0))
764 		return -EINVAL;
765 
766 	if (likely(seq > x->replay.seq))
767 		return 0;
768 
769 	diff = x->replay.seq - seq;
770 	if (diff >= x->props.replay_window) {
771 		x->stats.replay_window++;
772 		return -EINVAL;
773 	}
774 
775 	if (x->replay.bitmap & (1U << diff)) {
776 		x->stats.replay++;
777 		return -EINVAL;
778 	}
779 	return 0;
780 }
781 EXPORT_SYMBOL(xfrm_replay_check);
782 
783 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
784 {
785 	u32 diff;
786 
787 	seq = ntohl(seq);
788 
789 	if (seq > x->replay.seq) {
790 		diff = seq - x->replay.seq;
791 		if (diff < x->props.replay_window)
792 			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
793 		else
794 			x->replay.bitmap = 1;
795 		x->replay.seq = seq;
796 	} else {
797 		diff = x->replay.seq - seq;
798 		x->replay.bitmap |= (1U << diff);
799 	}
800 }
801 EXPORT_SYMBOL(xfrm_replay_advance);
802 
803 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
804 static DEFINE_RWLOCK(xfrm_km_lock);
805 
806 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
807 {
808 	struct xfrm_mgr *km;
809 
810 	read_lock(&xfrm_km_lock);
811 	list_for_each_entry(km, &xfrm_km_list, list)
812 		if (km->notify_policy)
813 			km->notify_policy(xp, dir, c);
814 	read_unlock(&xfrm_km_lock);
815 }
816 
817 void km_state_notify(struct xfrm_state *x, struct km_event *c)
818 {
819 	struct xfrm_mgr *km;
820 	read_lock(&xfrm_km_lock);
821 	list_for_each_entry(km, &xfrm_km_list, list)
822 		if (km->notify)
823 			km->notify(x, c);
824 	read_unlock(&xfrm_km_lock);
825 }
826 
827 EXPORT_SYMBOL(km_policy_notify);
828 EXPORT_SYMBOL(km_state_notify);
829 
830 static void km_state_expired(struct xfrm_state *x, int hard)
831 {
832 	struct km_event c;
833 
834 	c.data.hard = hard;
835 	c.event = XFRM_MSG_EXPIRE;
836 	km_state_notify(x, &c);
837 
838 	if (hard)
839 		wake_up(&km_waitq);
840 }
841 
842 /*
843  * We send to all registered managers regardless of failure;
844  * we are happy with one success.
845  */
846 static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
847 {
848 	int err = -EINVAL, acqret;
849 	struct xfrm_mgr *km;
850 
851 	read_lock(&xfrm_km_lock);
852 	list_for_each_entry(km, &xfrm_km_list, list) {
853 		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
854 		if (!acqret)
855 			err = acqret;
856 	}
857 	read_unlock(&xfrm_km_lock);
858 	return err;
859 }
860 
861 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
862 {
863 	int err = -EINVAL;
864 	struct xfrm_mgr *km;
865 
866 	read_lock(&xfrm_km_lock);
867 	list_for_each_entry(km, &xfrm_km_list, list) {
868 		if (km->new_mapping)
869 			err = km->new_mapping(x, ipaddr, sport);
870 		if (!err)
871 			break;
872 	}
873 	read_unlock(&xfrm_km_lock);
874 	return err;
875 }
876 EXPORT_SYMBOL(km_new_mapping);
877 
878 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
879 {
880 	struct km_event c;
881 
882 	c.data.hard = hard;
883 	c.event = XFRM_MSG_POLEXPIRE;
884 	km_policy_notify(pol, dir, &c);
885 
886 	if (hard)
887 		wake_up(&km_waitq);
888 }
889 
890 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
891 {
892 	int err;
893 	u8 *data;
894 	struct xfrm_mgr *km;
895 	struct xfrm_policy *pol = NULL;
896 
897 	if (optlen <= 0 || optlen > PAGE_SIZE)
898 		return -EMSGSIZE;
899 
900 	data = kmalloc(optlen, GFP_KERNEL);
901 	if (!data)
902 		return -ENOMEM;
903 
904 	err = -EFAULT;
905 	if (copy_from_user(data, optval, optlen))
906 		goto out;
907 
908 	err = -EINVAL;
909 	read_lock(&xfrm_km_lock);
910 	list_for_each_entry(km, &xfrm_km_list, list) {
911 		pol = km->compile_policy(sk->sk_family, optname, data,
912 					 optlen, &err);
913 		if (err >= 0)
914 			break;
915 	}
916 	read_unlock(&xfrm_km_lock);
917 
918 	if (err >= 0) {
919 		xfrm_sk_policy_insert(sk, err, pol);
920 		xfrm_pol_put(pol);
921 		err = 0;
922 	}
923 
924 out:
925 	kfree(data);
926 	return err;
927 }
928 EXPORT_SYMBOL(xfrm_user_policy);
929 
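/* Key managers (e.g. the PF_KEY and netlink interfaces) register here to
 * receive notifications and acquire requests.  Note that ->notify,
 * ->notify_policy and ->new_mapping are checked for NULL before use, while
 * ->acquire and ->compile_policy are called unconditionally above and must
 * therefore be provided.
 *
 * A minimal, hypothetical registration sketch (the my_km_* callbacks are
 * placeholders):
 *
 *	static struct xfrm_mgr my_km = {
 *		.notify		= my_km_notify,
 *		.acquire	= my_km_acquire,
 *		.compile_policy	= my_km_compile_policy,
 *	};
 *	...
 *	xfrm_register_km(&my_km);
 */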
930 int xfrm_register_km(struct xfrm_mgr *km)
931 {
932 	write_lock_bh(&xfrm_km_lock);
933 	list_add_tail(&km->list, &xfrm_km_list);
934 	write_unlock_bh(&xfrm_km_lock);
935 	return 0;
936 }
937 EXPORT_SYMBOL(xfrm_register_km);
938 
939 int xfrm_unregister_km(struct xfrm_mgr *km)
940 {
941 	write_lock_bh(&xfrm_km_lock);
942 	list_del(&km->list);
943 	write_unlock_bh(&xfrm_km_lock);
944 	return 0;
945 }
946 EXPORT_SYMBOL(xfrm_unregister_km);
947 
948 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
949 {
950 	int err = 0;
951 	if (unlikely(afinfo == NULL))
952 		return -EINVAL;
953 	if (unlikely(afinfo->family >= NPROTO))
954 		return -EAFNOSUPPORT;
955 	write_lock(&xfrm_state_afinfo_lock);
956 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
957 		err = -ENOBUFS;
958 	else {
959 		afinfo->state_bydst = xfrm_state_bydst;
960 		afinfo->state_byspi = xfrm_state_byspi;
961 		xfrm_state_afinfo[afinfo->family] = afinfo;
962 	}
963 	write_unlock(&xfrm_state_afinfo_lock);
964 	return err;
965 }
966 EXPORT_SYMBOL(xfrm_state_register_afinfo);
967 
968 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
969 {
970 	int err = 0;
971 	if (unlikely(afinfo == NULL))
972 		return -EINVAL;
973 	if (unlikely(afinfo->family >= NPROTO))
974 		return -EAFNOSUPPORT;
975 	write_lock(&xfrm_state_afinfo_lock);
976 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
977 		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
978 			err = -EINVAL;
979 		else {
980 			xfrm_state_afinfo[afinfo->family] = NULL;
981 			afinfo->state_byspi = NULL;
982 			afinfo->state_bydst = NULL;
983 		}
984 	}
985 	write_unlock(&xfrm_state_afinfo_lock);
986 	return err;
987 }
988 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
989 
990 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
991 {
992 	struct xfrm_state_afinfo *afinfo;
993 	if (unlikely(family >= NPROTO))
994 		return NULL;
995 	read_lock(&xfrm_state_afinfo_lock);
996 	afinfo = xfrm_state_afinfo[family];
997 	if (likely(afinfo != NULL))
998 		read_lock(&afinfo->lock);
999 	read_unlock(&xfrm_state_afinfo_lock);
1000 	return afinfo;
1001 }
1002 
1003 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1004 {
1005 	if (unlikely(afinfo == NULL))
1006 		return;
1007 	read_unlock(&afinfo->lock);
1008 }
1009 
1010 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1011 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1012 {
1013 	if (x->tunnel) {
1014 		struct xfrm_state *t = x->tunnel;
1015 
1016 		if (atomic_read(&t->tunnel_users) == 2)
1017 			xfrm_state_delete(t);
1018 		atomic_dec(&t->tunnel_users);
1019 		xfrm_state_put(t);
1020 		x->tunnel = NULL;
1021 	}
1022 }
1023 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1024 
1025 /*
1026  * This function is NOT optimal.  For example, with ESP it will give an
1027  * MTU that's usually two bytes short of being optimal.  However, it will
1028  * usually give an answer that's a multiple of 4 provided the input is
1029  * also a multiple of 4.
1030  */
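/* The loop below starts from mtu minus the xfrm header overhead, asks the
 * transform type for its worst-case grown size and keeps shrinking the
 * payload estimate until that grown size fits in the original mtu, never
 * returning less than 68 (the minimum IPv4 MTU).
 */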
1031 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1032 {
1033 	int res = mtu;
1034 
1035 	res -= x->props.header_len;
1036 
1037 	for (;;) {
1038 		int m = res;
1039 
1040 		if (m < 68)
1041 			return 68;
1042 
1043 		spin_lock_bh(&x->lock);
1044 		if (x->km.state == XFRM_STATE_VALID &&
1045 		    x->type && x->type->get_max_size)
1046 			m = x->type->get_max_size(x, m);
1047 		else
1048 			m += x->props.header_len;
1049 		spin_unlock_bh(&x->lock);
1050 
1051 		if (m <= mtu)
1052 			break;
1053 		res -= (m - mtu);
1054 	}
1055 
1056 	return res;
1057 }
1058 
1059 EXPORT_SYMBOL(xfrm_state_mtu);
1060 
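/* Finish construction of a state: apply the per-family init_flags hook,
 * bind the transform type for x->id.proto (-EPROTONOSUPPORT if none is
 * registered), run the type's init_state() and, on success, mark the
 * state XFRM_STATE_VALID.
 */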
1061 int xfrm_init_state(struct xfrm_state *x)
1062 {
1063 	struct xfrm_state_afinfo *afinfo;
1064 	int family = x->props.family;
1065 	int err;
1066 
1067 	err = -EAFNOSUPPORT;
1068 	afinfo = xfrm_state_get_afinfo(family);
1069 	if (!afinfo)
1070 		goto error;
1071 
1072 	err = 0;
1073 	if (afinfo->init_flags)
1074 		err = afinfo->init_flags(x);
1075 
1076 	xfrm_state_put_afinfo(afinfo);
1077 
1078 	if (err)
1079 		goto error;
1080 
1081 	err = -EPROTONOSUPPORT;
1082 	x->type = xfrm_get_type(x->id.proto, family);
1083 	if (x->type == NULL)
1084 		goto error;
1085 
1086 	err = x->type->init_state(x);
1087 	if (err)
1088 		goto error;
1089 
1090 	x->km.state = XFRM_STATE_VALID;
1091 
1092 error:
1093 	return err;
1094 }
1095 
1096 EXPORT_SYMBOL(xfrm_init_state);
1097 
1098 void __init xfrm_state_init(void)
1099 {
1100 	int i;
1101 
1102 	for (i=0; i<XFRM_DST_HSIZE; i++) {
1103 		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1104 		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1105 	}
1106 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1107 }
1108 
1109