xref: /linux/net/xfrm/xfrm_policy.c (revision 8fa5723aa7e053d498336b48448b292fc2e0458b)
/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

int sysctl_xfrm_larval_drop __read_mostly = 1;

#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
EXPORT_SYMBOL(xfrm_statistics);
#endif

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_RWLOCK(xfrm_policy_lock);

static struct list_head xfrm_policy_all;
unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_count);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);

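/* Selector matching: addresses are compared under the selector's prefix
 * lengths, and ports under the selector's port masks, so a zero mask acts
 * as a wildcard.  Example, for illustration only: with
 *
 *	sel->dport      = htons(80)
 *	sel->dport_mask = htons(0xffff)
 *
 * only flows to destination port 80 match, while dport_mask == 0 matches
 * any port, since ((x ^ y) & 0) == 0.  A zero sel->proto or sel->ifindex
 * likewise matches any protocol or output interface.
 */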
static inline int
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

static inline int
__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
		    unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return 0;
}

static inline struct dst_entry *__xfrm_dst_lookup(int tos,
						  xfrm_address_t *saddr,
						  xfrm_address_t *daddr,
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(tos, saddr, daddr);

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(tos, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
	}

	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

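/* Per-policy expiry timer.  Each of the four lifetime limits (hard/soft,
 * add/use) yields a time-to-expiry, and the timer fires at the nearest
 * one.  A soft expiry only warns the key manager via km_policy_expired()
 * and re-arms after XFRM_KM_TIMEOUT; a hard expiry deletes the policy.
 */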
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy*)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->walk.dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}


/* Allocate an xfrm_policy.  Not used here directly; it is meant to be
 * used by pfkeyv2 SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must have been released by this point. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->walk.dead);

	BUG_ON(policy->bundles);

	if (del_timer(&policy->timer))
		BUG();

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

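/* Final teardown of a dead policy: free any cached bundles, cancel the
 * pending timer (dropping the reference it held), and if references
 * other than ours remain, flush the flow cache so its cached pointers
 * are dropped before we put our own reference.
 */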
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(struct work_struct *work)
{
	struct xfrm_policy *policy;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_policy_gc_lock);
	gc_list.first = xfrm_policy_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
		xfrm_policy_gc_kill(policy);
}

/* Rule must be locked.  Release descendant resources, and announce the
 * entry dead.  The rule must already have been unlinked from the lists
 * by this point.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->walk.dead;
	policy->walk.dead = 1;
	write_unlock_bh(&policy->lock);

	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	spin_lock(&xfrm_policy_gc_lock);
	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
	spin_unlock(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}

struct xfrm_policy_hash {
	struct hlist_head	*table;
	unsigned int		hmask;
};

static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
static struct hlist_head *xfrm_policy_byidx __read_mostly;
static unsigned int xfrm_idx_hmask __read_mostly;
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(u32 index)
{
	return __idx_hash(index, xfrm_idx_hmask);
}

static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&xfrm_policy_inexact[dir] :
		xfrm_policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

	return xfrm_policy_bydst[dir].table + hash;
}

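/* Rehash one bucket into the new table.  The entry0/h0 dance preserves
 * the relative order of entries that land in the same new bucket: the
 * first such entry is moved to the head, and each following one is
 * chained after the previously moved entry instead of being prepended.
 */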
static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			hlist_del(entry);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del(entry);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = entry;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

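/* Double the table: e.g. an old hmask of 15 (16 buckets) becomes
 * ((15 + 1) << 1) - 1 = 31, i.e. 32 buckets.
 */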
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	xfrm_policy_bydst[dir].table = ndst;
	xfrm_policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = xfrm_policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	xfrm_policy_byidx = nidx;
	xfrm_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

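/* Grow a hash table once it holds more entries than buckets, but never
 * beyond xfrm_policy_hashmax buckets.
 */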
static inline int xfrm_bydst_should_resize(int dir, int *total)
{
	unsigned int cnt = xfrm_policy_count[dir];
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = xfrm_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *__unused)
{
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(dir, &total))
			xfrm_bydst_resize(dir);
	}
	if (xfrm_byidx_should_resize(total))
		xfrm_byidx_resize(total);

	mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

/* Generate a new index... KAME seems to generate them ordered by cost,
 * at the price of a completely unpredictable ordering of rules.  That
 * will not do here.  The low three bits of each index encode the
 * direction (idx_generator advances in steps of 8 and dir is OR-ed in),
 * so xfrm_policy_id2dir() can recover it later. */
static u32 xfrm_gen_index(u8 type, int dir)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_node *entry;
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		list = xfrm_policy_byidx + idx_hash(idx);
		found = 0;
		hlist_for_each_entry(p, entry, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

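/* Insert a policy into its per-direction chain, keeping the chain sorted
 * by ascending priority.  A policy with the same type, selector and
 * security context as an existing one replaces it (or fails with -EEXIST
 * when exclusive insertion was requested); cached bundles of the
 * policies following the new entry are flushed afterwards, since the new
 * entry may now shadow them.
 */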
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *entry, *newpos;
	struct dst_entry *gc_list;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	xfrm_policy_count[dir]++;
	atomic_inc(&flow_cache_genid);
	if (delpol) {
		hlist_del(&delpol->bydst);
		hlist_del(&delpol->byidx);
		list_del(&delpol->walk.all);
		xfrm_policy_count[dir]--;
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	list_add(&policy->walk.all, &xfrm_policy_all);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);

	read_lock_bh(&xfrm_policy_lock);
	gc_list = NULL;
	entry = &policy->bydst;
	hlist_for_each_entry_continue(policy, entry, bydst) {
		struct dst_entry *dst;

		write_lock(&policy->lock);
		dst = policy->bundles;
		if (dst) {
			struct dst_entry *tail = dst;
			while (tail->next)
				tail = tail->next;
			tail->next = gc_list;
			gc_list = dst;

			policy->bundles = NULL;
		}
		write_unlock(&policy->lock);
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;

		gc_list = dst->next;
		dst_free(dst);
	}

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
					  struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == type &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				list_del(&pol->walk.all);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
				     int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = xfrm_policy_byidx + idx_hash(id);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, byidx) {
		if (pol->type == type && pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				list_del(&pol->walk.all);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->sessionid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif

int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	write_lock_bh(&xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i, killed;

		killed = 0;
	again1:
		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			hlist_del(&pol->bydst);
			hlist_del(&pol->byidx);
			list_del(&pol->walk.all);
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->sessionid,
						 audit_info->secid);

			xfrm_policy_kill(pol);
			killed++;

			write_lock_bh(&xfrm_policy_lock);
			goto again1;
		}

		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				list_del(&pol->walk.all);
				write_unlock_bh(&xfrm_policy_lock);

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				xfrm_policy_kill(pol);
				killed++;

				write_lock_bh(&xfrm_policy_lock);
				goto again2;
			}
		}

		xfrm_policy_count[dir] -= killed;
	}
	atomic_inc(&flow_cache_genid);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

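/* Resumable dump of the policy list: the walker itself is threaded into
 * xfrm_policy_all as a dead dummy entry, so a later call can pick up
 * exactly where the previous one stopped, even if policies were added or
 * removed in between.
 */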
int xfrm_policy_walk(struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
	list_for_each_entry_from(x, &xfrm_policy_all, all) {
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	INIT_LIST_HEAD(&walk->walk.all);
	walk->walk.dead = 1;
	walk->type = type;
	walk->seq = 0;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
{
	if (list_empty(&walk->walk.all))
		return;

	write_lock_bh(&xfrm_policy_lock);
	list_del(&walk->walk.all);
	write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	struct xfrm_selector *sel = &pol->selector;
	int match, ret = -ESRCH;

	if (pol->family != family ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol->security, fl->secid,
						  dir);

	return ret;
}

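/* Two-pass lookup: first the hash chain for this (daddr, saddr) pair,
 * which holds only exact-prefix selectors, then the inexact list.  An
 * inexact match only wins when its priority beats (is lower than) the
 * best exact match found in the first pass.
 */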
static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	xfrm_address_t *daddr, *saddr;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;
	int err = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
	if (pol || err)
		goto end;
#endif
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
#ifdef CONFIG_XFRM_SUB_POLICY
end:
#endif
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
	return err;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		int err = 0;

		if (match) {
			err = security_xfrm_policy_lookup(pol->security,
						      fl->secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
						     pol->family, dir);

	list_add(&pol->walk.all, &xfrm_policy_all);
	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
	xfrm_policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del(&pol->bydst);
	hlist_del(&pol->byidx);
	list_del(&pol->walk.all);
	xfrm_policy_count[dir]--;

	return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		if (dir < XFRM_POLICY_MAX)
			atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}

static int
xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

/* Resolve list of templates for the flow, given policy. */
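/* One xfrm_state is acquired per template in policy->xfrm_vec.  For
 * tunnel and BEET mode templates the endpoints switch to the template's
 * outer addresses (looking up a local source address when the template
 * leaves it unspecified), and those become the endpoints seen by the
 * next, outer template.  Missing states of optional templates are simply
 * skipped.
 */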

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
		      struct xfrm_state **xfrm,
		      unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			family = tmpl->encap_family;
			if (xfrm_addr_any(local, family)) {
				error = xfrm_get_saddr(&tmp, remote, family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}
		else if (error == -ESRCH)
			error = -EAGAIN;

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx>=0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
	struct dst_entry *x;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EINVAL);
	x = afinfo->find_bundle(fl, policy);
	xfrm_policy_put_afinfo(afinfo);
	return x;
}

static inline int xfrm_get_tos(struct flowi *fl, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;

	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

static inline struct xfrm_dst *xfrm_alloc_dst(int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->fill_dst(xdst, dev);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

/* Allocate a chain of dst_entry's, attach known xfrm's, and calculate
 * all the metrics... In short, bundle a bundle.
 */
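/* The resulting structure, sketched (outermost transform first):
 *
 *	dst0 (xfrm[0]) --child--> dst1 (xfrm[1]) --child--> ... --child--> route
 *
 * Each xfrm_dst keeps ->route pointing at the plain route that carries
 * it, and dst0->path points at the final, non-xfrm route.
 */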

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
					    struct flowi *fl,
					    struct dst_entry *dst)
{
	unsigned long now = jiffies;
	struct net_device *dev;
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
	int nfheader_len = 0;
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
		struct xfrm_dst *xdst = xfrm_alloc_dst(family);
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
					      family);
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
		xdst->genid = xfrm[i]->genid;

		dst1->obsolete = -1;
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
		dst1->output = xfrm[i]->outer_mode->afinfo->output;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	/* Copy neighbour for reachability confirmation */
	dst0->neighbour = neigh_clone(dst->neighbour);

	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

		err = xfrm_fill_dst(xdst, dev);
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}

static int inline
xfrm_dst_alloc_copy(void **target, void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}
	memcpy(*target, src, size);
	return 0;
}

static int inline
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static int inline
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
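/* Typical usage, sketched: the caller resolves a plain route first and
 * then lets xfrm_lookup() wrap it in a bundle when policy requires one:
 *
 *	struct dst_entry *dst;			(plain route)
 *	err = xfrm_lookup(&dst, fl, sk, 0);
 *	if (err)
 *		return err;			(dst was released and NULLed)
 *	(dst is now the bundle, or the original route if no transform)
 */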
int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		  struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols;
	int pol_dead;
	int xfrm_nr;
	int pi;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);

restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
		pols[pi] = NULL;
	npols = 0;
	pol_dead = 0;
	xfrm_nr = 0;

	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !xfrm_policy_count[XFRM_POLICY_OUT])
			goto nopol;

		policy = flow_cache_lookup(fl, dst_orig->ops->family,
					   dir, xfrm_policy_lookup);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy)
		goto nopol;

	family = dst_orig->ops->family;
	pols[0] = policy;
	npols ++;
	xfrm_nr += pols[0]->xfrm_nr;

	err = -ENOENT;
	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
		goto error;

	policy->curlft.use_time = get_seconds();

	switch (policy->action) {
	default:
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;

	case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
		if (policy->xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}
#endif

		/* Try to find a matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = PTR_ERR(dst);
			goto error;
		}

		if (dst)
			break;

#ifdef CONFIG_XFRM_SUB_POLICY
		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
							    fl, family,
							    XFRM_POLICY_OUT);
			if (pols[1]) {
				if (IS_ERR(pols[1])) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
					err = PTR_ERR(pols[1]);
					goto error;
				}
				if (pols[1]->action == XFRM_POLICY_BLOCK) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
					err = -EPERM;
					goto error;
				}
				npols ++;
				xfrm_nr += pols[1]->xfrm_nr;
			}
		}

		/*
		 * Neither the flowi nor the bundle information knows the
		 * transformation template sizes, so when more than one
		 * policy is in use we can only tell whether all of them
		 * are bypass once they have all been searched.  Note that
		 * the not-transformed bypass above is likewise guarded by
		 * the non-sub-policy configuration.
		 */
		if (xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

#endif
		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
				/* EREMOTE tells the caller to generate
				 * a one-shot blackhole route.
				 */
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				xfrm_pol_put(policy);
				return -EREMOTE;
			}
			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pols_put(pols, npols);
					goto restart;
				}
				err = nx;
			}
			if (err < 0) {
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				goto error;
			}
		}
		if (nx == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
		err = PTR_ERR(dst);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
			goto error;
		}

		for (pi = 0; pi < npols; pi++) {
			read_lock_bh(&pols[pi]->lock);
			pol_dead |= pols[pi]->walk.dead;
			read_unlock_bh(&pols[pi]->lock);
		}

		write_lock_bh(&policy->lock);
		if (unlikely(pol_dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist a new bundle on a dead
			 * object. We can't enlist stale bundles either.
			 */
			write_unlock_bh(&policy->lock);
			dst_free(dst);

			if (pol_dead)
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
			else
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = -EHOSTUNREACH;
			goto error;
		}

		if (npols > 1)
			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
		else
			err = xfrm_dst_update_origin(dst, fl);
		if (unlikely(err)) {
			write_unlock_bh(&policy->lock);
			dst_free(dst);
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			goto error;
		}

		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pols_put(pols, npols);
	return 0;

error:
	xfrm_pols_put(pols, npols);
dropdst:
	dst_release(dst_orig);
	*dst_p = NULL;
	return err;

nopol:
	err = -ENOENT;
	if (flags & XFRM_LOOKUP_ICMP)
		goto dropdst;
	return 0;
}
EXPORT_SYMBOL(__xfrm_lookup);

int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	int err = __xfrm_lookup(dst_p, fl, sk, flags);

	if (err == -EREMOTE) {
		dst_release(*dst_p);
		*dst_p = NULL;
		err = -EAGAIN;
	}

	return err;
}
EXPORT_SYMBOL(xfrm_lookup);

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}

/* When the skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have the policy cached on them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * 0 or more than 0 is returned when validation succeeds (either a bypass
 * because of an optional transport-mode template, or the next index of
 * the matched secpath state relative to the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
1849 static inline int
1850 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1851 	       unsigned short family)
1852 {
1853 	int idx = start;
1854 
1855 	if (tmpl->optional) {
1856 		if (tmpl->mode == XFRM_MODE_TRANSPORT)
1857 			return start;
1858 	} else
1859 		start = -1;
1860 	for (; idx < sp->len; idx++) {
1861 		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1862 			return ++idx;
1863 		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1864 			if (start == -1)
1865 				start = -2-idx;
1866 			break;
1867 		}
1868 	}
1869 	return start;
1870 }
1871 
1872 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1873 			  unsigned int family, int reverse)
1874 {
1875 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1876 	int err;
1877 
1878 	if (unlikely(afinfo == NULL))
1879 		return -EAFNOSUPPORT;
1880 
1881 	afinfo->decode_session(skb, fl, reverse);
1882 	err = security_xfrm_decode_session(skb, &fl->secid);
1883 	xfrm_policy_put_afinfo(afinfo);
1884 	return err;
1885 }
1886 EXPORT_SYMBOL(__xfrm_decode_session);
1887 
1888 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1889 {
1890 	for (; k < sp->len; k++) {
1891 		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1892 			*idxp = k;
1893 			return 1;
1894 		}
1895 	}
1896 
1897 	return 0;
1898 }
1899 
1900 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1901 			unsigned short family)
1902 {
1903 	struct xfrm_policy *pol;
1904 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1905 	int npols = 0;
1906 	int xfrm_nr;
1907 	int pi;
1908 	int reverse;
1909 	struct flowi fl;
1910 	u8 fl_dir;
1911 	int xerr_idx = -1;
1912 
1913 	reverse = dir & ~XFRM_POLICY_MASK;
1914 	dir &= XFRM_POLICY_MASK;
1915 	fl_dir = policy_to_flow_dir(dir);
1916 
1917 	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1918 		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1919 		return 0;
1920 	}
1921 
1922 	nf_nat_decode_session(skb, &fl, family);
1923 
1924 	/* First, check used SA against their selectors. */
1925 	if (skb->sp) {
1926 		int i;
1927 
1928 		for (i=skb->sp->len-1; i>=0; i--) {
1929 			struct xfrm_state *x = skb->sp->xvec[i];
1930 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
1931 				XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
1932 				return 0;
1933 			}
1934 		}
1935 	}
1936 
1937 	pol = NULL;
1938 	if (sk && sk->sk_policy[dir]) {
1939 		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1940 		if (IS_ERR(pol)) {
1941 			XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1942 			return 0;
1943 		}
1944 	}
1945 
1946 	if (!pol)
1947 		pol = flow_cache_lookup(&fl, family, fl_dir,
1948 					xfrm_policy_lookup);
1949 
1950 	if (IS_ERR(pol)) {
1951 		XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1952 		return 0;
1953 	}
1954 
1955 	if (!pol) {
1956 		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1957 			xfrm_secpath_reject(xerr_idx, skb, &fl);
1958 			XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
1959 			return 0;
1960 		}
1961 		return 1;
1962 	}
1963 
1964 	pol->curlft.use_time = get_seconds();
1965 
1966 	pols[0] = pol;
1967 	npols ++;
1968 #ifdef CONFIG_XFRM_SUB_POLICY
1969 	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1970 		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1971 						    &fl, family,
1972 						    XFRM_POLICY_IN);
1973 		if (pols[1]) {
1974 			if (IS_ERR(pols[1])) {
1975 				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1976 				return 0;
1977 			}
1978 			pols[1]->curlft.use_time = get_seconds();
1979 			npols ++;
1980 		}
1981 	}
1982 #endif
1983 
1984 	if (pol->action == XFRM_POLICY_ALLOW) {
1985 		struct sec_path *sp;
1986 		static struct sec_path dummy;
1987 		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1988 		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1989 		struct xfrm_tmpl **tpp = tp;
1990 		int ti = 0;
1991 		int i, k;
1992 
1993 		if ((sp = skb->sp) == NULL)
1994 			sp = &dummy;
1995 
1996 		for (pi = 0; pi < npols; pi++) {
1997 			if (pols[pi] != pol &&
1998 			    pols[pi]->action != XFRM_POLICY_ALLOW) {
1999 				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
2000 				goto reject;
2001 			}
2002 			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
2003 				XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
2004 				goto reject_error;
2005 			}
2006 			for (i = 0; i < pols[pi]->xfrm_nr; i++)
2007 				tpp[ti++] = &pols[pi]->xfrm_vec[i];
2008 		}
2009 		xfrm_nr = ti;
2010 		if (npols > 1) {
2011 			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
2012 			tpp = stp;
2013 		}
2014 
2015 		/* For each tunnel xfrm, find the first matching tmpl.
2016 		 * For each tmpl before that, find corresponding xfrm.
2017 		 * Order is _important_. Later we will implement
2018 		 * some barriers, but at the moment barriers
2019 		 * are implied between each two transformations.
2020 		 */
2021 		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
2022 			k = xfrm_policy_ok(tpp[i], sp, k, family);
2023 			if (k < 0) {
2024 				if (k < -1)
2025 					/* "-2 - errored_index" returned */
2026 					xerr_idx = -(2+k);
2027 				XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
2028 				goto reject;
2029 			}
2030 		}
2031 
2032 		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
2033 			XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
2034 			goto reject;
2035 		}
2036 
2037 		xfrm_pols_put(pols, npols);
2038 		return 1;
2039 	}
2040 	XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
2041 
2042 reject:
2043 	xfrm_secpath_reject(xerr_idx, skb, &fl);
2044 reject_error:
2045 	xfrm_pols_put(pols, npols);
2046 	return 0;
2047 }
2048 EXPORT_SYMBOL(__xfrm_policy_check);
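/*
 * Worked example of the "-2 - errored_index" encoding handled in
 * __xfrm_policy_check() above (numbers are illustrative): a mismatch
 * at secpath index 3 makes xfrm_policy_ok() return -2 - 3 == -5, and
 * the caller recovers the index as xerr_idx = -(2 + -5) == 3 before
 * rejecting via xfrm_secpath_reject().
 */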
2049 
2050 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2051 {
2052 	struct flowi fl;
2053 
2054 	if (xfrm_decode_session(skb, &fl, family) < 0) {
2055 		/* XXX: we should have something like FWDHDRERROR here. */
2056 		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
2057 		return 0;
2058 	}
2059 
2060 	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
2061 }
2062 EXPORT_SYMBOL(__xfrm_route_forward);
2063 
2064 /* Optimize later using cookies and generation ids. */
2065 
2066 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2067 {
2068 	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2069 	 * to "-1" to force all XFRM destinations to get validated by
2070 	 * dst_ops->check on every use.  We do this because when a
2071 	 * normal route referenced by an XFRM dst is obsoleted, we do
2072 	 * not go looking around for all the parent XFRM dsts that
2073 	 * reference it in order to invalidate them.  It is just too much work.
2074 	 * Instead we make the checks here on every use.  For example:
2075 	 *
2076 	 *	XFRM dst A --> IPv4 dst X
2077 	 *
2078 	 * X is the "xdst->route" of A (X is also the "dst->path" of A
2079 	 * in this example).  If X is marked obsolete, "A" will not
2080 	 * notice.  That's what we are validating here via the
2081 	 * stale_bundle() check.
2082 	 *
2083 	 * When a policy's bundle is pruned, we dst_free() the XFRM
2084 	 * dst, which causes its ->obsolete field to be set to a
2085 	 * positive integer.  If an XFRM dst has been pruned
2086 	 * like this, we want to force a new route lookup.
2087 	 */
2088 	if (dst->obsolete < 0 && !stale_bundle(dst))
2089 		return dst;
2090 
2091 	return NULL;
2092 }
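/*
 * Minimal sketch of the arrangement described above (the wiring shown
 * is illustrative, not copied from the bundle-creation code).  With
 * ->obsolete forced to -1, dst_check() unconditionally calls back into
 * the ->check hook, i.e. xfrm_dst_check():
 *
 *	xdst->u.dst.obsolete = -1;		// validate on every use
 *	...
 *	if (dst_check(&xdst->u.dst, cookie) == NULL)
 *		;				// stale: relookup the route
 */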
2093 
2094 static int stale_bundle(struct dst_entry *dst)
2095 {
2096 	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2097 }
2098 
2099 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2100 {
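	/* Re-point any xfrm dsts in this bundle that still use the
	 * outgoing device at the loopback device, which never goes
	 * away, so the real device's refcount can drop and its
	 * unregistration can complete. */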
2101 	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2102 		dst->dev = dev_net(dev)->loopback_dev;
2103 		dev_hold(dst->dev);
2104 		dev_put(dev);
2105 	}
2106 }
2107 EXPORT_SYMBOL(xfrm_dst_ifdown);
2108 
2109 static void xfrm_link_failure(struct sk_buff *skb)
2110 {
2111 	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
2112 	return;
2113 }
2114 
2115 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2116 {
2117 	if (dst) {
2118 		if (dst->obsolete) {
2119 			dst_release(dst);
2120 			dst = NULL;
2121 		}
2122 	}
2123 	return dst;
2124 }
2125 
2126 static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
2127 {
2128 	struct dst_entry *dst, **dstp;
2129 
2130 	write_lock(&pol->lock);
2131 	dstp = &pol->bundles;
2132 	while ((dst = *dstp) != NULL) {
2133 		if (func(dst)) {
2134 			*dstp = dst->next;
2135 			dst->next = *gc_list_p;
2136 			*gc_list_p = dst;
2137 		} else {
2138 			dstp = &dst->next;
2139 		}
2140 	}
2141 	write_unlock(&pol->lock);
2142 }
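/*
 * The walk in prune_one_bundle() uses the classic pointer-to-the-link
 * idiom: dstp always aims at the field that points to the current
 * node, so unlinking needs no "prev" pointer.  A stand-alone sketch of
 * the same idiom (the node type here is hypothetical; unlinked nodes
 * would be freed or collected by the caller):
 *
 *	struct node { struct node *next; int dead; };
 *
 *	static void prune(struct node **headp)
 *	{
 *		struct node *n, **pp = headp;
 *
 *		while ((n = *pp) != NULL) {
 *			if (n->dead)
 *				*pp = n->next;	// unlink; pp stays put
 *			else
 *				pp = &n->next;	// advance to next link
 *		}
 *	}
 */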
2143 
2144 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
2145 {
2146 	struct dst_entry *gc_list = NULL;
2147 	int dir;
2148 
2149 	read_lock_bh(&xfrm_policy_lock);
2150 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2151 		struct xfrm_policy *pol;
2152 		struct hlist_node *entry;
2153 		struct hlist_head *table;
2154 		int i;
2155 
2156 		hlist_for_each_entry(pol, entry,
2157 				     &xfrm_policy_inexact[dir], bydst)
2158 			prune_one_bundle(pol, func, &gc_list);
2159 
2160 		table = xfrm_policy_bydst[dir].table;
2161 		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
2162 			hlist_for_each_entry(pol, entry, table + i, bydst)
2163 				prune_one_bundle(pol, func, &gc_list);
2164 		}
2165 	}
2166 	read_unlock_bh(&xfrm_policy_lock);
2167 
2168 	while (gc_list) {
2169 		struct dst_entry *dst = gc_list;
2170 		gc_list = dst->next;
2171 		dst_free(dst);
2172 	}
2173 }
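/*
 * Note the two-phase shape above: bundles are only unlinked onto
 * gc_list while xfrm_policy_lock and each pol->lock are held; the
 * dst_free() calls are deferred until every lock has been dropped.
 */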
2174 
2175 static int unused_bundle(struct dst_entry *dst)
2176 {
2177 	return !atomic_read(&dst->__refcnt);
2178 }
2179 
2180 static void __xfrm_garbage_collect(void)
2181 {
2182 	xfrm_prune_bundles(unused_bundle);
2183 }
2184 
2185 static int xfrm_flush_bundles(void)
2186 {
2187 	xfrm_prune_bundles(stale_bundle);
2188 	return 0;
2189 }
2190 
2191 static void xfrm_init_pmtu(struct dst_entry *dst)
2192 {
2193 	do {
2194 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2195 		u32 pmtu, route_mtu_cached;
2196 
2197 		pmtu = dst_mtu(dst->child);
2198 		xdst->child_mtu_cached = pmtu;
2199 
2200 		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2201 
2202 		route_mtu_cached = dst_mtu(xdst->route);
2203 		xdst->route_mtu_cached = route_mtu_cached;
2204 
2205 		if (pmtu > route_mtu_cached)
2206 			pmtu = route_mtu_cached;
2207 
2208 		dst->metrics[RTAX_MTU-1] = pmtu;
2209 	} while ((dst = dst->next));
2210 }
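/*
 * Worked example with illustrative numbers: if dst_mtu(dst->child)
 * reports 1500 and xfrm_state_mtu() trims that to, say, 1438 to make
 * room for the state's ESP overhead, while the cached route MTU is
 * only 1400, the entry ends up with min(1438, 1400) == 1400 in
 * RTAX_MTU.
 */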
2211 
2212 /* Check that the bundle accepts the flow and its components are
2213  * still valid.
2214  */
2215 
2216 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2217 		struct flowi *fl, int family, int strict)
2218 {
2219 	struct dst_entry *dst = &first->u.dst;
2220 	struct xfrm_dst *last;
2221 	u32 mtu;
2222 
2223 	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2224 	    (dst->dev && !netif_running(dst->dev)))
2225 		return 0;
2226 #ifdef CONFIG_XFRM_SUB_POLICY
2227 	if (fl) {
2228 		if (first->origin && !flow_cache_uli_match(first->origin, fl))
2229 			return 0;
2230 		if (first->partner &&
2231 		    !xfrm_selector_match(first->partner, fl, family))
2232 			return 0;
2233 	}
2234 #endif
2235 
2236 	last = NULL;
2237 
2238 	do {
2239 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2240 
2241 		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2242 			return 0;
2243 		if (fl && pol &&
2244 		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2245 			return 0;
2246 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
2247 			return 0;
2248 		if (xdst->genid != dst->xfrm->genid)
2249 			return 0;
2250 
2251 		if (strict && fl &&
2252 		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2253 		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2254 			return 0;
2255 
2256 		mtu = dst_mtu(dst->child);
2257 		if (xdst->child_mtu_cached != mtu) {
2258 			last = xdst;
2259 			xdst->child_mtu_cached = mtu;
2260 		}
2261 
2262 		if (!dst_check(xdst->route, xdst->route_cookie))
2263 			return 0;
2264 		mtu = dst_mtu(xdst->route);
2265 		if (xdst->route_mtu_cached != mtu) {
2266 			last = xdst;
2267 			xdst->route_mtu_cached = mtu;
2268 		}
2269 
2270 		dst = dst->child;
2271 	} while (dst->xfrm);
2272 
2273 	if (likely(!last))
2274 		return 1;
2275 
2276 	mtu = last->child_mtu_cached;
2277 	for (;;) {
2278 		dst = &last->u.dst;
2279 
2280 		mtu = xfrm_state_mtu(dst->xfrm, mtu);
2281 		if (mtu > last->route_mtu_cached)
2282 			mtu = last->route_mtu_cached;
2283 		dst->metrics[RTAX_MTU-1] = mtu;
2284 
2285 		if (last == first)
2286 			break;
2287 
2288 		last = (struct xfrm_dst *)last->u.dst.next;
2289 		last->child_mtu_cached = mtu;
2290 	}
2291 
2292 	return 1;
2293 }
2294 
2295 EXPORT_SYMBOL(xfrm_bundle_ok);
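/*
 * The trailing loop in xfrm_bundle_ok() pushes a changed MTU back up
 * the bundle.  With illustrative numbers: if the deepest changed entry
 * sees its child MTU drop to 1400, xfrm_state_mtu() might trim that to
 * 1340 for its state's overhead; the result is clamped to the entry's
 * cached route MTU, stored in RTAX_MTU, and then fed in as the child
 * MTU of the next entry up, which repeats the computation until
 * "first" has been updated.
 */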
2296 
2297 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2298 {
2299 	int err = 0;
2300 	if (unlikely(afinfo == NULL))
2301 		return -EINVAL;
2302 	if (unlikely(afinfo->family >= NPROTO))
2303 		return -EAFNOSUPPORT;
2304 	write_lock_bh(&xfrm_policy_afinfo_lock);
2305 	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2306 		err = -ENOBUFS;
2307 	else {
2308 		struct dst_ops *dst_ops = afinfo->dst_ops;
2309 		if (likely(dst_ops->kmem_cachep == NULL))
2310 			dst_ops->kmem_cachep = xfrm_dst_cache;
2311 		if (likely(dst_ops->check == NULL))
2312 			dst_ops->check = xfrm_dst_check;
2313 		if (likely(dst_ops->negative_advice == NULL))
2314 			dst_ops->negative_advice = xfrm_negative_advice;
2315 		if (likely(dst_ops->link_failure == NULL))
2316 			dst_ops->link_failure = xfrm_link_failure;
2317 		if (likely(afinfo->garbage_collect == NULL))
2318 			afinfo->garbage_collect = __xfrm_garbage_collect;
2319 		xfrm_policy_afinfo[afinfo->family] = afinfo;
2320 	}
2321 	write_unlock_bh(&xfrm_policy_afinfo_lock);
2322 	return err;
2323 }
2324 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
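/*
 * Sketch of a typical caller (names and field values are illustrative,
 * in the style of the per-family modules such as xfrm4_policy.c):
 *
 *	static struct dst_ops my_xfrm_dst_ops = {
 *		.family		= AF_INET,
 *	};
 *
 *	static struct xfrm_policy_afinfo my_policy_afinfo = {
 *		.family		= AF_INET,
 *		.dst_ops	= &my_xfrm_dst_ops,
 *		.dst_lookup	= my_dst_lookup,
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return xfrm_policy_register_afinfo(&my_policy_afinfo);
 *	}
 *
 * Any dst_ops hooks left NULL are filled in with the xfrm defaults by
 * xfrm_policy_register_afinfo() above.
 */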
2325 
2326 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2327 {
2328 	int err = 0;
2329 	if (unlikely(afinfo == NULL))
2330 		return -EINVAL;
2331 	if (unlikely(afinfo->family >= NPROTO))
2332 		return -EAFNOSUPPORT;
2333 	write_lock_bh(&xfrm_policy_afinfo_lock);
2334 	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2335 		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2336 			err = -EINVAL;
2337 		else {
2338 			struct dst_ops *dst_ops = afinfo->dst_ops;
2339 			xfrm_policy_afinfo[afinfo->family] = NULL;
2340 			dst_ops->kmem_cachep = NULL;
2341 			dst_ops->check = NULL;
2342 			dst_ops->negative_advice = NULL;
2343 			dst_ops->link_failure = NULL;
2344 			afinfo->garbage_collect = NULL;
2345 		}
2346 	}
2347 	write_unlock_bh(&xfrm_policy_afinfo_lock);
2348 	return err;
2349 }
2350 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2351 
2352 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2353 {
2354 	struct xfrm_policy_afinfo *afinfo;
2355 	if (unlikely(family >= NPROTO))
2356 		return NULL;
2357 	read_lock(&xfrm_policy_afinfo_lock);
2358 	afinfo = xfrm_policy_afinfo[family];
2359 	if (unlikely(!afinfo))
2360 		read_unlock(&xfrm_policy_afinfo_lock);
2361 	return afinfo;
2362 }
2363 
2364 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2365 {
2366 	read_unlock(&xfrm_policy_afinfo_lock);
2367 }
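/*
 * Note the asymmetric locking contract of the pair above: a successful
 * xfrm_policy_get_afinfo() returns with xfrm_policy_afinfo_lock
 * read-held (the lock is dropped only on failure), so every successful
 * get must be balanced by a put:
 *
 *	afinfo = xfrm_policy_get_afinfo(family);
 *	if (unlikely(!afinfo))
 *		return -EAFNOSUPPORT;
 *	// ... use afinfo under the read lock ...
 *	xfrm_policy_put_afinfo(afinfo);
 */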
2368 
2369 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2370 {
2371 	struct net_device *dev = ptr;
2372 
2373 	if (!net_eq(dev_net(dev), &init_net))
2374 		return NOTIFY_DONE;
2375 
2376 	switch (event) {
2377 	case NETDEV_DOWN:
2378 		xfrm_flush_bundles();
2379 	}
2380 	return NOTIFY_DONE;
2381 }
2382 
2383 static struct notifier_block xfrm_dev_notifier = {
2384 	.notifier_call	= xfrm_dev_event,
2385 	.next		= NULL,
2386 	.priority	= 0,
2387 };
2388 
2389 #ifdef CONFIG_XFRM_STATISTICS
2390 static int __init xfrm_statistics_init(void)
2391 {
2392 	if (snmp_mib_init((void **)xfrm_statistics,
2393 			  sizeof(struct linux_xfrm_mib)) < 0)
2394 		return -ENOMEM;
2395 	return 0;
2396 }
2397 #endif
2398 
2399 static void __init xfrm_policy_init(void)
2400 {
2401 	unsigned int hmask, sz;
2402 	int dir;
2403 
2404 	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2405 					   sizeof(struct xfrm_dst),
2406 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2407 					   NULL);
2408 
2409 	hmask = 8 - 1;
2410 	sz = (hmask+1) * sizeof(struct hlist_head);
2411 
2412 	xfrm_policy_byidx = xfrm_hash_alloc(sz);
2413 	xfrm_idx_hmask = hmask;
2414 	if (!xfrm_policy_byidx)
2415 		panic("XFRM: failed to allocate byidx hash\n");
2416 
2417 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2418 		struct xfrm_policy_hash *htab;
2419 
2420 		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2421 
2422 		htab = &xfrm_policy_bydst[dir];
2423 		htab->table = xfrm_hash_alloc(sz);
2424 		htab->hmask = hmask;
2425 		if (!htab->table)
2426 			panic("XFRM: failed to allocate bydst hash\n");
2427 	}
2428 
2429 	INIT_LIST_HEAD(&xfrm_policy_all);
2430 	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2431 	register_netdevice_notifier(&xfrm_dev_notifier);
2432 }
2433 
2434 void __init xfrm_init(void)
2435 {
2436 #ifdef CONFIG_XFRM_STATISTICS
2437 	xfrm_statistics_init();
2438 #endif
2439 	xfrm_state_init();
2440 	xfrm_policy_init();
2441 	xfrm_input_init();
2442 #ifdef CONFIG_XFRM_STATISTICS
2443 	xfrm_proc_init();
2444 #endif
2445 }
2446 
2447 #ifdef CONFIG_AUDITSYSCALL
2448 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2449 					 struct audit_buffer *audit_buf)
2450 {
2451 	struct xfrm_sec_ctx *ctx = xp->security;
2452 	struct xfrm_selector *sel = &xp->selector;
2453 
2454 	if (ctx)
2455 		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2456 				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2457 
2458 	switch (sel->family) {
2459 	case AF_INET:
2460 		audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2461 				 NIPQUAD(sel->saddr.a4));
2462 		if (sel->prefixlen_s != 32)
2463 			audit_log_format(audit_buf, " src_prefixlen=%d",
2464 					 sel->prefixlen_s);
2465 		audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2466 				 NIPQUAD(sel->daddr.a4));
2467 		if (sel->prefixlen_d != 32)
2468 			audit_log_format(audit_buf, " dst_prefixlen=%d",
2469 					 sel->prefixlen_d);
2470 		break;
2471 	case AF_INET6:
2472 		audit_log_format(audit_buf, " src=" NIP6_FMT,
2473 				 NIP6(*(struct in6_addr *)sel->saddr.a6));
2474 		if (sel->prefixlen_s != 128)
2475 			audit_log_format(audit_buf, " src_prefixlen=%d",
2476 					 sel->prefixlen_s);
2477 		audit_log_format(audit_buf, " dst=" NIP6_FMT,
2478 				 NIP6(*(struct in6_addr *)sel->daddr.a6));
2479 		if (sel->prefixlen_d != 128)
2480 			audit_log_format(audit_buf, " dst_prefixlen=%d",
2481 					 sel->prefixlen_d);
2482 		break;
2483 	}
2484 }
2485 
2486 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2487 			   uid_t auid, u32 sessionid, u32 secid)
2488 {
2489 	struct audit_buffer *audit_buf;
2490 
2491 	audit_buf = xfrm_audit_start("SPD-add");
2492 	if (audit_buf == NULL)
2493 		return;
2494 	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2495 	audit_log_format(audit_buf, " res=%u", result);
2496 	xfrm_audit_common_policyinfo(xp, audit_buf);
2497 	audit_log_end(audit_buf);
2498 }
2499 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2500 
2501 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2502 			      uid_t auid, u32 sessionid, u32 secid)
2503 {
2504 	struct audit_buffer *audit_buf;
2505 
2506 	audit_buf = xfrm_audit_start("SPD-delete");
2507 	if (audit_buf == NULL)
2508 		return;
2509 	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2510 	audit_log_format(audit_buf, " res=%u", result);
2511 	xfrm_audit_common_policyinfo(xp, audit_buf);
2512 	audit_log_end(audit_buf);
2513 }
2514 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2515 #endif
2516 
2517 #ifdef CONFIG_XFRM_MIGRATE
2518 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2519 				       struct xfrm_selector *sel_tgt)
2520 {
2521 	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2522 		if (sel_tgt->family == sel_cmp->family &&
2523 		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2524 				  sel_cmp->family) == 0 &&
2525 		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2526 				  sel_cmp->family) == 0 &&
2527 		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2528 		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2529 			return 1;
2530 		}
2531 	} else {
2532 		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2533 			return 1;
2534 		}
2535 	}
2536 	return 0;
2537 }
2538 
2539 static struct xfrm_policy *xfrm_migrate_policy_find(struct xfrm_selector *sel,
2540 						     u8 dir, u8 type)
2541 {
2542 	struct xfrm_policy *pol, *ret = NULL;
2543 	struct hlist_node *entry;
2544 	struct hlist_head *chain;
2545 	u32 priority = ~0U;
2546 
2547 	read_lock_bh(&xfrm_policy_lock);
2548 	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2549 	hlist_for_each_entry(pol, entry, chain, bydst) {
2550 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2551 		    pol->type == type) {
2552 			ret = pol;
2553 			priority = ret->priority;
2554 			break;
2555 		}
2556 	}
2557 	chain = &xfrm_policy_inexact[dir];
2558 	hlist_for_each_entry(pol, entry, chain, bydst) {
2559 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2560 		    pol->type == type &&
2561 		    pol->priority < priority) {
2562 			ret = pol;
2563 			break;
2564 		}
2565 	}
2566 
2567 	if (ret)
2568 		xfrm_pol_hold(ret);
2569 
2570 	read_unlock_bh(&xfrm_policy_lock);
2571 
2572 	return ret;
2573 }
2574 
2575 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2576 {
2577 	int match = 0;
2578 
2579 	if (t->mode == m->mode && t->id.proto == m->proto &&
2580 	    (m->reqid == 0 || t->reqid == m->reqid)) {
2581 		switch (t->mode) {
2582 		case XFRM_MODE_TUNNEL:
2583 		case XFRM_MODE_BEET:
2584 			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2585 					  m->old_family) == 0 &&
2586 			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2587 					  m->old_family) == 0) {
2588 				match = 1;
2589 			}
2590 			break;
2591 		case XFRM_MODE_TRANSPORT:
2592 			/* In transport mode the template does not store
2593 			 * any IP addresses, so we just compare mode and
2594 			 * protocol. */
2595 			match = 1;
2596 			break;
2597 		default:
2598 			break;
2599 		}
2600 	}
2601 	return match;
2602 }
2603 
2604 /* update endpoint address(es) of template(s) */
2605 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2606 			       struct xfrm_migrate *m, int num_migrate)
2607 {
2608 	struct xfrm_migrate *mp;
2609 	struct dst_entry *dst;
2610 	int i, j, n = 0;
2611 
2612 	write_lock_bh(&pol->lock);
2613 	if (unlikely(pol->walk.dead)) {
2614 		/* target policy has been deleted */
2615 		write_unlock_bh(&pol->lock);
2616 		return -ENOENT;
2617 	}
2618 
2619 	for (i = 0; i < pol->xfrm_nr; i++) {
2620 		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2621 			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2622 				continue;
2623 			n++;
2624 			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2625 			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2626 				continue;
2627 			/* update endpoints */
2628 			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2629 			       sizeof(pol->xfrm_vec[i].id.daddr));
2630 			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2631 			       sizeof(pol->xfrm_vec[i].saddr));
2632 			pol->xfrm_vec[i].encap_family = mp->new_family;
2633 			/* flush bundles */
2634 			while ((dst = pol->bundles) != NULL) {
2635 				pol->bundles = dst->next;
2636 				dst_free(dst);
2637 			}
2638 		}
2639 	}
2640 
2641 	write_unlock_bh(&pol->lock);
2642 
2643 	if (!n)
2644 		return -ENODATA;
2645 
2646 	return 0;
2647 }
2648 
2649 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2650 {
2651 	int i, j;
2652 
2653 	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2654 		return -EINVAL;
2655 
2656 	for (i = 0; i < num_migrate; i++) {
2657 		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2658 				   m[i].old_family) == 0) &&
2659 		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2660 				   m[i].old_family) == 0))
2661 			return -EINVAL;
2662 		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2663 		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2664 			return -EINVAL;
2665 
2666 		/* check if there is any duplicated entry */
2667 		for (j = i + 1; j < num_migrate; j++) {
2668 			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2669 				    sizeof(m[i].old_daddr)) &&
2670 			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2671 				    sizeof(m[i].old_saddr)) &&
2672 			    m[i].proto == m[j].proto &&
2673 			    m[i].mode == m[j].mode &&
2674 			    m[i].reqid == m[j].reqid &&
2675 			    m[i].old_family == m[j].old_family)
2676 				return -EINVAL;
2677 		}
2678 	}
2679 
2680 	return 0;
2681 }
2682 
2683 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2684 		 struct xfrm_migrate *m, int num_migrate,
2685 		 struct xfrm_kmaddress *k)
2686 {
2687 	int i, err, nx_cur = 0, nx_new = 0;
2688 	struct xfrm_policy *pol = NULL;
2689 	struct xfrm_state *x, *xc;
2690 	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2691 	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2692 	struct xfrm_migrate *mp;
2693 
2694 	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2695 		goto out;
2696 
2697 	/* Stage 1 - find policy */
2698 	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2699 		err = -ENOENT;
2700 		goto out;
2701 	}
2702 
2703 	/* Stage 2 - find and update state(s) */
2704 	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2705 		if ((x = xfrm_migrate_state_find(mp))) {
2706 			x_cur[nx_cur] = x;
2707 			nx_cur++;
2708 			if ((xc = xfrm_state_migrate(x, mp))) {
2709 				x_new[nx_new] = xc;
2710 				nx_new++;
2711 			} else {
2712 				err = -ENODATA;
2713 				goto restore_state;
2714 			}
2715 		}
2716 	}
2717 
2718 	/* Stage 3 - update policy */
2719 	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2720 		goto restore_state;
2721 
2722 	/* Stage 4 - delete old state(s) */
2723 	if (nx_cur) {
2724 		xfrm_states_put(x_cur, nx_cur);
2725 		xfrm_states_delete(x_cur, nx_cur);
2726 	}
2727 
2728 	/* Stage 5 - announce */
2729 	km_migrate(sel, dir, type, m, num_migrate, k);
2730 
2731 	xfrm_pol_put(pol);
2732 
2733 	return 0;
2734 out:
2735 	return err;
2736 
2737 restore_state:
2738 	if (pol)
2739 		xfrm_pol_put(pol);
2740 	if (nx_cur)
2741 		xfrm_states_put(x_cur, nx_cur);
2742 	if (nx_new)
2743 		xfrm_states_delete(x_new, nx_new);
2744 
2745 	return err;
2746 }
2747 EXPORT_SYMBOL(xfrm_migrate);
2748 #endif
2749