1 /* 2 * xfrm_policy.c 3 * 4 * Changes: 5 * Mitsuru KANDA @USAGI 6 * Kazunori MIYAZAWA @USAGI 7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com> 8 * IPv6 support 9 * Kazunori MIYAZAWA @USAGI 10 * YOSHIFUJI Hideaki 11 * Split up af-specific portion 12 * Derek Atkins <derek@ihtfp.com> Add the post_input processor 13 * 14 */ 15 16 #include <linux/err.h> 17 #include <linux/slab.h> 18 #include <linux/kmod.h> 19 #include <linux/list.h> 20 #include <linux/spinlock.h> 21 #include <linux/workqueue.h> 22 #include <linux/notifier.h> 23 #include <linux/netdevice.h> 24 #include <linux/netfilter.h> 25 #include <linux/module.h> 26 #include <linux/cache.h> 27 #include <linux/audit.h> 28 #include <net/dst.h> 29 #include <net/flow.h> 30 #include <net/xfrm.h> 31 #include <net/ip.h> 32 #ifdef CONFIG_XFRM_STATISTICS 33 #include <net/snmp.h> 34 #endif 35 36 #include "xfrm_hash.h" 37 38 #define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) 39 #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) 40 #define XFRM_MAX_QUEUE_LEN 100 41 42 struct xfrm_flo { 43 struct dst_entry *dst_orig; 44 u8 flags; 45 }; 46 47 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); 48 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] 49 __read_mostly; 50 51 static struct kmem_cache *xfrm_dst_cache __read_mostly; 52 53 static void xfrm_init_pmtu(struct dst_entry *dst); 54 static int stale_bundle(struct dst_entry *dst); 55 static int xfrm_bundle_ok(struct xfrm_dst *xdst); 56 static void xfrm_policy_queue_process(unsigned long arg); 57 58 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 59 int dir); 60 61 static inline bool 62 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) 63 { 64 const struct flowi4 *fl4 = &fl->u.ip4; 65 66 return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && 67 addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && 68 !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && 69 !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && 70 (fl4->flowi4_proto == sel->proto || !sel->proto) && 71 (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); 72 } 73 74 static inline bool 75 __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) 76 { 77 const struct flowi6 *fl6 = &fl->u.ip6; 78 79 return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && 80 addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && 81 !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && 82 !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && 83 (fl6->flowi6_proto == sel->proto || !sel->proto) && 84 (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); 85 } 86 87 bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, 88 unsigned short family) 89 { 90 switch (family) { 91 case AF_INET: 92 return __xfrm4_selector_match(sel, fl); 93 case AF_INET6: 94 return __xfrm6_selector_match(sel, fl); 95 } 96 return false; 97 } 98 99 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) 100 { 101 struct xfrm_policy_afinfo *afinfo; 102 103 if (unlikely(family >= NPROTO)) 104 return NULL; 105 rcu_read_lock(); 106 afinfo = rcu_dereference(xfrm_policy_afinfo[family]); 107 if (unlikely(!afinfo)) 108 rcu_read_unlock(); 109 return afinfo; 110 } 111 112 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) 113 { 114 rcu_read_unlock(); 115 } 116 117 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, 118 const xfrm_address_t *saddr, 119 const xfrm_address_t *daddr, 120 int family) 121 { 122 struct xfrm_policy_afinfo *afinfo; 123 struct dst_entry *dst; 124 125 afinfo = xfrm_policy_get_afinfo(family); 126 if (unlikely(afinfo == NULL)) 127 return ERR_PTR(-EAFNOSUPPORT); 128 129 dst = afinfo->dst_lookup(net, tos, saddr, daddr); 130 131 xfrm_policy_put_afinfo(afinfo); 132 133 return dst; 134 } 135 136 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, 137 xfrm_address_t *prev_saddr, 138 xfrm_address_t *prev_daddr, 139 int family) 140 { 141 struct net *net = xs_net(x); 142 xfrm_address_t *saddr = &x->props.saddr; 143 xfrm_address_t *daddr = &x->id.daddr; 144 struct dst_entry *dst; 145 146 if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { 147 saddr = x->coaddr; 148 daddr = prev_daddr; 149 } 150 if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { 151 saddr = prev_saddr; 152 daddr = x->coaddr; 153 } 154 155 dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); 156 157 if (!IS_ERR(dst)) { 158 if (prev_saddr != saddr) 159 memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); 160 if (prev_daddr != daddr) 161 memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); 162 } 163 164 return dst; 165 } 166 167 static inline unsigned long make_jiffies(long secs) 168 { 169 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) 170 return MAX_SCHEDULE_TIMEOUT-1; 171 else 172 return secs*HZ; 173 } 174 175 static void xfrm_policy_timer(unsigned long data) 176 { 177 struct xfrm_policy *xp = (struct xfrm_policy *)data; 178 unsigned long now = get_seconds(); 179 long next = LONG_MAX; 180 int warn = 0; 181 int dir; 182 183 read_lock(&xp->lock); 184 185 if (unlikely(xp->walk.dead)) 186 goto out; 187 188 dir = xfrm_policy_id2dir(xp->index); 189 190 if (xp->lft.hard_add_expires_seconds) { 191 long tmo = xp->lft.hard_add_expires_seconds + 192 xp->curlft.add_time - now; 193 if (tmo <= 0) 194 goto expired; 195 if (tmo < next) 196 next = tmo; 197 } 198 if (xp->lft.hard_use_expires_seconds) { 199 long tmo = xp->lft.hard_use_expires_seconds + 200 (xp->curlft.use_time ? : xp->curlft.add_time) - now; 201 if (tmo <= 0) 202 goto expired; 203 if (tmo < next) 204 next = tmo; 205 } 206 if (xp->lft.soft_add_expires_seconds) { 207 long tmo = xp->lft.soft_add_expires_seconds + 208 xp->curlft.add_time - now; 209 if (tmo <= 0) { 210 warn = 1; 211 tmo = XFRM_KM_TIMEOUT; 212 } 213 if (tmo < next) 214 next = tmo; 215 } 216 if (xp->lft.soft_use_expires_seconds) { 217 long tmo = xp->lft.soft_use_expires_seconds + 218 (xp->curlft.use_time ? : xp->curlft.add_time) - now; 219 if (tmo <= 0) { 220 warn = 1; 221 tmo = XFRM_KM_TIMEOUT; 222 } 223 if (tmo < next) 224 next = tmo; 225 } 226 227 if (warn) 228 km_policy_expired(xp, dir, 0, 0); 229 if (next != LONG_MAX && 230 !mod_timer(&xp->timer, jiffies + make_jiffies(next))) 231 xfrm_pol_hold(xp); 232 233 out: 234 read_unlock(&xp->lock); 235 xfrm_pol_put(xp); 236 return; 237 238 expired: 239 read_unlock(&xp->lock); 240 if (!xfrm_policy_delete(xp, dir)) 241 km_policy_expired(xp, dir, 1, 0); 242 xfrm_pol_put(xp); 243 } 244 245 static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) 246 { 247 struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); 248 249 if (unlikely(pol->walk.dead)) 250 flo = NULL; 251 else 252 xfrm_pol_hold(pol); 253 254 return flo; 255 } 256 257 static int xfrm_policy_flo_check(struct flow_cache_object *flo) 258 { 259 struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); 260 261 return !pol->walk.dead; 262 } 263 264 static void xfrm_policy_flo_delete(struct flow_cache_object *flo) 265 { 266 xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); 267 } 268 269 static const struct flow_cache_ops xfrm_policy_fc_ops = { 270 .get = xfrm_policy_flo_get, 271 .check = xfrm_policy_flo_check, 272 .delete = xfrm_policy_flo_delete, 273 }; 274 275 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 276 * SPD calls. 277 */ 278 279 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) 280 { 281 struct xfrm_policy *policy; 282 283 policy = kzalloc(sizeof(struct xfrm_policy), gfp); 284 285 if (policy) { 286 write_pnet(&policy->xp_net, net); 287 INIT_LIST_HEAD(&policy->walk.all); 288 INIT_HLIST_NODE(&policy->bydst); 289 INIT_HLIST_NODE(&policy->byidx); 290 rwlock_init(&policy->lock); 291 atomic_set(&policy->refcnt, 1); 292 skb_queue_head_init(&policy->polq.hold_queue); 293 setup_timer(&policy->timer, xfrm_policy_timer, 294 (unsigned long)policy); 295 setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, 296 (unsigned long)policy); 297 policy->flo.ops = &xfrm_policy_fc_ops; 298 } 299 return policy; 300 } 301 EXPORT_SYMBOL(xfrm_policy_alloc); 302 303 /* Destroy xfrm_policy: descendant resources must be released to this moment. */ 304 305 void xfrm_policy_destroy(struct xfrm_policy *policy) 306 { 307 BUG_ON(!policy->walk.dead); 308 309 if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) 310 BUG(); 311 312 security_xfrm_policy_free(policy->security); 313 kfree(policy); 314 } 315 EXPORT_SYMBOL(xfrm_policy_destroy); 316 317 static void xfrm_queue_purge(struct sk_buff_head *list) 318 { 319 struct sk_buff *skb; 320 321 while ((skb = skb_dequeue(list)) != NULL) 322 kfree_skb(skb); 323 } 324 325 /* Rule must be locked. Release descentant resources, announce 326 * entry dead. The rule must be unlinked from lists to the moment. 327 */ 328 329 static void xfrm_policy_kill(struct xfrm_policy *policy) 330 { 331 policy->walk.dead = 1; 332 333 atomic_inc(&policy->genid); 334 335 if (del_timer(&policy->polq.hold_timer)) 336 xfrm_pol_put(policy); 337 xfrm_queue_purge(&policy->polq.hold_queue); 338 339 if (del_timer(&policy->timer)) 340 xfrm_pol_put(policy); 341 342 xfrm_pol_put(policy); 343 } 344 345 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; 346 347 static inline unsigned int idx_hash(struct net *net, u32 index) 348 { 349 return __idx_hash(index, net->xfrm.policy_idx_hmask); 350 } 351 352 static struct hlist_head *policy_hash_bysel(struct net *net, 353 const struct xfrm_selector *sel, 354 unsigned short family, int dir) 355 { 356 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 357 unsigned int hash = __sel_hash(sel, family, hmask); 358 359 return (hash == hmask + 1 ? 360 &net->xfrm.policy_inexact[dir] : 361 net->xfrm.policy_bydst[dir].table + hash); 362 } 363 364 static struct hlist_head *policy_hash_direct(struct net *net, 365 const xfrm_address_t *daddr, 366 const xfrm_address_t *saddr, 367 unsigned short family, int dir) 368 { 369 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 370 unsigned int hash = __addr_hash(daddr, saddr, family, hmask); 371 372 return net->xfrm.policy_bydst[dir].table + hash; 373 } 374 375 static void xfrm_dst_hash_transfer(struct hlist_head *list, 376 struct hlist_head *ndsttable, 377 unsigned int nhashmask) 378 { 379 struct hlist_node *tmp, *entry0 = NULL; 380 struct xfrm_policy *pol; 381 unsigned int h0 = 0; 382 383 redo: 384 hlist_for_each_entry_safe(pol, tmp, list, bydst) { 385 unsigned int h; 386 387 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, 388 pol->family, nhashmask); 389 if (!entry0) { 390 hlist_del(&pol->bydst); 391 hlist_add_head(&pol->bydst, ndsttable+h); 392 h0 = h; 393 } else { 394 if (h != h0) 395 continue; 396 hlist_del(&pol->bydst); 397 hlist_add_behind(&pol->bydst, entry0); 398 } 399 entry0 = &pol->bydst; 400 } 401 if (!hlist_empty(list)) { 402 entry0 = NULL; 403 goto redo; 404 } 405 } 406 407 static void xfrm_idx_hash_transfer(struct hlist_head *list, 408 struct hlist_head *nidxtable, 409 unsigned int nhashmask) 410 { 411 struct hlist_node *tmp; 412 struct xfrm_policy *pol; 413 414 hlist_for_each_entry_safe(pol, tmp, list, byidx) { 415 unsigned int h; 416 417 h = __idx_hash(pol->index, nhashmask); 418 hlist_add_head(&pol->byidx, nidxtable+h); 419 } 420 } 421 422 static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) 423 { 424 return ((old_hmask + 1) << 1) - 1; 425 } 426 427 static void xfrm_bydst_resize(struct net *net, int dir) 428 { 429 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 430 unsigned int nhashmask = xfrm_new_hash_mask(hmask); 431 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); 432 struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; 433 struct hlist_head *ndst = xfrm_hash_alloc(nsize); 434 int i; 435 436 if (!ndst) 437 return; 438 439 write_lock_bh(&net->xfrm.xfrm_policy_lock); 440 441 for (i = hmask; i >= 0; i--) 442 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); 443 444 net->xfrm.policy_bydst[dir].table = ndst; 445 net->xfrm.policy_bydst[dir].hmask = nhashmask; 446 447 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 448 449 xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); 450 } 451 452 static void xfrm_byidx_resize(struct net *net, int total) 453 { 454 unsigned int hmask = net->xfrm.policy_idx_hmask; 455 unsigned int nhashmask = xfrm_new_hash_mask(hmask); 456 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); 457 struct hlist_head *oidx = net->xfrm.policy_byidx; 458 struct hlist_head *nidx = xfrm_hash_alloc(nsize); 459 int i; 460 461 if (!nidx) 462 return; 463 464 write_lock_bh(&net->xfrm.xfrm_policy_lock); 465 466 for (i = hmask; i >= 0; i--) 467 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); 468 469 net->xfrm.policy_byidx = nidx; 470 net->xfrm.policy_idx_hmask = nhashmask; 471 472 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 473 474 xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); 475 } 476 477 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) 478 { 479 unsigned int cnt = net->xfrm.policy_count[dir]; 480 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 481 482 if (total) 483 *total += cnt; 484 485 if ((hmask + 1) < xfrm_policy_hashmax && 486 cnt > hmask) 487 return 1; 488 489 return 0; 490 } 491 492 static inline int xfrm_byidx_should_resize(struct net *net, int total) 493 { 494 unsigned int hmask = net->xfrm.policy_idx_hmask; 495 496 if ((hmask + 1) < xfrm_policy_hashmax && 497 total > hmask) 498 return 1; 499 500 return 0; 501 } 502 503 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) 504 { 505 read_lock_bh(&net->xfrm.xfrm_policy_lock); 506 si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; 507 si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; 508 si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; 509 si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; 510 si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; 511 si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; 512 si->spdhcnt = net->xfrm.policy_idx_hmask; 513 si->spdhmcnt = xfrm_policy_hashmax; 514 read_unlock_bh(&net->xfrm.xfrm_policy_lock); 515 } 516 EXPORT_SYMBOL(xfrm_spd_getinfo); 517 518 static DEFINE_MUTEX(hash_resize_mutex); 519 static void xfrm_hash_resize(struct work_struct *work) 520 { 521 struct net *net = container_of(work, struct net, xfrm.policy_hash_work); 522 int dir, total; 523 524 mutex_lock(&hash_resize_mutex); 525 526 total = 0; 527 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 528 if (xfrm_bydst_should_resize(net, dir, &total)) 529 xfrm_bydst_resize(net, dir); 530 } 531 if (xfrm_byidx_should_resize(net, total)) 532 xfrm_byidx_resize(net, total); 533 534 mutex_unlock(&hash_resize_mutex); 535 } 536 537 /* Generate new index... KAME seems to generate them ordered by cost 538 * of an absolute inpredictability of ordering of rules. This will not pass. */ 539 static u32 xfrm_gen_index(struct net *net, int dir, u32 index) 540 { 541 static u32 idx_generator; 542 543 for (;;) { 544 struct hlist_head *list; 545 struct xfrm_policy *p; 546 u32 idx; 547 int found; 548 549 if (!index) { 550 idx = (idx_generator | dir); 551 idx_generator += 8; 552 } else { 553 idx = index; 554 index = 0; 555 } 556 557 if (idx == 0) 558 idx = 8; 559 list = net->xfrm.policy_byidx + idx_hash(net, idx); 560 found = 0; 561 hlist_for_each_entry(p, list, byidx) { 562 if (p->index == idx) { 563 found = 1; 564 break; 565 } 566 } 567 if (!found) 568 return idx; 569 } 570 } 571 572 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) 573 { 574 u32 *p1 = (u32 *) s1; 575 u32 *p2 = (u32 *) s2; 576 int len = sizeof(struct xfrm_selector) / sizeof(u32); 577 int i; 578 579 for (i = 0; i < len; i++) { 580 if (p1[i] != p2[i]) 581 return 1; 582 } 583 584 return 0; 585 } 586 587 static void xfrm_policy_requeue(struct xfrm_policy *old, 588 struct xfrm_policy *new) 589 { 590 struct xfrm_policy_queue *pq = &old->polq; 591 struct sk_buff_head list; 592 593 __skb_queue_head_init(&list); 594 595 spin_lock_bh(&pq->hold_queue.lock); 596 skb_queue_splice_init(&pq->hold_queue, &list); 597 if (del_timer(&pq->hold_timer)) 598 xfrm_pol_put(old); 599 spin_unlock_bh(&pq->hold_queue.lock); 600 601 if (skb_queue_empty(&list)) 602 return; 603 604 pq = &new->polq; 605 606 spin_lock_bh(&pq->hold_queue.lock); 607 skb_queue_splice(&list, &pq->hold_queue); 608 pq->timeout = XFRM_QUEUE_TMO_MIN; 609 if (!mod_timer(&pq->hold_timer, jiffies)) 610 xfrm_pol_hold(new); 611 spin_unlock_bh(&pq->hold_queue.lock); 612 } 613 614 static bool xfrm_policy_mark_match(struct xfrm_policy *policy, 615 struct xfrm_policy *pol) 616 { 617 u32 mark = policy->mark.v & policy->mark.m; 618 619 if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) 620 return true; 621 622 if ((mark & pol->mark.m) == pol->mark.v && 623 policy->priority == pol->priority) 624 return true; 625 626 return false; 627 } 628 629 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) 630 { 631 struct net *net = xp_net(policy); 632 struct xfrm_policy *pol; 633 struct xfrm_policy *delpol; 634 struct hlist_head *chain; 635 struct hlist_node *newpos; 636 637 write_lock_bh(&net->xfrm.xfrm_policy_lock); 638 chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); 639 delpol = NULL; 640 newpos = NULL; 641 hlist_for_each_entry(pol, chain, bydst) { 642 if (pol->type == policy->type && 643 !selector_cmp(&pol->selector, &policy->selector) && 644 xfrm_policy_mark_match(policy, pol) && 645 xfrm_sec_ctx_match(pol->security, policy->security) && 646 !WARN_ON(delpol)) { 647 if (excl) { 648 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 649 return -EEXIST; 650 } 651 delpol = pol; 652 if (policy->priority > pol->priority) 653 continue; 654 } else if (policy->priority >= pol->priority) { 655 newpos = &pol->bydst; 656 continue; 657 } 658 if (delpol) 659 break; 660 } 661 if (newpos) 662 hlist_add_behind(&policy->bydst, newpos); 663 else 664 hlist_add_head(&policy->bydst, chain); 665 xfrm_pol_hold(policy); 666 net->xfrm.policy_count[dir]++; 667 atomic_inc(&net->xfrm.flow_cache_genid); 668 669 /* After previous checking, family can either be AF_INET or AF_INET6 */ 670 if (policy->family == AF_INET) 671 rt_genid_bump_ipv4(net); 672 else 673 rt_genid_bump_ipv6(net); 674 675 if (delpol) { 676 xfrm_policy_requeue(delpol, policy); 677 __xfrm_policy_unlink(delpol, dir); 678 } 679 policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index); 680 hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); 681 policy->curlft.add_time = get_seconds(); 682 policy->curlft.use_time = 0; 683 if (!mod_timer(&policy->timer, jiffies + HZ)) 684 xfrm_pol_hold(policy); 685 list_add(&policy->walk.all, &net->xfrm.policy_all); 686 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 687 688 if (delpol) 689 xfrm_policy_kill(delpol); 690 else if (xfrm_bydst_should_resize(net, dir, NULL)) 691 schedule_work(&net->xfrm.policy_hash_work); 692 693 return 0; 694 } 695 EXPORT_SYMBOL(xfrm_policy_insert); 696 697 struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, 698 int dir, struct xfrm_selector *sel, 699 struct xfrm_sec_ctx *ctx, int delete, 700 int *err) 701 { 702 struct xfrm_policy *pol, *ret; 703 struct hlist_head *chain; 704 705 *err = 0; 706 write_lock_bh(&net->xfrm.xfrm_policy_lock); 707 chain = policy_hash_bysel(net, sel, sel->family, dir); 708 ret = NULL; 709 hlist_for_each_entry(pol, chain, bydst) { 710 if (pol->type == type && 711 (mark & pol->mark.m) == pol->mark.v && 712 !selector_cmp(sel, &pol->selector) && 713 xfrm_sec_ctx_match(ctx, pol->security)) { 714 xfrm_pol_hold(pol); 715 if (delete) { 716 *err = security_xfrm_policy_delete( 717 pol->security); 718 if (*err) { 719 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 720 return pol; 721 } 722 __xfrm_policy_unlink(pol, dir); 723 } 724 ret = pol; 725 break; 726 } 727 } 728 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 729 730 if (ret && delete) 731 xfrm_policy_kill(ret); 732 return ret; 733 } 734 EXPORT_SYMBOL(xfrm_policy_bysel_ctx); 735 736 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, 737 int dir, u32 id, int delete, int *err) 738 { 739 struct xfrm_policy *pol, *ret; 740 struct hlist_head *chain; 741 742 *err = -ENOENT; 743 if (xfrm_policy_id2dir(id) != dir) 744 return NULL; 745 746 *err = 0; 747 write_lock_bh(&net->xfrm.xfrm_policy_lock); 748 chain = net->xfrm.policy_byidx + idx_hash(net, id); 749 ret = NULL; 750 hlist_for_each_entry(pol, chain, byidx) { 751 if (pol->type == type && pol->index == id && 752 (mark & pol->mark.m) == pol->mark.v) { 753 xfrm_pol_hold(pol); 754 if (delete) { 755 *err = security_xfrm_policy_delete( 756 pol->security); 757 if (*err) { 758 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 759 return pol; 760 } 761 __xfrm_policy_unlink(pol, dir); 762 } 763 ret = pol; 764 break; 765 } 766 } 767 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 768 769 if (ret && delete) 770 xfrm_policy_kill(ret); 771 return ret; 772 } 773 EXPORT_SYMBOL(xfrm_policy_byid); 774 775 #ifdef CONFIG_SECURITY_NETWORK_XFRM 776 static inline int 777 xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) 778 { 779 int dir, err = 0; 780 781 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 782 struct xfrm_policy *pol; 783 int i; 784 785 hlist_for_each_entry(pol, 786 &net->xfrm.policy_inexact[dir], bydst) { 787 if (pol->type != type) 788 continue; 789 err = security_xfrm_policy_delete(pol->security); 790 if (err) { 791 xfrm_audit_policy_delete(pol, 0, task_valid); 792 return err; 793 } 794 } 795 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { 796 hlist_for_each_entry(pol, 797 net->xfrm.policy_bydst[dir].table + i, 798 bydst) { 799 if (pol->type != type) 800 continue; 801 err = security_xfrm_policy_delete( 802 pol->security); 803 if (err) { 804 xfrm_audit_policy_delete(pol, 0, 805 task_valid); 806 return err; 807 } 808 } 809 } 810 } 811 return err; 812 } 813 #else 814 static inline int 815 xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) 816 { 817 return 0; 818 } 819 #endif 820 821 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) 822 { 823 int dir, err = 0, cnt = 0; 824 825 write_lock_bh(&net->xfrm.xfrm_policy_lock); 826 827 err = xfrm_policy_flush_secctx_check(net, type, task_valid); 828 if (err) 829 goto out; 830 831 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 832 struct xfrm_policy *pol; 833 int i; 834 835 again1: 836 hlist_for_each_entry(pol, 837 &net->xfrm.policy_inexact[dir], bydst) { 838 if (pol->type != type) 839 continue; 840 __xfrm_policy_unlink(pol, dir); 841 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 842 cnt++; 843 844 xfrm_audit_policy_delete(pol, 1, task_valid); 845 846 xfrm_policy_kill(pol); 847 848 write_lock_bh(&net->xfrm.xfrm_policy_lock); 849 goto again1; 850 } 851 852 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { 853 again2: 854 hlist_for_each_entry(pol, 855 net->xfrm.policy_bydst[dir].table + i, 856 bydst) { 857 if (pol->type != type) 858 continue; 859 __xfrm_policy_unlink(pol, dir); 860 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 861 cnt++; 862 863 xfrm_audit_policy_delete(pol, 1, task_valid); 864 xfrm_policy_kill(pol); 865 866 write_lock_bh(&net->xfrm.xfrm_policy_lock); 867 goto again2; 868 } 869 } 870 871 } 872 if (!cnt) 873 err = -ESRCH; 874 out: 875 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 876 return err; 877 } 878 EXPORT_SYMBOL(xfrm_policy_flush); 879 880 int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, 881 int (*func)(struct xfrm_policy *, int, int, void*), 882 void *data) 883 { 884 struct xfrm_policy *pol; 885 struct xfrm_policy_walk_entry *x; 886 int error = 0; 887 888 if (walk->type >= XFRM_POLICY_TYPE_MAX && 889 walk->type != XFRM_POLICY_TYPE_ANY) 890 return -EINVAL; 891 892 if (list_empty(&walk->walk.all) && walk->seq != 0) 893 return 0; 894 895 write_lock_bh(&net->xfrm.xfrm_policy_lock); 896 if (list_empty(&walk->walk.all)) 897 x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); 898 else 899 x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); 900 list_for_each_entry_from(x, &net->xfrm.policy_all, all) { 901 if (x->dead) 902 continue; 903 pol = container_of(x, struct xfrm_policy, walk); 904 if (walk->type != XFRM_POLICY_TYPE_ANY && 905 walk->type != pol->type) 906 continue; 907 error = func(pol, xfrm_policy_id2dir(pol->index), 908 walk->seq, data); 909 if (error) { 910 list_move_tail(&walk->walk.all, &x->all); 911 goto out; 912 } 913 walk->seq++; 914 } 915 if (walk->seq == 0) { 916 error = -ENOENT; 917 goto out; 918 } 919 list_del_init(&walk->walk.all); 920 out: 921 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 922 return error; 923 } 924 EXPORT_SYMBOL(xfrm_policy_walk); 925 926 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) 927 { 928 INIT_LIST_HEAD(&walk->walk.all); 929 walk->walk.dead = 1; 930 walk->type = type; 931 walk->seq = 0; 932 } 933 EXPORT_SYMBOL(xfrm_policy_walk_init); 934 935 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) 936 { 937 if (list_empty(&walk->walk.all)) 938 return; 939 940 write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ 941 list_del(&walk->walk.all); 942 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 943 } 944 EXPORT_SYMBOL(xfrm_policy_walk_done); 945 946 /* 947 * Find policy to apply to this flow. 948 * 949 * Returns 0 if policy found, else an -errno. 950 */ 951 static int xfrm_policy_match(const struct xfrm_policy *pol, 952 const struct flowi *fl, 953 u8 type, u16 family, int dir) 954 { 955 const struct xfrm_selector *sel = &pol->selector; 956 int ret = -ESRCH; 957 bool match; 958 959 if (pol->family != family || 960 (fl->flowi_mark & pol->mark.m) != pol->mark.v || 961 pol->type != type) 962 return ret; 963 964 match = xfrm_selector_match(sel, fl, family); 965 if (match) 966 ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, 967 dir); 968 969 return ret; 970 } 971 972 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, 973 const struct flowi *fl, 974 u16 family, u8 dir) 975 { 976 int err; 977 struct xfrm_policy *pol, *ret; 978 const xfrm_address_t *daddr, *saddr; 979 struct hlist_head *chain; 980 u32 priority = ~0U; 981 982 daddr = xfrm_flowi_daddr(fl, family); 983 saddr = xfrm_flowi_saddr(fl, family); 984 if (unlikely(!daddr || !saddr)) 985 return NULL; 986 987 read_lock_bh(&net->xfrm.xfrm_policy_lock); 988 chain = policy_hash_direct(net, daddr, saddr, family, dir); 989 ret = NULL; 990 hlist_for_each_entry(pol, chain, bydst) { 991 err = xfrm_policy_match(pol, fl, type, family, dir); 992 if (err) { 993 if (err == -ESRCH) 994 continue; 995 else { 996 ret = ERR_PTR(err); 997 goto fail; 998 } 999 } else { 1000 ret = pol; 1001 priority = ret->priority; 1002 break; 1003 } 1004 } 1005 chain = &net->xfrm.policy_inexact[dir]; 1006 hlist_for_each_entry(pol, chain, bydst) { 1007 err = xfrm_policy_match(pol, fl, type, family, dir); 1008 if (err) { 1009 if (err == -ESRCH) 1010 continue; 1011 else { 1012 ret = ERR_PTR(err); 1013 goto fail; 1014 } 1015 } else if (pol->priority < priority) { 1016 ret = pol; 1017 break; 1018 } 1019 } 1020 if (ret) 1021 xfrm_pol_hold(ret); 1022 fail: 1023 read_unlock_bh(&net->xfrm.xfrm_policy_lock); 1024 1025 return ret; 1026 } 1027 1028 static struct xfrm_policy * 1029 __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) 1030 { 1031 #ifdef CONFIG_XFRM_SUB_POLICY 1032 struct xfrm_policy *pol; 1033 1034 pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); 1035 if (pol != NULL) 1036 return pol; 1037 #endif 1038 return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); 1039 } 1040 1041 static int flow_to_policy_dir(int dir) 1042 { 1043 if (XFRM_POLICY_IN == FLOW_DIR_IN && 1044 XFRM_POLICY_OUT == FLOW_DIR_OUT && 1045 XFRM_POLICY_FWD == FLOW_DIR_FWD) 1046 return dir; 1047 1048 switch (dir) { 1049 default: 1050 case FLOW_DIR_IN: 1051 return XFRM_POLICY_IN; 1052 case FLOW_DIR_OUT: 1053 return XFRM_POLICY_OUT; 1054 case FLOW_DIR_FWD: 1055 return XFRM_POLICY_FWD; 1056 } 1057 } 1058 1059 static struct flow_cache_object * 1060 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, 1061 u8 dir, struct flow_cache_object *old_obj, void *ctx) 1062 { 1063 struct xfrm_policy *pol; 1064 1065 if (old_obj) 1066 xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); 1067 1068 pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); 1069 if (IS_ERR_OR_NULL(pol)) 1070 return ERR_CAST(pol); 1071 1072 /* Resolver returns two references: 1073 * one for cache and one for caller of flow_cache_lookup() */ 1074 xfrm_pol_hold(pol); 1075 1076 return &pol->flo; 1077 } 1078 1079 static inline int policy_to_flow_dir(int dir) 1080 { 1081 if (XFRM_POLICY_IN == FLOW_DIR_IN && 1082 XFRM_POLICY_OUT == FLOW_DIR_OUT && 1083 XFRM_POLICY_FWD == FLOW_DIR_FWD) 1084 return dir; 1085 switch (dir) { 1086 default: 1087 case XFRM_POLICY_IN: 1088 return FLOW_DIR_IN; 1089 case XFRM_POLICY_OUT: 1090 return FLOW_DIR_OUT; 1091 case XFRM_POLICY_FWD: 1092 return FLOW_DIR_FWD; 1093 } 1094 } 1095 1096 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, 1097 const struct flowi *fl) 1098 { 1099 struct xfrm_policy *pol; 1100 struct net *net = sock_net(sk); 1101 1102 read_lock_bh(&net->xfrm.xfrm_policy_lock); 1103 if ((pol = sk->sk_policy[dir]) != NULL) { 1104 bool match = xfrm_selector_match(&pol->selector, fl, 1105 sk->sk_family); 1106 int err = 0; 1107 1108 if (match) { 1109 if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { 1110 pol = NULL; 1111 goto out; 1112 } 1113 err = security_xfrm_policy_lookup(pol->security, 1114 fl->flowi_secid, 1115 policy_to_flow_dir(dir)); 1116 if (!err) 1117 xfrm_pol_hold(pol); 1118 else if (err == -ESRCH) 1119 pol = NULL; 1120 else 1121 pol = ERR_PTR(err); 1122 } else 1123 pol = NULL; 1124 } 1125 out: 1126 read_unlock_bh(&net->xfrm.xfrm_policy_lock); 1127 return pol; 1128 } 1129 1130 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) 1131 { 1132 struct net *net = xp_net(pol); 1133 struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, 1134 pol->family, dir); 1135 1136 list_add(&pol->walk.all, &net->xfrm.policy_all); 1137 hlist_add_head(&pol->bydst, chain); 1138 hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); 1139 net->xfrm.policy_count[dir]++; 1140 xfrm_pol_hold(pol); 1141 1142 if (xfrm_bydst_should_resize(net, dir, NULL)) 1143 schedule_work(&net->xfrm.policy_hash_work); 1144 } 1145 1146 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 1147 int dir) 1148 { 1149 struct net *net = xp_net(pol); 1150 1151 if (hlist_unhashed(&pol->bydst)) 1152 return NULL; 1153 1154 hlist_del_init(&pol->bydst); 1155 hlist_del(&pol->byidx); 1156 list_del(&pol->walk.all); 1157 net->xfrm.policy_count[dir]--; 1158 1159 return pol; 1160 } 1161 1162 int xfrm_policy_delete(struct xfrm_policy *pol, int dir) 1163 { 1164 struct net *net = xp_net(pol); 1165 1166 write_lock_bh(&net->xfrm.xfrm_policy_lock); 1167 pol = __xfrm_policy_unlink(pol, dir); 1168 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 1169 if (pol) { 1170 xfrm_policy_kill(pol); 1171 return 0; 1172 } 1173 return -ENOENT; 1174 } 1175 EXPORT_SYMBOL(xfrm_policy_delete); 1176 1177 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) 1178 { 1179 struct net *net = xp_net(pol); 1180 struct xfrm_policy *old_pol; 1181 1182 #ifdef CONFIG_XFRM_SUB_POLICY 1183 if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) 1184 return -EINVAL; 1185 #endif 1186 1187 write_lock_bh(&net->xfrm.xfrm_policy_lock); 1188 old_pol = sk->sk_policy[dir]; 1189 sk->sk_policy[dir] = pol; 1190 if (pol) { 1191 pol->curlft.add_time = get_seconds(); 1192 pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); 1193 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); 1194 } 1195 if (old_pol) { 1196 if (pol) 1197 xfrm_policy_requeue(old_pol, pol); 1198 1199 /* Unlinking succeeds always. This is the only function 1200 * allowed to delete or replace socket policy. 1201 */ 1202 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); 1203 } 1204 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 1205 1206 if (old_pol) { 1207 xfrm_policy_kill(old_pol); 1208 } 1209 return 0; 1210 } 1211 1212 static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) 1213 { 1214 struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); 1215 struct net *net = xp_net(old); 1216 1217 if (newp) { 1218 newp->selector = old->selector; 1219 if (security_xfrm_policy_clone(old->security, 1220 &newp->security)) { 1221 kfree(newp); 1222 return NULL; /* ENOMEM */ 1223 } 1224 newp->lft = old->lft; 1225 newp->curlft = old->curlft; 1226 newp->mark = old->mark; 1227 newp->action = old->action; 1228 newp->flags = old->flags; 1229 newp->xfrm_nr = old->xfrm_nr; 1230 newp->index = old->index; 1231 newp->type = old->type; 1232 memcpy(newp->xfrm_vec, old->xfrm_vec, 1233 newp->xfrm_nr*sizeof(struct xfrm_tmpl)); 1234 write_lock_bh(&net->xfrm.xfrm_policy_lock); 1235 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); 1236 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 1237 xfrm_pol_put(newp); 1238 } 1239 return newp; 1240 } 1241 1242 int __xfrm_sk_clone_policy(struct sock *sk) 1243 { 1244 struct xfrm_policy *p0 = sk->sk_policy[0], 1245 *p1 = sk->sk_policy[1]; 1246 1247 sk->sk_policy[0] = sk->sk_policy[1] = NULL; 1248 if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL) 1249 return -ENOMEM; 1250 if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL) 1251 return -ENOMEM; 1252 return 0; 1253 } 1254 1255 static int 1256 xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, 1257 unsigned short family) 1258 { 1259 int err; 1260 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1261 1262 if (unlikely(afinfo == NULL)) 1263 return -EINVAL; 1264 err = afinfo->get_saddr(net, local, remote); 1265 xfrm_policy_put_afinfo(afinfo); 1266 return err; 1267 } 1268 1269 /* Resolve list of templates for the flow, given policy. */ 1270 1271 static int 1272 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, 1273 struct xfrm_state **xfrm, unsigned short family) 1274 { 1275 struct net *net = xp_net(policy); 1276 int nx; 1277 int i, error; 1278 xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); 1279 xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); 1280 xfrm_address_t tmp; 1281 1282 for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { 1283 struct xfrm_state *x; 1284 xfrm_address_t *remote = daddr; 1285 xfrm_address_t *local = saddr; 1286 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; 1287 1288 if (tmpl->mode == XFRM_MODE_TUNNEL || 1289 tmpl->mode == XFRM_MODE_BEET) { 1290 remote = &tmpl->id.daddr; 1291 local = &tmpl->saddr; 1292 if (xfrm_addr_any(local, tmpl->encap_family)) { 1293 error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); 1294 if (error) 1295 goto fail; 1296 local = &tmp; 1297 } 1298 } 1299 1300 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); 1301 1302 if (x && x->km.state == XFRM_STATE_VALID) { 1303 xfrm[nx++] = x; 1304 daddr = remote; 1305 saddr = local; 1306 continue; 1307 } 1308 if (x) { 1309 error = (x->km.state == XFRM_STATE_ERROR ? 1310 -EINVAL : -EAGAIN); 1311 xfrm_state_put(x); 1312 } else if (error == -ESRCH) { 1313 error = -EAGAIN; 1314 } 1315 1316 if (!tmpl->optional) 1317 goto fail; 1318 } 1319 return nx; 1320 1321 fail: 1322 for (nx--; nx >= 0; nx--) 1323 xfrm_state_put(xfrm[nx]); 1324 return error; 1325 } 1326 1327 static int 1328 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, 1329 struct xfrm_state **xfrm, unsigned short family) 1330 { 1331 struct xfrm_state *tp[XFRM_MAX_DEPTH]; 1332 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; 1333 int cnx = 0; 1334 int error; 1335 int ret; 1336 int i; 1337 1338 for (i = 0; i < npols; i++) { 1339 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { 1340 error = -ENOBUFS; 1341 goto fail; 1342 } 1343 1344 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); 1345 if (ret < 0) { 1346 error = ret; 1347 goto fail; 1348 } else 1349 cnx += ret; 1350 } 1351 1352 /* found states are sorted for outbound processing */ 1353 if (npols > 1) 1354 xfrm_state_sort(xfrm, tpp, cnx, family); 1355 1356 return cnx; 1357 1358 fail: 1359 for (cnx--; cnx >= 0; cnx--) 1360 xfrm_state_put(tpp[cnx]); 1361 return error; 1362 1363 } 1364 1365 /* Check that the bundle accepts the flow and its components are 1366 * still valid. 1367 */ 1368 1369 static inline int xfrm_get_tos(const struct flowi *fl, int family) 1370 { 1371 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1372 int tos; 1373 1374 if (!afinfo) 1375 return -EINVAL; 1376 1377 tos = afinfo->get_tos(fl); 1378 1379 xfrm_policy_put_afinfo(afinfo); 1380 1381 return tos; 1382 } 1383 1384 static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) 1385 { 1386 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1387 struct dst_entry *dst = &xdst->u.dst; 1388 1389 if (xdst->route == NULL) { 1390 /* Dummy bundle - if it has xfrms we were not 1391 * able to build bundle as template resolution failed. 1392 * It means we need to try again resolving. */ 1393 if (xdst->num_xfrms > 0) 1394 return NULL; 1395 } else if (dst->flags & DST_XFRM_QUEUE) { 1396 return NULL; 1397 } else { 1398 /* Real bundle */ 1399 if (stale_bundle(dst)) 1400 return NULL; 1401 } 1402 1403 dst_hold(dst); 1404 return flo; 1405 } 1406 1407 static int xfrm_bundle_flo_check(struct flow_cache_object *flo) 1408 { 1409 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1410 struct dst_entry *dst = &xdst->u.dst; 1411 1412 if (!xdst->route) 1413 return 0; 1414 if (stale_bundle(dst)) 1415 return 0; 1416 1417 return 1; 1418 } 1419 1420 static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) 1421 { 1422 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1423 struct dst_entry *dst = &xdst->u.dst; 1424 1425 dst_free(dst); 1426 } 1427 1428 static const struct flow_cache_ops xfrm_bundle_fc_ops = { 1429 .get = xfrm_bundle_flo_get, 1430 .check = xfrm_bundle_flo_check, 1431 .delete = xfrm_bundle_flo_delete, 1432 }; 1433 1434 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) 1435 { 1436 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1437 struct dst_ops *dst_ops; 1438 struct xfrm_dst *xdst; 1439 1440 if (!afinfo) 1441 return ERR_PTR(-EINVAL); 1442 1443 switch (family) { 1444 case AF_INET: 1445 dst_ops = &net->xfrm.xfrm4_dst_ops; 1446 break; 1447 #if IS_ENABLED(CONFIG_IPV6) 1448 case AF_INET6: 1449 dst_ops = &net->xfrm.xfrm6_dst_ops; 1450 break; 1451 #endif 1452 default: 1453 BUG(); 1454 } 1455 xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); 1456 1457 if (likely(xdst)) { 1458 struct dst_entry *dst = &xdst->u.dst; 1459 1460 memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); 1461 xdst->flo.ops = &xfrm_bundle_fc_ops; 1462 if (afinfo->init_dst) 1463 afinfo->init_dst(net, xdst); 1464 } else 1465 xdst = ERR_PTR(-ENOBUFS); 1466 1467 xfrm_policy_put_afinfo(afinfo); 1468 1469 return xdst; 1470 } 1471 1472 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, 1473 int nfheader_len) 1474 { 1475 struct xfrm_policy_afinfo *afinfo = 1476 xfrm_policy_get_afinfo(dst->ops->family); 1477 int err; 1478 1479 if (!afinfo) 1480 return -EINVAL; 1481 1482 err = afinfo->init_path(path, dst, nfheader_len); 1483 1484 xfrm_policy_put_afinfo(afinfo); 1485 1486 return err; 1487 } 1488 1489 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 1490 const struct flowi *fl) 1491 { 1492 struct xfrm_policy_afinfo *afinfo = 1493 xfrm_policy_get_afinfo(xdst->u.dst.ops->family); 1494 int err; 1495 1496 if (!afinfo) 1497 return -EINVAL; 1498 1499 err = afinfo->fill_dst(xdst, dev, fl); 1500 1501 xfrm_policy_put_afinfo(afinfo); 1502 1503 return err; 1504 } 1505 1506 1507 /* Allocate chain of dst_entry's, attach known xfrm's, calculate 1508 * all the metrics... Shortly, bundle a bundle. 1509 */ 1510 1511 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, 1512 struct xfrm_state **xfrm, int nx, 1513 const struct flowi *fl, 1514 struct dst_entry *dst) 1515 { 1516 struct net *net = xp_net(policy); 1517 unsigned long now = jiffies; 1518 struct net_device *dev; 1519 struct xfrm_mode *inner_mode; 1520 struct dst_entry *dst_prev = NULL; 1521 struct dst_entry *dst0 = NULL; 1522 int i = 0; 1523 int err; 1524 int header_len = 0; 1525 int nfheader_len = 0; 1526 int trailer_len = 0; 1527 int tos; 1528 int family = policy->selector.family; 1529 xfrm_address_t saddr, daddr; 1530 1531 xfrm_flowi_addr_get(fl, &saddr, &daddr, family); 1532 1533 tos = xfrm_get_tos(fl, family); 1534 err = tos; 1535 if (tos < 0) 1536 goto put_states; 1537 1538 dst_hold(dst); 1539 1540 for (; i < nx; i++) { 1541 struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); 1542 struct dst_entry *dst1 = &xdst->u.dst; 1543 1544 err = PTR_ERR(xdst); 1545 if (IS_ERR(xdst)) { 1546 dst_release(dst); 1547 goto put_states; 1548 } 1549 1550 if (xfrm[i]->sel.family == AF_UNSPEC) { 1551 inner_mode = xfrm_ip2inner_mode(xfrm[i], 1552 xfrm_af2proto(family)); 1553 if (!inner_mode) { 1554 err = -EAFNOSUPPORT; 1555 dst_release(dst); 1556 goto put_states; 1557 } 1558 } else 1559 inner_mode = xfrm[i]->inner_mode; 1560 1561 if (!dst_prev) 1562 dst0 = dst1; 1563 else { 1564 dst_prev->child = dst_clone(dst1); 1565 dst1->flags |= DST_NOHASH; 1566 } 1567 1568 xdst->route = dst; 1569 dst_copy_metrics(dst1, dst); 1570 1571 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { 1572 family = xfrm[i]->props.family; 1573 dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, 1574 family); 1575 err = PTR_ERR(dst); 1576 if (IS_ERR(dst)) 1577 goto put_states; 1578 } else 1579 dst_hold(dst); 1580 1581 dst1->xfrm = xfrm[i]; 1582 xdst->xfrm_genid = xfrm[i]->genid; 1583 1584 dst1->obsolete = DST_OBSOLETE_FORCE_CHK; 1585 dst1->flags |= DST_HOST; 1586 dst1->lastuse = now; 1587 1588 dst1->input = dst_discard; 1589 dst1->output = inner_mode->afinfo->output; 1590 1591 dst1->next = dst_prev; 1592 dst_prev = dst1; 1593 1594 header_len += xfrm[i]->props.header_len; 1595 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) 1596 nfheader_len += xfrm[i]->props.header_len; 1597 trailer_len += xfrm[i]->props.trailer_len; 1598 } 1599 1600 dst_prev->child = dst; 1601 dst0->path = dst; 1602 1603 err = -ENODEV; 1604 dev = dst->dev; 1605 if (!dev) 1606 goto free_dst; 1607 1608 xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); 1609 xfrm_init_pmtu(dst_prev); 1610 1611 for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { 1612 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; 1613 1614 err = xfrm_fill_dst(xdst, dev, fl); 1615 if (err) 1616 goto free_dst; 1617 1618 dst_prev->header_len = header_len; 1619 dst_prev->trailer_len = trailer_len; 1620 header_len -= xdst->u.dst.xfrm->props.header_len; 1621 trailer_len -= xdst->u.dst.xfrm->props.trailer_len; 1622 } 1623 1624 out: 1625 return dst0; 1626 1627 put_states: 1628 for (; i < nx; i++) 1629 xfrm_state_put(xfrm[i]); 1630 free_dst: 1631 if (dst0) 1632 dst_free(dst0); 1633 dst0 = ERR_PTR(err); 1634 goto out; 1635 } 1636 1637 #ifdef CONFIG_XFRM_SUB_POLICY 1638 static int xfrm_dst_alloc_copy(void **target, const void *src, int size) 1639 { 1640 if (!*target) { 1641 *target = kmalloc(size, GFP_ATOMIC); 1642 if (!*target) 1643 return -ENOMEM; 1644 } 1645 1646 memcpy(*target, src, size); 1647 return 0; 1648 } 1649 #endif 1650 1651 static int xfrm_dst_update_parent(struct dst_entry *dst, 1652 const struct xfrm_selector *sel) 1653 { 1654 #ifdef CONFIG_XFRM_SUB_POLICY 1655 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1656 return xfrm_dst_alloc_copy((void **)&(xdst->partner), 1657 sel, sizeof(*sel)); 1658 #else 1659 return 0; 1660 #endif 1661 } 1662 1663 static int xfrm_dst_update_origin(struct dst_entry *dst, 1664 const struct flowi *fl) 1665 { 1666 #ifdef CONFIG_XFRM_SUB_POLICY 1667 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1668 return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); 1669 #else 1670 return 0; 1671 #endif 1672 } 1673 1674 static int xfrm_expand_policies(const struct flowi *fl, u16 family, 1675 struct xfrm_policy **pols, 1676 int *num_pols, int *num_xfrms) 1677 { 1678 int i; 1679 1680 if (*num_pols == 0 || !pols[0]) { 1681 *num_pols = 0; 1682 *num_xfrms = 0; 1683 return 0; 1684 } 1685 if (IS_ERR(pols[0])) 1686 return PTR_ERR(pols[0]); 1687 1688 *num_xfrms = pols[0]->xfrm_nr; 1689 1690 #ifdef CONFIG_XFRM_SUB_POLICY 1691 if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && 1692 pols[0]->type != XFRM_POLICY_TYPE_MAIN) { 1693 pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), 1694 XFRM_POLICY_TYPE_MAIN, 1695 fl, family, 1696 XFRM_POLICY_OUT); 1697 if (pols[1]) { 1698 if (IS_ERR(pols[1])) { 1699 xfrm_pols_put(pols, *num_pols); 1700 return PTR_ERR(pols[1]); 1701 } 1702 (*num_pols)++; 1703 (*num_xfrms) += pols[1]->xfrm_nr; 1704 } 1705 } 1706 #endif 1707 for (i = 0; i < *num_pols; i++) { 1708 if (pols[i]->action != XFRM_POLICY_ALLOW) { 1709 *num_xfrms = -1; 1710 break; 1711 } 1712 } 1713 1714 return 0; 1715 1716 } 1717 1718 static struct xfrm_dst * 1719 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, 1720 const struct flowi *fl, u16 family, 1721 struct dst_entry *dst_orig) 1722 { 1723 struct net *net = xp_net(pols[0]); 1724 struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; 1725 struct dst_entry *dst; 1726 struct xfrm_dst *xdst; 1727 int err; 1728 1729 /* Try to instantiate a bundle */ 1730 err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); 1731 if (err <= 0) { 1732 if (err != 0 && err != -EAGAIN) 1733 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 1734 return ERR_PTR(err); 1735 } 1736 1737 dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); 1738 if (IS_ERR(dst)) { 1739 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); 1740 return ERR_CAST(dst); 1741 } 1742 1743 xdst = (struct xfrm_dst *)dst; 1744 xdst->num_xfrms = err; 1745 if (num_pols > 1) 1746 err = xfrm_dst_update_parent(dst, &pols[1]->selector); 1747 else 1748 err = xfrm_dst_update_origin(dst, fl); 1749 if (unlikely(err)) { 1750 dst_free(dst); 1751 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); 1752 return ERR_PTR(err); 1753 } 1754 1755 xdst->num_pols = num_pols; 1756 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); 1757 xdst->policy_genid = atomic_read(&pols[0]->genid); 1758 1759 return xdst; 1760 } 1761 1762 static void xfrm_policy_queue_process(unsigned long arg) 1763 { 1764 int err = 0; 1765 struct sk_buff *skb; 1766 struct sock *sk; 1767 struct dst_entry *dst; 1768 struct xfrm_policy *pol = (struct xfrm_policy *)arg; 1769 struct xfrm_policy_queue *pq = &pol->polq; 1770 struct flowi fl; 1771 struct sk_buff_head list; 1772 1773 spin_lock(&pq->hold_queue.lock); 1774 skb = skb_peek(&pq->hold_queue); 1775 if (!skb) { 1776 spin_unlock(&pq->hold_queue.lock); 1777 goto out; 1778 } 1779 dst = skb_dst(skb); 1780 sk = skb->sk; 1781 xfrm_decode_session(skb, &fl, dst->ops->family); 1782 spin_unlock(&pq->hold_queue.lock); 1783 1784 dst_hold(dst->path); 1785 dst = xfrm_lookup(xp_net(pol), dst->path, &fl, 1786 sk, 0); 1787 if (IS_ERR(dst)) 1788 goto purge_queue; 1789 1790 if (dst->flags & DST_XFRM_QUEUE) { 1791 dst_release(dst); 1792 1793 if (pq->timeout >= XFRM_QUEUE_TMO_MAX) 1794 goto purge_queue; 1795 1796 pq->timeout = pq->timeout << 1; 1797 if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) 1798 xfrm_pol_hold(pol); 1799 goto out; 1800 } 1801 1802 dst_release(dst); 1803 1804 __skb_queue_head_init(&list); 1805 1806 spin_lock(&pq->hold_queue.lock); 1807 pq->timeout = 0; 1808 skb_queue_splice_init(&pq->hold_queue, &list); 1809 spin_unlock(&pq->hold_queue.lock); 1810 1811 while (!skb_queue_empty(&list)) { 1812 skb = __skb_dequeue(&list); 1813 1814 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); 1815 dst_hold(skb_dst(skb)->path); 1816 dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, 1817 &fl, skb->sk, 0); 1818 if (IS_ERR(dst)) { 1819 kfree_skb(skb); 1820 continue; 1821 } 1822 1823 nf_reset(skb); 1824 skb_dst_drop(skb); 1825 skb_dst_set(skb, dst); 1826 1827 err = dst_output(skb); 1828 } 1829 1830 out: 1831 xfrm_pol_put(pol); 1832 return; 1833 1834 purge_queue: 1835 pq->timeout = 0; 1836 xfrm_queue_purge(&pq->hold_queue); 1837 xfrm_pol_put(pol); 1838 } 1839 1840 static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) 1841 { 1842 unsigned long sched_next; 1843 struct dst_entry *dst = skb_dst(skb); 1844 struct xfrm_dst *xdst = (struct xfrm_dst *) dst; 1845 struct xfrm_policy *pol = xdst->pols[0]; 1846 struct xfrm_policy_queue *pq = &pol->polq; 1847 const struct sk_buff *fclone = skb + 1; 1848 1849 if (unlikely(skb->fclone == SKB_FCLONE_ORIG && 1850 fclone->fclone == SKB_FCLONE_CLONE)) { 1851 kfree_skb(skb); 1852 return 0; 1853 } 1854 1855 if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { 1856 kfree_skb(skb); 1857 return -EAGAIN; 1858 } 1859 1860 skb_dst_force(skb); 1861 1862 spin_lock_bh(&pq->hold_queue.lock); 1863 1864 if (!pq->timeout) 1865 pq->timeout = XFRM_QUEUE_TMO_MIN; 1866 1867 sched_next = jiffies + pq->timeout; 1868 1869 if (del_timer(&pq->hold_timer)) { 1870 if (time_before(pq->hold_timer.expires, sched_next)) 1871 sched_next = pq->hold_timer.expires; 1872 xfrm_pol_put(pol); 1873 } 1874 1875 __skb_queue_tail(&pq->hold_queue, skb); 1876 if (!mod_timer(&pq->hold_timer, sched_next)) 1877 xfrm_pol_hold(pol); 1878 1879 spin_unlock_bh(&pq->hold_queue.lock); 1880 1881 return 0; 1882 } 1883 1884 static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, 1885 struct xfrm_flo *xflo, 1886 const struct flowi *fl, 1887 int num_xfrms, 1888 u16 family) 1889 { 1890 int err; 1891 struct net_device *dev; 1892 struct dst_entry *dst; 1893 struct dst_entry *dst1; 1894 struct xfrm_dst *xdst; 1895 1896 xdst = xfrm_alloc_dst(net, family); 1897 if (IS_ERR(xdst)) 1898 return xdst; 1899 1900 if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || 1901 net->xfrm.sysctl_larval_drop || 1902 num_xfrms <= 0) 1903 return xdst; 1904 1905 dst = xflo->dst_orig; 1906 dst1 = &xdst->u.dst; 1907 dst_hold(dst); 1908 xdst->route = dst; 1909 1910 dst_copy_metrics(dst1, dst); 1911 1912 dst1->obsolete = DST_OBSOLETE_FORCE_CHK; 1913 dst1->flags |= DST_HOST | DST_XFRM_QUEUE; 1914 dst1->lastuse = jiffies; 1915 1916 dst1->input = dst_discard; 1917 dst1->output = xdst_queue_output; 1918 1919 dst_hold(dst); 1920 dst1->child = dst; 1921 dst1->path = dst; 1922 1923 xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); 1924 1925 err = -ENODEV; 1926 dev = dst->dev; 1927 if (!dev) 1928 goto free_dst; 1929 1930 err = xfrm_fill_dst(xdst, dev, fl); 1931 if (err) 1932 goto free_dst; 1933 1934 out: 1935 return xdst; 1936 1937 free_dst: 1938 dst_release(dst1); 1939 xdst = ERR_PTR(err); 1940 goto out; 1941 } 1942 1943 static struct flow_cache_object * 1944 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, 1945 struct flow_cache_object *oldflo, void *ctx) 1946 { 1947 struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; 1948 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 1949 struct xfrm_dst *xdst, *new_xdst; 1950 int num_pols = 0, num_xfrms = 0, i, err, pol_dead; 1951 1952 /* Check if the policies from old bundle are usable */ 1953 xdst = NULL; 1954 if (oldflo) { 1955 xdst = container_of(oldflo, struct xfrm_dst, flo); 1956 num_pols = xdst->num_pols; 1957 num_xfrms = xdst->num_xfrms; 1958 pol_dead = 0; 1959 for (i = 0; i < num_pols; i++) { 1960 pols[i] = xdst->pols[i]; 1961 pol_dead |= pols[i]->walk.dead; 1962 } 1963 if (pol_dead) { 1964 dst_free(&xdst->u.dst); 1965 xdst = NULL; 1966 num_pols = 0; 1967 num_xfrms = 0; 1968 oldflo = NULL; 1969 } 1970 } 1971 1972 /* Resolve policies to use if we couldn't get them from 1973 * previous cache entry */ 1974 if (xdst == NULL) { 1975 num_pols = 1; 1976 pols[0] = __xfrm_policy_lookup(net, fl, family, 1977 flow_to_policy_dir(dir)); 1978 err = xfrm_expand_policies(fl, family, pols, 1979 &num_pols, &num_xfrms); 1980 if (err < 0) 1981 goto inc_error; 1982 if (num_pols == 0) 1983 return NULL; 1984 if (num_xfrms <= 0) 1985 goto make_dummy_bundle; 1986 } 1987 1988 new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, 1989 xflo->dst_orig); 1990 if (IS_ERR(new_xdst)) { 1991 err = PTR_ERR(new_xdst); 1992 if (err != -EAGAIN) 1993 goto error; 1994 if (oldflo == NULL) 1995 goto make_dummy_bundle; 1996 dst_hold(&xdst->u.dst); 1997 return oldflo; 1998 } else if (new_xdst == NULL) { 1999 num_xfrms = 0; 2000 if (oldflo == NULL) 2001 goto make_dummy_bundle; 2002 xdst->num_xfrms = 0; 2003 dst_hold(&xdst->u.dst); 2004 return oldflo; 2005 } 2006 2007 /* Kill the previous bundle */ 2008 if (xdst) { 2009 /* The policies were stolen for newly generated bundle */ 2010 xdst->num_pols = 0; 2011 dst_free(&xdst->u.dst); 2012 } 2013 2014 /* Flow cache does not have reference, it dst_free()'s, 2015 * but we do need to return one reference for original caller */ 2016 dst_hold(&new_xdst->u.dst); 2017 return &new_xdst->flo; 2018 2019 make_dummy_bundle: 2020 /* We found policies, but there's no bundles to instantiate: 2021 * either because the policy blocks, has no transformations or 2022 * we could not build template (no xfrm_states).*/ 2023 xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); 2024 if (IS_ERR(xdst)) { 2025 xfrm_pols_put(pols, num_pols); 2026 return ERR_CAST(xdst); 2027 } 2028 xdst->num_pols = num_pols; 2029 xdst->num_xfrms = num_xfrms; 2030 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); 2031 2032 dst_hold(&xdst->u.dst); 2033 return &xdst->flo; 2034 2035 inc_error: 2036 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 2037 error: 2038 if (xdst != NULL) 2039 dst_free(&xdst->u.dst); 2040 else 2041 xfrm_pols_put(pols, num_pols); 2042 return ERR_PTR(err); 2043 } 2044 2045 static struct dst_entry *make_blackhole(struct net *net, u16 family, 2046 struct dst_entry *dst_orig) 2047 { 2048 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 2049 struct dst_entry *ret; 2050 2051 if (!afinfo) { 2052 dst_release(dst_orig); 2053 return ERR_PTR(-EINVAL); 2054 } else { 2055 ret = afinfo->blackhole_route(net, dst_orig); 2056 } 2057 xfrm_policy_put_afinfo(afinfo); 2058 2059 return ret; 2060 } 2061 2062 /* Main function: finds/creates a bundle for given flow. 2063 * 2064 * At the moment we eat a raw IP route. Mostly to speed up lookups 2065 * on interfaces with disabled IPsec. 2066 */ 2067 struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, 2068 const struct flowi *fl, 2069 struct sock *sk, int flags) 2070 { 2071 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 2072 struct flow_cache_object *flo; 2073 struct xfrm_dst *xdst; 2074 struct dst_entry *dst, *route; 2075 u16 family = dst_orig->ops->family; 2076 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); 2077 int i, err, num_pols, num_xfrms = 0, drop_pols = 0; 2078 2079 dst = NULL; 2080 xdst = NULL; 2081 route = NULL; 2082 2083 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { 2084 num_pols = 1; 2085 pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); 2086 err = xfrm_expand_policies(fl, family, pols, 2087 &num_pols, &num_xfrms); 2088 if (err < 0) 2089 goto dropdst; 2090 2091 if (num_pols) { 2092 if (num_xfrms <= 0) { 2093 drop_pols = num_pols; 2094 goto no_transform; 2095 } 2096 2097 xdst = xfrm_resolve_and_create_bundle( 2098 pols, num_pols, fl, 2099 family, dst_orig); 2100 if (IS_ERR(xdst)) { 2101 xfrm_pols_put(pols, num_pols); 2102 err = PTR_ERR(xdst); 2103 goto dropdst; 2104 } else if (xdst == NULL) { 2105 num_xfrms = 0; 2106 drop_pols = num_pols; 2107 goto no_transform; 2108 } 2109 2110 dst_hold(&xdst->u.dst); 2111 xdst->u.dst.flags |= DST_NOCACHE; 2112 route = xdst->route; 2113 } 2114 } 2115 2116 if (xdst == NULL) { 2117 struct xfrm_flo xflo; 2118 2119 xflo.dst_orig = dst_orig; 2120 xflo.flags = flags; 2121 2122 /* To accelerate a bit... */ 2123 if ((dst_orig->flags & DST_NOXFRM) || 2124 !net->xfrm.policy_count[XFRM_POLICY_OUT]) 2125 goto nopol; 2126 2127 flo = flow_cache_lookup(net, fl, family, dir, 2128 xfrm_bundle_lookup, &xflo); 2129 if (flo == NULL) 2130 goto nopol; 2131 if (IS_ERR(flo)) { 2132 err = PTR_ERR(flo); 2133 goto dropdst; 2134 } 2135 xdst = container_of(flo, struct xfrm_dst, flo); 2136 2137 num_pols = xdst->num_pols; 2138 num_xfrms = xdst->num_xfrms; 2139 memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols); 2140 route = xdst->route; 2141 } 2142 2143 dst = &xdst->u.dst; 2144 if (route == NULL && num_xfrms > 0) { 2145 /* The only case when xfrm_bundle_lookup() returns a 2146 * bundle with null route, is when the template could 2147 * not be resolved. It means policies are there, but 2148 * bundle could not be created, since we don't yet 2149 * have the xfrm_state's. We need to wait for KM to 2150 * negotiate new SA's or bail out with error.*/ 2151 if (net->xfrm.sysctl_larval_drop) { 2152 dst_release(dst); 2153 xfrm_pols_put(pols, drop_pols); 2154 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); 2155 2156 return ERR_PTR(-EREMOTE); 2157 } 2158 2159 err = -EAGAIN; 2160 2161 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); 2162 goto error; 2163 } 2164 2165 no_transform: 2166 if (num_pols == 0) 2167 goto nopol; 2168 2169 if ((flags & XFRM_LOOKUP_ICMP) && 2170 !(pols[0]->flags & XFRM_POLICY_ICMP)) { 2171 err = -ENOENT; 2172 goto error; 2173 } 2174 2175 for (i = 0; i < num_pols; i++) 2176 pols[i]->curlft.use_time = get_seconds(); 2177 2178 if (num_xfrms < 0) { 2179 /* Prohibit the flow */ 2180 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); 2181 err = -EPERM; 2182 goto error; 2183 } else if (num_xfrms > 0) { 2184 /* Flow transformed */ 2185 dst_release(dst_orig); 2186 } else { 2187 /* Flow passes untransformed */ 2188 dst_release(dst); 2189 dst = dst_orig; 2190 } 2191 ok: 2192 xfrm_pols_put(pols, drop_pols); 2193 if (dst && dst->xfrm && 2194 dst->xfrm->props.mode == XFRM_MODE_TUNNEL) 2195 dst->flags |= DST_XFRM_TUNNEL; 2196 return dst; 2197 2198 nopol: 2199 if (!(flags & XFRM_LOOKUP_ICMP)) { 2200 dst = dst_orig; 2201 goto ok; 2202 } 2203 err = -ENOENT; 2204 error: 2205 dst_release(dst); 2206 dropdst: 2207 dst_release(dst_orig); 2208 xfrm_pols_put(pols, drop_pols); 2209 return ERR_PTR(err); 2210 } 2211 EXPORT_SYMBOL(xfrm_lookup); 2212 2213 /* Callers of xfrm_lookup_route() must ensure a call to dst_output(). 2214 * Otherwise we may send out blackholed packets. 2215 */ 2216 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, 2217 const struct flowi *fl, 2218 struct sock *sk, int flags) 2219 { 2220 struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, 2221 flags | XFRM_LOOKUP_QUEUE); 2222 2223 if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) 2224 return make_blackhole(net, dst_orig->ops->family, dst_orig); 2225 2226 return dst; 2227 } 2228 EXPORT_SYMBOL(xfrm_lookup_route); 2229 2230 static inline int 2231 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) 2232 { 2233 struct xfrm_state *x; 2234 2235 if (!skb->sp || idx < 0 || idx >= skb->sp->len) 2236 return 0; 2237 x = skb->sp->xvec[idx]; 2238 if (!x->type->reject) 2239 return 0; 2240 return x->type->reject(x, skb, fl); 2241 } 2242 2243 /* When skb is transformed back to its "native" form, we have to 2244 * check policy restrictions. At the moment we make this in maximally 2245 * stupid way. Shame on me. :-) Of course, connected sockets must 2246 * have policy cached at them. 2247 */ 2248 2249 static inline int 2250 xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, 2251 unsigned short family) 2252 { 2253 if (xfrm_state_kern(x)) 2254 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family); 2255 return x->id.proto == tmpl->id.proto && 2256 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && 2257 (x->props.reqid == tmpl->reqid || !tmpl->reqid) && 2258 x->props.mode == tmpl->mode && 2259 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) || 2260 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && 2261 !(x->props.mode != XFRM_MODE_TRANSPORT && 2262 xfrm_state_addr_cmp(tmpl, x, family)); 2263 } 2264 2265 /* 2266 * 0 or more than 0 is returned when validation is succeeded (either bypass 2267 * because of optional transport mode, or next index of the mathced secpath 2268 * state with the template. 2269 * -1 is returned when no matching template is found. 2270 * Otherwise "-2 - errored_index" is returned. 2271 */ 2272 static inline int 2273 xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, 2274 unsigned short family) 2275 { 2276 int idx = start; 2277 2278 if (tmpl->optional) { 2279 if (tmpl->mode == XFRM_MODE_TRANSPORT) 2280 return start; 2281 } else 2282 start = -1; 2283 for (; idx < sp->len; idx++) { 2284 if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) 2285 return ++idx; 2286 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { 2287 if (start == -1) 2288 start = -2-idx; 2289 break; 2290 } 2291 } 2292 return start; 2293 } 2294 2295 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, 2296 unsigned int family, int reverse) 2297 { 2298 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 2299 int err; 2300 2301 if (unlikely(afinfo == NULL)) 2302 return -EAFNOSUPPORT; 2303 2304 afinfo->decode_session(skb, fl, reverse); 2305 err = security_xfrm_decode_session(skb, &fl->flowi_secid); 2306 xfrm_policy_put_afinfo(afinfo); 2307 return err; 2308 } 2309 EXPORT_SYMBOL(__xfrm_decode_session); 2310 2311 static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) 2312 { 2313 for (; k < sp->len; k++) { 2314 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { 2315 *idxp = k; 2316 return 1; 2317 } 2318 } 2319 2320 return 0; 2321 } 2322 2323 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 2324 unsigned short family) 2325 { 2326 struct net *net = dev_net(skb->dev); 2327 struct xfrm_policy *pol; 2328 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 2329 int npols = 0; 2330 int xfrm_nr; 2331 int pi; 2332 int reverse; 2333 struct flowi fl; 2334 u8 fl_dir; 2335 int xerr_idx = -1; 2336 2337 reverse = dir & ~XFRM_POLICY_MASK; 2338 dir &= XFRM_POLICY_MASK; 2339 fl_dir = policy_to_flow_dir(dir); 2340 2341 if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { 2342 XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); 2343 return 0; 2344 } 2345 2346 nf_nat_decode_session(skb, &fl, family); 2347 2348 /* First, check used SA against their selectors. */ 2349 if (skb->sp) { 2350 int i; 2351 2352 for (i = skb->sp->len-1; i >= 0; i--) { 2353 struct xfrm_state *x = skb->sp->xvec[i]; 2354 if (!xfrm_selector_match(&x->sel, &fl, family)) { 2355 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); 2356 return 0; 2357 } 2358 } 2359 } 2360 2361 pol = NULL; 2362 if (sk && sk->sk_policy[dir]) { 2363 pol = xfrm_sk_policy_lookup(sk, dir, &fl); 2364 if (IS_ERR(pol)) { 2365 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2366 return 0; 2367 } 2368 } 2369 2370 if (!pol) { 2371 struct flow_cache_object *flo; 2372 2373 flo = flow_cache_lookup(net, &fl, family, fl_dir, 2374 xfrm_policy_lookup, NULL); 2375 if (IS_ERR_OR_NULL(flo)) 2376 pol = ERR_CAST(flo); 2377 else 2378 pol = container_of(flo, struct xfrm_policy, flo); 2379 } 2380 2381 if (IS_ERR(pol)) { 2382 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2383 return 0; 2384 } 2385 2386 if (!pol) { 2387 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { 2388 xfrm_secpath_reject(xerr_idx, skb, &fl); 2389 XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); 2390 return 0; 2391 } 2392 return 1; 2393 } 2394 2395 pol->curlft.use_time = get_seconds(); 2396 2397 pols[0] = pol; 2398 npols++; 2399 #ifdef CONFIG_XFRM_SUB_POLICY 2400 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { 2401 pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, 2402 &fl, family, 2403 XFRM_POLICY_IN); 2404 if (pols[1]) { 2405 if (IS_ERR(pols[1])) { 2406 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2407 return 0; 2408 } 2409 pols[1]->curlft.use_time = get_seconds(); 2410 npols++; 2411 } 2412 } 2413 #endif 2414 2415 if (pol->action == XFRM_POLICY_ALLOW) { 2416 struct sec_path *sp; 2417 static struct sec_path dummy; 2418 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; 2419 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; 2420 struct xfrm_tmpl **tpp = tp; 2421 int ti = 0; 2422 int i, k; 2423 2424 if ((sp = skb->sp) == NULL) 2425 sp = &dummy; 2426 2427 for (pi = 0; pi < npols; pi++) { 2428 if (pols[pi] != pol && 2429 pols[pi]->action != XFRM_POLICY_ALLOW) { 2430 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); 2431 goto reject; 2432 } 2433 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { 2434 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); 2435 goto reject_error; 2436 } 2437 for (i = 0; i < pols[pi]->xfrm_nr; i++) 2438 tpp[ti++] = &pols[pi]->xfrm_vec[i]; 2439 } 2440 xfrm_nr = ti; 2441 if (npols > 1) { 2442 xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net); 2443 tpp = stp; 2444 } 2445 2446 /* For each tunnel xfrm, find the first matching tmpl. 2447 * For each tmpl before that, find corresponding xfrm. 2448 * Order is _important_. Later we will implement 2449 * some barriers, but at the moment barriers 2450 * are implied between each two transformations. 2451 */ 2452 for (i = xfrm_nr-1, k = 0; i >= 0; i--) { 2453 k = xfrm_policy_ok(tpp[i], sp, k, family); 2454 if (k < 0) { 2455 if (k < -1) 2456 /* "-2 - errored_index" returned */ 2457 xerr_idx = -(2+k); 2458 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); 2459 goto reject; 2460 } 2461 } 2462 2463 if (secpath_has_nontransport(sp, k, &xerr_idx)) { 2464 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); 2465 goto reject; 2466 } 2467 2468 xfrm_pols_put(pols, npols); 2469 return 1; 2470 } 2471 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); 2472 2473 reject: 2474 xfrm_secpath_reject(xerr_idx, skb, &fl); 2475 reject_error: 2476 xfrm_pols_put(pols, npols); 2477 return 0; 2478 } 2479 EXPORT_SYMBOL(__xfrm_policy_check); 2480 2481 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) 2482 { 2483 struct net *net = dev_net(skb->dev); 2484 struct flowi fl; 2485 struct dst_entry *dst; 2486 int res = 1; 2487 2488 if (xfrm_decode_session(skb, &fl, family) < 0) { 2489 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); 2490 return 0; 2491 } 2492 2493 skb_dst_force(skb); 2494 2495 dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); 2496 if (IS_ERR(dst)) { 2497 res = 0; 2498 dst = NULL; 2499 } 2500 skb_dst_set(skb, dst); 2501 return res; 2502 } 2503 EXPORT_SYMBOL(__xfrm_route_forward); 2504 2505 /* Optimize later using cookies and generation ids. */ 2506 2507 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) 2508 { 2509 /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete 2510 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to 2511 * get validated by dst_ops->check on every use. We do this 2512 * because when a normal route referenced by an XFRM dst is 2513 * obsoleted we do not go looking around for all parent 2514 * referencing XFRM dsts so that we can invalidate them. It 2515 * is just too much work. Instead we make the checks here on 2516 * every use. For example: 2517 * 2518 * XFRM dst A --> IPv4 dst X 2519 * 2520 * X is the "xdst->route" of A (X is also the "dst->path" of A 2521 * in this example). If X is marked obsolete, "A" will not 2522 * notice. That's what we are validating here via the 2523 * stale_bundle() check. 2524 * 2525 * When a policy's bundle is pruned, we dst_free() the XFRM 2526 * dst which causes it's ->obsolete field to be set to 2527 * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like 2528 * this, we want to force a new route lookup. 2529 */ 2530 if (dst->obsolete < 0 && !stale_bundle(dst)) 2531 return dst; 2532 2533 return NULL; 2534 } 2535 2536 static int stale_bundle(struct dst_entry *dst) 2537 { 2538 return !xfrm_bundle_ok((struct xfrm_dst *)dst); 2539 } 2540 2541 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) 2542 { 2543 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { 2544 dst->dev = dev_net(dev)->loopback_dev; 2545 dev_hold(dst->dev); 2546 dev_put(dev); 2547 } 2548 } 2549 EXPORT_SYMBOL(xfrm_dst_ifdown); 2550 2551 static void xfrm_link_failure(struct sk_buff *skb) 2552 { 2553 /* Impossible. Such dst must be popped before reaches point of failure. */ 2554 } 2555 2556 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) 2557 { 2558 if (dst) { 2559 if (dst->obsolete) { 2560 dst_release(dst); 2561 dst = NULL; 2562 } 2563 } 2564 return dst; 2565 } 2566 2567 void xfrm_garbage_collect(struct net *net) 2568 { 2569 flow_cache_flush(net); 2570 } 2571 EXPORT_SYMBOL(xfrm_garbage_collect); 2572 2573 static void xfrm_garbage_collect_deferred(struct net *net) 2574 { 2575 flow_cache_flush_deferred(net); 2576 } 2577 2578 static void xfrm_init_pmtu(struct dst_entry *dst) 2579 { 2580 do { 2581 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2582 u32 pmtu, route_mtu_cached; 2583 2584 pmtu = dst_mtu(dst->child); 2585 xdst->child_mtu_cached = pmtu; 2586 2587 pmtu = xfrm_state_mtu(dst->xfrm, pmtu); 2588 2589 route_mtu_cached = dst_mtu(xdst->route); 2590 xdst->route_mtu_cached = route_mtu_cached; 2591 2592 if (pmtu > route_mtu_cached) 2593 pmtu = route_mtu_cached; 2594 2595 dst_metric_set(dst, RTAX_MTU, pmtu); 2596 } while ((dst = dst->next)); 2597 } 2598 2599 /* Check that the bundle accepts the flow and its components are 2600 * still valid. 2601 */ 2602 2603 static int xfrm_bundle_ok(struct xfrm_dst *first) 2604 { 2605 struct dst_entry *dst = &first->u.dst; 2606 struct xfrm_dst *last; 2607 u32 mtu; 2608 2609 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || 2610 (dst->dev && !netif_running(dst->dev))) 2611 return 0; 2612 2613 if (dst->flags & DST_XFRM_QUEUE) 2614 return 1; 2615 2616 last = NULL; 2617 2618 do { 2619 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2620 2621 if (dst->xfrm->km.state != XFRM_STATE_VALID) 2622 return 0; 2623 if (xdst->xfrm_genid != dst->xfrm->genid) 2624 return 0; 2625 if (xdst->num_pols > 0 && 2626 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) 2627 return 0; 2628 2629 mtu = dst_mtu(dst->child); 2630 if (xdst->child_mtu_cached != mtu) { 2631 last = xdst; 2632 xdst->child_mtu_cached = mtu; 2633 } 2634 2635 if (!dst_check(xdst->route, xdst->route_cookie)) 2636 return 0; 2637 mtu = dst_mtu(xdst->route); 2638 if (xdst->route_mtu_cached != mtu) { 2639 last = xdst; 2640 xdst->route_mtu_cached = mtu; 2641 } 2642 2643 dst = dst->child; 2644 } while (dst->xfrm); 2645 2646 if (likely(!last)) 2647 return 1; 2648 2649 mtu = last->child_mtu_cached; 2650 for (;;) { 2651 dst = &last->u.dst; 2652 2653 mtu = xfrm_state_mtu(dst->xfrm, mtu); 2654 if (mtu > last->route_mtu_cached) 2655 mtu = last->route_mtu_cached; 2656 dst_metric_set(dst, RTAX_MTU, mtu); 2657 2658 if (last == first) 2659 break; 2660 2661 last = (struct xfrm_dst *)last->u.dst.next; 2662 last->child_mtu_cached = mtu; 2663 } 2664 2665 return 1; 2666 } 2667 2668 static unsigned int xfrm_default_advmss(const struct dst_entry *dst) 2669 { 2670 return dst_metric_advmss(dst->path); 2671 } 2672 2673 static unsigned int xfrm_mtu(const struct dst_entry *dst) 2674 { 2675 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 2676 2677 return mtu ? : dst_mtu(dst->path); 2678 } 2679 2680 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, 2681 struct sk_buff *skb, 2682 const void *daddr) 2683 { 2684 return dst->path->ops->neigh_lookup(dst, skb, daddr); 2685 } 2686 2687 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) 2688 { 2689 struct net *net; 2690 int err = 0; 2691 if (unlikely(afinfo == NULL)) 2692 return -EINVAL; 2693 if (unlikely(afinfo->family >= NPROTO)) 2694 return -EAFNOSUPPORT; 2695 spin_lock(&xfrm_policy_afinfo_lock); 2696 if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) 2697 err = -ENOBUFS; 2698 else { 2699 struct dst_ops *dst_ops = afinfo->dst_ops; 2700 if (likely(dst_ops->kmem_cachep == NULL)) 2701 dst_ops->kmem_cachep = xfrm_dst_cache; 2702 if (likely(dst_ops->check == NULL)) 2703 dst_ops->check = xfrm_dst_check; 2704 if (likely(dst_ops->default_advmss == NULL)) 2705 dst_ops->default_advmss = xfrm_default_advmss; 2706 if (likely(dst_ops->mtu == NULL)) 2707 dst_ops->mtu = xfrm_mtu; 2708 if (likely(dst_ops->negative_advice == NULL)) 2709 dst_ops->negative_advice = xfrm_negative_advice; 2710 if (likely(dst_ops->link_failure == NULL)) 2711 dst_ops->link_failure = xfrm_link_failure; 2712 if (likely(dst_ops->neigh_lookup == NULL)) 2713 dst_ops->neigh_lookup = xfrm_neigh_lookup; 2714 if (likely(afinfo->garbage_collect == NULL)) 2715 afinfo->garbage_collect = xfrm_garbage_collect_deferred; 2716 rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); 2717 } 2718 spin_unlock(&xfrm_policy_afinfo_lock); 2719 2720 rtnl_lock(); 2721 for_each_net(net) { 2722 struct dst_ops *xfrm_dst_ops; 2723 2724 switch (afinfo->family) { 2725 case AF_INET: 2726 xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; 2727 break; 2728 #if IS_ENABLED(CONFIG_IPV6) 2729 case AF_INET6: 2730 xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; 2731 break; 2732 #endif 2733 default: 2734 BUG(); 2735 } 2736 *xfrm_dst_ops = *afinfo->dst_ops; 2737 } 2738 rtnl_unlock(); 2739 2740 return err; 2741 } 2742 EXPORT_SYMBOL(xfrm_policy_register_afinfo); 2743 2744 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) 2745 { 2746 int err = 0; 2747 if (unlikely(afinfo == NULL)) 2748 return -EINVAL; 2749 if (unlikely(afinfo->family >= NPROTO)) 2750 return -EAFNOSUPPORT; 2751 spin_lock(&xfrm_policy_afinfo_lock); 2752 if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { 2753 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) 2754 err = -EINVAL; 2755 else 2756 RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], 2757 NULL); 2758 } 2759 spin_unlock(&xfrm_policy_afinfo_lock); 2760 if (!err) { 2761 struct dst_ops *dst_ops = afinfo->dst_ops; 2762 2763 synchronize_rcu(); 2764 2765 dst_ops->kmem_cachep = NULL; 2766 dst_ops->check = NULL; 2767 dst_ops->negative_advice = NULL; 2768 dst_ops->link_failure = NULL; 2769 afinfo->garbage_collect = NULL; 2770 } 2771 return err; 2772 } 2773 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); 2774 2775 static void __net_init xfrm_dst_ops_init(struct net *net) 2776 { 2777 struct xfrm_policy_afinfo *afinfo; 2778 2779 rcu_read_lock(); 2780 afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); 2781 if (afinfo) 2782 net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; 2783 #if IS_ENABLED(CONFIG_IPV6) 2784 afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); 2785 if (afinfo) 2786 net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; 2787 #endif 2788 rcu_read_unlock(); 2789 } 2790 2791 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) 2792 { 2793 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2794 2795 switch (event) { 2796 case NETDEV_DOWN: 2797 xfrm_garbage_collect(dev_net(dev)); 2798 } 2799 return NOTIFY_DONE; 2800 } 2801 2802 static struct notifier_block xfrm_dev_notifier = { 2803 .notifier_call = xfrm_dev_event, 2804 }; 2805 2806 #ifdef CONFIG_XFRM_STATISTICS 2807 static int __net_init xfrm_statistics_init(struct net *net) 2808 { 2809 int rv; 2810 net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); 2811 if (!net->mib.xfrm_statistics) 2812 return -ENOMEM; 2813 rv = xfrm_proc_init(net); 2814 if (rv < 0) 2815 free_percpu(net->mib.xfrm_statistics); 2816 return rv; 2817 } 2818 2819 static void xfrm_statistics_fini(struct net *net) 2820 { 2821 xfrm_proc_fini(net); 2822 free_percpu(net->mib.xfrm_statistics); 2823 } 2824 #else 2825 static int __net_init xfrm_statistics_init(struct net *net) 2826 { 2827 return 0; 2828 } 2829 2830 static void xfrm_statistics_fini(struct net *net) 2831 { 2832 } 2833 #endif 2834 2835 static int __net_init xfrm_policy_init(struct net *net) 2836 { 2837 unsigned int hmask, sz; 2838 int dir; 2839 2840 if (net_eq(net, &init_net)) 2841 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", 2842 sizeof(struct xfrm_dst), 2843 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 2844 NULL); 2845 2846 hmask = 8 - 1; 2847 sz = (hmask+1) * sizeof(struct hlist_head); 2848 2849 net->xfrm.policy_byidx = xfrm_hash_alloc(sz); 2850 if (!net->xfrm.policy_byidx) 2851 goto out_byidx; 2852 net->xfrm.policy_idx_hmask = hmask; 2853 2854 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 2855 struct xfrm_policy_hash *htab; 2856 2857 net->xfrm.policy_count[dir] = 0; 2858 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); 2859 2860 htab = &net->xfrm.policy_bydst[dir]; 2861 htab->table = xfrm_hash_alloc(sz); 2862 if (!htab->table) 2863 goto out_bydst; 2864 htab->hmask = hmask; 2865 } 2866 2867 INIT_LIST_HEAD(&net->xfrm.policy_all); 2868 INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); 2869 if (net_eq(net, &init_net)) 2870 register_netdevice_notifier(&xfrm_dev_notifier); 2871 return 0; 2872 2873 out_bydst: 2874 for (dir--; dir >= 0; dir--) { 2875 struct xfrm_policy_hash *htab; 2876 2877 htab = &net->xfrm.policy_bydst[dir]; 2878 xfrm_hash_free(htab->table, sz); 2879 } 2880 xfrm_hash_free(net->xfrm.policy_byidx, sz); 2881 out_byidx: 2882 return -ENOMEM; 2883 } 2884 2885 static void xfrm_policy_fini(struct net *net) 2886 { 2887 unsigned int sz; 2888 int dir; 2889 2890 flush_work(&net->xfrm.policy_hash_work); 2891 #ifdef CONFIG_XFRM_SUB_POLICY 2892 xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); 2893 #endif 2894 xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); 2895 2896 WARN_ON(!list_empty(&net->xfrm.policy_all)); 2897 2898 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 2899 struct xfrm_policy_hash *htab; 2900 2901 WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); 2902 2903 htab = &net->xfrm.policy_bydst[dir]; 2904 sz = (htab->hmask + 1) * sizeof(struct hlist_head); 2905 WARN_ON(!hlist_empty(htab->table)); 2906 xfrm_hash_free(htab->table, sz); 2907 } 2908 2909 sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); 2910 WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); 2911 xfrm_hash_free(net->xfrm.policy_byidx, sz); 2912 } 2913 2914 static int __net_init xfrm_net_init(struct net *net) 2915 { 2916 int rv; 2917 2918 rv = xfrm_statistics_init(net); 2919 if (rv < 0) 2920 goto out_statistics; 2921 rv = xfrm_state_init(net); 2922 if (rv < 0) 2923 goto out_state; 2924 rv = xfrm_policy_init(net); 2925 if (rv < 0) 2926 goto out_policy; 2927 xfrm_dst_ops_init(net); 2928 rv = xfrm_sysctl_init(net); 2929 if (rv < 0) 2930 goto out_sysctl; 2931 rv = flow_cache_init(net); 2932 if (rv < 0) 2933 goto out; 2934 2935 /* Initialize the per-net locks here */ 2936 spin_lock_init(&net->xfrm.xfrm_state_lock); 2937 rwlock_init(&net->xfrm.xfrm_policy_lock); 2938 mutex_init(&net->xfrm.xfrm_cfg_mutex); 2939 2940 return 0; 2941 2942 out: 2943 xfrm_sysctl_fini(net); 2944 out_sysctl: 2945 xfrm_policy_fini(net); 2946 out_policy: 2947 xfrm_state_fini(net); 2948 out_state: 2949 xfrm_statistics_fini(net); 2950 out_statistics: 2951 return rv; 2952 } 2953 2954 static void __net_exit xfrm_net_exit(struct net *net) 2955 { 2956 flow_cache_fini(net); 2957 xfrm_sysctl_fini(net); 2958 xfrm_policy_fini(net); 2959 xfrm_state_fini(net); 2960 xfrm_statistics_fini(net); 2961 } 2962 2963 static struct pernet_operations __net_initdata xfrm_net_ops = { 2964 .init = xfrm_net_init, 2965 .exit = xfrm_net_exit, 2966 }; 2967 2968 void __init xfrm_init(void) 2969 { 2970 register_pernet_subsys(&xfrm_net_ops); 2971 xfrm_input_init(); 2972 } 2973 2974 #ifdef CONFIG_AUDITSYSCALL 2975 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, 2976 struct audit_buffer *audit_buf) 2977 { 2978 struct xfrm_sec_ctx *ctx = xp->security; 2979 struct xfrm_selector *sel = &xp->selector; 2980 2981 if (ctx) 2982 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", 2983 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); 2984 2985 switch (sel->family) { 2986 case AF_INET: 2987 audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); 2988 if (sel->prefixlen_s != 32) 2989 audit_log_format(audit_buf, " src_prefixlen=%d", 2990 sel->prefixlen_s); 2991 audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); 2992 if (sel->prefixlen_d != 32) 2993 audit_log_format(audit_buf, " dst_prefixlen=%d", 2994 sel->prefixlen_d); 2995 break; 2996 case AF_INET6: 2997 audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); 2998 if (sel->prefixlen_s != 128) 2999 audit_log_format(audit_buf, " src_prefixlen=%d", 3000 sel->prefixlen_s); 3001 audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); 3002 if (sel->prefixlen_d != 128) 3003 audit_log_format(audit_buf, " dst_prefixlen=%d", 3004 sel->prefixlen_d); 3005 break; 3006 } 3007 } 3008 3009 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) 3010 { 3011 struct audit_buffer *audit_buf; 3012 3013 audit_buf = xfrm_audit_start("SPD-add"); 3014 if (audit_buf == NULL) 3015 return; 3016 xfrm_audit_helper_usrinfo(task_valid, audit_buf); 3017 audit_log_format(audit_buf, " res=%u", result); 3018 xfrm_audit_common_policyinfo(xp, audit_buf); 3019 audit_log_end(audit_buf); 3020 } 3021 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); 3022 3023 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, 3024 bool task_valid) 3025 { 3026 struct audit_buffer *audit_buf; 3027 3028 audit_buf = xfrm_audit_start("SPD-delete"); 3029 if (audit_buf == NULL) 3030 return; 3031 xfrm_audit_helper_usrinfo(task_valid, audit_buf); 3032 audit_log_format(audit_buf, " res=%u", result); 3033 xfrm_audit_common_policyinfo(xp, audit_buf); 3034 audit_log_end(audit_buf); 3035 } 3036 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); 3037 #endif 3038 3039 #ifdef CONFIG_XFRM_MIGRATE 3040 static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, 3041 const struct xfrm_selector *sel_tgt) 3042 { 3043 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { 3044 if (sel_tgt->family == sel_cmp->family && 3045 xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, 3046 sel_cmp->family) && 3047 xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, 3048 sel_cmp->family) && 3049 sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && 3050 sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { 3051 return true; 3052 } 3053 } else { 3054 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { 3055 return true; 3056 } 3057 } 3058 return false; 3059 } 3060 3061 static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, 3062 u8 dir, u8 type, struct net *net) 3063 { 3064 struct xfrm_policy *pol, *ret = NULL; 3065 struct hlist_head *chain; 3066 u32 priority = ~0U; 3067 3068 read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ 3069 chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); 3070 hlist_for_each_entry(pol, chain, bydst) { 3071 if (xfrm_migrate_selector_match(sel, &pol->selector) && 3072 pol->type == type) { 3073 ret = pol; 3074 priority = ret->priority; 3075 break; 3076 } 3077 } 3078 chain = &net->xfrm.policy_inexact[dir]; 3079 hlist_for_each_entry(pol, chain, bydst) { 3080 if (xfrm_migrate_selector_match(sel, &pol->selector) && 3081 pol->type == type && 3082 pol->priority < priority) { 3083 ret = pol; 3084 break; 3085 } 3086 } 3087 3088 if (ret) 3089 xfrm_pol_hold(ret); 3090 3091 read_unlock_bh(&net->xfrm.xfrm_policy_lock); 3092 3093 return ret; 3094 } 3095 3096 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) 3097 { 3098 int match = 0; 3099 3100 if (t->mode == m->mode && t->id.proto == m->proto && 3101 (m->reqid == 0 || t->reqid == m->reqid)) { 3102 switch (t->mode) { 3103 case XFRM_MODE_TUNNEL: 3104 case XFRM_MODE_BEET: 3105 if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, 3106 m->old_family) && 3107 xfrm_addr_equal(&t->saddr, &m->old_saddr, 3108 m->old_family)) { 3109 match = 1; 3110 } 3111 break; 3112 case XFRM_MODE_TRANSPORT: 3113 /* in case of transport mode, template does not store 3114 any IP addresses, hence we just compare mode and 3115 protocol */ 3116 match = 1; 3117 break; 3118 default: 3119 break; 3120 } 3121 } 3122 return match; 3123 } 3124 3125 /* update endpoint address(es) of template(s) */ 3126 static int xfrm_policy_migrate(struct xfrm_policy *pol, 3127 struct xfrm_migrate *m, int num_migrate) 3128 { 3129 struct xfrm_migrate *mp; 3130 int i, j, n = 0; 3131 3132 write_lock_bh(&pol->lock); 3133 if (unlikely(pol->walk.dead)) { 3134 /* target policy has been deleted */ 3135 write_unlock_bh(&pol->lock); 3136 return -ENOENT; 3137 } 3138 3139 for (i = 0; i < pol->xfrm_nr; i++) { 3140 for (j = 0, mp = m; j < num_migrate; j++, mp++) { 3141 if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) 3142 continue; 3143 n++; 3144 if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && 3145 pol->xfrm_vec[i].mode != XFRM_MODE_BEET) 3146 continue; 3147 /* update endpoints */ 3148 memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, 3149 sizeof(pol->xfrm_vec[i].id.daddr)); 3150 memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr, 3151 sizeof(pol->xfrm_vec[i].saddr)); 3152 pol->xfrm_vec[i].encap_family = mp->new_family; 3153 /* flush bundles */ 3154 atomic_inc(&pol->genid); 3155 } 3156 } 3157 3158 write_unlock_bh(&pol->lock); 3159 3160 if (!n) 3161 return -ENODATA; 3162 3163 return 0; 3164 } 3165 3166 static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) 3167 { 3168 int i, j; 3169 3170 if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) 3171 return -EINVAL; 3172 3173 for (i = 0; i < num_migrate; i++) { 3174 if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, 3175 m[i].old_family) && 3176 xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, 3177 m[i].old_family)) 3178 return -EINVAL; 3179 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || 3180 xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) 3181 return -EINVAL; 3182 3183 /* check if there is any duplicated entry */ 3184 for (j = i + 1; j < num_migrate; j++) { 3185 if (!memcmp(&m[i].old_daddr, &m[j].old_daddr, 3186 sizeof(m[i].old_daddr)) && 3187 !memcmp(&m[i].old_saddr, &m[j].old_saddr, 3188 sizeof(m[i].old_saddr)) && 3189 m[i].proto == m[j].proto && 3190 m[i].mode == m[j].mode && 3191 m[i].reqid == m[j].reqid && 3192 m[i].old_family == m[j].old_family) 3193 return -EINVAL; 3194 } 3195 } 3196 3197 return 0; 3198 } 3199 3200 int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, 3201 struct xfrm_migrate *m, int num_migrate, 3202 struct xfrm_kmaddress *k, struct net *net) 3203 { 3204 int i, err, nx_cur = 0, nx_new = 0; 3205 struct xfrm_policy *pol = NULL; 3206 struct xfrm_state *x, *xc; 3207 struct xfrm_state *x_cur[XFRM_MAX_DEPTH]; 3208 struct xfrm_state *x_new[XFRM_MAX_DEPTH]; 3209 struct xfrm_migrate *mp; 3210 3211 if ((err = xfrm_migrate_check(m, num_migrate)) < 0) 3212 goto out; 3213 3214 /* Stage 1 - find policy */ 3215 if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { 3216 err = -ENOENT; 3217 goto out; 3218 } 3219 3220 /* Stage 2 - find and update state(s) */ 3221 for (i = 0, mp = m; i < num_migrate; i++, mp++) { 3222 if ((x = xfrm_migrate_state_find(mp, net))) { 3223 x_cur[nx_cur] = x; 3224 nx_cur++; 3225 if ((xc = xfrm_state_migrate(x, mp))) { 3226 x_new[nx_new] = xc; 3227 nx_new++; 3228 } else { 3229 err = -ENODATA; 3230 goto restore_state; 3231 } 3232 } 3233 } 3234 3235 /* Stage 3 - update policy */ 3236 if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0) 3237 goto restore_state; 3238 3239 /* Stage 4 - delete old state(s) */ 3240 if (nx_cur) { 3241 xfrm_states_put(x_cur, nx_cur); 3242 xfrm_states_delete(x_cur, nx_cur); 3243 } 3244 3245 /* Stage 5 - announce */ 3246 km_migrate(sel, dir, type, m, num_migrate, k); 3247 3248 xfrm_pol_put(pol); 3249 3250 return 0; 3251 out: 3252 return err; 3253 3254 restore_state: 3255 if (pol) 3256 xfrm_pol_put(pol); 3257 if (nx_cur) 3258 xfrm_states_put(x_cur, nx_cur); 3259 if (nx_new) 3260 xfrm_states_delete(x_new, nx_new); 3261 3262 return err; 3263 } 3264 EXPORT_SYMBOL(xfrm_migrate); 3265 #endif 3266