/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <asm/bug.h>
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <net/xfrm.h>
#include <net/ip.h>

DECLARE_MUTEX(xfrm_cfg_sem);
EXPORT_SYMBOL(xfrm_cfg_sem);

static DEFINE_RWLOCK(xfrm_policy_lock);

struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_list);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static kmem_cache_t *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
static struct list_head xfrm_policy_gc_list =
	LIST_HEAD_INIT(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);

/* Register an xfrm_type (AH, ESP, IPcomp, ...) in the af-specific type map. */
int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct xfrm_type_map *typemap;
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;
	typemap = afinfo->type_map;

	write_lock(&typemap->lock);
	if (likely(typemap->map[type->proto] == NULL))
		typemap->map[type->proto] = type;
	else
		err = -EEXIST;
	write_unlock(&typemap->lock);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_register_type);

int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct xfrm_type_map *typemap;
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;
	typemap = afinfo->type_map;

	write_lock(&typemap->lock);
	if (unlikely(typemap->map[type->proto] != type))
		err = -ENOENT;
	else
		typemap->map[type->proto] = NULL;
	write_unlock(&typemap->lock);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);

/* Look up a type by protocol, taking a module reference; tries a module
 * load once on the first miss.
 */
struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct xfrm_type_map *typemap;
	struct xfrm_type *type;
	int modload_attempted = 0;

retry:
	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;
	typemap = afinfo->type_map;

	read_lock(&typemap->lock);
	type = typemap->map[proto];
	if (unlikely(type && !try_module_get(type->owner)))
		type = NULL;
	read_unlock(&typemap->lock);
	if (!type && !modload_attempted) {
		xfrm_policy_put_afinfo(afinfo);
		request_module("xfrm-type-%d-%d",
			       (int) family, (int) proto);
		modload_attempted = 1;
		goto retry;
	}

	xfrm_policy_put_afinfo(afinfo);
	return type;
}

int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
		    unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	if (likely(afinfo->dst_lookup != NULL))
		err = afinfo->dst_lookup(dst, fl);
	else
		err = -EINVAL;
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_dst_lookup);
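/*
 * Usage sketch (illustrative, not part of this file): a protocol module
 * registers its transform type against one address family at init time.
 * This is modeled on what the ESP/AH modules of this era do; the handler
 * names below are placeholders.
 *
 *	static struct xfrm_type example_esp_type = {
 *		.description	= "ESP4",
 *		.owner		= THIS_MODULE,
 *		.proto		= IPPROTO_ESP,
 *		.init_state	= example_init_state,
 *		.destructor	= example_destroy,
 *		.input		= example_input,
 *		.output		= example_output,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		if (xfrm_register_type(&example_esp_type, AF_INET) < 0)
 *			return -EAGAIN;
 *		return 0;
 *	}
 *
 * If no type is registered for a protocol, xfrm_get_type() above falls
 * back to request_module("xfrm-type-<family>-<proto>") once before
 * giving up.
 */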
void xfrm_put_type(struct xfrm_type *type)
{
	module_put(type->owner);
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

/* Per-policy lifetime timer: warn the key managers on soft expiry,
 * delete the policy on hard expiry, and re-arm the timer for the
 * nearest remaining deadline.
 */
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1);
	xfrm_pol_put(xp);
}


/* Allocate an xfrm_policy. Mostly for use by the key manager (pfkeyv2)
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kmalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		memset(policy, 0, sizeof(struct xfrm_policy));
		atomic_set(&policy->refcnt, 1);
		rwlock_init(&policy->lock);
		init_timer(&policy->timer);
		policy->timer.data = (unsigned long)policy;
		policy->timer.function = xfrm_policy_timer;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
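/*
 * Worked example for the lifetime handling above (numbers illustrative):
 * a policy installed at add_time T with lft.soft_add_expires_seconds = 3000
 * and lft.hard_add_expires_seconds = 3600 has its timer armed for the
 * nearest deadline.  At T+3000 the timer fires, km_policy_expired(xp, dir, 0)
 * warns the key managers (the soft case), and the timer is re-armed after
 * XFRM_KM_TIMEOUT; at T+3600 the hard deadline hits, the policy is deleted,
 * and km_policy_expired(xp, dir, 1) announces the hard expiry.
 */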
/* Destroy xfrm_policy: descendant resources must be released by this point. */

void __xfrm_policy_destroy(struct xfrm_policy *policy)
{
	if (!policy->dead)
		BUG();

	if (policy->bundles)
		BUG();

	if (del_timer(&policy->timer))
		BUG();

	kfree(policy);
}
EXPORT_SYMBOL(__xfrm_policy_destroy);

static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(void *data)
{
	struct xfrm_policy *policy;
	struct list_head *entry, *tmp;
	struct list_head gc_list = LIST_HEAD_INIT(gc_list);

	spin_lock_bh(&xfrm_policy_gc_lock);
	list_splice_init(&xfrm_policy_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	list_for_each_safe(entry, tmp, &gc_list) {
		policy = list_entry(entry, struct xfrm_policy, list);
		xfrm_policy_gc_kill(policy);
	}
}

/* Mark the entry dead and queue it for garbage collection, releasing
 * descendant resources there. The rule must already be unlinked from
 * the lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->dead;
	policy->dead = 1;
	write_unlock_bh(&policy->lock);

	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	spin_lock(&xfrm_policy_gc_lock);
	list_add(&policy->list, &xfrm_policy_gc_list);
	spin_unlock(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}

/* Generate a new index. KAME seems to generate indices ordered by cost,
 * at the price of completely unpredictable rule ordering; that will not
 * do here. The low three bits encode the direction, so the generator
 * advances in steps of eight and retries on (unlikely) collisions.
 */
static u32 xfrm_gen_index(int dir)
{
	u32 idx;
	struct xfrm_policy *p;
	static u32 idx_generator;

	for (;;) {
		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		for (p = xfrm_policy_list[dir]; p; p = p->next) {
			if (p->index == idx)
				break;
		}
		if (!p)
			return idx;
	}
}
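/*
 * Index layout example (follows directly from the code above): the low
 * three bits of an index hold the direction, so for dir == XFRM_POLICY_OUT
 * the generator yields indices such as 1, 9, 17, ..., and
 * xfrm_policy_id2dir(17) recovers the direction again.  Per-socket
 * policies are generated with dir + XFRM_POLICY_MAX, so their indices
 * decode to the "hidden" second half of xfrm_policy_list[].
 */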
/* Insert a policy into the per-direction list, ordered by priority.
 * An entry with an identical selector replaces the old one and
 * inherits its index.
 */
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol, **p;
	struct xfrm_policy *delpol = NULL;
	struct xfrm_policy **newpos = NULL;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			*p = pol->next;
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			p = &pol->next;
			continue;
		}
		if (!newpos)
			newpos = p;
		if (delpol)
			break;
		p = &pol->next;
	}
	if (newpos)
		p = newpos;
	xfrm_pol_hold(policy);
	policy->next = *p;
	*p = policy;
	atomic_inc(&flow_cache_genid);
	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol) {
		xfrm_policy_kill(delpol);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
				      int delete)
{
	struct xfrm_policy *pol, **p;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
			xfrm_pol_hold(pol);
			if (delete)
				*p = pol->next;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (pol && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
	}
	return pol;
}
EXPORT_SYMBOL(xfrm_policy_bysel);

struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
{
	struct xfrm_policy *pol, **p;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
		if (pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete)
				*p = pol->next;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (pol && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
	}
	return pol;
}
EXPORT_SYMBOL(xfrm_policy_byid);

void xfrm_policy_flush(void)
{
	struct xfrm_policy *xp;
	int dir;

	write_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		while ((xp = xfrm_policy_list[dir]) != NULL) {
			xfrm_policy_list[dir] = xp->next;
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_policy_kill(xp);

			write_lock_bh(&xfrm_policy_lock);
		}
	}
	atomic_inc(&flow_cache_genid);
	write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *xp;
	int dir;
	int count = 0;
	int error = 0;

	read_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
			count++;
	}

	if (count == 0) {
		error = -ENOENT;
		goto out;
	}

	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
			if (error)
				goto out;
		}
	}

out:
	read_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
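/*
 * Usage sketch (illustrative): xfrm_policy_walk() drives SPD dumps.  A
 * hypothetical caller supplies a callback that is invoked once per policy
 * with the direction and a decreasing count (the last entry sees 0):
 *
 *	static int dump_one_policy(struct xfrm_policy *xp, int dir,
 *				   int count, void *ptr)
 *	{
 *		printk(KERN_DEBUG "pol %u dir %d, %d left\n",
 *		       xp->index, dir, count);
 *		return 0;	// a non-zero return aborts the walk
 *	}
 *
 *	err = xfrm_policy_walk(dump_one_policy, NULL);
 */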
/* Find the policy to apply to this flow. */

static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
		struct xfrm_selector *sel = &pol->selector;
		int match;

		if (pol->family != family)
			continue;

		match = xfrm_selector_match(sel, fl, family);
		if (match) {
			xfrm_pol_hold(pol);
			break;
		}
	}
	read_unlock_bh(&xfrm_policy_lock);
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		if (match)
			xfrm_pol_hold(pol);
		else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	pol->next = xfrm_policy_list[dir];
	xfrm_policy_list[dir] = pol;
	xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	struct xfrm_policy **polp;

	for (polp = &xfrm_policy_list[dir];
	     *polp != NULL; polp = &(*polp)->next) {
		if (*polp == pol) {
			*polp = pol->next;
			return pol;
		}
	}
	return NULL;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		if (dir < XFRM_POLICY_MAX)
			atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

/* Duplicate the parent's socket policies onto a newly cloned socket. */
int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}
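/*
 * Note on the per-socket half of the table: xfrm_sk_policy_insert() and
 * clone_policy() link socket policies at XFRM_POLICY_MAX + dir, i.e. into
 * the upper half of xfrm_policy_list[].  The flow-cache lookups only scan
 * dirs 0..XFRM_POLICY_MAX-1, so socket policies are reached through
 * sk->sk_policy[] (and by walks, pruning and GC), never through the flow
 * cache.  In this kernel generation they typically arrive via the
 * IP_XFRM_POLICY socket option; that path is an assumption of this note,
 * not something this file enforces.
 */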
/* Resolve a list of templates for the flow, given the policy. */

static int
xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}
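/*
 * Resolution example (follows from the loop above): for a flow S -> D and
 * a policy whose xfrm_vec is { ESP in tunnel mode to gateway G, AH in
 * transport mode }, the tunnel template is looked up with remote = G (its
 * own endpoint, not the flow's D), and on success daddr/saddr are updated,
 * so the AH transport template is then resolved with remote = G rather
 * than D.  Optional templates that fail to resolve are simply skipped;
 * a mandatory miss unwinds the acquired states and returns the error.
 */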
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
	struct dst_entry *x;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EINVAL);
	x = afinfo->find_bundle(fl, policy);
	xfrm_policy_put_afinfo(afinfo);
	return x;
}

/* Allocate a chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... In short, bundle a bundle.
 */

static int
xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
		   struct flowi *fl, struct dst_entry **dst_p,
		   unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds or creates a bundle for the given flow.
 *
 * At the moment we eat a raw IP route, mostly to speed up lookups
 * on interfaces with IPsec disabled.
 */
int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family = dst_orig->ops->family;
restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	if (sk && sk->sk_policy[1])
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);

	if (!policy) {
		/* To accelerate a bit... */
		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
			return 0;

		policy = flow_cache_lookup(fl, family,
					   policy_to_flow_dir(XFRM_POLICY_OUT),
					   xfrm_policy_lookup);
	}

	if (!policy)
		return 0;

	policy->curlft.use_time = (unsigned long)xtime.tv_sec;

	switch (policy->action) {
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		err = -EPERM;
		goto error;

	case XFRM_POLICY_ALLOW:
		if (policy->xfrm_nr == 0) {
			/* Flow passes untransformed. */
			xfrm_pol_put(policy);
			return 0;
		}

		/* Try to find a matching bundle.
		 *
		 * LATER: help from the flow cache. It is optional; this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto error;
		}

		if (dst)
			break;

		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && flags) {
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pol_put(policy);
					goto restart;
				}
				err = nx;
			}
			if (err < 0)
				goto error;
		}
		if (nx == 0) {
			/* Flow passes untransformed. */
			xfrm_pol_put(policy);
			return 0;
		}

		dst = dst_orig;
		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);

		if (unlikely(err)) {
			int i;
			for (i=0; i<nx; i++)
				xfrm_state_put(xfrm[i]);
			goto error;
		}

		write_lock_bh(&policy->lock);
		if (unlikely(policy->dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist a new bundle on a dead
			 * object. We can't enlist stale bundles either.
			 */
			write_unlock_bh(&policy->lock);

			xfrm_pol_put(policy);
			if (dst)
				dst_free(dst);
			goto restart;
		}
		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pol_put(policy);
	return 0;

error:
	dst_release(dst_orig);
	xfrm_pol_put(policy);
	*dst_p = NULL;
	return err;
}
EXPORT_SYMBOL(xfrm_lookup);
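/*
 * Caller sketch (illustrative): the output path takes the plain route
 * first and then lets xfrm_lookup() swap it for a bundle when policy
 * demands; roughly what ip_route_output_flow() does in this kernel
 * generation:
 *
 *	err = __ip_route_output_key(rp, flp);
 *	if (err)
 *		return err;
 *	return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
 *
 * On a BLOCK policy the original route is released and -EPERM comes back;
 * with no matching policy the raw route is kept untouched (the early
 * "return 0" fast paths above).
 */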
/* When an skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have the policy cached at them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->aalgos & (1<<x->props.aalgo)) &&
		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
}

static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (!tmpl->mode)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
			return ++idx;
		if (sp->x[idx].xvec->props.mode)
			break;
	}
	return start;
}

static int
_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl);
	xfrm_policy_put_afinfo(afinfo);
	return 0;
}

static inline int secpath_has_tunnel(struct sec_path *sp, int k)
{
	for (; k < sp->len; k++) {
		if (sp->x[k].xvec->props.mode)
			return 1;
	}

	return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct flowi fl;

	if (_decode_session(skb, &fl, family) < 0)
		return 0;

	/* First, check the SAs used against their selectors. */
	if (skb->sp) {
		int i;

		for (i=skb->sp->len-1; i>=0; i--) {
			struct sec_decap_state *xvec = &(skb->sp->x[i]);
			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
				return 0;

			/* If there is a post_input processor, try running it */
			if (xvec->xvec->type->post_input &&
			    (xvec->xvec->type->post_input)(xvec->xvec,
							   &(xvec->decap),
							   skb) != 0)
				return 0;
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir])
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);

	if (!pol)
		pol = flow_cache_lookup(&fl, family,
					policy_to_flow_dir(dir),
					xfrm_policy_lookup);

	if (!pol)
		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);

	pol->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find the corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
			if (k < 0)
				goto reject;
		}

		if (secpath_has_tunnel(sp, k))
			goto reject;

		xfrm_pol_put(pol);
		return 1;
	}

reject:
	xfrm_pol_put(pol);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
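/*
 * Caller sketch (illustrative): protocol input paths reach this through
 * the per-family wrappers; e.g. the IPv4 TCP receive path does, in essence:
 *
 *	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 *		goto discard;
 *
 * where xfrm4_policy_check() is a thin inline (with fast paths) around
 * __xfrm_policy_check() with family == AF_INET.  A return of 0 here means
 * the packet violates inbound policy and must be dropped.
 */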
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct flowi fl;

	if (_decode_session(skb, &fl, family) < 0)
		return 0;

	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	if (!stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = &loopback_dev;
		dev_hold(&loopback_dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such a dst must be popped before it reaches the
	 * point of failure. */
	return;
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst) {
		if (dst->obsolete) {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

/* Unlink every bundle that func() flags (stale or unused) and free the
 * victims outside the locks.
 */
static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
	int i;
	struct xfrm_policy *pol;
	struct dst_entry *dst, **dstp, *gc_list = NULL;

	read_lock_bh(&xfrm_policy_lock);
	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
			write_lock(&pol->lock);
			dstp = &pol->bundles;
			while ((dst=*dstp) != NULL) {
				if (func(dst)) {
					*dstp = dst->next;
					dst->next = gc_list;
					gc_list = dst;
				} else {
					dstp = &dst->next;
				}
			}
			write_unlock(&pol->lock);
		}
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		dst = gc_list;
		gc_list = dst->next;
		dst_free(dst);
	}
}

static int unused_bundle(struct dst_entry *dst)
{
	return !atomic_read(&dst->__refcnt);
}

static void __xfrm_garbage_collect(void)
{
	xfrm_prune_bundles(unused_bundle);
}

int xfrm_flush_bundles(void)
{
	xfrm_prune_bundles(stale_bundle);
	return 0;
}

void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst->metrics[RTAX_MTU-1] = pmtu;
	} while ((dst = dst->next));
}

EXPORT_SYMBOL(xfrm_init_pmtu);
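/*
 * Worked example for the loop above (numbers illustrative): for a
 * bundle level whose child route has an MTU of 1500, if
 * xfrm_state_mtu() reports that ESP can carry at most 1442 bytes of
 * payload within those 1500, and the level's own route also has an MTU
 * of 1500, then that dst ends up with
 * metrics[RTAX_MTU-1] = min(1442, 1500) = 1442.
 */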
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
			return 0;
		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst->metrics[RTAX_MTU-1] = mtu;

		if (last == first)
			break;

		last = last->u.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

EXPORT_SYMBOL(xfrm_bundle_ok);
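/*
 * Note: stale_bundle() above calls this with fl == NULL and AF_UNSPEC,
 * so only the validity checks run (path/route cookies, device state,
 * XFRM_STATE_VALID) and the selector match is skipped.  When a cached
 * MTU changed anywhere along the chain, the second loop walks back up
 * via u.next, re-propagating MTUs exactly as xfrm_init_pmtu() does at
 * bundle-creation time.
 */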
/* Well... that's _TASK_. We need to scan through the transformation
 * list and figure out what MSS TCP should generate in order for the
 * final datagram to fit the MTU. Mamma mia... :-)
 *
 * Apparently, some easy way exists, but we used to choose the most
 * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
 *
 * Consider this function as something like dark humour. :-)
 */
static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
{
	int res = mtu - dst->header_len;

	for (;;) {
		struct dst_entry *d = dst;
		int m = res;

		do {
			struct xfrm_state *x = d->xfrm;
			if (x) {
				spin_lock_bh(&x->lock);
				if (x->km.state == XFRM_STATE_VALID &&
				    x->type && x->type->get_max_size)
					m = x->type->get_max_size(d->xfrm, m);
				else
					m += x->props.header_len;
				spin_unlock_bh(&x->lock);
			}
		} while ((d = d->child) != NULL);

		if (m <= mtu)
			break;
		res -= (m - mtu);
		if (res < 88)
			return mtu;
	}

	return res + dst->header_len;
}

int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->get_mss == NULL))
			dst_ops->get_mss = xfrm_get_mss;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = __xfrm_garbage_collect;
		xfrm_policy_afinfo[afinfo->family] = afinfo;
	}
	write_unlock(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			struct dst_ops *dst_ops = afinfo->dst_ops;
			xfrm_policy_afinfo[afinfo->family] = NULL;
			dst_ops->kmem_cachep = NULL;
			dst_ops->check = NULL;
			dst_ops->negative_advice = NULL;
			dst_ops->link_failure = NULL;
			dst_ops->get_mss = NULL;
			afinfo->garbage_collect = NULL;
		}
	}
	write_unlock(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_policy_afinfo_lock);
	afinfo = xfrm_policy_afinfo[family];
	if (likely(afinfo != NULL))
		read_lock(&afinfo->lock);
	read_unlock(&xfrm_policy_afinfo_lock);
	return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	if (unlikely(afinfo == NULL))
		return;
	read_unlock(&afinfo->lock);
}
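/*
 * Registration sketch (illustrative, modeled on net/ipv4/xfrm4_policy.c
 * of this era; field values are from memory and may differ in detail):
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family		= AF_INET,
 *		.lock		= RW_LOCK_UNLOCKED,
 *		.type_map	= &xfrm4_type_map,
 *		.dst_ops	= &xfrm4_dst_ops,
 *		.dst_lookup	= xfrm4_dst_lookup,
 *		.find_bundle	= __xfrm4_find_bundle,
 *		.bundle_create	= __xfrm4_bundle_create,
 *		.decode_session	= _decode_session4,
 *	};
 *
 *	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 *
 * Any dst_ops hooks the family leaves NULL are filled in with the
 * generic ones above (xfrm_dst_check, xfrm_get_mss, ...).
 */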
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	switch (event) {
	case NETDEV_DOWN:
		xfrm_flush_bundles();
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
	xfrm_dev_event,
	NULL,
	0
};

static void __init xfrm_policy_init(void)
{
	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN,
					   NULL, NULL);
	if (!xfrm_dst_cache)
		panic("XFRM: failed to allocate xfrm_dst_cache\n");

	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
	register_netdevice_notifier(&xfrm_dev_notifier);
}

void __init xfrm_init(void)
{
	xfrm_state_init();
	xfrm_policy_init();
	xfrm_input_init();
}
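/*
 * Initialization note (an assumption of this note, not enforced here):
 * in this kernel generation xfrm_init() is invoked from the IPv4 routing
 * setup (ip_rt_init()), so the state, policy, and input layers are ready
 * before any af-specific xfrm_policy_afinfo or xfrm_type registrations
 * take place.
 */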