// SPDX-License-Identifier: GPL-2.0-only
/*
 * xfrm_state.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */

#include <linux/compat.h>
#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <linux/uaccess.h>
#include <linux/ktime.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>

#include <crypto/aead.h>

#include "xfrm_hash.h"

#define xfrm_state_deref_prot(table, net) \
	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
#define xfrm_state_deref_check(table, net) \
	rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))

static void xfrm_state_gc_task(struct work_struct *work);

/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static struct kmem_cache *xfrm_state_cache __ro_after_init;

static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
static HLIST_HEAD(xfrm_state_gc_list);
static HLIST_HEAD(xfrm_state_dev_gc_list);

static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
{
	return refcount_inc_not_zero(&x->refcnt);
}

static inline unsigned int xfrm_dst_hash(struct net *net,
					 const xfrm_address_t *daddr,
					 const xfrm_address_t *saddr,
					 u32 reqid,
					 unsigned short family)
{
	lockdep_assert_held(&net->xfrm.xfrm_state_lock);

	return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
}

static inline unsigned int xfrm_src_hash(struct net *net,
					 const xfrm_address_t *daddr,
					 const xfrm_address_t *saddr,
					 unsigned short family)
{
	lockdep_assert_held(&net->xfrm.xfrm_state_lock);

	return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
}

static inline unsigned int
xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
	      __be32 spi, u8 proto, unsigned short family)
{
	lockdep_assert_held(&net->xfrm.xfrm_state_lock);

	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
}

static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
{
	lockdep_assert_held(&net->xfrm.xfrm_state_lock);

	return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
}

#define XFRM_STATE_INSERT(by, _n, _h, _type)				\
	{								\
		struct xfrm_state *_x = NULL;				\
									\
		if (_type != XFRM_DEV_OFFLOAD_PACKET) {			\
			hlist_for_each_entry_rcu(_x, _h, by) {		\
				if (_x->xso.type == XFRM_DEV_OFFLOAD_PACKET) \
					continue;			\
				break;					\
			}						\
		}							\
									\
		if (!_x || _x->xso.type == XFRM_DEV_OFFLOAD_PACKET)	\
			/* SAD is empty or consists of HW SAs only */	\
			hlist_add_head_rcu(_n, _h);			\
		else							\
			hlist_add_before_rcu(_n, &_x->by);		\
	}
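/* Move every state in one bucket of the old tables to its bucket in
 * the new tables.  XFRM_STATE_INSERT() keeps hardware packet-offload
 * states at the head of each chain, so that ordering invariant is
 * preserved across the rehash.
 */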
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       struct hlist_head *nseqtable,
			       unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		XFRM_STATE_INSERT(bydst, &x->bydst, ndsttable + h, x->xso.type);

		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
				    x->props.family,
				    nhashmask);
		XFRM_STATE_INSERT(bysrc, &x->bysrc, nsrctable + h, x->xso.type);

		if (x->id.spi) {
			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
					    x->id.proto, x->props.family,
					    nhashmask);
			XFRM_STATE_INSERT(byspi, &x->byspi, nspitable + h,
					  x->xso.type);
		}

		if (x->km.seq) {
			h = __xfrm_seq_hash(x->km.seq, nhashmask);
			XFRM_STATE_INSERT(byseq, &x->byseq, nseqtable + h,
					  x->xso.type);
		}
	}
}

static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
{
	return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
}

static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.state_hash_work);
	struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
	ndst = xfrm_hash_alloc(nsize);
	if (!ndst)
		return;
	nsrc = xfrm_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_hash_free(ndst, nsize);
		return;
	}
	nspi = xfrm_hash_alloc(nsize);
	if (!nspi) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		return;
	}
	nseq = xfrm_hash_alloc(nsize);
	if (!nseq) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		xfrm_hash_free(nspi, nsize);
		return;
	}

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);

	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
	for (i = net->xfrm.state_hmask; i >= 0; i--)
		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);

	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
	oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
	ohashmask = net->xfrm.state_hmask;

	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
	rcu_assign_pointer(net->xfrm.state_byseq, nseq);
	net->xfrm.state_hmask = nhashmask;

	write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	osize = (ohashmask + 1) * sizeof(struct hlist_head);

	synchronize_rcu();

	xfrm_hash_free(odst, osize);
	xfrm_hash_free(osrc, osize);
	xfrm_hash_free(ospi, osize);
	xfrm_hash_free(oseq, osize);
}

static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];

static DEFINE_SPINLOCK(xfrm_state_gc_lock);
static DEFINE_SPINLOCK(xfrm_state_dev_gc_lock);

int __xfrm_state_delete(struct xfrm_state *x);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
static bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
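/* Protocol type registry.  There is one slot per IPsec protocol in
 * each family's xfrm_state_afinfo; registering twice triggers a
 * WARN_ON().  Note that xfrm_state_get_afinfo() returns with
 * rcu_read_lock() held, which the functions below drop on exit.
 */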
int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	int err = 0;

	if (!afinfo)
		return -EAFNOSUPPORT;

#define X(afi, T, name) do {			\
		WARN_ON((afi)->type_ ## name);	\
		(afi)->type_ ## name = (T);	\
	} while (0)

	switch (type->proto) {
	case IPPROTO_COMP:
		X(afinfo, type, comp);
		break;
	case IPPROTO_AH:
		X(afinfo, type, ah);
		break;
	case IPPROTO_ESP:
		X(afinfo, type, esp);
		break;
	case IPPROTO_IPIP:
		X(afinfo, type, ipip);
		break;
	case IPPROTO_DSTOPTS:
		X(afinfo, type, dstopts);
		break;
	case IPPROTO_ROUTING:
		X(afinfo, type, routing);
		break;
	case IPPROTO_IPV6:
		X(afinfo, type, ipip6);
		break;
	default:
		WARN_ON(1);
		err = -EPROTONOSUPPORT;
		break;
	}
#undef X
	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(xfrm_register_type);

void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return;

#define X(afi, T, name) do {				\
		WARN_ON((afi)->type_ ## name != (T));	\
		(afi)->type_ ## name = NULL;		\
	} while (0)

	switch (type->proto) {
	case IPPROTO_COMP:
		X(afinfo, type, comp);
		break;
	case IPPROTO_AH:
		X(afinfo, type, ah);
		break;
	case IPPROTO_ESP:
		X(afinfo, type, esp);
		break;
	case IPPROTO_IPIP:
		X(afinfo, type, ipip);
		break;
	case IPPROTO_DSTOPTS:
		X(afinfo, type, dstopts);
		break;
	case IPPROTO_ROUTING:
		X(afinfo, type, routing);
		break;
	case IPPROTO_IPV6:
		X(afinfo, type, ipip6);
		break;
	default:
		WARN_ON(1);
		break;
	}
#undef X
	rcu_read_unlock();
}
EXPORT_SYMBOL(xfrm_unregister_type);

static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
	const struct xfrm_type *type = NULL;
	struct xfrm_state_afinfo *afinfo;
	int modload_attempted = 0;

retry:
	afinfo = xfrm_state_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;

	switch (proto) {
	case IPPROTO_COMP:
		type = afinfo->type_comp;
		break;
	case IPPROTO_AH:
		type = afinfo->type_ah;
		break;
	case IPPROTO_ESP:
		type = afinfo->type_esp;
		break;
	case IPPROTO_IPIP:
		type = afinfo->type_ipip;
		break;
	case IPPROTO_DSTOPTS:
		type = afinfo->type_dstopts;
		break;
	case IPPROTO_ROUTING:
		type = afinfo->type_routing;
		break;
	case IPPROTO_IPV6:
		type = afinfo->type_ipip6;
		break;
	default:
		break;
	}

	if (unlikely(type && !try_module_get(type->owner)))
		type = NULL;

	rcu_read_unlock();

	if (!type && !modload_attempted) {
		request_module("xfrm-type-%d-%d", family, proto);
		modload_attempted = 1;
		goto retry;
	}

	return type;
}

static void xfrm_put_type(const struct xfrm_type *type)
{
	module_put(type->owner);
}
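/* Offload (xfrm_type_offload) registry.  This mirrors
 * xfrm_register_type()/xfrm_unregister_type() above, but only ESP
 * has an offload type.
 */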
int xfrm_register_type_offload(const struct xfrm_type_offload *type,
			       unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	switch (type->proto) {
	case IPPROTO_ESP:
		WARN_ON(afinfo->type_offload_esp);
		afinfo->type_offload_esp = type;
		break;
	default:
		WARN_ON(1);
		err = -EPROTONOSUPPORT;
		break;
	}

	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(xfrm_register_type_offload);

void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
				  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return;

	switch (type->proto) {
	case IPPROTO_ESP:
		WARN_ON(afinfo->type_offload_esp != type);
		afinfo->type_offload_esp = NULL;
		break;
	default:
		WARN_ON(1);
		break;
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);

void xfrm_set_type_offload(struct xfrm_state *x)
{
	const struct xfrm_type_offload *type = NULL;
	struct xfrm_state_afinfo *afinfo;
	bool try_load = true;

retry:
	afinfo = xfrm_state_get_afinfo(x->props.family);
	if (unlikely(afinfo == NULL))
		goto out;

	switch (x->id.proto) {
	case IPPROTO_ESP:
		type = afinfo->type_offload_esp;
		break;
	default:
		break;
	}

	if ((type && !try_module_get(type->owner)))
		type = NULL;

	rcu_read_unlock();

	if (!type && try_load) {
		request_module("xfrm-offload-%d-%d", x->props.family,
			       x->id.proto);
		try_load = false;
		goto retry;
	}

out:
	x->type_offload = type;
}
EXPORT_SYMBOL(xfrm_set_type_offload);

static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
	[XFRM_MODE_BEET] = {
		.encap = XFRM_MODE_BEET,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET,
	},
	[XFRM_MODE_TRANSPORT] = {
		.encap = XFRM_MODE_TRANSPORT,
		.family = AF_INET,
	},
	[XFRM_MODE_TUNNEL] = {
		.encap = XFRM_MODE_TUNNEL,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET,
	},
	[XFRM_MODE_IPTFS] = {
		.encap = XFRM_MODE_IPTFS,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET,
	},
};

static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
	[XFRM_MODE_BEET] = {
		.encap = XFRM_MODE_BEET,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET6,
	},
	[XFRM_MODE_ROUTEOPTIMIZATION] = {
		.encap = XFRM_MODE_ROUTEOPTIMIZATION,
		.family = AF_INET6,
	},
	[XFRM_MODE_TRANSPORT] = {
		.encap = XFRM_MODE_TRANSPORT,
		.family = AF_INET6,
	},
	[XFRM_MODE_TUNNEL] = {
		.encap = XFRM_MODE_TUNNEL,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET6,
	},
	[XFRM_MODE_IPTFS] = {
		.encap = XFRM_MODE_IPTFS,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET6,
	},
};

static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
{
	const struct xfrm_mode *mode;

	if (unlikely(encap >= XFRM_MODE_MAX))
		return NULL;

	switch (family) {
	case AF_INET:
		mode = &xfrm4_mode_map[encap];
		if (mode->family == family)
			return mode;
		break;
	case AF_INET6:
		mode = &xfrm6_mode_map[encap];
		if (mode->family == family)
			return mode;
		break;
	default:
		break;
	}

	return NULL;
}
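/* Pluggable per-mode callbacks, registered by modules implementing an
 * encapsulation mode.  xfrm_get_mode_cbs() below autoloads the IP-TFS
 * module on first use.
 */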
static const struct xfrm_mode_cbs __rcu *xfrm_mode_cbs_map[XFRM_MODE_MAX];
static DEFINE_SPINLOCK(xfrm_mode_cbs_map_lock);

int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs)
{
	if (mode >= XFRM_MODE_MAX)
		return -EINVAL;

	spin_lock_bh(&xfrm_mode_cbs_map_lock);
	rcu_assign_pointer(xfrm_mode_cbs_map[mode], mode_cbs);
	spin_unlock_bh(&xfrm_mode_cbs_map_lock);

	return 0;
}
EXPORT_SYMBOL(xfrm_register_mode_cbs);

void xfrm_unregister_mode_cbs(u8 mode)
{
	if (mode >= XFRM_MODE_MAX)
		return;

	spin_lock_bh(&xfrm_mode_cbs_map_lock);
	RCU_INIT_POINTER(xfrm_mode_cbs_map[mode], NULL);
	spin_unlock_bh(&xfrm_mode_cbs_map_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL(xfrm_unregister_mode_cbs);

static const struct xfrm_mode_cbs *xfrm_get_mode_cbs(u8 mode)
{
	const struct xfrm_mode_cbs *cbs;
	bool try_load = true;

	if (mode >= XFRM_MODE_MAX)
		return NULL;

retry:
	rcu_read_lock();

	cbs = rcu_dereference(xfrm_mode_cbs_map[mode]);
	if (cbs && !try_module_get(cbs->owner))
		cbs = NULL;

	rcu_read_unlock();

	if (mode == XFRM_MODE_IPTFS && !cbs && try_load) {
		request_module("xfrm-iptfs");
		try_load = false;
		goto retry;
	}

	return cbs;
}

void xfrm_state_free(struct xfrm_state *x)
{
	kmem_cache_free(xfrm_state_cache, x);
}
EXPORT_SYMBOL(xfrm_state_free);

static void ___xfrm_state_destroy(struct xfrm_state *x)
{
	if (x->mode_cbs && x->mode_cbs->destroy_state)
		x->mode_cbs->destroy_state(x);
	hrtimer_cancel(&x->mtimer);
	timer_delete_sync(&x->rtimer);
	kfree_sensitive(x->aead);
	kfree_sensitive(x->aalg);
	kfree_sensitive(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	kfree(x->replay_esn);
	kfree(x->preplay_esn);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	if (x->xfrag.page)
		put_page(x->xfrag.page);
	xfrm_dev_state_free(x);
	security_xfrm_state_free(x);
	xfrm_state_free(x);
}

static void xfrm_state_gc_task(struct work_struct *work)
{
	struct xfrm_state *x;
	struct hlist_node *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_move_list(&xfrm_state_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	synchronize_rcu();

	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
		___xfrm_state_destroy(x);
}
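/* Per-state lifetime timer.  A soft expiry only notifies the key
 * manager (km_state_expired() with hard == 0), a hard expiry deletes
 * the state.  The handler takes x->lock itself and reschedules for
 * the nearest remaining deadline.
 */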
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
{
	struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer);
	enum hrtimer_restart ret = HRTIMER_NORESTART;
	time64_t now = ktime_get_real_seconds();
	time64_t next = TIME64_MAX;
	int warn = 0;
	int err = 0;

	spin_lock(&x->lock);
	xfrm_dev_state_update_stats(x);

	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		time64_t tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0) {
			if (x->xflags & XFRM_SOFT_EXPIRE) {
				/* hard expire reached without a soft expire first?!
				 * Setting a new expiry date could trigger this.
				 * Workaround: fix up x->curlft.add_time as below:
				 */
				x->curlft.add_time = now - x->saved_tmo - 1;
				tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
			} else
				goto expired;
		}
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		time64_t tmo = x->lft.hard_use_expires_seconds +
			(READ_ONCE(x->curlft.use_time) ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		time64_t tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			x->xflags &= ~XFRM_SOFT_EXPIRE;
		} else if (tmo < next) {
			next = tmo;
			x->xflags |= XFRM_SOFT_EXPIRE;
			x->saved_tmo = tmo;
		}
	}
	if (x->lft.soft_use_expires_seconds) {
		time64_t tmo = x->lft.soft_use_expires_seconds +
			(READ_ONCE(x->curlft.use_time) ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != TIME64_MAX) {
		hrtimer_forward_now(&x->mtimer, ktime_set(next, 0));
		ret = HRTIMER_RESTART;
	}

	goto out;

expired:
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
		x->km.state = XFRM_STATE_EXPIRED;

	err = __xfrm_state_delete(x);
	if (!err)
		km_state_expired(x, 1, 0);

	xfrm_audit_state_delete(x, err ? 0 : 1, true);

out:
	spin_unlock(&x->lock);
	return ret;
}

static void xfrm_replay_timer_handler(struct timer_list *t);

struct xfrm_state *xfrm_state_alloc(struct net *net)
{
	struct xfrm_state *x;

	x = kmem_cache_zalloc(xfrm_state_cache, GFP_ATOMIC);

	if (x) {
		write_pnet(&x->xs_net, net);
		refcount_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_LIST_HEAD(&x->km.all);
		INIT_HLIST_NODE(&x->state_cache);
		INIT_HLIST_NODE(&x->bydst);
		INIT_HLIST_NODE(&x->bysrc);
		INIT_HLIST_NODE(&x->byspi);
		INIT_HLIST_NODE(&x->byseq);
		hrtimer_setup(&x->mtimer, xfrm_timer_handler, CLOCK_BOOTTIME,
			      HRTIMER_MODE_ABS_SOFT);
		timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
		x->curlft.add_time = ktime_get_real_seconds();
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->replay_maxage = 0;
		x->replay_maxdiff = 0;
		x->pcpu_num = UINT_MAX;
		spin_lock_init(&x->lock);
		x->mode_data = NULL;
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);

#ifdef CONFIG_XFRM_OFFLOAD
void xfrm_dev_state_delete(struct xfrm_state *x)
{
	struct xfrm_dev_offload *xso = &x->xso;
	struct net_device *dev = READ_ONCE(xso->dev);

	if (dev) {
		dev->xfrmdev_ops->xdo_dev_state_delete(dev, x);
		spin_lock_bh(&xfrm_state_dev_gc_lock);
		hlist_add_head(&x->dev_gclist, &xfrm_state_dev_gc_list);
		spin_unlock_bh(&xfrm_state_dev_gc_lock);
	}
}
EXPORT_SYMBOL_GPL(xfrm_dev_state_delete);

void xfrm_dev_state_free(struct xfrm_state *x)
{
	struct xfrm_dev_offload *xso = &x->xso;
	struct net_device *dev = READ_ONCE(xso->dev);

	xfrm_unset_type_offload(x);

	if (dev && dev->xfrmdev_ops) {
		spin_lock_bh(&xfrm_state_dev_gc_lock);
		if (!hlist_unhashed(&x->dev_gclist))
			hlist_del(&x->dev_gclist);
		spin_unlock_bh(&xfrm_state_dev_gc_lock);

		if (dev->xfrmdev_ops->xdo_dev_state_free)
			dev->xfrmdev_ops->xdo_dev_state_free(dev, x);
		WRITE_ONCE(xso->dev, NULL);
		xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
		netdev_put(dev, &xso->dev_tracker);
	}
}
#endif
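/* Destruction is two-phase: a synchronous caller waits for the RCU
 * grace period itself, otherwise the state is parked on
 * xfrm_state_gc_list and freed by xfrm_state_gc_task() after
 * synchronize_rcu().
 */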
void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
{
	WARN_ON(x->km.state != XFRM_STATE_DEAD);

	if (sync) {
		synchronize_rcu();
		___xfrm_state_destroy(x);
	} else {
		spin_lock_bh(&xfrm_state_gc_lock);
		hlist_add_head(&x->gclist, &xfrm_state_gc_list);
		spin_unlock_bh(&xfrm_state_gc_lock);
		schedule_work(&xfrm_state_gc_work);
	}
}
EXPORT_SYMBOL(__xfrm_state_destroy);

int __xfrm_state_delete(struct xfrm_state *x)
{
	struct net *net = xs_net(x);
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;

		spin_lock(&net->xfrm.xfrm_state_lock);
		list_del(&x->km.all);
		hlist_del_rcu(&x->bydst);
		hlist_del_rcu(&x->bysrc);
		if (x->km.seq)
			hlist_del_rcu(&x->byseq);
		if (!hlist_unhashed(&x->state_cache))
			hlist_del_rcu(&x->state_cache);
		if (!hlist_unhashed(&x->state_cache_input))
			hlist_del_rcu(&x->state_cache_input);

		if (x->id.spi)
			hlist_del_rcu(&x->byspi);
		net->xfrm.state_num--;
		xfrm_nat_keepalive_state_updated(x);
		spin_unlock(&net->xfrm.xfrm_state_lock);

		xfrm_dev_state_delete(x);

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);

int xfrm_state_delete(struct xfrm_state *x)
{
	int err;

	spin_lock_bh(&x->lock);
	err = __xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_state_delete);

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
	int i, err = 0;

	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		struct xfrm_state *x;

		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto) &&
			   (err = security_xfrm_state_delete(x)) != 0) {
				xfrm_audit_state_delete(x, 0, task_valid);
				return err;
			}
		}
	}

	return err;
}

static inline int
xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
{
	int i, err = 0;

	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		struct xfrm_state *x;
		struct xfrm_dev_offload *xso;

		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			xso = &x->xso;

			if (xso->dev == dev &&
			   (err = security_xfrm_state_delete(x)) != 0) {
				xfrm_audit_state_delete(x, 0, task_valid);
				return err;
			}
		}
	}

	return err;
}
#else
static inline int
xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
	return 0;
}

static inline int
xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
{
	return 0;
}
#endif
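/* Flushing cannot hold the state lock across xfrm_state_delete(), so
 * the walk drops the lock around each deletion and restarts the
 * current hash chain from its head.
 */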
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
{
	int i, err = 0, cnt = 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	err = xfrm_state_flush_secctx_check(net, proto, task_valid);
	if (err)
		goto out;

	err = -ESRCH;
	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				xfrm_state_hold(x);
				spin_unlock_bh(&net->xfrm.xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_audit_state_delete(x, err ? 0 : 1,
							task_valid);
				if (sync)
					xfrm_state_put_sync(x);
				else
					xfrm_state_put(x);
				if (!err)
					cnt++;

				spin_lock_bh(&net->xfrm.xfrm_state_lock);
				goto restart;
			}
		}
	}
out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	if (cnt)
		err = 0;

	return err;
}
EXPORT_SYMBOL(xfrm_state_flush);

int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
{
	struct xfrm_state *x;
	struct hlist_node *tmp;
	struct xfrm_dev_offload *xso;
	int i, err = 0, cnt = 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
	if (err)
		goto out;

	err = -ESRCH;
	for (i = 0; i <= net->xfrm.state_hmask; i++) {
restart:
		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			xso = &x->xso;

			if (!xfrm_state_kern(x) && xso->dev == dev) {
				xfrm_state_hold(x);
				spin_unlock_bh(&net->xfrm.xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_dev_state_free(x);

				xfrm_audit_state_delete(x, err ? 0 : 1,
							task_valid);
				xfrm_state_put(x);
				if (!err)
					cnt++;

				spin_lock_bh(&net->xfrm.xfrm_state_lock);
				goto restart;
			}
		}
	}
	if (cnt)
		err = 0;

out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	spin_lock_bh(&xfrm_state_dev_gc_lock);
restart_gc:
	hlist_for_each_entry_safe(x, tmp, &xfrm_state_dev_gc_list, dev_gclist) {
		xso = &x->xso;

		if (xso->dev == dev) {
			spin_unlock_bh(&xfrm_state_dev_gc_lock);
			xfrm_dev_state_free(x);
			spin_lock_bh(&xfrm_state_dev_gc_lock);
			goto restart_gc;
		}
	}
	spin_unlock_bh(&xfrm_state_dev_gc_lock);

	xfrm_flush_gc();

	return err;
}
EXPORT_SYMBOL(xfrm_dev_state_flush);

void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
{
	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	si->sadcnt = net->xfrm.state_num;
	si->sadhcnt = net->xfrm.state_hmask + 1;
	si->sadhmcnt = xfrm_state_hashmax;
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_sad_getinfo);

static void
__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	sel->daddr.a4 = fl4->daddr;
	sel->saddr.a4 = fl4->saddr;
	sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
	sel->dport_mask = htons(0xffff);
	sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
	sel->sport_mask = htons(0xffff);
	sel->family = AF_INET;
	sel->prefixlen_d = 32;
	sel->prefixlen_s = 32;
	sel->proto = fl4->flowi4_proto;
	sel->ifindex = fl4->flowi4_oif;
}
static void
__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi6 *fl6 = &fl->u.ip6;

	/* Initialize temporary selector matching only to current session. */
	*(struct in6_addr *)&sel->daddr = fl6->daddr;
	*(struct in6_addr *)&sel->saddr = fl6->saddr;
	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
	sel->dport_mask = htons(0xffff);
	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
	sel->sport_mask = htons(0xffff);
	sel->family = AF_INET6;
	sel->prefixlen_d = 128;
	sel->prefixlen_s = 128;
	sel->proto = fl6->flowi6_proto;
	sel->ifindex = fl6->flowi6_oif;
}

static void
xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
		    const struct xfrm_tmpl *tmpl,
		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
		    unsigned short family)
{
	switch (family) {
	case AF_INET:
		__xfrm4_init_tempsel(&x->sel, fl);
		break;
	case AF_INET6:
		__xfrm6_init_tempsel(&x->sel, fl);
		break;
	}

	x->id = tmpl->id;

	switch (tmpl->encap_family) {
	case AF_INET:
		if (x->id.daddr.a4 == 0)
			x->id.daddr.a4 = daddr->a4;
		x->props.saddr = tmpl->saddr;
		if (x->props.saddr.a4 == 0)
			x->props.saddr.a4 = saddr->a4;
		break;
	case AF_INET6:
		if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
			memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
		memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
		if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
			memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
		break;
	}

	x->props.mode = tmpl->mode;
	x->props.reqid = tmpl->reqid;
	x->props.family = tmpl->encap_family;
}

struct xfrm_hash_state_ptrs {
	const struct hlist_head *bydst;
	const struct hlist_head *bysrc;
	const struct hlist_head *byspi;
	unsigned int hmask;
};

static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs)
{
	unsigned int sequence;

	do {
		sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);

		ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net);
		ptrs->bysrc = xfrm_state_deref_check(net->xfrm.state_bysrc, net);
		ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net);
		ptrs->hmask = net->xfrm.state_hmask;
	} while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence));
}
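/* SPI-hash lookup.  For a packet-offload policy only states bound to
 * the same device may match; since HW states sit at the head of each
 * chain, such a lookup stops at the first non-HW entry, while plain
 * SW lookups skip the HW entries instead.
 */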
static struct xfrm_state *__xfrm_state_lookup_all(const struct xfrm_hash_state_ptrs *state_ptrs,
						  u32 mark,
						  const xfrm_address_t *daddr,
						  __be32 spi, u8 proto,
						  unsigned short family,
						  struct xfrm_dev_offload *xdo)
{
	unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
	struct xfrm_state *x;

	hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
#ifdef CONFIG_XFRM_OFFLOAD
		if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
				/* HW states are at the head of the list,
				 * there is no need to iterate further.
				 */
				break;

			/* Packet offload: both policy and SA should
			 * have the same device.
			 */
			if (xdo->dev != x->xso.dev)
				continue;
		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
			/* Skip HW policy for SW lookups */
			continue;
#endif
		if (x->props.family != family ||
		    x->id.spi != spi ||
		    x->id.proto != proto ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
			continue;

		if ((mark & x->mark.m) != x->mark.v)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;
		return x;
	}

	return NULL;
}

static struct xfrm_state *__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs,
					      u32 mark,
					      const xfrm_address_t *daddr,
					      __be32 spi, u8 proto,
					      unsigned short family)
{
	unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
	struct xfrm_state *x;

	hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
		if (x->props.family != family ||
		    x->id.spi != spi ||
		    x->id.proto != proto ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
			continue;

		if ((mark & x->mark.m) != x->mark.v)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;
		return x;
	}

	return NULL;
}

struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
					   const xfrm_address_t *daddr,
					   __be32 spi, u8 proto,
					   unsigned short family)
{
	struct xfrm_hash_state_ptrs state_ptrs;
	struct hlist_head *state_cache_input;
	struct xfrm_state *x = NULL;

	state_cache_input = raw_cpu_ptr(net->xfrm.state_cache_input);

	rcu_read_lock();
	hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
		if (x->props.family != family ||
		    x->id.spi != spi ||
		    x->id.proto != proto ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
			continue;

		if ((mark & x->mark.m) != x->mark.v)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;
		goto out;
	}

	xfrm_hash_ptrs_get(net, &state_ptrs);

	x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);

	if (x && x->km.state == XFRM_STATE_VALID) {
		spin_lock_bh(&net->xfrm.xfrm_state_lock);
		if (hlist_unhashed(&x->state_cache_input)) {
			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
		} else {
			hlist_del_rcu(&x->state_cache_input);
			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
		}
		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	}

out:
	rcu_read_unlock();
	return x;
}
EXPORT_SYMBOL(xfrm_input_state_lookup);

static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs,
						     u32 mark,
						     const xfrm_address_t *daddr,
						     const xfrm_address_t *saddr,
						     u8 proto, unsigned short family)
{
	unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask);
	struct xfrm_state *x;

	hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) {
		if (x->props.family != family ||
		    x->id.proto != proto ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
			continue;

		if ((mark & x->mark.m) != x->mark.v)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;
		return x;
	}

	return NULL;
}
static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
	struct xfrm_hash_state_ptrs state_ptrs;
	struct net *net = xs_net(x);
	u32 mark = x->mark.v & x->mark.m;

	xfrm_hash_ptrs_get(net, &state_ptrs);

	if (use_spi)
		return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr,
					   x->id.spi, x->id.proto, family);
	else
		return __xfrm_state_lookup_byaddr(&state_ptrs, mark,
						  &x->id.daddr,
						  &x->props.saddr,
						  x->id.proto, family);
}

static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
{
	if (have_hash_collision &&
	    (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
	    net->xfrm.state_num > net->xfrm.state_hmask)
		schedule_work(&net->xfrm.state_hash_work);
}

static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
			       const struct flowi *fl, unsigned short family,
			       struct xfrm_state **best, int *acq_in_progress,
			       int *error)
{
	/* We need the cpu id just as a lookup key,
	 * we don't require it to be stable.
	 */
	unsigned int pcpu_id = get_cpu();
	put_cpu();

	/* Resolution logic:
	 * 1. There is a valid state with matching selector. Done.
	 * 2. Valid state with inappropriate selector. Skip.
	 *
	 * Entering area of "sysdeps".
	 *
	 * 3. If state is not valid, selector is temporary, it selects
	 *    only session which triggered previous resolution. Key
	 *    manager will do something to install a state with proper
	 *    selector.
	 */
	if (x->km.state == XFRM_STATE_VALID) {
		if ((x->sel.family &&
		     (x->sel.family != family ||
		      !xfrm_selector_match(&x->sel, fl, family))) ||
		    !security_xfrm_state_pol_flow_match(x, pol,
							&fl->u.__fl_common))
			return;

		if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
			return;

		if (!*best ||
		    ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
		    (*best)->km.dying > x->km.dying ||
		    ((*best)->km.dying == x->km.dying &&
		     (*best)->curlft.add_time < x->curlft.add_time))
			*best = x;
	} else if (x->km.state == XFRM_STATE_ACQ) {
		if (!*best || x->pcpu_num == pcpu_id)
			*acq_in_progress = 1;
	} else if (x->km.state == XFRM_STATE_ERROR ||
		   x->km.state == XFRM_STATE_EXPIRED) {
		if ((!x->sel.family ||
		     (x->sel.family == family &&
		      xfrm_selector_match(&x->sel, fl, family))) &&
		    security_xfrm_state_pol_flow_match(x, pol,
						       &fl->u.__fl_common))
			*error = -ESRCH;
	}
}
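/* Main output SA resolver.  Lookup order: the policy's private state
 * cache, the bydst hash, then a wildcard-source bydst hash.  If
 * nothing matches, a temporary XFRM_STATE_ACQ state is allocated and
 * the key manager is asked via km_query() to negotiate a real SA.
 */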
struct xfrm_state *
xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
		const struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family, u32 if_id)
{
	static xfrm_address_t saddr_wildcard = { };
	struct xfrm_hash_state_ptrs state_ptrs;
	struct net *net = xp_net(pol);
	unsigned int h, h_wildcard;
	struct xfrm_state *x, *x0, *to_put;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;
	u32 mark = pol->mark.v & pol->mark.m;
	unsigned short encap_family = tmpl->encap_family;
	unsigned int sequence;
	struct km_event c;
	unsigned int pcpu_id;
	bool cached = false;

	/* We need the cpu id just as a lookup key,
	 * we don't require it to be stable.
	 */
	pcpu_id = get_cpu();
	put_cpu();

	to_put = NULL;

	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);

	rcu_read_lock();
	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, encap_family,
					   &best, &acquire_in_progress, &error);
	}

	if (best)
		goto cached;

	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, family,
					   &best, &acquire_in_progress, &error);
	}

cached:
	cached = true;
	if (best)
		goto found;
	else if (error)
		best = NULL;
	else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
		WARN_ON(1);

	xfrm_hash_ptrs_get(net, &state_ptrs);

	h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask);
	hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
				/* HW states are at the head of the list,
				 * there is no need to iterate further.
				 */
				break;

			/* Packet offload: both policy and SA should
			 * have the same device.
			 */
			if (pol->xdo.dev != x->xso.dev)
				continue;
		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
			/* Skip HW policy for SW lookups */
			continue;
#endif
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, family,
					   &best, &acquire_in_progress, &error);
	}
	if (best || acquire_in_progress)
		goto found;
	h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid,
				     encap_family, state_ptrs.hmask);
	hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
				/* HW states are at the head of the list,
				 * there is no need to iterate further.
				 */
				break;

			/* Packet offload: both policy and SA should
			 * have the same device.
			 */
			if (pol->xdo.dev != x->xso.dev)
				continue;
		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
			/* Skip HW policy for SW lookups */
			continue;
#endif
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, family,
					   &best, &acquire_in_progress, &error);
	}

found:
	if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
	    (best && (best->pcpu_num == pcpu_id)))
		x = best;

	if (!x && !error && !acquire_in_progress) {
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup_all(&state_ptrs, mark, daddr,
						  tmpl->id.spi, tmpl->id.proto,
						  encap_family,
						  &pol->xdo)) != NULL) {
			to_put = x0;
			error = -EEXIST;
			goto out;
		}

		c.net = net;
		/* If the KMs have no listeners (yet...), avoid allocating an SA
		 * for each and every packet - garbage collection might not
		 * handle the flood.
		 */
		if (!km_is_alive(&c)) {
			error = -ESRCH;
			goto out;
		}

		x = xfrm_state_alloc(net);
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary state matching only
		 * to current session. */
		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
		x->if_id = if_id;
		if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
			x->pcpu_num = pcpu_id;

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			to_put = x;
			x = NULL;
			goto out;
		}
#ifdef CONFIG_XFRM_OFFLOAD
		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
			struct xfrm_dev_offload *xdo = &pol->xdo;
			struct xfrm_dev_offload *xso = &x->xso;
			struct net_device *dev = xdo->dev;

			xso->type = XFRM_DEV_OFFLOAD_PACKET;
			xso->dir = xdo->dir;
			xso->dev = dev;
			xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ;
			netdev_hold(dev, &xso->dev_tracker, GFP_ATOMIC);
			error = dev->xfrmdev_ops->xdo_dev_state_add(dev, x,
								    NULL);
			if (error) {
				xso->dir = 0;
				netdev_put(dev, &xso->dev_tracker);
				xso->dev = NULL;
				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
				x->km.state = XFRM_STATE_DEAD;
				to_put = x;
				x = NULL;
				goto out;
			}
		}
#endif
		if (km_query(x, tmpl, pol) == 0) {
			spin_lock_bh(&net->xfrm.xfrm_state_lock);
			x->km.state = XFRM_STATE_ACQ;
			x->dir = XFRM_SA_DIR_OUT;
			list_add(&x->km.all, &net->xfrm.state_all);
			h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
			XFRM_STATE_INSERT(bydst, &x->bydst,
					  net->xfrm.state_bydst + h,
					  x->xso.type);
			h = xfrm_src_hash(net, daddr, saddr, encap_family);
			XFRM_STATE_INSERT(bysrc, &x->bysrc,
					  net->xfrm.state_bysrc + h,
					  x->xso.type);
			INIT_HLIST_NODE(&x->state_cache);
			if (x->id.spi) {
				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
				XFRM_STATE_INSERT(byspi, &x->byspi,
						  net->xfrm.state_byspi + h,
						  x->xso.type);
			}
			if (x->km.seq) {
				h = xfrm_seq_hash(net, x->km.seq);
				XFRM_STATE_INSERT(byseq, &x->byseq,
						  net->xfrm.state_byseq + h,
						  x->xso.type);
			}
			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
			hrtimer_start(&x->mtimer,
				      ktime_set(net->xfrm.sysctl_acq_expires, 0),
				      HRTIMER_MODE_REL_SOFT);
			net->xfrm.state_num++;
			xfrm_hash_grow_check(net, x->bydst.next != NULL);
			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
		} else {
#ifdef CONFIG_XFRM_OFFLOAD
			struct xfrm_dev_offload *xso = &x->xso;

			if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
				xfrm_dev_state_delete(x);
				xfrm_dev_state_free(x);
			}
#endif
			x->km.state = XFRM_STATE_DEAD;
			to_put = x;
			x = NULL;
			error = -ESRCH;
		}

		/* Use the already installed 'fallback' while the CPU-specific
		 * SA acquire is handled. */
		if (best)
			x = best;
	}
out:
	if (x) {
		if (!xfrm_state_hold_rcu(x)) {
			*err = -EAGAIN;
			x = NULL;
		}
	} else {
		*err = acquire_in_progress ? -EAGAIN : error;
	}

	if (x && x->km.state == XFRM_STATE_VALID && !cached &&
	    (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
		spin_lock_bh(&net->xfrm.xfrm_state_lock);
		if (hlist_unhashed(&x->state_cache))
			hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	}

	rcu_read_unlock();
	if (to_put)
		xfrm_state_put(to_put);

	if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) {
		*err = -EAGAIN;
		if (x) {
			xfrm_state_put(x);
			x = NULL;
		}
	}

	return x;
}

struct xfrm_state *
xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
		    xfrm_address_t *daddr, xfrm_address_t *saddr,
		    unsigned short family, u8 mode, u8 proto, u32 reqid)
{
	unsigned int h;
	struct xfrm_state *rx = NULL, *x = NULL;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    mode == x->props.mode &&
		    proto == x->id.proto &&
		    x->km.state == XFRM_STATE_VALID) {
			rx = x;
			break;
		}
	}

	if (rx)
		xfrm_state_hold(rx);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	return rx;
}
EXPORT_SYMBOL(xfrm_stateonly_find);

struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
					   unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_walk *w;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	list_for_each_entry(w, &net->xfrm.state_all, all) {
		x = container_of(w, struct xfrm_state, km);
		if (x->props.family != family ||
		    x->id.spi != spi)
			continue;

		xfrm_state_hold(x);
		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
		return x;
	}
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	return NULL;
}
EXPORT_SYMBOL(xfrm_state_lookup_byspi);

static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 spi, u8 proto)
{
	struct xfrm_state *x;
	unsigned int i;

	rcu_read_lock();
	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) {
			if (x->id.spi == spi && x->id.proto == proto) {
				if (!xfrm_state_hold_rcu(x))
					continue;
				rcu_read_unlock();
				return x;
			}
		}
	}
	rcu_read_unlock();
	return NULL;
}
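/* Link a state into the global list and the bydst/bysrc (and, when
 * set up, byspi/byseq) hashes, then arm its timers.  Caller holds
 * net->xfrm.xfrm_state_lock.
 */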
static void __xfrm_state_insert(struct xfrm_state *x)
{
	struct net *net = xs_net(x);
	unsigned int h;

	list_add(&x->km.all, &net->xfrm.state_all);

	/* Sanitize mark before store */
	x->mark.v &= x->mark.m;

	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
			  x->xso.type);

	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
	XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
			  x->xso.type);

	if (x->id.spi) {
		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
				  x->xso.type);
	}

	if (x->km.seq) {
		h = xfrm_seq_hash(net, x->km.seq);

		XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h,
				  x->xso.type);
	}

	hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	net->xfrm.state_num++;

	xfrm_hash_grow_check(net, x->bydst.next != NULL);
	xfrm_nat_keepalive_state_updated(x);
}

/* net->xfrm.xfrm_state_lock is held */
static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
{
	struct net *net = xs_net(xnew);
	unsigned short family = xnew->props.family;
	u32 reqid = xnew->props.reqid;
	struct xfrm_state *x;
	unsigned int h;
	u32 mark = xnew->mark.v & xnew->mark.m;
	u32 if_id = xnew->if_id;
	u32 cpu_id = xnew->pcpu_num;

	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == reqid &&
		    x->if_id == if_id &&
		    x->pcpu_num == cpu_id &&
		    (mark & x->mark.m) == x->mark.v &&
		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
			x->genid++;
	}
}

void xfrm_state_insert(struct xfrm_state *x)
{
	struct net *net = xs_net(x);

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);
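/* Find, and optionally create, a larval (XFRM_STATE_ACQ) state for
 * the given tuple.  The placeholder lives until the key manager
 * installs a real SA or net->xfrm.sysctl_acq_expires seconds pass.
 */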
/* net->xfrm.xfrm_state_lock is held */
static struct xfrm_state *__find_acq_core(struct net *net,
					  const struct xfrm_mark *m,
					  unsigned short family, u8 mode,
					  u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
					  const xfrm_address_t *daddr,
					  const xfrm_address_t *saddr,
					  int create)
{
	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
	struct xfrm_state *x;
	u32 mark = m->v & m->m;

	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
		if (x->props.reqid  != reqid ||
		    x->props.mode   != mode ||
		    x->props.family != family ||
		    x->km.state     != XFRM_STATE_ACQ ||
		    x->id.spi       != 0 ||
		    x->id.proto	    != proto ||
		    (mark & x->mark.m) != x->mark.v ||
		    x->pcpu_num != pcpu_num ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
			continue;

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc(net);
	if (likely(x)) {
		switch (family) {
		case AF_INET:
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			x->sel.daddr.in6 = daddr->in6;
			x->sel.saddr.in6 = saddr->in6;
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			x->props.saddr.in6 = saddr->in6;
			x->id.daddr.in6 = daddr->in6;
			break;
		}

		x->pcpu_num = pcpu_num;
		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		x->if_id = if_id;
		x->mark.v = m->v;
		x->mark.m = m->m;
		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
		xfrm_state_hold(x);
		hrtimer_start(&x->mtimer,
			      ktime_set(net->xfrm.sysctl_acq_expires, 0),
			      HRTIMER_MODE_REL_SOFT);
		list_add(&x->km.all, &net->xfrm.state_all);
		XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
				  x->xso.type);
		h = xfrm_src_hash(net, daddr, saddr, family);
		XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
				  x->xso.type);

		net->xfrm.state_num++;

		xfrm_hash_grow_check(net, x->bydst.next != NULL);
	}

	return x;
}

static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);

int xfrm_state_add(struct xfrm_state *x)
{
	struct net *net = xs_net(x);
	struct xfrm_state *x1, *to_put;
	int family;
	int err;
	u32 mark = x->mark.v & x->mark.m;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	to_put = NULL;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		to_put = x1;
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
		if (x1 && ((x1->id.proto != x->id.proto) ||
		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
			to_put = x1;
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
				     x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	if (to_put)
		xfrm_state_put(to_put);

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
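/* State migration: clone an existing SA, rewrite its endpoint
 * addresses from the xfrm_migrate request and reinsert the copy.
 */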
#ifdef CONFIG_XFRM_MIGRATE
static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security)
{
	struct xfrm_user_sec_ctx *uctx;
	int size = sizeof(*uctx) + security->ctx_len;
	int err;

	uctx = kmalloc(size, GFP_KERNEL);
	if (!uctx)
		return -ENOMEM;

	uctx->exttype = XFRMA_SEC_CTX;
	uctx->len = size;
	uctx->ctx_doi = security->ctx_doi;
	uctx->ctx_alg = security->ctx_alg;
	uctx->ctx_len = security->ctx_len;
	memcpy(uctx + 1, security->ctx_str, security->ctx_len);
	err = security_xfrm_state_alloc(x, uctx);
	kfree(uctx);
	if (err)
		return err;

	return 0;
}

static struct xfrm_state *xfrm_state_clone_and_setup(struct xfrm_state *orig,
						     struct xfrm_encap_tmpl *encap,
						     struct xfrm_migrate *m)
{
	struct net *net = xs_net(orig);
	struct xfrm_state *x = xfrm_state_alloc(net);
	if (!x)
		goto out;

	memcpy(&x->id, &orig->id, sizeof(x->id));
	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
	x->props.mode = orig->props.mode;
	x->props.replay_window = orig->props.replay_window;
	x->props.reqid = orig->props.reqid;
	x->props.family = orig->props.family;
	x->props.saddr = orig->props.saddr;

	if (orig->aalg) {
		x->aalg = xfrm_algo_auth_clone(orig->aalg);
		if (!x->aalg)
			goto error;
	}
	x->props.aalgo = orig->props.aalgo;

	if (orig->aead) {
		x->aead = xfrm_algo_aead_clone(orig->aead);
		x->geniv = orig->geniv;
		if (!x->aead)
			goto error;
	}
	if (orig->ealg) {
		x->ealg = xfrm_algo_clone(orig->ealg);
		if (!x->ealg)
			goto error;
	}
	x->props.ealgo = orig->props.ealgo;

	if (orig->calg) {
		x->calg = xfrm_algo_clone(orig->calg);
		if (!x->calg)
			goto error;
	}
	x->props.calgo = orig->props.calgo;

	if (encap || orig->encap) {
		if (encap)
			x->encap = kmemdup(encap, sizeof(*x->encap),
					   GFP_KERNEL);
		else
			x->encap = kmemdup(orig->encap, sizeof(*x->encap),
					   GFP_KERNEL);

		if (!x->encap)
			goto error;
	}

	if (orig->security)
		if (clone_security(x, orig->security))
			goto error;

	if (orig->coaddr) {
		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
				    GFP_KERNEL);
		if (!x->coaddr)
			goto error;
	}

	if (orig->replay_esn) {
		if (xfrm_replay_clone(x, orig))
			goto error;
	}

	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
	memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));

	x->props.flags = orig->props.flags;
	x->props.extra_flags = orig->props.extra_flags;

	x->pcpu_num = orig->pcpu_num;
	x->if_id = orig->if_id;
	x->tfcpad = orig->tfcpad;
	x->replay_maxdiff = orig->replay_maxdiff;
	x->replay_maxage = orig->replay_maxage;
	memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft));
	x->km.state = orig->km.state;
	x->km.seq = orig->km.seq;
	x->replay = orig->replay;
	x->preplay = orig->preplay;
	x->mapping_maxage = orig->mapping_maxage;
	x->lastused = orig->lastused;
	x->new_mapping = 0;
	x->new_mapping_sport = 0;
	x->dir = orig->dir;

	x->mode_cbs = orig->mode_cbs;
	if (x->mode_cbs && x->mode_cbs->clone_state) {
		if (x->mode_cbs->clone_state(x, orig))
			goto error;
	}

	x->props.family = m->new_family;
	memcpy(&x->id.daddr, &m->new_daddr, sizeof(x->id.daddr));
	memcpy(&x->props.saddr, &m->new_saddr, sizeof(x->props.saddr));

	return x;

error:
	xfrm_state_put(x);
out:
	return NULL;
}
net->xfrm.state_bydst+h, bydst) { 2107 if (x->props.mode != m->mode || 2108 x->id.proto != m->proto) 2109 continue; 2110 if (m->reqid && x->props.reqid != m->reqid) 2111 continue; 2112 if (if_id != 0 && x->if_id != if_id) 2113 continue; 2114 if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, 2115 m->old_family) || 2116 !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, 2117 m->old_family)) 2118 continue; 2119 xfrm_state_hold(x); 2120 break; 2121 } 2122 } else { 2123 h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, 2124 m->old_family); 2125 hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { 2126 if (x->props.mode != m->mode || 2127 x->id.proto != m->proto) 2128 continue; 2129 if (if_id != 0 && x->if_id != if_id) 2130 continue; 2131 if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, 2132 m->old_family) || 2133 !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, 2134 m->old_family)) 2135 continue; 2136 xfrm_state_hold(x); 2137 break; 2138 } 2139 } 2140 2141 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2142 2143 return x; 2144 } 2145 EXPORT_SYMBOL(xfrm_migrate_state_find); 2146 2147 struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, 2148 struct xfrm_migrate *m, 2149 struct xfrm_encap_tmpl *encap, 2150 struct net *net, 2151 struct xfrm_user_offload *xuo, 2152 struct netlink_ext_ack *extack) 2153 { 2154 struct xfrm_state *xc; 2155 2156 xc = xfrm_state_clone_and_setup(x, encap, m); 2157 if (!xc) 2158 return NULL; 2159 2160 if (xfrm_init_state(xc) < 0) 2161 goto error; 2162 2163 /* configure the hardware if offload is requested */ 2164 if (xuo && xfrm_dev_state_add(net, xc, xuo, extack)) 2165 goto error; 2166 2167 /* add state */ 2168 if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) { 2169 /* care is needed when the destination address of the 2170 state is to be updated, as it is part of the lookup triplet */ 2171 xfrm_state_insert(xc); 2172 } else { 2173 if (xfrm_state_add(xc) < 0) 2174 goto error; 2175 } 2176 2177 return xc; 2178 error: 2179 xfrm_state_put(xc); 2180 return NULL; 2181 } 2182 EXPORT_SYMBOL(xfrm_state_migrate); 2183 #endif 2184 2185 int xfrm_state_update(struct xfrm_state *x) 2186 { 2187 struct xfrm_state *x1, *to_put; 2188 int err; 2189 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); 2190 struct net *net = xs_net(x); 2191 2192 to_put = NULL; 2193 2194 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2195 x1 = __xfrm_state_locate(x, use_spi, x->props.family); 2196 2197 err = -ESRCH; 2198 if (!x1) 2199 goto out; 2200 2201 if (xfrm_state_kern(x1)) { 2202 to_put = x1; 2203 err = -EEXIST; 2204 goto out; 2205 } 2206 2207 if (x1->km.state == XFRM_STATE_ACQ) { 2208 if (x->dir && x1->dir != x->dir) 2209 goto out; 2210 2211 __xfrm_state_insert(x); 2212 x = NULL; 2213 } else { 2214 if (x1->dir != x->dir) 2215 goto out; 2216 } 2217 err = 0; 2218 2219 out: 2220 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2221 2222 if (to_put) 2223 xfrm_state_put(to_put); 2224 2225 if (err) 2226 return err; 2227 2228 if (!x) { 2229 xfrm_state_delete(x1); 2230 xfrm_state_put(x1); 2231 return 0; 2232 } 2233 2234 err = -EINVAL; 2235 spin_lock_bh(&x1->lock); 2236 if (likely(x1->km.state == XFRM_STATE_VALID)) { 2237 if (x->encap && x1->encap && 2238 x->encap->encap_type == x1->encap->encap_type) 2239 memcpy(x1->encap, x->encap, sizeof(*x1->encap)); 2240 else if (x->encap || x1->encap) 2241 goto fail; 2242 2243 if (x->coaddr && x1->coaddr) { 2244 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); 2245 } 2246 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) 2247
memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); 2248 memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); 2249 x1->km.dying = 0; 2250 2251 hrtimer_start(&x1->mtimer, ktime_set(1, 0), 2252 HRTIMER_MODE_REL_SOFT); 2253 if (READ_ONCE(x1->curlft.use_time)) 2254 xfrm_state_check_expire(x1); 2255 2256 if (x->props.smark.m || x->props.smark.v || x->if_id) { 2257 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2258 2259 if (x->props.smark.m || x->props.smark.v) 2260 x1->props.smark = x->props.smark; 2261 2262 if (x->if_id) 2263 x1->if_id = x->if_id; 2264 2265 __xfrm_state_bump_genids(x1); 2266 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2267 } 2268 2269 err = 0; 2270 x->km.state = XFRM_STATE_DEAD; 2271 __xfrm_state_put(x); 2272 } 2273 2274 fail: 2275 spin_unlock_bh(&x1->lock); 2276 2277 xfrm_state_put(x1); 2278 2279 return err; 2280 } 2281 EXPORT_SYMBOL(xfrm_state_update); 2282 2283 int xfrm_state_check_expire(struct xfrm_state *x) 2284 { 2285 /* All counters which are needed to decide if state is expired 2286 * are handled by SW for non-packet offload modes. Simply skip 2287 * the following update and save extra boilerplate in drivers. 2288 */ 2289 if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) 2290 xfrm_dev_state_update_stats(x); 2291 2292 if (!READ_ONCE(x->curlft.use_time)) 2293 WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds()); 2294 2295 if (x->curlft.bytes >= x->lft.hard_byte_limit || 2296 x->curlft.packets >= x->lft.hard_packet_limit) { 2297 x->km.state = XFRM_STATE_EXPIRED; 2298 hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT); 2299 return -EINVAL; 2300 } 2301 2302 if (!x->km.dying && 2303 (x->curlft.bytes >= x->lft.soft_byte_limit || 2304 x->curlft.packets >= x->lft.soft_packet_limit)) { 2305 x->km.dying = 1; 2306 km_state_expired(x, 0, 0); 2307 } 2308 return 0; 2309 } 2310 EXPORT_SYMBOL(xfrm_state_check_expire); 2311 2312 void xfrm_state_update_stats(struct net *net) 2313 { 2314 struct xfrm_state *x; 2315 int i; 2316 2317 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2318 for (i = 0; i <= net->xfrm.state_hmask; i++) { 2319 hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst) 2320 xfrm_dev_state_update_stats(x); 2321 } 2322 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2323 } 2324 2325 struct xfrm_state * 2326 xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, 2327 u8 proto, unsigned short family) 2328 { 2329 struct xfrm_hash_state_ptrs state_ptrs; 2330 struct xfrm_state *x; 2331 2332 rcu_read_lock(); 2333 xfrm_hash_ptrs_get(net, &state_ptrs); 2334 2335 x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); 2336 rcu_read_unlock(); 2337 return x; 2338 } 2339 EXPORT_SYMBOL(xfrm_state_lookup); 2340 2341 struct xfrm_state * 2342 xfrm_state_lookup_byaddr(struct net *net, u32 mark, 2343 const xfrm_address_t *daddr, const xfrm_address_t *saddr, 2344 u8 proto, unsigned short family) 2345 { 2346 struct xfrm_hash_state_ptrs state_ptrs; 2347 struct xfrm_state *x; 2348 2349 rcu_read_lock(); 2350 2351 xfrm_hash_ptrs_get(net, &state_ptrs); 2352 2353 x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family); 2354 rcu_read_unlock(); 2355 return x; 2356 } 2357 EXPORT_SYMBOL(xfrm_state_lookup_byaddr); 2358 2359 struct xfrm_state * 2360 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, 2361 u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr, 2362 const xfrm_address_t *saddr, int create, unsigned short family) 2363 { 2364 struct xfrm_state *x; 2365 2366 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2367 x = 
__find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num, 2368 proto, daddr, saddr, create); 2369 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2370 2371 return x; 2372 } 2373 EXPORT_SYMBOL(xfrm_find_acq); 2374 2375 #ifdef CONFIG_XFRM_SUB_POLICY 2376 #if IS_ENABLED(CONFIG_IPV6) 2377 /* distribution counting sort function for xfrm_state and xfrm_tmpl */ 2378 static void 2379 __xfrm6_sort(void **dst, void **src, int n, 2380 int (*cmp)(const void *p), int maxclass) 2381 { 2382 int count[XFRM_MAX_DEPTH] = { }; 2383 int class[XFRM_MAX_DEPTH]; 2384 int i; 2385 2386 for (i = 0; i < n; i++) { 2387 int c = cmp(src[i]); 2388 2389 class[i] = c; 2390 count[c]++; 2391 } 2392 2393 for (i = 2; i < maxclass; i++) 2394 count[i] += count[i - 1]; 2395 2396 for (i = 0; i < n; i++) { 2397 dst[count[class[i] - 1]++] = src[i]; 2398 src[i] = NULL; 2399 } 2400 } 2401 2402 /* Rule for xfrm_state: 2403 * 2404 * rule 1: select IPsec transport except AH 2405 * rule 2: select MIPv6 RO or inbound trigger 2406 * rule 3: select IPsec transport AH 2407 * rule 4: select IPsec tunnel 2408 * rule 5: others 2409 */ 2410 static int __xfrm6_state_sort_cmp(const void *p) 2411 { 2412 const struct xfrm_state *v = p; 2413 2414 switch (v->props.mode) { 2415 case XFRM_MODE_TRANSPORT: 2416 if (v->id.proto != IPPROTO_AH) 2417 return 1; 2418 else 2419 return 3; 2420 #if IS_ENABLED(CONFIG_IPV6_MIP6) 2421 case XFRM_MODE_ROUTEOPTIMIZATION: 2422 case XFRM_MODE_IN_TRIGGER: 2423 return 2; 2424 #endif 2425 case XFRM_MODE_TUNNEL: 2426 case XFRM_MODE_BEET: 2427 case XFRM_MODE_IPTFS: 2428 return 4; 2429 } 2430 return 5; 2431 } 2432 2433 /* Rule for xfrm_tmpl: 2434 * 2435 * rule 1: select IPsec transport 2436 * rule 2: select MIPv6 RO or inbound trigger 2437 * rule 3: select IPsec tunnel 2438 * rule 4: others 2439 */ 2440 static int __xfrm6_tmpl_sort_cmp(const void *p) 2441 { 2442 const struct xfrm_tmpl *v = p; 2443 2444 switch (v->mode) { 2445 case XFRM_MODE_TRANSPORT: 2446 return 1; 2447 #if IS_ENABLED(CONFIG_IPV6_MIP6) 2448 case XFRM_MODE_ROUTEOPTIMIZATION: 2449 case XFRM_MODE_IN_TRIGGER: 2450 return 2; 2451 #endif 2452 case XFRM_MODE_TUNNEL: 2453 case XFRM_MODE_BEET: 2454 case XFRM_MODE_IPTFS: 2455 return 3; 2456 } 2457 return 4; 2458 } 2459 #else 2460 static inline int __xfrm6_state_sort_cmp(const void *p) { return 5; } 2461 static inline int __xfrm6_tmpl_sort_cmp(const void *p) { return 4; } 2462 2463 static inline void 2464 __xfrm6_sort(void **dst, void **src, int n, 2465 int (*cmp)(const void *p), int maxclass) 2466 { 2467 int i; 2468 2469 for (i = 0; i < n; i++) 2470 dst[i] = src[i]; 2471 } 2472 #endif /* CONFIG_IPV6 */ 2473 2474 void 2475 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, 2476 unsigned short family) 2477 { 2478 int i; 2479 2480 if (family == AF_INET6) 2481 __xfrm6_sort((void **)dst, (void **)src, n, 2482 __xfrm6_tmpl_sort_cmp, 5); 2483 else 2484 for (i = 0; i < n; i++) 2485 dst[i] = src[i]; 2486 } 2487 2488 void 2489 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, 2490 unsigned short family) 2491 { 2492 if (family == AF_INET6) 2495 __xfrm6_sort((void **)dst, (void **)src, n, 2496 __xfrm6_state_sort_cmp, 6); 2497 else 2498 for (i = 0; i < n; i++) 2499 dst[i] = src[i]; 2500 } 2501 #endif 2502 2503 /* Silly enough, but I'm too lazy to build a resolution list. */ 2504 2505 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num) 2506 { 2507 unsigned int h = xfrm_seq_hash(net, seq); 2508 struct xfrm_state *x; 2509 2510
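/* Scan the byseq chain for an ACQUIRE state whose sequence number, mark and per-CPU slot all match; note that the mark test below applies the state's own mask to the caller's mark before comparing it against the state's value. */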
hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { 2511 if (x->km.seq == seq && 2512 (mark & x->mark.m) == x->mark.v && 2513 x->pcpu_num == pcpu_num && 2514 x->km.state == XFRM_STATE_ACQ) { 2515 xfrm_state_hold(x); 2516 return x; 2517 } 2518 } 2519 2520 return NULL; 2521 } 2522 2523 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num) 2524 { 2525 struct xfrm_state *x; 2526 2527 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2528 x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num); 2529 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2530 return x; 2531 } 2532 EXPORT_SYMBOL(xfrm_find_acq_byseq); 2533 2534 u32 xfrm_get_acqseq(void) 2535 { 2536 u32 res; 2537 static atomic_t acqseq; 2538 2539 do { 2540 res = atomic_inc_return(&acqseq); 2541 } while (!res); 2542 2543 return res; 2544 } 2545 EXPORT_SYMBOL(xfrm_get_acqseq); 2546 2547 int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack) 2548 { 2549 switch (proto) { 2550 case IPPROTO_AH: 2551 case IPPROTO_ESP: 2552 break; 2553 2554 case IPPROTO_COMP: 2555 /* IPCOMP spi is 16-bits. */ 2556 if (max >= 0x10000) { 2557 NL_SET_ERR_MSG(extack, "IPCOMP SPI must be <= 65535"); 2558 return -EINVAL; 2559 } 2560 break; 2561 2562 default: 2563 NL_SET_ERR_MSG(extack, "Invalid protocol, must be one of AH, ESP, IPCOMP"); 2564 return -EINVAL; 2565 } 2566 2567 if (min > max) { 2568 NL_SET_ERR_MSG(extack, "Invalid SPI range: min > max"); 2569 return -EINVAL; 2570 } 2571 2572 return 0; 2573 } 2574 EXPORT_SYMBOL(verify_spi_info); 2575 2576 int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, 2577 struct netlink_ext_ack *extack) 2578 { 2579 struct net *net = xs_net(x); 2580 unsigned int h; 2581 struct xfrm_state *x0; 2582 int err = -ENOENT; 2583 u32 range = high - low + 1; 2584 __be32 newspi = 0; 2585 2586 spin_lock_bh(&x->lock); 2587 if (x->km.state == XFRM_STATE_DEAD) { 2588 NL_SET_ERR_MSG(extack, "Target ACQUIRE is in DEAD state"); 2589 goto unlock; 2590 } 2591 2592 err = 0; 2593 if (x->id.spi) 2594 goto unlock; 2595 2596 err = -ENOENT; 2597 2598 for (h = 0; h < range; h++) { 2599 u32 spi = (low == high) ? 
low : get_random_u32_inclusive(low, high); 2600 newspi = htonl(spi); 2601 2602 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2603 x0 = xfrm_state_lookup_spi_proto(net, newspi, x->id.proto); 2604 if (!x0) { 2605 x->id.spi = newspi; 2606 h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family); 2607 XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type); 2608 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2609 err = 0; 2610 goto unlock; 2611 } 2612 xfrm_state_put(x0); 2613 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2614 2615 if (signal_pending(current)) { 2616 err = -ERESTARTSYS; 2617 goto unlock; 2618 } 2619 2620 if (low == high) 2621 break; 2622 } 2623 2624 if (err) 2625 NL_SET_ERR_MSG(extack, "No SPI available in the requested range"); 2626 2627 unlock: 2628 spin_unlock_bh(&x->lock); 2629 2630 return err; 2631 } 2632 EXPORT_SYMBOL(xfrm_alloc_spi); 2633 2634 static bool __xfrm_state_filter_match(struct xfrm_state *x, 2635 struct xfrm_address_filter *filter) 2636 { 2637 if (filter) { 2638 if ((filter->family == AF_INET || 2639 filter->family == AF_INET6) && 2640 x->props.family != filter->family) 2641 return false; 2642 2643 return addr_match(&x->props.saddr, &filter->saddr, 2644 filter->splen) && 2645 addr_match(&x->id.daddr, &filter->daddr, 2646 filter->dplen); 2647 } 2648 return true; 2649 } 2650 2651 int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, 2652 int (*func)(struct xfrm_state *, int, void*), 2653 void *data) 2654 { 2655 struct xfrm_state *state; 2656 struct xfrm_state_walk *x; 2657 int err = 0; 2658 2659 if (walk->seq != 0 && list_empty(&walk->all)) 2660 return 0; 2661 2662 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2663 if (list_empty(&walk->all)) 2664 x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all); 2665 else 2666 x = list_first_entry(&walk->all, struct xfrm_state_walk, all); 2667 list_for_each_entry_from(x, &net->xfrm.state_all, all) { 2668 if (x->state == XFRM_STATE_DEAD) 2669 continue; 2670 state = container_of(x, struct xfrm_state, km); 2671 if (!xfrm_id_proto_match(state->id.proto, walk->proto)) 2672 continue; 2673 if (!__xfrm_state_filter_match(state, walk->filter)) 2674 continue; 2675 err = func(state, walk->seq, data); 2676 if (err) { 2677 list_move_tail(&walk->all, &x->all); 2678 goto out; 2679 } 2680 walk->seq++; 2681 } 2682 if (walk->seq == 0) { 2683 err = -ENOENT; 2684 goto out; 2685 } 2686 list_del_init(&walk->all); 2687 out: 2688 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2689 return err; 2690 } 2691 EXPORT_SYMBOL(xfrm_state_walk); 2692 2693 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto, 2694 struct xfrm_address_filter *filter) 2695 { 2696 INIT_LIST_HEAD(&walk->all); 2697 walk->proto = proto; 2698 walk->state = XFRM_STATE_DEAD; 2699 walk->seq = 0; 2700 walk->filter = filter; 2701 } 2702 EXPORT_SYMBOL(xfrm_state_walk_init); 2703 2704 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net) 2705 { 2706 kfree(walk->filter); 2707 2708 if (list_empty(&walk->all)) 2709 return; 2710 2711 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2712 list_del(&walk->all); 2713 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2714 } 2715 EXPORT_SYMBOL(xfrm_state_walk_done); 2716 2717 static void xfrm_replay_timer_handler(struct timer_list *t) 2718 { 2719 struct xfrm_state *x = timer_container_of(x, t, rtimer); 2720 2721 spin_lock(&x->lock); 2722 2723 if (x->km.state == XFRM_STATE_VALID) { 2724 if (xfrm_aevent_is_on(xs_net(x))) 2725 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); 2726 else 
2727 x->xflags |= XFRM_TIME_DEFER; 2728 } 2729 2730 spin_unlock(&x->lock); 2731 } 2732 2733 static LIST_HEAD(xfrm_km_list); 2734 2735 void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) 2736 { 2737 struct xfrm_mgr *km; 2738 2739 rcu_read_lock(); 2740 list_for_each_entry_rcu(km, &xfrm_km_list, list) 2741 if (km->notify_policy) 2742 km->notify_policy(xp, dir, c); 2743 rcu_read_unlock(); 2744 } 2745 2746 void km_state_notify(struct xfrm_state *x, const struct km_event *c) 2747 { 2748 struct xfrm_mgr *km; 2749 rcu_read_lock(); 2750 list_for_each_entry_rcu(km, &xfrm_km_list, list) 2751 if (km->notify) 2752 km->notify(x, c); 2753 rcu_read_unlock(); 2754 } 2755 2756 EXPORT_SYMBOL(km_policy_notify); 2757 EXPORT_SYMBOL(km_state_notify); 2758 2759 void km_state_expired(struct xfrm_state *x, int hard, u32 portid) 2760 { 2761 struct km_event c; 2762 2763 c.data.hard = hard; 2764 c.portid = portid; 2765 c.event = XFRM_MSG_EXPIRE; 2766 km_state_notify(x, &c); 2767 } 2768 2769 EXPORT_SYMBOL(km_state_expired); 2770 /* 2771 * We send to all registered managers regardless of failure 2772 * We are happy with one success 2773 */ 2774 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) 2775 { 2776 int err = -EINVAL, acqret; 2777 struct xfrm_mgr *km; 2778 2779 rcu_read_lock(); 2780 list_for_each_entry_rcu(km, &xfrm_km_list, list) { 2781 acqret = km->acquire(x, t, pol); 2782 if (!acqret) 2783 err = acqret; 2784 } 2785 rcu_read_unlock(); 2786 return err; 2787 } 2788 EXPORT_SYMBOL(km_query); 2789 2790 static int __km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) 2791 { 2792 int err = -EINVAL; 2793 struct xfrm_mgr *km; 2794 2795 rcu_read_lock(); 2796 list_for_each_entry_rcu(km, &xfrm_km_list, list) { 2797 if (km->new_mapping) 2798 err = km->new_mapping(x, ipaddr, sport); 2799 if (!err) 2800 break; 2801 } 2802 rcu_read_unlock(); 2803 return err; 2804 } 2805 2806 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) 2807 { 2808 int ret = 0; 2809 2810 if (x->mapping_maxage) { 2811 if ((jiffies / HZ - x->new_mapping) > x->mapping_maxage || 2812 x->new_mapping_sport != sport) { 2813 x->new_mapping_sport = sport; 2814 x->new_mapping = jiffies / HZ; 2815 ret = __km_new_mapping(x, ipaddr, sport); 2816 } 2817 } else { 2818 ret = __km_new_mapping(x, ipaddr, sport); 2819 } 2820 2821 return ret; 2822 } 2823 EXPORT_SYMBOL(km_new_mapping); 2824 2825 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) 2826 { 2827 struct km_event c; 2828 2829 c.data.hard = hard; 2830 c.portid = portid; 2831 c.event = XFRM_MSG_POLEXPIRE; 2832 km_policy_notify(pol, dir, &c); 2833 } 2834 EXPORT_SYMBOL(km_policy_expired); 2835 2836 #ifdef CONFIG_XFRM_MIGRATE 2837 int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, 2838 const struct xfrm_migrate *m, int num_migrate, 2839 const struct xfrm_kmaddress *k, 2840 const struct xfrm_encap_tmpl *encap) 2841 { 2842 int err = -EINVAL; 2843 int ret; 2844 struct xfrm_mgr *km; 2845 2846 rcu_read_lock(); 2847 list_for_each_entry_rcu(km, &xfrm_km_list, list) { 2848 if (km->migrate) { 2849 ret = km->migrate(sel, dir, type, m, num_migrate, k, 2850 encap); 2851 if (!ret) 2852 err = ret; 2853 } 2854 } 2855 rcu_read_unlock(); 2856 return err; 2857 } 2858 EXPORT_SYMBOL(km_migrate); 2859 #endif 2860 2861 int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) 2862 { 2863 int err = -EINVAL; 2864 int ret; 2865 struct xfrm_mgr *km; 2866 2867 
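/* As with km_query() and km_migrate() above, the report is broadcast to every registered key manager under RCU, and a single successful ->report() callback is enough to turn the default -EINVAL into success. */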
rcu_read_lock(); 2868 list_for_each_entry_rcu(km, &xfrm_km_list, list) { 2869 if (km->report) { 2870 ret = km->report(net, proto, sel, addr); 2871 if (!ret) 2872 err = ret; 2873 } 2874 } 2875 rcu_read_unlock(); 2876 return err; 2877 } 2878 EXPORT_SYMBOL(km_report); 2879 2880 static bool km_is_alive(const struct km_event *c) 2881 { 2882 struct xfrm_mgr *km; 2883 bool is_alive = false; 2884 2885 rcu_read_lock(); 2886 list_for_each_entry_rcu(km, &xfrm_km_list, list) { 2887 if (km->is_alive && km->is_alive(c)) { 2888 is_alive = true; 2889 break; 2890 } 2891 } 2892 rcu_read_unlock(); 2893 2894 return is_alive; 2895 } 2896 2897 #if IS_ENABLED(CONFIG_XFRM_USER_COMPAT) 2898 static DEFINE_SPINLOCK(xfrm_translator_lock); 2899 static struct xfrm_translator __rcu *xfrm_translator; 2900 2901 struct xfrm_translator *xfrm_get_translator(void) 2902 { 2903 struct xfrm_translator *xtr; 2904 2905 rcu_read_lock(); 2906 xtr = rcu_dereference(xfrm_translator); 2907 if (unlikely(!xtr)) 2908 goto out; 2909 if (!try_module_get(xtr->owner)) 2910 xtr = NULL; 2911 out: 2912 rcu_read_unlock(); 2913 return xtr; 2914 } 2915 EXPORT_SYMBOL_GPL(xfrm_get_translator); 2916 2917 void xfrm_put_translator(struct xfrm_translator *xtr) 2918 { 2919 module_put(xtr->owner); 2920 } 2921 EXPORT_SYMBOL_GPL(xfrm_put_translator); 2922 2923 int xfrm_register_translator(struct xfrm_translator *xtr) 2924 { 2925 int err = 0; 2926 2927 spin_lock_bh(&xfrm_translator_lock); 2928 if (unlikely(xfrm_translator != NULL)) 2929 err = -EEXIST; 2930 else 2931 rcu_assign_pointer(xfrm_translator, xtr); 2932 spin_unlock_bh(&xfrm_translator_lock); 2933 2934 return err; 2935 } 2936 EXPORT_SYMBOL_GPL(xfrm_register_translator); 2937 2938 int xfrm_unregister_translator(struct xfrm_translator *xtr) 2939 { 2940 int err = 0; 2941 2942 spin_lock_bh(&xfrm_translator_lock); 2943 if (likely(xfrm_translator != NULL)) { 2944 if (rcu_access_pointer(xfrm_translator) != xtr) 2945 err = -EINVAL; 2946 else 2947 RCU_INIT_POINTER(xfrm_translator, NULL); 2948 } 2949 spin_unlock_bh(&xfrm_translator_lock); 2950 synchronize_rcu(); 2951 2952 return err; 2953 } 2954 EXPORT_SYMBOL_GPL(xfrm_unregister_translator); 2955 #endif 2956 2957 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen) 2958 { 2959 int err; 2960 u8 *data; 2961 struct xfrm_mgr *km; 2962 struct xfrm_policy *pol = NULL; 2963 2964 if (sockptr_is_null(optval) && !optlen) { 2965 xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL); 2966 xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL); 2967 __sk_dst_reset(sk); 2968 return 0; 2969 } 2970 2971 if (optlen <= 0 || optlen > PAGE_SIZE) 2972 return -EMSGSIZE; 2973 2974 data = memdup_sockptr(optval, optlen); 2975 if (IS_ERR(data)) 2976 return PTR_ERR(data); 2977 2978 if (in_compat_syscall()) { 2979 struct xfrm_translator *xtr = xfrm_get_translator(); 2980 2981 if (!xtr) { 2982 kfree(data); 2983 return -EOPNOTSUPP; 2984 } 2985 2986 err = xtr->xlate_user_policy_sockptr(&data, optlen); 2987 xfrm_put_translator(xtr); 2988 if (err) { 2989 kfree(data); 2990 return err; 2991 } 2992 } 2993 2994 err = -EINVAL; 2995 rcu_read_lock(); 2996 list_for_each_entry_rcu(km, &xfrm_km_list, list) { 2997 pol = km->compile_policy(sk, optname, data, 2998 optlen, &err); 2999 if (err >= 0) 3000 break; 3001 } 3002 rcu_read_unlock(); 3003 3004 if (err >= 0) { 3005 xfrm_sk_policy_insert(sk, err, pol); 3006 xfrm_pol_put(pol); 3007 __sk_dst_reset(sk); 3008 err = 0; 3009 } 3010 3011 kfree(data); 3012 return err; 3013 } 3014 EXPORT_SYMBOL(xfrm_user_policy); 3015 3016 static 
DEFINE_SPINLOCK(xfrm_km_lock); 3017 3018 void xfrm_register_km(struct xfrm_mgr *km) 3019 { 3020 spin_lock_bh(&xfrm_km_lock); 3021 list_add_tail_rcu(&km->list, &xfrm_km_list); 3022 spin_unlock_bh(&xfrm_km_lock); 3023 } 3024 EXPORT_SYMBOL(xfrm_register_km); 3025 3026 void xfrm_unregister_km(struct xfrm_mgr *km) 3027 { 3028 spin_lock_bh(&xfrm_km_lock); 3029 list_del_rcu(&km->list); 3030 spin_unlock_bh(&xfrm_km_lock); 3031 synchronize_rcu(); 3032 } 3033 EXPORT_SYMBOL(xfrm_unregister_km); 3034 3035 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) 3036 { 3037 int err = 0; 3038 3039 if (WARN_ON(afinfo->family >= NPROTO)) 3040 return -EAFNOSUPPORT; 3041 3042 spin_lock_bh(&xfrm_state_afinfo_lock); 3043 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) 3044 err = -EEXIST; 3045 else 3046 rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo); 3047 spin_unlock_bh(&xfrm_state_afinfo_lock); 3048 return err; 3049 } 3050 EXPORT_SYMBOL(xfrm_state_register_afinfo); 3051 3052 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) 3053 { 3054 int err = 0, family = afinfo->family; 3055 3056 if (WARN_ON(family >= NPROTO)) 3057 return -EAFNOSUPPORT; 3058 3059 spin_lock_bh(&xfrm_state_afinfo_lock); 3060 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { 3061 if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo) 3062 err = -EINVAL; 3063 else 3064 RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL); 3065 } 3066 spin_unlock_bh(&xfrm_state_afinfo_lock); 3067 synchronize_rcu(); 3068 return err; 3069 } 3070 EXPORT_SYMBOL(xfrm_state_unregister_afinfo); 3071 3072 struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family) 3073 { 3074 if (unlikely(family >= NPROTO)) 3075 return NULL; 3076 3077 return rcu_dereference(xfrm_state_afinfo[family]); 3078 } 3079 EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu); 3080 3081 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) 3082 { 3083 struct xfrm_state_afinfo *afinfo; 3084 if (unlikely(family >= NPROTO)) 3085 return NULL; 3086 rcu_read_lock(); 3087 afinfo = rcu_dereference(xfrm_state_afinfo[family]); 3088 if (unlikely(!afinfo)) 3089 rcu_read_unlock(); 3090 return afinfo; 3091 } 3092 3093 void xfrm_flush_gc(void) 3094 { 3095 flush_work(&xfrm_state_gc_work); 3096 } 3097 EXPORT_SYMBOL(xfrm_flush_gc); 3098 3099 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ 3100 void xfrm_state_delete_tunnel(struct xfrm_state *x) 3101 { 3102 if (x->tunnel) { 3103 struct xfrm_state *t = x->tunnel; 3104 3105 if (atomic_read(&t->tunnel_users) == 2) 3106 xfrm_state_delete(t); 3107 atomic_dec(&t->tunnel_users); 3108 xfrm_state_put_sync(t); 3109 x->tunnel = NULL; 3110 } 3111 } 3112 EXPORT_SYMBOL(xfrm_state_delete_tunnel); 3113 3114 u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) 3115 { 3116 const struct xfrm_type *type = READ_ONCE(x->type); 3117 struct crypto_aead *aead; 3118 u32 blksize, net_adj = 0; 3119 3120 if (x->km.state != XFRM_STATE_VALID || 3121 !type || type->proto != IPPROTO_ESP) 3122 return mtu - x->props.header_len; 3123 3124 aead = x->data; 3125 blksize = ALIGN(crypto_aead_blocksize(aead), 4); 3126 3127 switch (x->props.mode) { 3128 case XFRM_MODE_TRANSPORT: 3129 case XFRM_MODE_BEET: 3130 if (x->props.family == AF_INET) 3131 net_adj = sizeof(struct iphdr); 3132 else if (x->props.family == AF_INET6) 3133 net_adj = sizeof(struct ipv6hdr); 3134 break; 3135 case XFRM_MODE_TUNNEL: 3136 break; 3137 default: 3138 if (x->mode_cbs && x->mode_cbs->get_inner_mtu) 3139 return 
x->mode_cbs->get_inner_mtu(x, mtu); 3140 3141 WARN_ON_ONCE(1); 3142 break; 3143 } 3144 3145 return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - 3146 net_adj) & ~(blksize - 1)) + net_adj - 2; 3147 } 3148 EXPORT_SYMBOL_GPL(xfrm_state_mtu); 3149 3150 int __xfrm_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) 3151 { 3152 const struct xfrm_mode *inner_mode; 3153 const struct xfrm_mode *outer_mode; 3154 int family = x->props.family; 3155 int err; 3156 3157 if (family == AF_INET && 3158 READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)) 3159 x->props.flags |= XFRM_STATE_NOPMTUDISC; 3160 3161 err = -EPROTONOSUPPORT; 3162 3163 if (x->sel.family != AF_UNSPEC) { 3164 inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); 3165 if (inner_mode == NULL) { 3166 NL_SET_ERR_MSG(extack, "Requested mode not found"); 3167 goto error; 3168 } 3169 3170 if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && 3171 family != x->sel.family) { 3172 NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate a change of family"); 3173 goto error; 3174 } 3175 3176 x->inner_mode = *inner_mode; 3177 } else { 3178 const struct xfrm_mode *inner_mode_iaf; 3179 int iafamily = AF_INET; 3180 3181 inner_mode = xfrm_get_mode(x->props.mode, x->props.family); 3182 if (inner_mode == NULL) { 3183 NL_SET_ERR_MSG(extack, "Requested mode not found"); 3184 goto error; 3185 } 3186 3187 x->inner_mode = *inner_mode; 3188 3189 if (x->props.family == AF_INET) 3190 iafamily = AF_INET6; 3191 3192 inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily); 3193 if (inner_mode_iaf) { 3194 if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL) 3195 x->inner_mode_iaf = *inner_mode_iaf; 3196 } 3197 } 3198 3199 x->type = xfrm_get_type(x->id.proto, family); 3200 if (x->type == NULL) { 3201 NL_SET_ERR_MSG(extack, "Requested type not found"); 3202 goto error; 3203 } 3204 3205 err = x->type->init_state(x, extack); 3206 if (err) 3207 goto error; 3208 3209 outer_mode = xfrm_get_mode(x->props.mode, family); 3210 if (!outer_mode) { 3211 NL_SET_ERR_MSG(extack, "Requested mode not found"); 3212 err = -EPROTONOSUPPORT; 3213 goto error; 3214 } 3215 3216 x->outer_mode = *outer_mode; 3217 if (x->nat_keepalive_interval) { 3218 if (x->dir != XFRM_SA_DIR_OUT) { 3219 NL_SET_ERR_MSG(extack, "NAT keepalive is only supported for outbound SAs"); 3220 err = -EINVAL; 3221 goto error; 3222 } 3223 3224 if (!x->encap || x->encap->encap_type != UDP_ENCAP_ESPINUDP) { 3225 NL_SET_ERR_MSG(extack, 3226 "NAT keepalive is only supported for UDP encapsulation"); 3227 err = -EINVAL; 3228 goto error; 3229 } 3230 } 3231 3232 x->mode_cbs = xfrm_get_mode_cbs(x->props.mode); 3233 if (x->mode_cbs) { 3234 if (x->mode_cbs->init_state) 3235 err = x->mode_cbs->init_state(x); 3236 module_put(x->mode_cbs->owner); 3237 } 3238 error: 3239 return err; 3240 } 3241 3242 EXPORT_SYMBOL(__xfrm_init_state); 3243 3244 int xfrm_init_state(struct xfrm_state *x) 3245 { 3246 int err; 3247 3248 err = __xfrm_init_state(x, NULL); 3249 if (err) 3250 return err; 3251 3252 err = xfrm_init_replay(x, NULL); 3253 if (err) 3254 return err; 3255 3256 x->km.state = XFRM_STATE_VALID; 3257 return 0; 3258 } 3259 3260 EXPORT_SYMBOL(xfrm_init_state); 3261 3262 int __net_init xfrm_state_init(struct net *net) 3263 { 3264 unsigned int sz; 3265 3266 if (net_eq(net, &init_net)) 3267 xfrm_state_cache = KMEM_CACHE(xfrm_state, 3268 SLAB_HWCACHE_ALIGN | SLAB_PANIC); 3269 3270 INIT_LIST_HEAD(&net->xfrm.state_all); 3271 3272 sz = sizeof(struct hlist_head) * 8; 3273 3274 net->xfrm.state_bydst = xfrm_hash_alloc(sz); 
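/* All four state hash tables start at eight buckets (sz above) and are grown later via state_hash_work -> xfrm_hash_resize(); on allocation failure the out_* labels below unwind whatever has already been set up. */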
3275 if (!net->xfrm.state_bydst) 3276 goto out_bydst; 3277 net->xfrm.state_bysrc = xfrm_hash_alloc(sz); 3278 if (!net->xfrm.state_bysrc) 3279 goto out_bysrc; 3280 net->xfrm.state_byspi = xfrm_hash_alloc(sz); 3281 if (!net->xfrm.state_byspi) 3282 goto out_byspi; 3283 net->xfrm.state_byseq = xfrm_hash_alloc(sz); 3284 if (!net->xfrm.state_byseq) 3285 goto out_byseq; 3286 3287 net->xfrm.state_cache_input = alloc_percpu(struct hlist_head); 3288 if (!net->xfrm.state_cache_input) 3289 goto out_state_cache_input; 3290 3291 net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); 3292 3293 net->xfrm.state_num = 0; 3294 INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); 3295 spin_lock_init(&net->xfrm.xfrm_state_lock); 3296 seqcount_spinlock_init(&net->xfrm.xfrm_state_hash_generation, 3297 &net->xfrm.xfrm_state_lock); 3298 return 0; 3299 3300 out_state_cache_input: 3301 xfrm_hash_free(net->xfrm.state_byseq, sz); 3302 out_byseq: 3303 xfrm_hash_free(net->xfrm.state_byspi, sz); 3304 out_byspi: 3305 xfrm_hash_free(net->xfrm.state_bysrc, sz); 3306 out_bysrc: 3307 xfrm_hash_free(net->xfrm.state_bydst, sz); 3308 out_bydst: 3309 return -ENOMEM; 3310 } 3311 3312 void xfrm_state_fini(struct net *net) 3313 { 3314 unsigned int sz; 3315 3316 flush_work(&net->xfrm.state_hash_work); 3317 flush_work(&xfrm_state_gc_work); 3318 xfrm_state_flush(net, 0, false, true); 3319 3320 WARN_ON(!list_empty(&net->xfrm.state_all)); 3321 3322 sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); 3323 WARN_ON(!hlist_empty(net->xfrm.state_byseq)); 3324 xfrm_hash_free(net->xfrm.state_byseq, sz); 3325 WARN_ON(!hlist_empty(net->xfrm.state_byspi)); 3326 xfrm_hash_free(net->xfrm.state_byspi, sz); 3327 WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); 3328 xfrm_hash_free(net->xfrm.state_bysrc, sz); 3329 WARN_ON(!hlist_empty(net->xfrm.state_bydst)); 3330 xfrm_hash_free(net->xfrm.state_bydst, sz); 3331 free_percpu(net->xfrm.state_cache_input); 3332 } 3333 3334 #ifdef CONFIG_AUDITSYSCALL 3335 static void xfrm_audit_helper_sainfo(struct xfrm_state *x, 3336 struct audit_buffer *audit_buf) 3337 { 3338 struct xfrm_sec_ctx *ctx = x->security; 3339 u32 spi = ntohl(x->id.spi); 3340 3341 if (ctx) 3342 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", 3343 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); 3344 3345 switch (x->props.family) { 3346 case AF_INET: 3347 audit_log_format(audit_buf, " src=%pI4 dst=%pI4", 3348 &x->props.saddr.a4, &x->id.daddr.a4); 3349 break; 3350 case AF_INET6: 3351 audit_log_format(audit_buf, " src=%pI6 dst=%pI6", 3352 x->props.saddr.a6, x->id.daddr.a6); 3353 break; 3354 } 3355 3356 audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi); 3357 } 3358 3359 static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, 3360 struct audit_buffer *audit_buf) 3361 { 3362 const struct iphdr *iph4; 3363 const struct ipv6hdr *iph6; 3364 3365 switch (family) { 3366 case AF_INET: 3367 iph4 = ip_hdr(skb); 3368 audit_log_format(audit_buf, " src=%pI4 dst=%pI4", 3369 &iph4->saddr, &iph4->daddr); 3370 break; 3371 case AF_INET6: 3372 iph6 = ipv6_hdr(skb); 3373 audit_log_format(audit_buf, 3374 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x", 3375 &iph6->saddr, &iph6->daddr, 3376 iph6->flow_lbl[0] & 0x0f, 3377 iph6->flow_lbl[1], 3378 iph6->flow_lbl[2]); 3379 break; 3380 } 3381 } 3382 3383 void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid) 3384 { 3385 struct audit_buffer *audit_buf; 3386 3387 audit_buf = xfrm_audit_start("SAD-add"); 3388 if (audit_buf == NULL) 3389 return; 3390 
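/* The audit record is assembled in a fixed order: the acting task's credentials first, then the SA description (security context, addresses, SPI), then the operation result. */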
xfrm_audit_helper_usrinfo(task_valid, audit_buf); 3391 xfrm_audit_helper_sainfo(x, audit_buf); 3392 audit_log_format(audit_buf, " res=%u", result); 3393 audit_log_end(audit_buf); 3394 } 3395 EXPORT_SYMBOL_GPL(xfrm_audit_state_add); 3396 3397 void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid) 3398 { 3399 struct audit_buffer *audit_buf; 3400 3401 audit_buf = xfrm_audit_start("SAD-delete"); 3402 if (audit_buf == NULL) 3403 return; 3404 xfrm_audit_helper_usrinfo(task_valid, audit_buf); 3405 xfrm_audit_helper_sainfo(x, audit_buf); 3406 audit_log_format(audit_buf, " res=%u", result); 3407 audit_log_end(audit_buf); 3408 } 3409 EXPORT_SYMBOL_GPL(xfrm_audit_state_delete); 3410 3411 void xfrm_audit_state_replay_overflow(struct xfrm_state *x, 3412 struct sk_buff *skb) 3413 { 3414 struct audit_buffer *audit_buf; 3415 u32 spi; 3416 3417 audit_buf = xfrm_audit_start("SA-replay-overflow"); 3418 if (audit_buf == NULL) 3419 return; 3420 xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf); 3421 /* don't record the sequence number because it's inherent in this kind 3422 * of audit message */ 3423 spi = ntohl(x->id.spi); 3424 audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi); 3425 audit_log_end(audit_buf); 3426 } 3427 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow); 3428 3429 void xfrm_audit_state_replay(struct xfrm_state *x, 3430 struct sk_buff *skb, __be32 net_seq) 3431 { 3432 struct audit_buffer *audit_buf; 3433 u32 spi; 3434 3435 audit_buf = xfrm_audit_start("SA-replayed-pkt"); 3436 if (audit_buf == NULL) 3437 return; 3438 xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf); 3439 spi = ntohl(x->id.spi); 3440 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u", 3441 spi, spi, ntohl(net_seq)); 3442 audit_log_end(audit_buf); 3443 } 3444 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay); 3445 3446 void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family) 3447 { 3448 struct audit_buffer *audit_buf; 3449 3450 audit_buf = xfrm_audit_start("SA-notfound"); 3451 if (audit_buf == NULL) 3452 return; 3453 xfrm_audit_helper_pktinfo(skb, family, audit_buf); 3454 audit_log_end(audit_buf); 3455 } 3456 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple); 3457 3458 void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family, 3459 __be32 net_spi, __be32 net_seq) 3460 { 3461 struct audit_buffer *audit_buf; 3462 u32 spi; 3463 3464 audit_buf = xfrm_audit_start("SA-notfound"); 3465 if (audit_buf == NULL) 3466 return; 3467 xfrm_audit_helper_pktinfo(skb, family, audit_buf); 3468 spi = ntohl(net_spi); 3469 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u", 3470 spi, spi, ntohl(net_seq)); 3471 audit_log_end(audit_buf); 3472 } 3473 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound); 3474 3475 void xfrm_audit_state_icvfail(struct xfrm_state *x, 3476 struct sk_buff *skb, u8 proto) 3477 { 3478 struct audit_buffer *audit_buf; 3479 __be32 net_spi; 3480 __be32 net_seq; 3481 3482 audit_buf = xfrm_audit_start("SA-icv-failure"); 3483 if (audit_buf == NULL) 3484 return; 3485 xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf); 3486 if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) { 3487 u32 spi = ntohl(net_spi); 3488 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u", 3489 spi, spi, ntohl(net_seq)); 3490 } 3491 audit_log_end(audit_buf); 3492 } 3493 EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail); 3494 #endif /* CONFIG_AUDITSYSCALL */ 3495
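/*
 * Illustrative sketch (not part of this file): how a key manager such as
 * af_key or xfrm_user typically drives the state API above when installing
 * a fully keyed SA.  The helper name example_install_sa and its minimal
 * error handling are invented here for illustration only.
 *
 *	static int example_install_sa(struct xfrm_state *x)
 *	{
 *		int err;
 *
 *		err = xfrm_init_state(x);	// bind type/mode ops, set XFRM_STATE_VALID
 *		if (err)
 *			return err;
 *
 *		return xfrm_state_add(x);	// hash into bydst/bysrc (and byspi/byseq);
 *						// -EEXIST if an equivalent SA exists
 *	}
 *
 * Reference handling on failure (dropping the caller's hold with
 * xfrm_state_put()) is deliberately omitted from the sketch.
 */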