// SPDX-License-Identifier: GPL-2.0
/*
 * xfrm_input.c
 *
 * Changes:
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific portion
 *
 */

#include <linux/bottom_half.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>

struct xfrm_trans_tasklet {
	struct tasklet_struct tasklet;
	struct sk_buff_head queue;
};

struct xfrm_trans_cb {
	union {
		struct inet_skb_parm	h4;
#if IS_ENABLED(CONFIG_IPV6)
		struct inet6_skb_parm	h6;
#endif
	} header;
	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
};

#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))

static struct kmem_cache *secpath_cachep __ro_after_init;

static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];

static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;

static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);

int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
{
	int err = 0;

	if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo)))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
		err = -EEXIST;
	else
		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_input_register_afinfo);

int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
{
	int err = 0;

	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
	}
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	synchronize_rcu();
	return err;
}
EXPORT_SYMBOL(xfrm_input_unregister_afinfo);

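/* Returns the afinfo for @family with rcu_read_lock() still held on
 * success; the caller must drop it with rcu_read_unlock() once it is
 * done with the pointer (see xfrm_rcv_cb() below). On failure the read
 * lock is released here and NULL is returned.
 */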
static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
{
	const struct xfrm_input_afinfo *afinfo;

	if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo)))
		return NULL;

	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_input_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
		       int err)
{
	int ret;
	const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);

	if (!afinfo)
		return -EAFNOSUPPORT;

	ret = afinfo->callback(skb, protocol, err);
	rcu_read_unlock();

	return ret;
}

void __secpath_destroy(struct sec_path *sp)
{
	int i;
	for (i = 0; i < sp->len; i++)
		xfrm_state_put(sp->xvec[i]);
	kmem_cache_free(secpath_cachep, sp);
}
EXPORT_SYMBOL(__secpath_destroy);

struct sec_path *secpath_dup(struct sec_path *src)
{
	struct sec_path *sp;

	sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
	if (!sp)
		return NULL;

	sp->len = 0;
	sp->olen = 0;

	memset(sp->ovec, 0, sizeof(sp->ovec));

	if (src) {
		int i;

		memcpy(sp, src, sizeof(*sp));
		for (i = 0; i < sp->len; i++)
			xfrm_state_hold(sp->xvec[i]);
	}
	refcount_set(&sp->refcnt, 1);
	return sp;
}
EXPORT_SYMBOL(secpath_dup);

int secpath_set(struct sk_buff *skb)
{
	struct sec_path *sp;

	/* Allocate new secpath or COW existing one. */
	if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) {
		sp = secpath_dup(skb->sp);
		if (!sp)
			return -ENOMEM;

		if (skb->sp)
			secpath_put(skb->sp);
		skb->sp = sp;
	}
	return 0;
}
EXPORT_SYMBOL(secpath_set);

/* Fetch spi and seq from ipsec header */

int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
{
	int offset, offset_seq;
	int hlen;

	switch (nexthdr) {
	case IPPROTO_AH:
		hlen = sizeof(struct ip_auth_hdr);
		offset = offsetof(struct ip_auth_hdr, spi);
		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
		break;
	case IPPROTO_ESP:
		hlen = sizeof(struct ip_esp_hdr);
		offset = offsetof(struct ip_esp_hdr, spi);
		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
		break;
	case IPPROTO_COMP:
		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
			return -EINVAL;
		*spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2)));
		*seq = 0;
		return 0;
	default:
		return 1;
	}

	if (!pskb_may_pull(skb, hlen))
		return -EINVAL;

	*spi = *(__be32 *)(skb_transport_header(skb) + offset);
	*seq = *(__be32 *)(skb_transport_header(skb) + offset_seq);
	return 0;
}
EXPORT_SYMBOL(xfrm_parse_spi);

int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
	struct xfrm_mode *inner_mode = x->inner_mode;
	int err;

	err = x->outer_mode->afinfo->extract_input(x, skb);
	if (err)
		return err;

	if (x->sel.family == AF_UNSPEC) {
		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
		if (inner_mode == NULL)
			return -EAFNOSUPPORT;
	}

	skb->protocol = inner_mode->afinfo->eth_proto;
	return inner_mode->input2(x, skb);
}
EXPORT_SYMBOL(xfrm_prepare_input);

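/* Main IPsec receive path. @encap_type is a positive UDP encapsulation
 * type for ESP-in-UDP packets, 0 for plain AH/ESP/IPcomp input, -1 to
 * resume after asynchronous crypto completion, and < -1 when called
 * from the GRO codepath. Each pass through the loop below looks up one
 * state by (daddr, spi, proto), validates it, runs the transform and
 * strips one layer of encapsulation, until either a tunnel-mode state
 * decapsulates the packet or no further IPsec header follows.
 */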
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
	struct net *net = dev_net(skb->dev);
	int err;
	__be32 seq;
	__be32 seq_hi;
	struct xfrm_state *x = NULL;
	xfrm_address_t *daddr;
	struct xfrm_mode *inner_mode;
	u32 mark = skb->mark;
	unsigned int family = AF_UNSPEC;
	int decaps = 0;
	int async = 0;
	bool xfrm_gro = false;
	bool crypto_done = false;
	struct xfrm_offload *xo = xfrm_offload(skb);

	if (encap_type < 0) {
		x = xfrm_input_state(skb);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop;
		}

		family = x->outer_mode->afinfo->family;

		/* An encap_type of -1 indicates async resumption. */
		if (encap_type == -1) {
			async = 1;
			seq = XFRM_SKB_CB(skb)->seq.input.low;
			goto resume;
		}

		/* encap_type < -1 indicates a GRO call. */
		encap_type = 0;
		seq = XFRM_SPI_SKB_CB(skb)->seq;

		if (xo && (xo->flags & CRYPTO_DONE)) {
			crypto_done = true;
			family = XFRM_SPI_SKB_CB(skb)->family;

			if (!(xo->status & CRYPTO_SUCCESS)) {
				if (xo->status &
				    (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
				     CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
				     CRYPTO_TUNNEL_AH_AUTH_FAILED |
				     CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {

					xfrm_audit_state_icvfail(x, skb,
								 x->type->proto);
					x->stats.integrity_failed++;
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto drop;
			}

			if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
				goto drop;
			}
		}

		goto lock;
	}

	family = XFRM_SPI_SKB_CB(skb)->family;

	/* if tunnel is present override skb->mark value with tunnel i_key */
	switch (family) {
	case AF_INET:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
		break;
	case AF_INET6:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
		break;
	}

	err = secpath_set(skb);
	if (err) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
		goto drop;
	}

	seq = 0;
	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
		secpath_reset(skb);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		goto drop;
	}

	daddr = (xfrm_address_t *)(skb_network_header(skb) +
				   XFRM_SPI_SKB_CB(skb)->daddroff);
	do {
		if (skb->sp->len == XFRM_MAX_DEPTH) {
			secpath_reset(skb);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
			goto drop;
		}

		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
		if (x == NULL) {
			secpath_reset(skb);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
			xfrm_audit_state_notfound(skb, family, spi, seq);
			goto drop;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		skb->sp->xvec[skb->sp->len++] = x;

		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
			goto drop;
		}

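		/* The GRO fast path above enters here via "goto lock" with
		 * the state taken from the secpath; the lookup loop falls
		 * through. Either way, revalidate the state and the replay
		 * window under x->lock before running the transform.
		 */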
lock:
		spin_lock(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop_unlock;
		}

		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
			goto drop_unlock;
		}

		if (x->repl->check(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		if (xfrm_state_check_expire(x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED);
			goto drop_unlock;
		}

		spin_unlock(&x->lock);

		if (xfrm_tunnel_check(skb, x, family)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		seq_hi = htonl(xfrm_replay_seqhi(x, seq));

		XFRM_SKB_CB(skb)->seq.input.low = seq;
		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;

		dev_hold(skb->dev);

		if (crypto_done)
			nexthdr = x->type_offload->input_tail(x, skb);
		else
			nexthdr = x->type->input(x, skb);

		if (nexthdr == -EINPROGRESS)
			return 0;
resume:
		dev_put(skb->dev);

		spin_lock(&x->lock);
		if (nexthdr <= 0) {
			if (nexthdr == -EBADMSG) {
				xfrm_audit_state_icvfail(x, skb,
							 x->type->proto);
				x->stats.integrity_failed++;
			}
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
			goto drop_unlock;
		}

		/* only the first xfrm gets the encap type */
		encap_type = 0;

		if (async && x->repl->recheck(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		x->repl->advance(x, seq);

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock(&x->lock);

		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;

		inner_mode = x->inner_mode;

		if (x->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
			if (inner_mode == NULL) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
				goto drop;
			}
		}

		if (inner_mode->input(x, skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
			decaps = 1;
			break;
		}

		/*
		 * We need the inner address.  However, we only get here for
		 * transport mode so the outer address is identical.
		 */
		daddr = &x->id.daddr;
		family = x->outer_mode->afinfo->family;

		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
		if (err < 0) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
			goto drop;
		}
		crypto_done = false;
	} while (!err);

	err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
	if (err)
		goto drop;

	nf_reset(skb);

	if (decaps) {
		if (skb->sp)
			skb->sp->olen = 0;
		skb_dst_drop(skb);
		gro_cells_receive(&gro_cells, skb);
		return 0;
	} else {
		xo = xfrm_offload(skb);
		if (xo)
			xfrm_gro = xo->flags & XFRM_GRO;

		err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
		if (xfrm_gro) {
			if (skb->sp)
				skb->sp->olen = 0;
			skb_dst_drop(skb);
			gro_cells_receive(&gro_cells, skb);
			return err;
		}

		return err;
	}

drop_unlock:
	spin_unlock(&x->lock);
drop:
	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(xfrm_input);

int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
{
	return xfrm_input(skb, nexthdr, 0, -1);
}
EXPORT_SYMBOL(xfrm_input_resume);

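/* Decrypted transport-mode packets must traverse the stack a second
 * time. Rather than recursing on the current kernel stack, they are
 * queued per CPU and reinjected from tasklet (softirq) context through
 * the finish callback stashed in the skb control block. A sketch of
 * how an af-specific transport_finish handler is expected to use this
 * (assuming the IPv4 xfrm4_rcv_encap_finish() callback from
 * net/ipv4/xfrm4_input.c; the exact callback is af-specific):
 *
 *	if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish))
 *		kfree_skb(skb);
 */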
static void xfrm_trans_reinject(unsigned long data)
{
	struct xfrm_trans_tasklet *trans = (void *)data;
	struct sk_buff_head queue;
	struct sk_buff *skb;

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&trans->queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
}

int xfrm_trans_queue(struct sk_buff *skb,
		     int (*finish)(struct net *, struct sock *,
				   struct sk_buff *))
{
	struct xfrm_trans_tasklet *trans;

	trans = this_cpu_ptr(&xfrm_trans_tasklet);

	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
		return -ENOBUFS;

	XFRM_TRANS_SKB_CB(skb)->finish = finish;
	__skb_queue_tail(&trans->queue, skb);
	tasklet_schedule(&trans->tasklet);
	return 0;
}
EXPORT_SYMBOL(xfrm_trans_queue);

void __init xfrm_input_init(void)
{
	int err;
	int i;

	init_dummy_netdev(&xfrm_napi_dev);
	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
	if (err)
		gro_cells.cells = NULL;

	secpath_cachep = kmem_cache_create("secpath_cache",
					   sizeof(struct sec_path),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	for_each_possible_cpu(i) {
		struct xfrm_trans_tasklet *trans;

		trans = &per_cpu(xfrm_trans_tasklet, i);
		__skb_queue_head_init(&trans->queue);
		tasklet_init(&trans->tasklet, xfrm_trans_reinject,
			     (unsigned long)trans);
	}
}