// SPDX-License-Identifier: GPL-2.0
/*
 * xfrm_input.c
 *
 * Changes:
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific portion
 *
 */

#include <linux/bottom_half.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>

struct xfrm_trans_tasklet {
	struct tasklet_struct tasklet;
	struct sk_buff_head queue;
};

struct xfrm_trans_cb {
	union {
		struct inet_skb_parm	h4;
#if IS_ENABLED(CONFIG_IPV6)
		struct inet6_skb_parm	h6;
#endif
	} header;
	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
};

#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))

static struct kmem_cache *secpath_cachep __read_mostly;

static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];

static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;

static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);

int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
{
	int err = 0;

	if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo)))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
		err = -EEXIST;
	else
		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_input_register_afinfo);

int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
{
	int err = 0;

	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
	}
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	synchronize_rcu();
	return err;
}
EXPORT_SYMBOL(xfrm_input_unregister_afinfo);

/* Returns the afinfo for @family with the RCU read lock held on success;
 * the caller must drop it (see xfrm_rcv_cb()). Returns NULL, with the
 * RCU read lock already released, if no handler is registered.
 */
static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
{
	const struct xfrm_input_afinfo *afinfo;

	if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo)))
		return NULL;

	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_input_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}
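
/* Illustrative sketch (not part of this file): how an address-family
 * module plugs into the registration API above. The names
 * example_xfrm4_rcv_cb and example_afinfo are hypothetical; the real
 * users are the IPv4/IPv6 xfrm protocol modules.
 */
#if 0
static int example_xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
{
	/* Per-family post-input processing would go here. */
	return 0;
}

static const struct xfrm_input_afinfo example_afinfo = {
	.family		= AF_INET,
	.callback	= example_xfrm4_rcv_cb,
};

static int __init example_init(void)
{
	/* Fails with -EEXIST if AF_INET already has a handler. */
	return xfrm_input_register_afinfo(&example_afinfo);
}

static void __exit example_exit(void)
{
	xfrm_input_unregister_afinfo(&example_afinfo);
}
#endif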

static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
		       int err)
{
	int ret;
	const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);

	if (!afinfo)
		return -EAFNOSUPPORT;

	ret = afinfo->callback(skb, protocol, err);
	rcu_read_unlock();

	return ret;
}

void __secpath_destroy(struct sec_path *sp)
{
	int i;
	for (i = 0; i < sp->len; i++)
		xfrm_state_put(sp->xvec[i]);
	kmem_cache_free(secpath_cachep, sp);
}
EXPORT_SYMBOL(__secpath_destroy);

struct sec_path *secpath_dup(struct sec_path *src)
{
	struct sec_path *sp;

	sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
	if (!sp)
		return NULL;

	sp->len = 0;
	sp->olen = 0;

	memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH]));

	if (src) {
		int i;

		memcpy(sp, src, sizeof(*sp));
		for (i = 0; i < sp->len; i++)
			xfrm_state_hold(sp->xvec[i]);
	}
	refcount_set(&sp->refcnt, 1);
	return sp;
}
EXPORT_SYMBOL(secpath_dup);

int secpath_set(struct sk_buff *skb)
{
	struct sec_path *sp;

	/* Allocate new secpath or COW existing one. */
	if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) {
		sp = secpath_dup(skb->sp);
		if (!sp)
			return -ENOMEM;

		if (skb->sp)
			secpath_put(skb->sp);
		skb->sp = sp;
	}
	return 0;
}
EXPORT_SYMBOL(secpath_set);

/* Fetch spi and seq from ipsec header */

int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
{
	int offset, offset_seq;
	int hlen;

	switch (nexthdr) {
	case IPPROTO_AH:
		hlen = sizeof(struct ip_auth_hdr);
		offset = offsetof(struct ip_auth_hdr, spi);
		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
		break;
	case IPPROTO_ESP:
		hlen = sizeof(struct ip_esp_hdr);
		offset = offsetof(struct ip_esp_hdr, spi);
		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
		break;
	case IPPROTO_COMP:
		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
			return -EINVAL;
		/* IPComp carries no sequence number; promote the 16-bit
		 * CPI to a 32-bit SPI. */
		*spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2)));
		*seq = 0;
		return 0;
	default:
		return 1;
	}

	if (!pskb_may_pull(skb, hlen))
		return -EINVAL;

	*spi = *(__be32 *)(skb_transport_header(skb) + offset);
	*seq = *(__be32 *)(skb_transport_header(skb) + offset_seq);
	return 0;
}
EXPORT_SYMBOL(xfrm_parse_spi);
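
/* Illustrative sketch (not part of this file): a typical caller parses
 * the SPI/sequence pair and then performs the state lookup, just as
 * xfrm_input() does below. example_lookup() is hypothetical.
 */
#if 0
static struct xfrm_state *example_lookup(struct net *net, struct sk_buff *skb,
					 xfrm_address_t *daddr, u8 nexthdr,
					 unsigned short family)
{
	__be32 spi, seq;

	/* xfrm_parse_spi() returns 0 on success, -EINVAL when the header
	 * cannot be pulled, and 1 for protocols it does not understand;
	 * seq would feed replay-window checking. */
	if (xfrm_parse_spi(skb, nexthdr, &spi, &seq) != 0)
		return NULL;

	return xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family);
}
#endif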

int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
	struct xfrm_mode *inner_mode = x->inner_mode;
	int err;

	err = x->outer_mode->afinfo->extract_input(x, skb);
	if (err)
		return err;

	if (x->sel.family == AF_UNSPEC) {
		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
		if (inner_mode == NULL)
			return -EAFNOSUPPORT;
	}

	skb->protocol = inner_mode->afinfo->eth_proto;
	return inner_mode->input2(x, skb);
}
EXPORT_SYMBOL(xfrm_prepare_input);

int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
	struct net *net = dev_net(skb->dev);
	int err;
	__be32 seq;
	__be32 seq_hi;
	struct xfrm_state *x = NULL;
	xfrm_address_t *daddr;
	struct xfrm_mode *inner_mode;
	u32 mark = skb->mark;
	unsigned int family = AF_UNSPEC;
	int decaps = 0;
	int async = 0;
	bool xfrm_gro = false;
	bool crypto_done = false;
	struct xfrm_offload *xo = xfrm_offload(skb);

	if (encap_type < 0) {
		x = xfrm_input_state(skb);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop;
		}

		family = x->outer_mode->afinfo->family;

		/* An encap_type of -1 indicates async resumption. */
		if (encap_type == -1) {
			async = 1;
			seq = XFRM_SKB_CB(skb)->seq.input.low;
			goto resume;
		}

		/* encap_type < -1 indicates a GRO call. */
		encap_type = 0;
		seq = XFRM_SPI_SKB_CB(skb)->seq;

		if (xo && (xo->flags & CRYPTO_DONE)) {
			crypto_done = true;
			family = XFRM_SPI_SKB_CB(skb)->family;

			if (!(xo->status & CRYPTO_SUCCESS)) {
				if (xo->status &
				    (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
				     CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
				     CRYPTO_TUNNEL_AH_AUTH_FAILED |
				     CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {

					xfrm_audit_state_icvfail(x, skb,
								 x->type->proto);
					x->stats.integrity_failed++;
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto drop;
			}

			if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
				goto drop;
			}
		}

		goto lock;
	}

	family = XFRM_SPI_SKB_CB(skb)->family;

	/* if tunnel is present override skb->mark value with tunnel i_key */
	switch (family) {
	case AF_INET:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
		break;
	case AF_INET6:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
		break;
	}

	err = secpath_set(skb);
	if (err) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
		goto drop;
	}

	seq = 0;
	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		goto drop;
	}

	daddr = (xfrm_address_t *)(skb_network_header(skb) +
				   XFRM_SPI_SKB_CB(skb)->daddroff);
	do {
		if (skb->sp->len == XFRM_MAX_DEPTH) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
			goto drop;
		}

		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
		if (x == NULL) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
			xfrm_audit_state_notfound(skb, family, spi, seq);
			goto drop;
		}

		skb->sp->xvec[skb->sp->len++] = x;

lock:
		spin_lock(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop_unlock;
		}

		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
			goto drop_unlock;
		}

		if (x->repl->check(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		if (xfrm_state_check_expire(x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED);
			goto drop_unlock;
		}

		spin_unlock(&x->lock);

		if (xfrm_tunnel_check(skb, x, family)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		seq_hi = htonl(xfrm_replay_seqhi(x, seq));

		XFRM_SKB_CB(skb)->seq.input.low = seq;
		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;

		skb_dst_force(skb);
		dev_hold(skb->dev);

		if (crypto_done)
			nexthdr = x->type_offload->input_tail(x, skb);
		else
			nexthdr = x->type->input(x, skb);

		if (nexthdr == -EINPROGRESS)
			return 0;
resume:
		/* Async resumption via xfrm_input_resume() re-enters here. */
		dev_put(skb->dev);

		spin_lock(&x->lock);
		if (nexthdr <= 0) {
			if (nexthdr == -EBADMSG) {
				xfrm_audit_state_icvfail(x, skb,
							 x->type->proto);
				x->stats.integrity_failed++;
			}
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
			goto drop_unlock;
		}

		/* only the first xfrm gets the encap type */
		encap_type = 0;

		if (async && x->repl->recheck(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		x->repl->advance(x, seq);

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock(&x->lock);

		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;

		inner_mode = x->inner_mode;

		if (x->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
			if (inner_mode == NULL) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
				goto drop;
			}
		}

		if (inner_mode->input(x, skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
			decaps = 1;
			break;
		}

		/*
		 * We need the inner address.  However, we only get here for
		 * transport mode so the outer address is identical.
		 */
		daddr = &x->id.daddr;
		family = x->outer_mode->afinfo->family;

		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
		if (err < 0) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
			goto drop;
		}
	} while (!err);

	err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
	if (err)
		goto drop;

	nf_reset(skb);

	if (decaps) {
		if (skb->sp)
			skb->sp->olen = 0;
		skb_dst_drop(skb);
		gro_cells_receive(&gro_cells, skb);
		return 0;
	} else {
		xo = xfrm_offload(skb);
		if (xo)
			xfrm_gro = xo->flags & XFRM_GRO;

		err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
		if (xfrm_gro) {
			if (skb->sp)
				skb->sp->olen = 0;
			skb_dst_drop(skb);
			gro_cells_receive(&gro_cells, skb);
			return err;
		}

		return err;
	}

drop_unlock:
	spin_unlock(&x->lock);
drop:
	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(xfrm_input);
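
/* Illustrative sketch (not part of this file): xfrm_input() is normally
 * entered from a per-family protocol handler that records where the
 * destination address sits before handing the skb over. The function
 * below is hypothetical; the real IPv4 entry points live in
 * net/ipv4/xfrm4_input.c.
 */
#if 0
static int example_esp4_rcv(struct sk_buff *skb)
{
	/* Tell the state lookup in xfrm_input() where to find daddr. */
	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);

	/* spi == 0: parse it from the packet; encap_type == 0: plain
	 * ESP without NAT-T/UDP encapsulation. */
	return xfrm_input(skb, IPPROTO_ESP, 0, 0);
}
#endif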

int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
{
	return xfrm_input(skb, nexthdr, 0, -1);
}
EXPORT_SYMBOL(xfrm_input_resume);

/* Tasklet handler: run the deferred finish callback for each queued skb. */
static void xfrm_trans_reinject(unsigned long data)
{
	struct xfrm_trans_tasklet *trans = (void *)data;
	struct sk_buff_head queue;
	struct sk_buff *skb;

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&trans->queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
}

int xfrm_trans_queue(struct sk_buff *skb,
		     int (*finish)(struct net *, struct sock *,
				   struct sk_buff *))
{
	struct xfrm_trans_tasklet *trans;

	trans = this_cpu_ptr(&xfrm_trans_tasklet);

	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
		return -ENOBUFS;

	XFRM_TRANS_SKB_CB(skb)->finish = finish;
	__skb_queue_tail(&trans->queue, skb);
	tasklet_schedule(&trans->tasklet);
	return 0;
}
EXPORT_SYMBOL(xfrm_trans_queue);
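
/* Illustrative sketch (not part of this file): a receive path that must
 * not recurse into the stack can defer its final delivery step through
 * the per-CPU tasklet above. example_finish() and example_defer() are
 * hypothetical; the real callers are the xfrm4/xfrm6 transport-mode
 * finish paths.
 */
#if 0
static int example_finish(struct net *net, struct sock *sk,
			  struct sk_buff *skb)
{
	return netif_rx(skb);
}

static int example_defer(struct sk_buff *skb)
{
	/* Fails with -ENOBUFS once the per-CPU queue reaches
	 * netdev_max_backlog. */
	return xfrm_trans_queue(skb, example_finish);
}
#endif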

void __init xfrm_input_init(void)
{
	int err;
	int i;

	init_dummy_netdev(&xfrm_napi_dev);
	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
	if (err)
		gro_cells.cells = NULL;

	secpath_cachep = kmem_cache_create("secpath_cache",
					   sizeof(struct sec_path),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	for_each_possible_cpu(i) {
		struct xfrm_trans_tasklet *trans;

		trans = &per_cpu(xfrm_trans_tasklet, i);
		__skb_queue_head_init(&trans->queue);
		tasklet_init(&trans->tasklet, xfrm_trans_reinject,
			     (unsigned long)trans);
	}
}