// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * xfrm_output.c - Common IPsec encapsulation code.
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#include "xfrm_inout.h"

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);

static int xfrm_skb_check_space(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
		- skb_headroom(skb);
	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);

	if (nhead <= 0) {
		if (ntail <= 0)
			return 0;
		nhead = 0;
	} else if (ntail < 0)
		ntail = 0;

	return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
}

/* Children define the path of the packet through the Linux networking
 * stack. Thus, destinations are stackable.
 */

static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
	struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));

	skb_dst_drop(skb);
	return child;
}

/* Add encapsulation header.
 *
 * The IP header will be moved forward to make space for the encapsulation
 * header.
 */
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int ihl = iph->ihl * 4;

	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + ihl;
	__skb_pull(skb, ihl);
	memmove(skb_network_header(skb), iph, ihl);
	return 0;
}

/* Add encapsulation header.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the encapsulation header.
 */
static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}
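
/* A rough sketch of the buffer layout the transport-mode helpers above
 * produce (illustrative only; the gap size comes from x->props.header_len,
 * and in the IPv6 case mutable extension headers move along with the IP
 * header):
 *
 *   before:  | IP header | payload |
 *   after:   | IP header | gap for encap header | payload |
 *                        ^ skb->transport_header ^ skb->data
 *
 * x->type->output (e.g. ESP) later fills the gap.
 */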

/* Add route optimization header space.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the route optimization header.
 */
static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);

	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);

	x->lastused = ktime_get_real_seconds();

	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	xfrm4_beet_make_header(skb);

	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->protocol;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}
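
/* A worked example of the pseudo-header arithmetic above, assuming the
 * reading that the BEET pseudo header (4 bytes), NOP padding and copied
 * IPv4 options must together span a multiple of 8 bytes, reported in
 * ph->hdrlen as (total / 8) - 1 (see draft-nikander-esp-beet-mode):
 *
 *   optlen = 4: padlen = 4 - (4 & 4) = 0, hdrlen = 4 / 8 = 0
 *               -> 4 + 0 + 4 =  8 bytes total
 *   optlen = 8: padlen = 4 - (8 & 4) = 4, hdrlen = 8 / 8 = 1
 *               -> 4 + 4 + 8 = 16 bytes total
 */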

/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 */
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* Copying the inner DS field depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *top_iph;
	int dsfield;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ipv6_hdr(skb);

	top_iph->version = 6;

	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
	       sizeof(top_iph->flow_lbl));
	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);

	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		dsfield = 0;
	else
		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
	if (x->props.flags & XFRM_STATE_NOECN)
		dsfield &= ~INET_ECN_MASK;
	ipv6_change_dsfield(top_iph, 0, dsfield);
	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}

static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->nexthdr;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
#endif
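
/* Note on the DSCP/ECN handling in the tunnel encap helpers above: the
 * outer DS field starts from the inner ToS / Traffic Class (or from 0
 * when XFRM_SA_XFLAG_DONT_ENCAP_DSCP is set), INET_ECN_encapsulate()
 * then copies the inner ECN bits into it while mapping CE to ECT(0),
 * and XFRM_STATE_NOECN finally clears the outer ECN bits altogether.
 */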

/* Add encapsulation header.
 *
 * On exit, the transport header will be set to the start of the
 * encapsulation header to be filled in by x->type->output and the mac
 * header will be set to the nextheader (protocol for IPv4) field of the
 * extension header directly preceding the encapsulation header, or in
 * its absence, that of the top IP header.
 * The value of the network header will always point to the top IP header
 * while skb->data will point to the payload.
 */
static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
	skb->protocol = htons(ETH_P_IP);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm4_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm4_tunnel_encap_add(x, skb);
	}

	WARN_ON_ONCE(1);
	return -EOPNOTSUPP;
}

static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	skb->ignore_df = 1;
	skb->protocol = htons(ETH_P_IPV6);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm6_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_encap_add(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
#endif
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
}

static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
	case XFRM_MODE_TUNNEL:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_prepare_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_prepare_output(x, skb);
		break;
	case XFRM_MODE_TRANSPORT:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_transport_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_transport_output(x, skb);
		break;
	case XFRM_MODE_ROUTEOPTIMIZATION:
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_ro_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return -EOPNOTSUPP;
}

#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
#endif
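
/* A rough summary of the per-state output loop implemented by
 * xfrm_output_one() below (informal, not a normative description):
 *
 *   1. ensure enough head/tailroom    (xfrm_skb_check_space)
 *   2. build the outer headers        (xfrm_outer_mode_output)
 *   3. under x->lock: check state validity, expiry and the replay
 *      counter (x->repl->overflow), and update lifetime counters
 *   4. run the protocol transform     (x->type->output, e.g. ESP),
 *      which may return -EINPROGRESS for async crypto; completion
 *      re-enters through xfrm_output_resume()
 *
 * then pop to the next dst and repeat while the next state is not
 * tunnel-mode; a tunnel-mode state is instead picked up again by
 * xfrm_output_resume() after netfilter POST_ROUTING.
 */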

static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	if (err <= 0)
		goto resume;

	do {
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = x->repl->overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock_bh(&x->lock);

		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

		if (xfrm_offload(skb)) {
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now. */
			skb->encapsulation = 0;

			err = x->type->output(x, skb);
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}

int xfrm_output_resume(struct sk_buff *skb, int err)
{
	struct net *net = xs_net(skb_dst(skb)->xfrm);

	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
		nf_reset_ct(skb);

		err = skb_dst(skb)->ops->local_out(net, skb->sk, skb);
		if (unlikely(err != 1))
			goto out;

		if (!skb_dst(skb)->xfrm)
			return dst_output(net, skb->sk, skb);

		err = nf_hook(skb_dst(skb)->ops->family,
			      NF_INET_POST_ROUTING, net, skb->sk, skb,
			      NULL, skb_dst(skb)->dev, xfrm_output2);
		if (unlikely(err != 1))
			goto out;
	}

	if (err == -EINPROGRESS)
		err = 0;

out:
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(skb, 1);
}

static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *segs, *nskb;

	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET);
	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = skb_gso_segment(skb, 0);
	kfree_skb(skb);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = xfrm_output2(net, sk, segs);

		if (unlikely(err)) {
			kfree_skb_list(nskb);
			return err;
		}
	}

	return 0;
}

int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int err;

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}
		skb->encapsulation = 1;

		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		if (skb_is_gso(skb)) {
			if (skb->inner_protocol)
				return xfrm_output_gso(net, sk, skb);

			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
			goto out;
		}

		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	} else {
		if (skb_is_gso(skb))
			return xfrm_output_gso(net, sk, skb);
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);

/* Capture inner-header state (e.g. ToS and options/extension-header
 * length) into XFRM_MODE_SKB_CB via the inner family's extract_output
 * hook before the outer header is built. The inner family may differ
 * from the outer one for interfamily (4-in-6 / 6-in-4) tunnels.
 */
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	const struct xfrm_state_afinfo *afinfo;
	const struct xfrm_mode *inner_mode;
	int err = -EAFNOSUPPORT;

	if (x->sel.family == AF_UNSPEC)
		inner_mode = xfrm_ip2inner_mode(x,
				xfrm_af2proto(skb_dst(skb)->ops->family));
	else
		inner_mode = &x->inner_mode;

	if (inner_mode == NULL)
		return -EAFNOSUPPORT;

	rcu_read_lock();
	afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
	if (likely(afinfo))
		err = afinfo->extract_output(x, skb);
	rcu_read_unlock();

	return err;
}
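
/* Report an output error (typically a too-big packet, hence the mtu
 * argument) back to the local sender, dispatched on the packet's
 * family. IPv6 errors are only reported when the originating socket
 * is itself AF_INET6.
 */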
void xfrm_local_error(struct sk_buff *skb, int mtu)
{
	unsigned int proto;
	struct xfrm_state_afinfo *afinfo;

	if (skb->protocol == htons(ETH_P_IP))
		proto = AF_INET;
	else if (skb->protocol == htons(ETH_P_IPV6) &&
		 skb->sk->sk_family == AF_INET6)
		proto = AF_INET6;
	else
		return;

	afinfo = xfrm_state_get_afinfo(proto);
	if (afinfo) {
		afinfo->local_error(skb, mtu);
		/* xfrm_state_get_afinfo() returned with rcu_read_lock() held */
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xfrm_local_error);