1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * IPv6 IOAM Lightweight Tunnel implementation 4 * 5 * Author: 6 * Justin Iurman <justin.iurman@uliege.be> 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/skbuff.h> 11 #include <linux/net.h> 12 #include <linux/in6.h> 13 #include <linux/ioam6.h> 14 #include <linux/ioam6_iptunnel.h> 15 #include <net/dst.h> 16 #include <net/sock.h> 17 #include <net/lwtunnel.h> 18 #include <net/ioam6.h> 19 #include <net/netlink.h> 20 #include <net/ipv6.h> 21 #include <net/dst_cache.h> 22 #include <net/ip6_route.h> 23 #include <net/addrconf.h> 24 25 #define IOAM6_MASK_SHORT_FIELDS 0xff100000 26 #define IOAM6_MASK_WIDE_FIELDS 0xe00000 27 28 struct ioam6_lwt_encap { 29 struct ipv6_hopopt_hdr eh; 30 u8 pad[2]; /* 2-octet padding for 4n-alignment */ 31 struct ioam6_hdr ioamh; 32 struct ioam6_trace_hdr traceh; 33 } __packed; 34 35 struct ioam6_lwt { 36 struct dst_cache cache; 37 u8 mode; 38 struct in6_addr tundst; 39 struct ioam6_lwt_encap tuninfo; 40 }; 41 42 static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) 43 { 44 return (struct ioam6_lwt *)lwt->data; 45 } 46 47 static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt) 48 { 49 return &ioam6_lwt_state(lwt)->tuninfo; 50 } 51 52 static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt) 53 { 54 return &(ioam6_lwt_state(lwt)->tuninfo.traceh); 55 } 56 57 static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { 58 [IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8, 59 IOAM6_IPTUNNEL_MODE_MIN, 60 IOAM6_IPTUNNEL_MODE_MAX), 61 [IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 62 [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), 63 }; 64 65 static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) 66 { 67 u32 fields; 68 69 if (!trace->type_be32 || !trace->remlen || 70 trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 || 71 trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | 72 trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | 73 trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | 74 trace->type.bit21) 75 return false; 76 77 trace->nodelen = 0; 78 fields = be32_to_cpu(trace->type_be32); 79 80 trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) 81 * (sizeof(__be32) / 4); 82 trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) 83 * (sizeof(__be64) / 4); 84 85 return true; 86 } 87 88 static int ioam6_build_state(struct net *net, struct nlattr *nla, 89 unsigned int family, const void *cfg, 90 struct lwtunnel_state **ts, 91 struct netlink_ext_ack *extack) 92 { 93 struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; 94 struct ioam6_lwt_encap *tuninfo; 95 struct ioam6_trace_hdr *trace; 96 struct lwtunnel_state *lwt; 97 struct ioam6_lwt *ilwt; 98 int len_aligned, err; 99 u8 mode; 100 101 if (family != AF_INET6) 102 return -EINVAL; 103 104 err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla, 105 ioam6_iptunnel_policy, extack); 106 if (err < 0) 107 return err; 108 109 if (!tb[IOAM6_IPTUNNEL_MODE]) 110 mode = IOAM6_IPTUNNEL_MODE_INLINE; 111 else 112 mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]); 113 114 if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) { 115 NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination"); 116 return -EINVAL; 117 } 118 119 if (!tb[IOAM6_IPTUNNEL_TRACE]) { 120 NL_SET_ERR_MSG(extack, "missing trace"); 121 return -EINVAL; 122 } 123 124 trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]); 125 if (!ioam6_validate_trace_hdr(trace)) { 126 NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE], 127 "invalid trace validation"); 128 return -EINVAL; 129 } 130 131 len_aligned = ALIGN(trace->remlen * 4, 8); 132 lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned); 133 if (!lwt) 134 return -ENOMEM; 135 136 ilwt = ioam6_lwt_state(lwt); 137 err = dst_cache_init(&ilwt->cache, GFP_ATOMIC); 138 if (err) { 139 kfree(lwt); 140 return err; 141 } 142 143 ilwt->mode = mode; 144 if (tb[IOAM6_IPTUNNEL_DST]) 145 ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]); 146 147 tuninfo = ioam6_lwt_info(lwt); 148 tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1; 149 tuninfo->pad[0] = IPV6_TLV_PADN; 150 tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; 151 tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; 152 tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace) 153 + trace->remlen * 4; 154 155 memcpy(&tuninfo->traceh, trace, sizeof(*trace)); 156 157 if (len_aligned - trace->remlen * 4) { 158 tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; 159 tuninfo->traceh.data[trace->remlen * 4 + 1] = 2; 160 } 161 162 lwt->type = LWTUNNEL_ENCAP_IOAM6; 163 lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; 164 165 *ts = lwt; 166 167 return 0; 168 } 169 170 static int ioam6_do_fill(struct net *net, struct sk_buff *skb) 171 { 172 struct ioam6_trace_hdr *trace; 173 struct ioam6_namespace *ns; 174 175 trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) 176 + sizeof(struct ipv6_hopopt_hdr) + 2 177 + sizeof(struct ioam6_hdr)); 178 179 ns = ioam6_namespace(net, trace->namespace_id); 180 if (ns) 181 ioam6_fill_trace_data(skb, ns, trace, false); 182 183 return 0; 184 } 185 186 static int ioam6_do_inline(struct net *net, struct sk_buff *skb, 187 struct ioam6_lwt_encap *tuninfo) 188 { 189 struct ipv6hdr *oldhdr, *hdr; 190 int hdrlen, err; 191 192 hdrlen = (tuninfo->eh.hdrlen + 1) << 3; 193 194 err = skb_cow_head(skb, hdrlen + skb->mac_len); 195 if (unlikely(err)) 196 return err; 197 198 oldhdr = ipv6_hdr(skb); 199 skb_pull(skb, sizeof(*oldhdr)); 200 skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr)); 201 202 skb_push(skb, sizeof(*oldhdr) + hdrlen); 203 skb_reset_network_header(skb); 204 skb_mac_header_rebuild(skb); 205 206 hdr = ipv6_hdr(skb); 207 memmove(hdr, oldhdr, sizeof(*oldhdr)); 208 tuninfo->eh.nexthdr = hdr->nexthdr; 209 210 skb_set_transport_header(skb, sizeof(*hdr)); 211 skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen); 212 213 memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); 214 215 hdr->nexthdr = NEXTHDR_HOP; 216 hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); 217 218 return ioam6_do_fill(net, skb); 219 } 220 221 static int ioam6_do_encap(struct net *net, struct sk_buff *skb, 222 struct ioam6_lwt_encap *tuninfo, 223 struct in6_addr *tundst) 224 { 225 struct dst_entry *dst = skb_dst(skb); 226 struct ipv6hdr *hdr, *inner_hdr; 227 int hdrlen, len, err; 228 229 hdrlen = (tuninfo->eh.hdrlen + 1) << 3; 230 len = sizeof(*hdr) + hdrlen; 231 232 err = skb_cow_head(skb, len + skb->mac_len); 233 if (unlikely(err)) 234 return err; 235 236 inner_hdr = ipv6_hdr(skb); 237 238 skb_push(skb, len); 239 skb_reset_network_header(skb); 240 skb_mac_header_rebuild(skb); 241 skb_set_transport_header(skb, sizeof(*hdr)); 242 243 tuninfo->eh.nexthdr = NEXTHDR_IPV6; 244 memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); 245 246 hdr = ipv6_hdr(skb); 247 memcpy(hdr, inner_hdr, sizeof(*hdr)); 248 249 hdr->nexthdr = NEXTHDR_HOP; 250 hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); 251 hdr->daddr = *tundst; 252 ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr, 253 IPV6_PREFER_SRC_PUBLIC, &hdr->saddr); 254 255 skb_postpush_rcsum(skb, hdr, len); 256 257 return ioam6_do_fill(net, skb); 258 } 259 260 static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 261 { 262 struct dst_entry *dst = skb_dst(skb); 263 struct in6_addr orig_daddr; 264 struct ioam6_lwt *ilwt; 265 int err = -EINVAL; 266 267 if (skb->protocol != htons(ETH_P_IPV6)) 268 goto drop; 269 270 ilwt = ioam6_lwt_state(dst->lwtstate); 271 orig_daddr = ipv6_hdr(skb)->daddr; 272 273 switch (ilwt->mode) { 274 case IOAM6_IPTUNNEL_MODE_INLINE: 275 do_inline: 276 /* Direct insertion - if there is no Hop-by-Hop yet */ 277 if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) 278 goto out; 279 280 err = ioam6_do_inline(net, skb, &ilwt->tuninfo); 281 if (unlikely(err)) 282 goto drop; 283 284 break; 285 case IOAM6_IPTUNNEL_MODE_ENCAP: 286 do_encap: 287 /* Encapsulation (ip6ip6) */ 288 err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst); 289 if (unlikely(err)) 290 goto drop; 291 292 break; 293 case IOAM6_IPTUNNEL_MODE_AUTO: 294 /* Automatic (RFC8200 compliant): 295 * - local packets -> INLINE mode 296 * - in-transit packets -> ENCAP mode 297 */ 298 if (!skb->dev) 299 goto do_inline; 300 301 goto do_encap; 302 default: 303 goto drop; 304 } 305 306 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 307 if (unlikely(err)) 308 goto drop; 309 310 if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { 311 preempt_disable(); 312 dst = dst_cache_get(&ilwt->cache); 313 preempt_enable(); 314 315 if (unlikely(!dst)) { 316 struct ipv6hdr *hdr = ipv6_hdr(skb); 317 struct flowi6 fl6; 318 319 memset(&fl6, 0, sizeof(fl6)); 320 fl6.daddr = hdr->daddr; 321 fl6.saddr = hdr->saddr; 322 fl6.flowlabel = ip6_flowinfo(hdr); 323 fl6.flowi6_mark = skb->mark; 324 fl6.flowi6_proto = hdr->nexthdr; 325 326 dst = ip6_route_output(net, NULL, &fl6); 327 if (dst->error) { 328 err = dst->error; 329 dst_release(dst); 330 goto drop; 331 } 332 333 preempt_disable(); 334 dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); 335 preempt_enable(); 336 } 337 338 skb_dst_drop(skb); 339 skb_dst_set(skb, dst); 340 341 return dst_output(net, sk, skb); 342 } 343 out: 344 return dst->lwtstate->orig_output(net, sk, skb); 345 drop: 346 kfree_skb(skb); 347 return err; 348 } 349 350 static void ioam6_destroy_state(struct lwtunnel_state *lwt) 351 { 352 dst_cache_destroy(&ioam6_lwt_state(lwt)->cache); 353 } 354 355 static int ioam6_fill_encap_info(struct sk_buff *skb, 356 struct lwtunnel_state *lwtstate) 357 { 358 struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); 359 int err; 360 361 err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode); 362 if (err) 363 goto ret; 364 365 if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) { 366 err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst); 367 if (err) 368 goto ret; 369 } 370 371 err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh), 372 &ilwt->tuninfo.traceh); 373 ret: 374 return err; 375 } 376 377 static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) 378 { 379 struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); 380 int nlsize; 381 382 nlsize = nla_total_size(sizeof(ilwt->mode)) + 383 nla_total_size(sizeof(ilwt->tuninfo.traceh)); 384 385 if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) 386 nlsize += nla_total_size(sizeof(ilwt->tundst)); 387 388 return nlsize; 389 } 390 391 static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 392 { 393 struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a); 394 struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b); 395 struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a); 396 struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b); 397 398 return (ilwt_a->mode != ilwt_b->mode || 399 (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE && 400 !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) || 401 trace_a->namespace_id != trace_b->namespace_id); 402 } 403 404 static const struct lwtunnel_encap_ops ioam6_iptun_ops = { 405 .build_state = ioam6_build_state, 406 .destroy_state = ioam6_destroy_state, 407 .output = ioam6_output, 408 .fill_encap = ioam6_fill_encap_info, 409 .get_encap_size = ioam6_encap_nlsize, 410 .cmp_encap = ioam6_encap_cmp, 411 .owner = THIS_MODULE, 412 }; 413 414 int __init ioam6_iptunnel_init(void) 415 { 416 return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); 417 } 418 419 void ioam6_iptunnel_exit(void) 420 { 421 lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); 422 } 423