1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Authors: 4 * (C) 2020 Alexander Aring <alex.aring@gmail.com> 5 */ 6 7 #include <linux/rpl_iptunnel.h> 8 9 #include <net/dst_cache.h> 10 #include <net/ip6_route.h> 11 #include <net/lwtunnel.h> 12 #include <net/ipv6.h> 13 #include <net/rpl.h> 14 15 struct rpl_iptunnel_encap { 16 DECLARE_FLEX_ARRAY(struct ipv6_rpl_sr_hdr, srh); 17 }; 18 19 struct rpl_lwt { 20 struct dst_cache cache; 21 struct rpl_iptunnel_encap tuninfo; 22 }; 23 24 static inline struct rpl_lwt *rpl_lwt_lwtunnel(struct lwtunnel_state *lwt) 25 { 26 return (struct rpl_lwt *)lwt->data; 27 } 28 29 static inline struct rpl_iptunnel_encap * 30 rpl_encap_lwtunnel(struct lwtunnel_state *lwt) 31 { 32 return &rpl_lwt_lwtunnel(lwt)->tuninfo; 33 } 34 35 static const struct nla_policy rpl_iptunnel_policy[RPL_IPTUNNEL_MAX + 1] = { 36 [RPL_IPTUNNEL_SRH] = { .type = NLA_BINARY }, 37 }; 38 39 static bool rpl_validate_srh(struct net *net, struct ipv6_rpl_sr_hdr *srh, 40 size_t seglen) 41 { 42 int err; 43 44 if ((srh->hdrlen << 3) != seglen) 45 return false; 46 47 /* check at least one segment and seglen fit with segments_left */ 48 if (!srh->segments_left || 49 (srh->segments_left * sizeof(struct in6_addr)) != seglen) 50 return false; 51 52 if (srh->cmpri || srh->cmpre) 53 return false; 54 55 err = ipv6_chk_rpl_srh_loop(net, srh->rpl_segaddr, 56 srh->segments_left); 57 if (err) 58 return false; 59 60 if (ipv6_addr_type(&srh->rpl_segaddr[srh->segments_left - 1]) & 61 IPV6_ADDR_MULTICAST) 62 return false; 63 64 return true; 65 } 66 67 static int rpl_build_state(struct net *net, struct nlattr *nla, 68 unsigned int family, const void *cfg, 69 struct lwtunnel_state **ts, 70 struct netlink_ext_ack *extack) 71 { 72 struct nlattr *tb[RPL_IPTUNNEL_MAX + 1]; 73 struct lwtunnel_state *newts; 74 struct ipv6_rpl_sr_hdr *srh; 75 struct rpl_lwt *rlwt; 76 int err, srh_len; 77 78 if (family != AF_INET6) 79 return -EINVAL; 80 81 err = nla_parse_nested(tb, RPL_IPTUNNEL_MAX, nla, 82 rpl_iptunnel_policy, extack); 83 if (err < 0) 84 return err; 85 86 if (!tb[RPL_IPTUNNEL_SRH]) 87 return -EINVAL; 88 89 srh = nla_data(tb[RPL_IPTUNNEL_SRH]); 90 srh_len = nla_len(tb[RPL_IPTUNNEL_SRH]); 91 92 if (srh_len < sizeof(*srh)) 93 return -EINVAL; 94 95 /* verify that SRH is consistent */ 96 if (!rpl_validate_srh(net, srh, srh_len - sizeof(*srh))) 97 return -EINVAL; 98 99 newts = lwtunnel_state_alloc(srh_len + sizeof(*rlwt)); 100 if (!newts) 101 return -ENOMEM; 102 103 rlwt = rpl_lwt_lwtunnel(newts); 104 105 err = dst_cache_init(&rlwt->cache, GFP_ATOMIC); 106 if (err) { 107 kfree(newts); 108 return err; 109 } 110 111 memcpy(&rlwt->tuninfo.srh, srh, srh_len); 112 113 newts->type = LWTUNNEL_ENCAP_RPL; 114 newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; 115 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; 116 117 *ts = newts; 118 119 return 0; 120 } 121 122 static void rpl_destroy_state(struct lwtunnel_state *lwt) 123 { 124 dst_cache_destroy(&rpl_lwt_lwtunnel(lwt)->cache); 125 } 126 127 static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, 128 const struct ipv6_rpl_sr_hdr *srh, 129 struct dst_entry *cache_dst) 130 { 131 struct ipv6_rpl_sr_hdr *isrh, *csrh; 132 const struct ipv6hdr *oldhdr; 133 struct ipv6hdr *hdr; 134 unsigned char *buf; 135 size_t hdrlen; 136 int err; 137 138 oldhdr = ipv6_hdr(skb); 139 140 buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC); 141 if (!buf) 142 return -ENOMEM; 143 144 isrh = (struct ipv6_rpl_sr_hdr *)buf; 145 csrh = (struct ipv6_rpl_sr_hdr *)(buf + ((srh->hdrlen + 1) << 3)); 146 147 memcpy(isrh, srh, sizeof(*isrh)); 148 memcpy(isrh->rpl_segaddr, &srh->rpl_segaddr[1], 149 (srh->segments_left - 1) * 16); 150 isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr->daddr; 151 152 ipv6_rpl_srh_compress(csrh, isrh, &srh->rpl_segaddr[0], 153 isrh->segments_left - 1); 154 155 hdrlen = ((csrh->hdrlen + 1) << 3); 156 157 err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb)); 158 if (unlikely(err)) { 159 kfree(buf); 160 return err; 161 } 162 163 skb_pull(skb, sizeof(struct ipv6hdr)); 164 skb_postpull_rcsum(skb, skb_network_header(skb), 165 sizeof(struct ipv6hdr)); 166 167 skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); 168 skb_reset_network_header(skb); 169 skb_mac_header_rebuild(skb); 170 171 hdr = ipv6_hdr(skb); 172 memmove(hdr, oldhdr, sizeof(*hdr)); 173 isrh = (void *)hdr + sizeof(*hdr); 174 memcpy(isrh, csrh, hdrlen); 175 176 isrh->nexthdr = hdr->nexthdr; 177 hdr->nexthdr = NEXTHDR_ROUTING; 178 hdr->daddr = srh->rpl_segaddr[0]; 179 180 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 181 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 182 183 skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); 184 185 kfree(buf); 186 187 return 0; 188 } 189 190 static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt, 191 struct dst_entry *cache_dst) 192 { 193 struct dst_entry *dst = skb_dst(skb); 194 struct rpl_iptunnel_encap *tinfo; 195 196 if (skb->protocol != htons(ETH_P_IPV6)) 197 return -EINVAL; 198 199 tinfo = rpl_encap_lwtunnel(dst->lwtstate); 200 201 return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst); 202 } 203 204 static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) 205 { 206 struct dst_entry *orig_dst = skb_dst(skb); 207 struct dst_entry *dst = NULL; 208 struct rpl_lwt *rlwt; 209 int err; 210 211 rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); 212 213 local_bh_disable(); 214 dst = dst_cache_get(&rlwt->cache); 215 local_bh_enable(); 216 217 err = rpl_do_srh(skb, rlwt, dst); 218 if (unlikely(err)) 219 goto drop; 220 221 if (unlikely(!dst)) { 222 struct ipv6hdr *hdr = ipv6_hdr(skb); 223 struct flowi6 fl6; 224 225 memset(&fl6, 0, sizeof(fl6)); 226 fl6.daddr = hdr->daddr; 227 fl6.saddr = hdr->saddr; 228 fl6.flowlabel = ip6_flowinfo(hdr); 229 fl6.flowi6_mark = skb->mark; 230 fl6.flowi6_proto = hdr->nexthdr; 231 232 dst = ip6_route_output(net, NULL, &fl6); 233 if (dst->error) { 234 err = dst->error; 235 goto drop; 236 } 237 238 /* cache only if we don't create a dst reference loop */ 239 if (orig_dst->lwtstate != dst->lwtstate) { 240 local_bh_disable(); 241 dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); 242 local_bh_enable(); 243 } 244 245 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 246 if (unlikely(err)) 247 goto drop; 248 } 249 250 skb_dst_drop(skb); 251 skb_dst_set(skb, dst); 252 253 return dst_output(net, sk, skb); 254 255 drop: 256 dst_release(dst); 257 kfree_skb(skb); 258 return err; 259 } 260 261 static int rpl_input(struct sk_buff *skb) 262 { 263 struct dst_entry *orig_dst = skb_dst(skb); 264 struct dst_entry *dst = NULL; 265 struct lwtunnel_state *lwtst; 266 struct rpl_lwt *rlwt; 267 int err; 268 269 /* We cannot dereference "orig_dst" once ip6_route_input() or 270 * skb_dst_drop() is called. However, in order to detect a dst loop, we 271 * need the address of its lwtstate. So, save the address of lwtstate 272 * now and use it later as a comparison. 273 */ 274 lwtst = orig_dst->lwtstate; 275 276 rlwt = rpl_lwt_lwtunnel(lwtst); 277 278 local_bh_disable(); 279 dst = dst_cache_get(&rlwt->cache); 280 local_bh_enable(); 281 282 err = rpl_do_srh(skb, rlwt, dst); 283 if (unlikely(err)) { 284 dst_release(dst); 285 goto drop; 286 } 287 288 if (!dst) { 289 ip6_route_input(skb); 290 dst = skb_dst(skb); 291 292 /* cache only if we don't create a dst reference loop */ 293 if (!dst->error && lwtst != dst->lwtstate) { 294 local_bh_disable(); 295 dst_cache_set_ip6(&rlwt->cache, dst, 296 &ipv6_hdr(skb)->saddr); 297 local_bh_enable(); 298 } 299 300 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 301 if (unlikely(err)) 302 goto drop; 303 } else { 304 skb_dst_drop(skb); 305 skb_dst_set(skb, dst); 306 } 307 308 return dst_input(skb); 309 310 drop: 311 kfree_skb(skb); 312 return err; 313 } 314 315 static int nla_put_rpl_srh(struct sk_buff *skb, int attrtype, 316 struct rpl_iptunnel_encap *tuninfo) 317 { 318 struct rpl_iptunnel_encap *data; 319 struct nlattr *nla; 320 int len; 321 322 len = RPL_IPTUNNEL_SRH_SIZE(tuninfo->srh); 323 324 nla = nla_reserve(skb, attrtype, len); 325 if (!nla) 326 return -EMSGSIZE; 327 328 data = nla_data(nla); 329 memcpy(data, tuninfo->srh, len); 330 331 return 0; 332 } 333 334 static int rpl_fill_encap_info(struct sk_buff *skb, 335 struct lwtunnel_state *lwtstate) 336 { 337 struct rpl_iptunnel_encap *tuninfo = rpl_encap_lwtunnel(lwtstate); 338 339 if (nla_put_rpl_srh(skb, RPL_IPTUNNEL_SRH, tuninfo)) 340 return -EMSGSIZE; 341 342 return 0; 343 } 344 345 static int rpl_encap_nlsize(struct lwtunnel_state *lwtstate) 346 { 347 struct rpl_iptunnel_encap *tuninfo = rpl_encap_lwtunnel(lwtstate); 348 349 return nla_total_size(RPL_IPTUNNEL_SRH_SIZE(tuninfo->srh)); 350 } 351 352 static int rpl_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 353 { 354 struct rpl_iptunnel_encap *a_hdr = rpl_encap_lwtunnel(a); 355 struct rpl_iptunnel_encap *b_hdr = rpl_encap_lwtunnel(b); 356 int len = RPL_IPTUNNEL_SRH_SIZE(a_hdr->srh); 357 358 if (len != RPL_IPTUNNEL_SRH_SIZE(b_hdr->srh)) 359 return 1; 360 361 return memcmp(a_hdr, b_hdr, len); 362 } 363 364 static const struct lwtunnel_encap_ops rpl_ops = { 365 .build_state = rpl_build_state, 366 .destroy_state = rpl_destroy_state, 367 .output = rpl_output, 368 .input = rpl_input, 369 .fill_encap = rpl_fill_encap_info, 370 .get_encap_size = rpl_encap_nlsize, 371 .cmp_encap = rpl_encap_cmp, 372 .owner = THIS_MODULE, 373 }; 374 375 int __init rpl_init(void) 376 { 377 int err; 378 379 err = lwtunnel_encap_add_ops(&rpl_ops, LWTUNNEL_ENCAP_RPL); 380 if (err) 381 goto out; 382 383 pr_info("RPL Segment Routing with IPv6\n"); 384 385 return 0; 386 387 out: 388 return err; 389 } 390 391 void rpl_exit(void) 392 { 393 lwtunnel_encap_del_ops(&rpl_ops, LWTUNNEL_ENCAP_RPL); 394 } 395