1 /* 2 * lwtunnel Infrastructure for light weight tunnels like mpls 3 * 4 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 */ 12 13 #include <linux/capability.h> 14 #include <linux/module.h> 15 #include <linux/types.h> 16 #include <linux/kernel.h> 17 #include <linux/slab.h> 18 #include <linux/uaccess.h> 19 #include <linux/skbuff.h> 20 #include <linux/netdevice.h> 21 #include <linux/lwtunnel.h> 22 #include <linux/in.h> 23 #include <linux/init.h> 24 #include <linux/err.h> 25 26 #include <net/lwtunnel.h> 27 #include <net/rtnetlink.h> 28 #include <net/ip6_fib.h> 29 #include <net/nexthop.h> 30 31 #ifdef CONFIG_MODULES 32 33 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 34 { 35 /* Only lwt encaps implemented without using an interface for 36 * the encap need to return a string here. 37 */ 38 switch (encap_type) { 39 case LWTUNNEL_ENCAP_MPLS: 40 return "MPLS"; 41 case LWTUNNEL_ENCAP_ILA: 42 return "ILA"; 43 case LWTUNNEL_ENCAP_SEG6: 44 return "SEG6"; 45 case LWTUNNEL_ENCAP_BPF: 46 return "BPF"; 47 case LWTUNNEL_ENCAP_IP6: 48 case LWTUNNEL_ENCAP_IP: 49 case LWTUNNEL_ENCAP_NONE: 50 case __LWTUNNEL_ENCAP_MAX: 51 /* should not have got here */ 52 WARN_ON(1); 53 break; 54 } 55 return NULL; 56 } 57 58 #endif /* CONFIG_MODULES */ 59 60 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 61 { 62 struct lwtunnel_state *lws; 63 64 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 65 66 return lws; 67 } 68 EXPORT_SYMBOL(lwtunnel_state_alloc); 69 70 static const struct lwtunnel_encap_ops __rcu * 71 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 72 73 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 74 unsigned int num) 75 { 76 if (num > LWTUNNEL_ENCAP_MAX) 77 return -ERANGE; 78 79 return !cmpxchg((const struct lwtunnel_encap_ops **) 80 &lwtun_encaps[num], 81 NULL, ops) ? 0 : -1; 82 } 83 EXPORT_SYMBOL(lwtunnel_encap_add_ops); 84 85 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 86 unsigned int encap_type) 87 { 88 int ret; 89 90 if (encap_type == LWTUNNEL_ENCAP_NONE || 91 encap_type > LWTUNNEL_ENCAP_MAX) 92 return -ERANGE; 93 94 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 95 &lwtun_encaps[encap_type], 96 ops, NULL) == ops) ? 0 : -1; 97 98 synchronize_net(); 99 100 return ret; 101 } 102 EXPORT_SYMBOL(lwtunnel_encap_del_ops); 103 104 int lwtunnel_build_state(u16 encap_type, 105 struct nlattr *encap, unsigned int family, 106 const void *cfg, struct lwtunnel_state **lws) 107 { 108 const struct lwtunnel_encap_ops *ops; 109 int ret = -EINVAL; 110 111 if (encap_type == LWTUNNEL_ENCAP_NONE || 112 encap_type > LWTUNNEL_ENCAP_MAX) 113 return ret; 114 115 ret = -EOPNOTSUPP; 116 rcu_read_lock(); 117 ops = rcu_dereference(lwtun_encaps[encap_type]); 118 if (likely(ops && ops->build_state && try_module_get(ops->owner))) { 119 ret = ops->build_state(encap, family, cfg, lws); 120 if (ret) 121 module_put(ops->owner); 122 } 123 rcu_read_unlock(); 124 125 return ret; 126 } 127 EXPORT_SYMBOL(lwtunnel_build_state); 128 129 int lwtunnel_valid_encap_type(u16 encap_type) 130 { 131 const struct lwtunnel_encap_ops *ops; 132 int ret = -EINVAL; 133 134 if (encap_type == LWTUNNEL_ENCAP_NONE || 135 encap_type > LWTUNNEL_ENCAP_MAX) 136 return ret; 137 138 rcu_read_lock(); 139 ops = rcu_dereference(lwtun_encaps[encap_type]); 140 rcu_read_unlock(); 141 #ifdef CONFIG_MODULES 142 if (!ops) { 143 const char *encap_type_str = lwtunnel_encap_str(encap_type); 144 145 if (encap_type_str) { 146 __rtnl_unlock(); 147 request_module("rtnl-lwt-%s", encap_type_str); 148 rtnl_lock(); 149 150 rcu_read_lock(); 151 ops = rcu_dereference(lwtun_encaps[encap_type]); 152 rcu_read_unlock(); 153 } 154 } 155 #endif 156 return ops ? 0 : -EOPNOTSUPP; 157 } 158 EXPORT_SYMBOL(lwtunnel_valid_encap_type); 159 160 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) 161 { 162 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 163 struct nlattr *nla_entype; 164 struct nlattr *attrs; 165 u16 encap_type; 166 int attrlen; 167 168 while (rtnh_ok(rtnh, remaining)) { 169 attrlen = rtnh_attrlen(rtnh); 170 if (attrlen > 0) { 171 attrs = rtnh_attrs(rtnh); 172 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 173 174 if (nla_entype) { 175 encap_type = nla_get_u16(nla_entype); 176 177 if (lwtunnel_valid_encap_type(encap_type) != 0) 178 return -EOPNOTSUPP; 179 } 180 } 181 rtnh = rtnh_next(rtnh, &remaining); 182 } 183 184 return 0; 185 } 186 EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); 187 188 void lwtstate_free(struct lwtunnel_state *lws) 189 { 190 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 191 192 if (ops->destroy_state) { 193 ops->destroy_state(lws); 194 kfree_rcu(lws, rcu); 195 } else { 196 kfree(lws); 197 } 198 module_put(ops->owner); 199 } 200 EXPORT_SYMBOL(lwtstate_free); 201 202 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) 203 { 204 const struct lwtunnel_encap_ops *ops; 205 struct nlattr *nest; 206 int ret; 207 208 if (!lwtstate) 209 return 0; 210 211 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 212 lwtstate->type > LWTUNNEL_ENCAP_MAX) 213 return 0; 214 215 nest = nla_nest_start(skb, RTA_ENCAP); 216 if (!nest) 217 return -EMSGSIZE; 218 219 ret = -EOPNOTSUPP; 220 rcu_read_lock(); 221 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 222 if (likely(ops && ops->fill_encap)) 223 ret = ops->fill_encap(skb, lwtstate); 224 rcu_read_unlock(); 225 226 if (ret) 227 goto nla_put_failure; 228 nla_nest_end(skb, nest); 229 ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type); 230 if (ret) 231 goto nla_put_failure; 232 233 return 0; 234 235 nla_put_failure: 236 nla_nest_cancel(skb, nest); 237 238 return (ret == -EOPNOTSUPP ? 0 : ret); 239 } 240 EXPORT_SYMBOL(lwtunnel_fill_encap); 241 242 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 243 { 244 const struct lwtunnel_encap_ops *ops; 245 int ret = 0; 246 247 if (!lwtstate) 248 return 0; 249 250 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 251 lwtstate->type > LWTUNNEL_ENCAP_MAX) 252 return 0; 253 254 rcu_read_lock(); 255 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 256 if (likely(ops && ops->get_encap_size)) 257 ret = nla_total_size(ops->get_encap_size(lwtstate)); 258 rcu_read_unlock(); 259 260 return ret; 261 } 262 EXPORT_SYMBOL(lwtunnel_get_encap_size); 263 264 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 265 { 266 const struct lwtunnel_encap_ops *ops; 267 int ret = 0; 268 269 if (!a && !b) 270 return 0; 271 272 if (!a || !b) 273 return 1; 274 275 if (a->type != b->type) 276 return 1; 277 278 if (a->type == LWTUNNEL_ENCAP_NONE || 279 a->type > LWTUNNEL_ENCAP_MAX) 280 return 0; 281 282 rcu_read_lock(); 283 ops = rcu_dereference(lwtun_encaps[a->type]); 284 if (likely(ops && ops->cmp_encap)) 285 ret = ops->cmp_encap(a, b); 286 rcu_read_unlock(); 287 288 return ret; 289 } 290 EXPORT_SYMBOL(lwtunnel_cmp_encap); 291 292 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 293 { 294 struct dst_entry *dst = skb_dst(skb); 295 const struct lwtunnel_encap_ops *ops; 296 struct lwtunnel_state *lwtstate; 297 int ret = -EINVAL; 298 299 if (!dst) 300 goto drop; 301 lwtstate = dst->lwtstate; 302 303 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 304 lwtstate->type > LWTUNNEL_ENCAP_MAX) 305 return 0; 306 307 ret = -EOPNOTSUPP; 308 rcu_read_lock(); 309 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 310 if (likely(ops && ops->output)) 311 ret = ops->output(net, sk, skb); 312 rcu_read_unlock(); 313 314 if (ret == -EOPNOTSUPP) 315 goto drop; 316 317 return ret; 318 319 drop: 320 kfree_skb(skb); 321 322 return ret; 323 } 324 EXPORT_SYMBOL(lwtunnel_output); 325 326 int lwtunnel_xmit(struct sk_buff *skb) 327 { 328 struct dst_entry *dst = skb_dst(skb); 329 const struct lwtunnel_encap_ops *ops; 330 struct lwtunnel_state *lwtstate; 331 int ret = -EINVAL; 332 333 if (!dst) 334 goto drop; 335 336 lwtstate = dst->lwtstate; 337 338 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 339 lwtstate->type > LWTUNNEL_ENCAP_MAX) 340 return 0; 341 342 ret = -EOPNOTSUPP; 343 rcu_read_lock(); 344 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 345 if (likely(ops && ops->xmit)) 346 ret = ops->xmit(skb); 347 rcu_read_unlock(); 348 349 if (ret == -EOPNOTSUPP) 350 goto drop; 351 352 return ret; 353 354 drop: 355 kfree_skb(skb); 356 357 return ret; 358 } 359 EXPORT_SYMBOL(lwtunnel_xmit); 360 361 int lwtunnel_input(struct sk_buff *skb) 362 { 363 struct dst_entry *dst = skb_dst(skb); 364 const struct lwtunnel_encap_ops *ops; 365 struct lwtunnel_state *lwtstate; 366 int ret = -EINVAL; 367 368 if (!dst) 369 goto drop; 370 lwtstate = dst->lwtstate; 371 372 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 373 lwtstate->type > LWTUNNEL_ENCAP_MAX) 374 return 0; 375 376 ret = -EOPNOTSUPP; 377 rcu_read_lock(); 378 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 379 if (likely(ops && ops->input)) 380 ret = ops->input(skb); 381 rcu_read_unlock(); 382 383 if (ret == -EOPNOTSUPP) 384 goto drop; 385 386 return ret; 387 388 drop: 389 kfree_skb(skb); 390 391 return ret; 392 } 393 EXPORT_SYMBOL(lwtunnel_input); 394