1 /* 2 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 */ 9 10 #include <linux/module.h> 11 #include <linux/init.h> 12 #include <linux/kernel.h> 13 #include <linux/skbuff.h> 14 #include <linux/rtnetlink.h> 15 #include <linux/filter.h> 16 #include <linux/bpf.h> 17 18 #include <net/netlink.h> 19 #include <net/pkt_sched.h> 20 21 #include <linux/tc_act/tc_bpf.h> 22 #include <net/tc_act/tc_bpf.h> 23 24 #define BPF_TAB_MASK 15 25 #define ACT_BPF_NAME_LEN 256 26 27 struct tcf_bpf_cfg { 28 struct bpf_prog *filter; 29 struct sock_filter *bpf_ops; 30 char *bpf_name; 31 u32 bpf_fd; 32 u16 bpf_num_ops; 33 }; 34 35 static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, 36 struct tcf_result *res) 37 { 38 struct tcf_bpf *prog = act->priv; 39 int action, filter_res; 40 bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; 41 42 if (unlikely(!skb_mac_header_was_set(skb))) 43 return TC_ACT_UNSPEC; 44 45 spin_lock(&prog->tcf_lock); 46 47 prog->tcf_tm.lastuse = jiffies; 48 bstats_update(&prog->tcf_bstats, skb); 49 50 /* Needed here for accessing maps. */ 51 rcu_read_lock(); 52 if (at_ingress) { 53 __skb_push(skb, skb->mac_len); 54 filter_res = BPF_PROG_RUN(prog->filter, skb); 55 __skb_pull(skb, skb->mac_len); 56 } else { 57 filter_res = BPF_PROG_RUN(prog->filter, skb); 58 } 59 rcu_read_unlock(); 60 61 /* A BPF program may overwrite the default action opcode. 62 * Similarly as in cls_bpf, if filter_res == -1 we use the 63 * default action specified from tc. 64 * 65 * In case a different well-known TC_ACT opcode has been 66 * returned, it will overwrite the default one. 67 * 68 * For everything else that is unkown, TC_ACT_UNSPEC is 69 * returned. 70 */ 71 switch (filter_res) { 72 case TC_ACT_PIPE: 73 case TC_ACT_RECLASSIFY: 74 case TC_ACT_OK: 75 action = filter_res; 76 break; 77 case TC_ACT_SHOT: 78 action = filter_res; 79 prog->tcf_qstats.drops++; 80 break; 81 case TC_ACT_UNSPEC: 82 action = prog->tcf_action; 83 break; 84 default: 85 action = TC_ACT_UNSPEC; 86 break; 87 } 88 89 spin_unlock(&prog->tcf_lock); 90 return action; 91 } 92 93 static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog) 94 { 95 return !prog->bpf_ops; 96 } 97 98 static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, 99 struct sk_buff *skb) 100 { 101 struct nlattr *nla; 102 103 if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops)) 104 return -EMSGSIZE; 105 106 nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops * 107 sizeof(struct sock_filter)); 108 if (nla == NULL) 109 return -EMSGSIZE; 110 111 memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); 112 113 return 0; 114 } 115 116 static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, 117 struct sk_buff *skb) 118 { 119 if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd)) 120 return -EMSGSIZE; 121 122 if (prog->bpf_name && 123 nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) 124 return -EMSGSIZE; 125 126 return 0; 127 } 128 129 static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, 130 int bind, int ref) 131 { 132 unsigned char *tp = skb_tail_pointer(skb); 133 struct tcf_bpf *prog = act->priv; 134 struct tc_act_bpf opt = { 135 .index = prog->tcf_index, 136 .refcnt = prog->tcf_refcnt - ref, 137 .bindcnt = prog->tcf_bindcnt - bind, 138 .action = prog->tcf_action, 139 }; 140 struct tcf_t tm; 141 int ret; 142 143 if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) 144 goto nla_put_failure; 145 146 if (tcf_bpf_is_ebpf(prog)) 147 ret = tcf_bpf_dump_ebpf_info(prog, skb); 148 else 149 ret = tcf_bpf_dump_bpf_info(prog, skb); 150 if (ret) 151 goto nla_put_failure; 152 153 tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install); 154 tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); 155 tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); 156 157 if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) 158 goto nla_put_failure; 159 160 return skb->len; 161 162 nla_put_failure: 163 nlmsg_trim(skb, tp); 164 return -1; 165 } 166 167 static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { 168 [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, 169 [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, 170 [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, 171 [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, 172 [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, 173 .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, 174 }; 175 176 static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) 177 { 178 struct sock_filter *bpf_ops; 179 struct sock_fprog_kern fprog_tmp; 180 struct bpf_prog *fp; 181 u16 bpf_size, bpf_num_ops; 182 int ret; 183 184 bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); 185 if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) 186 return -EINVAL; 187 188 bpf_size = bpf_num_ops * sizeof(*bpf_ops); 189 if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) 190 return -EINVAL; 191 192 bpf_ops = kzalloc(bpf_size, GFP_KERNEL); 193 if (bpf_ops == NULL) 194 return -ENOMEM; 195 196 memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); 197 198 fprog_tmp.len = bpf_num_ops; 199 fprog_tmp.filter = bpf_ops; 200 201 ret = bpf_prog_create(&fp, &fprog_tmp); 202 if (ret < 0) { 203 kfree(bpf_ops); 204 return ret; 205 } 206 207 cfg->bpf_ops = bpf_ops; 208 cfg->bpf_num_ops = bpf_num_ops; 209 cfg->filter = fp; 210 211 return 0; 212 } 213 214 static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) 215 { 216 struct bpf_prog *fp; 217 char *name = NULL; 218 u32 bpf_fd; 219 220 bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); 221 222 fp = bpf_prog_get(bpf_fd); 223 if (IS_ERR(fp)) 224 return PTR_ERR(fp); 225 226 if (fp->type != BPF_PROG_TYPE_SCHED_ACT) { 227 bpf_prog_put(fp); 228 return -EINVAL; 229 } 230 231 if (tb[TCA_ACT_BPF_NAME]) { 232 name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), 233 nla_len(tb[TCA_ACT_BPF_NAME]), 234 GFP_KERNEL); 235 if (!name) { 236 bpf_prog_put(fp); 237 return -ENOMEM; 238 } 239 } 240 241 cfg->bpf_fd = bpf_fd; 242 cfg->bpf_name = name; 243 cfg->filter = fp; 244 245 return 0; 246 } 247 248 static int tcf_bpf_init(struct net *net, struct nlattr *nla, 249 struct nlattr *est, struct tc_action *act, 250 int replace, int bind) 251 { 252 struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; 253 struct tc_act_bpf *parm; 254 struct tcf_bpf *prog; 255 struct tcf_bpf_cfg cfg; 256 bool is_bpf, is_ebpf; 257 int ret; 258 259 if (!nla) 260 return -EINVAL; 261 262 ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); 263 if (ret < 0) 264 return ret; 265 266 is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; 267 is_ebpf = tb[TCA_ACT_BPF_FD]; 268 269 if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || 270 !tb[TCA_ACT_BPF_PARMS]) 271 return -EINVAL; 272 273 parm = nla_data(tb[TCA_ACT_BPF_PARMS]); 274 275 memset(&cfg, 0, sizeof(cfg)); 276 277 ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : 278 tcf_bpf_init_from_efd(tb, &cfg); 279 if (ret < 0) 280 return ret; 281 282 if (!tcf_hash_check(parm->index, act, bind)) { 283 ret = tcf_hash_create(parm->index, est, act, 284 sizeof(*prog), bind); 285 if (ret < 0) 286 goto destroy_fp; 287 288 ret = ACT_P_CREATED; 289 } else { 290 /* Don't override defaults. */ 291 if (bind) 292 goto destroy_fp; 293 294 tcf_hash_release(act, bind); 295 if (!replace) { 296 ret = -EEXIST; 297 goto destroy_fp; 298 } 299 } 300 301 prog = to_bpf(act); 302 spin_lock_bh(&prog->tcf_lock); 303 304 prog->bpf_ops = cfg.bpf_ops; 305 prog->bpf_name = cfg.bpf_name; 306 307 if (cfg.bpf_num_ops) 308 prog->bpf_num_ops = cfg.bpf_num_ops; 309 if (cfg.bpf_fd) 310 prog->bpf_fd = cfg.bpf_fd; 311 312 prog->tcf_action = parm->action; 313 prog->filter = cfg.filter; 314 315 spin_unlock_bh(&prog->tcf_lock); 316 317 if (ret == ACT_P_CREATED) 318 tcf_hash_insert(act); 319 320 return ret; 321 322 destroy_fp: 323 if (is_ebpf) 324 bpf_prog_put(cfg.filter); 325 else 326 bpf_prog_destroy(cfg.filter); 327 328 kfree(cfg.bpf_ops); 329 kfree(cfg.bpf_name); 330 331 return ret; 332 } 333 334 static void tcf_bpf_cleanup(struct tc_action *act, int bind) 335 { 336 const struct tcf_bpf *prog = act->priv; 337 338 if (tcf_bpf_is_ebpf(prog)) 339 bpf_prog_put(prog->filter); 340 else 341 bpf_prog_destroy(prog->filter); 342 } 343 344 static struct tc_action_ops act_bpf_ops __read_mostly = { 345 .kind = "bpf", 346 .type = TCA_ACT_BPF, 347 .owner = THIS_MODULE, 348 .act = tcf_bpf, 349 .dump = tcf_bpf_dump, 350 .cleanup = tcf_bpf_cleanup, 351 .init = tcf_bpf_init, 352 }; 353 354 static int __init bpf_init_module(void) 355 { 356 return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); 357 } 358 359 static void __exit bpf_cleanup_module(void) 360 { 361 tcf_unregister_action(&act_bpf_ops); 362 } 363 364 module_init(bpf_init_module); 365 module_exit(bpf_cleanup_module); 366 367 MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); 368 MODULE_DESCRIPTION("TC BPF based action"); 369 MODULE_LICENSE("GPL v2"); 370