xref: /linux/net/sched/act_bpf.c (revision 9d796e66230205cd3366f5660387bd9ecca9d336)
1 /*
2  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  */
9 
10 #include <linux/module.h>
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/skbuff.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/filter.h>
16 #include <linux/bpf.h>
17 
18 #include <net/netlink.h>
19 #include <net/pkt_sched.h>
20 
21 #include <linux/tc_act/tc_bpf.h>
22 #include <net/tc_act/tc_bpf.h>
23 
24 #define BPF_TAB_MASK		15	/* second argument to tcf_register_action() at module init */
25 #define ACT_BPF_NAME_LEN	256	/* .len of the TCA_ACT_BPF_NAME entry in act_bpf_policy */
26 
27 struct tcf_bpf_cfg {	/* staging area filled by the tcf_bpf_init_from_*() helpers */
28 	struct bpf_prog *filter;	/* program obtained from bpf_prog_create() or bpf_prog_get() */
29 	struct sock_filter *bpf_ops;	/* kernel copy of TCA_ACT_BPF_OPS; stays NULL on the fd path */
30 	char *bpf_name;	/* kernel copy of TCA_ACT_BPF_NAME, NULL when not supplied */
31 	u32 bpf_fd;	/* value read from TCA_ACT_BPF_FD */
32 	u16 bpf_num_ops;	/* value read from TCA_ACT_BPF_OPS_LEN */
33 };
34 
35 static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
36 		   struct tcf_result *res)
37 {
38 	struct tcf_bpf *prog = act->priv;
39 	int action, filter_res;
40 
41 	spin_lock(&prog->tcf_lock);
42 
43 	prog->tcf_tm.lastuse = jiffies;
44 	bstats_update(&prog->tcf_bstats, skb);
45 
46 	/* Needed here for accessing maps. */
47 	rcu_read_lock();
48 	filter_res = BPF_PROG_RUN(prog->filter, skb);
49 	rcu_read_unlock();
50 
51 	/* A BPF program may overwrite the default action opcode.
52 	 * Similarly as in cls_bpf, if filter_res == -1 we use the
53 	 * default action specified from tc.
54 	 *
55 	 * In case a different well-known TC_ACT opcode has been
56 	 * returned, it will overwrite the default one.
57 	 *
58 	 * For everything else that is unkown, TC_ACT_UNSPEC is
59 	 * returned.
60 	 */
61 	switch (filter_res) {
62 	case TC_ACT_PIPE:
63 	case TC_ACT_RECLASSIFY:
64 	case TC_ACT_OK:
65 		action = filter_res;
66 		break;
67 	case TC_ACT_SHOT:
68 		action = filter_res;
69 		prog->tcf_qstats.drops++;
70 		break;
71 	case TC_ACT_UNSPEC:
72 		action = prog->tcf_action;
73 		break;
74 	default:
75 		action = TC_ACT_UNSPEC;
76 		break;
77 	}
78 
79 	spin_unlock(&prog->tcf_lock);
80 	return action;
81 }
82 
83 static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog)
84 {
85 	return !prog->bpf_ops;
86 }
87 
88 static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
89 				 struct sk_buff *skb)
90 {
91 	struct nlattr *nla;
92 
93 	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops))
94 		return -EMSGSIZE;
95 
96 	nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops *
97 			  sizeof(struct sock_filter));
98 	if (nla == NULL)
99 		return -EMSGSIZE;
100 
101 	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
102 
103 	return 0;
104 }
105 
106 static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
107 				  struct sk_buff *skb)
108 {
109 	if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
110 		return -EMSGSIZE;
111 
112 	if (prog->bpf_name &&
113 	    nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
114 		return -EMSGSIZE;
115 
116 	return 0;
117 }
118 
119 static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
120 			int bind, int ref)
121 {
122 	unsigned char *tp = skb_tail_pointer(skb);
123 	struct tcf_bpf *prog = act->priv;
124 	struct tc_act_bpf opt = {
125 		.index   = prog->tcf_index,
126 		.refcnt  = prog->tcf_refcnt - ref,
127 		.bindcnt = prog->tcf_bindcnt - bind,
128 		.action  = prog->tcf_action,
129 	};
130 	struct tcf_t tm;
131 	int ret;
132 
133 	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
134 		goto nla_put_failure;
135 
136 	if (tcf_bpf_is_ebpf(prog))
137 		ret = tcf_bpf_dump_ebpf_info(prog, skb);
138 	else
139 		ret = tcf_bpf_dump_bpf_info(prog, skb);
140 	if (ret)
141 		goto nla_put_failure;
142 
143 	tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
144 	tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
145 	tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
146 
147 	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
148 		goto nla_put_failure;
149 
150 	return skb->len;
151 
152 nla_put_failure:
153 	nlmsg_trim(skb, tp);
154 	return -1;
155 }
156 
157 static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {	/* handed to nla_parse_nested() in tcf_bpf_init() */
158 	[TCA_ACT_BPF_PARMS]	= { .len = sizeof(struct tc_act_bpf) },	/* read as struct tc_act_bpf */
159 	[TCA_ACT_BPF_FD]	= { .type = NLA_U32 },	/* read with nla_get_u32() */
160 	[TCA_ACT_BPF_NAME]	= { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },	/* optional program name */
161 	[TCA_ACT_BPF_OPS_LEN]	= { .type = NLA_U16 },	/* classic BPF instruction count */
162 	[TCA_ACT_BPF_OPS]	= { .type = NLA_BINARY,	/* classic BPF instruction array */
163 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
164 };
165 
166 static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
167 {
168 	struct sock_filter *bpf_ops;
169 	struct sock_fprog_kern fprog_tmp;
170 	struct bpf_prog *fp;
171 	u16 bpf_size, bpf_num_ops;
172 	int ret;
173 
174 	bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
175 	if (bpf_num_ops	> BPF_MAXINSNS || bpf_num_ops == 0)
176 		return -EINVAL;
177 
178 	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
179 	if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
180 		return -EINVAL;
181 
182 	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
183 	if (bpf_ops == NULL)
184 		return -ENOMEM;
185 
186 	memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
187 
188 	fprog_tmp.len = bpf_num_ops;
189 	fprog_tmp.filter = bpf_ops;
190 
191 	ret = bpf_prog_create(&fp, &fprog_tmp);
192 	if (ret < 0) {
193 		kfree(bpf_ops);
194 		return ret;
195 	}
196 
197 	cfg->bpf_ops = bpf_ops;
198 	cfg->bpf_num_ops = bpf_num_ops;
199 	cfg->filter = fp;
200 
201 	return 0;
202 }
203 
204 static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
205 {
206 	struct bpf_prog *fp;
207 	char *name = NULL;
208 	u32 bpf_fd;
209 
210 	bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
211 
212 	fp = bpf_prog_get(bpf_fd);
213 	if (IS_ERR(fp))
214 		return PTR_ERR(fp);
215 
216 	if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
217 		bpf_prog_put(fp);
218 		return -EINVAL;
219 	}
220 
221 	if (tb[TCA_ACT_BPF_NAME]) {
222 		name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
223 			       nla_len(tb[TCA_ACT_BPF_NAME]),
224 			       GFP_KERNEL);
225 		if (!name) {
226 			bpf_prog_put(fp);
227 			return -ENOMEM;
228 		}
229 	}
230 
231 	cfg->bpf_fd = bpf_fd;
232 	cfg->bpf_name = name;
233 	cfg->filter = fp;
234 
235 	return 0;
236 }
237 
238 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
239 			struct nlattr *est, struct tc_action *act,
240 			int replace, int bind)
241 {
242 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
243 	struct tc_act_bpf *parm;
244 	struct tcf_bpf *prog;
245 	struct tcf_bpf_cfg cfg;
246 	bool is_bpf, is_ebpf;
247 	int ret;
248 
249 	if (!nla)
250 		return -EINVAL;
251 
252 	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
253 	if (ret < 0)
254 		return ret;
255 
256 	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
257 	is_ebpf = tb[TCA_ACT_BPF_FD];
258 
259 	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
260 	    !tb[TCA_ACT_BPF_PARMS])
261 		return -EINVAL;
262 
263 	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
264 
265 	memset(&cfg, 0, sizeof(cfg));
266 
267 	ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
268 		       tcf_bpf_init_from_efd(tb, &cfg);
269 	if (ret < 0)
270 		return ret;
271 
272 	if (!tcf_hash_check(parm->index, act, bind)) {
273 		ret = tcf_hash_create(parm->index, est, act,
274 				      sizeof(*prog), bind);
275 		if (ret < 0)
276 			goto destroy_fp;
277 
278 		ret = ACT_P_CREATED;
279 	} else {
280 		/* Don't override defaults. */
281 		if (bind)
282 			goto destroy_fp;
283 
284 		tcf_hash_release(act, bind);
285 		if (!replace) {
286 			ret = -EEXIST;
287 			goto destroy_fp;
288 		}
289 	}
290 
291 	prog = to_bpf(act);
292 	spin_lock_bh(&prog->tcf_lock);
293 
294 	prog->bpf_ops = cfg.bpf_ops;
295 	prog->bpf_name = cfg.bpf_name;
296 
297 	if (cfg.bpf_num_ops)
298 		prog->bpf_num_ops = cfg.bpf_num_ops;
299 	if (cfg.bpf_fd)
300 		prog->bpf_fd = cfg.bpf_fd;
301 
302 	prog->tcf_action = parm->action;
303 	prog->filter = cfg.filter;
304 
305 	spin_unlock_bh(&prog->tcf_lock);
306 
307 	if (ret == ACT_P_CREATED)
308 		tcf_hash_insert(act);
309 
310 	return ret;
311 
312 destroy_fp:
313 	if (is_ebpf)
314 		bpf_prog_put(cfg.filter);
315 	else
316 		bpf_prog_destroy(cfg.filter);
317 
318 	kfree(cfg.bpf_ops);
319 	kfree(cfg.bpf_name);
320 
321 	return ret;
322 }
323 
324 static void tcf_bpf_cleanup(struct tc_action *act, int bind)
325 {
326 	const struct tcf_bpf *prog = act->priv;
327 
328 	if (tcf_bpf_is_ebpf(prog))
329 		bpf_prog_put(prog->filter);
330 	else
331 		bpf_prog_destroy(prog->filter);
332 }
333 
334 static struct tc_action_ops act_bpf_ops __read_mostly = {	/* registered in bpf_init_module() */
335 	.kind		=	"bpf",
336 	.type		=	TCA_ACT_BPF,
337 	.owner		=	THIS_MODULE,
338 	.act		=	tcf_bpf,	/* runs the BPF program on an skb */
339 	.dump		=	tcf_bpf_dump,	/* writes the action as netlink attributes */
340 	.cleanup	=	tcf_bpf_cleanup,	/* releases the attached program */
341 	.init		=	tcf_bpf_init,	/* creates or replaces an action from netlink attributes */
342 };
343 
344 static int __init bpf_init_module(void)
345 {
346 	return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK);
347 }
348 
349 static void __exit bpf_cleanup_module(void)
350 {
351 	tcf_unregister_action(&act_bpf_ops);
352 }
353 
354 module_init(bpf_init_module);	/* bpf_init_module() registers act_bpf_ops */
355 module_exit(bpf_cleanup_module);	/* bpf_cleanup_module() unregisters act_bpf_ops */
356 
357 MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
358 MODULE_DESCRIPTION("TC BPF based action");
359 MODULE_LICENSE("GPL v2");
360