xref: /linux/net/core/netprio_cgroup.c (revision 26b0d14106954ae46d2f4f7eec3481828a210f7d)
1 /*
2  * net/core/netprio_cgroup.c	Priority Control Group
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Neil Horman <nhorman@tuxdriver.com>
10  */
11 
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 #include <linux/types.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <linux/cgroup.h>
21 #include <linux/rcupdate.h>
22 #include <linux/atomic.h>
23 #include <net/rtnetlink.h>
24 #include <net/pkt_cls.h>
25 #include <net/sock.h>
26 #include <net/netprio_cgroup.h>
27 
/* Number of unsigned long words backing the priority-index bitmap below. */
28 #define PRIOIDX_SZ 128
29 
/* Bitmap of priority indices currently handed out (see get/put_prioidx). */
30 static unsigned long prioidx_map[PRIOIDX_SZ];
/* Taken by get_prioidx() and put_prioidx() around accesses to prioidx_map. */
31 static DEFINE_SPINLOCK(prioidx_map_lock);
/*
 * Index recorded by get_prioidx(); update_netdev_tables() sizes the
 * per-device priomaps to max_prioidx + 1 entries.
 */
32 static atomic_t max_prioidx = ATOMIC_INIT(0);
33 
34 static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
35 {
36 	return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
37 			    struct cgroup_netprio_state, css);
38 }
39 
40 static int get_prioidx(u32 *prio)
41 {
42 	unsigned long flags;
43 	u32 prioidx;
44 
45 	spin_lock_irqsave(&prioidx_map_lock, flags);
46 	prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
47 	if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
48 		spin_unlock_irqrestore(&prioidx_map_lock, flags);
49 		return -ENOSPC;
50 	}
51 	set_bit(prioidx, prioidx_map);
52 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
53 	atomic_set(&max_prioidx, prioidx);
54 	*prio = prioidx;
55 	return 0;
56 }
57 
58 static void put_prioidx(u32 idx)
59 {
60 	unsigned long flags;
61 
62 	spin_lock_irqsave(&prioidx_map_lock, flags);
63 	clear_bit(idx, prioidx_map);
64 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
65 }
66 
67 static void extend_netdev_table(struct net_device *dev, u32 new_len)
68 {
69 	size_t new_size = sizeof(struct netprio_map) +
70 			   ((sizeof(u32) * new_len));
71 	struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
72 	struct netprio_map *old_priomap;
73 	int i;
74 
75 	old_priomap  = rtnl_dereference(dev->priomap);
76 
77 	if (!new_priomap) {
78 		pr_warn("Unable to alloc new priomap!\n");
79 		return;
80 	}
81 
82 	for (i = 0;
83 	     old_priomap && (i < old_priomap->priomap_len);
84 	     i++)
85 		new_priomap->priomap[i] = old_priomap->priomap[i];
86 
87 	new_priomap->priomap_len = new_len;
88 
89 	rcu_assign_pointer(dev->priomap, new_priomap);
90 	if (old_priomap)
91 		kfree_rcu(old_priomap, rcu);
92 }
93 
94 static void update_netdev_tables(void)
95 {
96 	struct net_device *dev;
97 	u32 max_len = atomic_read(&max_prioidx) + 1;
98 	struct netprio_map *map;
99 
100 	rtnl_lock();
101 	for_each_netdev(&init_net, dev) {
102 		map = rtnl_dereference(dev->priomap);
103 		if ((!map) ||
104 		    (map->priomap_len < max_len))
105 			extend_netdev_table(dev, max_len);
106 	}
107 	rtnl_unlock();
108 }
109 
110 static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
111 {
112 	struct cgroup_netprio_state *cs;
113 	int ret;
114 
115 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
116 	if (!cs)
117 		return ERR_PTR(-ENOMEM);
118 
119 	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
120 		kfree(cs);
121 		return ERR_PTR(-EINVAL);
122 	}
123 
124 	ret = get_prioidx(&cs->prioidx);
125 	if (ret != 0) {
126 		pr_warn("No space in priority index array\n");
127 		kfree(cs);
128 		return ERR_PTR(ret);
129 	}
130 
131 	return &cs->css;
132 }
133 
134 static void cgrp_destroy(struct cgroup *cgrp)
135 {
136 	struct cgroup_netprio_state *cs;
137 	struct net_device *dev;
138 	struct netprio_map *map;
139 
140 	cs = cgrp_netprio_state(cgrp);
141 	rtnl_lock();
142 	for_each_netdev(&init_net, dev) {
143 		map = rtnl_dereference(dev->priomap);
144 		if (map)
145 			map->priomap[cs->prioidx] = 0;
146 	}
147 	rtnl_unlock();
148 	put_prioidx(cs->prioidx);
149 	kfree(cs);
150 }
151 
152 static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
153 {
154 	return (u64)cgrp_netprio_state(cgrp)->prioidx;
155 }
156 
157 static int read_priomap(struct cgroup *cont, struct cftype *cft,
158 			struct cgroup_map_cb *cb)
159 {
160 	struct net_device *dev;
161 	u32 prioidx = cgrp_netprio_state(cont)->prioidx;
162 	u32 priority;
163 	struct netprio_map *map;
164 
165 	rcu_read_lock();
166 	for_each_netdev_rcu(&init_net, dev) {
167 		map = rcu_dereference(dev->priomap);
168 		priority = map ? map->priomap[prioidx] : 0;
169 		cb->fill(cb, dev->name, priority);
170 	}
171 	rcu_read_unlock();
172 	return 0;
173 }
174 
175 static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
176 			 const char *buffer)
177 {
178 	char *devname = kstrdup(buffer, GFP_KERNEL);
179 	int ret = -EINVAL;
180 	u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
181 	unsigned long priority;
182 	char *priostr;
183 	struct net_device *dev;
184 	struct netprio_map *map;
185 
186 	if (!devname)
187 		return -ENOMEM;
188 
189 	/*
190 	 * Minimally sized valid priomap string
191 	 */
192 	if (strlen(devname) < 3)
193 		goto out_free_devname;
194 
195 	priostr = strstr(devname, " ");
196 	if (!priostr)
197 		goto out_free_devname;
198 
199 	/*
200 	 *Separate the devname from the associated priority
201 	 *and advance the priostr poitner to the priority value
202 	 */
203 	*priostr = '\0';
204 	priostr++;
205 
206 	/*
207 	 * If the priostr points to NULL, we're at the end of the passed
208 	 * in string, and its not a valid write
209 	 */
210 	if (*priostr == '\0')
211 		goto out_free_devname;
212 
213 	ret = kstrtoul(priostr, 10, &priority);
214 	if (ret < 0)
215 		goto out_free_devname;
216 
217 	ret = -ENODEV;
218 
219 	dev = dev_get_by_name(&init_net, devname);
220 	if (!dev)
221 		goto out_free_devname;
222 
223 	update_netdev_tables();
224 	ret = 0;
225 	rcu_read_lock();
226 	map = rcu_dereference(dev->priomap);
227 	if (map)
228 		map->priomap[prioidx] = priority;
229 	rcu_read_unlock();
230 	dev_put(dev);
231 
232 out_free_devname:
233 	kfree(devname);
234 	return ret;
235 }
236 
237 static struct cftype ss_files[] = {
238 	{
239 		.name = "prioidx",
240 		.read_u64 = read_prioidx,
241 	},
242 	{
243 		.name = "ifpriomap",
244 		.read_map = read_priomap,
245 		.write_string = write_priomap,
246 	},
247 	{ }	/* terminate */
248 };
249 
250 struct cgroup_subsys net_prio_subsys = {
251 	.name		= "net_prio",
252 	.create		= cgrp_create,
253 	.destroy	= cgrp_destroy,
254 #ifdef CONFIG_NETPRIO_CGROUP
255 	.subsys_id	= net_prio_subsys_id,
256 #endif
257 	.base_cftypes	= ss_files,
258 	.module		= THIS_MODULE
259 };
260 
261 static int netprio_device_event(struct notifier_block *unused,
262 				unsigned long event, void *ptr)
263 {
264 	struct net_device *dev = ptr;
265 	struct netprio_map *old;
266 
267 	/*
268 	 * Note this is called with rtnl_lock held so we have update side
269 	 * protection on our rcu assignments
270 	 */
271 
272 	switch (event) {
273 	case NETDEV_UNREGISTER:
274 		old = rtnl_dereference(dev->priomap);
275 		RCU_INIT_POINTER(dev->priomap, NULL);
276 		if (old)
277 			kfree_rcu(old, rcu);
278 		break;
279 	}
280 	return NOTIFY_DONE;
281 }
282 
/*
 * Notifier block routing netdevice events to netprio_device_event();
 * registered in init_cgroup_netprio() and removed in exit_cgroup_netprio().
 */
283 static struct notifier_block netprio_device_notifier = {
284 	.notifier_call = netprio_device_event
285 };
286 
287 static int __init init_cgroup_netprio(void)
288 {
289 	int ret;
290 
291 	ret = cgroup_load_subsys(&net_prio_subsys);
292 	if (ret)
293 		goto out;
294 #ifndef CONFIG_NETPRIO_CGROUP
295 	smp_wmb();
296 	net_prio_subsys_id = net_prio_subsys.subsys_id;
297 #endif
298 
299 	register_netdevice_notifier(&netprio_device_notifier);
300 
301 out:
302 	return ret;
303 }
304 
305 static void __exit exit_cgroup_netprio(void)
306 {
307 	struct netprio_map *old;
308 	struct net_device *dev;
309 
310 	unregister_netdevice_notifier(&netprio_device_notifier);
311 
312 	cgroup_unload_subsys(&net_prio_subsys);
313 
314 #ifndef CONFIG_NETPRIO_CGROUP
315 	net_prio_subsys_id = -1;
316 	synchronize_rcu();
317 #endif
318 
319 	rtnl_lock();
320 	for_each_netdev(&init_net, dev) {
321 		old = rtnl_dereference(dev->priomap);
322 		RCU_INIT_POINTER(dev->priomap, NULL);
323 		if (old)
324 			kfree_rcu(old, rcu);
325 	}
326 	rtnl_unlock();
327 }
328 
/* Register the module's init/exit entry points and declare its license. */
329 module_init(init_cgroup_netprio);
330 module_exit(exit_cgroup_netprio);
331 MODULE_LICENSE("GPL v2");
332