xref: /linux/fs/proc/proc_sysctl.c (revision 9ffc93f203c18a70623f21950f1dd473c9ec48cd)
1 /*
2  * /proc/sys support
3  */
4 #include <linux/init.h>
5 #include <linux/sysctl.h>
6 #include <linux/poll.h>
7 #include <linux/proc_fs.h>
8 #include <linux/security.h>
9 #include <linux/sched.h>
10 #include <linux/namei.h>
11 #include <linux/mm.h>
12 #include "internal.h"
13 
14 static const struct dentry_operations proc_sys_dentry_operations;
15 static const struct file_operations proc_sys_file_operations;
16 static const struct inode_operations proc_sys_inode_operations;
17 static const struct file_operations proc_sys_dir_file_operations;
18 static const struct inode_operations proc_sys_dir_operations;
19 
20 void proc_sys_poll_notify(struct ctl_table_poll *poll)
21 {
22 	if (!poll)
23 		return;
24 
25 	atomic_inc(&poll->event);
26 	wake_up_interruptible(&poll->wait);
27 }
28 
29 static struct inode *proc_sys_make_inode(struct super_block *sb,
30 		struct ctl_table_header *head, struct ctl_table *table)
31 {
32 	struct inode *inode;
33 	struct proc_inode *ei;
34 
35 	inode = new_inode(sb);
36 	if (!inode)
37 		goto out;
38 
39 	inode->i_ino = get_next_ino();
40 
41 	sysctl_head_get(head);
42 	ei = PROC_I(inode);
43 	ei->sysctl = head;
44 	ei->sysctl_entry = table;
45 
46 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
47 	inode->i_mode = table->mode;
48 	if (!table->child) {
49 		inode->i_mode |= S_IFREG;
50 		inode->i_op = &proc_sys_inode_operations;
51 		inode->i_fop = &proc_sys_file_operations;
52 	} else {
53 		inode->i_mode |= S_IFDIR;
54 		clear_nlink(inode);
55 		inode->i_op = &proc_sys_dir_operations;
56 		inode->i_fop = &proc_sys_dir_file_operations;
57 	}
58 out:
59 	return inode;
60 }
61 
62 static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
63 {
64 	int len;
65 	for ( ; p->procname; p++) {
66 
67 		if (!p->procname)
68 			continue;
69 
70 		len = strlen(p->procname);
71 		if (len != name->len)
72 			continue;
73 
74 		if (memcmp(p->procname, name->name, len) != 0)
75 			continue;
76 
77 		/* I have a match */
78 		return p;
79 	}
80 	return NULL;
81 }
82 
83 static struct ctl_table_header *grab_header(struct inode *inode)
84 {
85 	if (PROC_I(inode)->sysctl)
86 		return sysctl_head_grab(PROC_I(inode)->sysctl);
87 	else
88 		return sysctl_head_next(NULL);
89 }
90 
91 static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
92 					struct nameidata *nd)
93 {
94 	struct ctl_table_header *head = grab_header(dir);
95 	struct ctl_table *table = PROC_I(dir)->sysctl_entry;
96 	struct ctl_table_header *h = NULL;
97 	struct qstr *name = &dentry->d_name;
98 	struct ctl_table *p;
99 	struct inode *inode;
100 	struct dentry *err = ERR_PTR(-ENOENT);
101 
102 	if (IS_ERR(head))
103 		return ERR_CAST(head);
104 
105 	if (table && !table->child) {
106 		WARN_ON(1);
107 		goto out;
108 	}
109 
110 	table = table ? table->child : head->ctl_table;
111 
112 	p = find_in_table(table, name);
113 	if (!p) {
114 		for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
115 			if (h->attached_to != table)
116 				continue;
117 			p = find_in_table(h->attached_by, name);
118 			if (p)
119 				break;
120 		}
121 	}
122 
123 	if (!p)
124 		goto out;
125 
126 	err = ERR_PTR(-ENOMEM);
127 	inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
128 	if (h)
129 		sysctl_head_finish(h);
130 
131 	if (!inode)
132 		goto out;
133 
134 	err = NULL;
135 	d_set_d_op(dentry, &proc_sys_dentry_operations);
136 	d_add(dentry, inode);
137 
138 out:
139 	sysctl_head_finish(head);
140 	return err;
141 }
142 
143 static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
144 		size_t count, loff_t *ppos, int write)
145 {
146 	struct inode *inode = filp->f_path.dentry->d_inode;
147 	struct ctl_table_header *head = grab_header(inode);
148 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
149 	ssize_t error;
150 	size_t res;
151 
152 	if (IS_ERR(head))
153 		return PTR_ERR(head);
154 
155 	/*
156 	 * At this point we know that the sysctl was not unregistered
157 	 * and won't be until we finish.
158 	 */
159 	error = -EPERM;
160 	if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
161 		goto out;
162 
163 	/* if that can happen at all, it should be -EINVAL, not -EISDIR */
164 	error = -EINVAL;
165 	if (!table->proc_handler)
166 		goto out;
167 
168 	/* careful: calling conventions are nasty here */
169 	res = count;
170 	error = table->proc_handler(table, write, buf, &res, ppos);
171 	if (!error)
172 		error = res;
173 out:
174 	sysctl_head_finish(head);
175 
176 	return error;
177 }
178 
179 static ssize_t proc_sys_read(struct file *filp, char __user *buf,
180 				size_t count, loff_t *ppos)
181 {
182 	return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
183 }
184 
185 static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
186 				size_t count, loff_t *ppos)
187 {
188 	return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
189 }
190 
191 static int proc_sys_open(struct inode *inode, struct file *filp)
192 {
193 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
194 
195 	if (table->poll)
196 		filp->private_data = proc_sys_poll_event(table->poll);
197 
198 	return 0;
199 }
200 
201 static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
202 {
203 	struct inode *inode = filp->f_path.dentry->d_inode;
204 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
205 	unsigned long event = (unsigned long)filp->private_data;
206 	unsigned int ret = DEFAULT_POLLMASK;
207 
208 	if (!table->proc_handler)
209 		goto out;
210 
211 	if (!table->poll)
212 		goto out;
213 
214 	poll_wait(filp, &table->poll->wait, wait);
215 
216 	if (event != atomic_read(&table->poll->event)) {
217 		filp->private_data = proc_sys_poll_event(table->poll);
218 		ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
219 	}
220 
221 out:
222 	return ret;
223 }
224 
225 static int proc_sys_fill_cache(struct file *filp, void *dirent,
226 				filldir_t filldir,
227 				struct ctl_table_header *head,
228 				struct ctl_table *table)
229 {
230 	struct dentry *child, *dir = filp->f_path.dentry;
231 	struct inode *inode;
232 	struct qstr qname;
233 	ino_t ino = 0;
234 	unsigned type = DT_UNKNOWN;
235 
236 	qname.name = table->procname;
237 	qname.len  = strlen(table->procname);
238 	qname.hash = full_name_hash(qname.name, qname.len);
239 
240 	child = d_lookup(dir, &qname);
241 	if (!child) {
242 		child = d_alloc(dir, &qname);
243 		if (child) {
244 			inode = proc_sys_make_inode(dir->d_sb, head, table);
245 			if (!inode) {
246 				dput(child);
247 				return -ENOMEM;
248 			} else {
249 				d_set_d_op(child, &proc_sys_dentry_operations);
250 				d_add(child, inode);
251 			}
252 		} else {
253 			return -ENOMEM;
254 		}
255 	}
256 	inode = child->d_inode;
257 	ino  = inode->i_ino;
258 	type = inode->i_mode >> 12;
259 	dput(child);
260 	return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
261 }
262 
263 static int scan(struct ctl_table_header *head, ctl_table *table,
264 		unsigned long *pos, struct file *file,
265 		void *dirent, filldir_t filldir)
266 {
267 
268 	for (; table->procname; table++, (*pos)++) {
269 		int res;
270 
271 		/* Can't do anything without a proc name */
272 		if (!table->procname)
273 			continue;
274 
275 		if (*pos < file->f_pos)
276 			continue;
277 
278 		res = proc_sys_fill_cache(file, dirent, filldir, head, table);
279 		if (res)
280 			return res;
281 
282 		file->f_pos = *pos + 1;
283 	}
284 	return 0;
285 }
286 
287 static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
288 {
289 	struct dentry *dentry = filp->f_path.dentry;
290 	struct inode *inode = dentry->d_inode;
291 	struct ctl_table_header *head = grab_header(inode);
292 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
293 	struct ctl_table_header *h = NULL;
294 	unsigned long pos;
295 	int ret = -EINVAL;
296 
297 	if (IS_ERR(head))
298 		return PTR_ERR(head);
299 
300 	if (table && !table->child) {
301 		WARN_ON(1);
302 		goto out;
303 	}
304 
305 	table = table ? table->child : head->ctl_table;
306 
307 	ret = 0;
308 	/* Avoid a switch here: arm builds fail with missing __cmpdi2 */
309 	if (filp->f_pos == 0) {
310 		if (filldir(dirent, ".", 1, filp->f_pos,
311 				inode->i_ino, DT_DIR) < 0)
312 			goto out;
313 		filp->f_pos++;
314 	}
315 	if (filp->f_pos == 1) {
316 		if (filldir(dirent, "..", 2, filp->f_pos,
317 				parent_ino(dentry), DT_DIR) < 0)
318 			goto out;
319 		filp->f_pos++;
320 	}
321 	pos = 2;
322 
323 	ret = scan(head, table, &pos, filp, dirent, filldir);
324 	if (ret)
325 		goto out;
326 
327 	for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
328 		if (h->attached_to != table)
329 			continue;
330 		ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
331 		if (ret) {
332 			sysctl_head_finish(h);
333 			break;
334 		}
335 	}
336 	ret = 1;
337 out:
338 	sysctl_head_finish(head);
339 	return ret;
340 }
341 
342 static int proc_sys_permission(struct inode *inode, int mask)
343 {
344 	/*
345 	 * sysctl entries that are not writeable,
346 	 * are _NOT_ writeable, capabilities or not.
347 	 */
348 	struct ctl_table_header *head;
349 	struct ctl_table *table;
350 	int error;
351 
352 	/* Executable files are not allowed under /proc/sys/ */
353 	if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
354 		return -EACCES;
355 
356 	head = grab_header(inode);
357 	if (IS_ERR(head))
358 		return PTR_ERR(head);
359 
360 	table = PROC_I(inode)->sysctl_entry;
361 	if (!table) /* global root - r-xr-xr-x */
362 		error = mask & MAY_WRITE ? -EACCES : 0;
363 	else /* Use the permissions on the sysctl table entry */
364 		error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK);
365 
366 	sysctl_head_finish(head);
367 	return error;
368 }
369 
370 static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
371 {
372 	struct inode *inode = dentry->d_inode;
373 	int error;
374 
375 	if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
376 		return -EPERM;
377 
378 	error = inode_change_ok(inode, attr);
379 	if (error)
380 		return error;
381 
382 	if ((attr->ia_valid & ATTR_SIZE) &&
383 	    attr->ia_size != i_size_read(inode)) {
384 		error = vmtruncate(inode, attr->ia_size);
385 		if (error)
386 			return error;
387 	}
388 
389 	setattr_copy(inode, attr);
390 	mark_inode_dirty(inode);
391 	return 0;
392 }
393 
394 static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
395 {
396 	struct inode *inode = dentry->d_inode;
397 	struct ctl_table_header *head = grab_header(inode);
398 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
399 
400 	if (IS_ERR(head))
401 		return PTR_ERR(head);
402 
403 	generic_fillattr(inode, stat);
404 	if (table)
405 		stat->mode = (stat->mode & S_IFMT) | table->mode;
406 
407 	sysctl_head_finish(head);
408 	return 0;
409 }
410 
411 static const struct file_operations proc_sys_file_operations = {
412 	.open		= proc_sys_open,
413 	.poll		= proc_sys_poll,
414 	.read		= proc_sys_read,
415 	.write		= proc_sys_write,
416 	.llseek		= default_llseek,
417 };
418 
419 static const struct file_operations proc_sys_dir_file_operations = {
420 	.read		= generic_read_dir,
421 	.readdir	= proc_sys_readdir,
422 	.llseek		= generic_file_llseek,
423 };
424 
425 static const struct inode_operations proc_sys_inode_operations = {
426 	.permission	= proc_sys_permission,
427 	.setattr	= proc_sys_setattr,
428 	.getattr	= proc_sys_getattr,
429 };
430 
431 static const struct inode_operations proc_sys_dir_operations = {
432 	.lookup		= proc_sys_lookup,
433 	.permission	= proc_sys_permission,
434 	.setattr	= proc_sys_setattr,
435 	.getattr	= proc_sys_getattr,
436 };
437 
438 static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
439 {
440 	if (nd->flags & LOOKUP_RCU)
441 		return -ECHILD;
442 	return !PROC_I(dentry->d_inode)->sysctl->unregistering;
443 }
444 
445 static int proc_sys_delete(const struct dentry *dentry)
446 {
447 	return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
448 }
449 
450 static int proc_sys_compare(const struct dentry *parent,
451 		const struct inode *pinode,
452 		const struct dentry *dentry, const struct inode *inode,
453 		unsigned int len, const char *str, const struct qstr *name)
454 {
455 	struct ctl_table_header *head;
456 	/* Although proc doesn't have negative dentries, rcu-walk means
457 	 * that inode here can be NULL */
458 	/* AV: can it, indeed? */
459 	if (!inode)
460 		return 1;
461 	if (name->len != len)
462 		return 1;
463 	if (memcmp(name->name, str, len))
464 		return 1;
465 	head = rcu_dereference(PROC_I(inode)->sysctl);
466 	return !head || !sysctl_is_seen(head);
467 }
468 
469 static const struct dentry_operations proc_sys_dentry_operations = {
470 	.d_revalidate	= proc_sys_revalidate,
471 	.d_delete	= proc_sys_delete,
472 	.d_compare	= proc_sys_compare,
473 };
474 
475 int __init proc_sys_init(void)
476 {
477 	struct proc_dir_entry *proc_sys_root;
478 
479 	proc_sys_root = proc_mkdir("sys", NULL);
480 	proc_sys_root->proc_iops = &proc_sys_dir_operations;
481 	proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
482 	proc_sys_root->nlink = 0;
483 	return 0;
484 }
485