xref: /linux/fs/open.c (revision de2fe5e07d58424bc286fff3fd3c1b0bf933cd58)
1 /*
2  *  linux/fs/open.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6 
7 #include <linux/string.h>
8 #include <linux/mm.h>
9 #include <linux/utime.h>
10 #include <linux/file.h>
11 #include <linux/smp_lock.h>
12 #include <linux/quotaops.h>
13 #include <linux/fsnotify.h>
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 #include <linux/tty.h>
17 #include <linux/namei.h>
18 #include <linux/backing-dev.h>
19 #include <linux/capability.h>
20 #include <linux/security.h>
21 #include <linux/mount.h>
22 #include <linux/vfs.h>
23 #include <linux/fcntl.h>
24 #include <asm/uaccess.h>
25 #include <linux/fs.h>
26 #include <linux/personality.h>
27 #include <linux/pagemap.h>
28 #include <linux/syscalls.h>
29 #include <linux/rcupdate.h>
30 #include <linux/audit.h>
31 
32 #include <asm/unistd.h>
33 
34 int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
35 {
36 	int retval = -ENODEV;
37 
38 	if (sb) {
39 		retval = -ENOSYS;
40 		if (sb->s_op->statfs) {
41 			memset(buf, 0, sizeof(*buf));
42 			retval = security_sb_statfs(sb);
43 			if (retval)
44 				return retval;
45 			retval = sb->s_op->statfs(sb, buf);
46 			if (retval == 0 && buf->f_frsize == 0)
47 				buf->f_frsize = buf->f_bsize;
48 		}
49 	}
50 	return retval;
51 }
52 
53 EXPORT_SYMBOL(vfs_statfs);
54 
55 static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
56 {
57 	struct kstatfs st;
58 	int retval;
59 
60 	retval = vfs_statfs(sb, &st);
61 	if (retval)
62 		return retval;
63 
64 	if (sizeof(*buf) == sizeof(st))
65 		memcpy(buf, &st, sizeof(st));
66 	else {
67 		if (sizeof buf->f_blocks == 4) {
68 			if ((st.f_blocks | st.f_bfree | st.f_bavail) &
69 			    0xffffffff00000000ULL)
70 				return -EOVERFLOW;
71 			/*
72 			 * f_files and f_ffree may be -1; it's okay to stuff
73 			 * that into 32 bits
74 			 */
75 			if (st.f_files != -1 &&
76 			    (st.f_files & 0xffffffff00000000ULL))
77 				return -EOVERFLOW;
78 			if (st.f_ffree != -1 &&
79 			    (st.f_ffree & 0xffffffff00000000ULL))
80 				return -EOVERFLOW;
81 		}
82 
83 		buf->f_type = st.f_type;
84 		buf->f_bsize = st.f_bsize;
85 		buf->f_blocks = st.f_blocks;
86 		buf->f_bfree = st.f_bfree;
87 		buf->f_bavail = st.f_bavail;
88 		buf->f_files = st.f_files;
89 		buf->f_ffree = st.f_ffree;
90 		buf->f_fsid = st.f_fsid;
91 		buf->f_namelen = st.f_namelen;
92 		buf->f_frsize = st.f_frsize;
93 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
94 	}
95 	return 0;
96 }
97 
98 static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
99 {
100 	struct kstatfs st;
101 	int retval;
102 
103 	retval = vfs_statfs(sb, &st);
104 	if (retval)
105 		return retval;
106 
107 	if (sizeof(*buf) == sizeof(st))
108 		memcpy(buf, &st, sizeof(st));
109 	else {
110 		buf->f_type = st.f_type;
111 		buf->f_bsize = st.f_bsize;
112 		buf->f_blocks = st.f_blocks;
113 		buf->f_bfree = st.f_bfree;
114 		buf->f_bavail = st.f_bavail;
115 		buf->f_files = st.f_files;
116 		buf->f_ffree = st.f_ffree;
117 		buf->f_fsid = st.f_fsid;
118 		buf->f_namelen = st.f_namelen;
119 		buf->f_frsize = st.f_frsize;
120 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
121 	}
122 	return 0;
123 }
124 
125 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
126 {
127 	struct nameidata nd;
128 	int error;
129 
130 	error = user_path_walk(path, &nd);
131 	if (!error) {
132 		struct statfs tmp;
133 		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
134 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
135 			error = -EFAULT;
136 		path_release(&nd);
137 	}
138 	return error;
139 }
140 
141 
142 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
143 {
144 	struct nameidata nd;
145 	long error;
146 
147 	if (sz != sizeof(*buf))
148 		return -EINVAL;
149 	error = user_path_walk(path, &nd);
150 	if (!error) {
151 		struct statfs64 tmp;
152 		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
153 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
154 			error = -EFAULT;
155 		path_release(&nd);
156 	}
157 	return error;
158 }
159 
160 
161 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
162 {
163 	struct file * file;
164 	struct statfs tmp;
165 	int error;
166 
167 	error = -EBADF;
168 	file = fget(fd);
169 	if (!file)
170 		goto out;
171 	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
172 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
173 		error = -EFAULT;
174 	fput(file);
175 out:
176 	return error;
177 }
178 
179 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf)
180 {
181 	struct file * file;
182 	struct statfs64 tmp;
183 	int error;
184 
185 	if (sz != sizeof(*buf))
186 		return -EINVAL;
187 
188 	error = -EBADF;
189 	file = fget(fd);
190 	if (!file)
191 		goto out;
192 	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
193 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
194 		error = -EFAULT;
195 	fput(file);
196 out:
197 	return error;
198 }
199 
200 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
201 	struct file *filp)
202 {
203 	int err;
204 	struct iattr newattrs;
205 
206 	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
207 	if (length < 0)
208 		return -EINVAL;
209 
210 	newattrs.ia_size = length;
211 	newattrs.ia_valid = ATTR_SIZE | time_attrs;
212 	if (filp) {
213 		newattrs.ia_file = filp;
214 		newattrs.ia_valid |= ATTR_FILE;
215 	}
216 
217 	mutex_lock(&dentry->d_inode->i_mutex);
218 	err = notify_change(dentry, &newattrs);
219 	mutex_unlock(&dentry->d_inode->i_mutex);
220 	return err;
221 }
222 
223 static long do_sys_truncate(const char __user * path, loff_t length)
224 {
225 	struct nameidata nd;
226 	struct inode * inode;
227 	int error;
228 
229 	error = -EINVAL;
230 	if (length < 0)	/* sorry, but loff_t says... */
231 		goto out;
232 
233 	error = user_path_walk(path, &nd);
234 	if (error)
235 		goto out;
236 	inode = nd.dentry->d_inode;
237 
238 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
239 	error = -EISDIR;
240 	if (S_ISDIR(inode->i_mode))
241 		goto dput_and_out;
242 
243 	error = -EINVAL;
244 	if (!S_ISREG(inode->i_mode))
245 		goto dput_and_out;
246 
247 	error = vfs_permission(&nd, MAY_WRITE);
248 	if (error)
249 		goto dput_and_out;
250 
251 	error = -EROFS;
252 	if (IS_RDONLY(inode))
253 		goto dput_and_out;
254 
255 	error = -EPERM;
256 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
257 		goto dput_and_out;
258 
259 	/*
260 	 * Make sure that there are no leases.
261 	 */
262 	error = break_lease(inode, FMODE_WRITE);
263 	if (error)
264 		goto dput_and_out;
265 
266 	error = get_write_access(inode);
267 	if (error)
268 		goto dput_and_out;
269 
270 	error = locks_verify_truncate(inode, NULL, length);
271 	if (!error) {
272 		DQUOT_INIT(inode);
273 		error = do_truncate(nd.dentry, length, 0, NULL);
274 	}
275 	put_write_access(inode);
276 
277 dput_and_out:
278 	path_release(&nd);
279 out:
280 	return error;
281 }
282 
283 asmlinkage long sys_truncate(const char __user * path, unsigned long length)
284 {
285 	/* on 32-bit boxen it will cut the range 2^31--2^32-1 off */
286 	return do_sys_truncate(path, (long)length);
287 }
288 
289 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
290 {
291 	struct inode * inode;
292 	struct dentry *dentry;
293 	struct file * file;
294 	int error;
295 
296 	error = -EINVAL;
297 	if (length < 0)
298 		goto out;
299 	error = -EBADF;
300 	file = fget(fd);
301 	if (!file)
302 		goto out;
303 
304 	/* explicitly opened as large or we are on 64-bit box */
305 	if (file->f_flags & O_LARGEFILE)
306 		small = 0;
307 
308 	dentry = file->f_dentry;
309 	inode = dentry->d_inode;
310 	error = -EINVAL;
311 	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
312 		goto out_putf;
313 
314 	error = -EINVAL;
315 	/* Cannot ftruncate over 2^31 bytes without large file support */
316 	if (small && length > MAX_NON_LFS)
317 		goto out_putf;
318 
319 	error = -EPERM;
320 	if (IS_APPEND(inode))
321 		goto out_putf;
322 
323 	error = locks_verify_truncate(inode, file, length);
324 	if (!error)
325 		error = do_truncate(dentry, length, 0, file);
326 out_putf:
327 	fput(file);
328 out:
329 	return error;
330 }
331 
332 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
333 {
334 	return do_sys_ftruncate(fd, length, 1);
335 }
336 
337 /* LFS versions of truncate are only needed on 32 bit machines */
338 #if BITS_PER_LONG == 32
339 asmlinkage long sys_truncate64(const char __user * path, loff_t length)
340 {
341 	return do_sys_truncate(path, length);
342 }
343 
344 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
345 {
346 	return do_sys_ftruncate(fd, length, 0);
347 }
348 #endif
349 
350 #ifdef __ARCH_WANT_SYS_UTIME
351 
352 /*
353  * sys_utime() can be implemented in user-level using sys_utimes().
354  * Is this for backwards compatibility?  If so, why not move it
355  * into the appropriate arch directory (for those architectures that
356  * need it).
357  */
358 
359 /* If times==NULL, set access and modification to current time,
360  * must be owner or have write permission.
361  * Else, update from *times, must be owner or super user.
362  */
363 asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
364 {
365 	int error;
366 	struct nameidata nd;
367 	struct inode * inode;
368 	struct iattr newattrs;
369 
370 	error = user_path_walk(filename, &nd);
371 	if (error)
372 		goto out;
373 	inode = nd.dentry->d_inode;
374 
375 	error = -EROFS;
376 	if (IS_RDONLY(inode))
377 		goto dput_and_out;
378 
379 	/* Don't worry, the checks are done in inode_change_ok() */
380 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
381 	if (times) {
382 		error = -EPERM;
383 		if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
384 			goto dput_and_out;
385 
386 		error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
387 		newattrs.ia_atime.tv_nsec = 0;
388 		if (!error)
389 			error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
390 		newattrs.ia_mtime.tv_nsec = 0;
391 		if (error)
392 			goto dput_and_out;
393 
394 		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
395 	} else {
396                 error = -EACCES;
397                 if (IS_IMMUTABLE(inode))
398                         goto dput_and_out;
399 
400 		if (current->fsuid != inode->i_uid &&
401 		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
402 			goto dput_and_out;
403 	}
404 	mutex_lock(&inode->i_mutex);
405 	error = notify_change(nd.dentry, &newattrs);
406 	mutex_unlock(&inode->i_mutex);
407 dput_and_out:
408 	path_release(&nd);
409 out:
410 	return error;
411 }
412 
413 #endif
414 
415 /* If times==NULL, set access and modification to current time,
416  * must be owner or have write permission.
417  * Else, update from *times, must be owner or super user.
418  */
419 long do_utimes(int dfd, char __user *filename, struct timeval *times)
420 {
421 	int error;
422 	struct nameidata nd;
423 	struct inode * inode;
424 	struct iattr newattrs;
425 
426 	error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
427 
428 	if (error)
429 		goto out;
430 	inode = nd.dentry->d_inode;
431 
432 	error = -EROFS;
433 	if (IS_RDONLY(inode))
434 		goto dput_and_out;
435 
436 	/* Don't worry, the checks are done in inode_change_ok() */
437 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
438 	if (times) {
439 		error = -EPERM;
440                 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
441                         goto dput_and_out;
442 
443 		newattrs.ia_atime.tv_sec = times[0].tv_sec;
444 		newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000;
445 		newattrs.ia_mtime.tv_sec = times[1].tv_sec;
446 		newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000;
447 		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
448 	} else {
449 		error = -EACCES;
450                 if (IS_IMMUTABLE(inode))
451                         goto dput_and_out;
452 
453 		if (current->fsuid != inode->i_uid &&
454 		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
455 			goto dput_and_out;
456 	}
457 	mutex_lock(&inode->i_mutex);
458 	error = notify_change(nd.dentry, &newattrs);
459 	mutex_unlock(&inode->i_mutex);
460 dput_and_out:
461 	path_release(&nd);
462 out:
463 	return error;
464 }
465 
466 asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes)
467 {
468 	struct timeval times[2];
469 
470 	if (utimes && copy_from_user(&times, utimes, sizeof(times)))
471 		return -EFAULT;
472 	return do_utimes(dfd, filename, utimes ? times : NULL);
473 }
474 
475 asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes)
476 {
477 	return sys_futimesat(AT_FDCWD, filename, utimes);
478 }
479 
480 
481 /*
482  * access() needs to use the real uid/gid, not the effective uid/gid.
483  * We do this by temporarily clearing all FS-related capabilities and
484  * switching the fsuid/fsgid around to the real ones.
485  */
486 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
487 {
488 	struct nameidata nd;
489 	int old_fsuid, old_fsgid;
490 	kernel_cap_t old_cap;
491 	int res;
492 
493 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
494 		return -EINVAL;
495 
496 	old_fsuid = current->fsuid;
497 	old_fsgid = current->fsgid;
498 	old_cap = current->cap_effective;
499 
500 	current->fsuid = current->uid;
501 	current->fsgid = current->gid;
502 
503 	/*
504 	 * Clear the capabilities if we switch to a non-root user
505 	 *
506 	 * FIXME: There is a race here against sys_capset.  The
507 	 * capabilities can change yet we will restore the old
508 	 * value below.  We should hold task_capabilities_lock,
509 	 * but we cannot because user_path_walk can sleep.
510 	 */
511 	if (current->uid)
512 		cap_clear(current->cap_effective);
513 	else
514 		current->cap_effective = current->cap_permitted;
515 
516 	res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
517 	if (!res) {
518 		res = vfs_permission(&nd, mode);
519 		/* SuS v2 requires we report a read only fs too */
520 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
521 		   && !special_file(nd.dentry->d_inode->i_mode))
522 			res = -EROFS;
523 		path_release(&nd);
524 	}
525 
526 	current->fsuid = old_fsuid;
527 	current->fsgid = old_fsgid;
528 	current->cap_effective = old_cap;
529 
530 	return res;
531 }
532 
533 asmlinkage long sys_access(const char __user *filename, int mode)
534 {
535 	return sys_faccessat(AT_FDCWD, filename, mode);
536 }
537 
538 asmlinkage long sys_chdir(const char __user * filename)
539 {
540 	struct nameidata nd;
541 	int error;
542 
543 	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
544 	if (error)
545 		goto out;
546 
547 	error = vfs_permission(&nd, MAY_EXEC);
548 	if (error)
549 		goto dput_and_out;
550 
551 	set_fs_pwd(current->fs, nd.mnt, nd.dentry);
552 
553 dput_and_out:
554 	path_release(&nd);
555 out:
556 	return error;
557 }
558 
559 asmlinkage long sys_fchdir(unsigned int fd)
560 {
561 	struct file *file;
562 	struct dentry *dentry;
563 	struct inode *inode;
564 	struct vfsmount *mnt;
565 	int error;
566 
567 	error = -EBADF;
568 	file = fget(fd);
569 	if (!file)
570 		goto out;
571 
572 	dentry = file->f_dentry;
573 	mnt = file->f_vfsmnt;
574 	inode = dentry->d_inode;
575 
576 	error = -ENOTDIR;
577 	if (!S_ISDIR(inode->i_mode))
578 		goto out_putf;
579 
580 	error = file_permission(file, MAY_EXEC);
581 	if (!error)
582 		set_fs_pwd(current->fs, mnt, dentry);
583 out_putf:
584 	fput(file);
585 out:
586 	return error;
587 }
588 
589 asmlinkage long sys_chroot(const char __user * filename)
590 {
591 	struct nameidata nd;
592 	int error;
593 
594 	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
595 	if (error)
596 		goto out;
597 
598 	error = vfs_permission(&nd, MAY_EXEC);
599 	if (error)
600 		goto dput_and_out;
601 
602 	error = -EPERM;
603 	if (!capable(CAP_SYS_CHROOT))
604 		goto dput_and_out;
605 
606 	set_fs_root(current->fs, nd.mnt, nd.dentry);
607 	set_fs_altroot();
608 	error = 0;
609 dput_and_out:
610 	path_release(&nd);
611 out:
612 	return error;
613 }
614 
615 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
616 {
617 	struct inode * inode;
618 	struct dentry * dentry;
619 	struct file * file;
620 	int err = -EBADF;
621 	struct iattr newattrs;
622 
623 	file = fget(fd);
624 	if (!file)
625 		goto out;
626 
627 	dentry = file->f_dentry;
628 	inode = dentry->d_inode;
629 
630 	audit_inode(NULL, inode, 0);
631 
632 	err = -EROFS;
633 	if (IS_RDONLY(inode))
634 		goto out_putf;
635 	err = -EPERM;
636 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
637 		goto out_putf;
638 	mutex_lock(&inode->i_mutex);
639 	if (mode == (mode_t) -1)
640 		mode = inode->i_mode;
641 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
642 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
643 	err = notify_change(dentry, &newattrs);
644 	mutex_unlock(&inode->i_mutex);
645 
646 out_putf:
647 	fput(file);
648 out:
649 	return err;
650 }
651 
652 asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
653 			     mode_t mode)
654 {
655 	struct nameidata nd;
656 	struct inode * inode;
657 	int error;
658 	struct iattr newattrs;
659 
660 	error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
661 	if (error)
662 		goto out;
663 	inode = nd.dentry->d_inode;
664 
665 	error = -EROFS;
666 	if (IS_RDONLY(inode))
667 		goto dput_and_out;
668 
669 	error = -EPERM;
670 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
671 		goto dput_and_out;
672 
673 	mutex_lock(&inode->i_mutex);
674 	if (mode == (mode_t) -1)
675 		mode = inode->i_mode;
676 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
677 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
678 	error = notify_change(nd.dentry, &newattrs);
679 	mutex_unlock(&inode->i_mutex);
680 
681 dput_and_out:
682 	path_release(&nd);
683 out:
684 	return error;
685 }
686 
687 asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
688 {
689 	return sys_fchmodat(AT_FDCWD, filename, mode);
690 }
691 
692 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
693 {
694 	struct inode * inode;
695 	int error;
696 	struct iattr newattrs;
697 
698 	error = -ENOENT;
699 	if (!(inode = dentry->d_inode)) {
700 		printk(KERN_ERR "chown_common: NULL inode\n");
701 		goto out;
702 	}
703 	error = -EROFS;
704 	if (IS_RDONLY(inode))
705 		goto out;
706 	error = -EPERM;
707 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
708 		goto out;
709 	newattrs.ia_valid =  ATTR_CTIME;
710 	if (user != (uid_t) -1) {
711 		newattrs.ia_valid |= ATTR_UID;
712 		newattrs.ia_uid = user;
713 	}
714 	if (group != (gid_t) -1) {
715 		newattrs.ia_valid |= ATTR_GID;
716 		newattrs.ia_gid = group;
717 	}
718 	if (!S_ISDIR(inode->i_mode))
719 		newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
720 	mutex_lock(&inode->i_mutex);
721 	error = notify_change(dentry, &newattrs);
722 	mutex_unlock(&inode->i_mutex);
723 out:
724 	return error;
725 }
726 
727 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
728 {
729 	struct nameidata nd;
730 	int error;
731 
732 	error = user_path_walk(filename, &nd);
733 	if (!error) {
734 		error = chown_common(nd.dentry, user, group);
735 		path_release(&nd);
736 	}
737 	return error;
738 }
739 
740 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
741 			     gid_t group, int flag)
742 {
743 	struct nameidata nd;
744 	int error = -EINVAL;
745 	int follow;
746 
747 	if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
748 		goto out;
749 
750 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
751 	error = __user_walk_fd(dfd, filename, follow, &nd);
752 	if (!error) {
753 		error = chown_common(nd.dentry, user, group);
754 		path_release(&nd);
755 	}
756 out:
757 	return error;
758 }
759 
760 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
761 {
762 	struct nameidata nd;
763 	int error;
764 
765 	error = user_path_walk_link(filename, &nd);
766 	if (!error) {
767 		error = chown_common(nd.dentry, user, group);
768 		path_release(&nd);
769 	}
770 	return error;
771 }
772 
773 
774 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
775 {
776 	struct file * file;
777 	int error = -EBADF;
778 
779 	file = fget(fd);
780 	if (file) {
781 		struct dentry * dentry;
782 		dentry = file->f_dentry;
783 		audit_inode(NULL, dentry->d_inode, 0);
784 		error = chown_common(dentry, user, group);
785 		fput(file);
786 	}
787 	return error;
788 }
789 
790 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
791 					int flags, struct file *f,
792 					int (*open)(struct inode *, struct file *))
793 {
794 	struct inode *inode;
795 	int error;
796 
797 	f->f_flags = flags;
798 	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
799 				FMODE_PREAD | FMODE_PWRITE;
800 	inode = dentry->d_inode;
801 	if (f->f_mode & FMODE_WRITE) {
802 		error = get_write_access(inode);
803 		if (error)
804 			goto cleanup_file;
805 	}
806 
807 	f->f_mapping = inode->i_mapping;
808 	f->f_dentry = dentry;
809 	f->f_vfsmnt = mnt;
810 	f->f_pos = 0;
811 	f->f_op = fops_get(inode->i_fop);
812 	file_move(f, &inode->i_sb->s_files);
813 
814 	if (!open && f->f_op)
815 		open = f->f_op->open;
816 	if (open) {
817 		error = open(inode, f);
818 		if (error)
819 			goto cleanup_all;
820 	}
821 
822 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
823 
824 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
825 
826 	/* NB: we're sure to have correct a_ops only after f_op->open */
827 	if (f->f_flags & O_DIRECT) {
828 		if (!f->f_mapping->a_ops ||
829 		    ((!f->f_mapping->a_ops->direct_IO) &&
830 		    (!f->f_mapping->a_ops->get_xip_page))) {
831 			fput(f);
832 			f = ERR_PTR(-EINVAL);
833 		}
834 	}
835 
836 	return f;
837 
838 cleanup_all:
839 	fops_put(f->f_op);
840 	if (f->f_mode & FMODE_WRITE)
841 		put_write_access(inode);
842 	file_kill(f);
843 	f->f_dentry = NULL;
844 	f->f_vfsmnt = NULL;
845 cleanup_file:
846 	put_filp(f);
847 	dput(dentry);
848 	mntput(mnt);
849 	return ERR_PTR(error);
850 }
851 
852 /*
853  * Note that while the flag value (low two bits) for sys_open means:
854  *	00 - read-only
855  *	01 - write-only
856  *	10 - read-write
857  *	11 - special
858  * it is changed into
859  *	00 - no permissions needed
860  *	01 - read-permission
861  *	10 - write-permission
862  *	11 - read-write
863  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
864  * used by symlinks.
865  */
866 static struct file *do_filp_open(int dfd, const char *filename, int flags,
867 				 int mode)
868 {
869 	int namei_flags, error;
870 	struct nameidata nd;
871 
872 	namei_flags = flags;
873 	if ((namei_flags+1) & O_ACCMODE)
874 		namei_flags++;
875 
876 	error = open_namei(dfd, filename, namei_flags, mode, &nd);
877 	if (!error)
878 		return nameidata_to_filp(&nd, flags);
879 
880 	return ERR_PTR(error);
881 }
882 
883 struct file *filp_open(const char *filename, int flags, int mode)
884 {
885 	return do_filp_open(AT_FDCWD, filename, flags, mode);
886 }
887 EXPORT_SYMBOL(filp_open);
888 
889 /**
890  * lookup_instantiate_filp - instantiates the open intent filp
891  * @nd: pointer to nameidata
892  * @dentry: pointer to dentry
893  * @open: open callback
894  *
895  * Helper for filesystems that want to use lookup open intents and pass back
896  * a fully instantiated struct file to the caller.
897  * This function is meant to be called from within a filesystem's
898  * lookup method.
899  * Beware of calling it for non-regular files! Those ->open methods might block
900  * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
901  * leading to a deadlock, as nobody can open that fifo anymore, because
902  * another process to open fifo will block on locked parent when doing lookup).
903  * Note that in case of error, nd->intent.open.file is destroyed, but the
904  * path information remains valid.
905  * If the open callback is set to NULL, then the standard f_op->open()
906  * filesystem callback is substituted.
907  */
908 struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
909 		int (*open)(struct inode *, struct file *))
910 {
911 	if (IS_ERR(nd->intent.open.file))
912 		goto out;
913 	if (IS_ERR(dentry))
914 		goto out_err;
915 	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
916 					     nd->intent.open.flags - 1,
917 					     nd->intent.open.file,
918 					     open);
919 out:
920 	return nd->intent.open.file;
921 out_err:
922 	release_open_intent(nd);
923 	nd->intent.open.file = (struct file *)dentry;
924 	goto out;
925 }
926 EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
927 
928 /**
929  * nameidata_to_filp - convert a nameidata to an open filp.
930  * @nd: pointer to nameidata
931  * @flags: open flags
932  *
933  * Note that this function destroys the original nameidata
934  */
935 struct file *nameidata_to_filp(struct nameidata *nd, int flags)
936 {
937 	struct file *filp;
938 
939 	/* Pick up the filp from the open intent */
940 	filp = nd->intent.open.file;
941 	/* Has the filesystem initialised the file for us? */
942 	if (filp->f_dentry == NULL)
943 		filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
944 	else
945 		path_release(nd);
946 	return filp;
947 }
948 
949 /*
950  * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
951  * error.
952  */
953 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
954 {
955 	int error;
956 	struct file *f;
957 
958 	error = -ENFILE;
959 	f = get_empty_filp();
960 	if (f == NULL) {
961 		dput(dentry);
962 		mntput(mnt);
963 		return ERR_PTR(error);
964 	}
965 
966 	return __dentry_open(dentry, mnt, flags, f, NULL);
967 }
968 EXPORT_SYMBOL(dentry_open);
969 
970 /*
971  * Find an empty file descriptor entry, and mark it busy.
972  */
973 int get_unused_fd(void)
974 {
975 	struct files_struct * files = current->files;
976 	int fd, error;
977 	struct fdtable *fdt;
978 
979   	error = -EMFILE;
980 	spin_lock(&files->file_lock);
981 
982 repeat:
983 	fdt = files_fdtable(files);
984  	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
985 				fdt->max_fdset,
986 				files->next_fd);
987 
988 	/*
989 	 * N.B. For clone tasks sharing a files structure, this test
990 	 * will limit the total number of files that can be opened.
991 	 */
992 	if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
993 		goto out;
994 
995 	/* Do we need to expand the fd array or fd set?  */
996 	error = expand_files(files, fd);
997 	if (error < 0)
998 		goto out;
999 
1000 	if (error) {
1001 		/*
1002 	 	 * If we needed to expand the fs array we
1003 		 * might have blocked - try again.
1004 		 */
1005 		error = -EMFILE;
1006 		goto repeat;
1007 	}
1008 
1009 	FD_SET(fd, fdt->open_fds);
1010 	FD_CLR(fd, fdt->close_on_exec);
1011 	files->next_fd = fd + 1;
1012 #if 1
1013 	/* Sanity check */
1014 	if (fdt->fd[fd] != NULL) {
1015 		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
1016 		fdt->fd[fd] = NULL;
1017 	}
1018 #endif
1019 	error = fd;
1020 
1021 out:
1022 	spin_unlock(&files->file_lock);
1023 	return error;
1024 }
1025 
1026 EXPORT_SYMBOL(get_unused_fd);
1027 
1028 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
1029 {
1030 	struct fdtable *fdt = files_fdtable(files);
1031 	__FD_CLR(fd, fdt->open_fds);
1032 	if (fd < files->next_fd)
1033 		files->next_fd = fd;
1034 }
1035 
1036 void fastcall put_unused_fd(unsigned int fd)
1037 {
1038 	struct files_struct *files = current->files;
1039 	spin_lock(&files->file_lock);
1040 	__put_unused_fd(files, fd);
1041 	spin_unlock(&files->file_lock);
1042 }
1043 
1044 EXPORT_SYMBOL(put_unused_fd);
1045 
1046 /*
1047  * Install a file pointer in the fd array.
1048  *
1049  * The VFS is full of places where we drop the files lock between
1050  * setting the open_fds bitmap and installing the file in the file
1051  * array.  At any such point, we are vulnerable to a dup2() race
1052  * installing a file in the array before us.  We need to detect this and
1053  * fput() the struct file we are about to overwrite in this case.
1054  *
1055  * It should never happen - if we allow dup2() do it, _really_ bad things
1056  * will follow.
1057  */
1058 
1059 void fastcall fd_install(unsigned int fd, struct file * file)
1060 {
1061 	struct files_struct *files = current->files;
1062 	struct fdtable *fdt;
1063 	spin_lock(&files->file_lock);
1064 	fdt = files_fdtable(files);
1065 	BUG_ON(fdt->fd[fd] != NULL);
1066 	rcu_assign_pointer(fdt->fd[fd], file);
1067 	spin_unlock(&files->file_lock);
1068 }
1069 
1070 EXPORT_SYMBOL(fd_install);
1071 
1072 long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
1073 {
1074 	char *tmp = getname(filename);
1075 	int fd = PTR_ERR(tmp);
1076 
1077 	if (!IS_ERR(tmp)) {
1078 		fd = get_unused_fd();
1079 		if (fd >= 0) {
1080 			struct file *f = do_filp_open(dfd, tmp, flags, mode);
1081 			if (IS_ERR(f)) {
1082 				put_unused_fd(fd);
1083 				fd = PTR_ERR(f);
1084 			} else {
1085 				fsnotify_open(f->f_dentry);
1086 				fd_install(fd, f);
1087 			}
1088 		}
1089 		putname(tmp);
1090 	}
1091 	return fd;
1092 }
1093 
1094 asmlinkage long sys_open(const char __user *filename, int flags, int mode)
1095 {
1096 	if (force_o_largefile())
1097 		flags |= O_LARGEFILE;
1098 
1099 	return do_sys_open(AT_FDCWD, filename, flags, mode);
1100 }
1101 EXPORT_SYMBOL_GPL(sys_open);
1102 
1103 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
1104 			   int mode)
1105 {
1106 	if (force_o_largefile())
1107 		flags |= O_LARGEFILE;
1108 
1109 	return do_sys_open(dfd, filename, flags, mode);
1110 }
1111 EXPORT_SYMBOL_GPL(sys_openat);
1112 
1113 #ifndef __alpha__
1114 
1115 /*
1116  * For backward compatibility?  Maybe this should be moved
1117  * into arch/i386 instead?
1118  */
1119 asmlinkage long sys_creat(const char __user * pathname, int mode)
1120 {
1121 	return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
1122 }
1123 
1124 #endif
1125 
1126 /*
1127  * "id" is the POSIX thread ID. We use the
1128  * files pointer for this..
1129  */
1130 int filp_close(struct file *filp, fl_owner_t id)
1131 {
1132 	int retval = 0;
1133 
1134 	if (!file_count(filp)) {
1135 		printk(KERN_ERR "VFS: Close: file count is 0\n");
1136 		return 0;
1137 	}
1138 
1139 	if (filp->f_op && filp->f_op->flush)
1140 		retval = filp->f_op->flush(filp);
1141 
1142 	dnotify_flush(filp, id);
1143 	locks_remove_posix(filp, id);
1144 	fput(filp);
1145 	return retval;
1146 }
1147 
1148 EXPORT_SYMBOL(filp_close);
1149 
1150 /*
1151  * Careful here! We test whether the file pointer is NULL before
1152  * releasing the fd. This ensures that one clone task can't release
1153  * an fd while another clone is opening it.
1154  */
1155 asmlinkage long sys_close(unsigned int fd)
1156 {
1157 	struct file * filp;
1158 	struct files_struct *files = current->files;
1159 	struct fdtable *fdt;
1160 
1161 	spin_lock(&files->file_lock);
1162 	fdt = files_fdtable(files);
1163 	if (fd >= fdt->max_fds)
1164 		goto out_unlock;
1165 	filp = fdt->fd[fd];
1166 	if (!filp)
1167 		goto out_unlock;
1168 	rcu_assign_pointer(fdt->fd[fd], NULL);
1169 	FD_CLR(fd, fdt->close_on_exec);
1170 	__put_unused_fd(files, fd);
1171 	spin_unlock(&files->file_lock);
1172 	return filp_close(filp, files);
1173 
1174 out_unlock:
1175 	spin_unlock(&files->file_lock);
1176 	return -EBADF;
1177 }
1178 
1179 EXPORT_SYMBOL(sys_close);
1180 
1181 /*
1182  * This routine simulates a hangup on the tty, to arrange that users
1183  * are given clean terminals at login time.
1184  */
1185 asmlinkage long sys_vhangup(void)
1186 {
1187 	if (capable(CAP_SYS_TTY_CONFIG)) {
1188 		tty_vhangup(current->signal->tty);
1189 		return 0;
1190 	}
1191 	return -EPERM;
1192 }
1193 
1194 /*
1195  * Called when an inode is about to be open.
1196  * We use this to disallow opening large files on 32bit systems if
1197  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1198  * on this flag in sys_open.
1199  */
1200 int generic_file_open(struct inode * inode, struct file * filp)
1201 {
1202 	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1203 		return -EFBIG;
1204 	return 0;
1205 }
1206 
1207 EXPORT_SYMBOL(generic_file_open);
1208 
1209 /*
1210  * This is used by subsystems that don't want seekable
1211  * file descriptors
1212  */
1213 int nonseekable_open(struct inode *inode, struct file *filp)
1214 {
1215 	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1216 	return 0;
1217 }
1218 
1219 EXPORT_SYMBOL(nonseekable_open);
1220