xref: /linux/fs/open.c (revision 2624f124b3b5d550ab2fbef7ee3bc0e1fed09722)
1 /*
2  *  linux/fs/open.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6 
7 #include <linux/string.h>
8 #include <linux/mm.h>
9 #include <linux/utime.h>
10 #include <linux/file.h>
11 #include <linux/smp_lock.h>
12 #include <linux/quotaops.h>
13 #include <linux/fsnotify.h>
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 #include <linux/tty.h>
17 #include <linux/namei.h>
18 #include <linux/backing-dev.h>
19 #include <linux/security.h>
20 #include <linux/mount.h>
21 #include <linux/vfs.h>
22 #include <asm/uaccess.h>
23 #include <linux/fs.h>
24 #include <linux/personality.h>
25 #include <linux/pagemap.h>
26 #include <linux/syscalls.h>
27 
28 #include <asm/unistd.h>
29 
30 int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
31 {
32 	int retval = -ENODEV;
33 
34 	if (sb) {
35 		retval = -ENOSYS;
36 		if (sb->s_op->statfs) {
37 			memset(buf, 0, sizeof(*buf));
38 			retval = security_sb_statfs(sb);
39 			if (retval)
40 				return retval;
41 			retval = sb->s_op->statfs(sb, buf);
42 			if (retval == 0 && buf->f_frsize == 0)
43 				buf->f_frsize = buf->f_bsize;
44 		}
45 	}
46 	return retval;
47 }
48 
49 EXPORT_SYMBOL(vfs_statfs);
50 
51 static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
52 {
53 	struct kstatfs st;
54 	int retval;
55 
56 	retval = vfs_statfs(sb, &st);
57 	if (retval)
58 		return retval;
59 
60 	if (sizeof(*buf) == sizeof(st))
61 		memcpy(buf, &st, sizeof(st));
62 	else {
63 		if (sizeof buf->f_blocks == 4) {
64 			if ((st.f_blocks | st.f_bfree | st.f_bavail) &
65 			    0xffffffff00000000ULL)
66 				return -EOVERFLOW;
67 			/*
68 			 * f_files and f_ffree may be -1; it's okay to stuff
69 			 * that into 32 bits
70 			 */
71 			if (st.f_files != -1 &&
72 			    (st.f_files & 0xffffffff00000000ULL))
73 				return -EOVERFLOW;
74 			if (st.f_ffree != -1 &&
75 			    (st.f_ffree & 0xffffffff00000000ULL))
76 				return -EOVERFLOW;
77 		}
78 
79 		buf->f_type = st.f_type;
80 		buf->f_bsize = st.f_bsize;
81 		buf->f_blocks = st.f_blocks;
82 		buf->f_bfree = st.f_bfree;
83 		buf->f_bavail = st.f_bavail;
84 		buf->f_files = st.f_files;
85 		buf->f_ffree = st.f_ffree;
86 		buf->f_fsid = st.f_fsid;
87 		buf->f_namelen = st.f_namelen;
88 		buf->f_frsize = st.f_frsize;
89 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
90 	}
91 	return 0;
92 }
93 
94 static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
95 {
96 	struct kstatfs st;
97 	int retval;
98 
99 	retval = vfs_statfs(sb, &st);
100 	if (retval)
101 		return retval;
102 
103 	if (sizeof(*buf) == sizeof(st))
104 		memcpy(buf, &st, sizeof(st));
105 	else {
106 		buf->f_type = st.f_type;
107 		buf->f_bsize = st.f_bsize;
108 		buf->f_blocks = st.f_blocks;
109 		buf->f_bfree = st.f_bfree;
110 		buf->f_bavail = st.f_bavail;
111 		buf->f_files = st.f_files;
112 		buf->f_ffree = st.f_ffree;
113 		buf->f_fsid = st.f_fsid;
114 		buf->f_namelen = st.f_namelen;
115 		buf->f_frsize = st.f_frsize;
116 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
117 	}
118 	return 0;
119 }
120 
121 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
122 {
123 	struct nameidata nd;
124 	int error;
125 
126 	error = user_path_walk(path, &nd);
127 	if (!error) {
128 		struct statfs tmp;
129 		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
130 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
131 			error = -EFAULT;
132 		path_release(&nd);
133 	}
134 	return error;
135 }
136 
137 
138 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
139 {
140 	struct nameidata nd;
141 	long error;
142 
143 	if (sz != sizeof(*buf))
144 		return -EINVAL;
145 	error = user_path_walk(path, &nd);
146 	if (!error) {
147 		struct statfs64 tmp;
148 		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
149 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
150 			error = -EFAULT;
151 		path_release(&nd);
152 	}
153 	return error;
154 }
155 
156 
157 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
158 {
159 	struct file * file;
160 	struct statfs tmp;
161 	int error;
162 
163 	error = -EBADF;
164 	file = fget(fd);
165 	if (!file)
166 		goto out;
167 	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
168 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
169 		error = -EFAULT;
170 	fput(file);
171 out:
172 	return error;
173 }
174 
175 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf)
176 {
177 	struct file * file;
178 	struct statfs64 tmp;
179 	int error;
180 
181 	if (sz != sizeof(*buf))
182 		return -EINVAL;
183 
184 	error = -EBADF;
185 	file = fget(fd);
186 	if (!file)
187 		goto out;
188 	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
189 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
190 		error = -EFAULT;
191 	fput(file);
192 out:
193 	return error;
194 }
195 
196 int do_truncate(struct dentry *dentry, loff_t length)
197 {
198 	int err;
199 	struct iattr newattrs;
200 
201 	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
202 	if (length < 0)
203 		return -EINVAL;
204 
205 	newattrs.ia_size = length;
206 	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
207 
208 	down(&dentry->d_inode->i_sem);
209 	err = notify_change(dentry, &newattrs);
210 	up(&dentry->d_inode->i_sem);
211 	return err;
212 }
213 
214 static inline long do_sys_truncate(const char __user * path, loff_t length)
215 {
216 	struct nameidata nd;
217 	struct inode * inode;
218 	int error;
219 
220 	error = -EINVAL;
221 	if (length < 0)	/* sorry, but loff_t says... */
222 		goto out;
223 
224 	error = user_path_walk(path, &nd);
225 	if (error)
226 		goto out;
227 	inode = nd.dentry->d_inode;
228 
229 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
230 	error = -EISDIR;
231 	if (S_ISDIR(inode->i_mode))
232 		goto dput_and_out;
233 
234 	error = -EINVAL;
235 	if (!S_ISREG(inode->i_mode))
236 		goto dput_and_out;
237 
238 	error = permission(inode,MAY_WRITE,&nd);
239 	if (error)
240 		goto dput_and_out;
241 
242 	error = -EROFS;
243 	if (IS_RDONLY(inode))
244 		goto dput_and_out;
245 
246 	error = -EPERM;
247 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
248 		goto dput_and_out;
249 
250 	/*
251 	 * Make sure that there are no leases.
252 	 */
253 	error = break_lease(inode, FMODE_WRITE);
254 	if (error)
255 		goto dput_and_out;
256 
257 	error = get_write_access(inode);
258 	if (error)
259 		goto dput_and_out;
260 
261 	error = locks_verify_truncate(inode, NULL, length);
262 	if (!error) {
263 		DQUOT_INIT(inode);
264 		error = do_truncate(nd.dentry, length);
265 	}
266 	put_write_access(inode);
267 
268 dput_and_out:
269 	path_release(&nd);
270 out:
271 	return error;
272 }
273 
274 asmlinkage long sys_truncate(const char __user * path, unsigned long length)
275 {
276 	/* on 32-bit boxen it will cut the range 2^31--2^32-1 off */
277 	return do_sys_truncate(path, (long)length);
278 }
279 
280 static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
281 {
282 	struct inode * inode;
283 	struct dentry *dentry;
284 	struct file * file;
285 	int error;
286 
287 	error = -EINVAL;
288 	if (length < 0)
289 		goto out;
290 	error = -EBADF;
291 	file = fget(fd);
292 	if (!file)
293 		goto out;
294 
295 	/* explicitly opened as large or we are on 64-bit box */
296 	if (file->f_flags & O_LARGEFILE)
297 		small = 0;
298 
299 	dentry = file->f_dentry;
300 	inode = dentry->d_inode;
301 	error = -EINVAL;
302 	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
303 		goto out_putf;
304 
305 	error = -EINVAL;
306 	/* Cannot ftruncate over 2^31 bytes without large file support */
307 	if (small && length > MAX_NON_LFS)
308 		goto out_putf;
309 
310 	error = -EPERM;
311 	if (IS_APPEND(inode))
312 		goto out_putf;
313 
314 	error = locks_verify_truncate(inode, file, length);
315 	if (!error)
316 		error = do_truncate(dentry, length);
317 out_putf:
318 	fput(file);
319 out:
320 	return error;
321 }
322 
323 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
324 {
325 	return do_sys_ftruncate(fd, length, 1);
326 }
327 
/* LFS versions of truncate are only needed on 32 bit machines */
#if BITS_PER_LONG == 32

/* truncate64(2): path-based truncate with a full loff_t length. */
asmlinkage long sys_truncate64(const char __user * path, loff_t length)
{
	long ret = do_sys_truncate(path, length);

	return ret;
}

/*
 * ftruncate64(2): descriptor-based truncate with a full loff_t length;
 * the non-LFS size limit is not applied.
 */
asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
{
	long ret = do_sys_ftruncate(fd, length, 0);

	return ret;
}

#endif /* BITS_PER_LONG == 32 */
340 
#ifdef __ARCH_WANT_SYS_UTIME

/*
 * sys_utime() can be implemented in user-level using sys_utimes().
 * Is it kept only for backwards compatibility?  If so, it could be
 * moved into the arch directories of the architectures that need it.
 */

/*
 * utime(2): set the access and modification times of @filename.
 *
 * If times==NULL, set access and modification to current time,
 * must be owner or have write permission.
 * Else, update from *times (whole seconds only), must be owner or
 * super user.
 *
 * Returns -EROFS for read-only inodes, -EPERM (explicit times) or
 * -EACCES (times==NULL) for inodes whose flags forbid the change, the
 * get_user() error if @times cannot be read, otherwise the result of
 * notify_change().
 */
asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
{
	struct nameidata nd;
	struct inode *inode;
	struct iattr attrs;
	int error;

	error = user_path_walk(filename, &nd);
	if (error)
		return error;
	inode = nd.dentry->d_inode;

	if (IS_RDONLY(inode)) {
		error = -EROFS;
		goto release;
	}

	/* Don't worry, the checks are done in inode_change_ok() */
	attrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;

	if (!times) {
		if (IS_IMMUTABLE(inode)) {
			error = -EACCES;
			goto release;
		}

		/* Non-owners additionally need write permission. */
		if (current->fsuid != inode->i_uid) {
			error = permission(inode, MAY_WRITE, &nd);
			if (error)
				goto release;
		}
	} else {
		if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
			error = -EPERM;
			goto release;
		}

		/* struct utimbuf only carries seconds. */
		attrs.ia_atime.tv_nsec = 0;
		attrs.ia_mtime.tv_nsec = 0;

		error = get_user(attrs.ia_atime.tv_sec, &times->actime);
		if (!error)
			error = get_user(attrs.ia_mtime.tv_sec, &times->modtime);
		if (error)
			goto release;

		attrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
	}

	down(&inode->i_sem);
	error = notify_change(nd.dentry, &attrs);
	up(&inode->i_sem);

release:
	path_release(&nd);
	return error;
}

#endif
405 
406 /* If times==NULL, set access and modification to current time,
407  * must be owner or have write permission.
408  * Else, update from *times, must be owner or super user.
409  */
410 long do_utimes(char __user * filename, struct timeval * times)
411 {
412 	int error;
413 	struct nameidata nd;
414 	struct inode * inode;
415 	struct iattr newattrs;
416 
417 	error = user_path_walk(filename, &nd);
418 
419 	if (error)
420 		goto out;
421 	inode = nd.dentry->d_inode;
422 
423 	error = -EROFS;
424 	if (IS_RDONLY(inode))
425 		goto dput_and_out;
426 
427 	/* Don't worry, the checks are done in inode_change_ok() */
428 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
429 	if (times) {
430 		error = -EPERM;
431                 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
432                         goto dput_and_out;
433 
434 		newattrs.ia_atime.tv_sec = times[0].tv_sec;
435 		newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000;
436 		newattrs.ia_mtime.tv_sec = times[1].tv_sec;
437 		newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000;
438 		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
439 	} else {
440 		error = -EACCES;
441                 if (IS_IMMUTABLE(inode))
442                         goto dput_and_out;
443 
444 		if (current->fsuid != inode->i_uid &&
445 		    (error = permission(inode,MAY_WRITE,&nd)) != 0)
446 			goto dput_and_out;
447 	}
448 	down(&inode->i_sem);
449 	error = notify_change(nd.dentry, &newattrs);
450 	up(&inode->i_sem);
451 dput_and_out:
452 	path_release(&nd);
453 out:
454 	return error;
455 }
456 
457 asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utimes)
458 {
459 	struct timeval times[2];
460 
461 	if (utimes && copy_from_user(&times, utimes, sizeof(times)))
462 		return -EFAULT;
463 	return do_utimes(filename, utimes ? times : NULL);
464 }
465 
466 
467 /*
468  * access() needs to use the real uid/gid, not the effective uid/gid.
469  * We do this by temporarily clearing all FS-related capabilities and
470  * switching the fsuid/fsgid around to the real ones.
471  */
472 asmlinkage long sys_access(const char __user * filename, int mode)
473 {
474 	struct nameidata nd;
475 	int old_fsuid, old_fsgid;
476 	kernel_cap_t old_cap;
477 	int res;
478 
479 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
480 		return -EINVAL;
481 
482 	old_fsuid = current->fsuid;
483 	old_fsgid = current->fsgid;
484 	old_cap = current->cap_effective;
485 
486 	current->fsuid = current->uid;
487 	current->fsgid = current->gid;
488 
489 	/*
490 	 * Clear the capabilities if we switch to a non-root user
491 	 *
492 	 * FIXME: There is a race here against sys_capset.  The
493 	 * capabilities can change yet we will restore the old
494 	 * value below.  We should hold task_capabilities_lock,
495 	 * but we cannot because user_path_walk can sleep.
496 	 */
497 	if (current->uid)
498 		cap_clear(current->cap_effective);
499 	else
500 		current->cap_effective = current->cap_permitted;
501 
502 	res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
503 	if (!res) {
504 		res = permission(nd.dentry->d_inode, mode, &nd);
505 		/* SuS v2 requires we report a read only fs too */
506 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
507 		   && !special_file(nd.dentry->d_inode->i_mode))
508 			res = -EROFS;
509 		path_release(&nd);
510 	}
511 
512 	current->fsuid = old_fsuid;
513 	current->fsgid = old_fsgid;
514 	current->cap_effective = old_cap;
515 
516 	return res;
517 }
518 
519 asmlinkage long sys_chdir(const char __user * filename)
520 {
521 	struct nameidata nd;
522 	int error;
523 
524 	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
525 	if (error)
526 		goto out;
527 
528 	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
529 	if (error)
530 		goto dput_and_out;
531 
532 	set_fs_pwd(current->fs, nd.mnt, nd.dentry);
533 
534 dput_and_out:
535 	path_release(&nd);
536 out:
537 	return error;
538 }
539 
540 asmlinkage long sys_fchdir(unsigned int fd)
541 {
542 	struct file *file;
543 	struct dentry *dentry;
544 	struct inode *inode;
545 	struct vfsmount *mnt;
546 	int error;
547 
548 	error = -EBADF;
549 	file = fget(fd);
550 	if (!file)
551 		goto out;
552 
553 	dentry = file->f_dentry;
554 	mnt = file->f_vfsmnt;
555 	inode = dentry->d_inode;
556 
557 	error = -ENOTDIR;
558 	if (!S_ISDIR(inode->i_mode))
559 		goto out_putf;
560 
561 	error = permission(inode, MAY_EXEC, NULL);
562 	if (!error)
563 		set_fs_pwd(current->fs, mnt, dentry);
564 out_putf:
565 	fput(file);
566 out:
567 	return error;
568 }
569 
570 asmlinkage long sys_chroot(const char __user * filename)
571 {
572 	struct nameidata nd;
573 	int error;
574 
575 	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
576 	if (error)
577 		goto out;
578 
579 	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
580 	if (error)
581 		goto dput_and_out;
582 
583 	error = -EPERM;
584 	if (!capable(CAP_SYS_CHROOT))
585 		goto dput_and_out;
586 
587 	set_fs_root(current->fs, nd.mnt, nd.dentry);
588 	set_fs_altroot();
589 	error = 0;
590 dput_and_out:
591 	path_release(&nd);
592 out:
593 	return error;
594 }
595 
596 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
597 {
598 	struct inode * inode;
599 	struct dentry * dentry;
600 	struct file * file;
601 	int err = -EBADF;
602 	struct iattr newattrs;
603 
604 	file = fget(fd);
605 	if (!file)
606 		goto out;
607 
608 	dentry = file->f_dentry;
609 	inode = dentry->d_inode;
610 
611 	err = -EROFS;
612 	if (IS_RDONLY(inode))
613 		goto out_putf;
614 	err = -EPERM;
615 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
616 		goto out_putf;
617 	down(&inode->i_sem);
618 	if (mode == (mode_t) -1)
619 		mode = inode->i_mode;
620 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
621 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
622 	err = notify_change(dentry, &newattrs);
623 	up(&inode->i_sem);
624 
625 out_putf:
626 	fput(file);
627 out:
628 	return err;
629 }
630 
631 asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
632 {
633 	struct nameidata nd;
634 	struct inode * inode;
635 	int error;
636 	struct iattr newattrs;
637 
638 	error = user_path_walk(filename, &nd);
639 	if (error)
640 		goto out;
641 	inode = nd.dentry->d_inode;
642 
643 	error = -EROFS;
644 	if (IS_RDONLY(inode))
645 		goto dput_and_out;
646 
647 	error = -EPERM;
648 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
649 		goto dput_and_out;
650 
651 	down(&inode->i_sem);
652 	if (mode == (mode_t) -1)
653 		mode = inode->i_mode;
654 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
655 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
656 	error = notify_change(nd.dentry, &newattrs);
657 	up(&inode->i_sem);
658 
659 dput_and_out:
660 	path_release(&nd);
661 out:
662 	return error;
663 }
664 
665 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
666 {
667 	struct inode * inode;
668 	int error;
669 	struct iattr newattrs;
670 
671 	error = -ENOENT;
672 	if (!(inode = dentry->d_inode)) {
673 		printk(KERN_ERR "chown_common: NULL inode\n");
674 		goto out;
675 	}
676 	error = -EROFS;
677 	if (IS_RDONLY(inode))
678 		goto out;
679 	error = -EPERM;
680 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
681 		goto out;
682 	newattrs.ia_valid =  ATTR_CTIME;
683 	if (user != (uid_t) -1) {
684 		newattrs.ia_valid |= ATTR_UID;
685 		newattrs.ia_uid = user;
686 	}
687 	if (group != (gid_t) -1) {
688 		newattrs.ia_valid |= ATTR_GID;
689 		newattrs.ia_gid = group;
690 	}
691 	if (!S_ISDIR(inode->i_mode))
692 		newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
693 	down(&inode->i_sem);
694 	error = notify_change(dentry, &newattrs);
695 	up(&inode->i_sem);
696 out:
697 	return error;
698 }
699 
700 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
701 {
702 	struct nameidata nd;
703 	int error;
704 
705 	error = user_path_walk(filename, &nd);
706 	if (!error) {
707 		error = chown_common(nd.dentry, user, group);
708 		path_release(&nd);
709 	}
710 	return error;
711 }
712 
713 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
714 {
715 	struct nameidata nd;
716 	int error;
717 
718 	error = user_path_walk_link(filename, &nd);
719 	if (!error) {
720 		error = chown_common(nd.dentry, user, group);
721 		path_release(&nd);
722 	}
723 	return error;
724 }
725 
726 
727 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
728 {
729 	struct file * file;
730 	int error = -EBADF;
731 
732 	file = fget(fd);
733 	if (file) {
734 		error = chown_common(file->f_dentry, user, group);
735 		fput(file);
736 	}
737 	return error;
738 }
739 
740 /*
741  * Note that while the flag value (low two bits) for sys_open means:
742  *	00 - read-only
743  *	01 - write-only
744  *	10 - read-write
745  *	11 - special
746  * it is changed into
747  *	00 - no permissions needed
748  *	01 - read-permission
749  *	10 - write-permission
750  *	11 - read-write
751  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
752  * used by symlinks.
753  */
754 struct file *filp_open(const char * filename, int flags, int mode)
755 {
756 	int namei_flags, error;
757 	struct nameidata nd;
758 
759 	namei_flags = flags;
760 	if ((namei_flags+1) & O_ACCMODE)
761 		namei_flags++;
762 	if (namei_flags & O_TRUNC)
763 		namei_flags |= 2;
764 
765 	error = open_namei(filename, namei_flags, mode, &nd);
766 	if (!error)
767 		return dentry_open(nd.dentry, nd.mnt, flags);
768 
769 	return ERR_PTR(error);
770 }
771 
772 EXPORT_SYMBOL(filp_open);
773 
774 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
775 {
776 	struct file * f;
777 	struct inode *inode;
778 	int error;
779 
780 	error = -ENFILE;
781 	f = get_empty_filp();
782 	if (!f)
783 		goto cleanup_dentry;
784 	f->f_flags = flags;
785 	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
786 	inode = dentry->d_inode;
787 	if (f->f_mode & FMODE_WRITE) {
788 		error = get_write_access(inode);
789 		if (error)
790 			goto cleanup_file;
791 	}
792 
793 	f->f_mapping = inode->i_mapping;
794 	f->f_dentry = dentry;
795 	f->f_vfsmnt = mnt;
796 	f->f_pos = 0;
797 	f->f_op = fops_get(inode->i_fop);
798 	file_move(f, &inode->i_sb->s_files);
799 
800 	if (f->f_op && f->f_op->open) {
801 		error = f->f_op->open(inode,f);
802 		if (error)
803 			goto cleanup_all;
804 	}
805 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
806 
807 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
808 
809 	/* NB: we're sure to have correct a_ops only after f_op->open */
810 	if (f->f_flags & O_DIRECT) {
811 		if (!f->f_mapping->a_ops ||
812 		    ((!f->f_mapping->a_ops->direct_IO) &&
813 		    (!f->f_mapping->a_ops->get_xip_page))) {
814 			fput(f);
815 			f = ERR_PTR(-EINVAL);
816 		}
817 	}
818 
819 	return f;
820 
821 cleanup_all:
822 	fops_put(f->f_op);
823 	if (f->f_mode & FMODE_WRITE)
824 		put_write_access(inode);
825 	file_kill(f);
826 	f->f_dentry = NULL;
827 	f->f_vfsmnt = NULL;
828 cleanup_file:
829 	put_filp(f);
830 cleanup_dentry:
831 	dput(dentry);
832 	mntput(mnt);
833 	return ERR_PTR(error);
834 }
835 
836 EXPORT_SYMBOL(dentry_open);
837 
838 /*
839  * Find an empty file descriptor entry, and mark it busy.
840  */
841 int get_unused_fd(void)
842 {
843 	struct files_struct * files = current->files;
844 	int fd, error;
845 
846   	error = -EMFILE;
847 	spin_lock(&files->file_lock);
848 
849 repeat:
850  	fd = find_next_zero_bit(files->open_fds->fds_bits,
851 				files->max_fdset,
852 				files->next_fd);
853 
854 	/*
855 	 * N.B. For clone tasks sharing a files structure, this test
856 	 * will limit the total number of files that can be opened.
857 	 */
858 	if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
859 		goto out;
860 
861 	/* Do we need to expand the fd array or fd set?  */
862 	error = expand_files(files, fd);
863 	if (error < 0)
864 		goto out;
865 
866 	if (error) {
867 		/*
868 	 	 * If we needed to expand the fs array we
869 		 * might have blocked - try again.
870 		 */
871 		error = -EMFILE;
872 		goto repeat;
873 	}
874 
875 	FD_SET(fd, files->open_fds);
876 	FD_CLR(fd, files->close_on_exec);
877 	files->next_fd = fd + 1;
878 #if 1
879 	/* Sanity check */
880 	if (files->fd[fd] != NULL) {
881 		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
882 		files->fd[fd] = NULL;
883 	}
884 #endif
885 	error = fd;
886 
887 out:
888 	spin_unlock(&files->file_lock);
889 	return error;
890 }
891 
892 EXPORT_SYMBOL(get_unused_fd);
893 
894 static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
895 {
896 	__FD_CLR(fd, files->open_fds);
897 	if (fd < files->next_fd)
898 		files->next_fd = fd;
899 }
900 
901 void fastcall put_unused_fd(unsigned int fd)
902 {
903 	struct files_struct *files = current->files;
904 	spin_lock(&files->file_lock);
905 	__put_unused_fd(files, fd);
906 	spin_unlock(&files->file_lock);
907 }
908 
909 EXPORT_SYMBOL(put_unused_fd);
910 
911 /*
912  * Install a file pointer in the fd array.
913  *
914  * The VFS is full of places where we drop the files lock between
915  * setting the open_fds bitmap and installing the file in the file
916  * array.  At any such point, we are vulnerable to a dup2() race
917  * installing a file in the array before us.  We need to detect this and
918  * fput() the struct file we are about to overwrite in this case.
919  *
920  * It should never happen - if we allow dup2() do it, _really_ bad things
921  * will follow.
922  */
923 
924 void fastcall fd_install(unsigned int fd, struct file * file)
925 {
926 	struct files_struct *files = current->files;
927 	spin_lock(&files->file_lock);
928 	if (unlikely(files->fd[fd] != NULL))
929 		BUG();
930 	files->fd[fd] = file;
931 	spin_unlock(&files->file_lock);
932 }
933 
934 EXPORT_SYMBOL(fd_install);
935 
936 long do_sys_open(const char __user *filename, int flags, int mode)
937 {
938 	char *tmp = getname(filename);
939 	int fd = PTR_ERR(tmp);
940 
941 	if (!IS_ERR(tmp)) {
942 		fd = get_unused_fd();
943 		if (fd >= 0) {
944 			struct file *f = filp_open(tmp, flags, mode);
945 			if (IS_ERR(f)) {
946 				put_unused_fd(fd);
947 				fd = PTR_ERR(f);
948 			} else {
949 				fsnotify_open(f->f_dentry);
950 				fd_install(fd, f);
951 			}
952 		}
953 		putname(tmp);
954 	}
955 	return fd;
956 }
957 
958 asmlinkage long sys_open(const char __user *filename, int flags, int mode)
959 {
960 	if (force_o_largefile())
961 		flags |= O_LARGEFILE;
962 
963 	return do_sys_open(filename, flags, mode);
964 }
965 EXPORT_SYMBOL_GPL(sys_open);
966 
967 #ifndef __alpha__
968 
969 /*
970  * For backward compatibility?  Maybe this should be moved
971  * into arch/i386 instead?
972  */
973 asmlinkage long sys_creat(const char __user * pathname, int mode)
974 {
975 	return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
976 }
977 
978 #endif
979 
980 /*
981  * "id" is the POSIX thread ID. We use the
982  * files pointer for this..
983  */
984 int filp_close(struct file *filp, fl_owner_t id)
985 {
986 	int retval = 0;
987 
988 	if (!file_count(filp)) {
989 		printk(KERN_ERR "VFS: Close: file count is 0\n");
990 		return 0;
991 	}
992 
993 	if (filp->f_op && filp->f_op->flush)
994 		retval = filp->f_op->flush(filp);
995 
996 	dnotify_flush(filp, id);
997 	locks_remove_posix(filp, id);
998 	fput(filp);
999 	return retval;
1000 }
1001 
1002 EXPORT_SYMBOL(filp_close);
1003 
1004 /*
1005  * Careful here! We test whether the file pointer is NULL before
1006  * releasing the fd. This ensures that one clone task can't release
1007  * an fd while another clone is opening it.
1008  */
1009 asmlinkage long sys_close(unsigned int fd)
1010 {
1011 	struct file * filp;
1012 	struct files_struct *files = current->files;
1013 
1014 	spin_lock(&files->file_lock);
1015 	if (fd >= files->max_fds)
1016 		goto out_unlock;
1017 	filp = files->fd[fd];
1018 	if (!filp)
1019 		goto out_unlock;
1020 	files->fd[fd] = NULL;
1021 	FD_CLR(fd, files->close_on_exec);
1022 	__put_unused_fd(files, fd);
1023 	spin_unlock(&files->file_lock);
1024 	return filp_close(filp, files);
1025 
1026 out_unlock:
1027 	spin_unlock(&files->file_lock);
1028 	return -EBADF;
1029 }
1030 
1031 EXPORT_SYMBOL(sys_close);
1032 
1033 /*
1034  * This routine simulates a hangup on the tty, to arrange that users
1035  * are given clean terminals at login time.
1036  */
1037 asmlinkage long sys_vhangup(void)
1038 {
1039 	if (capable(CAP_SYS_TTY_CONFIG)) {
1040 		tty_vhangup(current->signal->tty);
1041 		return 0;
1042 	}
1043 	return -EPERM;
1044 }
1045 
1046 /*
1047  * Called when an inode is about to be open.
1048  * We use this to disallow opening large files on 32bit systems if
1049  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1050  * on this flag in sys_open.
1051  */
1052 int generic_file_open(struct inode * inode, struct file * filp)
1053 {
1054 	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1055 		return -EFBIG;
1056 	return 0;
1057 }
1058 
1059 EXPORT_SYMBOL(generic_file_open);
1060 
1061 /*
1062  * This is used by subsystems that don't want seekable
1063  * file descriptors
1064  */
1065 int nonseekable_open(struct inode *inode, struct file *filp)
1066 {
1067 	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1068 	return 0;
1069 }
1070 
1071 EXPORT_SYMBOL(nonseekable_open);
1072