xref: /linux/fs/open.c (revision 54a8a2220c936a47840c9a3d74910c5a56fae2ed)
1 /*
2  *  linux/fs/open.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6 
7 #include <linux/string.h>
8 #include <linux/mm.h>
9 #include <linux/utime.h>
10 #include <linux/file.h>
11 #include <linux/smp_lock.h>
12 #include <linux/quotaops.h>
13 #include <linux/fsnotify.h>
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 #include <linux/tty.h>
17 #include <linux/namei.h>
18 #include <linux/backing-dev.h>
19 #include <linux/security.h>
20 #include <linux/mount.h>
21 #include <linux/vfs.h>
22 #include <asm/uaccess.h>
23 #include <linux/fs.h>
24 #include <linux/personality.h>
25 #include <linux/pagemap.h>
26 #include <linux/syscalls.h>
27 #include <linux/rcupdate.h>
28 
29 #include <asm/unistd.h>
30 
31 int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
32 {
33 	int retval = -ENODEV;
34 
35 	if (sb) {
36 		retval = -ENOSYS;
37 		if (sb->s_op->statfs) {
38 			memset(buf, 0, sizeof(*buf));
39 			retval = security_sb_statfs(sb);
40 			if (retval)
41 				return retval;
42 			retval = sb->s_op->statfs(sb, buf);
43 			if (retval == 0 && buf->f_frsize == 0)
44 				buf->f_frsize = buf->f_bsize;
45 		}
46 	}
47 	return retval;
48 }
49 
50 EXPORT_SYMBOL(vfs_statfs);
51 
52 static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
53 {
54 	struct kstatfs st;
55 	int retval;
56 
57 	retval = vfs_statfs(sb, &st);
58 	if (retval)
59 		return retval;
60 
61 	if (sizeof(*buf) == sizeof(st))
62 		memcpy(buf, &st, sizeof(st));
63 	else {
64 		if (sizeof buf->f_blocks == 4) {
65 			if ((st.f_blocks | st.f_bfree | st.f_bavail) &
66 			    0xffffffff00000000ULL)
67 				return -EOVERFLOW;
68 			/*
69 			 * f_files and f_ffree may be -1; it's okay to stuff
70 			 * that into 32 bits
71 			 */
72 			if (st.f_files != -1 &&
73 			    (st.f_files & 0xffffffff00000000ULL))
74 				return -EOVERFLOW;
75 			if (st.f_ffree != -1 &&
76 			    (st.f_ffree & 0xffffffff00000000ULL))
77 				return -EOVERFLOW;
78 		}
79 
80 		buf->f_type = st.f_type;
81 		buf->f_bsize = st.f_bsize;
82 		buf->f_blocks = st.f_blocks;
83 		buf->f_bfree = st.f_bfree;
84 		buf->f_bavail = st.f_bavail;
85 		buf->f_files = st.f_files;
86 		buf->f_ffree = st.f_ffree;
87 		buf->f_fsid = st.f_fsid;
88 		buf->f_namelen = st.f_namelen;
89 		buf->f_frsize = st.f_frsize;
90 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
91 	}
92 	return 0;
93 }
94 
95 static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
96 {
97 	struct kstatfs st;
98 	int retval;
99 
100 	retval = vfs_statfs(sb, &st);
101 	if (retval)
102 		return retval;
103 
104 	if (sizeof(*buf) == sizeof(st))
105 		memcpy(buf, &st, sizeof(st));
106 	else {
107 		buf->f_type = st.f_type;
108 		buf->f_bsize = st.f_bsize;
109 		buf->f_blocks = st.f_blocks;
110 		buf->f_bfree = st.f_bfree;
111 		buf->f_bavail = st.f_bavail;
112 		buf->f_files = st.f_files;
113 		buf->f_ffree = st.f_ffree;
114 		buf->f_fsid = st.f_fsid;
115 		buf->f_namelen = st.f_namelen;
116 		buf->f_frsize = st.f_frsize;
117 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
118 	}
119 	return 0;
120 }
121 
122 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
123 {
124 	struct nameidata nd;
125 	int error;
126 
127 	error = user_path_walk(path, &nd);
128 	if (!error) {
129 		struct statfs tmp;
130 		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
131 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
132 			error = -EFAULT;
133 		path_release(&nd);
134 	}
135 	return error;
136 }
137 
138 
139 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
140 {
141 	struct nameidata nd;
142 	long error;
143 
144 	if (sz != sizeof(*buf))
145 		return -EINVAL;
146 	error = user_path_walk(path, &nd);
147 	if (!error) {
148 		struct statfs64 tmp;
149 		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
150 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
151 			error = -EFAULT;
152 		path_release(&nd);
153 	}
154 	return error;
155 }
156 
157 
158 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
159 {
160 	struct file * file;
161 	struct statfs tmp;
162 	int error;
163 
164 	error = -EBADF;
165 	file = fget(fd);
166 	if (!file)
167 		goto out;
168 	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
169 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
170 		error = -EFAULT;
171 	fput(file);
172 out:
173 	return error;
174 }
175 
176 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf)
177 {
178 	struct file * file;
179 	struct statfs64 tmp;
180 	int error;
181 
182 	if (sz != sizeof(*buf))
183 		return -EINVAL;
184 
185 	error = -EBADF;
186 	file = fget(fd);
187 	if (!file)
188 		goto out;
189 	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
190 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
191 		error = -EFAULT;
192 	fput(file);
193 out:
194 	return error;
195 }
196 
197 int do_truncate(struct dentry *dentry, loff_t length)
198 {
199 	int err;
200 	struct iattr newattrs;
201 
202 	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
203 	if (length < 0)
204 		return -EINVAL;
205 
206 	newattrs.ia_size = length;
207 	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
208 
209 	down(&dentry->d_inode->i_sem);
210 	err = notify_change(dentry, &newattrs);
211 	up(&dentry->d_inode->i_sem);
212 	return err;
213 }
214 
215 static inline long do_sys_truncate(const char __user * path, loff_t length)
216 {
217 	struct nameidata nd;
218 	struct inode * inode;
219 	int error;
220 
221 	error = -EINVAL;
222 	if (length < 0)	/* sorry, but loff_t says... */
223 		goto out;
224 
225 	error = user_path_walk(path, &nd);
226 	if (error)
227 		goto out;
228 	inode = nd.dentry->d_inode;
229 
230 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
231 	error = -EISDIR;
232 	if (S_ISDIR(inode->i_mode))
233 		goto dput_and_out;
234 
235 	error = -EINVAL;
236 	if (!S_ISREG(inode->i_mode))
237 		goto dput_and_out;
238 
239 	error = permission(inode,MAY_WRITE,&nd);
240 	if (error)
241 		goto dput_and_out;
242 
243 	error = -EROFS;
244 	if (IS_RDONLY(inode))
245 		goto dput_and_out;
246 
247 	error = -EPERM;
248 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
249 		goto dput_and_out;
250 
251 	/*
252 	 * Make sure that there are no leases.
253 	 */
254 	error = break_lease(inode, FMODE_WRITE);
255 	if (error)
256 		goto dput_and_out;
257 
258 	error = get_write_access(inode);
259 	if (error)
260 		goto dput_and_out;
261 
262 	error = locks_verify_truncate(inode, NULL, length);
263 	if (!error) {
264 		DQUOT_INIT(inode);
265 		error = do_truncate(nd.dentry, length);
266 	}
267 	put_write_access(inode);
268 
269 dput_and_out:
270 	path_release(&nd);
271 out:
272 	return error;
273 }
274 
275 asmlinkage long sys_truncate(const char __user * path, unsigned long length)
276 {
277 	/* on 32-bit boxen it will cut the range 2^31--2^32-1 off */
278 	return do_sys_truncate(path, (long)length);
279 }
280 
281 static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
282 {
283 	struct inode * inode;
284 	struct dentry *dentry;
285 	struct file * file;
286 	int error;
287 
288 	error = -EINVAL;
289 	if (length < 0)
290 		goto out;
291 	error = -EBADF;
292 	file = fget(fd);
293 	if (!file)
294 		goto out;
295 
296 	/* explicitly opened as large or we are on 64-bit box */
297 	if (file->f_flags & O_LARGEFILE)
298 		small = 0;
299 
300 	dentry = file->f_dentry;
301 	inode = dentry->d_inode;
302 	error = -EINVAL;
303 	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
304 		goto out_putf;
305 
306 	error = -EINVAL;
307 	/* Cannot ftruncate over 2^31 bytes without large file support */
308 	if (small && length > MAX_NON_LFS)
309 		goto out_putf;
310 
311 	error = -EPERM;
312 	if (IS_APPEND(inode))
313 		goto out_putf;
314 
315 	error = locks_verify_truncate(inode, file, length);
316 	if (!error)
317 		error = do_truncate(dentry, length);
318 out_putf:
319 	fput(file);
320 out:
321 	return error;
322 }
323 
324 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
325 {
326 	return do_sys_ftruncate(fd, length, 1);
327 }
328 
/*
 * Large-file (LFS) variants of truncate/ftruncate.  They take a full
 * loff_t length and are only built when BITS_PER_LONG is 32.
 */
#if BITS_PER_LONG == 32
/* truncate64(2): path-based truncate with a 64-bit length. */
asmlinkage long sys_truncate64(const char __user * path, loff_t length)
{
	long ret = do_sys_truncate(path, length);

	return ret;
}

/*
 * ftruncate64(2): descriptor-based truncate with a 64-bit length; the
 * "small" flag is 0, so no MAX_NON_LFS limit is applied.
 */
asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
{
	long ret = do_sys_ftruncate(fd, length, 0);

	return ret;
}
#endif
341 
#ifdef __ARCH_WANT_SYS_UTIME

/*
 * sys_utime() can be implemented in user-level using sys_utimes().
 * Is this for backwards compatibility?  If so, why not move it
 * into the appropriate arch directory (for those architectures that
 * need it).
 */

/*
 * utime(2): set access and modification times with one-second
 * resolution.
 *
 * If times==NULL, set access and modification to current time,
 * must be owner or have write permission.
 * Else, update from *times, must be owner or super user.
 *
 * Returns the lookup error, -EROFS when IS_RDONLY() is true for the
 * inode, -EPERM / -EACCES for append-only or immutable inodes (see the
 * two branches below), a get_user() or permission() error, or the
 * result of notify_change().
 */
asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
{
	struct nameidata nd;
	struct iattr attrs;
	struct inode *inode;
	int error;

	error = user_path_walk(filename, &nd);
	if (error)
		return error;
	inode = nd.dentry->d_inode;

	if (IS_RDONLY(inode)) {
		error = -EROFS;
		goto release;
	}

	/* Don't worry, the checks are done in inode_change_ok() */
	attrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;

	if (times) {
		/* Explicit timestamps supplied by the caller. */
		if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
			error = -EPERM;
			goto release;
		}

		error = get_user(attrs.ia_atime.tv_sec, &times->actime);
		if (!error)
			error = get_user(attrs.ia_mtime.tv_sec, &times->modtime);
		if (error)
			goto release;

		/* struct utimbuf only carries whole seconds. */
		attrs.ia_atime.tv_nsec = 0;
		attrs.ia_mtime.tv_nsec = 0;
		attrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
	} else {
		/* "Touch" to the current time. */
		if (IS_IMMUTABLE(inode)) {
			error = -EACCES;
			goto release;
		}

		/* Non-owners additionally need write permission. */
		if (current->fsuid != inode->i_uid) {
			error = permission(inode, MAY_WRITE, &nd);
			if (error)
				goto release;
		}
	}

	down(&inode->i_sem);
	error = notify_change(nd.dentry, &attrs);
	up(&inode->i_sem);

release:
	path_release(&nd);
	return error;
}

#endif
406 
407 /* If times==NULL, set access and modification to current time,
408  * must be owner or have write permission.
409  * Else, update from *times, must be owner or super user.
410  */
411 long do_utimes(char __user * filename, struct timeval * times)
412 {
413 	int error;
414 	struct nameidata nd;
415 	struct inode * inode;
416 	struct iattr newattrs;
417 
418 	error = user_path_walk(filename, &nd);
419 
420 	if (error)
421 		goto out;
422 	inode = nd.dentry->d_inode;
423 
424 	error = -EROFS;
425 	if (IS_RDONLY(inode))
426 		goto dput_and_out;
427 
428 	/* Don't worry, the checks are done in inode_change_ok() */
429 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
430 	if (times) {
431 		error = -EPERM;
432                 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
433                         goto dput_and_out;
434 
435 		newattrs.ia_atime.tv_sec = times[0].tv_sec;
436 		newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000;
437 		newattrs.ia_mtime.tv_sec = times[1].tv_sec;
438 		newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000;
439 		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
440 	} else {
441 		error = -EACCES;
442                 if (IS_IMMUTABLE(inode))
443                         goto dput_and_out;
444 
445 		if (current->fsuid != inode->i_uid &&
446 		    (error = permission(inode,MAY_WRITE,&nd)) != 0)
447 			goto dput_and_out;
448 	}
449 	down(&inode->i_sem);
450 	error = notify_change(nd.dentry, &newattrs);
451 	up(&inode->i_sem);
452 dput_and_out:
453 	path_release(&nd);
454 out:
455 	return error;
456 }
457 
458 asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utimes)
459 {
460 	struct timeval times[2];
461 
462 	if (utimes && copy_from_user(&times, utimes, sizeof(times)))
463 		return -EFAULT;
464 	return do_utimes(filename, utimes ? times : NULL);
465 }
466 
467 
468 /*
469  * access() needs to use the real uid/gid, not the effective uid/gid.
470  * We do this by temporarily clearing all FS-related capabilities and
471  * switching the fsuid/fsgid around to the real ones.
472  */
473 asmlinkage long sys_access(const char __user * filename, int mode)
474 {
475 	struct nameidata nd;
476 	int old_fsuid, old_fsgid;
477 	kernel_cap_t old_cap;
478 	int res;
479 
480 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
481 		return -EINVAL;
482 
483 	old_fsuid = current->fsuid;
484 	old_fsgid = current->fsgid;
485 	old_cap = current->cap_effective;
486 
487 	current->fsuid = current->uid;
488 	current->fsgid = current->gid;
489 
490 	/*
491 	 * Clear the capabilities if we switch to a non-root user
492 	 *
493 	 * FIXME: There is a race here against sys_capset.  The
494 	 * capabilities can change yet we will restore the old
495 	 * value below.  We should hold task_capabilities_lock,
496 	 * but we cannot because user_path_walk can sleep.
497 	 */
498 	if (current->uid)
499 		cap_clear(current->cap_effective);
500 	else
501 		current->cap_effective = current->cap_permitted;
502 
503 	res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
504 	if (!res) {
505 		res = permission(nd.dentry->d_inode, mode, &nd);
506 		/* SuS v2 requires we report a read only fs too */
507 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
508 		   && !special_file(nd.dentry->d_inode->i_mode))
509 			res = -EROFS;
510 		path_release(&nd);
511 	}
512 
513 	current->fsuid = old_fsuid;
514 	current->fsgid = old_fsgid;
515 	current->cap_effective = old_cap;
516 
517 	return res;
518 }
519 
520 asmlinkage long sys_chdir(const char __user * filename)
521 {
522 	struct nameidata nd;
523 	int error;
524 
525 	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
526 	if (error)
527 		goto out;
528 
529 	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
530 	if (error)
531 		goto dput_and_out;
532 
533 	set_fs_pwd(current->fs, nd.mnt, nd.dentry);
534 
535 dput_and_out:
536 	path_release(&nd);
537 out:
538 	return error;
539 }
540 
541 asmlinkage long sys_fchdir(unsigned int fd)
542 {
543 	struct file *file;
544 	struct dentry *dentry;
545 	struct inode *inode;
546 	struct vfsmount *mnt;
547 	int error;
548 
549 	error = -EBADF;
550 	file = fget(fd);
551 	if (!file)
552 		goto out;
553 
554 	dentry = file->f_dentry;
555 	mnt = file->f_vfsmnt;
556 	inode = dentry->d_inode;
557 
558 	error = -ENOTDIR;
559 	if (!S_ISDIR(inode->i_mode))
560 		goto out_putf;
561 
562 	error = permission(inode, MAY_EXEC, NULL);
563 	if (!error)
564 		set_fs_pwd(current->fs, mnt, dentry);
565 out_putf:
566 	fput(file);
567 out:
568 	return error;
569 }
570 
571 asmlinkage long sys_chroot(const char __user * filename)
572 {
573 	struct nameidata nd;
574 	int error;
575 
576 	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
577 	if (error)
578 		goto out;
579 
580 	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
581 	if (error)
582 		goto dput_and_out;
583 
584 	error = -EPERM;
585 	if (!capable(CAP_SYS_CHROOT))
586 		goto dput_and_out;
587 
588 	set_fs_root(current->fs, nd.mnt, nd.dentry);
589 	set_fs_altroot();
590 	error = 0;
591 dput_and_out:
592 	path_release(&nd);
593 out:
594 	return error;
595 }
596 
597 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
598 {
599 	struct inode * inode;
600 	struct dentry * dentry;
601 	struct file * file;
602 	int err = -EBADF;
603 	struct iattr newattrs;
604 
605 	file = fget(fd);
606 	if (!file)
607 		goto out;
608 
609 	dentry = file->f_dentry;
610 	inode = dentry->d_inode;
611 
612 	err = -EROFS;
613 	if (IS_RDONLY(inode))
614 		goto out_putf;
615 	err = -EPERM;
616 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
617 		goto out_putf;
618 	down(&inode->i_sem);
619 	if (mode == (mode_t) -1)
620 		mode = inode->i_mode;
621 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
622 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
623 	err = notify_change(dentry, &newattrs);
624 	up(&inode->i_sem);
625 
626 out_putf:
627 	fput(file);
628 out:
629 	return err;
630 }
631 
632 asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
633 {
634 	struct nameidata nd;
635 	struct inode * inode;
636 	int error;
637 	struct iattr newattrs;
638 
639 	error = user_path_walk(filename, &nd);
640 	if (error)
641 		goto out;
642 	inode = nd.dentry->d_inode;
643 
644 	error = -EROFS;
645 	if (IS_RDONLY(inode))
646 		goto dput_and_out;
647 
648 	error = -EPERM;
649 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
650 		goto dput_and_out;
651 
652 	down(&inode->i_sem);
653 	if (mode == (mode_t) -1)
654 		mode = inode->i_mode;
655 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
656 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
657 	error = notify_change(nd.dentry, &newattrs);
658 	up(&inode->i_sem);
659 
660 dput_and_out:
661 	path_release(&nd);
662 out:
663 	return error;
664 }
665 
666 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
667 {
668 	struct inode * inode;
669 	int error;
670 	struct iattr newattrs;
671 
672 	error = -ENOENT;
673 	if (!(inode = dentry->d_inode)) {
674 		printk(KERN_ERR "chown_common: NULL inode\n");
675 		goto out;
676 	}
677 	error = -EROFS;
678 	if (IS_RDONLY(inode))
679 		goto out;
680 	error = -EPERM;
681 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
682 		goto out;
683 	newattrs.ia_valid =  ATTR_CTIME;
684 	if (user != (uid_t) -1) {
685 		newattrs.ia_valid |= ATTR_UID;
686 		newattrs.ia_uid = user;
687 	}
688 	if (group != (gid_t) -1) {
689 		newattrs.ia_valid |= ATTR_GID;
690 		newattrs.ia_gid = group;
691 	}
692 	if (!S_ISDIR(inode->i_mode))
693 		newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
694 	down(&inode->i_sem);
695 	error = notify_change(dentry, &newattrs);
696 	up(&inode->i_sem);
697 out:
698 	return error;
699 }
700 
701 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
702 {
703 	struct nameidata nd;
704 	int error;
705 
706 	error = user_path_walk(filename, &nd);
707 	if (!error) {
708 		error = chown_common(nd.dentry, user, group);
709 		path_release(&nd);
710 	}
711 	return error;
712 }
713 
714 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
715 {
716 	struct nameidata nd;
717 	int error;
718 
719 	error = user_path_walk_link(filename, &nd);
720 	if (!error) {
721 		error = chown_common(nd.dentry, user, group);
722 		path_release(&nd);
723 	}
724 	return error;
725 }
726 
727 
728 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
729 {
730 	struct file * file;
731 	int error = -EBADF;
732 
733 	file = fget(fd);
734 	if (file) {
735 		error = chown_common(file->f_dentry, user, group);
736 		fput(file);
737 	}
738 	return error;
739 }
740 
741 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
742 					int flags, struct file *f)
743 {
744 	struct inode *inode;
745 	int error;
746 
747 	f->f_flags = flags;
748 	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
749 				FMODE_PREAD | FMODE_PWRITE;
750 	inode = dentry->d_inode;
751 	if (f->f_mode & FMODE_WRITE) {
752 		error = get_write_access(inode);
753 		if (error)
754 			goto cleanup_file;
755 	}
756 
757 	f->f_mapping = inode->i_mapping;
758 	f->f_dentry = dentry;
759 	f->f_vfsmnt = mnt;
760 	f->f_pos = 0;
761 	f->f_op = fops_get(inode->i_fop);
762 	file_move(f, &inode->i_sb->s_files);
763 
764 	if (f->f_op && f->f_op->open) {
765 		error = f->f_op->open(inode,f);
766 		if (error)
767 			goto cleanup_all;
768 	}
769 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
770 
771 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
772 
773 	/* NB: we're sure to have correct a_ops only after f_op->open */
774 	if (f->f_flags & O_DIRECT) {
775 		if (!f->f_mapping->a_ops ||
776 		    ((!f->f_mapping->a_ops->direct_IO) &&
777 		    (!f->f_mapping->a_ops->get_xip_page))) {
778 			fput(f);
779 			f = ERR_PTR(-EINVAL);
780 		}
781 	}
782 
783 	return f;
784 
785 cleanup_all:
786 	fops_put(f->f_op);
787 	if (f->f_mode & FMODE_WRITE)
788 		put_write_access(inode);
789 	file_kill(f);
790 	f->f_dentry = NULL;
791 	f->f_vfsmnt = NULL;
792 cleanup_file:
793 	put_filp(f);
794 	dput(dentry);
795 	mntput(mnt);
796 	return ERR_PTR(error);
797 }
798 
799 /*
800  * Note that while the flag value (low two bits) for sys_open means:
801  *	00 - read-only
802  *	01 - write-only
803  *	10 - read-write
804  *	11 - special
805  * it is changed into
806  *	00 - no permissions needed
807  *	01 - read-permission
808  *	10 - write-permission
809  *	11 - read-write
810  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
811  * used by symlinks.
812  */
813 struct file *filp_open(const char * filename, int flags, int mode)
814 {
815 	int namei_flags, error;
816 	struct nameidata nd;
817 	struct file *f;
818 
819 	namei_flags = flags;
820 	if ((namei_flags+1) & O_ACCMODE)
821 		namei_flags++;
822 	if (namei_flags & O_TRUNC)
823 		namei_flags |= 2;
824 
825 	error = -ENFILE;
826 	f = get_empty_filp();
827 	if (f == NULL)
828 		return ERR_PTR(error);
829 
830 	error = open_namei(filename, namei_flags, mode, &nd);
831 	if (!error)
832 		return __dentry_open(nd.dentry, nd.mnt, flags, f);
833 
834 	put_filp(f);
835 	return ERR_PTR(error);
836 }
837 EXPORT_SYMBOL(filp_open);
838 
839 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
840 {
841 	int error;
842 	struct file *f;
843 
844 	error = -ENFILE;
845 	f = get_empty_filp();
846 	if (f == NULL)
847 		return ERR_PTR(error);
848 
849 	return __dentry_open(dentry, mnt, flags, f);
850 }
851 EXPORT_SYMBOL(dentry_open);
852 
853 /*
854  * Find an empty file descriptor entry, and mark it busy.
855  */
856 int get_unused_fd(void)
857 {
858 	struct files_struct * files = current->files;
859 	int fd, error;
860 	struct fdtable *fdt;
861 
862   	error = -EMFILE;
863 	spin_lock(&files->file_lock);
864 
865 repeat:
866 	fdt = files_fdtable(files);
867  	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
868 				fdt->max_fdset,
869 				fdt->next_fd);
870 
871 	/*
872 	 * N.B. For clone tasks sharing a files structure, this test
873 	 * will limit the total number of files that can be opened.
874 	 */
875 	if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
876 		goto out;
877 
878 	/* Do we need to expand the fd array or fd set?  */
879 	error = expand_files(files, fd);
880 	if (error < 0)
881 		goto out;
882 
883 	if (error) {
884 		/*
885 	 	 * If we needed to expand the fs array we
886 		 * might have blocked - try again.
887 		 */
888 		error = -EMFILE;
889 		goto repeat;
890 	}
891 
892 	FD_SET(fd, fdt->open_fds);
893 	FD_CLR(fd, fdt->close_on_exec);
894 	fdt->next_fd = fd + 1;
895 #if 1
896 	/* Sanity check */
897 	if (fdt->fd[fd] != NULL) {
898 		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
899 		fdt->fd[fd] = NULL;
900 	}
901 #endif
902 	error = fd;
903 
904 out:
905 	spin_unlock(&files->file_lock);
906 	return error;
907 }
908 
909 EXPORT_SYMBOL(get_unused_fd);
910 
911 static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
912 {
913 	struct fdtable *fdt = files_fdtable(files);
914 	__FD_CLR(fd, fdt->open_fds);
915 	if (fd < fdt->next_fd)
916 		fdt->next_fd = fd;
917 }
918 
919 void fastcall put_unused_fd(unsigned int fd)
920 {
921 	struct files_struct *files = current->files;
922 	spin_lock(&files->file_lock);
923 	__put_unused_fd(files, fd);
924 	spin_unlock(&files->file_lock);
925 }
926 
927 EXPORT_SYMBOL(put_unused_fd);
928 
929 /*
930  * Install a file pointer in the fd array.
931  *
932  * The VFS is full of places where we drop the files lock between
933  * setting the open_fds bitmap and installing the file in the file
934  * array.  At any such point, we are vulnerable to a dup2() race
935  * installing a file in the array before us.  We need to detect this and
936  * fput() the struct file we are about to overwrite in this case.
937  *
938  * It should never happen - if we allow dup2() do it, _really_ bad things
939  * will follow.
940  */
941 
942 void fastcall fd_install(unsigned int fd, struct file * file)
943 {
944 	struct files_struct *files = current->files;
945 	struct fdtable *fdt;
946 	spin_lock(&files->file_lock);
947 	fdt = files_fdtable(files);
948 	BUG_ON(fdt->fd[fd] != NULL);
949 	rcu_assign_pointer(fdt->fd[fd], file);
950 	spin_unlock(&files->file_lock);
951 }
952 
953 EXPORT_SYMBOL(fd_install);
954 
955 long do_sys_open(const char __user *filename, int flags, int mode)
956 {
957 	char *tmp = getname(filename);
958 	int fd = PTR_ERR(tmp);
959 
960 	if (!IS_ERR(tmp)) {
961 		fd = get_unused_fd();
962 		if (fd >= 0) {
963 			struct file *f = filp_open(tmp, flags, mode);
964 			if (IS_ERR(f)) {
965 				put_unused_fd(fd);
966 				fd = PTR_ERR(f);
967 			} else {
968 				fsnotify_open(f->f_dentry);
969 				fd_install(fd, f);
970 			}
971 		}
972 		putname(tmp);
973 	}
974 	return fd;
975 }
976 
977 asmlinkage long sys_open(const char __user *filename, int flags, int mode)
978 {
979 	if (force_o_largefile())
980 		flags |= O_LARGEFILE;
981 
982 	return do_sys_open(filename, flags, mode);
983 }
984 EXPORT_SYMBOL_GPL(sys_open);
985 
986 #ifndef __alpha__
987 
988 /*
989  * For backward compatibility?  Maybe this should be moved
990  * into arch/i386 instead?
991  */
992 asmlinkage long sys_creat(const char __user * pathname, int mode)
993 {
994 	return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
995 }
996 
997 #endif
998 
999 /*
1000  * "id" is the POSIX thread ID. We use the
1001  * files pointer for this..
1002  */
1003 int filp_close(struct file *filp, fl_owner_t id)
1004 {
1005 	int retval = 0;
1006 
1007 	if (!file_count(filp)) {
1008 		printk(KERN_ERR "VFS: Close: file count is 0\n");
1009 		return 0;
1010 	}
1011 
1012 	if (filp->f_op && filp->f_op->flush)
1013 		retval = filp->f_op->flush(filp);
1014 
1015 	dnotify_flush(filp, id);
1016 	locks_remove_posix(filp, id);
1017 	fput(filp);
1018 	return retval;
1019 }
1020 
1021 EXPORT_SYMBOL(filp_close);
1022 
1023 /*
1024  * Careful here! We test whether the file pointer is NULL before
1025  * releasing the fd. This ensures that one clone task can't release
1026  * an fd while another clone is opening it.
1027  */
1028 asmlinkage long sys_close(unsigned int fd)
1029 {
1030 	struct file * filp;
1031 	struct files_struct *files = current->files;
1032 	struct fdtable *fdt;
1033 
1034 	spin_lock(&files->file_lock);
1035 	fdt = files_fdtable(files);
1036 	if (fd >= fdt->max_fds)
1037 		goto out_unlock;
1038 	filp = fdt->fd[fd];
1039 	if (!filp)
1040 		goto out_unlock;
1041 	rcu_assign_pointer(fdt->fd[fd], NULL);
1042 	FD_CLR(fd, fdt->close_on_exec);
1043 	__put_unused_fd(files, fd);
1044 	spin_unlock(&files->file_lock);
1045 	return filp_close(filp, files);
1046 
1047 out_unlock:
1048 	spin_unlock(&files->file_lock);
1049 	return -EBADF;
1050 }
1051 
1052 EXPORT_SYMBOL(sys_close);
1053 
1054 /*
1055  * This routine simulates a hangup on the tty, to arrange that users
1056  * are given clean terminals at login time.
1057  */
1058 asmlinkage long sys_vhangup(void)
1059 {
1060 	if (capable(CAP_SYS_TTY_CONFIG)) {
1061 		tty_vhangup(current->signal->tty);
1062 		return 0;
1063 	}
1064 	return -EPERM;
1065 }
1066 
1067 /*
1068  * Called when an inode is about to be open.
1069  * We use this to disallow opening large files on 32bit systems if
1070  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1071  * on this flag in sys_open.
1072  */
1073 int generic_file_open(struct inode * inode, struct file * filp)
1074 {
1075 	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1076 		return -EFBIG;
1077 	return 0;
1078 }
1079 
1080 EXPORT_SYMBOL(generic_file_open);
1081 
1082 /*
1083  * This is used by subsystems that don't want seekable
1084  * file descriptors
1085  */
1086 int nonseekable_open(struct inode *inode, struct file *filp)
1087 {
1088 	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1089 	return 0;
1090 }
1091 
1092 EXPORT_SYMBOL(nonseekable_open);
1093