/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

void fastcall set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		FD_SET(fd, fdt->close_on_exec);
	else
		FD_CLR(fd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);
}

static int get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = FD_ISSET(fd, fdt->close_on_exec);
	rcu_read_unlock();
	return res;
}

/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary.  Must be called with the
 * file_lock held for write.
 */

static int locate_fd(struct files_struct *files,
			    struct file *file, unsigned int orig_start)
{
	unsigned int newfd;
	unsigned int start;
	int error;
	struct fdtable *fdt;

	error = -EINVAL;
	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

repeat:
	fdt = files_fdtable(files);
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..fdt->next_fd
	 */
	start = orig_start;
	if (start < files->next_fd)
		start = files->next_fd;

	newfd = start;
	if (start < fdt->max_fdset) {
		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
			fdt->max_fdset, start);
	}

	error = -EMFILE;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fd array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	/*
	 * We reacquired files_lock, so we are safe as long as
	 * we reacquire the fdtable pointer and use it while holding
	 * the lock; no one can free it during that time.
	 */
	if (start <= files->next_fd)
		files->next_fd = newfd + 1;

	error = newfd;

out:
	return error;
}
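
/*
 * Illustrative sketch, not part of the original file: locate_fd() is what
 * gives F_DUPFD its POSIX "lowest free descriptor >= arg" semantics.  A
 * minimal userspace demonstration of that contract might look like the
 * hypothetical program below (kept inside #if 0 so it is never compiled
 * here).
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Ask for a duplicate of stdin on the lowest free fd >= 10. */
	int fd = fcntl(0, F_DUPFD, 10);

	if (fd < 0) {
		perror("fcntl(F_DUPFD)");
		return 1;
	}
	printf("duplicated onto fd %d\n", fd);	/* 10 in a fresh process */
	close(fd);
	return 0;
}
#endif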
static int dupfd(struct file *file, unsigned int start)
{
	struct files_struct * files = current->files;
	struct fdtable *fdt;
	int fd;

	spin_lock(&files->file_lock);
	fd = locate_fd(files, file, start);
	if (fd >= 0) {
		/* locate_fd() may have expanded fdtable, load the ptr */
		fdt = files_fdtable(files);
		FD_SET(fd, fdt->open_fds);
		FD_CLR(fd, fdt->close_on_exec);
		spin_unlock(&files->file_lock);
		fd_install(fd, file);
	} else {
		spin_unlock(&files->file_lock);
		fput(file);
	}

	return fd;
}

asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	int err = -EBADF;
	struct file * file, *tofree;
	struct files_struct * files = current->files;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	if (!(file = fcheck(oldfd)))
		goto out_unlock;
	err = newfd;
	if (newfd == oldfd)
		goto out_unlock;
	err = -EBADF;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out_unlock;
	get_file(file);			/* We are now finished with oldfd */

	err = expand_files(files, newfd);
	if (err < 0)
		goto out_fput;

	/* To avoid races with open() and dup(), we will mark the fd as
	 * in-use in the open-file bitmap throughout the entire dup2()
	 * process.  This is quite safe: do_close() uses the fd array
	 * entry, not the bitmap, to decide what work needs to be
	 * done.  --sct */
	/* Doesn't work. open() might be there first. --AV */

	/* Yes. It's a race. In user space. Nothing sane to do */
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
		goto out_fput;

	rcu_assign_pointer(fdt->fd[newfd], file);
	FD_SET(newfd, fdt->open_fds);
	FD_CLR(newfd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);
	err = newfd;
out:
	return err;
out_unlock:
	spin_unlock(&files->file_lock);
	goto out;

out_fput:
	spin_unlock(&files->file_lock);
	fput(file);
	goto out;
}

asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file * file = fget(fildes);

	if (file)
		ret = dupfd(file, 0);
	return ret;
}

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file * filp, unsigned long arg)
{
	struct inode * inode = filp->f_dentry->d_inode;
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
		    !filp->f_mapping->a_ops->direct_IO)
			return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	lock_kernel();
	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync) {
			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
			if (error < 0)
				goto out;
		}
	}

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
out:
	unlock_kernel();
	return error;
}
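
/*
 * Illustrative sketch, not part of the original file: setfl() only honours
 * the bits in SETFL_MASK and silently preserves everything else, which is
 * why userspace uses the read-modify-write idiom shown in the hypothetical
 * helper below (kept inside #if 0 so it is never compiled here).
 */
#if 0
#include <fcntl.h>

static int set_nonblocking(int fd)
{
	int flags = fcntl(fd, F_GETFL);

	if (flags < 0)
		return -1;
	/* Only the SETFL_MASK bits of the new value take effect. */
	return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
#endif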
static void f_modown(struct file *filp, unsigned long pid,
		     uid_t uid, uid_t euid, int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		filp->f_owner.pid = pid;
		filp->f_owner.uid = uid;
		filp->f_owner.euid = euid;
	}
	write_unlock_irq(&filp->f_owner.lock);
}

int f_setown(struct file *filp, unsigned long arg, int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, arg, current->uid, current->euid, force);
	return 0;
}

EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, 0, 0, 0, 1);
}

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		get_file(filp);
		err = dupfd(filp, arg);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = filp->f_owner.pid;
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg)) {
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	default:
		break;
	}
	return err;
}
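
/*
 * Illustrative sketch, not part of the original file: the F_SETOWN and
 * F_SETFL cases above are what the classic userspace SIGIO setup
 * exercises.  O_ASYNC is the userspace name for FASYNC.  Hypothetical
 * helper, kept inside #if 0 so it is never compiled here.
 */
#if 0
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void setup_sigio(int fd, void (*handler)(int))
{
	struct sigaction sa = { .sa_handler = handler };

	sigaction(SIGIO, &sa, NULL);
	fcntl(fd, F_SETOWN, getpid());	/* deliver SIGIO to this process */
	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
}
#endif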
asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}

	err = do_fcntl(fd, cmd, arg, filp);

	fput(filp);
out:
	return err;
}

#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file * filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}
	err = -EBADF;

	switch (cmd) {
	case F_GETLK64:
		err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		err = fcntl_setlk64(fd, filp, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, filp);
		break;
	}
	fput(filp);
out:
	return err;
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
			     struct fown_struct *fown, int sig)
{
	return (((fown->euid == 0) ||
		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
		!security_file_send_sigiotask(p, fown, sig));
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd,
			       int reason)
{
	if (!sigio_perm(p, fown, fown->signum))
		return;

	switch (fown->signum) {
		siginfo_t si;
	default:
		/* Queue a rt signal with the appropriate fd as its
		   value.  We use SI_SIGIO as the source, not
		   SI_KERNEL, since kernel signals always get
		   delivered even if we can't queue.  Failure to
		   queue in this case _should_ be reported; we fall
		   back to SIGIO in that case. --sct */
		si.si_signo = fown->signum;
		si.si_errno = 0;
		si.si_code  = reason;
		/* Make sure we are called with one of the POLL_*
		   reasons, otherwise we could leak kernel stack into
		   userspace.  */
		BUG_ON((reason & __SI_MASK) != __SI_POLL);
		if (reason - POLL_IN >= NSIGPOLL)
			si.si_band = ~0L;
		else
			si.si_band = band_table[reason - POLL_IN];
		si.si_fd = fd;
		if (!group_send_sig_info(fown->signum, &si, p))
			break;
		/* fall-through: fall back on the old plain SIGIO signal */
	case 0:
		group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
	}
}
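
/*
 * Illustrative sketch, not part of the original file: when a realtime
 * signal has been chosen with F_SETSIG, the siginfo_t queued by
 * send_sigio_to_task() above carries si_fd and the band_table-derived
 * si_band, which a userspace handler can inspect.  Hypothetical code,
 * kept inside #if 0 so it is never compiled here.
 */
#if 0
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <unistd.h>

static volatile sig_atomic_t ready_fd = -1;

static void rt_handler(int sig, siginfo_t *si, void *ctx)
{
	if (si->si_band & (POLLIN | POLLRDNORM))
		ready_fd = si->si_fd;	/* this descriptor is readable */
}

static void setup_rt_sigio(int fd)
{
	struct sigaction sa = {
		.sa_sigaction	= rt_handler,
		.sa_flags	= SA_SIGINFO,
	};

	sigaction(SIGRTMIN, &sa, NULL);
	fcntl(fd, F_SETOWN, getpid());
	fcntl(fd, F_SETSIG, SIGRTMIN);	/* handled by do_fcntl() above */
	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
}
#endif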
void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	int pid;

	read_lock(&fown->lock);
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	if (pid > 0) {
		p = find_task_by_pid(pid);
		if (p) {
			send_sigio_to_task(p, fown, fd, band);
		}
	} else {
		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
			send_sigio_to_task(p, fown, fd, band);
		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
	}
	read_unlock(&tasklist_lock);
out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown)
{
	if (sigio_perm(p, fown, SIGURG))
		group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	int pid, ret = 0;

	read_lock(&fown->lock);
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	if (pid > 0) {
		p = find_task_by_pid(pid);
		if (p) {
			send_sigurg_to_task(p, fown);
		}
	} else {
		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
			send_sigurg_to_task(p, fown);
		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
	}
	read_unlock(&tasklist_lock);
out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_RWLOCK(fasync_lock);
static kmem_cache_t *fasync_cache __read_mostly;

/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue.  It returns negative on error, 0 if it did
 * no changes and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}

EXPORT_SYMBOL(fasync_helper);

void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct * fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (!(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;
	}
}
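
/*
 * Illustrative sketch, not part of the original file: the canonical
 * driver-side pairing of fasync_helper() and kill_fasync(), shown for a
 * hypothetical driver "foo" (kept inside #if 0 so it is never compiled
 * here).  fasync_helper() maintains the list from the driver's fasync
 * method; kill_fasync() fires it whenever new data arrives.
 */
#if 0
static struct fasync_struct *foo_async_queue;

static int foo_fasync(int fd, struct file *filp, int on)
{
	return fasync_helper(fd, filp, on, &foo_async_queue);
}

/* Called from the driver's interrupt or data-ready path. */
static void foo_data_ready(void)
{
	kill_fasync(&foo_async_queue, SIGIO, POLL_IN);
}
#endif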
EXPORT_SYMBOL(__kill_fasync);

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		read_lock(&fasync_lock);
		/* reread *fp after obtaining the lock */
		__kill_fasync(*fp, sig, band);
		read_unlock(&fasync_lock);
	}
}
EXPORT_SYMBOL(kill_fasync);

static int __init fasync_init(void)
{
	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL);
	return 0;
}

module_init(fasync_init)
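
/*
 * Illustrative sketch, not part of the original file: hooking the
 * hypothetical foo_fasync() method from the sketch above into a driver's
 * file_operations, which is how setfl() reaches the driver when userspace
 * toggles FASYNC via F_SETFL.  Kept inside #if 0 so it is never compiled
 * here.
 */
#if 0
static struct file_operations foo_fops = {
	.owner	= THIS_MODULE,
	.fasync	= foo_fasync,
};
#endif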