/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 *
 *  24 January 2000
 *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
 *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
 */

#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/personality.h> /* for STICKY_TIMEOUTS */
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/rcupdate.h>

#include <asm/uaccess.h>

struct poll_table_page {
	struct poll_table_page * next;
	struct poll_table_entry * entry;
	struct poll_table_entry entries[0];
};

#define POLL_TABLE_FULL(table) \
	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))

/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the Linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and poll_freewait() do all the
 * work. poll_wait() is an inline function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */
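/*
 * To illustrate the other side of that contract (a hypothetical driver
 * sketch, not code from this file): a character device's ->poll method
 * typically registers its wait queue via poll_wait() and returns the
 * current ready mask, roughly:
 *
 *	static unsigned int mydev_poll(struct file *file, poll_table *wait)
 *	{
 *		struct mydev *dev = file->private_data;
 *		unsigned int mask = 0;
 *
 *		poll_wait(file, &dev->read_wq, wait);
 *		if (dev->data_ready)
 *			mask |= POLLIN | POLLRDNORM;
 *		return mask;
 *	}
 *
 * "mydev", "read_wq" and "data_ready" are made-up names; the point is
 * that poll_wait() lands in __pollwait() below, via the function
 * pointer installed by poll_initwait().
 */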
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
		       poll_table *p);

void poll_initwait(struct poll_wqueues *pwq)
{
	init_poll_funcptr(&pwq->pt, __pollwait);
	pwq->error = 0;
	pwq->table = NULL;
	pwq->inline_index = 0;
}

EXPORT_SYMBOL(poll_initwait);

static void free_poll_entry(struct poll_table_entry *entry)
{
	remove_wait_queue(entry->wait_address, &entry->wait);
	fput(entry->filp);
}

void poll_freewait(struct poll_wqueues *pwq)
{
	struct poll_table_page * p = pwq->table;
	int i;
	for (i = 0; i < pwq->inline_index; i++)
		free_poll_entry(pwq->inline_entries + i);
	while (p) {
		struct poll_table_entry * entry;
		struct poll_table_page *old;

		entry = p->entry;
		do {
			entry--;
			free_poll_entry(entry);
		} while (entry > p->entries);
		old = p;
		p = p->next;
		free_page((unsigned long) old);
	}
}

EXPORT_SYMBOL(poll_freewait);

static struct poll_table_entry *poll_get_entry(poll_table *_p)
{
	struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
	struct poll_table_page *table = p->table;

	if (p->inline_index < N_INLINE_POLL_ENTRIES)
		return p->inline_entries + p->inline_index++;

	if (!table || POLL_TABLE_FULL(table)) {
		struct poll_table_page *new_table;

		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
		if (!new_table) {
			p->error = -ENOMEM;
			__set_current_state(TASK_RUNNING);
			return NULL;
		}
		new_table->entry = new_table->entries;
		new_table->next = table;
		p->table = new_table;
		table = new_table;
	}

	return table->entry++;
}

/* Add a new entry */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
				poll_table *p)
{
	struct poll_table_entry *entry = poll_get_entry(p);
	if (!entry)
		return;
	get_file(filp);
	entry->filp = filp;
	entry->wait_address = wait_address;
	init_waitqueue_entry(&entry->wait, current);
	add_wait_queue(wait_address, &entry->wait);
}

#define FDS_IN(fds, n)		(fds->in + n)
#define FDS_OUT(fds, n)		(fds->out + n)
#define FDS_EX(fds, n)		(fds->ex + n)

#define BITS(fds, n)	(*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))

static int max_select_fd(unsigned long n, fd_set_bits *fds)
{
	unsigned long *open_fds;
	unsigned long set;
	int max;
	struct fdtable *fdt;

	/* handle last incomplete long-word first */
	set = ~(~0UL << (n & (__NFDBITS-1)));
	n /= __NFDBITS;
	fdt = files_fdtable(current->files);
	open_fds = fdt->open_fds->fds_bits+n;
	max = 0;
	if (set) {
		set &= BITS(fds, n);
		if (set) {
			if (!(set & ~*open_fds))
				goto get_max;
			return -EBADF;
		}
	}
	while (n) {
		open_fds--;
		n--;
		set = BITS(fds, n);
		if (!set)
			continue;
		if (set & ~*open_fds)
			return -EBADF;
		if (max)
			continue;
get_max:
		do {
			max++;
			set >>= 1;
		} while (set);
		max += n * __NFDBITS;
	}

	return max;
}

#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
#define POLLEX_SET (POLLPRI)
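/*
 * Note that POLLHUP and POLLERR are folded into the "readable" set and
 * POLLERR into the "writable" set: a hung-up or errored descriptor does
 * not block in read() or write(), so select() reports it as ready and
 * lets the caller collect the EOF or error. do_pollfd() below makes the
 * matching choice for poll() by always letting POLLERR and POLLHUP
 * through the caller's event mask.
 */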
int do_select(int n, fd_set_bits *fds, s64 *timeout)
{
	struct poll_wqueues table;
	poll_table *wait;
	int retval, i;

	rcu_read_lock();
	retval = max_select_fd(n, fds);
	rcu_read_unlock();

	if (retval < 0)
		return retval;
	n = retval;

	poll_initwait(&table);
	wait = &table.pt;
	if (!*timeout)
		wait = NULL;
	retval = 0;
	for (;;) {
		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
		long __timeout;

		set_current_state(TASK_INTERRUPTIBLE);

		inp = fds->in; outp = fds->out; exp = fds->ex;
		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
			unsigned long res_in = 0, res_out = 0, res_ex = 0;
			const struct file_operations *f_op = NULL;
			struct file *file = NULL;

			in = *inp++; out = *outp++; ex = *exp++;
			all_bits = in | out | ex;
			if (all_bits == 0) {
				i += __NFDBITS;
				continue;
			}

			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
				int fput_needed;
				if (i >= n)
					break;
				if (!(bit & all_bits))
					continue;
				file = fget_light(i, &fput_needed);
				if (file) {
					f_op = file->f_op;
					mask = DEFAULT_POLLMASK;
					if (f_op && f_op->poll)
						mask = (*f_op->poll)(file, retval ? NULL : wait);
					fput_light(file, fput_needed);
					if ((mask & POLLIN_SET) && (in & bit)) {
						res_in |= bit;
						retval++;
					}
					if ((mask & POLLOUT_SET) && (out & bit)) {
						res_out |= bit;
						retval++;
					}
					if ((mask & POLLEX_SET) && (ex & bit)) {
						res_ex |= bit;
						retval++;
					}
				}
			}
			if (res_in)
				*rinp = res_in;
			if (res_out)
				*routp = res_out;
			if (res_ex)
				*rexp = res_ex;
			cond_resched();
		}
		wait = NULL;
		if (retval || !*timeout || signal_pending(current))
			break;
		if (table.error) {
			retval = table.error;
			break;
		}

		if (*timeout < 0) {
			/* Wait indefinitely */
			__timeout = MAX_SCHEDULE_TIMEOUT;
		} else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) {
			/* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */
			__timeout = MAX_SCHEDULE_TIMEOUT - 1;
			*timeout -= __timeout;
		} else {
			__timeout = *timeout;
			*timeout = 0;
		}
		__timeout = schedule_timeout(__timeout);
		if (*timeout >= 0)
			*timeout += __timeout;
	}
	__set_current_state(TASK_RUNNING);

	poll_freewait(&table);

	return retval;
}

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restarts only when you want to.
 */
#define MAX_SELECT_SECONDS \
	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)

int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
			   fd_set __user *exp, s64 *timeout)
{
	fd_set_bits fds;
	void *bits;
	int ret, max_fds;
	unsigned int size;
	struct fdtable *fdt;
	/* Allocate small arguments on the stack to save memory and be faster */
	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

	ret = -EINVAL;
	if (n < 0)
		goto out_nofds;

	/* max_fds can increase, so grab it once to avoid race */
	rcu_read_lock();
	fdt = files_fdtable(current->files);
	max_fds = fdt->max_fds;
	rcu_read_unlock();
	if (n > max_fds)
		n = max_fds;

	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
	 * since we used fdset we need to allocate memory in units of
	 * long-words.
	 */
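	/*
	 * Illustration, assuming n = 100 with 64-bit longs so that
	 * FDS_BYTES(n) == 16: the single allocation below is carved
	 * into six equal slices,
	 *
	 *	bits: [ in | out | ex | res_in | res_out | res_ex ]
	 *	      +0   +16   +32  +48      +64       +80   (bytes)
	 *
	 * which is why the on-stack buffer is usable only when all six
	 * slices fit into it.
	 */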
	size = FDS_BYTES(n);
	bits = stack_fds;
	if (size > sizeof(stack_fds) / 6) {
		/* Not enough space in on-stack array; must use kmalloc */
		ret = -ENOMEM;
		bits = kmalloc(6 * size, GFP_KERNEL);
		if (!bits)
			goto out_nofds;
	}
	fds.in = bits;
	fds.out = bits + size;
	fds.ex = bits + 2*size;
	fds.res_in = bits + 3*size;
	fds.res_out = bits + 4*size;
	fds.res_ex = bits + 5*size;

	if ((ret = get_fd_set(n, inp, fds.in)) ||
	    (ret = get_fd_set(n, outp, fds.out)) ||
	    (ret = get_fd_set(n, exp, fds.ex)))
		goto out;
	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, timeout);

	if (ret < 0)
		goto out;
	if (!ret) {
		ret = -ERESTARTNOHAND;
		if (signal_pending(current))
			goto out;
		ret = 0;
	}

	if (set_fd_set(n, inp, fds.res_in) ||
	    set_fd_set(n, outp, fds.res_out) ||
	    set_fd_set(n, exp, fds.res_ex))
		ret = -EFAULT;

out:
	if (bits != stack_fds)
		kfree(bits);
out_nofds:
	return ret;
}

asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
			fd_set __user *exp, struct timeval __user *tvp)
{
	s64 timeout = -1;
	struct timeval tv;
	int ret;

	if (tvp) {
		if (copy_from_user(&tv, tvp, sizeof(tv)))
			return -EFAULT;

		if (tv.tv_sec < 0 || tv.tv_usec < 0)
			return -EINVAL;

		/* Cast to u64 to make GCC stop complaining */
		if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
			timeout = -1;	/* infinite */
		else {
			/*
			 * Round up so we never wake early: e.g. with
			 * HZ == 1000, {2s, 500us} becomes 2001 jiffies.
			 */
			timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
			timeout += tv.tv_sec * HZ;
		}
	}

	ret = core_sys_select(n, inp, outp, exp, &timeout);

	if (tvp) {
		struct timeval rtv;

		if (current->personality & STICKY_TIMEOUTS)
			goto sticky;
		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
		rtv.tv_sec = timeout;
		if (timeval_compare(&rtv, &tv) >= 0)
			rtv = tv;
		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
sticky:
			/*
			 * If an application puts its timeval in read-only
			 * memory, we don't want the Linux-specific update to
			 * the timeval to cause a fault after the select has
			 * completed successfully. However, because we're not
			 * updating the timeval, we can't restart the system
			 * call.
			 */
			if (ret == -ERESTARTNOHAND)
				ret = -EINTR;
		}
	}

	return ret;
}
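/*
 * Usage sketch (hypothetical userspace code, shown for context): since
 * sys_select() writes the time remaining back to *tvp unless the
 * process runs with the STICKY_TIMEOUTS personality flag, portable
 * callers re-initialize the timeout before every call:
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *	fd_set rfds;
 *
 *	FD_ZERO(&rfds);
 *	FD_SET(fd, &rfds);
 *	ret = select(fd + 1, &rfds, NULL, NULL, &tv);
 *
 * On return, tv may have been decremented to the time not slept.
 */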
#ifdef HAVE_SET_RESTORE_SIGMASK
asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
		fd_set __user *exp, struct timespec __user *tsp,
		const sigset_t __user *sigmask, size_t sigsetsize)
{
	s64 timeout = MAX_SCHEDULE_TIMEOUT;
	sigset_t ksigmask, sigsaved;
	struct timespec ts;
	int ret;

	if (tsp) {
		if (copy_from_user(&ts, tsp, sizeof(ts)))
			return -EFAULT;

		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
			return -EINVAL;

		/* Cast to u64 to make GCC stop complaining */
		if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
			timeout = -1;	/* infinite */
		else {
			timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
			timeout += ts.tv_sec * HZ;
		}
	}

	if (sigmask) {
		/* XXX: Don't preclude handling different sized sigset_t's. */
		if (sigsetsize != sizeof(sigset_t))
			return -EINVAL;
		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
			return -EFAULT;

		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	ret = core_sys_select(n, inp, outp, exp, &timeout);

	if (tsp) {
		struct timespec rts;

		if (current->personality & STICKY_TIMEOUTS)
			goto sticky;
		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
						1000;
		rts.tv_sec = timeout;
		if (timespec_compare(&rts, &ts) >= 0)
			rts = ts;
		if (copy_to_user(tsp, &rts, sizeof(rts))) {
sticky:
			/*
			 * If an application puts its timespec in read-only
			 * memory, we don't want the Linux-specific update to
			 * the timespec to cause a fault after the select has
			 * completed successfully. However, because we're not
			 * updating the timespec, we can't restart the system
			 * call.
			 */
			if (ret == -ERESTARTNOHAND)
				ret = -EINTR;
		}
	}

	if (ret == -ERESTARTNOHAND) {
		/*
		 * Don't restore the signal mask yet. Let do_signal() deliver
		 * the signal on the way back to userspace, before the signal
		 * mask is restored.
		 */
		if (sigmask) {
			memcpy(&current->saved_sigmask, &sigsaved,
					sizeof(sigsaved));
			set_restore_sigmask();
		}
	} else if (sigmask)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	return ret;
}

/*
 * Most architectures can't handle 7-argument syscalls. So we provide a
 * 6-argument version where the sixth argument is a pointer to a structure
 * which has a pointer to the sigset_t itself followed by a size_t containing
 * the sigset size.
 */
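/*
 * Illustration (hypothetical userspace, matching the layout that
 * sys_pselect6() below reads and that glibc's pselect() wrapper uses):
 *
 *	struct {
 *		const sigset_t *ss;
 *		size_t ss_len;
 *	} psig = { &sigmask, sizeof(sigmask) };
 *
 *	syscall(__NR_pselect6, n, &in, &out, &ex, &ts, &psig);
 */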
asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
	fd_set __user *exp, struct timespec __user *tsp, void __user *sig)
{
	size_t sigsetsize = 0;
	sigset_t __user *up = NULL;

	if (sig) {
		if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
		    || __get_user(up, (sigset_t __user * __user *)sig)
		    || __get_user(sigsetsize,
				(size_t __user *)(sig+sizeof(void *))))
			return -EFAULT;
	}

	return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
}
#endif /* HAVE_SET_RESTORE_SIGMASK */

struct poll_list {
	struct poll_list *next;
	int len;
	struct pollfd entries[0];
};

#define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
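/*
 * Worked example, assuming PAGE_SIZE = 4096 on a 64-bit arch where
 * sizeof(struct poll_list) is 16 and sizeof(struct pollfd) is 8:
 * POLLFD_PER_PAGE = (4096 - 16) / 8 = 510, so every chunk allocated
 * after the on-stack one in do_sys_poll() carries up to 510 entries.
 */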
/*
 * Fish for pollable events on the pollfd->fd file descriptor. We're only
 * interested in events matching the pollfd->events mask, and the result
 * matching that mask is both recorded in pollfd->revents and returned. The
 * pwait poll_table will be used by the fd-provided poll handler for waiting,
 * if non-NULL.
 */
static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
{
	unsigned int mask;
	int fd;

	mask = 0;
	fd = pollfd->fd;
	if (fd >= 0) {
		int fput_needed;
		struct file * file;

		file = fget_light(fd, &fput_needed);
		mask = POLLNVAL;
		if (file != NULL) {
			mask = DEFAULT_POLLMASK;
			if (file->f_op && file->f_op->poll)
				mask = file->f_op->poll(file, pwait);
			/* Mask out unneeded events. */
			mask &= pollfd->events | POLLERR | POLLHUP;
			fput_light(file, fput_needed);
		}
	}
	pollfd->revents = mask;

	return mask;
}

static int do_poll(unsigned int nfds, struct poll_list *list,
		   struct poll_wqueues *wait, s64 *timeout)
{
	int count = 0;
	poll_table* pt = &wait->pt;

	/* Optimise the no-wait case */
	if (!(*timeout))
		pt = NULL;

	for (;;) {
		struct poll_list *walk;
		long __timeout;

		set_current_state(TASK_INTERRUPTIBLE);
		for (walk = list; walk != NULL; walk = walk->next) {
			struct pollfd * pfd, * pfd_end;

			pfd = walk->entries;
			pfd_end = pfd + walk->len;
			for (; pfd != pfd_end; pfd++) {
				/*
				 * Fish for events. If we found one, record it
				 * and kill the poll_table, so we don't
				 * needlessly register any other waiters after
				 * this. They'll get immediately deregistered
				 * when we break out and return.
				 */
				if (do_pollfd(pfd, pt)) {
					count++;
					pt = NULL;
				}
			}
		}
		/*
		 * All waiters have already been registered, so don't provide
		 * a poll_table to them on the next loop iteration.
		 */
		pt = NULL;
		if (!count) {
			count = wait->error;
			if (signal_pending(current))
				count = -EINTR;
		}
		if (count || !*timeout)
			break;

		if (*timeout < 0) {
			/* Wait indefinitely */
			__timeout = MAX_SCHEDULE_TIMEOUT;
		} else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {
			/*
			 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
			 * a loop
			 */
			__timeout = MAX_SCHEDULE_TIMEOUT - 1;
			*timeout -= __timeout;
		} else {
			__timeout = *timeout;
			*timeout = 0;
		}

		__timeout = schedule_timeout(__timeout);
		if (*timeout >= 0)
			*timeout += __timeout;
	}
	__set_current_state(TASK_RUNNING);
	return count;
}

#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
			sizeof(struct pollfd))

int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
{
	struct poll_wqueues table;
	int err = -EFAULT, fdcount, len, size;
	/* Allocate small arguments on the stack to save memory and be
	   faster - use long to make sure the buffer is aligned properly
	   on 64 bit archs to avoid unaligned access */
	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
	struct poll_list *const head = (struct poll_list *)stack_pps;
	struct poll_list *walk = head;
	unsigned long todo = nfds;

	if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		return -EINVAL;

	len = min_t(unsigned int, nfds, N_STACK_PPS);
	for (;;) {
		walk->next = NULL;
		walk->len = len;
		if (!len)
			break;

		if (copy_from_user(walk->entries, ufds + nfds-todo,
					sizeof(struct pollfd) * walk->len))
			goto out_fds;

		todo -= walk->len;
		if (!todo)
			break;

		len = min(todo, POLLFD_PER_PAGE);
		size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
		walk = walk->next = kmalloc(size, GFP_KERNEL);
		if (!walk) {
			err = -ENOMEM;
			goto out_fds;
		}
	}

	poll_initwait(&table);
	fdcount = do_poll(nfds, head, &table, timeout);
	poll_freewait(&table);

	for (walk = head; walk; walk = walk->next) {
		struct pollfd *fds = walk->entries;
		int j;

		for (j = 0; j < walk->len; j++, ufds++)
			if (__put_user(fds[j].revents, &ufds->revents))
				goto out_fds;
	}

	err = fdcount;
out_fds:
	walk = head->next;
	while (walk) {
		struct poll_list *pos = walk;
		walk = walk->next;
		kfree(pos);
	}

	return err;
}

static long do_restart_poll(struct restart_block *restart_block)
{
	struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0;
	int nfds = restart_block->arg1;
	s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2;
	int ret;

	ret = do_sys_poll(ufds, nfds, &timeout);
	if (ret == -EINTR) {
		restart_block->fn = do_restart_poll;
		restart_block->arg2 = timeout & 0xFFFFFFFF;
		restart_block->arg3 = (u64)timeout >> 32;
		ret = -ERESTART_RESTARTBLOCK;
	}
	return ret;
}

asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
			long timeout_msecs)
{
	s64 timeout_jiffies;
	int ret;

	if (timeout_msecs > 0) {
#if HZ > 1000
		/* We can only overflow if HZ > 1000 */
		if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)
			timeout_jiffies = -1;
		else
#endif
			timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
	} else {
		/* Infinite (< 0) or no (0) timeout */
		timeout_jiffies = timeout_msecs;
	}

	ret = do_sys_poll(ufds, nfds, &timeout_jiffies);
	if (ret == -EINTR) {
		struct restart_block *restart_block;
		restart_block = &current_thread_info()->restart_block;
		restart_block->fn = do_restart_poll;
		restart_block->arg0 = (unsigned long)ufds;
		restart_block->arg1 = nfds;
		restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF;
		restart_block->arg3 = (u64)timeout_jiffies >> 32;
		ret = -ERESTART_RESTARTBLOCK;
	}
	return ret;
}
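/*
 * A note on the restart bookkeeping above: the s64 jiffies timeout is
 * split across the two unsigned long slots arg2/arg3 so the remaining
 * time survives an interruption even on 32-bit architectures, and
 * do_restart_poll() reassembles it. For example, a remaining timeout
 * of 0x100000001 jiffies is saved as arg2 = 0x1, arg3 = 0x1.
 */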
#ifdef HAVE_SET_RESTORE_SIGMASK
asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
	struct timespec __user *tsp, const sigset_t __user *sigmask,
	size_t sigsetsize)
{
	sigset_t ksigmask, sigsaved;
	struct timespec ts;
	s64 timeout = -1;
	int ret;

	if (tsp) {
		if (copy_from_user(&ts, tsp, sizeof(ts)))
			return -EFAULT;

		/* Cast to u64 to make GCC stop complaining */
		if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
			timeout = -1;	/* infinite */
		else {
			timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
			timeout += ts.tv_sec * HZ;
		}
	}

	if (sigmask) {
		/* XXX: Don't preclude handling different sized sigset_t's. */
		if (sigsetsize != sizeof(sigset_t))
			return -EINVAL;
		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
			return -EFAULT;

		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	ret = do_sys_poll(ufds, nfds, &timeout);

	/* We can restart this syscall, usually */
	if (ret == -EINTR) {
		/*
		 * Don't restore the signal mask yet. Let do_signal() deliver
		 * the signal on the way back to userspace, before the signal
		 * mask is restored.
		 */
		if (sigmask) {
			memcpy(&current->saved_sigmask, &sigsaved,
					sizeof(sigsaved));
			set_restore_sigmask();
		}
		ret = -ERESTARTNOHAND;
	} else if (sigmask)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	if (tsp && timeout >= 0) {
		struct timespec rts;

		if (current->personality & STICKY_TIMEOUTS)
			goto sticky;
		/* Yes, we know it's actually an s64, but it's also positive. */
		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
						1000;
		rts.tv_sec = timeout;
		if (timespec_compare(&rts, &ts) >= 0)
			rts = ts;
		if (copy_to_user(tsp, &rts, sizeof(rts))) {
sticky:
			/*
			 * If an application puts its timespec in read-only
			 * memory, we don't want the Linux-specific update to
			 * the timespec to cause a fault after the poll has
			 * completed successfully. However, because we're not
			 * updating the timespec, we can't restart the system
			 * call.
			 */
			if (ret == -ERESTARTNOHAND && timeout >= 0)
				ret = -EINTR;
		}
	}

	return ret;
}
#endif /* HAVE_SET_RESTORE_SIGMASK */