1 /* 2 * This file contains the procedures for the handling of select and poll 3 * 4 * Created for Linux based loosely upon Mathius Lattner's minix 5 * patches by Peter MacDonald. Heavily edited by Linus. 6 * 7 * 4 February 1994 8 * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS 9 * flag set in its personality we do *not* modify the given timeout 10 * parameter to reflect time remaining. 11 * 12 * 24 January 2000 13 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 14 * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). 15 */ 16 17 #include <linux/syscalls.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/smp_lock.h> 21 #include <linux/poll.h> 22 #include <linux/personality.h> /* for STICKY_TIMEOUTS */ 23 #include <linux/file.h> 24 #include <linux/fs.h> 25 26 #include <asm/uaccess.h> 27 28 #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) 29 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) 30 31 struct poll_table_entry { 32 struct file * filp; 33 wait_queue_t wait; 34 wait_queue_head_t * wait_address; 35 }; 36 37 struct poll_table_page { 38 struct poll_table_page * next; 39 struct poll_table_entry * entry; 40 struct poll_table_entry entries[0]; 41 }; 42 43 #define POLL_TABLE_FULL(table) \ 44 ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) 45 46 /* 47 * Ok, Peter made a complicated, but straightforward multiple_wait() function. 48 * I have rewritten this, taking some shortcuts: This code may not be easy to 49 * follow, but it should be free of race-conditions, and it's practical. If you 50 * understand what I'm doing here, then you understand how the linux 51 * sleep/wakeup mechanism works. 52 * 53 * Two very simple procedures, poll_wait() and poll_freewait() make all the 54 * work. poll_wait() is an inline-function defined in <linux/poll.h>, 55 * as all select/poll functions have to call it to add an entry to the 56 * poll table. 57 */ 58 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, 59 poll_table *p); 60 61 void poll_initwait(struct poll_wqueues *pwq) 62 { 63 init_poll_funcptr(&pwq->pt, __pollwait); 64 pwq->error = 0; 65 pwq->table = NULL; 66 } 67 68 EXPORT_SYMBOL(poll_initwait); 69 70 void poll_freewait(struct poll_wqueues *pwq) 71 { 72 struct poll_table_page * p = pwq->table; 73 while (p) { 74 struct poll_table_entry * entry; 75 struct poll_table_page *old; 76 77 entry = p->entry; 78 do { 79 entry--; 80 remove_wait_queue(entry->wait_address,&entry->wait); 81 fput(entry->filp); 82 } while (entry > p->entries); 83 old = p; 84 p = p->next; 85 free_page((unsigned long) old); 86 } 87 } 88 89 EXPORT_SYMBOL(poll_freewait); 90 91 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, 92 poll_table *_p) 93 { 94 struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); 95 struct poll_table_page *table = p->table; 96 97 if (!table || POLL_TABLE_FULL(table)) { 98 struct poll_table_page *new_table; 99 100 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); 101 if (!new_table) { 102 p->error = -ENOMEM; 103 __set_current_state(TASK_RUNNING); 104 return; 105 } 106 new_table->entry = new_table->entries; 107 new_table->next = table; 108 p->table = new_table; 109 table = new_table; 110 } 111 112 /* Add a new entry */ 113 { 114 struct poll_table_entry * entry = table->entry; 115 table->entry = entry+1; 116 get_file(filp); 117 entry->filp = filp; 118 entry->wait_address = wait_address; 119 init_waitqueue_entry(&entry->wait, current); 120 add_wait_queue(wait_address,&entry->wait); 121 } 122 } 123 124 #define FDS_IN(fds, n) (fds->in + n) 125 #define FDS_OUT(fds, n) (fds->out + n) 126 #define FDS_EX(fds, n) (fds->ex + n) 127 128 #define BITS(fds, n) (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n)) 129 130 static int max_select_fd(unsigned long n, fd_set_bits *fds) 131 { 132 unsigned long *open_fds; 133 unsigned long set; 134 int max; 135 136 /* handle last in-complete long-word first */ 137 set = ~(~0UL << (n & (__NFDBITS-1))); 138 n /= __NFDBITS; 139 open_fds = current->files->open_fds->fds_bits+n; 140 max = 0; 141 if (set) { 142 set &= BITS(fds, n); 143 if (set) { 144 if (!(set & ~*open_fds)) 145 goto get_max; 146 return -EBADF; 147 } 148 } 149 while (n) { 150 open_fds--; 151 n--; 152 set = BITS(fds, n); 153 if (!set) 154 continue; 155 if (set & ~*open_fds) 156 return -EBADF; 157 if (max) 158 continue; 159 get_max: 160 do { 161 max++; 162 set >>= 1; 163 } while (set); 164 max += n * __NFDBITS; 165 } 166 167 return max; 168 } 169 170 #define BIT(i) (1UL << ((i)&(__NFDBITS-1))) 171 #define MEM(i,m) ((m)+(unsigned)(i)/__NFDBITS) 172 #define ISSET(i,m) (((i)&*(m)) != 0) 173 #define SET(i,m) (*(m) |= (i)) 174 175 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR) 176 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 177 #define POLLEX_SET (POLLPRI) 178 179 int do_select(int n, fd_set_bits *fds, long *timeout) 180 { 181 struct poll_wqueues table; 182 poll_table *wait; 183 int retval, i; 184 long __timeout = *timeout; 185 186 spin_lock(¤t->files->file_lock); 187 retval = max_select_fd(n, fds); 188 spin_unlock(¤t->files->file_lock); 189 190 if (retval < 0) 191 return retval; 192 n = retval; 193 194 poll_initwait(&table); 195 wait = &table.pt; 196 if (!__timeout) 197 wait = NULL; 198 retval = 0; 199 for (;;) { 200 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 201 202 set_current_state(TASK_INTERRUPTIBLE); 203 204 inp = fds->in; outp = fds->out; exp = fds->ex; 205 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; 206 207 for (i = 0; i < n; ++rinp, ++routp, ++rexp) { 208 unsigned long in, out, ex, all_bits, bit = 1, mask, j; 209 unsigned long res_in = 0, res_out = 0, res_ex = 0; 210 struct file_operations *f_op = NULL; 211 struct file *file = NULL; 212 213 in = *inp++; out = *outp++; ex = *exp++; 214 all_bits = in | out | ex; 215 if (all_bits == 0) { 216 i += __NFDBITS; 217 continue; 218 } 219 220 for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) { 221 if (i >= n) 222 break; 223 if (!(bit & all_bits)) 224 continue; 225 file = fget(i); 226 if (file) { 227 f_op = file->f_op; 228 mask = DEFAULT_POLLMASK; 229 if (f_op && f_op->poll) 230 mask = (*f_op->poll)(file, retval ? NULL : wait); 231 fput(file); 232 if ((mask & POLLIN_SET) && (in & bit)) { 233 res_in |= bit; 234 retval++; 235 } 236 if ((mask & POLLOUT_SET) && (out & bit)) { 237 res_out |= bit; 238 retval++; 239 } 240 if ((mask & POLLEX_SET) && (ex & bit)) { 241 res_ex |= bit; 242 retval++; 243 } 244 } 245 cond_resched(); 246 } 247 if (res_in) 248 *rinp = res_in; 249 if (res_out) 250 *routp = res_out; 251 if (res_ex) 252 *rexp = res_ex; 253 } 254 wait = NULL; 255 if (retval || !__timeout || signal_pending(current)) 256 break; 257 if(table.error) { 258 retval = table.error; 259 break; 260 } 261 __timeout = schedule_timeout(__timeout); 262 } 263 __set_current_state(TASK_RUNNING); 264 265 poll_freewait(&table); 266 267 /* 268 * Up-to-date the caller timeout. 269 */ 270 *timeout = __timeout; 271 return retval; 272 } 273 274 static void *select_bits_alloc(int size) 275 { 276 return kmalloc(6 * size, GFP_KERNEL); 277 } 278 279 static void select_bits_free(void *bits, int size) 280 { 281 kfree(bits); 282 } 283 284 /* 285 * We can actually return ERESTARTSYS instead of EINTR, but I'd 286 * like to be certain this leads to no problems. So I return 287 * EINTR just for safety. 288 * 289 * Update: ERESTARTSYS breaks at least the xview clock binary, so 290 * I'm trying ERESTARTNOHAND which restart only when you want to. 291 */ 292 #define MAX_SELECT_SECONDS \ 293 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 294 295 asmlinkage long 296 sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) 297 { 298 fd_set_bits fds; 299 char *bits; 300 long timeout; 301 int ret, size, max_fdset; 302 303 timeout = MAX_SCHEDULE_TIMEOUT; 304 if (tvp) { 305 time_t sec, usec; 306 307 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp)) 308 || __get_user(sec, &tvp->tv_sec) 309 || __get_user(usec, &tvp->tv_usec)) { 310 ret = -EFAULT; 311 goto out_nofds; 312 } 313 314 ret = -EINVAL; 315 if (sec < 0 || usec < 0) 316 goto out_nofds; 317 318 if ((unsigned long) sec < MAX_SELECT_SECONDS) { 319 timeout = ROUND_UP(usec, 1000000/HZ); 320 timeout += sec * (unsigned long) HZ; 321 } 322 } 323 324 ret = -EINVAL; 325 if (n < 0) 326 goto out_nofds; 327 328 /* max_fdset can increase, so grab it once to avoid race */ 329 max_fdset = current->files->max_fdset; 330 if (n > max_fdset) 331 n = max_fdset; 332 333 /* 334 * We need 6 bitmaps (in/out/ex for both incoming and outgoing), 335 * since we used fdset we need to allocate memory in units of 336 * long-words. 337 */ 338 ret = -ENOMEM; 339 size = FDS_BYTES(n); 340 bits = select_bits_alloc(size); 341 if (!bits) 342 goto out_nofds; 343 fds.in = (unsigned long *) bits; 344 fds.out = (unsigned long *) (bits + size); 345 fds.ex = (unsigned long *) (bits + 2*size); 346 fds.res_in = (unsigned long *) (bits + 3*size); 347 fds.res_out = (unsigned long *) (bits + 4*size); 348 fds.res_ex = (unsigned long *) (bits + 5*size); 349 350 if ((ret = get_fd_set(n, inp, fds.in)) || 351 (ret = get_fd_set(n, outp, fds.out)) || 352 (ret = get_fd_set(n, exp, fds.ex))) 353 goto out; 354 zero_fd_set(n, fds.res_in); 355 zero_fd_set(n, fds.res_out); 356 zero_fd_set(n, fds.res_ex); 357 358 ret = do_select(n, &fds, &timeout); 359 360 if (tvp && !(current->personality & STICKY_TIMEOUTS)) { 361 time_t sec = 0, usec = 0; 362 if (timeout) { 363 sec = timeout / HZ; 364 usec = timeout % HZ; 365 usec *= (1000000/HZ); 366 } 367 put_user(sec, &tvp->tv_sec); 368 put_user(usec, &tvp->tv_usec); 369 } 370 371 if (ret < 0) 372 goto out; 373 if (!ret) { 374 ret = -ERESTARTNOHAND; 375 if (signal_pending(current)) 376 goto out; 377 ret = 0; 378 } 379 380 if (set_fd_set(n, inp, fds.res_in) || 381 set_fd_set(n, outp, fds.res_out) || 382 set_fd_set(n, exp, fds.res_ex)) 383 ret = -EFAULT; 384 385 out: 386 select_bits_free(bits, size); 387 out_nofds: 388 return ret; 389 } 390 391 struct poll_list { 392 struct poll_list *next; 393 int len; 394 struct pollfd entries[0]; 395 }; 396 397 #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) 398 399 static void do_pollfd(unsigned int num, struct pollfd * fdpage, 400 poll_table ** pwait, int *count) 401 { 402 int i; 403 404 for (i = 0; i < num; i++) { 405 int fd; 406 unsigned int mask; 407 struct pollfd *fdp; 408 409 mask = 0; 410 fdp = fdpage+i; 411 fd = fdp->fd; 412 if (fd >= 0) { 413 struct file * file = fget(fd); 414 mask = POLLNVAL; 415 if (file != NULL) { 416 mask = DEFAULT_POLLMASK; 417 if (file->f_op && file->f_op->poll) 418 mask = file->f_op->poll(file, *pwait); 419 mask &= fdp->events | POLLERR | POLLHUP; 420 fput(file); 421 } 422 if (mask) { 423 *pwait = NULL; 424 (*count)++; 425 } 426 } 427 fdp->revents = mask; 428 } 429 } 430 431 static int do_poll(unsigned int nfds, struct poll_list *list, 432 struct poll_wqueues *wait, long timeout) 433 { 434 int count = 0; 435 poll_table* pt = &wait->pt; 436 437 if (!timeout) 438 pt = NULL; 439 440 for (;;) { 441 struct poll_list *walk; 442 set_current_state(TASK_INTERRUPTIBLE); 443 walk = list; 444 while(walk != NULL) { 445 do_pollfd( walk->len, walk->entries, &pt, &count); 446 walk = walk->next; 447 } 448 pt = NULL; 449 if (count || !timeout || signal_pending(current)) 450 break; 451 count = wait->error; 452 if (count) 453 break; 454 timeout = schedule_timeout(timeout); 455 } 456 __set_current_state(TASK_RUNNING); 457 return count; 458 } 459 460 asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout) 461 { 462 struct poll_wqueues table; 463 int fdcount, err; 464 unsigned int i; 465 struct poll_list *head; 466 struct poll_list *walk; 467 468 /* Do a sanity check on nfds ... */ 469 if (nfds > current->files->max_fdset && nfds > OPEN_MAX) 470 return -EINVAL; 471 472 if (timeout) { 473 /* Careful about overflow in the intermediate values */ 474 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ) 475 timeout = (unsigned long)(timeout*HZ+999)/1000+1; 476 else /* Negative or overflow */ 477 timeout = MAX_SCHEDULE_TIMEOUT; 478 } 479 480 poll_initwait(&table); 481 482 head = NULL; 483 walk = NULL; 484 i = nfds; 485 err = -ENOMEM; 486 while(i!=0) { 487 struct poll_list *pp; 488 pp = kmalloc(sizeof(struct poll_list)+ 489 sizeof(struct pollfd)* 490 (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i), 491 GFP_KERNEL); 492 if(pp==NULL) 493 goto out_fds; 494 pp->next=NULL; 495 pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i); 496 if (head == NULL) 497 head = pp; 498 else 499 walk->next = pp; 500 501 walk = pp; 502 if (copy_from_user(pp->entries, ufds + nfds-i, 503 sizeof(struct pollfd)*pp->len)) { 504 err = -EFAULT; 505 goto out_fds; 506 } 507 i -= pp->len; 508 } 509 fdcount = do_poll(nfds, head, &table, timeout); 510 511 /* OK, now copy the revents fields back to user space. */ 512 walk = head; 513 err = -EFAULT; 514 while(walk != NULL) { 515 struct pollfd *fds = walk->entries; 516 int j; 517 518 for (j=0; j < walk->len; j++, ufds++) { 519 if(__put_user(fds[j].revents, &ufds->revents)) 520 goto out_fds; 521 } 522 walk = walk->next; 523 } 524 err = fdcount; 525 if (!fdcount && signal_pending(current)) 526 err = -EINTR; 527 out_fds: 528 walk = head; 529 while(walk!=NULL) { 530 struct poll_list *pp = walk->next; 531 kfree(walk); 532 walk = pp; 533 } 534 poll_freewait(&table); 535 return err; 536 } 537