/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
 * This code underwent a massive rewrite in order to solve some problems
 * with the original code. In particular the original code failed to
 * wake up processes that were waiting for semval to go to 0 if the
 * value went to 0 and was then incremented rapidly enough. In solving
 * this problem I have also modified the implementation so that it
 * processes pending operations in a FIFO manner, thus giving a guarantee
 * that processes waiting for a lock on the semaphore won't starve
 * unless another locking process fails to unlock.
 * In addition the following two changes in behavior have been introduced:
 * - The original implementation of semop returned the value of the
 *   last semaphore element examined on success. This does not
 *   match the manual page specifications, and effectively
 *   allows the user to read the semaphore even if they do not
 *   have read permissions. The implementation now returns 0
 *   on success as stated in the manual page.
 * - There is some confusion over whether the set of undo adjustments
 *   to be performed at exit should be done in an atomic manner.
 *   That is, if we are attempting to decrement the semval, should we queue
 *   up and wait until we can do so legally?
 *   The original implementation attempted to do this.
 *   The current implementation does not do so. This is because I don't
 *   think it is the right thing (TM) to do, and because I couldn't
 *   see a clean way to get the old behavior with the new design.
 *   The POSIX standard and SVID should be consulted to determine
 *   what behavior is mandated.
 *
 * Further notes on refinement (Christoph Rohland, December 1998):
 * - The POSIX standard says that the undo adjustments should simply be
 *   redone. So the current implementation is OK.
 * - The previous code had two flaws:
 *   1) It actively gave the semaphore to the next waiting process
 *      sleeping on the semaphore. Since this process did not have the
 *      cpu, this led to many unnecessary context switches and bad
 *      performance. Now we only check which process should be able to
 *      get the semaphore, and if this process wants to reduce some
 *      semaphore value we simply wake it up without doing the
 *      operation. So it has to try to get it later. Thus e.g. the
 *      running process may reacquire the semaphore during the current
 *      time slice. If it only waits for zero or increases the semaphore,
 *      we do the operation in advance and wake it up.
 *   2) It did not wake up all zero-waiting processes. We try to do
 *      better, but only get the semops right which only wait for zero or
 *      increase. If there are decrement operations in the operations
 *      array we do the same as before.
 *
 * With the incarnation of the O(1) scheduler, it became unnecessary to
 * perform the check/retry algorithm for waking up blocked processes, as the
 * new scheduler handles thread switches better than the old one.
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc <alan@redhat.com>
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 */
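/*
 * Illustrative sketch (not part of the original source): how the behaviour
 * described above looks from userspace. semop() returns 0 on success (not
 * the last semval examined), and a SEM_UNDO adjustment recorded here is
 * applied by exit_sem() if the task dies while holding the semaphore.
 *
 *	struct sembuf lock   = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *	struct sembuf unlock = { .sem_num = 0, .sem_op = +1, .sem_flg = SEM_UNDO };
 *
 *	if (semop(semid, &lock, 1) == 0) {
 *		... critical section ...
 *		semop(semid, &unlock, 1);
 *	}
 */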
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/smp_lock.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <asm/uaccess.h>
#include "util.h"


#define sem_lock(id)	((struct sem_array*)ipc_lock(&sem_ids,id))
#define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
#define sem_rmid(id)	((struct sem_array*)ipc_rmid(&sem_ids,id))
#define sem_checkid(sma, semid)	\
	ipc_checkid(&sem_ids,&sma->sem_perm,semid)
#define sem_buildid(id, seq) \
	ipc_buildid(&sem_ids, id, seq)
static struct ipc_ids sem_ids;

static int newary (key_t, int, int);
static void freeary (struct sem_array *sma, int id);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * linked list protection:
 *	sem_undo.id_next,
 *	sem_array.sem_pending{,last},
 *	sem_array.sem_undo: sem_lock() for read/write
 *	sem_undo.proc_next: only "current" is allowed to read/write that field.
 *
 */

int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
#define sc_semmsl	(sem_ctls[0])
#define sc_semmns	(sem_ctls[1])
#define sc_semopm	(sem_ctls[2])
#define sc_semmni	(sem_ctls[3])

static int used_sems;

void __init sem_init (void)
{
	used_sems = 0;
	ipc_init_ids(&sem_ids,sc_semmni);
	ipc_init_proc_interface("sysvipc/sem",
				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
				&sem_ids,
				sysvipc_sem_proc_show);
}

/*
 * Lockless wakeup algorithm:
 * Without the check/retry algorithm a lockless wakeup is possible:
 * - queue.status is initialized to -EINTR before blocking.
 * - wakeup is performed by
 *	* unlinking the queue entry from sma->sem_pending
 *	* setting queue.status to IN_WAKEUP
 *	  This is the notification for the blocked thread that a
 *	  result value is imminent.
 *	* call wake_up_process
 *	* set queue.status to the final value.
 * - the previously blocked thread checks queue.status:
 *	* if it's IN_WAKEUP, then it must wait until the value changes
 *	* if it's not -EINTR, then the operation was completed by
 *	  update_queue. semtimedop can return queue.status without
 *	  performing any operation on the semaphore array.
 *	* otherwise it must acquire the spinlock and check what's up.
 *
 * The two-stage algorithm is necessary to protect against the following
 * races:
 * - if queue.status is set after wake_up_process, then the woken up idle
 *   thread could race forward and try (and fail) to acquire sma->lock
 *   before update_queue had a chance to set queue.status
 * - if queue.status is written before wake_up_process and if the
 *   blocked process is woken up by a signal between writing
 *   queue.status and the wake_up_process, then the woken up
 *   process could return from semtimedop and die by calling
 *   sys_exit before wake_up_process is called. Then wake_up_process
 *   will oops, because the task structure is already invalid.
 *   (yes, this happened on s390 with sysv msg).
 *
 */
#define IN_WAKEUP	1
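/*
 * The waiter's side of this handshake is implemented in sys_semtimedop()
 * below; in outline (shown here only to illustrate the protocol):
 *
 *	error = queue.status;
 *	while (error == IN_WAKEUP) {
 *		cpu_relax();
 *		error = queue.status;
 *	}
 *	if (error != -EINTR)
 *		return error;
 *	(otherwise: take the semaphore lock and examine the queue entry)
 */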
static int newary (key_t key, int nsems, int semflg)
{
	int id;
	int retval;
	struct sem_array *sma;
	int size;

	if (!nsems)
		return -EINVAL;
	if (used_sems + nsems > sc_semmns)
		return -ENOSPC;

	size = sizeof (*sma) + nsems * sizeof (struct sem);
	sma = ipc_rcu_alloc(size);
	if (!sma) {
		return -ENOMEM;
	}
	memset (sma, 0, size);

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_sem_alloc(sma);
	if (retval) {
		ipc_rcu_putref(sma);
		return retval;
	}

	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
	if(id == -1) {
		security_sem_free(sma);
		ipc_rcu_putref(sma);
		return -ENOSPC;
	}
	used_sems += nsems;

	sma->sem_id = sem_buildid(id, sma->sem_perm.seq);
	sma->sem_base = (struct sem *) &sma[1];
	/* sma->sem_pending = NULL; */
	sma->sem_pending_last = &sma->sem_pending;
	/* sma->undo = NULL; */
	sma->sem_nsems = nsems;
	sma->sem_ctime = get_seconds();
	sem_unlock(sma);

	return sma->sem_id;
}

asmlinkage long sys_semget (key_t key, int nsems, int semflg)
{
	int id, err = -EINVAL;
	struct sem_array *sma;

	if (nsems < 0 || nsems > sc_semmsl)
		return -EINVAL;
	down(&sem_ids.sem);

	if (key == IPC_PRIVATE) {
		err = newary(key, nsems, semflg);
	} else if ((id = ipc_findkey(&sem_ids, key)) == -1) {  /* key not used */
		if (!(semflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newary(key, nsems, semflg);
	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
		err = -EEXIST;
	} else {
		sma = sem_lock(id);
		if(sma==NULL)
			BUG();
		if (nsems > sma->sem_nsems)
			err = -EINVAL;
		else if (ipcperms(&sma->sem_perm, semflg))
			err = -EACCES;
		else {
			int semid = sem_buildid(id, sma->sem_perm.seq);
			err = security_sem_associate(sma, semflg);
			if (!err)
				err = semid;
		}
		sem_unlock(sma);
	}

	up(&sem_ids.sem);
	return err;
}
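/*
 * Illustrative sketch (not part of the original source): the userspace view
 * of the key handling implemented in sys_semget() above.
 *
 *	int id = semget(key, 4, IPC_CREAT | IPC_EXCL | 0600);
 *	if (id == -1 && errno == EEXIST)
 *		id = semget(key, 4, 0600);	(the set already exists; attach)
 */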
/* Manage the doubly linked list sma->sem_pending as a FIFO:
 * insert new queue elements at the tail sma->sem_pending_last.
 */
static inline void append_to_queue (struct sem_array * sma,
				    struct sem_queue * q)
{
	*(q->prev = sma->sem_pending_last) = q;
	*(sma->sem_pending_last = &q->next) = NULL;
}

static inline void prepend_to_queue (struct sem_array * sma,
				     struct sem_queue * q)
{
	q->next = sma->sem_pending;
	*(q->prev = &sma->sem_pending) = q;
	if (q->next)
		q->next->prev = &q->next;
	else /* sma->sem_pending_last == &sma->sem_pending */
		sma->sem_pending_last = &q->next;
}

static inline void remove_from_queue (struct sem_array * sma,
				      struct sem_queue * q)
{
	*(q->prev) = q->next;
	if (q->next)
		q->next->prev = q->prev;
	else /* sma->sem_pending_last == &q->next */
		sma->sem_pending_last = q->prev;
	q->prev = NULL; /* mark as removed */
}

/*
 * Determine whether a sequence of semaphore operations would succeed
 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
 */

static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
			     int nsops, struct sem_undo *un, int pid)
{
	int result, sem_op;
	struct sembuf *sop;
	struct sem * curr;

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = sma->sem_base + sop->sem_num;
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;
		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
			/*
			 * Exceeding the undo range is an error.
			 */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
		}
		curr->semval = result;
	}

	sop--;
	while (sop >= sops) {
		sma->sem_base[sop->sem_num].sempid = pid;
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] -= sop->sem_op;
		sop--;
	}

	sma->sem_otime = get_seconds();
	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	sop--;
	while (sop >= sops) {
		sma->sem_base[sop->sem_num].semval -= sop->sem_op;
		sop--;
	}

	return result;
}

/* Go through the pending queue for the indicated semaphore
 * looking for tasks that can be completed.
 */
static void update_queue (struct sem_array * sma)
{
	int error;
	struct sem_queue * q;

	q = sma->sem_pending;
	while(q) {
		error = try_atomic_semop(sma, q->sops, q->nsops,
					 q->undo, q->pid);

		/* Does q->sleeper still need to sleep? */
		if (error <= 0) {
			struct sem_queue *n;
			remove_from_queue(sma,q);
			q->status = IN_WAKEUP;
			/*
			 * Continue scanning. The next operation
			 * that must be checked depends on the type of the
			 * completed operation:
			 * - if the operation modified the array, then
			 *   restart from the head of the queue and
			 *   check for threads that might be waiting
			 *   for semaphore values to become 0.
			 * - if the operation didn't modify the array,
			 *   then just continue.
			 */
			if (q->alter)
				n = sma->sem_pending;
			else
				n = q->next;
			wake_up_process(q->sleeper);
			/* hands-off: q will disappear immediately after
			 * writing q->status.
			 */
			smp_wmb();
			q->status = error;
			q = n;
		} else {
			q = q->next;
		}
	}
}

/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 * This model assumes that a task waits on exactly one semaphore.
 * Since semaphore operations are to be performed atomically, tasks actually
 * wait on a whole sequence of semaphores simultaneously.
 * The counts we return here are a rough approximation, but they still
 * guarantee that semncnt+semzcnt>0 if the task is on the pending queue.
 */
static int count_semncnt (struct sem_array * sma, ushort semnum)
{
	int semncnt;
	struct sem_queue * q;

	semncnt = 0;
	for (q = sma->sem_pending; q; q = q->next) {
		struct sembuf * sops = q->sops;
		int nsops = q->nsops;
		int i;
		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op < 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semncnt++;
	}
	return semncnt;
}

static int count_semzcnt (struct sem_array * sma, ushort semnum)
{
	int semzcnt;
	struct sem_queue * q;

	semzcnt = 0;
	for (q = sma->sem_pending; q; q = q->next) {
		struct sembuf * sops = q->sops;
		int nsops = q->nsops;
		int i;
		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op == 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semzcnt++;
	}
	return semzcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.sem down and
 * the spinlock for this semaphore set held. sem_ids.sem remains locked
 * on exit.
 */
static void freeary (struct sem_array *sma, int id)
{
	struct sem_undo *un;
	struct sem_queue *q;
	int size;

	/* Invalidate the existing undo structures for this semaphore set.
	 * (They will be freed without any further action in exit_sem()
	 * or during the next semop.)
	 */
	for (un = sma->undo; un; un = un->id_next)
		un->semid = -1;

	/* Wake up all pending processes and let them fail with EIDRM. */
	q = sma->sem_pending;
	while(q) {
		struct sem_queue *n;
		/* lazy remove_from_queue: we are killing the whole queue */
		q->prev = NULL;
		n = q->next;
		q->status = IN_WAKEUP;
		wake_up_process(q->sleeper); /* doesn't sleep */
		smp_wmb();
		q->status = -EIDRM;	/* hands-off q */
		q = n;
	}

	/* Remove the semaphore set from the ID array */
	sma = sem_rmid(id);
	sem_unlock(sma);

	used_sems -= sma->sem_nsems;
	size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
	security_sem_free(sma);
	ipc_rcu_putref(sma);
}

static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct semid_ds out;

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime	= in->sem_otime;
		out.sem_ctime	= in->sem_ctime;
		out.sem_nsems	= in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

static int semctl_nolock(int semid, int semnum, int cmd, int version, union semun arg)
{
	int err = -EINVAL;
	struct sem_array *sma;

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	{
		struct seminfo seminfo;
		int max_id;

		err = security_sem_semctl(NULL, cmd);
		if (err)
			return err;

		memset(&seminfo,0,sizeof(seminfo));
		seminfo.semmni = sc_semmni;
		seminfo.semmns = sc_semmns;
		seminfo.semmsl = sc_semmsl;
		seminfo.semopm = sc_semopm;
		seminfo.semvmx = SEMVMX;
		seminfo.semmnu = SEMMNU;
		seminfo.semmap = SEMMAP;
		seminfo.semume = SEMUME;
		down(&sem_ids.sem);
		if (cmd == SEM_INFO) {
			seminfo.semusz = sem_ids.in_use;
			seminfo.semaem = used_sems;
		} else {
			seminfo.semusz = SEMUSZ;
			seminfo.semaem = SEMAEM;
		}
		max_id = sem_ids.max_id;
		up(&sem_ids.sem);
		if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
	}
	case SEM_STAT:
	{
		struct semid64_ds tbuf;
		int id;

		if(semid >= sem_ids.entries->size)
			return -EINVAL;

		memset(&tbuf,0,sizeof(tbuf));

		sma = sem_lock(semid);
		if(sma == NULL)
			return -EINVAL;

		err = -EACCES;
		if (ipcperms (&sma->sem_perm, S_IRUGO))
			goto out_unlock;

		err = security_sem_semctl(sma, cmd);
		if (err)
			goto out_unlock;

		id = sem_buildid(semid, sma->sem_perm.seq);

		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime  = sma->sem_otime;
		tbuf.sem_ctime  = sma->sem_ctime;
		tbuf.sem_nsems  = sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user (arg.buf, &tbuf, version))
			return -EFAULT;
		return id;
	}
	default:
		return -EINVAL;
	}
	return err;
out_unlock:
	sem_unlock(sma);
	return err;
}

static int semctl_main(int semid, int semnum, int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	struct sem* curr;
	int err;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort* sem_io = fast_sem_io;
	int nsems;

	sma = sem_lock(semid);
	if(sma==NULL)
		return -EINVAL;

	nsems = sma->sem_nsems;

	err=-EIDRM;
	if (sem_checkid(sma,semid))
		goto out_unlock;

	err = -EACCES;
	if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
		goto out_unlock;

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = arg.array;
		int i;

		if(nsems > SEMMSL_FAST) {
			ipc_rcu_getref(sma);
			sem_unlock(sma);

			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				return -ENOMEM;
			}

			ipc_lock_by_ptr(&sma->sem_perm);
			ipc_rcu_putref(sma);
			if (sma->sem_perm.deleted) {
				sem_unlock(sma);
				err = -EIDRM;
				goto out_free;
			}
		}

		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sem_base[i].semval;
		sem_unlock(sma);
		err = 0;
		if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		ipc_rcu_getref(sma);
		sem_unlock(sma);

		if(nsems > SEMMSL_FAST) {
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				return -ENOMEM;
			}
		}

		if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
			ipc_lock_by_ptr(&sma->sem_perm);
			ipc_rcu_putref(sma);
			sem_unlock(sma);
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				err = -ERANGE;
				goto out_free;
			}
		}
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		if (sma->sem_perm.deleted) {
			sem_unlock(sma);
			err = -EIDRM;
			goto out_free;
		}

		for (i = 0; i < nsems; i++)
			sma->sem_base[i].semval = sem_io[i];
		for (un = sma->undo; un; un = un->id_next)
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	case IPC_STAT:
	{
		struct semid64_ds tbuf;

		memset(&tbuf,0,sizeof(tbuf));
		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime  = sma->sem_otime;
		tbuf.sem_ctime  = sma->sem_ctime;
		tbuf.sem_nsems  = sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user (arg.buf, &tbuf, version))
			return -EFAULT;
		return 0;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT, SETVAL: fall-through */
	}
	err = -EINVAL;
	if(semnum < 0 || semnum >= nsems)
		goto out_unlock;

	curr = &sma->sem_base[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid;
		goto out_unlock;
	case GETNCNT:
		err = count_semncnt(sma,semnum);
		goto out_unlock;
	case GETZCNT:
		err = count_semzcnt(sma,semnum);
		goto out_unlock;
	case SETVAL:
	{
		int val = arg.val;
		struct sem_undo *un;
		err = -ERANGE;
		if (val > SEMVMX || val < 0)
			goto out_unlock;

		for (un = sma->undo; un; un = un->id_next)
			un->semadj[semnum] = 0;
		curr->semval = val;
		curr->sempid = current->tgid;
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	}
out_unlock:
	sem_unlock(sma);
out_free:
	if(sem_io != fast_sem_io)
		ipc_free(sem_io, sizeof(ushort)*nsems);
	return err;
}

struct sem_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct semid64_ds tbuf;

		if(copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.sem_perm.uid;
		out->gid	= tbuf.sem_perm.gid;
		out->mode	= tbuf.sem_perm.mode;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct semid_ds tbuf_old;

		if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.sem_perm.uid;
		out->gid	= tbuf_old.sem_perm.gid;
		out->mode	= tbuf_old.sem_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}

static int semctl_down(int semid, int semnum, int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	int err;
	struct sem_setbuf setbuf;
	struct kern_ipc_perm *ipcp;

	if(cmd == IPC_SET) {
		if(copy_semid_from_user (&setbuf, arg.buf, version))
			return -EFAULT;
		if ((err = audit_ipc_perms(0, setbuf.uid, setbuf.gid, setbuf.mode)))
			return err;
	}
	sma = sem_lock(semid);
	if(sma==NULL)
		return -EINVAL;

	if (sem_checkid(sma,semid)) {
		err=-EIDRM;
		goto out_unlock;
	}
	ipcp = &sma->sem_perm;

	if (current->euid != ipcp->cuid &&
	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
		err=-EPERM;
		goto out_unlock;
	}

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	switch(cmd){
	case IPC_RMID:
		freeary(sma, semid);
		err = 0;
		break;
	case IPC_SET:
		ipcp->uid = setbuf.uid;
		ipcp->gid = setbuf.gid;
		ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (setbuf.mode & S_IRWXUGO);
		sma->sem_ctime = get_seconds();
		sem_unlock(sma);
		err = 0;
		break;
	default:
		sem_unlock(sma);
		err = -EINVAL;
		break;
	}
	return err;

out_unlock:
	sem_unlock(sma);
	return err;
}
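/*
 * Illustrative sketch (not part of the original source): how the commands
 * dispatched by sys_semctl() below are typically issued from userspace.
 * Note that the caller must define union semun itself on Linux.
 *
 *	union semun {
 *		int val;
 *		struct semid_ds *buf;
 *		unsigned short *array;
 *		struct seminfo *__buf;
 *	} arg;
 *
 *	arg.val = 1;
 *	semctl(semid, 0, SETVAL, arg);		(set semaphore 0 to 1)
 *	int v = semctl(semid, 0, GETVAL);	(read it back)
 *	semctl(semid, 0, IPC_RMID);		(destroy the whole set)
 */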
asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
{
	int err = -EINVAL;
	int version;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	case SEM_STAT:
		err = semctl_nolock(semid,semnum,cmd,version,arg);
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case IPC_STAT:
	case SETVAL:
	case SETALL:
		err = semctl_main(semid,semnum,cmd,version,arg);
		return err;
	case IPC_RMID:
	case IPC_SET:
		down(&sem_ids.sem);
		err = semctl_down(semid,semnum,cmd,version,arg);
		up(&sem_ids.sem);
		return err;
	default:
		return -EINVAL;
	}
}

static inline void lock_semundo(void)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (undo_list)
		spin_lock(&undo_list->lock);
}

/* This code has an interaction with copy_semundo().
 * Consider: two tasks are sharing the undo_list. task1
 * acquires the undo_list lock in lock_semundo(). If task2 now
 * exits before task1 releases the lock (by calling
 * unlock_semundo()), then task1 will never call spin_unlock().
 * This leaves the sem_undo_list in a locked state. If task1 now creates
 * task3 and once again shares the sem_undo_list, the sem_undo_list will
 * still be locked, and future SEM_UNDO operations will deadlock. This case
 * is dealt with in copy_semundo() by having it reinitialize the spin lock
 * when the refcnt goes from 1 to 2.
 */
static inline void unlock_semundo(void)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (undo_list)
		spin_unlock(&undo_list->lock);
}


/* If the task doesn't already have an undo_list, then allocate one
 * here. We guarantee there is only one thread using this undo list,
 * and current is THE ONE
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;
	int size;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		size = sizeof(struct sem_undo_list);
		undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		memset(undo_list, 0, size);
		spin_lock_init(&undo_list->lock);
		atomic_set(&undo_list->refcnt, 1);
		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo **last, *un;

	last = &ulp->proc_list;
	un = *last;
	while(un != NULL) {
		if(un->semid==semid)
			break;
		if(un->semid==-1) {
			*last=un->proc_next;
			kfree(un);
		} else {
			last=&un->proc_next;
		}
		un=*last;
	}
	return un;
}

static struct sem_undo *find_undo(int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems;
	int error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	lock_semundo();
	un = lookup_undo(ulp, semid);
	unlock_semundo();
	if (likely(un!=NULL))
		goto out;

	/* no undo structure around - allocate one. */
	sma = sem_lock(semid);
	un = ERR_PTR(-EINVAL);
	if(sma==NULL)
		goto out;
	un = ERR_PTR(-EIDRM);
	if (sem_checkid(sma,semid)) {
		sem_unlock(sma);
		goto out;
	}
	nsems = sma->sem_nsems;
	ipc_rcu_getref(sma);
	sem_unlock(sma);

	new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		sem_unlock(sma);
		return ERR_PTR(-ENOMEM);
	}
	memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*nsems);
	new->semadj = (short *) &new[1];
	new->semid = semid;

	lock_semundo();
	un = lookup_undo(ulp, semid);
	if (un) {
		unlock_semundo();
		kfree(new);
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		sem_unlock(sma);
		goto out;
	}
	ipc_lock_by_ptr(&sma->sem_perm);
	ipc_rcu_putref(sma);
	if (sma->sem_perm.deleted) {
		sem_unlock(sma);
		unlock_semundo();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	new->proc_next = ulp->proc_list;
	ulp->proc_list = new;
	new->id_next = sma->undo;
	sma->undo = new;
	sem_unlock(sma);
	un = new;
	unlock_semundo();
out:
	return un;
}
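/*
 * Illustrative sketch (not part of the original source): a userspace caller
 * of semtimedop(). If the operations cannot complete before the timeout
 * expires, the call below fails with EAGAIN (see the timeout handling at
 * the end of sys_semtimedop() below).
 *
 *	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *
 *	if (semtimedop(semid, &op, 1, &ts) == -1 && errno == EAGAIN)
 *		... timed out waiting for the semaphore ...
 */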
asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
			unsigned nsops, const struct timespec __user *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf* sops = fast_sops, *sop;
	struct sem_undo *un;
	int undos = 0, alter = 0, max;
	struct sem_queue queue;
	unsigned long jiffies_left = 0;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > sc_semopm)
		return -E2BIG;
	if(nsops > SEMOPM_FAST) {
		sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
		if(sops==NULL)
			return -ENOMEM;
	}
	if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
		error=-EFAULT;
		goto out_free;
	}
	if (timeout) {
		struct timespec _timeout;
		if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
			error = -EFAULT;
			goto out_free;
		}
		if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
			_timeout.tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec_to_jiffies(&_timeout);
	}
	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = 1;
		if (sop->sem_op != 0)
			alter = 1;
	}

retry_undos:
	if (undos) {
		un = find_undo(semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out_free;
		}
	} else
		un = NULL;

	sma = sem_lock(semid);
	error=-EINVAL;
	if(sma==NULL)
		goto out_free;
	error = -EIDRM;
	if (sem_checkid(sma,semid))
		goto out_unlock_free;
	/*
	 * semid identifiers are not unique - find_undo may have
	 * allocated an undo structure, it was invalidated by an RMID,
	 * and now a new array with the same id has been created.
	 * Check and retry.
	 */
	if (un && un->semid == -1) {
		sem_unlock(sma);
		goto retry_undos;
	}
	error = -EFBIG;
	if (max >= sma->sem_nsems)
		goto out_unlock_free;

	error = -EACCES;
	if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
		goto out_unlock_free;

	error = security_sem_semop(sma, sops, nsops, alter);
	if (error)
		goto out_unlock_free;

	error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
	if (error <= 0) {
		if (alter && error == 0)
			update_queue (sma);
		goto out_unlock_free;
	}

	/* We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */

	queue.sma = sma;
	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = current->tgid;
	queue.id = semid;
	queue.alter = alter;
	if (alter)
		append_to_queue(sma, &queue);
	else
		prepend_to_queue(sma, &queue);

	queue.status = -EINTR;
	queue.sleeper = current;
	current->state = TASK_INTERRUPTIBLE;
	sem_unlock(sma);

	if (timeout)
		jiffies_left = schedule_timeout(jiffies_left);
	else
		schedule();

	error = queue.status;
	while(unlikely(error == IN_WAKEUP)) {
		cpu_relax();
		error = queue.status;
	}

	if (error != -EINTR) {
		/* fast path: update_queue already obtained all requested
		 * resources */
		goto out_free;
	}

	sma = sem_lock(semid);
	if(sma==NULL) {
		if(queue.prev != NULL)
			BUG();
		error = -EIDRM;
		goto out_free;
	}

	/*
	 * If queue.status != -EINTR we are woken up by another process
	 */
	error = queue.status;
	if (error != -EINTR) {
		goto out_unlock_free;
	}

	/*
	 * If an interrupt occurred we have to clean up the queue
	 */
	if (timeout && jiffies_left == 0)
		error = -EAGAIN;
	remove_from_queue(sma,&queue);
	goto out_unlock_free;

out_unlock_free:
	sem_unlock(sma);
out_free:
	if(sops != fast_sops)
		kfree(sops);
	return error;
}

asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsops)
{
	return sys_semtimedop(semid, tsops, nsops, NULL);
}
/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 *
 * See the notes above unlock_semundo() regarding the spin_lock_init()
 * in this code. Initialize the undo_list->lock here instead of in
 * get_undo_list() because of the reasoning in the comment above
 * unlock_semundo.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		atomic_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	struct sem_undo *u, **up;

	undo_list = tsk->sysvsem.undo_list;
	if (!undo_list)
		return;

	if (!atomic_dec_and_test(&undo_list->refcnt))
		return;

	/* There's no need to hold the semundo list lock, as current
	 * is the last task exiting for this undo list.
	 */
	for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
		struct sem_array *sma;
		int nsems, i;
		struct sem_undo *un, **unp;
		int semid;

		semid = u->semid;

		if(semid == -1)
			continue;
		sma = sem_lock(semid);
		if (sma == NULL)
			continue;

		if (u->semid == -1)
			goto next_entry;

		BUG_ON(sem_checkid(sma,u->semid));

		/* remove u from the sma->undo list */
		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
			if (u == un)
				goto found;
		}
		printk ("exit_sem undo list error id=%d\n", u->semid);
		goto next_entry;
found:
		*unp = un->id_next;
		/* perform adjustments registered in u */
		nsems = sma->sem_nsems;
		for (i = 0; i < nsems; i++) {
			struct sem * sem = &sma->sem_base[i];
			if (u->semadj[i]) {
				sem->semval += u->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by sus:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
				 *	Manfred <manfred@colorfullife.com>
				 */
				if (sem->semval < 0)
					sem->semval = 0;
				if (sem->semval > SEMVMX)
					sem->semval = SEMVMX;
				sem->sempid = current->tgid;
			}
		}
		sma->sem_otime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
next_entry:
		sem_unlock(sma);
	}
	kfree(undo_list);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct sem_array *sma = it;

	return seq_printf(s,
			  "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
			  sma->sem_perm.key,
			  sma->sem_id,
			  sma->sem_perm.mode,
			  sma->sem_nsems,
			  sma->sem_perm.uid,
			  sma->sem_perm.gid,
			  sma->sem_perm.cuid,
			  sma->sem_perm.cgid,
			  sma->sem_otime,
			  sma->sem_ctime);
}
#endif