/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
 * This code underwent a massive rewrite in order to solve some problems
 * with the original code. In particular the original code failed to
 * wake up processes that were waiting for semval to go to 0 if the
 * value went to 0 and was then incremented rapidly enough. In solving
 * this problem I have also modified the implementation so that it
 * processes pending operations in a FIFO manner, thus giving a guarantee
 * that processes waiting for a lock on the semaphore won't starve
 * unless another locking process fails to unlock.
 * In addition the following two changes in behavior have been introduced:
 * - The original implementation of semop returned the value of the
 *   last semaphore element examined on success. This does not
 *   match the manual page specifications, and effectively
 *   allows the user to read the semaphore even if they do not
 *   have read permissions. The implementation now returns 0
 *   on success as stated in the manual page.
 * - There is some confusion over whether the set of undo adjustments
 *   to be performed at exit should be done in an atomic manner.
 *   That is, if we are attempting to decrement the semval should we queue
 *   up and wait until we can do so legally?
 *   The original implementation attempted to do this.
 *   The current implementation does not do so. This is because I don't
 *   think it is the right thing (TM) to do, and because I couldn't
 *   see a clean way to get the old behavior with the new design.
 *   The POSIX standard and SVID should be consulted to determine
 *   what behavior is mandated.
 *
 * Further notes on refinement (Christoph Rohland, December 1998):
 * - The POSIX standard says that the undo adjustments simply should
 *   redo. So the current implementation is OK.
 * - The previous code had two flaws:
 *   1) It actively gave the semaphore to the next waiting process
 *      sleeping on the semaphore. Since this process did not have the
 *      cpu this led to many unnecessary context switches and bad
 *      performance. Now we only check which process should be able to
 *      get the semaphore and if this process wants to reduce some
 *      semaphore value we simply wake it up without doing the
 *      operation. So it has to try to get it later. Thus e.g. the
 *      running process may reacquire the semaphore during the current
 *      time slice. If it only waits for zero or increases the semaphore,
 *      we do the operation in advance and wake it up.
 *   2) It did not wake up all zero waiting processes. We try to do
 *      better but only get the semops right which only wait for zero or
 *      increase. If there are decrement operations in the operations
 *      array we do the same as before.
 *
 * With the incarnation of the O(1) scheduler, it became unnecessary to
 * perform the check/retry algorithm for waking up blocked processes, as
 * the new scheduler handles thread switches better than the old one.
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc <alan@redhat.com>
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/smp_lock.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>

#include <asm/uaccess.h>
#include "util.h"


#define sem_lock(id)	((struct sem_array*)ipc_lock(&sem_ids,id))
#define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
#define sem_rmid(id)	((struct sem_array*)ipc_rmid(&sem_ids,id))
#define sem_checkid(sma, semid)	\
	ipc_checkid(&sem_ids,&sma->sem_perm,semid)
#define sem_buildid(id, seq) \
	ipc_buildid(&sem_ids, id, seq)
static struct ipc_ids sem_ids;

static int newary (key_t, int, int);
static void freeary (struct sem_array *sma, int id);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * linked list protection:
 *	sem_undo.id_next,
 *	sem_array.sem_pending{,last},
 *	sem_array.sem_undo: sem_lock() for read/write
 *	sem_undo.proc_next: only "current" is allowed to read/write that field.
 *
 */

int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
#define sc_semmsl	(sem_ctls[0])
#define sc_semmns	(sem_ctls[1])
#define sc_semopm	(sem_ctls[2])
#define sc_semmni	(sem_ctls[3])

static int used_sems;

void __init sem_init (void)
{
	used_sems = 0;
	ipc_init_ids(&sem_ids, sc_semmni);
	ipc_init_proc_interface("sysvipc/sem",
				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
				&sem_ids,
				sysvipc_sem_proc_show);
}

/*
 * Lockless wakeup algorithm:
 * Without the check/retry algorithm a lockless wakeup is possible:
 * - queue.status is initialized to -EINTR before blocking.
 * - wakeup is performed by
 *	* unlinking the queue entry from sma->sem_pending
 *	* setting queue.status to IN_WAKEUP
 *	  This is the notification for the blocked thread that a
 *	  result value is imminent.
 *	* call wake_up_process
 *	* set queue.status to the final value.
 * - the previously blocked thread checks queue.status:
 *	* if it's IN_WAKEUP, then it must wait until the value changes
 *	* if it's not -EINTR, then the operation was completed by
 *	  update_queue. semtimedop can return queue.status without
 *	  performing any operation on the sem array.
 *	* otherwise it must acquire the spinlock and check what's up.
 *
 * The two-stage algorithm is necessary to protect against the following
 * races:
 * - if queue.status is set after wake_up_process, then the woken up idle
 *   thread could race forward and try (and fail) to acquire sma->lock
 *   before update_queue had a chance to set queue.status
 * - if queue.status is written before wake_up_process and if the
 *   blocked process is woken up by a signal between writing
 *   queue.status and the wake_up_process, then the woken up
 *   process could return from semtimedop and die by calling
 *   sys_exit before wake_up_process is called. Then wake_up_process
 *   will oops, because the task structure is already invalid.
 *   (yes, this happened on s390 with sysv msg).
 *
 */
#define IN_WAKEUP	1
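
/*
 * Illustrative sketch (not a separate interface): the waiter side of the
 * protocol above is the polling loop in sys_semtimedop() further down,
 * which spins only for the short window between wake_up_process() and
 * the final status write:
 *
 *	error = queue.status;
 *	while (unlikely(error == IN_WAKEUP)) {
 *		cpu_relax();
 *		error = queue.status;
 *	}
 */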

static int newary (key_t key, int nsems, int semflg)
{
	int id;
	int retval;
	struct sem_array *sma;
	int size;

	if (!nsems)
		return -EINVAL;
	if (used_sems + nsems > sc_semmns)
		return -ENOSPC;

	size = sizeof (*sma) + nsems * sizeof (struct sem);
	sma = ipc_rcu_alloc(size);
	if (!sma) {
		return -ENOMEM;
	}
	memset (sma, 0, size);

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_sem_alloc(sma);
	if (retval) {
		ipc_rcu_putref(sma);
		return retval;
	}

	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
	if(id == -1) {
		security_sem_free(sma);
		ipc_rcu_putref(sma);
		return -ENOSPC;
	}
	used_sems += nsems;

	sma->sem_id = sem_buildid(id, sma->sem_perm.seq);
	sma->sem_base = (struct sem *) &sma[1];
	/* sma->sem_pending = NULL; */
	sma->sem_pending_last = &sma->sem_pending;
	/* sma->undo = NULL; */
	sma->sem_nsems = nsems;
	sma->sem_ctime = get_seconds();
	sem_unlock(sma);

	return sma->sem_id;
}

asmlinkage long sys_semget (key_t key, int nsems, int semflg)
{
	int id, err = -EINVAL;
	struct sem_array *sma;

	if (nsems < 0 || nsems > sc_semmsl)
		return -EINVAL;
	mutex_lock(&sem_ids.mutex);

	if (key == IPC_PRIVATE) {
		err = newary(key, nsems, semflg);
	} else if ((id = ipc_findkey(&sem_ids, key)) == -1) { /* key not used */
		if (!(semflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newary(key, nsems, semflg);
	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
		err = -EEXIST;
	} else {
		sma = sem_lock(id);
		BUG_ON(sma==NULL);
		if (nsems > sma->sem_nsems)
			err = -EINVAL;
		else if (ipcperms(&sma->sem_perm, semflg))
			err = -EACCES;
		else {
			int semid = sem_buildid(id, sma->sem_perm.seq);
			err = security_sem_associate(sma, semflg);
			if (!err)
				err = semid;
		}
		sem_unlock(sma);
	}

	mutex_unlock(&sem_ids.mutex);
	return err;
}
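
/*
 * Userspace view of the checks above (illustrative only; the mapping from
 * the semget() library call to sys_semget() is assumed):
 *
 *	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *
 * An existing key with IPC_CREAT|IPC_EXCL yields -EEXIST, a missing key
 * without IPC_CREAT yields -ENOENT, and asking for more semaphores than
 * an existing set contains yields -EINVAL.
 */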

/* Manage the doubly linked list sma->sem_pending as a FIFO:
 * insert new queue elements at the tail sma->sem_pending_last.
 */
static inline void append_to_queue (struct sem_array * sma,
				    struct sem_queue * q)
{
	*(q->prev = sma->sem_pending_last) = q;
	*(sma->sem_pending_last = &q->next) = NULL;
}

static inline void prepend_to_queue (struct sem_array * sma,
				     struct sem_queue * q)
{
	q->next = sma->sem_pending;
	*(q->prev = &sma->sem_pending) = q;
	if (q->next)
		q->next->prev = &q->next;
	else /* sma->sem_pending_last == &sma->sem_pending */
		sma->sem_pending_last = &q->next;
}

static inline void remove_from_queue (struct sem_array * sma,
				      struct sem_queue * q)
{
	*(q->prev) = q->next;
	if (q->next)
		q->next->prev = q->prev;
	else /* sma->sem_pending_last == &q->next */
		sma->sem_pending_last = q->prev;
	q->prev = NULL; /* mark as removed */
}

/*
 * Determine whether a sequence of semaphore operations would succeed
 * all at once. Return 0 if yes, 1 if the caller needs to sleep, or an
 * error code otherwise.
 */

static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
			     int nsops, struct sem_undo *un, int pid)
{
	int result, sem_op;
	struct sembuf *sop;
	struct sem * curr;

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = sma->sem_base + sop->sem_num;
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;
		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
			/*
			 * Exceeding the undo range is an error.
			 */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
		}
		curr->semval = result;
	}

	sop--;
	while (sop >= sops) {
		sma->sem_base[sop->sem_num].sempid = pid;
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] -= sop->sem_op;
		sop--;
	}

	sma->sem_otime = get_seconds();
	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	sop--;
	while (sop >= sops) {
		sma->sem_base[sop->sem_num].semval -= sop->sem_op;
		sop--;
	}

	return result;
}
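
/*
 * Worked example for try_atomic_semop() (values assumed): with
 * semval[0] == 0 and sops[] = { {1, +1, 0}, {0, -1, 0} }, the increment
 * of semaphore 1 is applied tentatively, the decrement of semaphore 0
 * would take its value below zero, and the undo loop then reverts
 * semaphore 1 before returning 1 (sleep); with IPC_NOWAIT set on the
 * blocking sembuf the return value is -EAGAIN instead.
 */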

/* Go through the pending queue for the indicated semaphore
 * looking for tasks that can be completed.
 */
static void update_queue (struct sem_array * sma)
{
	int error;
	struct sem_queue * q;

	q = sma->sem_pending;
	while(q) {
		error = try_atomic_semop(sma, q->sops, q->nsops,
					 q->undo, q->pid);

		/* Does q->sleeper still need to sleep? */
		if (error <= 0) {
			struct sem_queue *n;
			remove_from_queue(sma,q);
			q->status = IN_WAKEUP;
			/*
			 * Continue scanning. The next operation
			 * that must be checked depends on the type of the
			 * completed operation:
			 * - if the operation modified the array, then
			 *   restart from the head of the queue and
			 *   check for threads that might be waiting
			 *   for semaphore values to become 0.
			 * - if the operation didn't modify the array,
			 *   then just continue.
			 */
			if (q->alter)
				n = sma->sem_pending;
			else
				n = q->next;
			wake_up_process(q->sleeper);
			/* hands-off: q will disappear immediately after
			 * writing q->status.
			 */
			smp_wmb();
			q->status = error;
			q = n;
		} else {
			q = q->next;
		}
	}
}

/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 * This model assumes that a task waits on exactly one semaphore.
 * Since semaphore operations are to be performed atomically, tasks actually
 * wait on a whole sequence of semaphores simultaneously.
 * The counts we return here are a rough approximation, but still
 * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
 */
static int count_semncnt (struct sem_array * sma, ushort semnum)
{
	int semncnt;
	struct sem_queue * q;

	semncnt = 0;
	for (q = sma->sem_pending; q; q = q->next) {
		struct sembuf * sops = q->sops;
		int nsops = q->nsops;
		int i;
		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op < 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semncnt++;
	}
	return semncnt;
}
static int count_semzcnt (struct sem_array * sma, ushort semnum)
{
	int semzcnt;
	struct sem_queue * q;

	semzcnt = 0;
	for (q = sma->sem_pending; q; q = q->next) {
		struct sembuf * sops = q->sops;
		int nsops = q->nsops;
		int i;
		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op == 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semzcnt++;
	}
	return semzcnt;
}
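
/*
 * Example (illustrative): a task blocked in semop() with
 * sops[] = { {2, -1, 0} } contributes one to semncnt of semaphore 2, so
 * semctl(id, 2, GETNCNT) reports at least 1 while that task sleeps.
 */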

/* Free a semaphore set. freeary() is called with sem_ids.mutex locked and
 * the spinlock for this semaphore set held. sem_ids.mutex remains locked
 * on exit.
 */
static void freeary (struct sem_array *sma, int id)
{
	struct sem_undo *un;
	struct sem_queue *q;
	int size;

	/* Invalidate the existing undo structures for this semaphore set.
	 * (They will be freed without any further action in exit_sem()
	 * or during the next semop.)
	 */
	for (un = sma->undo; un; un = un->id_next)
		un->semid = -1;

	/* Wake up all pending processes and let them fail with EIDRM. */
	q = sma->sem_pending;
	while(q) {
		struct sem_queue *n;
		/* lazy remove_from_queue: we are killing the whole queue */
		q->prev = NULL;
		n = q->next;
		q->status = IN_WAKEUP;
		wake_up_process(q->sleeper); /* doesn't sleep */
		smp_wmb();
		q->status = -EIDRM;	/* hands-off q */
		q = n;
	}

	/* Remove the semaphore set from the ID array */
	sma = sem_rmid(id);
	sem_unlock(sma);

	used_sems -= sma->sem_nsems;
	size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
	security_sem_free(sma);
	ipc_rcu_putref(sma);
}

static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct semid_ds out;

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime	= in->sem_otime;
		out.sem_ctime	= in->sem_ctime;
		out.sem_nsems	= in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
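
/*
 * Version note: IPC_64 copies struct semid64_ds verbatim, while IPC_OLD
 * narrows the permissions into the legacy struct semid_ds layout for old
 * binaries. A typical caller (userspace, glibc mapping assumed; the
 * caller must declare union semun itself):
 *
 *	struct semid_ds ds;
 *	union semun arg = { .buf = &ds };
 *	semctl(id, 0, IPC_STAT, arg);
 */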

static int semctl_nolock(int semid, int semnum, int cmd, int version, union semun arg)
{
	int err = -EINVAL;
	struct sem_array *sma;

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	{
		struct seminfo seminfo;
		int max_id;

		err = security_sem_semctl(NULL, cmd);
		if (err)
			return err;

		memset(&seminfo,0,sizeof(seminfo));
		seminfo.semmni = sc_semmni;
		seminfo.semmns = sc_semmns;
		seminfo.semmsl = sc_semmsl;
		seminfo.semopm = sc_semopm;
		seminfo.semvmx = SEMVMX;
		seminfo.semmnu = SEMMNU;
		seminfo.semmap = SEMMAP;
		seminfo.semume = SEMUME;
		mutex_lock(&sem_ids.mutex);
		if (cmd == SEM_INFO) {
			seminfo.semusz = sem_ids.in_use;
			seminfo.semaem = used_sems;
		} else {
			seminfo.semusz = SEMUSZ;
			seminfo.semaem = SEMAEM;
		}
		max_id = sem_ids.max_id;
		mutex_unlock(&sem_ids.mutex);
		if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
	}
	case SEM_STAT:
	{
		struct semid64_ds tbuf;
		int id;

		if(semid >= sem_ids.entries->size)
			return -EINVAL;

		memset(&tbuf,0,sizeof(tbuf));

		sma = sem_lock(semid);
		if(sma == NULL)
			return -EINVAL;

		err = -EACCES;
		if (ipcperms (&sma->sem_perm, S_IRUGO))
			goto out_unlock;

		err = security_sem_semctl(sma, cmd);
		if (err)
			goto out_unlock;

		id = sem_buildid(semid, sma->sem_perm.seq);

		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime	= sma->sem_otime;
		tbuf.sem_ctime	= sma->sem_ctime;
		tbuf.sem_nsems	= sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user (arg.buf, &tbuf, version))
			return -EFAULT;
		return id;
	}
	default:
		return -EINVAL;
	}
	return err;
out_unlock:
	sem_unlock(sma);
	return err;
}

static int semctl_main(int semid, int semnum, int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	struct sem* curr;
	int err;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort* sem_io = fast_sem_io;
	int nsems;

	sma = sem_lock(semid);
	if(sma==NULL)
		return -EINVAL;

	nsems = sma->sem_nsems;

	err=-EIDRM;
	if (sem_checkid(sma,semid))
		goto out_unlock;

	err = -EACCES;
	if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
		goto out_unlock;

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = arg.array;
		int i;

		if(nsems > SEMMSL_FAST) {
			ipc_rcu_getref(sma);
			sem_unlock(sma);

			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				return -ENOMEM;
			}

			ipc_lock_by_ptr(&sma->sem_perm);
			ipc_rcu_putref(sma);
			if (sma->sem_perm.deleted) {
				sem_unlock(sma);
				err = -EIDRM;
				goto out_free;
			}
		}

		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sem_base[i].semval;
		sem_unlock(sma);
		err = 0;
		if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		ipc_rcu_getref(sma);
		sem_unlock(sma);

		if(nsems > SEMMSL_FAST) {
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				return -ENOMEM;
			}
		}

		if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
			ipc_lock_by_ptr(&sma->sem_perm);
			ipc_rcu_putref(sma);
			sem_unlock(sma);
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				err = -ERANGE;
				goto out_free;
			}
		}
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		if (sma->sem_perm.deleted) {
			sem_unlock(sma);
			err = -EIDRM;
			goto out_free;
		}

		for (i = 0; i < nsems; i++)
			sma->sem_base[i].semval = sem_io[i];
		for (un = sma->undo; un; un = un->id_next)
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
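
	/*
	 * SETALL example (illustrative, values assumed): after
	 *
	 *	ushort vals[2] = { 1, 0 };
	 *	semctl(id, 0, SETALL, arg);	with arg.array = vals
	 *
	 * every recorded semadj value for this set has been cleared above,
	 * since the adjustments referred to semaphore values that no
	 * longer exist.
	 */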
	case IPC_STAT:
	{
		struct semid64_ds tbuf;

		memset(&tbuf,0,sizeof(tbuf));
		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime	= sma->sem_otime;
		tbuf.sem_ctime	= sma->sem_ctime;
		tbuf.sem_nsems	= sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user (arg.buf, &tbuf, version))
			return -EFAULT;
		return 0;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT, SETVAL: fall-through */
	}
	err = -EINVAL;
	if(semnum < 0 || semnum >= nsems)
		goto out_unlock;

	curr = &sma->sem_base[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid;
		goto out_unlock;
	case GETNCNT:
		err = count_semncnt(sma,semnum);
		goto out_unlock;
	case GETZCNT:
		err = count_semzcnt(sma,semnum);
		goto out_unlock;
	case SETVAL:
	{
		int val = arg.val;
		struct sem_undo *un;
		err = -ERANGE;
		if (val > SEMVMX || val < 0)
			goto out_unlock;

		for (un = sma->undo; un; un = un->id_next)
			un->semadj[semnum] = 0;
		curr->semval = val;
		curr->sempid = current->tgid;
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	}
out_unlock:
	sem_unlock(sma);
out_free:
	if(sem_io != fast_sem_io)
		ipc_free(sem_io, sizeof(ushort)*nsems);
	return err;
}

struct sem_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct semid64_ds tbuf;

		if(copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.sem_perm.uid;
		out->gid	= tbuf.sem_perm.gid;
		out->mode	= tbuf.sem_perm.mode;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct semid_ds tbuf_old;

		if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.sem_perm.uid;
		out->gid	= tbuf_old.sem_perm.gid;
		out->mode	= tbuf_old.sem_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}

static int semctl_down(int semid, int semnum, int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	int err;
	struct sem_setbuf setbuf;
	struct kern_ipc_perm *ipcp;

	if(cmd == IPC_SET) {
		if(copy_semid_from_user (&setbuf, arg.buf, version))
			return -EFAULT;
	}
	sma = sem_lock(semid);
	if(sma==NULL)
		return -EINVAL;

	if (sem_checkid(sma,semid)) {
		err=-EIDRM;
		goto out_unlock;
	}
	ipcp = &sma->sem_perm;

	err = audit_ipc_obj(ipcp);
	if (err)
		goto out_unlock;

	if (cmd == IPC_SET) {
		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
		if (err)
			goto out_unlock;
	}
	if (current->euid != ipcp->cuid &&
	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
		err=-EPERM;
		goto out_unlock;
	}

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	switch(cmd){
	case IPC_RMID:
		freeary(sma, semid);
		err = 0;
		break;
	case IPC_SET:
		ipcp->uid = setbuf.uid;
		ipcp->gid = setbuf.gid;
		ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (setbuf.mode & S_IRWXUGO);
		sma->sem_ctime = get_seconds();
		sem_unlock(sma);
		err = 0;
		break;
	default:
		sem_unlock(sma);
		err = -EINVAL;
		break;
	}
	return err;

out_unlock:
	sem_unlock(sma);
	return err;
}
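
/*
 * Illustrative IPC_SET sequence (userspace, glibc mapping assumed); only
 * the creator, the owner or a CAP_SYS_ADMIN task passes the euid check
 * above:
 *
 *	struct semid_ds ds;
 *	union semun arg = { .buf = &ds };
 *	semctl(id, 0, IPC_STAT, arg);
 *	ds.sem_perm.mode = 0600;
 *	semctl(id, 0, IPC_SET, arg);
 */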

asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
{
	int err = -EINVAL;
	int version;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	case SEM_STAT:
		err = semctl_nolock(semid,semnum,cmd,version,arg);
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case IPC_STAT:
	case SETVAL:
	case SETALL:
		err = semctl_main(semid,semnum,cmd,version,arg);
		return err;
	case IPC_RMID:
	case IPC_SET:
		mutex_lock(&sem_ids.mutex);
		err = semctl_down(semid,semnum,cmd,version,arg);
		mutex_unlock(&sem_ids.mutex);
		return err;
	default:
		return -EINVAL;
	}
}

static inline void lock_semundo(void)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (undo_list)
		spin_lock(&undo_list->lock);
}

/* This code has an interaction with copy_semundo().
 * Consider: two tasks are sharing the undo_list. task1
 * acquires the undo_list lock in lock_semundo(). If task2 now
 * exits before task1 releases the lock (by calling
 * unlock_semundo()), then task1 will never call spin_unlock().
 * This leaves the sem_undo_list in a locked state. If task1 now creates
 * task3 and once again shares the sem_undo_list, the sem_undo_list will
 * still be locked, and future SEM_UNDO operations will deadlock. This
 * case is dealt with in copy_semundo() by having it reinitialize the
 * spin lock when the refcnt goes from 1 to 2.
 */
static inline void unlock_semundo(void)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (undo_list)
		spin_unlock(&undo_list->lock);
}
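
/*
 * Usage sketch: accesses to current's undo list are bracketed as
 *
 *	lock_semundo();
 *	un = lookup_undo(ulp, semid);
 *	unlock_semundo();
 *
 * so the only unbalanced case is the exit race described above, which
 * copy_semundo() handles when the refcnt goes from 1 to 2.
 */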

/* If the task doesn't already have an undo_list, then allocate one
 * here. We guarantee there is only one thread using this undo list,
 * and current is THE ONE.
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;
	int size;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		size = sizeof(struct sem_undo_list);
		undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		memset(undo_list, 0, size);
		spin_lock_init(&undo_list->lock);
		atomic_set(&undo_list->refcnt, 1);
		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo **last, *un;

	last = &ulp->proc_list;
	un = *last;
	while(un != NULL) {
		if(un->semid==semid)
			break;
		if(un->semid==-1) {
			*last=un->proc_next;
			kfree(un);
		} else {
			last=&un->proc_next;
		}
		un=*last;
	}
	return un;
}

static struct sem_undo *find_undo(int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems;
	int error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	lock_semundo();
	un = lookup_undo(ulp, semid);
	unlock_semundo();
	if (likely(un!=NULL))
		goto out;

	/* no undo structure around - allocate one. */
	sma = sem_lock(semid);
	un = ERR_PTR(-EINVAL);
	if(sma==NULL)
		goto out;
	un = ERR_PTR(-EIDRM);
	if (sem_checkid(sma,semid)) {
		sem_unlock(sma);
		goto out;
	}
	nsems = sma->sem_nsems;
	ipc_rcu_getref(sma);
	sem_unlock(sma);

	new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		sem_unlock(sma);
		return ERR_PTR(-ENOMEM);
	}
	memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*nsems);
	new->semadj = (short *) &new[1];
	new->semid = semid;

	lock_semundo();
	un = lookup_undo(ulp, semid);
	if (un) {
		unlock_semundo();
		kfree(new);
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		sem_unlock(sma);
		goto out;
	}
	ipc_lock_by_ptr(&sma->sem_perm);
	ipc_rcu_putref(sma);
	if (sma->sem_perm.deleted) {
		sem_unlock(sma);
		unlock_semundo();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	new->proc_next = ulp->proc_list;
	ulp->proc_list = new;
	new->id_next = sma->undo;
	sma->undo = new;
	sem_unlock(sma);
	un = new;
	unlock_semundo();
out:
	return un;
}
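
/*
 * semadj bookkeeping example (values assumed): a successful SEM_UNDO
 * operation with sem_op == -2 stores +2 in un->semadj[sem_num] (see
 * try_atomic_semop() above), so exit_sem() below can add it back and a
 * task that dies while holding the "lock" releases it automatically.
 */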

asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
			unsigned nsops, const struct timespec __user *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf* sops = fast_sops, *sop;
	struct sem_undo *un;
	int undos = 0, alter = 0, max;
	struct sem_queue queue;
	unsigned long jiffies_left = 0;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > sc_semopm)
		return -E2BIG;
	if(nsops > SEMOPM_FAST) {
		sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
		if(sops==NULL)
			return -ENOMEM;
	}
	if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
		error=-EFAULT;
		goto out_free;
	}
	if (timeout) {
		struct timespec _timeout;
		if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
			error = -EFAULT;
			goto out_free;
		}
		if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
			_timeout.tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec_to_jiffies(&_timeout);
	}
	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = 1;
		if (sop->sem_op != 0)
			alter = 1;
	}

retry_undos:
	if (undos) {
		un = find_undo(semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out_free;
		}
	} else
		un = NULL;

	sma = sem_lock(semid);
	error=-EINVAL;
	if(sma==NULL)
		goto out_free;
	error = -EIDRM;
	if (sem_checkid(sma,semid))
		goto out_unlock_free;
	/*
	 * semid identifiers are not unique - find_undo may have
	 * allocated an undo structure, it was invalidated by an RMID,
	 * and a new array has since received the same id. Check and retry.
	 */
	if (un && un->semid == -1) {
		sem_unlock(sma);
		goto retry_undos;
	}
	error = -EFBIG;
	if (max >= sma->sem_nsems)
		goto out_unlock_free;

	error = -EACCES;
	if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
		goto out_unlock_free;

	error = security_sem_semop(sma, sops, nsops, alter);
	if (error)
		goto out_unlock_free;

	error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
	if (error <= 0) {
		if (alter && error == 0)
			update_queue (sma);
		goto out_unlock_free;
	}

	/* We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */

	queue.sma = sma;
	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = current->tgid;
	queue.id = semid;
	queue.alter = alter;
	if (alter)
		append_to_queue(sma, &queue);
	else
		prepend_to_queue(sma, &queue);

	queue.status = -EINTR;
	queue.sleeper = current;
	current->state = TASK_INTERRUPTIBLE;
	sem_unlock(sma);

	if (timeout)
		jiffies_left = schedule_timeout(jiffies_left);
	else
		schedule();

	error = queue.status;
	while(unlikely(error == IN_WAKEUP)) {
		cpu_relax();
		error = queue.status;
	}

	if (error != -EINTR) {
		/* fast path: update_queue already obtained all requested
		 * resources */
		goto out_free;
	}

	sma = sem_lock(semid);
	if(sma==NULL) {
		BUG_ON(queue.prev != NULL);
		error = -EIDRM;
		goto out_free;
	}

	/*
	 * If queue.status != -EINTR we are woken up by another process
	 */
	error = queue.status;
	if (error != -EINTR) {
		goto out_unlock_free;
	}

	/*
	 * If an interrupt occurred we have to clean up the queue
	 */
	if (timeout && jiffies_left == 0)
		error = -EAGAIN;
	remove_from_queue(sma,&queue);
	goto out_unlock_free;

out_unlock_free:
	sem_unlock(sma);
out_free:
	if(sops != fast_sops)
		kfree(sops);
	return error;
}

asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsops)
{
	return sys_semtimedop(semid, tsops, nsops, NULL);
}
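
/*
 * Userspace example (illustrative): a blocking decrement with a one
 * second timeout; -EAGAIN is returned once the timeout expires:
 *
 *	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	semtimedop(id, &op, 1, &ts);
 */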

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 *
 * See the notes above unlock_semundo() regarding the spin_lock_init()
 * in this code. Initialize the undo_list->lock here instead of in
 * get_undo_list() because of the reasoning in the comment above
 * unlock_semundo().
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		atomic_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}
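
/*
 * Sketch: clone() with CLONE_SYSVSEM makes parent and child share one
 * undo_list (refcnt incremented above), so their SEM_UNDO adjustments
 * are applied once, by the last sharer to exit; a plain fork() leaves
 * the child with undo_list == NULL until its first SEM_UNDO semop.
 */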

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	struct sem_undo *u, **up;

	undo_list = tsk->sysvsem.undo_list;
	if (!undo_list)
		return;

	if (!atomic_dec_and_test(&undo_list->refcnt))
		return;

	/* There's no need to hold the semundo list lock, as current
	 * is the last task exiting for this undo list.
	 */
	for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
		struct sem_array *sma;
		int nsems, i;
		struct sem_undo *un, **unp;
		int semid;

		semid = u->semid;

		if(semid == -1)
			continue;
		sma = sem_lock(semid);
		if (sma == NULL)
			continue;

		if (u->semid == -1)
			goto next_entry;

		BUG_ON(sem_checkid(sma,u->semid));

		/* remove u from the sma->undo list */
		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
			if (u == un)
				goto found;
		}
		printk ("exit_sem undo list error id=%d\n", u->semid);
		goto next_entry;
found:
		*unp = un->id_next;
		/* perform adjustments registered in u */
		nsems = sma->sem_nsems;
		for (i = 0; i < nsems; i++) {
			struct sem * semaphore = &sma->sem_base[i];
			if (u->semadj[i]) {
				semaphore->semval += u->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by SUS:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
				 *	Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				semaphore->sempid = current->tgid;
			}
		}
		sma->sem_otime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
next_entry:
		sem_unlock(sma);
	}
	kfree(undo_list);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct sem_array *sma = it;

	return seq_printf(s,
			  "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
			  sma->sem_perm.key,
			  sma->sem_id,
			  sma->sem_perm.mode,
			  sma->sem_nsems,
			  sma->sem_perm.uid,
			  sma->sem_perm.gid,
			  sma->sem_perm.cuid,
			  sma->sem_perm.cgid,
			  sma->sem_otime,
			  sma->sem_ctime);
}
#endif
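
/*
 * Resulting /proc/sysvipc/sem row format, one line per semaphore set,
 * aligned with the header installed in sem_init():
 *
 *	key semid perms nsems uid gid cuid cgid otime ctime
 */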