/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
 * This code underwent a massive rewrite in order to solve some problems
 * with the original code. In particular the original code failed to
 * wake up processes that were waiting for semval to go to 0 if the
 * value went to 0 and was then incremented rapidly enough. In solving
 * this problem I have also modified the implementation so that it
 * processes pending operations in a FIFO manner, thus giving a guarantee
 * that processes waiting for a lock on the semaphore won't starve
 * unless another locking process fails to unlock.
 * In addition the following two changes in behavior have been introduced:
 * - The original implementation of semop returned the value of the
 *   last semaphore element examined on success. This does not
 *   match the manual page specifications, and effectively
 *   allows the user to read the semaphore even if they do not
 *   have read permissions. The implementation now returns 0
 *   on success as stated in the manual page.
 * - There is some confusion over whether the set of undo adjustments
 *   to be performed at exit should be done in an atomic manner.
 *   That is, if we are attempting to decrement the semval should we queue
 *   up and wait until we can do so legally?
 *   The original implementation attempted to do this.
 *   The current implementation does not do so. This is because I don't
 *   think it is the right thing (TM) to do, and because I couldn't
 *   see a clean way to get the old behavior with the new design.
 *   The POSIX standard and SVID should be consulted to determine
 *   what behavior is mandated.
 *
 * Further notes on refinement (Christoph Rohland, December 1998):
 * - The POSIX standard says that the undo adjustments should simply be
 *   applied, so the current implementation is OK.
 * - The previous code had two flaws:
 *   1) It actively gave the semaphore to the next waiting process
 *      sleeping on the semaphore. Since this process did not have the
 *      cpu, this led to many unnecessary context switches and bad
 *      performance. Now we only check which process should be able to
 *      get the semaphore, and if this process wants to reduce some
 *      semaphore value we simply wake it up without doing the
 *      operation, so it has to try to get it later. Thus e.g. the
 *      running process may reacquire the semaphore during the current
 *      time slice. If it only waits for zero or increases the semaphore,
 *      we do the operation in advance and wake it up.
 *   2) It did not wake up all processes waiting for zero. We try to do
 *      better, but only get the semops right which only wait for zero or
 *      increase. If there are decrement operations in the operations
 *      array we do the same as before.
 *
 * With the introduction of the O(1) scheduler, it became unnecessary to
 * perform a check/retry algorithm for waking up blocked processes, as the
 * new scheduler handles thread switches better than the old one.
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc <alan@redhat.com>
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>

#include <asm/uaccess.h>
#include "util.h"

#define sem_ids(ns)	(*((ns)->ids[IPC_SEM_IDS]))

#define sem_unlock(sma)		ipc_unlock(&(sma)->sem_perm)
#define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)
#define sem_buildid(id, seq)	ipc_buildid(id, seq)

static struct ipc_ids init_sem_ids;

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct sem_array *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256	/* 512 bytes on stack */
#define SEMOPM_FAST	64	/* ~ 372 bytes on stack */

/*
 * linked list protection:
 *	sem_undo.id_next,
 *	sem_array.sem_pending{,last},
 *	sem_array.sem_undo: sem_lock() for read/write
 *	sem_undo.proc_next: only "current" is allowed to read/write that field.
 */

#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
{
	ns->ids[IPC_SEM_IDS] = ids;
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
	ipc_init_ids(ids);
}

int sem_init_ns(struct ipc_namespace *ns)
{
	struct ipc_ids *ids;

	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
	if (ids == NULL)
		return -ENOMEM;

	__sem_init_ns(ns, ids);
	return 0;
}

void sem_exit_ns(struct ipc_namespace *ns)
{
	struct sem_array *sma;
	int next_id;
	int total, in_use;

	down_write(&sem_ids(ns).rw_mutex);

	in_use = sem_ids(ns).in_use;

	for (total = 0, next_id = 0; total < in_use; next_id++) {
		sma = idr_find(&sem_ids(ns).ipcs_idr, next_id);
		if (sma == NULL)
			continue;

		ipc_lock_by_ptr(&sma->sem_perm);
		freeary(ns, sma);
		total++;
	}
	up_write(&sem_ids(ns).rw_mutex);

	kfree(ns->ids[IPC_SEM_IDS]);
	ns->ids[IPC_SEM_IDS] = NULL;
}

void __init sem_init (void)
{
	__sem_init_ns(&init_ipc_ns, &init_sem_ids);
	ipc_init_proc_interface("sysvipc/sem",
				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
				IPC_SEM_IDS, sysvipc_sem_proc_show);
}

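/*
 * The sem_lock*() helpers below return either the locked sem_array or an
 * ERR_PTR value produced by the underlying ipc_lock*() call.  Passing the
 * error pointer through container_of() is safe here because sem_perm is
 * the first member of struct sem_array, so callers can simply use
 * IS_ERR()/PTR_ERR() on the result.
 */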
/*
 * This routine is called in the paths where the rw_mutex is held to protect
 * access to the idr tree.
 */
static inline struct sem_array *sem_lock_check_down(struct ipc_namespace *ns,
						int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&sem_ids(ns), id);

	return container_of(ipcp, struct sem_array, sem_perm);
}

/*
 * sem_lock_(check_) routines are called in the paths where the rw_mutex
 * is not held.
 */
static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
						int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
	ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

/*
 * Lockless wakeup algorithm:
 * Without the check/retry algorithm a lockless wakeup is possible:
 * - queue.status is initialized to -EINTR before blocking.
 * - wakeup is performed by
 *	* unlinking the queue entry from sma->sem_pending
 *	* setting queue.status to IN_WAKEUP
 *	  This is the notification for the blocked thread that a
 *	  result value is imminent.
 *	* call wake_up_process
 *	* set queue.status to the final value.
 * - the previously blocked thread checks queue.status:
 *	* if it's IN_WAKEUP, then it must wait until the value changes
 *	* if it's not -EINTR, then the operation was completed by
 *	  update_queue. semtimedop can return queue.status without
 *	  performing any operation on the sem array.
 *	* otherwise it must acquire the spinlock and check what's up.
 *
 * The two-stage algorithm is necessary to protect against the following
 * races:
 * - if queue.status is set after wake_up_process, then the woken up idle
 *   thread could race forward and try (and fail) to acquire sma->lock
 *   before update_queue had a chance to set queue.status
 * - if queue.status is written before wake_up_process and if the
 *   blocked process is woken up by a signal between writing
 *   queue.status and the wake_up_process, then the woken up
 *   process could return from semtimedop and die by calling
 *   sys_exit before wake_up_process is called. Then wake_up_process
 *   will oops, because the task structure is already invalid.
 *   (yes, this happened on s390 with sysv msg).
 *
 */
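/*
 * Note that IN_WAKEUP must differ from every value that can be stored in
 * queue.status as a final result: the waker only ever writes 0 or a
 * negative error code (and freeary() writes -EIDRM), so the positive
 * value 1 is unambiguous to the sleeper spinning in sys_semtimedop().
 */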
#define IN_WAKEUP	1

/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
 * Called with sem_ids.rw_mutex held (as a writer)
 */

static int newary(struct ipc_namespace *ns, struct ipc_params *params)
{
	int id;
	int retval;
	struct sem_array *sma;
	int size;
	key_t key = params->key;
	int nsems = params->u.nsems;
	int semflg = params->flg;

	if (!nsems)
		return -EINVAL;
	if (ns->used_sems + nsems > ns->sc_semmns)
		return -ENOSPC;

	size = sizeof(*sma) + nsems * sizeof(struct sem);
	sma = ipc_rcu_alloc(size);
	if (!sma)
		return -ENOMEM;

	memset(sma, 0, size);

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_sem_alloc(sma);
	if (retval) {
		ipc_rcu_putref(sma);
		return retval;
	}

	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
	if (id < 0) {
		security_sem_free(sma);
		ipc_rcu_putref(sma);
		return id;
	}
	ns->used_sems += nsems;

	sma->sem_perm.id = sem_buildid(id, sma->sem_perm.seq);
	sma->sem_base = (struct sem *) &sma[1];
	/* sma->sem_pending = NULL; */
	sma->sem_pending_last = &sma->sem_pending;
	/* sma->undo = NULL; */
	sma->sem_nsems = nsems;
	sma->sem_ctime = get_seconds();
	sem_unlock(sma);

	return sma->sem_perm.id;
}

/*
 * Called with sem_ids.rw_mutex and ipcp locked.
 */
static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	return security_sem_associate(sma, semflg);
}

/*
 * Called with sem_ids.rw_mutex and ipcp locked.
 */
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
				struct ipc_params *params)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
		return -EINVAL;

	return 0;
}

asmlinkage long sys_semget(key_t key, int nsems, int semflg)
{
	struct ipc_namespace *ns;
	struct ipc_ops sem_ops;
	struct ipc_params sem_params;

	ns = current->nsproxy->ipc_ns;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	sem_ops.getnew = newary;
	sem_ops.associate = sem_security;
	sem_ops.more_checks = sem_more_checks;

	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}

/* Manage the doubly linked list sma->sem_pending as a FIFO:
 * insert new queue elements at the tail sma->sem_pending_last.
 */
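/*
 * Each sem_queue carries a "prev" pointer that points at the location
 * holding the pointer to this entry (either sma->sem_pending for the head
 * or the previous element's "next" field).  This lets remove_from_queue()
 * unlink an entry without a special case for the list head, and a NULL
 * "prev" marks an entry that has already been removed.
 */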
static inline void append_to_queue(struct sem_array *sma,
				   struct sem_queue *q)
{
	*(q->prev = sma->sem_pending_last) = q;
	*(sma->sem_pending_last = &q->next) = NULL;
}

static inline void prepend_to_queue(struct sem_array *sma,
				    struct sem_queue *q)
{
	q->next = sma->sem_pending;
	*(q->prev = &sma->sem_pending) = q;
	if (q->next)
		q->next->prev = &q->next;
	else /* sma->sem_pending_last == &sma->sem_pending */
		sma->sem_pending_last = &q->next;
}

static inline void remove_from_queue(struct sem_array *sma,
				     struct sem_queue *q)
{
	*(q->prev) = q->next;
	if (q->next)
		q->next->prev = q->prev;
	else /* sma->sem_pending_last == &q->next */
		sma->sem_pending_last = q->prev;
	q->prev = NULL;	/* mark as removed */
}

/*
 * Determine whether a sequence of semaphore operations would succeed
 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
 */

static int try_atomic_semop(struct sem_array *sma, struct sembuf *sops,
			    int nsops, struct sem_undo *un, int pid)
{
	int result, sem_op;
	struct sembuf *sop;
	struct sem *curr;

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = sma->sem_base + sop->sem_num;
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;
		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
			/*
			 * Exceeding the undo range is an error.
			 */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
		}
		curr->semval = result;
	}

	sop--;
	while (sop >= sops) {
		sma->sem_base[sop->sem_num].sempid = pid;
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] -= sop->sem_op;
		sop--;
	}

	sma->sem_otime = get_seconds();
	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	sop--;
	while (sop >= sops) {
		sma->sem_base[sop->sem_num].semval -= sop->sem_op;
		sop--;
	}

	return result;
}

/* Go through the pending queue for the indicated semaphore
 * looking for tasks that can be completed.
 */
static void update_queue(struct sem_array *sma)
{
	int error;
	struct sem_queue *q;

	q = sma->sem_pending;
	while (q) {
		error = try_atomic_semop(sma, q->sops, q->nsops,
					 q->undo, q->pid);

		/* Does q->sleeper still need to sleep? */
		if (error <= 0) {
			struct sem_queue *n;
			remove_from_queue(sma, q);
			q->status = IN_WAKEUP;
			/*
			 * Continue scanning. The next operation
			 * that must be checked depends on the type of the
			 * completed operation:
			 * - if the operation modified the array, then
			 *   restart from the head of the queue and
			 *   check for threads that might be waiting
			 *   for semaphore values to become 0.
			 * - if the operation didn't modify the array,
			 *   then just continue.
			 */
			if (q->alter)
				n = sma->sem_pending;
			else
				n = q->next;
			wake_up_process(q->sleeper);
			/* hands-off: q will disappear immediately after
			 * writing q->status.
			 */
			smp_wmb();
			q->status = error;
			q = n;
		} else {
			q = q->next;
		}
	}
}

/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 * This model assumes that a task waits on exactly one semaphore.
 * Since semaphore operations are to be performed atomically, tasks actually
 * wait on a whole sequence of semaphores simultaneously.
 * The counts we return here are a rough approximation, but still
 * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
 */
static int count_semncnt(struct sem_array *sma, ushort semnum)
{
	int semncnt;
	struct sem_queue *q;

	semncnt = 0;
	for (q = sma->sem_pending; q; q = q->next) {
		struct sembuf *sops = q->sops;
		int nsops = q->nsops;
		int i;
		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op < 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semncnt++;
	}
	return semncnt;
}

static int count_semzcnt(struct sem_array *sma, ushort semnum)
{
	int semzcnt;
	struct sem_queue *q;

	semzcnt = 0;
	for (q = sma->sem_pending; q; q = q->next) {
		struct sembuf *sops = q->sops;
		int nsops = q->nsops;
		int i;
		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op == 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semzcnt++;
	}
	return semzcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rw_mutex
 * remains locked on exit.
 */
static void freeary(struct ipc_namespace *ns, struct sem_array *sma)
{
	struct sem_undo *un;
	struct sem_queue *q;

	/* Invalidate the existing undo structures for this semaphore set.
	 * (They will be freed without any further action in exit_sem()
	 * or during the next semop.)
	 */
	for (un = sma->undo; un; un = un->id_next)
		un->semid = -1;

	/* Wake up all pending processes and let them fail with EIDRM. */
	q = sma->sem_pending;
	while (q) {
		struct sem_queue *n;
		/* lazy remove_from_queue: we are killing the whole queue */
		q->prev = NULL;
		n = q->next;
		q->status = IN_WAKEUP;
		wake_up_process(q->sleeper); /* doesn't sleep */
		smp_wmb();
		q->status = -EIDRM;	/* hands-off q */
		q = n;
	}

	/* Remove the semaphore set from the IDR */
	sem_rmid(ns, sma);
	sem_unlock(sma);

	ns->used_sems -= sma->sem_nsems;
	security_sem_free(sma);
	ipc_rcu_putref(sma);
}

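/*
 * Copy a semid64_ds out to userspace, either verbatim (IPC_64) or converted
 * to the legacy semid_ds layout (IPC_OLD).  The version is the one decoded
 * from the command by ipc_parse_version() in sys_semctl().
 */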
static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct semid_ds out;

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime	= in->sem_otime;
		out.sem_ctime	= in->sem_ctime;
		out.sem_nsems	= in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, int version, union semun arg)
{
	int err = -EINVAL;
	struct sem_array *sma;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
	{
		struct seminfo seminfo;
		int max_id;

		err = security_sem_semctl(NULL, cmd);
		if (err)
			return err;

		memset(&seminfo, 0, sizeof(seminfo));
		seminfo.semmni = ns->sc_semmni;
		seminfo.semmns = ns->sc_semmns;
		seminfo.semmsl = ns->sc_semmsl;
		seminfo.semopm = ns->sc_semopm;
		seminfo.semvmx = SEMVMX;
		seminfo.semmnu = SEMMNU;
		seminfo.semmap = SEMMAP;
		seminfo.semume = SEMUME;
		down_read(&sem_ids(ns).rw_mutex);
		if (cmd == SEM_INFO) {
			seminfo.semusz = sem_ids(ns).in_use;
			seminfo.semaem = ns->used_sems;
		} else {
			seminfo.semusz = SEMUSZ;
			seminfo.semaem = SEMAEM;
		}
		max_id = ipc_get_maxid(&sem_ids(ns));
		up_read(&sem_ids(ns).rw_mutex);
		if (copy_to_user(arg.__buf, &seminfo, sizeof(struct seminfo)))
			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
	}
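	/*
	 * SEM_STAT is like IPC_STAT, except that the semid argument is an
	 * index into the kernel's internal array rather than a semaphore
	 * identifier, and the identifier of the set found at that index
	 * is returned to the caller.
	 */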
	case SEM_STAT:
	{
		struct semid64_ds tbuf;
		int id;

		sma = sem_lock(ns, semid);
		if (IS_ERR(sma))
			return PTR_ERR(sma);

		err = -EACCES;
		if (ipcperms(&sma->sem_perm, S_IRUGO))
			goto out_unlock;

		err = security_sem_semctl(sma, cmd);
		if (err)
			goto out_unlock;

		id = sma->sem_perm.id;

		memset(&tbuf, 0, sizeof(tbuf));

		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime	= sma->sem_otime;
		tbuf.sem_ctime	= sma->sem_ctime;
		tbuf.sem_nsems	= sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user(arg.buf, &tbuf, version))
			return -EFAULT;
		return id;
	}
	default:
		return -EINVAL;
	}
	return err;
out_unlock:
	sem_unlock(sma);
	return err;
}

static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	struct sem *curr;
	int err;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort *sem_io = fast_sem_io;
	int nsems;

	sma = sem_lock_check(ns, semid);
	if (IS_ERR(sma))
		return PTR_ERR(sma);

	nsems = sma->sem_nsems;

	err = -EACCES;
	if (ipcperms(&sma->sem_perm,
			(cmd == SETVAL || cmd == SETALL) ? S_IWUGO : S_IRUGO))
		goto out_unlock;

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = arg.array;
		int i;

		if (nsems > SEMMSL_FAST) {
			ipc_rcu_getref(sma);
			sem_unlock(sma);

			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if (sem_io == NULL) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				return -ENOMEM;
			}

			ipc_lock_by_ptr(&sma->sem_perm);
			ipc_rcu_putref(sma);
			if (sma->sem_perm.deleted) {
				sem_unlock(sma);
				err = -EIDRM;
				goto out_free;
			}
		}

		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sem_base[i].semval;
		sem_unlock(sma);
		err = 0;
		if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		ipc_rcu_getref(sma);
		sem_unlock(sma);

		if (nsems > SEMMSL_FAST) {
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if (sem_io == NULL) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				return -ENOMEM;
			}
		}

		if (copy_from_user(sem_io, arg.array, nsems*sizeof(ushort))) {
			ipc_lock_by_ptr(&sma->sem_perm);
			ipc_rcu_putref(sma);
			sem_unlock(sma);
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				err = -ERANGE;
				goto out_free;
			}
		}
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		if (sma->sem_perm.deleted) {
			sem_unlock(sma);
			err = -EIDRM;
			goto out_free;
		}

		for (i = 0; i < nsems; i++)
			sma->sem_base[i].semval = sem_io[i];
		for (un = sma->undo; un; un = un->id_next)
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	case IPC_STAT:
	{
		struct semid64_ds tbuf;
		memset(&tbuf, 0, sizeof(tbuf));
		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime	= sma->sem_otime;
		tbuf.sem_ctime	= sma->sem_ctime;
		tbuf.sem_nsems	= sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user(arg.buf, &tbuf, version))
			return -EFAULT;
		return 0;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT, SETVAL: fall-through */
	}
	err = -EINVAL;
	if (semnum < 0 || semnum >= nsems)
		goto out_unlock;

	curr = &sma->sem_base[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid;
		goto out_unlock;
	case GETNCNT:
		err = count_semncnt(sma, semnum);
		goto out_unlock;
	case GETZCNT:
		err = count_semzcnt(sma, semnum);
		goto out_unlock;
	case SETVAL:
	{
		int val = arg.val;
		struct sem_undo *un;
		err = -ERANGE;
		if (val > SEMVMX || val < 0)
			goto out_unlock;

		for (un = sma->undo; un; un = un->id_next)
			un->semadj[semnum] = 0;
		curr->semval = val;
		curr->sempid = task_tgid_vnr(current);
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	}
out_unlock:
	sem_unlock(sma);
out_free:
	if (sem_io != fast_sem_io)
		ipc_free(sem_io, sizeof(ushort)*nsems);
	return err;
}

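/*
 * The subset of semid_ds fields that IPC_SET is allowed to change,
 * decoded from either ABI layout by copy_semid_from_user() below.
 */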
struct sem_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
	{
		struct semid64_ds tbuf;

		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.sem_perm.uid;
		out->gid	= tbuf.sem_perm.gid;
		out->mode	= tbuf.sem_perm.mode;

		return 0;
	}
	case IPC_OLD:
	{
		struct semid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.sem_perm.uid;
		out->gid	= tbuf_old.sem_perm.gid;
		out->mode	= tbuf_old.sem_perm.mode;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	int err;
	struct sem_setbuf uninitialized_var(setbuf);
	struct kern_ipc_perm *ipcp;

	if (cmd == IPC_SET) {
		if (copy_semid_from_user(&setbuf, arg.buf, version))
			return -EFAULT;
	}
	sma = sem_lock_check_down(ns, semid);
	if (IS_ERR(sma))
		return PTR_ERR(sma);

	ipcp = &sma->sem_perm;

	err = audit_ipc_obj(ipcp);
	if (err)
		goto out_unlock;

	if (cmd == IPC_SET) {
		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
		if (err)
			goto out_unlock;
	}
	if (current->euid != ipcp->cuid &&
	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
		err = -EPERM;
		goto out_unlock;
	}

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	switch (cmd) {
	case IPC_RMID:
		freeary(ns, sma);
		err = 0;
		break;
	case IPC_SET:
		ipcp->uid = setbuf.uid;
		ipcp->gid = setbuf.gid;
		ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (setbuf.mode & S_IRWXUGO);
		sma->sem_ctime = get_seconds();
		sem_unlock(sma);
		err = 0;
		break;
	default:
		sem_unlock(sma);
		err = -EINVAL;
		break;
	}
	return err;

out_unlock:
	sem_unlock(sma);
	return err;
}

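/*
 * Top-level dispatcher for semctl(2): the informational commands are
 * handled by semctl_nolock(), the per-semaphore and whole-array commands
 * by semctl_main(), and IPC_RMID/IPC_SET by semctl_down() with the
 * namespace's rw_mutex held for writing.
 */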
asmlinkage long sys_semctl (int semid, int semnum,
			int cmd, union semun arg)
{
	int err = -EINVAL;
	int version;
	struct ipc_namespace *ns;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
	case SEM_STAT:
		err = semctl_nolock(ns, semid, semnum, cmd, version, arg);
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case IPC_STAT:
	case SETVAL:
	case SETALL:
		err = semctl_main(ns, semid, semnum, cmd, version, arg);
		return err;
	case IPC_RMID:
	case IPC_SET:
		down_write(&sem_ids(ns).rw_mutex);
		err = semctl_down(ns, semid, semnum, cmd, version, arg);
		up_write(&sem_ids(ns).rw_mutex);
		return err;
	default:
		return -EINVAL;
	}
}

/* If the task doesn't already have an undo_list, then allocate one
 * here.  We guarantee there is only one thread using this undo list,
 * and current is THE ONE.
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		spin_lock_init(&undo_list->lock);
		atomic_set(&undo_list->refcnt, 1);
		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo **last, *un;

	last = &ulp->proc_list;
	un = *last;
	while (un != NULL) {
		if (un->semid == semid)
			break;
		if (un->semid == -1) {
			*last = un->proc_next;
			kfree(un);
		} else {
			last = &un->proc_next;
		}
		un = *last;
	}
	return un;
}

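/*
 * find_undo - look up (or create) the undo structure of the current task
 * for the given semaphore set.  The allocation is done with the set
 * unlocked but referenced, and the lookup is repeated under ulp->lock
 * afterwards in case another task sharing this undo list (CLONE_SYSVSEM)
 * raced us and already inserted an entry.
 */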
static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems;
	int error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	spin_unlock(&ulp->lock);
	if (likely(un != NULL))
		goto out;

	/* no undo structure around - allocate one. */
	sma = sem_lock_check(ns, semid);
	if (IS_ERR(sma))
		return ERR_PTR(PTR_ERR(sma));

	nsems = sma->sem_nsems;
	ipc_rcu_getref(sma);
	sem_unlock(sma);

	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		sem_unlock(sma);
		return ERR_PTR(-ENOMEM);
	}
	new->semadj = (short *) &new[1];
	new->semid = semid;

	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	if (un) {
		spin_unlock(&ulp->lock);
		kfree(new);
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		sem_unlock(sma);
		goto out;
	}
	ipc_lock_by_ptr(&sma->sem_perm);
	ipc_rcu_putref(sma);
	if (sma->sem_perm.deleted) {
		sem_unlock(sma);
		spin_unlock(&ulp->lock);
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	new->proc_next = ulp->proc_list;
	ulp->proc_list = new;
	new->id_next = sma->undo;
	sma->undo = new;
	sem_unlock(sma);
	un = new;
	spin_unlock(&ulp->lock);
out:
	return un;
}

asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
			unsigned nsops, const struct timespec __user *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int undos = 0, alter = 0, max;
	struct sem_queue queue;
	unsigned long jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	if (nsops > SEMOPM_FAST) {
		sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}
	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error = -EFAULT;
		goto out_free;
	}
	if (timeout) {
		struct timespec _timeout;
		if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
			error = -EFAULT;
			goto out_free;
		}
		if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
			_timeout.tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec_to_jiffies(&_timeout);
	}
	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = 1;
		if (sop->sem_op != 0)
			alter = 1;
	}

retry_undos:
	if (undos) {
		un = find_undo(ns, semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out_free;
		}
	} else
		un = NULL;

	sma = sem_lock_check(ns, semid);
	if (IS_ERR(sma)) {
		error = PTR_ERR(sma);
		goto out_free;
	}

	/*
	 * semid identifiers are not unique - find_undo may have
	 * allocated an undo structure, it was invalidated by an RMID,
	 * and now a new array has received the same id. Check and retry.
	 */
	if (un && un->semid == -1) {
		sem_unlock(sma);
		goto retry_undos;
	}
	error = -EFBIG;
	if (max >= sma->sem_nsems)
		goto out_unlock_free;

	error = -EACCES;
	if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
		goto out_unlock_free;

	error = security_sem_semop(sma, sops, nsops, alter);
	if (error)
		goto out_unlock_free;

	error = try_atomic_semop(sma, sops, nsops, un, task_tgid_vnr(current));
	if (error <= 0) {
		if (alter && error == 0)
			update_queue(sma);
		goto out_unlock_free;
	}

	/* We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */

	queue.sma = sma;
	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid_vnr(current);
	queue.id = semid;
	queue.alter = alter;
	if (alter)
		append_to_queue(sma, &queue);
	else
		prepend_to_queue(sma, &queue);

	queue.status = -EINTR;
	queue.sleeper = current;
	current->state = TASK_INTERRUPTIBLE;
	sem_unlock(sma);

	if (timeout)
		jiffies_left = schedule_timeout(jiffies_left);
	else
		schedule();

	error = queue.status;
	while (unlikely(error == IN_WAKEUP)) {
		cpu_relax();
		error = queue.status;
	}

	if (error != -EINTR) {
		/* fast path: update_queue already obtained all requested
		 * resources */
		goto out_free;
	}

	sma = sem_lock(ns, semid);
	if (IS_ERR(sma)) {
		BUG_ON(queue.prev != NULL);
		error = -EIDRM;
		goto out_free;
	}

	/*
	 * If queue.status != -EINTR we were woken up by another process.
	 */
	error = queue.status;
	if (error != -EINTR) {
		goto out_unlock_free;
	}

	/*
	 * If an interrupt occurred we have to clean up the queue.
	 */
	if (timeout && jiffies_left == 0)
		error = -EAGAIN;
	remove_from_queue(sma, &queue);
	goto out_unlock_free;

out_unlock_free:
	sem_unlock(sma);
out_free:
	if (sops != fast_sops)
		kfree(sops);
	return error;
}

asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsops)
{
	return sys_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		atomic_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	struct sem_undo *u, **up;
	struct ipc_namespace *ns;

	undo_list = tsk->sysvsem.undo_list;
	if (!undo_list)
		return;

	if (!atomic_dec_and_test(&undo_list->refcnt))
		return;

	ns = tsk->nsproxy->ipc_ns;
	/* There's no need to hold the semundo list lock, as current
	 * is the last task exiting for this undo list.
	 */
	for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
		struct sem_array *sma;
		int nsems, i;
		struct sem_undo *un, **unp;
		int semid;

		semid = u->semid;

		if (semid == -1)
			continue;
		sma = sem_lock(ns, semid);
		if (IS_ERR(sma))
			continue;

		if (u->semid == -1)
			goto next_entry;

		BUG_ON(sem_checkid(sma, u->semid));

		/* remove u from the sma->undo list */
		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
			if (u == un)
				goto found;
		}
		printk("exit_sem undo list error id=%d\n", u->semid);
		goto next_entry;
found:
		*unp = un->id_next;
		/* perform adjustments registered in u */
		nsems = sma->sem_nsems;
		for (i = 0; i < nsems; i++) {
			struct sem *semaphore = &sma->sem_base[i];
			if (u->semadj[i]) {
				semaphore->semval += u->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by SUS:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
				 *	Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				semaphore->sempid = task_tgid_vnr(current);
			}
		}
		sma->sem_otime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
next_entry:
		sem_unlock(sma);
	}
	kfree(undo_list);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct sem_array *sma = it;

	return seq_printf(s,
			  "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
			  sma->sem_perm.key,
			  sma->sem_perm.id,
			  sma->sem_perm.mode,
			  sma->sem_nsems,
			  sma->sem_perm.uid,
			  sma->sem_perm.gid,
			  sma->sem_perm.cuid,
			  sma->sem_perm.cgid,
			  sma->sem_otime,
			  sma->sem_ctime);
}
#endif