// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This allows to achieve FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
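 *
 * For illustration, a minimal user-space sketch of the semantics above
 * (a blocking decrement with SEM_UNDO):
 *
 *	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *	semop(id, &op, 1);	// blocks until semval can be decremented;
 *				// the adjustment recorded by SEM_UNDO is
 *				// reverted at process exit (see exit_sem()).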
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>

#include <linux/uaccess.h>
#include "util.h"


/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	int			pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
	struct sembuf		*blocking; /* the operation that blocked */
	int			nsops;	 /* number of operations */
	bool			alter;	 /* does *sops alter the array? */
	bool			dupsop;	 /* sops on more than one sem_num */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list: *
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks in a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
	refcount_t		refcnt;
	spinlock_t		lock;
	struct list_head	list_proc;
};


#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *	sem_undo.id_next,
 *	sem_array.complex_count,
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 * The special case is use_global_lock:
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release().
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
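 * (Illustration: the simple-op fast path in sem_lock() below does
 *	spin_lock(&sem->lock);
 *	if (!smp_load_acquire(&sma->use_global_lock))
 *		return sops->sem_num;
 *  and that ACQUIRE pairs with the smp_store_release() performed in
 *  complexmode_tryleave().)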
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
 */

#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

int sem_init_ns(struct ipc_namespace *ns)
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
	return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}

#ifdef CONFIG_IPC_NS
void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
}
#endif

int __init sem_init(void)
{
	const int err = sem_init_ns(&init_ipc_ns);

	ipc_init_proc_interface("sysvipc/sem",
				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
				IPC_SEM_IDS, sysvipc_sem_proc_show);
	return err;
}

/**
 * unmerge_queues - unmerge queues, if possible.
 * @sma: semaphore array
 *
 * The function unmerges the wait queues if complex_count is 0.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
	struct sem_queue *q, *tq;

	/* complex operations still around? */
	if (sma->complex_count)
		return;
	/*
	 * We will switch back to simple mode.
	 * Move all pending operation back into the per-semaphore
	 * queues.
	 */
	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		struct sem *curr;
		curr = &sma->sems[q->sops[0].sem_num];

		list_add_tail(&q->list, &curr->pending_alter);
	}
	INIT_LIST_HEAD(&sma->pending_alter);
}

/**
 * merge_queues - merge single semop queues into global queue
 * @sma: semaphore array
 *
 * This function merges all per-semaphore queues into the global queue.
 * It is necessary to achieve FIFO ordering for the pending single-sop
 * operations when a multi-semop operation must sleep.
 * Only the alter operations must be moved, the const operations can stay.
 */
static void merge_queues(struct sem_array *sma)
{
	int i;
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];

		list_splice_init(&sem->pending_alter, &sma->pending_alter);
	}
}

static void sem_rcu_free(struct rcu_head *head)
{
	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);

	security_sem_free(sma);
	kvfree(sma);
}

/*
 * Enter the mode suitable for non-simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_enter(struct sem_array *sma)
{
	int i;
	struct sem *sem;

	if (sma->use_global_lock > 0) {
		/*
		 * We are already in global lock mode.
		 * Nothing to do, just reset the
		 * counter until we return to simple mode.
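		 * (The counter is re-armed to USE_GLOBAL_LOCK_HYSTERESIS;
		 * complexmode_tryleave() decrements it and only drops back
		 * to per-semaphore locking once it has counted down, see
		 * below.)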
		 */
		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
		return;
	}
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;

	for (i = 0; i < sma->sem_nsems; i++) {
		sem = &sma->sems[i];
		spin_lock(&sem->lock);
		spin_unlock(&sem->lock);
	}
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
	if (sma->complex_count) {
		/* Complex ops are sleeping.
		 * We must stay in complex mode
		 */
		return;
	}
	if (sma->use_global_lock == 1) {
		/*
		 * Immediately after setting use_global_lock to 0,
		 * a simple op can start. Thus: all memory writes
		 * performed by the current operation must be visible
		 * before we set use_global_lock to 0.
		 */
		smp_store_release(&sma->use_global_lock, 0);
	} else {
		sma->use_global_lock--;
	}
}

#define SEM_GLOBAL_LOCK	(-1)
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			   int nsops)
{
	struct sem *sem;

	if (nsops != 1) {
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);

		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
	}

	/*
	 * Only one semaphore affected - try to optimize locking.
	 * Optimized locking is possible if no complex operation
	 * is either enqueued or processed right now.
	 *
	 * Both facts are tracked by use_global_lock.
	 */
	sem = &sma->sems[sops->sem_num];

	/*
	 * Initial check for use_global_lock. Just an optimization,
	 * no locking, no memory barrier.
	 */
	if (!sma->use_global_lock) {
		/*
		 * It appears that no complex operation is around.
		 * Acquire the per-semaphore lock.
		 */
		spin_lock(&sem->lock);

		/* pairs with smp_store_release() */
		if (!smp_load_acquire(&sma->use_global_lock)) {
			/* fast path successful! */
			return sops->sem_num;
		}
		spin_unlock(&sem->lock);
	}

	/* slow path: acquire the full lock */
	ipc_lock_object(&sma->sem_perm);

	if (sma->use_global_lock == 0) {
		/*
		 * The use_global_lock mode ended while we waited for
		 * sma->sem_perm.lock. Thus we must switch to locking
		 * with sem->lock.
		 * Unlike in the fast path, there is no need to recheck
		 * sma->use_global_lock after we have acquired sem->lock:
		 * We own sma->sem_perm.lock, thus use_global_lock cannot
		 * change.
		 */
		spin_lock(&sem->lock);

		ipc_unlock_object(&sma->sem_perm);
		return sops->sem_num;
	} else {
		/*
		 * Not a false alarm, thus continue to use the global lock
		 * mode. No need for complexmode_enter(), this was done by
		 * the caller that has set use_global_lock to non-zero.
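		 * (That caller already ran complexmode_enter(), which also
		 * spun on every per-semaphore lock, so no simple op can
		 * still be inside its critical section.)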
		 */
		return SEM_GLOBAL_LOCK;
	}
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
	if (locknum == SEM_GLOBAL_LOCK) {
		unmerge_queues(sma);
		complexmode_tryleave(sma);
		ipc_unlock_object(&sma->sem_perm);
	} else {
		struct sem *sem = &sma->sems[locknum];
		spin_unlock(&sem->lock);
	}
}

/*
 * sem_lock_(check_) routines are called in the paths where the rwsem
 * is not held.
 *
 * The caller holds the RCU read lock.
 */
static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline void sem_lock_and_putref(struct sem_array *sma)
{
	sem_lock(sma, NULL, -1);
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
	ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

static struct sem_array *sem_alloc(size_t nsems)
{
	struct sem_array *sma;
	size_t size;

	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
		return NULL;

	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
	sma = kvmalloc(size, GFP_KERNEL);
	if (unlikely(!sma))
		return NULL;

	memset(sma, 0, size);

	return sma;
}

/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
 * Called with sem_ids.rwsem held (as a writer)
 */
static int newary(struct ipc_namespace *ns, struct ipc_params *params)
{
	int retval;
	struct sem_array *sma;
	key_t key = params->key;
	int nsems = params->u.nsems;
	int semflg = params->flg;
	int i;

	if (!nsems)
		return -EINVAL;
	if (ns->used_sems + nsems > ns->sc_semmns)
		return -ENOSPC;

	sma = sem_alloc(nsems);
	if (!sma)
		return -ENOMEM;

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_sem_alloc(sma);
	if (retval) {
		kvfree(sma);
		return retval;
	}

	for (i = 0; i < nsems; i++) {
		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
		INIT_LIST_HEAD(&sma->sems[i].pending_const);
		spin_lock_init(&sma->sems[i].lock);
	}

	sma->complex_count = 0;
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
	INIT_LIST_HEAD(&sma->pending_alter);
	INIT_LIST_HEAD(&sma->pending_const);
	INIT_LIST_HEAD(&sma->list_id);
	sma->sem_nsems = nsems;
	sma->sem_ctime = ktime_get_real_seconds();

	/* ipc_addid() locks sma upon success. */
	retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
	if (retval < 0) {
		call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
		return retval;
	}
	ns->used_sems += nsems;

	sem_unlock(sma, -1);
	rcu_read_unlock();

	return sma->sem_perm.id;
}


/*
 * Called with sem_ids.rwsem and ipcp locked.
 */
static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	return security_sem_associate(sma, semflg);
}

/*
 * Called with sem_ids.rwsem and ipcp locked.
 */
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
				  struct ipc_params *params)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
		return -EINVAL;

	return 0;
}

long ksys_semget(key_t key, int nsems, int semflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops sem_ops = {
		.getnew = newary,
		.associate = sem_security,
		.more_checks = sem_more_checks,
	};
	struct ipc_params sem_params;

	ns = current->nsproxy->ipc_ns;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}

SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	return ksys_semget(key, nsems, semflg);
}

/**
 * perform_atomic_semop[_slow] - Attempt to perform semaphore
 *                               operations on a given array.
 * @sma: semaphore array
 * @q: struct sem_queue that describes the operation
 *
 * Caller blocking behavior is as follows, based on the value
 * indicated by the semaphore operation (sem_op):
 *
 *  (1) >0 never blocks.
 *  (2)  0 (wait-for-zero operation): blocks if semval is non-zero.
 *  (3) <0 blocks if attempting to decrement semval to a value smaller
 *      than zero.
 *
 * Returns 0 if the operation was possible.
 * Returns 1 if the operation is impossible, the caller must sleep.
 * Returns <0 for error codes.
 */
static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops, pid;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = &sma->sems[sop->sem_num];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
			/* Exceeding the undo range is an error. */
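			/* (With the default SEMAEM of 32767, this keeps the
			 * semadj value within [-32768, 32767], the range of
			 * a short.) */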
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
			un->semadj[sop->sem_num] = undo;
		}

		curr->semval = result;
	}

	sop--;
	pid = q->pid;
	while (sop >= sops) {
		sma->sems[sop->sem_num].sempid = pid;
		sop--;
	}

	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
	q->blocking = sop;

	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	sop--;
	while (sop >= sops) {
		sem_op = sop->sem_op;
		sma->sems[sop->sem_num].semval -= sem_op;
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] += sem_op;
		sop--;
	}

	return result;
}

static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	if (unlikely(q->dupsop))
		return perform_atomic_semop_slow(sma, q);

	/*
	 * We scan the semaphore set twice, first to ensure that the entire
	 * operation can succeed, therefore avoiding any pointless writes
	 * to shared memory and having to undo such changes in order to block
	 * until the operations can go through.
	 */
	for (sop = sops; sop < sops + nsops; sop++) {
		curr = &sma->sems[sop->sem_num];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block; /* wait-for-zero */

		result += sem_op;
		if (result < 0)
			goto would_block;

		if (result > SEMVMX)
			return -ERANGE;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			/* Exceeding the undo range is an error. */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				return -ERANGE;
		}
	}

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = &sma->sems[sop->sem_num];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			un->semadj[sop->sem_num] = undo;
		}
		curr->semval += sem_op;
		curr->sempid = q->pid;
	}

	return 0;

would_block:
	q->blocking = sop;
	return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
}

static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
					     struct wake_q_head *wake_q)
{
	wake_q_add(wake_q, q->sleeper);
	/*
	 * Rely on the above implicit barrier, such that we can
	 * ensure that we hold reference to the task before setting
	 * q->status. Otherwise we could race with do_exit if the
	 * task is awoken by an external event before calling
	 * wake_up_process().
	 */
	WRITE_ONCE(q->status, error);
}

static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
	list_del(&q->list);
	if (q->nsops > 1)
		sma->complex_count--;
}

/** check_restart(sma, q)
 * @sma: semaphore array
 * @q: the operation that just completed
 *
 * update_queue is O(N^2) when it restarts scanning the whole queue of
 * waiting operations. Therefore this function checks if the restart is
 * really necessary. It is called after a previously waiting operation
 * modified the array.
 * Note that wait-for-zero operations are handled without restart.
 */
static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
{
	/* pending complex alter operations are too difficult to analyse */
	if (!list_empty(&sma->pending_alter))
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

	/* It is impossible that someone waits for the new value:
	 * - complex operations always restart.
	 * - wait-for-zero are handled separately.
	 * - q is a previously sleeping simple operation that
	 *   altered the array. It must be a decrement, because
	 *   simple increments never sleep.
	 * - If there are older (higher priority) decrements
	 *   in the queue, then they have observed the original
	 *   semval value and couldn't proceed. The operation
	 *   decremented the value - thus they won't proceed either.
	 */
	return 0;
}

/**
 * wake_const_ops - wake up non-alter tasks
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
			  struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
		pending_list = &sma->sems[semnum].pending_const;

	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error = perform_atomic_semop(sma, q);

		if (error > 0)
			continue;
		/* operation completed, remove from queue & wakeup */
		unlink_queue(sma, q);

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (error == 0)
			semop_completed = 1;
	}

	return semop_completed;
}

/**
 * do_smart_wakeup_zero - wakeup all wait for zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @wake_q: lockless wake-queue head
 *
 * Checks all required queues for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
				int nsops, struct wake_q_head *wake_q)
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

			if (sma->sems[num].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, num, wake_q);
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
		 */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (sma->sems[i].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, i, wake_q);
			}
		}
	}
	/*
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
	 */
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, wake_q);

	return semop_completed;
}


/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_alter;
	else
		pending_list = &sma->sems[semnum].pending_alter;

again:
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error, restart;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan further: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sems[semnum].semval == 0)
			break;

		error = perform_atomic_semop(sma, q);

		/* Does q->sleeper still need to sleep? */
		if (error > 0)
			continue;

		unlink_queue(sma, q);

		if (error) {
			restart = 0;
		} else {
			semop_completed = 1;
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (restart)
			goto again;
	}
	return semop_completed;
}

/**
 * set_semotime - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
		sma->sems[0].sem_otime = get_seconds();
	} else {
		sma->sems[sops[0].sem_num].sem_otime =
						get_seconds();
	}
}

/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform this call after dropping all locks.
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			    int otime, struct wake_q_head *wake_q)
{
	int i;

	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
		otime |= update_queue(sma, -1, wake_q);
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
				otime |= update_queue(sma, i, wake_q);
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrements.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops won't be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							      sops[i].sem_num, wake_q);
				}
			}
		}
	}
	if (otime)
		set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
			bool count_zero)
{
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 *
 * Per definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operations would block, too.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
{
	struct list_head *l;
	struct sem_queue *q;
	int semcnt;

	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
		l = &sma->sems[semnum].pending_const;
	else
		l = &sma->sems[semnum].pending_alter;

	list_for_each_entry(q, l, list) {
		/* all tasks on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
	}

	/* Then: check the complex operations. */
	list_for_each_entry(q, &sma->pending_alter, list) {
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
	}
	return semcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
 * remains locked on exit.
 */
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct sem_undo *un, *tu;
	struct sem_queue *q, *tq;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	int i;
	DEFINE_WAKE_Q(wake_q);

	/* Free the existing undo structures for this semaphore set. */
	ipc_assert_locked_object(&sma->sem_perm);
	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
		list_del(&un->list_id);
		spin_lock(&un->ulp->lock);
		un->semid = -1;
		list_del_rcu(&un->list_proc);
		spin_unlock(&un->ulp->lock);
		kfree_rcu(un, rcu);
	}

	/* Wake up all pending processes and let them fail with EIDRM. */
	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	}

	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	}
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];
		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
		}
		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
		}
	}

	/* Remove the semaphore set from the IDR */
	sem_rmid(ns, sma);
	sem_unlock(sma, -1);
	rcu_read_unlock();

	wake_up_q(&wake_q);
	ns->used_sems -= sma->sem_nsems;
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct semid_ds out;

		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime = in->sem_otime;
		out.sem_ctime = in->sem_ctime;
		out.sem_nsems = in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

static time64_t get_semotime(struct sem_array *sma)
{
	int i;
	time64_t res;

	res = sma->sems[0].sem_otime;
	for (i = 1; i < sma->sem_nsems; i++) {
		time64_t to = sma->sems[i].sem_otime;

		if (to > res)
			res = to;
	}
	return res;
}

static int semctl_stat(struct ipc_namespace *ns, int semid,
		       int cmd, struct semid64_ds *semid64)
{
	struct sem_array *sma;
	int id = 0;
	int err;

	memset(semid64, 0, sizeof(*semid64));

	rcu_read_lock();
	if (cmd == SEM_STAT) {
		sma = sem_obtain_object(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
		id = sma->sem_perm.id;
	} else {
		sma = sem_obtain_object_check(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
	}

	err = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
		goto out_unlock;

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	ipc_lock_object(&sma->sem_perm);

	if (!ipc_valid_object(&sma->sem_perm)) {
		ipc_unlock_object(&sma->sem_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
	semid64->sem_otime = get_semotime(sma);
	semid64->sem_ctime = sma->sem_ctime;
	semid64->sem_nsems = sma->sem_nsems;

	ipc_unlock_object(&sma->sem_perm);
	rcu_read_unlock();
	return id;

out_unlock:
	rcu_read_unlock();
	return err;
}

static int semctl_info(struct ipc_namespace *ns, int semid,
		       int cmd, void __user *p)
{
	struct seminfo seminfo;
	int max_id;
	int err;

	err = security_sem_semctl(NULL, cmd);
	if (err)
		return err;

	memset(&seminfo, 0, sizeof(seminfo));
	seminfo.semmni = ns->sc_semmni;
	seminfo.semmns = ns->sc_semmns;
	seminfo.semmsl = ns->sc_semmsl;
	seminfo.semopm = ns->sc_semopm;
	seminfo.semvmx = SEMVMX;
	seminfo.semmnu = SEMMNU;
	seminfo.semmap = SEMMAP;
	seminfo.semume = SEMUME;
	down_read(&sem_ids(ns).rwsem);
	if (cmd == SEM_INFO) {
		seminfo.semusz = sem_ids(ns).in_use;
		seminfo.semaem = ns->used_sems;
	} else {
		seminfo.semusz = SEMUSZ;
		seminfo.semaem = SEMAEM;
	}
	max_id = ipc_get_maxid(&sem_ids(ns));
	up_read(&sem_ids(ns).rwsem);
	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
		return -EFAULT;
	return (max_id < 0) ? 0 : max_id;
}

static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
		int val)
{
	struct sem_undo *un;
	struct sem_array *sma;
	struct sem *curr;
	int err;
	DEFINE_WAKE_Q(wake_q);

	if (val > SEMVMX || val < 0)
		return -ERANGE;

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	if (semnum < 0 || semnum >= sma->sem_nsems) {
		rcu_read_unlock();
		return -EINVAL;
	}


	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
		rcu_read_unlock();
		return -EACCES;
	}

	err = security_sem_semctl(sma, SETVAL);
	if (err) {
		rcu_read_unlock();
		return -EACCES;
	}

	sem_lock(sma, NULL, -1);

	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		return -EIDRM;
	}

	curr = &sma->sems[semnum];

	ipc_assert_locked_object(&sma->sem_perm);
	list_for_each_entry(un, &sma->list_id, list_id)
		un->semadj[semnum] = 0;

	curr->semval = val;
	curr->sempid = task_tgid_vnr(current);
	sma->sem_ctime = ktime_get_real_seconds();
	/* maybe some queued-up processes were waiting for this */
	do_smart_update(sma, NULL, 0, 0, &wake_q);
	sem_unlock(sma, -1);
	rcu_read_unlock();
	wake_up_q(&wake_q);
	return 0;
}

static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, void __user *p)
{
	struct sem_array *sma;
	struct sem *curr;
	int err, nsems;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort *sem_io = fast_sem_io;
	DEFINE_WAKE_Q(wake_q);

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	nsems = sma->sem_nsems;

	err = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
		goto out_rcu_wakeup;

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_rcu_wakeup;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = p;
		int i;

		sem_lock(sma, NULL, -1);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}
		if (nsems > SEMMSL_FAST) {
			if (!ipc_rcu_getref(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
			sem_unlock(sma, -1);
			rcu_read_unlock();
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}

			rcu_read_lock();
			sem_lock_and_putref(sma);
			if (!ipc_valid_object(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
		}
		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sems[i].semval;
		sem_unlock(sma, -1);
		rcu_read_unlock();
		err = 0;
		if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		if (!ipc_rcu_getref(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_rcu_wakeup;
		}
		rcu_read_unlock();

		if (nsems > SEMMSL_FAST) {
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}
		}

		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				err = -ERANGE;
				goto out_free;
			}
		}
		rcu_read_lock();
		sem_lock_and_putref(sma);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}

		for (i = 0; i < nsems; i++) {
			sma->sems[i].semval = sem_io[i];
			sma->sems[i].sempid = task_tgid_vnr(current);
		}

		ipc_assert_locked_object(&sma->sem_perm);
		list_for_each_entry(un, &sma->list_id, list_id) {
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		}
		sma->sem_ctime = ktime_get_real_seconds();
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 0, &wake_q);
		err = 0;
		goto out_unlock;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
	}
	err = -EINVAL;
	if (semnum < 0 || semnum >= nsems)
		goto out_rcu_wakeup;

	sem_lock(sma, NULL, -1);
	if (!ipc_valid_object(&sma->sem_perm)) {
		err = -EIDRM;
		goto out_unlock;
	}
	curr = &sma->sems[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid;
		goto out_unlock;
	case GETNCNT:
		err = count_semcnt(sma, semnum, 0);
		goto out_unlock;
	case GETZCNT:
		err = count_semcnt(sma, semnum, 1);
		goto out_unlock;
	}

out_unlock:
	sem_unlock(sma, -1);
out_rcu_wakeup:
	rcu_read_unlock();
	wake_up_q(&wake_q);
out_free:
	if (sem_io != fast_sem_io)
		kvfree(sem_io);
	return err;
}

static inline unsigned long
copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct semid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->sem_perm.uid = tbuf_old.sem_perm.uid;
		out->sem_perm.gid = tbuf_old.sem_perm.gid;
		out->sem_perm.mode = tbuf_old.sem_perm.mode;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

/*
 * This function handles some semctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int semctl_down(struct ipc_namespace *ns, int semid,
		       int cmd, struct semid64_ds *semid64)
{
	struct sem_array *sma;
	int err;
	struct kern_ipc_perm *ipcp;

	down_write(&sem_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
				      &semid64->sem_perm, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	sma = container_of(ipcp, struct sem_array, sem_perm);

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		sem_lock(sma, NULL, -1);
		/* freeary unlocks the ipc object and rcu */
		freeary(ns, ipcp);
		goto out_up;
	case IPC_SET:
		sem_lock(sma, NULL, -1);
		err = ipc_update_perm(&semid64->sem_perm, ipcp);
		if (err)
			goto out_unlock0;
		sma->sem_ctime = ktime_get_real_seconds();
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	sem_unlock(sma, -1);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&sem_ids(ns).rwsem);
	return err;
}

long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
{
	int version;
	struct ipc_namespace *ns;
	void __user *p = (void __user *)arg;
	struct semid64_ds semid64;
	int err;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL: {
		int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
		/* big-endian 64bit */
		val = arg >> 32;
#else
		/* 32bit or little-endian 64bit */
		val = arg;
#endif
		return semctl_setval(ns, semid, semnum, val);
	}
	case IPC_SET:
		if (copy_semid_from_user(&semid64, p, version))
			return -EFAULT;
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}

SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
	return ksys_semctl(semid, semnum, cmd, arg);
}

#ifdef CONFIG_COMPAT

struct compat_semid_ds {
	struct compat_ipc_perm sem_perm;
	compat_time_t sem_otime;
	compat_time_t sem_ctime;
	compat_uptr_t sem_base;
	compat_uptr_t sem_pending;
	compat_uptr_t sem_pending_last;
	compat_uptr_t undo;
	unsigned short sem_nsems;
};

static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
					int version)
{
	memset(out, 0, sizeof(*out));
	if (version == IPC_64) {
		struct compat_semid64_ds __user *p = buf;
		return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
	} else {
		struct compat_semid_ds __user *p = buf;
		return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
	}
}

static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
					int version)
{
	if (version == IPC_64) {
		struct compat_semid64_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_semid_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	}
}

long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
{
	void __user *p = compat_ptr(arg);
	struct ipc_namespace *ns;
	struct semid64_ds semid64;
	int version = compat_ipc_parse_version(&cmd);
	int err;

	ns = current->nsproxy->ipc_ns;

	if (semid < 0)
		return -EINVAL;

	switch (cmd & (~IPC_64)) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_compat_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case GETALL:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
	case IPC_SET:
		if (copy_compat_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}

COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
{
	return compat_ksys_semctl(semid, semnum, cmd, arg);
}
#endif

/* If the task doesn't already have an undo_list, then allocate one
 * here. We guarantee there is only one thread using this undo list,
 * and current is THE ONE
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		spin_lock_init(&undo_list->lock);
		refcount_set(&undo_list->refcnt, 1);
		INIT_LIST_HEAD(&undo_list->list_proc);

		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
		if (un->semid == semid)
			return un;
	}
	return NULL;
}

static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	assert_spin_locked(&ulp->lock);

	un = __lookup_undo(ulp, semid);
	if (un) {
		list_del_rcu(&un->list_proc);
		list_add_rcu(&un->list_proc, &ulp->list_proc);
	}
	return un;
}

/**
 * find_alloc_undo - lookup (and if not present create) undo array
 * @ns: namespace
 * @semid: semaphore array id
 *
 * The function looks up (and if not present creates) the undo structure.
 * The size of the undo structure depends on the size of the semaphore
 * array, thus the alloc path is not that straightforward.
 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
 * performs a rcu_read_lock().
 */
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems, error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	rcu_read_lock();
	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	spin_unlock(&ulp->lock);
	if (likely(un != NULL))
		goto out;

	/* no undo structure around - allocate one. */
	/* step 1: figure out the size of the semaphore array */
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return ERR_CAST(sma);
	}

	nsems = sma->sem_nsems;
	if (!ipc_rcu_getref(&sma->sem_perm)) {
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	rcu_read_unlock();

	/* step 2: allocate new undo structure */
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
		return ERR_PTR(-ENOMEM);
	}

	/* step 3: Acquire the lock on semaphore array */
	rcu_read_lock();
	sem_lock_and_putref(sma);
	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
	 */
	un = lookup_undo(ulp, semid);
	if (un) {
		kfree(new);
		goto success;
	}
	/* step 5: initialize & link new undo structure */
	new->semadj = (short *) &new[1];
	new->ulp = ulp;
	new->semid = semid;
	assert_spin_locked(&ulp->lock);
	list_add_rcu(&new->list_proc, &ulp->list_proc);
	ipc_assert_locked_object(&sma->sem_perm);
	list_add(&new->list_id, &sma->list_id);
	un = new;

success:
	spin_unlock(&ulp->lock);
	sem_unlock(sma, -1);
out:
	return un;
}

static long do_semtimedop(int semid, struct sembuf __user *tsops,
		unsigned nsops, const struct timespec64 *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int max, locknum;
	bool undos = false, alter = false, dupsop = false;
	struct sem_queue queue;
	unsigned long dup = 0, jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	if (nsops > SEMOPM_FAST) {
		sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}

	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error = -EFAULT;
		goto out_free;
	}

	if (timeout) {
		if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
			timeout->tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec64_to_jiffies(timeout);
	}

	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);

		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = true;
		if (dup & mask) {
			/*
			 * There was a previous alter access that appears
			 * to have accessed the same semaphore, thus use
			 * the dupsop logic. "appears", because the detection
			 * can only check % BITS_PER_LONG.
			 */
			dupsop = true;
		}
		if (sop->sem_op != 0) {
			alter = true;
			dup |= mask;
		}
	}

	if (undos) {
		/* On success, find_alloc_undo takes the rcu_read_lock */
		un = find_alloc_undo(ns, semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out_free;
		}
	} else {
		un = NULL;
		rcu_read_lock();
	}

	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		error = PTR_ERR(sma);
		goto out_free;
	}

	error = -EFBIG;
	if (max >= sma->sem_nsems) {
		rcu_read_unlock();
		goto out_free;
	}

	error = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
		rcu_read_unlock();
		goto out_free;
	}

	error = security_sem_semop(sma, sops, nsops, alter);
	if (error) {
		rcu_read_unlock();
		goto out_free;
	}

	error = -EIDRM;
	locknum = sem_lock(sma, sops, nsops);
	/*
	 * We eventually might perform the following check in a lockless
	 * fashion, considering ipc_valid_object() locking constraints.
	 * If nsops == 1 and there is no contention for sem_perm.lock, then
	 * only a per-semaphore lock is held and it's OK to proceed with the
	 * check below. More details on the fine grained locking scheme
More details on the fine grained locking scheme 1994 * entangled here and why it's RMID race safe on comments at sem_lock() 1995 */ 1996 if (!ipc_valid_object(&sma->sem_perm)) 1997 goto out_unlock_free; 1998 /* 1999 * semid identifiers are not unique - find_alloc_undo may have 2000 * allocated an undo structure, it was invalidated by an RMID 2001 * and now a new array with received the same id. Check and fail. 2002 * This case can be detected checking un->semid. The existence of 2003 * "un" itself is guaranteed by rcu. 2004 */ 2005 if (un && un->semid == -1) 2006 goto out_unlock_free; 2007 2008 queue.sops = sops; 2009 queue.nsops = nsops; 2010 queue.undo = un; 2011 queue.pid = task_tgid_vnr(current); 2012 queue.alter = alter; 2013 queue.dupsop = dupsop; 2014 2015 error = perform_atomic_semop(sma, &queue); 2016 if (error == 0) { /* non-blocking succesfull path */ 2017 DEFINE_WAKE_Q(wake_q); 2018 2019 /* 2020 * If the operation was successful, then do 2021 * the required updates. 2022 */ 2023 if (alter) 2024 do_smart_update(sma, sops, nsops, 1, &wake_q); 2025 else 2026 set_semotime(sma, sops); 2027 2028 sem_unlock(sma, locknum); 2029 rcu_read_unlock(); 2030 wake_up_q(&wake_q); 2031 2032 goto out_free; 2033 } 2034 if (error < 0) /* non-blocking error path */ 2035 goto out_unlock_free; 2036 2037 /* 2038 * We need to sleep on this operation, so we put the current 2039 * task into the pending queue and go to sleep. 2040 */ 2041 if (nsops == 1) { 2042 struct sem *curr; 2043 curr = &sma->sems[sops->sem_num]; 2044 2045 if (alter) { 2046 if (sma->complex_count) { 2047 list_add_tail(&queue.list, 2048 &sma->pending_alter); 2049 } else { 2050 2051 list_add_tail(&queue.list, 2052 &curr->pending_alter); 2053 } 2054 } else { 2055 list_add_tail(&queue.list, &curr->pending_const); 2056 } 2057 } else { 2058 if (!sma->complex_count) 2059 merge_queues(sma); 2060 2061 if (alter) 2062 list_add_tail(&queue.list, &sma->pending_alter); 2063 else 2064 list_add_tail(&queue.list, &sma->pending_const); 2065 2066 sma->complex_count++; 2067 } 2068 2069 do { 2070 queue.status = -EINTR; 2071 queue.sleeper = current; 2072 2073 __set_current_state(TASK_INTERRUPTIBLE); 2074 sem_unlock(sma, locknum); 2075 rcu_read_unlock(); 2076 2077 if (timeout) 2078 jiffies_left = schedule_timeout(jiffies_left); 2079 else 2080 schedule(); 2081 2082 /* 2083 * fastpath: the semop has completed, either successfully or 2084 * not, from the syscall pov, is quite irrelevant to us at this 2085 * point; we're done. 2086 * 2087 * We _do_ care, nonetheless, about being awoken by a signal or 2088 * spuriously. The queue.status is checked again in the 2089 * slowpath (aka after taking sem_lock), such that we can detect 2090 * scenarios where we were awakened externally, during the 2091 * window between wake_q_add() and wake_up_q(). 2092 */ 2093 error = READ_ONCE(queue.status); 2094 if (error != -EINTR) { 2095 /* 2096 * User space could assume that semop() is a memory 2097 * barrier: Without the mb(), the cpu could 2098 * speculatively read in userspace stale data that was 2099 * overwritten by the previous owner of the semaphore. 2100 */ 2101 smp_mb(); 2102 goto out_free; 2103 } 2104 2105 rcu_read_lock(); 2106 locknum = sem_lock(sma, sops, nsops); 2107 2108 if (!ipc_valid_object(&sma->sem_perm)) 2109 goto out_unlock_free; 2110 2111 error = READ_ONCE(queue.status); 2112 2113 /* 2114 * If queue.status != -EINTR we are woken up by another process. 2115 * Leave without unlink_queue(), but with sem_unlock(). 

long ksys_semtimedop(int semid, struct sembuf __user *tsops,
		     unsigned int nsops, const struct timespec __user *timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsops, nsops, &ts);
	}
	return do_semtimedop(semid, tsops, nsops, NULL);
}

SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned int, nsops, const struct timespec __user *, timeout)
{
	return ksys_semtimedop(semid, tsops, nsops, timeout);
}

#ifdef CONFIG_COMPAT
long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
			    unsigned int nsops,
			    const struct compat_timespec __user *timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (compat_get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsems, nsops, &ts);
	}
	return do_semtimedop(semid, tsems, nsops, NULL);
}

COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
		       unsigned int, nsops,
		       const struct compat_timespec __user *, timeout)
{
	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
}
#endif

SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return do_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		refcount_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * Add semadj values to the semaphores and free the undo structures.
 * Undo structures are not freed when semaphore arrays are destroyed,
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval,
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
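/*
 * Worked illustration (the values are hypothetical): a task that executed
 * semop() with sem_op = -2 and SEM_UNDO on semaphore 0 carries
 * semadj[0] == +2 in its sem_undo.  If it exits while semval is 1, the
 * loop below computes 1 + 2 = 3 and stores that, since it lies within
 * [0, SEMVMX].  Had the adjustment driven the value negative, it would
 * simply be clamped to 0 rather than queued until it could be applied
 * "legally" - exactly the non-atomic behavior described above.
 */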
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *ulp;

	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
		return;
	tsk->sysvsem.undo_list = NULL;

	if (!refcount_dec_and_test(&ulp->refcnt))
		return;

	for (;;) {
		struct sem_array *sma;
		struct sem_undo *un;
		int semid, i;
		DEFINE_WAKE_Q(wake_q);

		cond_resched();

		rcu_read_lock();
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
		if (&un->list_proc == &ulp->list_proc) {
			/*
			 * We must wait for freeary() before freeing this ulp,
			 * in case we raced with the last sem_undo. There is a
			 * small window in which we might exit while freeary()
			 * has not yet finished unlocking the sem_undo_list.
			 */
			spin_lock(&ulp->lock);
			spin_unlock(&ulp->lock);
			rcu_read_unlock();
			break;
		}
		spin_lock(&ulp->lock);
		semid = un->semid;
		spin_unlock(&ulp->lock);

		/* exit_sem raced with IPC_RMID, nothing to do */
		if (semid == -1) {
			rcu_read_unlock();
			continue;
		}

		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (IS_ERR(sma)) {
			rcu_read_unlock();
			continue;
		}

		sem_lock(sma, NULL, -1);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (!ipc_valid_object(&sma->sem_perm)) {
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}
		un = __lookup_undo(ulp, semid);
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
			 */
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}

		/* remove un from the linked lists */
		ipc_assert_locked_object(&sma->sem_perm);
		list_del(&un->list_id);

		/* We are the last process using this ulp, so acquiring
		 * ulp->lock isn't required. Besides that, we are also
		 * protected against IPC_RMID, as we hold the sma->sem_perm
		 * lock now.
		 */
		list_del_rcu(&un->list_proc);

		/* perform adjustments registered in un */
		for (i = 0; i < sma->sem_nsems; i++) {
			struct sem *semaphore = &sma->sems[i];
			if (un->semadj[i]) {
				semaphore->semval += un->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by SUS:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
				 *	Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				semaphore->sempid = task_tgid_vnr(current);
			}
		}
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 1, &wake_q);
		sem_unlock(sma, -1);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		kfree_rcu(un, rcu);
	}
	kfree(ulp);
}
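
/*
 * Each line that sysvipc_sem_proc_show() below emits into /proc/sysvipc/sem
 * describes one semaphore array, in the column order
 *	key semid perms nsems uid gid cuid cgid otime ctime
 * matching the seq_printf() format string: permissions are printed in
 * octal, the two times in seconds since the epoch.
 */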

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	time64_t sem_otime;

	/*
	 * The proc interface isn't aware of sem_lock(); it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
	 */
	complexmode_enter(sma);

	sem_otime = get_semotime(sma);

	seq_printf(s,
		   "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

	complexmode_tryleave(sma);

	return 0;
}
#endif