// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 *   - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This allows achieving FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
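 *
 * Illustrative userspace sketch (not part of the kernel; it assumes a
 * semaphore set id "semid" obtained earlier via semget()):
 *
 *	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *
 *	if (semop(semid, &op, 1) == 0) {
 *		... critical section ...
 *	}
 *
 * The decrement either succeeds immediately or the caller sleeps in FIFO
 * order behind earlier waiters; because of SEM_UNDO, the adjustment is
 * reverted automatically when the process exits (see exit_sem()).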
 */

#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>
#include <linux/nospec.h>
#include <linux/rhashtable.h>

#include <linux/uaccess.h>
#include "util.h"

/* One semaphore structure for each semaphore in the system. */
struct sem {
	int	semval;		/* current value */
	/*
	 * PID of the process that last modified the semaphore. For
	 * Linux, specifically these are:
	 *  - semop
	 *  - semctl, via SETVAL and SETALL.
	 *  - at task exit when performing undo adjustments (see exit_sem).
	 */
	struct pid *sempid;
	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
	struct list_head pending_alter; /* pending single-sop operations */
					/* that alter the semaphore */
	struct list_head pending_const; /* pending single-sop operations */
					/* that do not alter the semaphore*/
	time64_t	 sem_otime;	/* candidate for sem_otime */
} ____cacheline_aligned_in_smp;

/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
	time64_t		sem_ctime;	/* create/last semctl() time */
	struct list_head	pending_alter;	/* pending operations */
						/* that alter the array */
	struct list_head	pending_const;	/* pending complex operations */
						/* that do not alter semvals */
	struct list_head	list_id;	/* undo requests on this array */
	int			sem_nsems;	/* no. of semaphores in array */
	int			complex_count;	/* pending complex operations */
	unsigned int		use_global_lock;/* >0: global lock required */

	struct sem		sems[];
} __randomize_layout;

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	struct pid		*pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
	struct sembuf		*blocking; /* the operation that blocked */
	int			nsops;	 /* number of operations */
	bool			alter;	 /* does *sops alter the array? */
	bool			dupsop;	 /* sops on more than one sem_num */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list: *
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks of a CLONE_SYSVSEM task group.
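 * A child created with CLONE_SYSVSEM shares its parent's sem_undo_list
 * (see copy_semundo(), which takes a reference); the final exit_sem()
 * drops that reference and applies the recorded adjustments.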
 */
struct sem_undo_list {
	refcount_t		refcnt;
	spinlock_t		lock;
	struct list_head	list_proc;
};


#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *	sem_undo.id_next,
 *	sem_array.complex_count,
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 *
 * Exceptions:
 * 1) use_global_lock: (SEM_BARRIER_1)
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release(): Immediately after setting it to 0,
 * a simple op can start.
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
 *
 * 2) queue.status: (SEM_BARRIER_2)
 * Initialization is done while holding sem_lock(), so no further barrier is
 * required.
 * Setting it to a result code is a RELEASE, this is ensured by both a
 * smp_store_release() (for case a) and while holding sem_lock()
 * (for case b).
 * The ACQUIRE when reading the result code without holding sem_lock() is
 * achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep().
 * (case a above).
 * Reading the result code while holding sem_lock() needs no further barriers,
 * the locks inside sem_lock() enforce ordering (case b above)
 *
 * 3) current->state:
 * current->state is set to TASK_INTERRUPTIBLE while holding sem_lock().
 * The wakeup is handled using the wake_q infrastructure. wake_q wakeups may
 * happen immediately after calling wake_q_add. As wake_q_add_safe() is called
 * when holding sem_lock(), no further barriers are required.
 *
 * See also ipc/mqueue.c for more details on the covered races.
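 *
 * Illustrative SEM_BARRIER_1 pairing (a sketch):
 *   CPU0, while holding sem_perm.lock, leaves complex mode via
 *   smp_store_release(&sma->use_global_lock, 0) in complexmode_tryleave().
 *   CPU1 takes a per-semaphore lock and then checks
 *   smp_load_acquire(&sma->use_global_lock) in sem_lock().
 * Once CPU1 observes 0, the RELEASE/ACQUIRE pair guarantees that it also
 * observes every update made while the array was in global lock mode.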
 */

#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

void sem_init_ns(struct ipc_namespace *ns)
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}

#ifdef CONFIG_IPC_NS
void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
}
#endif

void __init sem_init(void)
{
	sem_init_ns(&init_ipc_ns);
	ipc_init_proc_interface("sysvipc/sem",
				" key semid perms nsems uid gid cuid cgid otime ctime\n",
				IPC_SEM_IDS, sysvipc_sem_proc_show);
}

/**
 * unmerge_queues - unmerge queues, if possible.
 * @sma: semaphore array
 *
 * The function unmerges the wait queues if complex_count is 0.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
	struct sem_queue *q, *tq;

	/* complex operations still around? */
	if (sma->complex_count)
		return;
	/*
	 * We will switch back to simple mode.
	 * Move all pending operations back into the per-semaphore
	 * queues.
	 */
	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		struct sem *curr;
		curr = &sma->sems[q->sops[0].sem_num];

		list_add_tail(&q->list, &curr->pending_alter);
	}
	INIT_LIST_HEAD(&sma->pending_alter);
}

/**
 * merge_queues - merge single semop queues into global queue
 * @sma: semaphore array
 *
 * This function merges all per-semaphore queues into the global queue.
 * It is necessary to achieve FIFO ordering for the pending single-sop
 * operations when a multi-semop operation must sleep.
 * Only the alter operations must be moved, the const operations can stay.
 */
static void merge_queues(struct sem_array *sma)
{
	int i;
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];

		list_splice_init(&sem->pending_alter, &sma->pending_alter);
	}
}

static void sem_rcu_free(struct rcu_head *head)
{
	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);

	security_sem_free(&sma->sem_perm);
	kvfree(sma);
}

/*
 * Enter the mode suitable for non-simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_enter(struct sem_array *sma)
{
	int i;
	struct sem *sem;

	if (sma->use_global_lock > 0) {
		/*
		 * We are already in global lock mode.
		 * Nothing to do, just reset the
		 * counter until we return to simple mode.
		 */
		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
		return;
	}
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;

	for (i = 0; i < sma->sem_nsems; i++) {
		sem = &sma->sems[i];
		spin_lock(&sem->lock);
		spin_unlock(&sem->lock);
	}
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
	if (sma->complex_count) {
		/* Complex ops are sleeping.
		 * We must stay in complex mode
		 */
		return;
	}
	if (sma->use_global_lock == 1) {

		/* See SEM_BARRIER_1 for purpose/pairing */
		smp_store_release(&sma->use_global_lock, 0);
	} else {
		sma->use_global_lock--;
	}
}

#define SEM_GLOBAL_LOCK	(-1)
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			   int nsops)
{
	struct sem *sem;
	int idx;

	if (nsops != 1) {
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);

		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
	}

	/*
	 * Only one semaphore affected - try to optimize locking.
	 * Optimized locking is possible if no complex operation
	 * is either enqueued or processed right now.
	 *
	 * Both facts are tracked by use_global_lock.
	 */
	idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
	sem = &sma->sems[idx];

	/*
	 * Initial check for use_global_lock. Just an optimization,
	 * no locking, no memory barrier.
	 */
	if (!sma->use_global_lock) {
		/*
		 * It appears that no complex operation is around.
		 * Acquire the per-semaphore lock.
		 */
		spin_lock(&sem->lock);

		/* see SEM_BARRIER_1 for purpose/pairing */
		if (!smp_load_acquire(&sma->use_global_lock)) {
			/* fast path successful! */
			return sops->sem_num;
		}
		spin_unlock(&sem->lock);
	}

	/* slow path: acquire the full lock */
	ipc_lock_object(&sma->sem_perm);

	if (sma->use_global_lock == 0) {
		/*
		 * The use_global_lock mode ended while we waited for
		 * sma->sem_perm.lock. Thus we must switch to locking
		 * with sem->lock.
		 * Unlike in the fast path, there is no need to recheck
		 * sma->use_global_lock after we have acquired sem->lock:
		 * We own sma->sem_perm.lock, thus use_global_lock cannot
		 * change.
		 */
		spin_lock(&sem->lock);

		ipc_unlock_object(&sma->sem_perm);
		return sops->sem_num;
	} else {
		/*
		 * Not a false alarm, thus continue to use the global lock
		 * mode. No need for complexmode_enter(), this was done by
		 * the caller that has set use_global_lock to non-zero.
		 */
		return SEM_GLOBAL_LOCK;
	}
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
	if (locknum == SEM_GLOBAL_LOCK) {
		unmerge_queues(sma);
		complexmode_tryleave(sma);
		ipc_unlock_object(&sma->sem_perm);
	} else {
		struct sem *sem = &sma->sems[locknum];
		spin_unlock(&sem->lock);
	}
}

/*
 * sem_lock_(check_) routines are called in the paths where the rwsem
 * is not held.
 *
 * The caller holds the RCU read lock.
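 *
 * Typical calling pattern (a sketch mirroring do_semtimedop() below):
 *
 *	rcu_read_lock();
 *	sma = sem_obtain_object_check(ns, semid);
 *	locknum = sem_lock(sma, sops, nsops);
 *	... operate on the array, revalidate with ipc_valid_object() ...
 *	sem_unlock(sma, locknum);
 *	rcu_read_unlock();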
 */
static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline void sem_lock_and_putref(struct sem_array *sma)
{
	sem_lock(sma, NULL, -1);
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
	ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

static struct sem_array *sem_alloc(size_t nsems)
{
	struct sem_array *sma;

	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
		return NULL;

	sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL);
	if (unlikely(!sma))
		return NULL;

	return sma;
}

/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
 * Called with sem_ids.rwsem held (as a writer)
 */
static int newary(struct ipc_namespace *ns, struct ipc_params *params)
{
	int retval;
	struct sem_array *sma;
	key_t key = params->key;
	int nsems = params->u.nsems;
	int semflg = params->flg;
	int i;

	if (!nsems)
		return -EINVAL;
	if (ns->used_sems + nsems > ns->sc_semmns)
		return -ENOSPC;

	sma = sem_alloc(nsems);
	if (!sma)
		return -ENOMEM;

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_sem_alloc(&sma->sem_perm);
	if (retval) {
		kvfree(sma);
		return retval;
	}

	for (i = 0; i < nsems; i++) {
		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
		INIT_LIST_HEAD(&sma->sems[i].pending_const);
		spin_lock_init(&sma->sems[i].lock);
	}

	sma->complex_count = 0;
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
	INIT_LIST_HEAD(&sma->pending_alter);
	INIT_LIST_HEAD(&sma->pending_const);
	INIT_LIST_HEAD(&sma->list_id);
	sma->sem_nsems = nsems;
	sma->sem_ctime = ktime_get_real_seconds();

	/* ipc_addid() locks sma upon success. */
	retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
	if (retval < 0) {
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
		return retval;
	}
	ns->used_sems += nsems;

	sem_unlock(sma, -1);
	rcu_read_unlock();

	return sma->sem_perm.id;
}


/*
 * Called with sem_ids.rwsem and ipcp locked.
 */
static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
		return -EINVAL;

	return 0;
}

long ksys_semget(key_t key, int nsems, int semflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops sem_ops = {
		.getnew = newary,
		.associate = security_sem_associate,
		.more_checks = sem_more_checks,
	};
	struct ipc_params sem_params;

	ns = current->nsproxy->ipc_ns;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}

SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	return ksys_semget(key, nsems, semflg);
}

/**
 * perform_atomic_semop[_slow] - Attempt to perform semaphore
 *                               operations on a given array.
 * @sma: semaphore array
 * @q: struct sem_queue that describes the operation
 *
 * Caller blocking is as follows, based on the value
 * indicated by the semaphore operation (sem_op):
 *
 *  (1) >0 never blocks.
 *  (2)  0 (wait-for-zero operation): semval is non-zero.
 *  (3) <0 attempting to decrement semval to a value smaller than zero.
 *
 * Returns 0 if the operation was possible.
 * Returns 1 if the operation is impossible, the caller must sleep.
 * Returns <0 for error codes.
 */
static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops;
	struct pid *pid;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	for (sop = sops; sop < sops + nsops; sop++) {
		int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
		curr = &sma->sems[idx];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
			/* Exceeding the undo range is an error.
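			 * Note: un->semadj[] accumulates the inverse of every
			 * change applied under SEM_UNDO; exit_sem() later adds
			 * it back to semval, with the result limited to the
			 * 0..SEMVMX range (see the notes at the top of this
			 * file).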
			 */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
			un->semadj[sop->sem_num] = undo;
		}

		curr->semval = result;
	}

	sop--;
	pid = q->pid;
	while (sop >= sops) {
		ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid);
		sop--;
	}

	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
	q->blocking = sop;

	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	sop--;
	while (sop >= sops) {
		sem_op = sop->sem_op;
		sma->sems[sop->sem_num].semval -= sem_op;
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] += sem_op;
		sop--;
	}

	return result;
}

static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	if (unlikely(q->dupsop))
		return perform_atomic_semop_slow(sma, q);

	/*
	 * We scan the semaphore set twice, first to ensure that the entire
	 * operation can succeed, therefore avoiding any pointless writes
	 * to shared memory and having to undo such changes in order to block
	 * until the operations can go through.
	 */
	for (sop = sops; sop < sops + nsops; sop++) {
		int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);

		curr = &sma->sems[idx];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block; /* wait-for-zero */

		result += sem_op;
		if (result < 0)
			goto would_block;

		if (result > SEMVMX)
			return -ERANGE;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			/* Exceeding the undo range is an error. */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				return -ERANGE;
		}
	}

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = &sma->sems[sop->sem_num];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			un->semadj[sop->sem_num] = undo;
		}
		curr->semval += sem_op;
		ipc_update_pid(&curr->sempid, q->pid);
	}

	return 0;

would_block:
	q->blocking = sop;
	return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
}

static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
					     struct wake_q_head *wake_q)
{
	struct task_struct *sleeper;

	sleeper = get_task_struct(q->sleeper);

	/* see SEM_BARRIER_2 for purpose/pairing */
	smp_store_release(&q->status, error);

	wake_q_add_safe(wake_q, sleeper);
}

static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
	list_del(&q->list);
	if (q->nsops > 1)
		sma->complex_count--;
}

/** check_restart(sma, q)
 * @sma: semaphore array
 * @q: the operation that just completed
 *
 * update_queue is O(N^2) when it restarts scanning the whole queue of
 * waiting operations. Therefore this function checks if the restart is
 * really necessary. It is called after a previously waiting operation
 * modified the array.
 * Note that wait-for-zero operations are handled without restart.
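 *
 * Illustrative case (a sketch): if q was a single-sop decrement that just
 * completed, any other single-sop decrement queued behind it already failed
 * against a larger semval and cannot succeed now either, so no rescan is
 * needed and 0 is returned.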
 */
static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
{
	/* pending complex alter operations are too difficult to analyse */
	if (!list_empty(&sma->pending_alter))
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

	/* It is impossible that someone waits for the new value:
	 * - complex operations always restart.
	 * - wait-for-zero are handled separately.
	 * - q is a previously sleeping simple operation that
	 *   altered the array. It must be a decrement, because
	 *   simple increments never sleep.
	 * - If there are older (higher priority) decrements
	 *   in the queue, then they have observed the original
	 *   semval value and couldn't proceed. The operation
	 *   decremented the value - thus they won't proceed either.
	 */
	return 0;
}

/**
 * wake_const_ops - wake up non-alter tasks
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
			  struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
		pending_list = &sma->sems[semnum].pending_const;

	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error = perform_atomic_semop(sma, q);

		if (error > 0)
			continue;
		/* operation completed, remove from queue & wakeup */
		unlink_queue(sma, q);

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (error == 0)
			semop_completed = 1;
	}

	return semop_completed;
}

/**
 * do_smart_wakeup_zero - wakeup all wait for zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @wake_q: lockless wake-queue head
 *
 * Checks all required queues for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
				int nsops, struct wake_q_head *wake_q)
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

			if (sma->sems[num].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, num, wake_q);
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
		 */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (sma->sems[i].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, i, wake_q);
			}
		}
	}
	/*
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
	 */
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, wake_q);

	return semop_completed;
}


/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_alter;
	else
		pending_list = &sma->sems[semnum].pending_alter;

again:
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error, restart;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan further: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sems[semnum].semval == 0)
			break;

		error = perform_atomic_semop(sma, q);

		/* Does q->sleeper still need to sleep? */
		if (error > 0)
			continue;

		unlink_queue(sma, q);

		if (error) {
			restart = 0;
		} else {
			semop_completed = 1;
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (restart)
			goto again;
	}
	return semop_completed;
}

/**
 * set_semotime - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
		sma->sems[0].sem_otime = ktime_get_real_seconds();
	} else {
		sma->sems[sops[0].sem_num].sem_otime =
						ktime_get_real_seconds();
	}
}

/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform this call after dropping all locks.
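 *
 * Typical caller pattern (a sketch, as in semctl_setval() below):
 *
 *	DEFINE_WAKE_Q(wake_q);
 *	sem_lock(sma, NULL, -1);
 *	... modify semaphore values ...
 *	do_smart_update(sma, NULL, 0, 0, &wake_q);
 *	sem_unlock(sma, -1);
 *	rcu_read_unlock();
 *	wake_up_q(&wake_q);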
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			    int otime, struct wake_q_head *wake_q)
{
	int i;

	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
		otime |= update_queue(sma, -1, wake_q);
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
				otime |= update_queue(sma, i, wake_q);
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrease.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops won't be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							      sops[i].sem_num, wake_q);
				}
			}
		}
	}
	if (otime)
		set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
		     bool count_zero)
{
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 *
 * Per definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operations would block, too.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
{
	struct list_head *l;
	struct sem_queue *q;
	int semcnt;

	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
		l = &sma->sems[semnum].pending_const;
	else
		l = &sma->sems[semnum].pending_alter;

	list_for_each_entry(q, l, list) {
		/* all tasks on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
	}

	/* Then: check the complex operations. */
	list_for_each_entry(q, &sma->pending_alter, list) {
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
	}
	return semcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set held.
 * sem_ids.rwsem remains locked on exit.
 */
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct sem_undo *un, *tu;
	struct sem_queue *q, *tq;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	int i;
	DEFINE_WAKE_Q(wake_q);

	/* Free the existing undo structures for this semaphore set. */
	ipc_assert_locked_object(&sma->sem_perm);
	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
		list_del(&un->list_id);
		spin_lock(&un->ulp->lock);
		un->semid = -1;
		list_del_rcu(&un->list_proc);
		spin_unlock(&un->ulp->lock);
		kfree_rcu(un, rcu);
	}

	/* Wake up all pending processes and let them fail with EIDRM. */
	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	}

	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	}
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];
		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
		}
		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
		}
		ipc_update_pid(&sem->sempid, NULL);
	}

	/* Remove the semaphore set from the IDR */
	sem_rmid(ns, sma);
	sem_unlock(sma, -1);
	rcu_read_unlock();

	wake_up_q(&wake_q);
	ns->used_sems -= sma->sem_nsems;
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct semid_ds out;

		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime = in->sem_otime;
		out.sem_ctime = in->sem_ctime;
		out.sem_nsems = in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

static time64_t get_semotime(struct sem_array *sma)
{
	int i;
	time64_t res;

	res = sma->sems[0].sem_otime;
	for (i = 1; i < sma->sem_nsems; i++) {
		time64_t to = sma->sems[i].sem_otime;

		if (to > res)
			res = to;
	}
	return res;
}

static int semctl_stat(struct ipc_namespace *ns, int semid,
		       int cmd, struct semid64_ds *semid64)
{
	struct sem_array *sma;
	time64_t semotime;
	int err;

	memset(semid64, 0, sizeof(*semid64));

	rcu_read_lock();
	if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) {
		sma = sem_obtain_object(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
	} else { /* IPC_STAT */
		sma = sem_obtain_object_check(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
	}

	/* see comment for SHM_STAT_ANY */
	if (cmd == SEM_STAT_ANY)
		audit_ipc_obj(&sma->sem_perm);
	else {
		err = -EACCES;
		if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
			goto out_unlock;
	}

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_unlock;

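	/*
	 * Snapshot the permissions, times and nsems under the object lock;
	 * the ipc_valid_object() check below guards against a concurrent
	 * IPC_RMID having already torn the set down.
	 */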
	ipc_lock_object(&sma->sem_perm);

	if (!ipc_valid_object(&sma->sem_perm)) {
		ipc_unlock_object(&sma->sem_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
	semotime = get_semotime(sma);
	semid64->sem_otime = semotime;
	semid64->sem_ctime = sma->sem_ctime;
#ifndef CONFIG_64BIT
	semid64->sem_otime_high = semotime >> 32;
	semid64->sem_ctime_high = sma->sem_ctime >> 32;
#endif
	semid64->sem_nsems = sma->sem_nsems;

	if (cmd == IPC_STAT) {
		/*
		 * As defined in SUS:
		 * Return 0 on success
		 */
		err = 0;
	} else {
		/*
		 * SEM_STAT and SEM_STAT_ANY (both Linux specific)
		 * Return the full id, including the sequence number
		 */
		err = sma->sem_perm.id;
	}
	ipc_unlock_object(&sma->sem_perm);
out_unlock:
	rcu_read_unlock();
	return err;
}

static int semctl_info(struct ipc_namespace *ns, int semid,
		       int cmd, void __user *p)
{
	struct seminfo seminfo;
	int max_idx;
	int err;

	err = security_sem_semctl(NULL, cmd);
	if (err)
		return err;

	memset(&seminfo, 0, sizeof(seminfo));
	seminfo.semmni = ns->sc_semmni;
	seminfo.semmns = ns->sc_semmns;
	seminfo.semmsl = ns->sc_semmsl;
	seminfo.semopm = ns->sc_semopm;
	seminfo.semvmx = SEMVMX;
	seminfo.semmnu = SEMMNU;
	seminfo.semmap = SEMMAP;
	seminfo.semume = SEMUME;
	down_read(&sem_ids(ns).rwsem);
	if (cmd == SEM_INFO) {
		seminfo.semusz = sem_ids(ns).in_use;
		seminfo.semaem = ns->used_sems;
	} else {
		seminfo.semusz = SEMUSZ;
		seminfo.semaem = SEMAEM;
	}
	max_idx = ipc_get_maxidx(&sem_ids(ns));
	up_read(&sem_ids(ns).rwsem);
	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
		return -EFAULT;
	return (max_idx < 0) ? 0 : max_idx;
}

static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
		int val)
{
	struct sem_undo *un;
	struct sem_array *sma;
	struct sem *curr;
	int err;
	DEFINE_WAKE_Q(wake_q);

	if (val > SEMVMX || val < 0)
		return -ERANGE;

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	if (semnum < 0 || semnum >= sma->sem_nsems) {
		rcu_read_unlock();
		return -EINVAL;
	}


	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
		rcu_read_unlock();
		return -EACCES;
	}

	err = security_sem_semctl(&sma->sem_perm, SETVAL);
	if (err) {
		rcu_read_unlock();
		return -EACCES;
	}

	sem_lock(sma, NULL, -1);

	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		return -EIDRM;
	}

	semnum = array_index_nospec(semnum, sma->sem_nsems);
	curr = &sma->sems[semnum];

	ipc_assert_locked_object(&sma->sem_perm);
	list_for_each_entry(un, &sma->list_id, list_id)
		un->semadj[semnum] = 0;

	curr->semval = val;
	ipc_update_pid(&curr->sempid, task_tgid(current));
	sma->sem_ctime = ktime_get_real_seconds();
	/* maybe some queued-up processes were waiting for this */
	do_smart_update(sma, NULL, 0, 0, &wake_q);
	sem_unlock(sma, -1);
	rcu_read_unlock();
	wake_up_q(&wake_q);
	return 0;
}

static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, void __user *p)
{
	struct sem_array *sma;
	struct sem *curr;
	int err, nsems;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort *sem_io = fast_sem_io;
	DEFINE_WAKE_Q(wake_q);

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	nsems = sma->sem_nsems;

	err = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
		goto out_rcu_wakeup;

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_rcu_wakeup;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = p;
		int i;

		sem_lock(sma, NULL, -1);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}
		if (nsems > SEMMSL_FAST) {
			if (!ipc_rcu_getref(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
			sem_unlock(sma, -1);
			rcu_read_unlock();
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}

			rcu_read_lock();
			sem_lock_and_putref(sma);
			if (!ipc_valid_object(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
		}
		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sems[i].semval;
		sem_unlock(sma, -1);
		rcu_read_unlock();
		err = 0;
		if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		if (!ipc_rcu_getref(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_rcu_wakeup;
		}
		rcu_read_unlock();

		if (nsems > SEMMSL_FAST) {
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}
		}

		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				err = -ERANGE;
				goto out_free;
			}
		}
		rcu_read_lock();
		sem_lock_and_putref(sma);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}

		for (i = 0; i < nsems; i++) {
			sma->sems[i].semval = sem_io[i];
			ipc_update_pid(&sma->sems[i].sempid, task_tgid(current));
		}

		ipc_assert_locked_object(&sma->sem_perm);
		list_for_each_entry(un, &sma->list_id, list_id) {
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		}
		sma->sem_ctime = ktime_get_real_seconds();
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 0, &wake_q);
		err = 0;
		goto out_unlock;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
	}
	err = -EINVAL;
	if (semnum < 0 || semnum >= nsems)
		goto out_rcu_wakeup;

	sem_lock(sma, NULL, -1);
	if (!ipc_valid_object(&sma->sem_perm)) {
		err = -EIDRM;
		goto out_unlock;
	}

	semnum = array_index_nospec(semnum, nsems);
	curr = &sma->sems[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = pid_vnr(curr->sempid);
		goto out_unlock;
	case GETNCNT:
		err = count_semcnt(sma, semnum, 0);
		goto out_unlock;
	case GETZCNT:
		err = count_semcnt(sma, semnum, 1);
		goto out_unlock;
	}

out_unlock:
	sem_unlock(sma, -1);
out_rcu_wakeup:
	rcu_read_unlock();
	wake_up_q(&wake_q);
out_free:
	if (sem_io != fast_sem_io)
		kvfree(sem_io);
	return err;
}

static inline unsigned long
copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct semid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->sem_perm.uid = tbuf_old.sem_perm.uid;
		out->sem_perm.gid = tbuf_old.sem_perm.gid;
		out->sem_perm.mode = tbuf_old.sem_perm.mode;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

/*
 * This function handles some semctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int semctl_down(struct ipc_namespace *ns, int semid,
		       int cmd, struct semid64_ds *semid64)
{
	struct sem_array *sma;
	int err;
	struct kern_ipc_perm *ipcp;

	down_write(&sem_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd,
				   &semid64->sem_perm, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	sma = container_of(ipcp, struct sem_array, sem_perm);

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		sem_lock(sma, NULL, -1);
		/* freeary unlocks the ipc object and rcu */
		freeary(ns, ipcp);
		goto out_up;
	case IPC_SET:
		sem_lock(sma, NULL, -1);
		err = ipc_update_perm(&semid64->sem_perm, ipcp);
		if (err)
			goto out_unlock0;
		sma->sem_ctime = ktime_get_real_seconds();
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	sem_unlock(sma, -1);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&sem_ids(ns).rwsem);
	return err;
}

static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version)
{
	struct ipc_namespace *ns;
	void __user *p = (void __user *)arg;
	struct semid64_ds semid64;
	int err;

	if (semid < 0)
		return -EINVAL;

	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
	case SEM_STAT_ANY:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL: {
		int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
		/* big-endian 64bit */
		val = arg >> 32;
#else
		/* 32bit or little-endian 64bit */
		val = arg;
#endif
		return semctl_setval(ns, semid, semnum, val);
	}
	case IPC_SET:
		if (copy_semid_from_user(&semid64, p, version))
			return -EFAULT;
		fallthrough;
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}

SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
	return ksys_semctl(semid, semnum, cmd, arg, IPC_64);
}

#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg)
{
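	/* ipc_parse_version() detects an IPC_64 flag and strips it from cmd. */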
	int version = ipc_parse_version(&cmd);

	return ksys_semctl(semid, semnum, cmd, arg, version);
}

SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
	return ksys_old_semctl(semid, semnum, cmd, arg);
}
#endif

#ifdef CONFIG_COMPAT

struct compat_semid_ds {
	struct compat_ipc_perm sem_perm;
	old_time32_t sem_otime;
	old_time32_t sem_ctime;
	compat_uptr_t sem_base;
	compat_uptr_t sem_pending;
	compat_uptr_t sem_pending_last;
	compat_uptr_t undo;
	unsigned short sem_nsems;
};

static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
					int version)
{
	memset(out, 0, sizeof(*out));
	if (version == IPC_64) {
		struct compat_semid64_ds __user *p = buf;
		return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
	} else {
		struct compat_semid_ds __user *p = buf;
		return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
	}
}

static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
					int version)
{
	if (version == IPC_64) {
		struct compat_semid64_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime	 = lower_32_bits(in->sem_otime);
		v.sem_otime_high = upper_32_bits(in->sem_otime);
		v.sem_ctime	 = lower_32_bits(in->sem_ctime);
		v.sem_ctime_high = upper_32_bits(in->sem_ctime);
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_semid_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	}
}

static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version)
{
	void __user *p = compat_ptr(arg);
	struct ipc_namespace *ns;
	struct semid64_ds semid64;
	int err;

	ns = current->nsproxy->ipc_ns;

	if (semid < 0)
		return -EINVAL;

	switch (cmd & (~IPC_64)) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
	case SEM_STAT_ANY:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_compat_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case GETALL:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
	case IPC_SET:
		if (copy_compat_semid_from_user(&semid64, p, version))
			return -EFAULT;
		fallthrough;
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}

COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
{
	return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64);
}

#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg)
{
	int version = compat_ipc_parse_version(&cmd);

	return compat_ksys_semctl(semid, semnum, cmd, arg, version);
}

COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg)
{
	return compat_ksys_old_semctl(semid, semnum, cmd, arg);
}
#endif
#endif

/* If the task doesn't already have an undo_list, then allocate one
 * here. We guarantee there is only one thread using this undo list,
 * and current is THE ONE
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		spin_lock_init(&undo_list->lock);
		refcount_set(&undo_list->refcnt, 1);
		INIT_LIST_HEAD(&undo_list->list_proc);

		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc,
				spin_is_locked(&ulp->lock)) {
		if (un->semid == semid)
			return un;
	}
	return NULL;
}

static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	assert_spin_locked(&ulp->lock);

	un = __lookup_undo(ulp, semid);
	if (un) {
		list_del_rcu(&un->list_proc);
		list_add_rcu(&un->list_proc, &ulp->list_proc);
	}
	return un;
}

/**
 * find_alloc_undo - lookup (and if not present create) undo array
 * @ns: namespace
 * @semid: semaphore array id
 *
 * The function looks up (and if not present creates) the undo structure.
 * The size of the undo structure depends on the size of the semaphore
 * array, thus the alloc path is not that straightforward.
 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
 * performs a rcu_read_lock().
 */
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems, error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	rcu_read_lock();
	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	spin_unlock(&ulp->lock);
	if (likely(un != NULL))
		goto out;

	/* no undo structure around - allocate one. */
	/* step 1: figure out the size of the semaphore array */
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return ERR_CAST(sma);
	}

	nsems = sma->sem_nsems;
	if (!ipc_rcu_getref(&sma->sem_perm)) {
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	rcu_read_unlock();

	/* step 2: allocate new undo structure */
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
		return ERR_PTR(-ENOMEM);
	}

	/* step 3: Acquire the lock on semaphore array */
	rcu_read_lock();
	sem_lock_and_putref(sma);
	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
	 */
	un = lookup_undo(ulp, semid);
	if (un) {
		kfree(new);
		goto success;
	}
	/* step 5: initialize & link new undo structure */
	new->semadj = (short *) &new[1];
	new->ulp = ulp;
	new->semid = semid;
	assert_spin_locked(&ulp->lock);
	list_add_rcu(&new->list_proc, &ulp->list_proc);
	ipc_assert_locked_object(&sma->sem_perm);
	list_add(&new->list_id, &sma->list_id);
	un = new;

success:
	spin_unlock(&ulp->lock);
	sem_unlock(sma, -1);
out:
	return un;
}

static long do_semtimedop(int semid, struct sembuf __user *tsops,
		unsigned nsops, const struct timespec64 *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int max, locknum;
	bool undos = false, alter = false, dupsop = false;
	struct sem_queue queue;
	unsigned long dup = 0, jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	if (nsops > SEMOPM_FAST) {
		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}

	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error = -EFAULT;
		goto out_free;
	}

	if (timeout) {
		if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
			timeout->tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec64_to_jiffies(timeout);
	}

	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);

		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = true;
		if (dup & mask) {
			/*
			 * There was a previous alter access that appears
			 * to have accessed the same semaphore, thus use
			 * the dupsop logic. "appears", because the detection
			 * can only check % BITS_PER_LONG.
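			 *
			 * Example: with BITS_PER_LONG == 64, sem_num 1 and
			 * sem_num 65 map to the same bit, so dupsop is set
			 * even though the operations touch different
			 * semaphores; that is safe, it merely forces the
			 * slower perform_atomic_semop_slow() path.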

static long do_semtimedop(int semid, struct sembuf __user *tsops,
		unsigned nsops, const struct timespec64 *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int max, locknum;
	bool undos = false, alter = false, dupsop = false;
	struct sem_queue queue;
	unsigned long dup = 0, jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	if (nsops > SEMOPM_FAST) {
		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}

	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error = -EFAULT;
		goto out_free;
	}

	if (timeout) {
		if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
			timeout->tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec64_to_jiffies(timeout);
	}

	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);

		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = true;
		if (dup & mask) {
			/*
			 * There was a previous alter access that appears
			 * to have accessed the same semaphore, thus use
			 * the dupsop logic. "appears", because the detection
			 * can only check % BITS_PER_LONG.
			 */
			dupsop = true;
		}
		if (sop->sem_op != 0) {
			alter = true;
			dup |= mask;
		}
	}
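	/*
	 * Worked example for the "% BITS_PER_LONG" caveat above (added
	 * note, not from the original source): on a 64-bit kernel, altering
	 * operations on sem_num 1 and sem_num 65 both map to bit 1 of
	 * "dup", so dupsop is set even though two different semaphores were
	 * touched. The detection is deliberately cheap and may over-report;
	 * dupsop only selects the slower, duplicate-safe path in
	 * perform_atomic_semop().
	 */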
2137 */ 2138 if (nsops == 1) { 2139 struct sem *curr; 2140 int idx = array_index_nospec(sops->sem_num, sma->sem_nsems); 2141 curr = &sma->sems[idx]; 2142 2143 if (alter) { 2144 if (sma->complex_count) { 2145 list_add_tail(&queue.list, 2146 &sma->pending_alter); 2147 } else { 2148 2149 list_add_tail(&queue.list, 2150 &curr->pending_alter); 2151 } 2152 } else { 2153 list_add_tail(&queue.list, &curr->pending_const); 2154 } 2155 } else { 2156 if (!sma->complex_count) 2157 merge_queues(sma); 2158 2159 if (alter) 2160 list_add_tail(&queue.list, &sma->pending_alter); 2161 else 2162 list_add_tail(&queue.list, &sma->pending_const); 2163 2164 sma->complex_count++; 2165 } 2166 2167 do { 2168 /* memory ordering ensured by the lock in sem_lock() */ 2169 WRITE_ONCE(queue.status, -EINTR); 2170 queue.sleeper = current; 2171 2172 /* memory ordering is ensured by the lock in sem_lock() */ 2173 __set_current_state(TASK_INTERRUPTIBLE); 2174 sem_unlock(sma, locknum); 2175 rcu_read_unlock(); 2176 2177 if (timeout) 2178 jiffies_left = schedule_timeout(jiffies_left); 2179 else 2180 schedule(); 2181 2182 /* 2183 * fastpath: the semop has completed, either successfully or 2184 * not, from the syscall pov, is quite irrelevant to us at this 2185 * point; we're done. 2186 * 2187 * We _do_ care, nonetheless, about being awoken by a signal or 2188 * spuriously. The queue.status is checked again in the 2189 * slowpath (aka after taking sem_lock), such that we can detect 2190 * scenarios where we were awakened externally, during the 2191 * window between wake_q_add() and wake_up_q(). 2192 */ 2193 error = READ_ONCE(queue.status); 2194 if (error != -EINTR) { 2195 /* see SEM_BARRIER_2 for purpose/pairing */ 2196 smp_acquire__after_ctrl_dep(); 2197 goto out_free; 2198 } 2199 2200 rcu_read_lock(); 2201 locknum = sem_lock(sma, sops, nsops); 2202 2203 if (!ipc_valid_object(&sma->sem_perm)) 2204 goto out_unlock_free; 2205 2206 /* 2207 * No necessity for any barrier: We are protect by sem_lock() 2208 */ 2209 error = READ_ONCE(queue.status); 2210 2211 /* 2212 * If queue.status != -EINTR we are woken up by another process. 2213 * Leave without unlink_queue(), but with sem_unlock(). 2214 */ 2215 if (error != -EINTR) 2216 goto out_unlock_free; 2217 2218 /* 2219 * If an interrupt occurred we have to clean up the queue. 
2220 */ 2221 if (timeout && jiffies_left == 0) 2222 error = -EAGAIN; 2223 } while (error == -EINTR && !signal_pending(current)); /* spurious */ 2224 2225 unlink_queue(sma, &queue); 2226 2227 out_unlock_free: 2228 sem_unlock(sma, locknum); 2229 rcu_read_unlock(); 2230 out_free: 2231 if (sops != fast_sops) 2232 kvfree(sops); 2233 return error; 2234 } 2235 2236 long ksys_semtimedop(int semid, struct sembuf __user *tsops, 2237 unsigned int nsops, const struct __kernel_timespec __user *timeout) 2238 { 2239 if (timeout) { 2240 struct timespec64 ts; 2241 if (get_timespec64(&ts, timeout)) 2242 return -EFAULT; 2243 return do_semtimedop(semid, tsops, nsops, &ts); 2244 } 2245 return do_semtimedop(semid, tsops, nsops, NULL); 2246 } 2247 2248 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 2249 unsigned int, nsops, const struct __kernel_timespec __user *, timeout) 2250 { 2251 return ksys_semtimedop(semid, tsops, nsops, timeout); 2252 } 2253 2254 #ifdef CONFIG_COMPAT_32BIT_TIME 2255 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, 2256 unsigned int nsops, 2257 const struct old_timespec32 __user *timeout) 2258 { 2259 if (timeout) { 2260 struct timespec64 ts; 2261 if (get_old_timespec32(&ts, timeout)) 2262 return -EFAULT; 2263 return do_semtimedop(semid, tsems, nsops, &ts); 2264 } 2265 return do_semtimedop(semid, tsems, nsops, NULL); 2266 } 2267 2268 SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems, 2269 unsigned int, nsops, 2270 const struct old_timespec32 __user *, timeout) 2271 { 2272 return compat_ksys_semtimedop(semid, tsems, nsops, timeout); 2273 } 2274 #endif 2275 2276 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, 2277 unsigned, nsops) 2278 { 2279 return do_semtimedop(semid, tsops, nsops, NULL); 2280 } 2281 2282 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 2283 * parent and child tasks. 2284 */ 2285 2286 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 2287 { 2288 struct sem_undo_list *undo_list; 2289 int error; 2290 2291 if (clone_flags & CLONE_SYSVSEM) { 2292 error = get_undo_list(&undo_list); 2293 if (error) 2294 return error; 2295 refcount_inc(&undo_list->refcnt); 2296 tsk->sysvsem.undo_list = undo_list; 2297 } else 2298 tsk->sysvsem.undo_list = NULL; 2299 2300 return 0; 2301 } 2302 2303 /* 2304 * add semadj values to semaphores, free undo structures. 2305 * undo structures are not freed when semaphore arrays are destroyed 2306 * so some of them may be out of date. 2307 * IMPLEMENTATION NOTE: There is some confusion over whether the 2308 * set of adjustments that needs to be done should be done in an atomic 2309 * manner or not. That is, if we are attempting to decrement the semval 2310 * should we queue up and wait until we can do so legally? 2311 * The original implementation attempted to do this (queue and wait). 2312 * The current implementation does not do so. The POSIX standard 2313 * and SVID should be consulted to determine what behavior is mandated. 
2314 */ 2315 void exit_sem(struct task_struct *tsk) 2316 { 2317 struct sem_undo_list *ulp; 2318 2319 ulp = tsk->sysvsem.undo_list; 2320 if (!ulp) 2321 return; 2322 tsk->sysvsem.undo_list = NULL; 2323 2324 if (!refcount_dec_and_test(&ulp->refcnt)) 2325 return; 2326 2327 for (;;) { 2328 struct sem_array *sma; 2329 struct sem_undo *un; 2330 int semid, i; 2331 DEFINE_WAKE_Q(wake_q); 2332 2333 cond_resched(); 2334 2335 rcu_read_lock(); 2336 un = list_entry_rcu(ulp->list_proc.next, 2337 struct sem_undo, list_proc); 2338 if (&un->list_proc == &ulp->list_proc) { 2339 /* 2340 * We must wait for freeary() before freeing this ulp, 2341 * in case we raced with last sem_undo. There is a small 2342 * possibility where we exit while freeary() didn't 2343 * finish unlocking sem_undo_list. 2344 */ 2345 spin_lock(&ulp->lock); 2346 spin_unlock(&ulp->lock); 2347 rcu_read_unlock(); 2348 break; 2349 } 2350 spin_lock(&ulp->lock); 2351 semid = un->semid; 2352 spin_unlock(&ulp->lock); 2353 2354 /* exit_sem raced with IPC_RMID, nothing to do */ 2355 if (semid == -1) { 2356 rcu_read_unlock(); 2357 continue; 2358 } 2359 2360 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); 2361 /* exit_sem raced with IPC_RMID, nothing to do */ 2362 if (IS_ERR(sma)) { 2363 rcu_read_unlock(); 2364 continue; 2365 } 2366 2367 sem_lock(sma, NULL, -1); 2368 /* exit_sem raced with IPC_RMID, nothing to do */ 2369 if (!ipc_valid_object(&sma->sem_perm)) { 2370 sem_unlock(sma, -1); 2371 rcu_read_unlock(); 2372 continue; 2373 } 2374 un = __lookup_undo(ulp, semid); 2375 if (un == NULL) { 2376 /* exit_sem raced with IPC_RMID+semget() that created 2377 * exactly the same semid. Nothing to do. 2378 */ 2379 sem_unlock(sma, -1); 2380 rcu_read_unlock(); 2381 continue; 2382 } 2383 2384 /* remove un from the linked lists */ 2385 ipc_assert_locked_object(&sma->sem_perm); 2386 list_del(&un->list_id); 2387 2388 spin_lock(&ulp->lock); 2389 list_del_rcu(&un->list_proc); 2390 spin_unlock(&ulp->lock); 2391 2392 /* perform adjustments registered in un */ 2393 for (i = 0; i < sma->sem_nsems; i++) { 2394 struct sem *semaphore = &sma->sems[i]; 2395 if (un->semadj[i]) { 2396 semaphore->semval += un->semadj[i]; 2397 /* 2398 * Range checks of the new semaphore value, 2399 * not defined by sus: 2400 * - Some unices ignore the undo entirely 2401 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 2402 * - some cap the value (e.g. FreeBSD caps 2403 * at 0, but doesn't enforce SEMVMX) 2404 * 2405 * Linux caps the semaphore value, both at 0 2406 * and at SEMVMX. 2407 * 2408 * Manfred <manfred@colorfullife.com> 2409 */ 2410 if (semaphore->semval < 0) 2411 semaphore->semval = 0; 2412 if (semaphore->semval > SEMVMX) 2413 semaphore->semval = SEMVMX; 2414 ipc_update_pid(&semaphore->sempid, task_tgid(current)); 2415 } 2416 } 2417 /* maybe some queued-up processes were waiting for this */ 2418 do_smart_update(sma, NULL, 0, 1, &wake_q); 2419 sem_unlock(sma, -1); 2420 rcu_read_unlock(); 2421 wake_up_q(&wake_q); 2422 2423 kfree_rcu(un, rcu); 2424 } 2425 kfree(ulp); 2426 } 2427 2428 #ifdef CONFIG_PROC_FS 2429 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 2430 { 2431 struct user_namespace *user_ns = seq_user_ns(s); 2432 struct kern_ipc_perm *ipcp = it; 2433 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 2434 time64_t sem_otime; 2435 2436 /* 2437 * The proc interface isn't aware of sem_lock(), it calls 2438 * ipc_lock_object() directly (in sysvipc_find_ipc). 

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	time64_t sem_otime;

	/*
	 * The proc interface isn't aware of sem_lock(); it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
	 */
	complexmode_enter(sma);

	sem_otime = get_semotime(sma);

	seq_printf(s,
		   "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

	complexmode_tryleave(sma);

	return 0;
}
#endif
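
/*
 * Added note: each line printed above describes one semaphore array, in
 * the column order key, semid, perms, nsems, uid, gid, cuid, cgid,
 * otime, ctime. The matching header line is registered via
 * ipc_init_proc_interface() in sem_init() earlier in this file, so
 * "cat /proc/sysvipc/sem" shows the values in that order.
 */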