// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *   Thus: Perfect SMP scaling between independent semaphore arrays.
 *         If multiple semaphores in one array are used, then cache line
 *         thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This allows to achieve FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
71 */ 72 73 #include <linux/compat.h> 74 #include <linux/slab.h> 75 #include <linux/spinlock.h> 76 #include <linux/init.h> 77 #include <linux/proc_fs.h> 78 #include <linux/time.h> 79 #include <linux/security.h> 80 #include <linux/syscalls.h> 81 #include <linux/audit.h> 82 #include <linux/capability.h> 83 #include <linux/seq_file.h> 84 #include <linux/rwsem.h> 85 #include <linux/nsproxy.h> 86 #include <linux/ipc_namespace.h> 87 #include <linux/sched/wake_q.h> 88 #include <linux/nospec.h> 89 #include <linux/rhashtable.h> 90 91 #include <linux/uaccess.h> 92 #include "util.h" 93 94 /* One semaphore structure for each semaphore in the system. */ 95 struct sem { 96 int semval; /* current value */ 97 /* 98 * PID of the process that last modified the semaphore. For 99 * Linux, specifically these are: 100 * - semop 101 * - semctl, via SETVAL and SETALL. 102 * - at task exit when performing undo adjustments (see exit_sem). 103 */ 104 struct pid *sempid; 105 spinlock_t lock; /* spinlock for fine-grained semtimedop */ 106 struct list_head pending_alter; /* pending single-sop operations */ 107 /* that alter the semaphore */ 108 struct list_head pending_const; /* pending single-sop operations */ 109 /* that do not alter the semaphore*/ 110 time64_t sem_otime; /* candidate for sem_otime */ 111 } ____cacheline_aligned_in_smp; 112 113 /* One sem_array data structure for each set of semaphores in the system. */ 114 struct sem_array { 115 struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ 116 time64_t sem_ctime; /* create/last semctl() time */ 117 struct list_head pending_alter; /* pending operations */ 118 /* that alter the array */ 119 struct list_head pending_const; /* pending complex operations */ 120 /* that do not alter semvals */ 121 struct list_head list_id; /* undo requests on this array */ 122 int sem_nsems; /* no. 
of semaphores in array */ 123 int complex_count; /* pending complex operations */ 124 unsigned int use_global_lock;/* >0: global lock required */ 125 126 struct sem sems[]; 127 } __randomize_layout; 128 129 /* One queue for each sleeping process in the system. */ 130 struct sem_queue { 131 struct list_head list; /* queue of pending operations */ 132 struct task_struct *sleeper; /* this process */ 133 struct sem_undo *undo; /* undo structure */ 134 struct pid *pid; /* process id of requesting process */ 135 int status; /* completion status of operation */ 136 struct sembuf *sops; /* array of pending operations */ 137 struct sembuf *blocking; /* the operation that blocked */ 138 int nsops; /* number of operations */ 139 bool alter; /* does *sops alter the array? */ 140 bool dupsop; /* sops on more than one sem_num */ 141 }; 142 143 /* Each task has a list of undo requests. They are executed automatically 144 * when the process exits. 145 */ 146 struct sem_undo { 147 struct list_head list_proc; /* per-process list: * 148 * all undos from one process 149 * rcu protected */ 150 struct rcu_head rcu; /* rcu struct for sem_undo */ 151 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */ 152 struct list_head list_id; /* per semaphore array list: 153 * all undos for one array */ 154 int semid; /* semaphore set identifier */ 155 short *semadj; /* array of adjustments */ 156 /* one per semaphore */ 157 }; 158 159 /* sem_undo_list controls shared access to the list of sem_undo structures 160 * that may be shared among all a CLONE_SYSVSEM task group. 
161 */ 162 struct sem_undo_list { 163 refcount_t refcnt; 164 spinlock_t lock; 165 struct list_head list_proc; 166 }; 167 168 169 #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 170 171 static int newary(struct ipc_namespace *, struct ipc_params *); 172 static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); 173 #ifdef CONFIG_PROC_FS 174 static int sysvipc_sem_proc_show(struct seq_file *s, void *it); 175 #endif 176 177 #define SEMMSL_FAST 256 /* 512 bytes on stack */ 178 #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ 179 180 /* 181 * Switching from the mode suitable for simple ops 182 * to the mode for complex ops is costly. Therefore: 183 * use some hysteresis 184 */ 185 #define USE_GLOBAL_LOCK_HYSTERESIS 10 186 187 /* 188 * Locking: 189 * a) global sem_lock() for read/write 190 * sem_undo.id_next, 191 * sem_array.complex_count, 192 * sem_array.pending{_alter,_const}, 193 * sem_array.sem_undo 194 * 195 * b) global or semaphore sem_lock() for read/write: 196 * sem_array.sems[i].pending_{const,alter}: 197 * 198 * c) special: 199 * sem_undo_list.list_proc: 200 * * undo_list->lock for write 201 * * rcu for read 202 * use_global_lock: 203 * * global sem_lock() for write 204 * * either local or global sem_lock() for read. 205 * 206 * Memory ordering: 207 * Most ordering is enforced by using spin_lock() and spin_unlock(). 208 * 209 * Exceptions: 210 * 1) use_global_lock: (SEM_BARRIER_1) 211 * Setting it from non-zero to 0 is a RELEASE, this is ensured by 212 * using smp_store_release(): Immediately after setting it to 0, 213 * a simple op can start. 214 * Testing if it is non-zero is an ACQUIRE, this is ensured by using 215 * smp_load_acquire(). 216 * Setting it from 0 to non-zero must be ordered with regards to 217 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire() 218 * is inside a spin_lock() and after a write from 0 to non-zero a 219 * spin_lock()+spin_unlock() is done. 
220 * To prevent the compiler/cpu temporarily writing 0 to use_global_lock, 221 * READ_ONCE()/WRITE_ONCE() is used. 222 * 223 * 2) queue.status: (SEM_BARRIER_2) 224 * Initialization is done while holding sem_lock(), so no further barrier is 225 * required. 226 * Setting it to a result code is a RELEASE, this is ensured by both a 227 * smp_store_release() (for case a) and while holding sem_lock() 228 * (for case b). 229 * The ACQUIRE when reading the result code without holding sem_lock() is 230 * achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep(). 231 * (case a above). 232 * Reading the result code while holding sem_lock() needs no further barriers, 233 * the locks inside sem_lock() enforce ordering (case b above) 234 * 235 * 3) current->state: 236 * current->state is set to TASK_INTERRUPTIBLE while holding sem_lock(). 237 * The wakeup is handled using the wake_q infrastructure. wake_q wakeups may 238 * happen immediately after calling wake_q_add. As wake_q_add_safe() is called 239 * when holding sem_lock(), no further barriers are required. 240 * 241 * See also ipc/mqueue.c for more details on the covered races. 
242 */ 243 244 #define sc_semmsl sem_ctls[0] 245 #define sc_semmns sem_ctls[1] 246 #define sc_semopm sem_ctls[2] 247 #define sc_semmni sem_ctls[3] 248 249 void sem_init_ns(struct ipc_namespace *ns) 250 { 251 ns->sc_semmsl = SEMMSL; 252 ns->sc_semmns = SEMMNS; 253 ns->sc_semopm = SEMOPM; 254 ns->sc_semmni = SEMMNI; 255 ns->used_sems = 0; 256 ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 257 } 258 259 #ifdef CONFIG_IPC_NS 260 void sem_exit_ns(struct ipc_namespace *ns) 261 { 262 free_ipcs(ns, &sem_ids(ns), freeary); 263 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 264 rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht); 265 } 266 #endif 267 268 void __init sem_init(void) 269 { 270 sem_init_ns(&init_ipc_ns); 271 ipc_init_proc_interface("sysvipc/sem", 272 " key semid perms nsems uid gid cuid cgid otime ctime\n", 273 IPC_SEM_IDS, sysvipc_sem_proc_show); 274 } 275 276 /** 277 * unmerge_queues - unmerge queues, if possible. 278 * @sma: semaphore array 279 * 280 * The function unmerges the wait queues if complex_count is 0. 281 * It must be called prior to dropping the global semaphore array lock. 282 */ 283 static void unmerge_queues(struct sem_array *sma) 284 { 285 struct sem_queue *q, *tq; 286 287 /* complex operations still around? */ 288 if (sma->complex_count) 289 return; 290 /* 291 * We will switch back to simple mode. 292 * Move all pending operation back into the per-semaphore 293 * queues. 294 */ 295 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 296 struct sem *curr; 297 curr = &sma->sems[q->sops[0].sem_num]; 298 299 list_add_tail(&q->list, &curr->pending_alter); 300 } 301 INIT_LIST_HEAD(&sma->pending_alter); 302 } 303 304 /** 305 * merge_queues - merge single semop queues into global queue 306 * @sma: semaphore array 307 * 308 * This function merges all per-semaphore queues into the global queue. 309 * It is necessary to achieve FIFO ordering for the pending single-sop 310 * operations when a multi-semop operation must sleep. 
311 * Only the alter operations must be moved, the const operations can stay. 312 */ 313 static void merge_queues(struct sem_array *sma) 314 { 315 int i; 316 for (i = 0; i < sma->sem_nsems; i++) { 317 struct sem *sem = &sma->sems[i]; 318 319 list_splice_init(&sem->pending_alter, &sma->pending_alter); 320 } 321 } 322 323 static void sem_rcu_free(struct rcu_head *head) 324 { 325 struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu); 326 struct sem_array *sma = container_of(p, struct sem_array, sem_perm); 327 328 security_sem_free(&sma->sem_perm); 329 kvfree(sma); 330 } 331 332 /* 333 * Enter the mode suitable for non-simple operations: 334 * Caller must own sem_perm.lock. 335 */ 336 static void complexmode_enter(struct sem_array *sma) 337 { 338 int i; 339 struct sem *sem; 340 341 if (sma->use_global_lock > 0) { 342 /* 343 * We are already in global lock mode. 344 * Nothing to do, just reset the 345 * counter until we return to simple mode. 346 */ 347 WRITE_ONCE(sma->use_global_lock, USE_GLOBAL_LOCK_HYSTERESIS); 348 return; 349 } 350 WRITE_ONCE(sma->use_global_lock, USE_GLOBAL_LOCK_HYSTERESIS); 351 352 for (i = 0; i < sma->sem_nsems; i++) { 353 sem = &sma->sems[i]; 354 spin_lock(&sem->lock); 355 spin_unlock(&sem->lock); 356 } 357 } 358 359 /* 360 * Try to leave the mode that disallows simple operations: 361 * Caller must own sem_perm.lock. 362 */ 363 static void complexmode_tryleave(struct sem_array *sma) 364 { 365 if (sma->complex_count) { 366 /* Complex ops are sleeping. 367 * We must stay in complex mode 368 */ 369 return; 370 } 371 if (sma->use_global_lock == 1) { 372 373 /* See SEM_BARRIER_1 for purpose/pairing */ 374 smp_store_release(&sma->use_global_lock, 0); 375 } else { 376 WRITE_ONCE(sma->use_global_lock, 377 sma->use_global_lock-1); 378 } 379 } 380 381 #define SEM_GLOBAL_LOCK (-1) 382 /* 383 * If the request contains only one semaphore operation, and there are 384 * no complex transactions pending, lock only the semaphore involved. 
385 * Otherwise, lock the entire semaphore array, since we either have 386 * multiple semaphores in our own semops, or we need to look at 387 * semaphores from other pending complex operations. 388 */ 389 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, 390 int nsops) 391 { 392 struct sem *sem; 393 int idx; 394 395 if (nsops != 1) { 396 /* Complex operation - acquire a full lock */ 397 ipc_lock_object(&sma->sem_perm); 398 399 /* Prevent parallel simple ops */ 400 complexmode_enter(sma); 401 return SEM_GLOBAL_LOCK; 402 } 403 404 /* 405 * Only one semaphore affected - try to optimize locking. 406 * Optimized locking is possible if no complex operation 407 * is either enqueued or processed right now. 408 * 409 * Both facts are tracked by use_global_mode. 410 */ 411 idx = array_index_nospec(sops->sem_num, sma->sem_nsems); 412 sem = &sma->sems[idx]; 413 414 /* 415 * Initial check for use_global_lock. Just an optimization, 416 * no locking, no memory barrier. 417 */ 418 if (!READ_ONCE(sma->use_global_lock)) { 419 /* 420 * It appears that no complex operation is around. 421 * Acquire the per-semaphore lock. 422 */ 423 spin_lock(&sem->lock); 424 425 /* see SEM_BARRIER_1 for purpose/pairing */ 426 if (!smp_load_acquire(&sma->use_global_lock)) { 427 /* fast path successful! */ 428 return sops->sem_num; 429 } 430 spin_unlock(&sem->lock); 431 } 432 433 /* slow path: acquire the full lock */ 434 ipc_lock_object(&sma->sem_perm); 435 436 if (sma->use_global_lock == 0) { 437 /* 438 * The use_global_lock mode ended while we waited for 439 * sma->sem_perm.lock. Thus we must switch to locking 440 * with sem->lock. 441 * Unlike in the fast path, there is no need to recheck 442 * sma->use_global_lock after we have acquired sem->lock: 443 * We own sma->sem_perm.lock, thus use_global_lock cannot 444 * change. 
445 */ 446 spin_lock(&sem->lock); 447 448 ipc_unlock_object(&sma->sem_perm); 449 return sops->sem_num; 450 } else { 451 /* 452 * Not a false alarm, thus continue to use the global lock 453 * mode. No need for complexmode_enter(), this was done by 454 * the caller that has set use_global_mode to non-zero. 455 */ 456 return SEM_GLOBAL_LOCK; 457 } 458 } 459 460 static inline void sem_unlock(struct sem_array *sma, int locknum) 461 { 462 if (locknum == SEM_GLOBAL_LOCK) { 463 unmerge_queues(sma); 464 complexmode_tryleave(sma); 465 ipc_unlock_object(&sma->sem_perm); 466 } else { 467 struct sem *sem = &sma->sems[locknum]; 468 spin_unlock(&sem->lock); 469 } 470 } 471 472 /* 473 * sem_lock_(check_) routines are called in the paths where the rwsem 474 * is not held. 475 * 476 * The caller holds the RCU read lock. 477 */ 478 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 479 { 480 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 481 482 if (IS_ERR(ipcp)) 483 return ERR_CAST(ipcp); 484 485 return container_of(ipcp, struct sem_array, sem_perm); 486 } 487 488 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 489 int id) 490 { 491 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 492 493 if (IS_ERR(ipcp)) 494 return ERR_CAST(ipcp); 495 496 return container_of(ipcp, struct sem_array, sem_perm); 497 } 498 499 static inline void sem_lock_and_putref(struct sem_array *sma) 500 { 501 sem_lock(sma, NULL, -1); 502 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 503 } 504 505 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 506 { 507 ipc_rmid(&sem_ids(ns), &s->sem_perm); 508 } 509 510 static struct sem_array *sem_alloc(size_t nsems) 511 { 512 struct sem_array *sma; 513 514 if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) 515 return NULL; 516 517 sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL_ACCOUNT); 518 if (unlikely(!sma)) 519 
return NULL; 520 521 return sma; 522 } 523 524 /** 525 * newary - Create a new semaphore set 526 * @ns: namespace 527 * @params: ptr to the structure that contains key, semflg and nsems 528 * 529 * Called with sem_ids.rwsem held (as a writer) 530 */ 531 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 532 { 533 int retval; 534 struct sem_array *sma; 535 key_t key = params->key; 536 int nsems = params->u.nsems; 537 int semflg = params->flg; 538 int i; 539 540 if (!nsems) 541 return -EINVAL; 542 if (ns->used_sems + nsems > ns->sc_semmns) 543 return -ENOSPC; 544 545 sma = sem_alloc(nsems); 546 if (!sma) 547 return -ENOMEM; 548 549 sma->sem_perm.mode = (semflg & S_IRWXUGO); 550 sma->sem_perm.key = key; 551 552 sma->sem_perm.security = NULL; 553 retval = security_sem_alloc(&sma->sem_perm); 554 if (retval) { 555 kvfree(sma); 556 return retval; 557 } 558 559 for (i = 0; i < nsems; i++) { 560 INIT_LIST_HEAD(&sma->sems[i].pending_alter); 561 INIT_LIST_HEAD(&sma->sems[i].pending_const); 562 spin_lock_init(&sma->sems[i].lock); 563 } 564 565 sma->complex_count = 0; 566 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 567 INIT_LIST_HEAD(&sma->pending_alter); 568 INIT_LIST_HEAD(&sma->pending_const); 569 INIT_LIST_HEAD(&sma->list_id); 570 sma->sem_nsems = nsems; 571 sma->sem_ctime = ktime_get_real_seconds(); 572 573 /* ipc_addid() locks sma upon success. */ 574 retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 575 if (retval < 0) { 576 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 577 return retval; 578 } 579 ns->used_sems += nsems; 580 581 sem_unlock(sma, -1); 582 rcu_read_unlock(); 583 584 return sma->sem_perm.id; 585 } 586 587 588 /* 589 * Called with sem_ids.rwsem and ipcp locked. 
590 */ 591 static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) 592 { 593 struct sem_array *sma; 594 595 sma = container_of(ipcp, struct sem_array, sem_perm); 596 if (params->u.nsems > sma->sem_nsems) 597 return -EINVAL; 598 599 return 0; 600 } 601 602 long ksys_semget(key_t key, int nsems, int semflg) 603 { 604 struct ipc_namespace *ns; 605 static const struct ipc_ops sem_ops = { 606 .getnew = newary, 607 .associate = security_sem_associate, 608 .more_checks = sem_more_checks, 609 }; 610 struct ipc_params sem_params; 611 612 ns = current->nsproxy->ipc_ns; 613 614 if (nsems < 0 || nsems > ns->sc_semmsl) 615 return -EINVAL; 616 617 sem_params.key = key; 618 sem_params.flg = semflg; 619 sem_params.u.nsems = nsems; 620 621 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 622 } 623 624 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 625 { 626 return ksys_semget(key, nsems, semflg); 627 } 628 629 /** 630 * perform_atomic_semop[_slow] - Attempt to perform semaphore 631 * operations on a given array. 632 * @sma: semaphore array 633 * @q: struct sem_queue that describes the operation 634 * 635 * Caller blocking are as follows, based the value 636 * indicated by the semaphore operation (sem_op): 637 * 638 * (1) >0 never blocks. 639 * (2) 0 (wait-for-zero operation): semval is non-zero. 640 * (3) <0 attempting to decrement semval to a value smaller than zero. 641 * 642 * Returns 0 if the operation was possible. 643 * Returns 1 if the operation is impossible, the caller must sleep. 644 * Returns <0 for error codes. 
645 */ 646 static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) 647 { 648 int result, sem_op, nsops; 649 struct pid *pid; 650 struct sembuf *sop; 651 struct sem *curr; 652 struct sembuf *sops; 653 struct sem_undo *un; 654 655 sops = q->sops; 656 nsops = q->nsops; 657 un = q->undo; 658 659 for (sop = sops; sop < sops + nsops; sop++) { 660 int idx = array_index_nospec(sop->sem_num, sma->sem_nsems); 661 curr = &sma->sems[idx]; 662 sem_op = sop->sem_op; 663 result = curr->semval; 664 665 if (!sem_op && result) 666 goto would_block; 667 668 result += sem_op; 669 if (result < 0) 670 goto would_block; 671 if (result > SEMVMX) 672 goto out_of_range; 673 674 if (sop->sem_flg & SEM_UNDO) { 675 int undo = un->semadj[sop->sem_num] - sem_op; 676 /* Exceeding the undo range is an error. */ 677 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 678 goto out_of_range; 679 un->semadj[sop->sem_num] = undo; 680 } 681 682 curr->semval = result; 683 } 684 685 sop--; 686 pid = q->pid; 687 while (sop >= sops) { 688 ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid); 689 sop--; 690 } 691 692 return 0; 693 694 out_of_range: 695 result = -ERANGE; 696 goto undo; 697 698 would_block: 699 q->blocking = sop; 700 701 if (sop->sem_flg & IPC_NOWAIT) 702 result = -EAGAIN; 703 else 704 result = 1; 705 706 undo: 707 sop--; 708 while (sop >= sops) { 709 sem_op = sop->sem_op; 710 sma->sems[sop->sem_num].semval -= sem_op; 711 if (sop->sem_flg & SEM_UNDO) 712 un->semadj[sop->sem_num] += sem_op; 713 sop--; 714 } 715 716 return result; 717 } 718 719 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 720 { 721 int result, sem_op, nsops; 722 struct sembuf *sop; 723 struct sem *curr; 724 struct sembuf *sops; 725 struct sem_undo *un; 726 727 sops = q->sops; 728 nsops = q->nsops; 729 un = q->undo; 730 731 if (unlikely(q->dupsop)) 732 return perform_atomic_semop_slow(sma, q); 733 734 /* 735 * We scan the semaphore set twice, first to ensure that the entire 736 * 
operation can succeed, therefore avoiding any pointless writes 737 * to shared memory and having to undo such changes in order to block 738 * until the operations can go through. 739 */ 740 for (sop = sops; sop < sops + nsops; sop++) { 741 int idx = array_index_nospec(sop->sem_num, sma->sem_nsems); 742 743 curr = &sma->sems[idx]; 744 sem_op = sop->sem_op; 745 result = curr->semval; 746 747 if (!sem_op && result) 748 goto would_block; /* wait-for-zero */ 749 750 result += sem_op; 751 if (result < 0) 752 goto would_block; 753 754 if (result > SEMVMX) 755 return -ERANGE; 756 757 if (sop->sem_flg & SEM_UNDO) { 758 int undo = un->semadj[sop->sem_num] - sem_op; 759 760 /* Exceeding the undo range is an error. */ 761 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 762 return -ERANGE; 763 } 764 } 765 766 for (sop = sops; sop < sops + nsops; sop++) { 767 curr = &sma->sems[sop->sem_num]; 768 sem_op = sop->sem_op; 769 770 if (sop->sem_flg & SEM_UNDO) { 771 int undo = un->semadj[sop->sem_num] - sem_op; 772 773 un->semadj[sop->sem_num] = undo; 774 } 775 curr->semval += sem_op; 776 ipc_update_pid(&curr->sempid, q->pid); 777 } 778 779 return 0; 780 781 would_block: 782 q->blocking = sop; 783 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; 784 } 785 786 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, 787 struct wake_q_head *wake_q) 788 { 789 struct task_struct *sleeper; 790 791 sleeper = get_task_struct(q->sleeper); 792 793 /* see SEM_BARRIER_2 for purpose/pairing */ 794 smp_store_release(&q->status, error); 795 796 wake_q_add_safe(wake_q, sleeper); 797 } 798 799 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 800 { 801 list_del(&q->list); 802 if (q->nsops > 1) 803 sma->complex_count--; 804 } 805 806 /** check_restart(sma, q) 807 * @sma: semaphore array 808 * @q: the operation that just completed 809 * 810 * update_queue is O(N^2) when it restarts scanning the whole queue of 811 * waiting operations. 
Therefore this function checks if the restart is 812 * really necessary. It is called after a previously waiting operation 813 * modified the array. 814 * Note that wait-for-zero operations are handled without restart. 815 */ 816 static inline int check_restart(struct sem_array *sma, struct sem_queue *q) 817 { 818 /* pending complex alter operations are too difficult to analyse */ 819 if (!list_empty(&sma->pending_alter)) 820 return 1; 821 822 /* we were a sleeping complex operation. Too difficult */ 823 if (q->nsops > 1) 824 return 1; 825 826 /* It is impossible that someone waits for the new value: 827 * - complex operations always restart. 828 * - wait-for-zero are handled separately. 829 * - q is a previously sleeping simple operation that 830 * altered the array. It must be a decrement, because 831 * simple increments never sleep. 832 * - If there are older (higher priority) decrements 833 * in the queue, then they have observed the original 834 * semval value and couldn't proceed. The operation 835 * decremented to value - thus they won't proceed either. 836 */ 837 return 0; 838 } 839 840 /** 841 * wake_const_ops - wake up non-alter tasks 842 * @sma: semaphore array. 843 * @semnum: semaphore that was modified. 844 * @wake_q: lockless wake-queue head. 845 * 846 * wake_const_ops must be called after a semaphore in a semaphore array 847 * was set to 0. If complex const operations are pending, wake_const_ops must 848 * be called with semnum = -1, as well as with the number of each modified 849 * semaphore. 850 * The tasks that must be woken up are added to @wake_q. The return code 851 * is stored in q->pid. 852 * The function returns 1 if at least one operation was completed successfully. 
853 */ 854 static int wake_const_ops(struct sem_array *sma, int semnum, 855 struct wake_q_head *wake_q) 856 { 857 struct sem_queue *q, *tmp; 858 struct list_head *pending_list; 859 int semop_completed = 0; 860 861 if (semnum == -1) 862 pending_list = &sma->pending_const; 863 else 864 pending_list = &sma->sems[semnum].pending_const; 865 866 list_for_each_entry_safe(q, tmp, pending_list, list) { 867 int error = perform_atomic_semop(sma, q); 868 869 if (error > 0) 870 continue; 871 /* operation completed, remove from queue & wakeup */ 872 unlink_queue(sma, q); 873 874 wake_up_sem_queue_prepare(q, error, wake_q); 875 if (error == 0) 876 semop_completed = 1; 877 } 878 879 return semop_completed; 880 } 881 882 /** 883 * do_smart_wakeup_zero - wakeup all wait for zero tasks 884 * @sma: semaphore array 885 * @sops: operations that were performed 886 * @nsops: number of operations 887 * @wake_q: lockless wake-queue head 888 * 889 * Checks all required queue for wait-for-zero operations, based 890 * on the actual changes that were performed on the semaphore array. 891 * The function returns 1 if at least one operation was completed successfully. 892 */ 893 static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, 894 int nsops, struct wake_q_head *wake_q) 895 { 896 int i; 897 int semop_completed = 0; 898 int got_zero = 0; 899 900 /* first: the per-semaphore queues, if known */ 901 if (sops) { 902 for (i = 0; i < nsops; i++) { 903 int num = sops[i].sem_num; 904 905 if (sma->sems[num].semval == 0) { 906 got_zero = 1; 907 semop_completed |= wake_const_ops(sma, num, wake_q); 908 } 909 } 910 } else { 911 /* 912 * No sops means modified semaphores not known. 913 * Assume all were changed. 914 */ 915 for (i = 0; i < sma->sem_nsems; i++) { 916 if (sma->sems[i].semval == 0) { 917 got_zero = 1; 918 semop_completed |= wake_const_ops(sma, i, wake_q); 919 } 920 } 921 } 922 /* 923 * If one of the modified semaphores got 0, 924 * then check the global queue, too. 
*/
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, wake_q);

	return semop_completed;
}


/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code of
 * each completed operation is handed to wake_up_sem_queue_prepare(); the
 * sleeper reads it back from its queue.status (see __do_semtimedop()).
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	/* semnum == -1 selects the array-wide list, else the per-semaphore list */
	if (semnum == -1)
		pending_list = &sma->pending_alter;
	else
		pending_list = &sma->sems[semnum].pending_alter;

again:
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error, restart;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan further: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sems[semnum].semval == 0)
			break;

		error = perform_atomic_semop(sma, q);

		/* Does q->sleeper still need to sleep? */
		if (error > 0)
			continue;

		/* error <= 0: the operation is finished (success or failure) */
		unlink_queue(sma, q);

		if (error) {
			/* failed operation: nothing was modified, no rescan */
			restart = 0;
		} else {
			semop_completed = 1;
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(q, error, wake_q);
		/* check_restart() asked for a rescan from the list head */
		if (restart)
			goto again;
	}
	return semop_completed;
}

/**
 * set_semotime - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time: the instance of
 * semaphore 0 if @sops is NULL, otherwise the instance of the semaphore
 * named by the first operation in @sops.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
		sma->sems[0].sem_otime = ktime_get_real_seconds();
	} else {
		sma->sems[sops[0].sem_num].sem_otime =
						ktime_get_real_seconds();
	}
}

/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed, may be NULL if unknown
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform that wake_up_q() call after dropping all locks.
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			    int otime, struct wake_q_head *wake_q)
{
	int i;

	/* otime accumulates "some operation completed" from every helper */
	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
		otime |= update_queue(sma, -1, wake_q);
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
				otime |= update_queue(sma, i, wake_q);
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrease.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops won't be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							sops[i].sem_num, wake_q);
				}
			}
		}
	}
	if (otime)
		set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 *
 * Returns 1 if the blocking operation of @q targets @semnum and is of the
 * requested kind (wait-for-zero when @count_zero is true, decrement when it
 * is false), 0 otherwise.
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
			bool count_zero)
{
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 *
 * Per definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operation would block, too.
 *
 * count_semcnt() returns semzcnt for @semnum when @count_zero is true and
 * semncnt when it is false.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
{
	struct list_head *l;
	struct sem_queue *q;
	int semcnt;

	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
		l = &sma->sems[semnum].pending_const;
	else
		l = &sma->sems[semnum].pending_alter;

	list_for_each_entry(q, l, list) {
		/* all task on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
	}

	/* Then: check the complex operations. */
	list_for_each_entry(q, &sma->pending_alter, list) {
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
	}
	return semcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
 * remains locked on exit.
1142 */ 1143 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 1144 { 1145 struct sem_undo *un, *tu; 1146 struct sem_queue *q, *tq; 1147 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 1148 int i; 1149 DEFINE_WAKE_Q(wake_q); 1150 1151 /* Free the existing undo structures for this semaphore set. */ 1152 ipc_assert_locked_object(&sma->sem_perm); 1153 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 1154 list_del(&un->list_id); 1155 spin_lock(&un->ulp->lock); 1156 un->semid = -1; 1157 list_del_rcu(&un->list_proc); 1158 spin_unlock(&un->ulp->lock); 1159 kvfree_rcu(un, rcu); 1160 } 1161 1162 /* Wake up all pending processes and let them fail with EIDRM. */ 1163 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1164 unlink_queue(sma, q); 1165 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1166 } 1167 1168 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1169 unlink_queue(sma, q); 1170 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1171 } 1172 for (i = 0; i < sma->sem_nsems; i++) { 1173 struct sem *sem = &sma->sems[i]; 1174 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1175 unlink_queue(sma, q); 1176 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1177 } 1178 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1179 unlink_queue(sma, q); 1180 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1181 } 1182 ipc_update_pid(&sem->sempid, NULL); 1183 } 1184 1185 /* Remove the semaphore set from the IDR */ 1186 sem_rmid(ns, sma); 1187 sem_unlock(sma, -1); 1188 rcu_read_unlock(); 1189 1190 wake_up_q(&wake_q); 1191 ns->used_sems -= sma->sem_nsems; 1192 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1193 } 1194 1195 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1196 { 1197 switch (version) { 1198 case IPC_64: 1199 return copy_to_user(buf, in, sizeof(*in)); 1200 case IPC_OLD: 1201 { 1202 struct semid_ds out; 1203 1204 memset(&out, 0, 
sizeof(out)); 1205 1206 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1207 1208 out.sem_otime = in->sem_otime; 1209 out.sem_ctime = in->sem_ctime; 1210 out.sem_nsems = in->sem_nsems; 1211 1212 return copy_to_user(buf, &out, sizeof(out)); 1213 } 1214 default: 1215 return -EINVAL; 1216 } 1217 } 1218 1219 static time64_t get_semotime(struct sem_array *sma) 1220 { 1221 int i; 1222 time64_t res; 1223 1224 res = sma->sems[0].sem_otime; 1225 for (i = 1; i < sma->sem_nsems; i++) { 1226 time64_t to = sma->sems[i].sem_otime; 1227 1228 if (to > res) 1229 res = to; 1230 } 1231 return res; 1232 } 1233 1234 static int semctl_stat(struct ipc_namespace *ns, int semid, 1235 int cmd, struct semid64_ds *semid64) 1236 { 1237 struct sem_array *sma; 1238 time64_t semotime; 1239 int err; 1240 1241 memset(semid64, 0, sizeof(*semid64)); 1242 1243 rcu_read_lock(); 1244 if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) { 1245 sma = sem_obtain_object(ns, semid); 1246 if (IS_ERR(sma)) { 1247 err = PTR_ERR(sma); 1248 goto out_unlock; 1249 } 1250 } else { /* IPC_STAT */ 1251 sma = sem_obtain_object_check(ns, semid); 1252 if (IS_ERR(sma)) { 1253 err = PTR_ERR(sma); 1254 goto out_unlock; 1255 } 1256 } 1257 1258 /* see comment for SHM_STAT_ANY */ 1259 if (cmd == SEM_STAT_ANY) 1260 audit_ipc_obj(&sma->sem_perm); 1261 else { 1262 err = -EACCES; 1263 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1264 goto out_unlock; 1265 } 1266 1267 err = security_sem_semctl(&sma->sem_perm, cmd); 1268 if (err) 1269 goto out_unlock; 1270 1271 ipc_lock_object(&sma->sem_perm); 1272 1273 if (!ipc_valid_object(&sma->sem_perm)) { 1274 ipc_unlock_object(&sma->sem_perm); 1275 err = -EIDRM; 1276 goto out_unlock; 1277 } 1278 1279 kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm); 1280 semotime = get_semotime(sma); 1281 semid64->sem_otime = semotime; 1282 semid64->sem_ctime = sma->sem_ctime; 1283 #ifndef CONFIG_64BIT 1284 semid64->sem_otime_high = semotime >> 32; 1285 semid64->sem_ctime_high = sma->sem_ctime >> 32; 1286 
#endif 1287 semid64->sem_nsems = sma->sem_nsems; 1288 1289 if (cmd == IPC_STAT) { 1290 /* 1291 * As defined in SUS: 1292 * Return 0 on success 1293 */ 1294 err = 0; 1295 } else { 1296 /* 1297 * SEM_STAT and SEM_STAT_ANY (both Linux specific) 1298 * Return the full id, including the sequence number 1299 */ 1300 err = sma->sem_perm.id; 1301 } 1302 ipc_unlock_object(&sma->sem_perm); 1303 out_unlock: 1304 rcu_read_unlock(); 1305 return err; 1306 } 1307 1308 static int semctl_info(struct ipc_namespace *ns, int semid, 1309 int cmd, void __user *p) 1310 { 1311 struct seminfo seminfo; 1312 int max_idx; 1313 int err; 1314 1315 err = security_sem_semctl(NULL, cmd); 1316 if (err) 1317 return err; 1318 1319 memset(&seminfo, 0, sizeof(seminfo)); 1320 seminfo.semmni = ns->sc_semmni; 1321 seminfo.semmns = ns->sc_semmns; 1322 seminfo.semmsl = ns->sc_semmsl; 1323 seminfo.semopm = ns->sc_semopm; 1324 seminfo.semvmx = SEMVMX; 1325 seminfo.semmnu = SEMMNU; 1326 seminfo.semmap = SEMMAP; 1327 seminfo.semume = SEMUME; 1328 down_read(&sem_ids(ns).rwsem); 1329 if (cmd == SEM_INFO) { 1330 seminfo.semusz = sem_ids(ns).in_use; 1331 seminfo.semaem = ns->used_sems; 1332 } else { 1333 seminfo.semusz = SEMUSZ; 1334 seminfo.semaem = SEMAEM; 1335 } 1336 max_idx = ipc_get_maxidx(&sem_ids(ns)); 1337 up_read(&sem_ids(ns).rwsem); 1338 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 1339 return -EFAULT; 1340 return (max_idx < 0) ? 
0 : max_idx; 1341 } 1342 1343 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1344 int val) 1345 { 1346 struct sem_undo *un; 1347 struct sem_array *sma; 1348 struct sem *curr; 1349 int err; 1350 DEFINE_WAKE_Q(wake_q); 1351 1352 if (val > SEMVMX || val < 0) 1353 return -ERANGE; 1354 1355 rcu_read_lock(); 1356 sma = sem_obtain_object_check(ns, semid); 1357 if (IS_ERR(sma)) { 1358 rcu_read_unlock(); 1359 return PTR_ERR(sma); 1360 } 1361 1362 if (semnum < 0 || semnum >= sma->sem_nsems) { 1363 rcu_read_unlock(); 1364 return -EINVAL; 1365 } 1366 1367 1368 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1369 rcu_read_unlock(); 1370 return -EACCES; 1371 } 1372 1373 err = security_sem_semctl(&sma->sem_perm, SETVAL); 1374 if (err) { 1375 rcu_read_unlock(); 1376 return -EACCES; 1377 } 1378 1379 sem_lock(sma, NULL, -1); 1380 1381 if (!ipc_valid_object(&sma->sem_perm)) { 1382 sem_unlock(sma, -1); 1383 rcu_read_unlock(); 1384 return -EIDRM; 1385 } 1386 1387 semnum = array_index_nospec(semnum, sma->sem_nsems); 1388 curr = &sma->sems[semnum]; 1389 1390 ipc_assert_locked_object(&sma->sem_perm); 1391 list_for_each_entry(un, &sma->list_id, list_id) 1392 un->semadj[semnum] = 0; 1393 1394 curr->semval = val; 1395 ipc_update_pid(&curr->sempid, task_tgid(current)); 1396 sma->sem_ctime = ktime_get_real_seconds(); 1397 /* maybe some queued-up processes were waiting for this */ 1398 do_smart_update(sma, NULL, 0, 0, &wake_q); 1399 sem_unlock(sma, -1); 1400 rcu_read_unlock(); 1401 wake_up_q(&wake_q); 1402 return 0; 1403 } 1404 1405 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1406 int cmd, void __user *p) 1407 { 1408 struct sem_array *sma; 1409 struct sem *curr; 1410 int err, nsems; 1411 ushort fast_sem_io[SEMMSL_FAST]; 1412 ushort *sem_io = fast_sem_io; 1413 DEFINE_WAKE_Q(wake_q); 1414 1415 rcu_read_lock(); 1416 sma = sem_obtain_object_check(ns, semid); 1417 if (IS_ERR(sma)) { 1418 rcu_read_unlock(); 1419 return PTR_ERR(sma); 1420 } 1421 1422 
nsems = sma->sem_nsems; 1423 1424 err = -EACCES; 1425 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO)) 1426 goto out_rcu_wakeup; 1427 1428 err = security_sem_semctl(&sma->sem_perm, cmd); 1429 if (err) 1430 goto out_rcu_wakeup; 1431 1432 switch (cmd) { 1433 case GETALL: 1434 { 1435 ushort __user *array = p; 1436 int i; 1437 1438 sem_lock(sma, NULL, -1); 1439 if (!ipc_valid_object(&sma->sem_perm)) { 1440 err = -EIDRM; 1441 goto out_unlock; 1442 } 1443 if (nsems > SEMMSL_FAST) { 1444 if (!ipc_rcu_getref(&sma->sem_perm)) { 1445 err = -EIDRM; 1446 goto out_unlock; 1447 } 1448 sem_unlock(sma, -1); 1449 rcu_read_unlock(); 1450 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1451 GFP_KERNEL); 1452 if (sem_io == NULL) { 1453 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1454 return -ENOMEM; 1455 } 1456 1457 rcu_read_lock(); 1458 sem_lock_and_putref(sma); 1459 if (!ipc_valid_object(&sma->sem_perm)) { 1460 err = -EIDRM; 1461 goto out_unlock; 1462 } 1463 } 1464 for (i = 0; i < sma->sem_nsems; i++) 1465 sem_io[i] = sma->sems[i].semval; 1466 sem_unlock(sma, -1); 1467 rcu_read_unlock(); 1468 err = 0; 1469 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1470 err = -EFAULT; 1471 goto out_free; 1472 } 1473 case SETALL: 1474 { 1475 int i; 1476 struct sem_undo *un; 1477 1478 if (!ipc_rcu_getref(&sma->sem_perm)) { 1479 err = -EIDRM; 1480 goto out_rcu_wakeup; 1481 } 1482 rcu_read_unlock(); 1483 1484 if (nsems > SEMMSL_FAST) { 1485 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1486 GFP_KERNEL); 1487 if (sem_io == NULL) { 1488 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1489 return -ENOMEM; 1490 } 1491 } 1492 1493 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1494 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1495 err = -EFAULT; 1496 goto out_free; 1497 } 1498 1499 for (i = 0; i < nsems; i++) { 1500 if (sem_io[i] > SEMVMX) { 1501 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1502 err = -ERANGE; 1503 goto out_free; 1504 } 1505 } 1506 rcu_read_lock(); 
1507 sem_lock_and_putref(sma); 1508 if (!ipc_valid_object(&sma->sem_perm)) { 1509 err = -EIDRM; 1510 goto out_unlock; 1511 } 1512 1513 for (i = 0; i < nsems; i++) { 1514 sma->sems[i].semval = sem_io[i]; 1515 ipc_update_pid(&sma->sems[i].sempid, task_tgid(current)); 1516 } 1517 1518 ipc_assert_locked_object(&sma->sem_perm); 1519 list_for_each_entry(un, &sma->list_id, list_id) { 1520 for (i = 0; i < nsems; i++) 1521 un->semadj[i] = 0; 1522 } 1523 sma->sem_ctime = ktime_get_real_seconds(); 1524 /* maybe some queued-up processes were waiting for this */ 1525 do_smart_update(sma, NULL, 0, 0, &wake_q); 1526 err = 0; 1527 goto out_unlock; 1528 } 1529 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1530 } 1531 err = -EINVAL; 1532 if (semnum < 0 || semnum >= nsems) 1533 goto out_rcu_wakeup; 1534 1535 sem_lock(sma, NULL, -1); 1536 if (!ipc_valid_object(&sma->sem_perm)) { 1537 err = -EIDRM; 1538 goto out_unlock; 1539 } 1540 1541 semnum = array_index_nospec(semnum, nsems); 1542 curr = &sma->sems[semnum]; 1543 1544 switch (cmd) { 1545 case GETVAL: 1546 err = curr->semval; 1547 goto out_unlock; 1548 case GETPID: 1549 err = pid_vnr(curr->sempid); 1550 goto out_unlock; 1551 case GETNCNT: 1552 err = count_semcnt(sma, semnum, 0); 1553 goto out_unlock; 1554 case GETZCNT: 1555 err = count_semcnt(sma, semnum, 1); 1556 goto out_unlock; 1557 } 1558 1559 out_unlock: 1560 sem_unlock(sma, -1); 1561 out_rcu_wakeup: 1562 rcu_read_unlock(); 1563 wake_up_q(&wake_q); 1564 out_free: 1565 if (sem_io != fast_sem_io) 1566 kvfree(sem_io); 1567 return err; 1568 } 1569 1570 static inline unsigned long 1571 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1572 { 1573 switch (version) { 1574 case IPC_64: 1575 if (copy_from_user(out, buf, sizeof(*out))) 1576 return -EFAULT; 1577 return 0; 1578 case IPC_OLD: 1579 { 1580 struct semid_ds tbuf_old; 1581 1582 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1583 return -EFAULT; 1584 1585 out->sem_perm.uid = 
tbuf_old.sem_perm.uid; 1586 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1587 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1588 1589 return 0; 1590 } 1591 default: 1592 return -EINVAL; 1593 } 1594 } 1595 1596 /* 1597 * This function handles some semctl commands which require the rwsem 1598 * to be held in write mode. 1599 * NOTE: no locks must be held, the rwsem is taken inside this function. 1600 */ 1601 static int semctl_down(struct ipc_namespace *ns, int semid, 1602 int cmd, struct semid64_ds *semid64) 1603 { 1604 struct sem_array *sma; 1605 int err; 1606 struct kern_ipc_perm *ipcp; 1607 1608 down_write(&sem_ids(ns).rwsem); 1609 rcu_read_lock(); 1610 1611 ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd, 1612 &semid64->sem_perm, 0); 1613 if (IS_ERR(ipcp)) { 1614 err = PTR_ERR(ipcp); 1615 goto out_unlock1; 1616 } 1617 1618 sma = container_of(ipcp, struct sem_array, sem_perm); 1619 1620 err = security_sem_semctl(&sma->sem_perm, cmd); 1621 if (err) 1622 goto out_unlock1; 1623 1624 switch (cmd) { 1625 case IPC_RMID: 1626 sem_lock(sma, NULL, -1); 1627 /* freeary unlocks the ipc object and rcu */ 1628 freeary(ns, ipcp); 1629 goto out_up; 1630 case IPC_SET: 1631 sem_lock(sma, NULL, -1); 1632 err = ipc_update_perm(&semid64->sem_perm, ipcp); 1633 if (err) 1634 goto out_unlock0; 1635 sma->sem_ctime = ktime_get_real_seconds(); 1636 break; 1637 default: 1638 err = -EINVAL; 1639 goto out_unlock1; 1640 } 1641 1642 out_unlock0: 1643 sem_unlock(sma, -1); 1644 out_unlock1: 1645 rcu_read_unlock(); 1646 out_up: 1647 up_write(&sem_ids(ns).rwsem); 1648 return err; 1649 } 1650 1651 static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version) 1652 { 1653 struct ipc_namespace *ns; 1654 void __user *p = (void __user *)arg; 1655 struct semid64_ds semid64; 1656 int err; 1657 1658 if (semid < 0) 1659 return -EINVAL; 1660 1661 ns = current->nsproxy->ipc_ns; 1662 1663 switch (cmd) { 1664 case IPC_INFO: 1665 case SEM_INFO: 1666 return semctl_info(ns, semid, 
cmd, p); 1667 case IPC_STAT: 1668 case SEM_STAT: 1669 case SEM_STAT_ANY: 1670 err = semctl_stat(ns, semid, cmd, &semid64); 1671 if (err < 0) 1672 return err; 1673 if (copy_semid_to_user(p, &semid64, version)) 1674 err = -EFAULT; 1675 return err; 1676 case GETALL: 1677 case GETVAL: 1678 case GETPID: 1679 case GETNCNT: 1680 case GETZCNT: 1681 case SETALL: 1682 return semctl_main(ns, semid, semnum, cmd, p); 1683 case SETVAL: { 1684 int val; 1685 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1686 /* big-endian 64bit */ 1687 val = arg >> 32; 1688 #else 1689 /* 32bit or little-endian 64bit */ 1690 val = arg; 1691 #endif 1692 return semctl_setval(ns, semid, semnum, val); 1693 } 1694 case IPC_SET: 1695 if (copy_semid_from_user(&semid64, p, version)) 1696 return -EFAULT; 1697 fallthrough; 1698 case IPC_RMID: 1699 return semctl_down(ns, semid, cmd, &semid64); 1700 default: 1701 return -EINVAL; 1702 } 1703 } 1704 1705 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1706 { 1707 return ksys_semctl(semid, semnum, cmd, arg, IPC_64); 1708 } 1709 1710 #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION 1711 long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg) 1712 { 1713 int version = ipc_parse_version(&cmd); 1714 1715 return ksys_semctl(semid, semnum, cmd, arg, version); 1716 } 1717 1718 SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1719 { 1720 return ksys_old_semctl(semid, semnum, cmd, arg); 1721 } 1722 #endif 1723 1724 #ifdef CONFIG_COMPAT 1725 1726 struct compat_semid_ds { 1727 struct compat_ipc_perm sem_perm; 1728 old_time32_t sem_otime; 1729 old_time32_t sem_ctime; 1730 compat_uptr_t sem_base; 1731 compat_uptr_t sem_pending; 1732 compat_uptr_t sem_pending_last; 1733 compat_uptr_t undo; 1734 unsigned short sem_nsems; 1735 }; 1736 1737 static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf, 1738 int version) 1739 { 1740 memset(out, 0, sizeof(*out)); 1741 if (version == 
IPC_64) { 1742 struct compat_semid64_ds __user *p = buf; 1743 return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm); 1744 } else { 1745 struct compat_semid_ds __user *p = buf; 1746 return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm); 1747 } 1748 } 1749 1750 static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, 1751 int version) 1752 { 1753 if (version == IPC_64) { 1754 struct compat_semid64_ds v; 1755 memset(&v, 0, sizeof(v)); 1756 to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm); 1757 v.sem_otime = lower_32_bits(in->sem_otime); 1758 v.sem_otime_high = upper_32_bits(in->sem_otime); 1759 v.sem_ctime = lower_32_bits(in->sem_ctime); 1760 v.sem_ctime_high = upper_32_bits(in->sem_ctime); 1761 v.sem_nsems = in->sem_nsems; 1762 return copy_to_user(buf, &v, sizeof(v)); 1763 } else { 1764 struct compat_semid_ds v; 1765 memset(&v, 0, sizeof(v)); 1766 to_compat_ipc_perm(&v.sem_perm, &in->sem_perm); 1767 v.sem_otime = in->sem_otime; 1768 v.sem_ctime = in->sem_ctime; 1769 v.sem_nsems = in->sem_nsems; 1770 return copy_to_user(buf, &v, sizeof(v)); 1771 } 1772 } 1773 1774 static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version) 1775 { 1776 void __user *p = compat_ptr(arg); 1777 struct ipc_namespace *ns; 1778 struct semid64_ds semid64; 1779 int err; 1780 1781 ns = current->nsproxy->ipc_ns; 1782 1783 if (semid < 0) 1784 return -EINVAL; 1785 1786 switch (cmd & (~IPC_64)) { 1787 case IPC_INFO: 1788 case SEM_INFO: 1789 return semctl_info(ns, semid, cmd, p); 1790 case IPC_STAT: 1791 case SEM_STAT: 1792 case SEM_STAT_ANY: 1793 err = semctl_stat(ns, semid, cmd, &semid64); 1794 if (err < 0) 1795 return err; 1796 if (copy_compat_semid_to_user(p, &semid64, version)) 1797 err = -EFAULT; 1798 return err; 1799 case GETVAL: 1800 case GETPID: 1801 case GETNCNT: 1802 case GETZCNT: 1803 case GETALL: 1804 case SETALL: 1805 return semctl_main(ns, semid, semnum, cmd, p); 1806 case SETVAL: 1807 return semctl_setval(ns, semid, semnum, arg); 
1808 case IPC_SET: 1809 if (copy_compat_semid_from_user(&semid64, p, version)) 1810 return -EFAULT; 1811 fallthrough; 1812 case IPC_RMID: 1813 return semctl_down(ns, semid, cmd, &semid64); 1814 default: 1815 return -EINVAL; 1816 } 1817 } 1818 1819 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) 1820 { 1821 return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64); 1822 } 1823 1824 #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION 1825 long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg) 1826 { 1827 int version = compat_ipc_parse_version(&cmd); 1828 1829 return compat_ksys_semctl(semid, semnum, cmd, arg, version); 1830 } 1831 1832 COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg) 1833 { 1834 return compat_ksys_old_semctl(semid, semnum, cmd, arg); 1835 } 1836 #endif 1837 #endif 1838 1839 /* If the task doesn't already have a undo_list, then allocate one 1840 * here. We guarantee there is only one thread using this undo list, 1841 * and current is THE ONE 1842 * 1843 * If this allocation and assignment succeeds, but later 1844 * portions of this code fail, there is no need to free the sem_undo_list. 1845 * Just let it stay associated with the task, and it'll be freed later 1846 * at exit time. 1847 * 1848 * This can block, so callers must hold no locks. 
1849 */ 1850 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1851 { 1852 struct sem_undo_list *undo_list; 1853 1854 undo_list = current->sysvsem.undo_list; 1855 if (!undo_list) { 1856 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL_ACCOUNT); 1857 if (undo_list == NULL) 1858 return -ENOMEM; 1859 spin_lock_init(&undo_list->lock); 1860 refcount_set(&undo_list->refcnt, 1); 1861 INIT_LIST_HEAD(&undo_list->list_proc); 1862 1863 current->sysvsem.undo_list = undo_list; 1864 } 1865 *undo_listp = undo_list; 1866 return 0; 1867 } 1868 1869 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1870 { 1871 struct sem_undo *un; 1872 1873 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc, 1874 spin_is_locked(&ulp->lock)) { 1875 if (un->semid == semid) 1876 return un; 1877 } 1878 return NULL; 1879 } 1880 1881 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1882 { 1883 struct sem_undo *un; 1884 1885 assert_spin_locked(&ulp->lock); 1886 1887 un = __lookup_undo(ulp, semid); 1888 if (un) { 1889 list_del_rcu(&un->list_proc); 1890 list_add_rcu(&un->list_proc, &ulp->list_proc); 1891 } 1892 return un; 1893 } 1894 1895 /** 1896 * find_alloc_undo - lookup (and if not present create) undo array 1897 * @ns: namespace 1898 * @semid: semaphore array id 1899 * 1900 * The function looks up (and if not present creates) the undo structure. 1901 * The size of the undo structure depends on the size of the semaphore 1902 * array, thus the alloc path is not that straightforward. 1903 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1904 * performs a rcu_read_lock(). 
*/
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems, error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	rcu_read_lock();
	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	spin_unlock(&ulp->lock);
	/* common case: the undo structure exists, return with rcu held */
	if (likely(un != NULL))
		goto out;

	/* no undo structure around - allocate one. */
	/* step 1: figure out the size of the semaphore array */
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return ERR_CAST(sma);
	}

	nsems = sma->sem_nsems;
	/* pin the array so that it survives the unlocked allocation below */
	if (!ipc_rcu_getref(&sma->sem_perm)) {
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	rcu_read_unlock();

	/* step 2: allocate new undo structure */
	new = kvzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems,
		       GFP_KERNEL_ACCOUNT);
	if (!new) {
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
		return ERR_PTR(-ENOMEM);
	}

	/* step 3: Acquire the lock on semaphore array */
	rcu_read_lock();
	sem_lock_and_putref(sma);
	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		kvfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
	 */
	un = lookup_undo(ulp, semid);
	if (un) {
		spin_unlock(&ulp->lock);
		kvfree(new);
		goto success;
	}
	/* step 5: initialize & link new undo structure */
	/* the semadj array lives directly behind the struct itself */
	new->semadj = (short *) &new[1];
	new->ulp = ulp;
	new->semid = semid;
	assert_spin_locked(&ulp->lock);
	list_add_rcu(&new->list_proc, &ulp->list_proc);
	ipc_assert_locked_object(&sma->sem_perm);
	list_add(&new->list_id, &sma->list_id);
	un = new;
	spin_unlock(&ulp->lock);
success:
	sem_unlock(sma, -1);
out:
	return un;
}

/*
 * __do_semtimedop - perform the operations @sops on the array @semid.
 * @semid: semaphore array id
 * @sops: operations, already copied into kernel memory
 * @nsops: number of entries in @sops
 * @timeout: relative timeout, NULL to wait without limit
 * @ns: ipc namespace to look up @semid in
 *
 * The operations are tried once under the array lock. If they would block,
 * the task is queued and sleeps until another task completes the operation
 * on its behalf, the array is removed, the timeout expires or a signal is
 * pending.
 *
 * Returns 0 on success or a negative errno: among others -EINVAL, -E2BIG,
 * -EFBIG (semaphore number out of range), -EACCES, -EIDRM, -EAGAIN (timeout)
 * and -EINTR.
 */
long __do_semtimedop(int semid, struct sembuf *sops,
		unsigned nsops, const struct timespec64 *timeout,
		struct ipc_namespace *ns)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf *sop;
	struct sem_undo *un;
	int max, locknum;
	bool undos = false, alter = false, dupsop = false;
	struct sem_queue queue;
	unsigned long dup = 0;
	ktime_t expires, *exp = NULL;
	bool timed_out = false;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;

	if (timeout) {
		if (!timespec64_valid(timeout))
			return -EINVAL;
		/* convert the relative timeout into an absolute expiry */
		expires = ktime_add_safe(ktime_get(),
				timespec64_to_ktime(*timeout));
		exp = &expires;
	}


	/*
	 * Single pass over the operations: find the highest semaphore
	 * number, and note whether any op uses SEM_UNDO, alters a value,
	 * or may touch a semaphore that an earlier op already altered.
	 */
	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);

		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = true;
		if (dup & mask) {
			/*
			 * There was a previous alter access that appears
			 * to have accessed the same semaphore, thus use
			 * the dupsop logic. "appears", because the detection
			 * can only check % BITS_PER_LONG.
			 */
			dupsop = true;
		}
		if (sop->sem_op != 0) {
			alter = true;
			dup |= mask;
		}
	}

	if (undos) {
		/* On success, find_alloc_undo takes the rcu_read_lock */
		un = find_alloc_undo(ns, semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out;
		}
	} else {
		un = NULL;
		rcu_read_lock();
	}

	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		error = PTR_ERR(sma);
		goto out;
	}

	error = -EFBIG;
	if (max >= sma->sem_nsems) {
		rcu_read_unlock();
		goto out;
	}

	error = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
		rcu_read_unlock();
		goto out;
	}

	error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);
	if (error) {
		rcu_read_unlock();
		goto out;
	}

	error = -EIDRM;
	locknum = sem_lock(sma, sops, nsops);
	/*
	 * We eventually might perform the following check in a lockless
	 * fashion, considering ipc_valid_object() locking constraints.
	 * If nsops == 1 and there is no contention for sem_perm.lock, then
	 * only a per-semaphore lock is held and it's OK to proceed with the
	 * check below. More details on the fine grained locking scheme
	 * entangled here and why it's RMID race safe on comments at sem_lock()
	 */
	if (!ipc_valid_object(&sma->sem_perm))
		goto out_unlock;
	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array has received the same id. Check and fail.
	 * This case can be detected checking un->semid. The existence of
	 * "un" itself is guaranteed by rcu.
	 */
	if (un && un->semid == -1)
		goto out_unlock;

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid(current);
	queue.alter = alter;
	queue.dupsop = dupsop;

	error = perform_atomic_semop(sma, &queue);
	if (error == 0) { /* non-blocking successful path */
		DEFINE_WAKE_Q(wake_q);

		/*
		 * If the operation was successful, then do
		 * the required updates.
		 */
		if (alter)
			do_smart_update(sma, sops, nsops, 1, &wake_q);
		else
			set_semotime(sma, sops);

		sem_unlock(sma, locknum);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		goto out;
	}
	if (error < 0) /* non-blocking error path */
		goto out_unlock;

	/*
	 * We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */
	if (nsops == 1) {
		struct sem *curr;
		int idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
		curr = &sma->sems[idx];

		if (alter) {
			/* with complex ops pending, everything uses the global list */
			if (sma->complex_count) {
				list_add_tail(&queue.list,
						&sma->pending_alter);
			} else {

				list_add_tail(&queue.list,
						&curr->pending_alter);
			}
		} else {
			list_add_tail(&queue.list, &curr->pending_const);
		}
	} else {
		/* first complex op: merge_queues() is run before queueing */
		if (!sma->complex_count)
			merge_queues(sma);

		if (alter)
			list_add_tail(&queue.list, &sma->pending_alter);
		else
			list_add_tail(&queue.list, &sma->pending_const);

		sma->complex_count++;
	}

	do {
		/* memory ordering ensured by the lock in sem_lock() */
		WRITE_ONCE(queue.status, -EINTR);
		queue.sleeper = current;

		/* memory ordering is ensured by the lock in sem_lock() */
		__set_current_state(TASK_INTERRUPTIBLE);
		sem_unlock(sma, locknum);
		rcu_read_unlock();

		timed_out = !schedule_hrtimeout_range(exp,
				current->timer_slack_ns, HRTIMER_MODE_ABS);

		/*
		 * fastpath: the semop has completed, either successfully or
		 * not, from the syscall pov, is quite irrelevant to us at this
		 * point; we're done.
		 *
		 * We _do_ care, nonetheless, about being awoken by a signal or
		 * spuriously.  The queue.status is checked again in the
		 * slowpath (aka after taking sem_lock), such that we can detect
		 * scenarios where we were awakened externally, during the
		 * window between wake_q_add() and wake_up_q().
		 */
		error = READ_ONCE(queue.status);
		if (error != -EINTR) {
			/* see SEM_BARRIER_2 for purpose/pairing */
			smp_acquire__after_ctrl_dep();
			goto out;
		}

		rcu_read_lock();
		locknum = sem_lock(sma, sops, nsops);

		if (!ipc_valid_object(&sma->sem_perm))
			goto out_unlock;

		/*
		 * No necessity for any barrier: We are protected by sem_lock()
		 */
		error = READ_ONCE(queue.status);

		/*
		 * If queue.status != -EINTR we are woken up by another process.
		 * Leave without unlink_queue(), but with sem_unlock().
		 */
		if (error != -EINTR)
			goto out_unlock;

		/*
		 * If an interrupt occurred we have to clean up the queue.
		 */
		if (timed_out)
			error = -EAGAIN;
	} while (error == -EINTR && !signal_pending(current)); /* spurious */

	/* timeout or signal: we are still queued, remove ourselves */
	unlink_queue(sma, &queue);

out_unlock:
	sem_unlock(sma, locknum);
	rcu_read_unlock();
out:
	return error;
}

/*
 * do_semtimedop - copy the operations from user space and run them.
 *
 * Up to SEMOPM_FAST operations are kept in an on-stack buffer; larger
 * requests use a kvmalloc'ed array that is freed before returning.
 */
static long do_semtimedop(int semid, struct sembuf __user *tsops,
		unsigned nsops, const struct timespec64 *timeout)
{
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops;
	struct ipc_namespace *ns;
	int ret;

	ns = current->nsproxy->ipc_ns;
	/* bound nsops before it is used to size the allocation */
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	if (nsops < 1)
		return -EINVAL;

	if (nsops > SEMOPM_FAST) {
		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}

	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		ret =  -EFAULT;
		goto out_free;
	}

	ret = __do_semtimedop(semid, sops, nsops, timeout, ns);

out_free:
	if (sops != fast_sops)
		kvfree(sops);

	return ret;
}

/* semtimedop() with a __kernel_timespec timeout; a NULL timeout waits forever */
long ksys_semtimedop(int semid, struct sembuf __user *tsops,
		     unsigned int nsops, const struct __kernel_timespec __user *timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsops, nsops, &ts);
	}
	return do_semtimedop(semid, tsops, nsops, NULL);
}

SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned int, nsops, const struct __kernel_timespec __user *, timeout)
{
	return ksys_semtimedop(semid, tsops, nsops, timeout);
}

#ifdef CONFIG_COMPAT_32BIT_TIME
/* semtimedop() with an old_timespec32 timeout */
long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
			    unsigned int nsops,
			    const struct old_timespec32 __user *timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (get_old_timespec32(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsems, nsops, &ts);
	}
	return do_semtimedop(semid, tsems, nsops, NULL);
}

SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems,
		       unsigned int, nsops,
		       const struct old_timespec32 __user *, timeout)
{
	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
}
#endif

SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return do_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 *
 * With CLONE_SYSVSEM the undo list of current is created if necessary, its
 * refcount is raised and @tsk points to it; otherwise @tsk starts without
 * an undo list. Returns 0 or the error from get_undo_list().
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		refcount_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
2335 */ 2336 void exit_sem(struct task_struct *tsk) 2337 { 2338 struct sem_undo_list *ulp; 2339 2340 ulp = tsk->sysvsem.undo_list; 2341 if (!ulp) 2342 return; 2343 tsk->sysvsem.undo_list = NULL; 2344 2345 if (!refcount_dec_and_test(&ulp->refcnt)) 2346 return; 2347 2348 for (;;) { 2349 struct sem_array *sma; 2350 struct sem_undo *un; 2351 int semid, i; 2352 DEFINE_WAKE_Q(wake_q); 2353 2354 cond_resched(); 2355 2356 rcu_read_lock(); 2357 un = list_entry_rcu(ulp->list_proc.next, 2358 struct sem_undo, list_proc); 2359 if (&un->list_proc == &ulp->list_proc) { 2360 /* 2361 * We must wait for freeary() before freeing this ulp, 2362 * in case we raced with last sem_undo. There is a small 2363 * possibility where we exit while freeary() didn't 2364 * finish unlocking sem_undo_list. 2365 */ 2366 spin_lock(&ulp->lock); 2367 spin_unlock(&ulp->lock); 2368 rcu_read_unlock(); 2369 break; 2370 } 2371 spin_lock(&ulp->lock); 2372 semid = un->semid; 2373 spin_unlock(&ulp->lock); 2374 2375 /* exit_sem raced with IPC_RMID, nothing to do */ 2376 if (semid == -1) { 2377 rcu_read_unlock(); 2378 continue; 2379 } 2380 2381 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); 2382 /* exit_sem raced with IPC_RMID, nothing to do */ 2383 if (IS_ERR(sma)) { 2384 rcu_read_unlock(); 2385 continue; 2386 } 2387 2388 sem_lock(sma, NULL, -1); 2389 /* exit_sem raced with IPC_RMID, nothing to do */ 2390 if (!ipc_valid_object(&sma->sem_perm)) { 2391 sem_unlock(sma, -1); 2392 rcu_read_unlock(); 2393 continue; 2394 } 2395 un = __lookup_undo(ulp, semid); 2396 if (un == NULL) { 2397 /* exit_sem raced with IPC_RMID+semget() that created 2398 * exactly the same semid. Nothing to do. 
2399 */ 2400 sem_unlock(sma, -1); 2401 rcu_read_unlock(); 2402 continue; 2403 } 2404 2405 /* remove un from the linked lists */ 2406 ipc_assert_locked_object(&sma->sem_perm); 2407 list_del(&un->list_id); 2408 2409 spin_lock(&ulp->lock); 2410 list_del_rcu(&un->list_proc); 2411 spin_unlock(&ulp->lock); 2412 2413 /* perform adjustments registered in un */ 2414 for (i = 0; i < sma->sem_nsems; i++) { 2415 struct sem *semaphore = &sma->sems[i]; 2416 if (un->semadj[i]) { 2417 semaphore->semval += un->semadj[i]; 2418 /* 2419 * Range checks of the new semaphore value, 2420 * not defined by sus: 2421 * - Some unices ignore the undo entirely 2422 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 2423 * - some cap the value (e.g. FreeBSD caps 2424 * at 0, but doesn't enforce SEMVMX) 2425 * 2426 * Linux caps the semaphore value, both at 0 2427 * and at SEMVMX. 2428 * 2429 * Manfred <manfred@colorfullife.com> 2430 */ 2431 if (semaphore->semval < 0) 2432 semaphore->semval = 0; 2433 if (semaphore->semval > SEMVMX) 2434 semaphore->semval = SEMVMX; 2435 ipc_update_pid(&semaphore->sempid, task_tgid(current)); 2436 } 2437 } 2438 /* maybe some queued-up processes were waiting for this */ 2439 do_smart_update(sma, NULL, 0, 1, &wake_q); 2440 sem_unlock(sma, -1); 2441 rcu_read_unlock(); 2442 wake_up_q(&wake_q); 2443 2444 kvfree_rcu(un, rcu); 2445 } 2446 kfree(ulp); 2447 } 2448 2449 #ifdef CONFIG_PROC_FS 2450 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 2451 { 2452 struct user_namespace *user_ns = seq_user_ns(s); 2453 struct kern_ipc_perm *ipcp = it; 2454 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 2455 time64_t sem_otime; 2456 2457 /* 2458 * The proc interface isn't aware of sem_lock(), it calls 2459 * ipc_lock_object(), i.e. spin_lock(&sma->sem_perm.lock). 2460 * (in sysvipc_find_ipc) 2461 * In order to stay compatible with sem_lock(), we must 2462 * enter / leave complex_mode. 
2463 */ 2464 complexmode_enter(sma); 2465 2466 sem_otime = get_semotime(sma); 2467 2468 seq_printf(s, 2469 "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n", 2470 sma->sem_perm.key, 2471 sma->sem_perm.id, 2472 sma->sem_perm.mode, 2473 sma->sem_nsems, 2474 from_kuid_munged(user_ns, sma->sem_perm.uid), 2475 from_kgid_munged(user_ns, sma->sem_perm.gid), 2476 from_kuid_munged(user_ns, sma->sem_perm.cuid), 2477 from_kgid_munged(user_ns, sma->sem_perm.cgid), 2478 sem_otime, 2479 sma->sem_ctime); 2480 2481 complexmode_tryleave(sma); 2482 2483 return 0; 2484 } 2485 #endif 2486