// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *   Thus: Perfect SMP scaling between independent semaphore arrays.
 *         If multiple semaphores in one array are used, then cache line
 *         thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This makes it possible to
 *   achieve FIFO ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
*/

#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>
#include <linux/nospec.h>
#include <linux/rhashtable.h>

#include <linux/uaccess.h>
#include "util.h"

/*
 * One semaphore structure for each semaphore in the system.
 * Instances live in the flexible sems[] array at the end of
 * struct sem_array.
 */
struct sem {
	int	semval;		/* current value */
	/*
	 * PID of the process that last modified the semaphore. For
	 * Linux, specifically these are:
	 *  - semop
	 *  - semctl, via SETVAL and SETALL.
	 *  - at task exit when performing undo adjustments (see exit_sem).
	 */
	struct pid *sempid;
	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
	struct list_head pending_alter; /* pending single-sop operations */
					/* that alter the semaphore */
	struct list_head pending_const; /* pending single-sop operations */
					/* that do not alter the semaphore */
	time64_t	 sem_otime;	/* candidate for sem_otime: written by */
					/* set_semotime() on one semaphore only */
} ____cacheline_aligned_in_smp;

/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
	time64_t		sem_ctime;	/* create/last semctl() time */
	struct list_head	pending_alter;	/* pending operations */
						/* that alter the array */
	struct list_head	pending_const;	/* pending complex operations */
						/* that do not alter semvals */
	struct list_head	list_id;	/* undo requests on this array */
	int			sem_nsems;	/* no. of semaphores in array */
	int			complex_count;	/* pending complex operations */
	unsigned int		use_global_lock;/* >0: global lock required */

	struct sem		sems[];		/* the sem_nsems semaphores */
} __randomize_layout;

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	struct pid		*pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
	struct sembuf		*blocking; /* the operation that blocked */
	int			nsops;	 /* number of operations */
	bool			alter;	 /* does *sops alter the array? */
	bool			dupsop;	 /* sops on more than one sem_num */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list:
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks of a CLONE_SYSVSEM task group.
*/
struct sem_undo_list {
	refcount_t		refcnt;
	spinlock_t		lock;		/* taken for writes to list_proc */
	struct list_head	list_proc;	/* the sem_undo entries, linked */
						/* through sem_undo.list_proc */
};


/* The ipc id table used for semaphores inside namespace @ns. */
#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *	sem_undo.id_next,
 *	sem_array.complex_count,
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 *	NOTE(review): neither sem_undo.id_next nor sem_array.sem_undo is a
 *	field of the structures defined in this file; presumably these mean
 *	sem_undo.list_id and sem_array.list_id - confirm.
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 *
 * Exceptions:
 * 1) use_global_lock: (SEM_BARRIER_1)
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release(): Immediately after setting it to 0,
 * a simple op can start.
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
 * To prevent the compiler/cpu temporarily writing 0 to use_global_lock,
 * READ_ONCE()/WRITE_ONCE() is used.
 *
 * 2) queue.status: (SEM_BARRIER_2)
 * Initialization is done while holding sem_lock(), so no further barrier is
 * required.
 * Setting it to a result code is a RELEASE, this is ensured by both a
 * smp_store_release() (for case a) and while holding sem_lock()
 * (for case b).
 * The ACQUIRE when reading the result code without holding sem_lock() is
 * achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep().
 * (case a above).
 * Reading the result code while holding sem_lock() needs no further barriers,
 * the locks inside sem_lock() enforce ordering (case b above)
 *
 * 3) current->state:
 * current->state is set to TASK_INTERRUPTIBLE while holding sem_lock().
 * The wakeup is handled using the wake_q infrastructure. wake_q wakeups may
 * happen immediately after calling wake_q_add. As wake_q_add_safe() is called
 * when holding sem_lock(), no further barriers are required.
 *
 * See also ipc/mqueue.c for more details on the covered races.
242 */ 243 244 #define sc_semmsl sem_ctls[0] 245 #define sc_semmns sem_ctls[1] 246 #define sc_semopm sem_ctls[2] 247 #define sc_semmni sem_ctls[3] 248 249 void sem_init_ns(struct ipc_namespace *ns) 250 { 251 ns->sc_semmsl = SEMMSL; 252 ns->sc_semmns = SEMMNS; 253 ns->sc_semopm = SEMOPM; 254 ns->sc_semmni = SEMMNI; 255 ns->used_sems = 0; 256 ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 257 } 258 259 #ifdef CONFIG_IPC_NS 260 void sem_exit_ns(struct ipc_namespace *ns) 261 { 262 free_ipcs(ns, &sem_ids(ns), freeary); 263 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 264 rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht); 265 } 266 #endif 267 268 void __init sem_init(void) 269 { 270 sem_init_ns(&init_ipc_ns); 271 ipc_init_proc_interface("sysvipc/sem", 272 " key semid perms nsems uid gid cuid cgid otime ctime\n", 273 IPC_SEM_IDS, sysvipc_sem_proc_show); 274 } 275 276 /** 277 * unmerge_queues - unmerge queues, if possible. 278 * @sma: semaphore array 279 * 280 * The function unmerges the wait queues if complex_count is 0. 281 * It must be called prior to dropping the global semaphore array lock. 282 */ 283 static void unmerge_queues(struct sem_array *sma) 284 { 285 struct sem_queue *q, *tq; 286 287 /* complex operations still around? */ 288 if (sma->complex_count) 289 return; 290 /* 291 * We will switch back to simple mode. 292 * Move all pending operation back into the per-semaphore 293 * queues. 294 */ 295 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 296 struct sem *curr; 297 curr = &sma->sems[q->sops[0].sem_num]; 298 299 list_add_tail(&q->list, &curr->pending_alter); 300 } 301 INIT_LIST_HEAD(&sma->pending_alter); 302 } 303 304 /** 305 * merge_queues - merge single semop queues into global queue 306 * @sma: semaphore array 307 * 308 * This function merges all per-semaphore queues into the global queue. 309 * It is necessary to achieve FIFO ordering for the pending single-sop 310 * operations when a multi-semop operation must sleep. 
311 * Only the alter operations must be moved, the const operations can stay. 312 */ 313 static void merge_queues(struct sem_array *sma) 314 { 315 int i; 316 for (i = 0; i < sma->sem_nsems; i++) { 317 struct sem *sem = &sma->sems[i]; 318 319 list_splice_init(&sem->pending_alter, &sma->pending_alter); 320 } 321 } 322 323 static void sem_rcu_free(struct rcu_head *head) 324 { 325 struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu); 326 struct sem_array *sma = container_of(p, struct sem_array, sem_perm); 327 328 security_sem_free(&sma->sem_perm); 329 kvfree(sma); 330 } 331 332 /* 333 * Enter the mode suitable for non-simple operations: 334 * Caller must own sem_perm.lock. 335 */ 336 static void complexmode_enter(struct sem_array *sma) 337 { 338 int i; 339 struct sem *sem; 340 341 if (sma->use_global_lock > 0) { 342 /* 343 * We are already in global lock mode. 344 * Nothing to do, just reset the 345 * counter until we return to simple mode. 346 */ 347 WRITE_ONCE(sma->use_global_lock, USE_GLOBAL_LOCK_HYSTERESIS); 348 return; 349 } 350 WRITE_ONCE(sma->use_global_lock, USE_GLOBAL_LOCK_HYSTERESIS); 351 352 for (i = 0; i < sma->sem_nsems; i++) { 353 sem = &sma->sems[i]; 354 spin_lock(&sem->lock); 355 spin_unlock(&sem->lock); 356 } 357 } 358 359 /* 360 * Try to leave the mode that disallows simple operations: 361 * Caller must own sem_perm.lock. 362 */ 363 static void complexmode_tryleave(struct sem_array *sma) 364 { 365 if (sma->complex_count) { 366 /* Complex ops are sleeping. 367 * We must stay in complex mode 368 */ 369 return; 370 } 371 if (sma->use_global_lock == 1) { 372 373 /* See SEM_BARRIER_1 for purpose/pairing */ 374 smp_store_release(&sma->use_global_lock, 0); 375 } else { 376 WRITE_ONCE(sma->use_global_lock, 377 sma->use_global_lock-1); 378 } 379 } 380 381 #define SEM_GLOBAL_LOCK (-1) 382 /* 383 * If the request contains only one semaphore operation, and there are 384 * no complex transactions pending, lock only the semaphore involved. 
385 * Otherwise, lock the entire semaphore array, since we either have 386 * multiple semaphores in our own semops, or we need to look at 387 * semaphores from other pending complex operations. 388 */ 389 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, 390 int nsops) 391 { 392 struct sem *sem; 393 int idx; 394 395 if (nsops != 1) { 396 /* Complex operation - acquire a full lock */ 397 ipc_lock_object(&sma->sem_perm); 398 399 /* Prevent parallel simple ops */ 400 complexmode_enter(sma); 401 return SEM_GLOBAL_LOCK; 402 } 403 404 /* 405 * Only one semaphore affected - try to optimize locking. 406 * Optimized locking is possible if no complex operation 407 * is either enqueued or processed right now. 408 * 409 * Both facts are tracked by use_global_mode. 410 */ 411 idx = array_index_nospec(sops->sem_num, sma->sem_nsems); 412 sem = &sma->sems[idx]; 413 414 /* 415 * Initial check for use_global_lock. Just an optimization, 416 * no locking, no memory barrier. 417 */ 418 if (!READ_ONCE(sma->use_global_lock)) { 419 /* 420 * It appears that no complex operation is around. 421 * Acquire the per-semaphore lock. 422 */ 423 spin_lock(&sem->lock); 424 425 /* see SEM_BARRIER_1 for purpose/pairing */ 426 if (!smp_load_acquire(&sma->use_global_lock)) { 427 /* fast path successful! */ 428 return sops->sem_num; 429 } 430 spin_unlock(&sem->lock); 431 } 432 433 /* slow path: acquire the full lock */ 434 ipc_lock_object(&sma->sem_perm); 435 436 if (sma->use_global_lock == 0) { 437 /* 438 * The use_global_lock mode ended while we waited for 439 * sma->sem_perm.lock. Thus we must switch to locking 440 * with sem->lock. 441 * Unlike in the fast path, there is no need to recheck 442 * sma->use_global_lock after we have acquired sem->lock: 443 * We own sma->sem_perm.lock, thus use_global_lock cannot 444 * change. 
445 */ 446 spin_lock(&sem->lock); 447 448 ipc_unlock_object(&sma->sem_perm); 449 return sops->sem_num; 450 } else { 451 /* 452 * Not a false alarm, thus continue to use the global lock 453 * mode. No need for complexmode_enter(), this was done by 454 * the caller that has set use_global_mode to non-zero. 455 */ 456 return SEM_GLOBAL_LOCK; 457 } 458 } 459 460 static inline void sem_unlock(struct sem_array *sma, int locknum) 461 { 462 if (locknum == SEM_GLOBAL_LOCK) { 463 unmerge_queues(sma); 464 complexmode_tryleave(sma); 465 ipc_unlock_object(&sma->sem_perm); 466 } else { 467 struct sem *sem = &sma->sems[locknum]; 468 spin_unlock(&sem->lock); 469 } 470 } 471 472 /* 473 * sem_lock_(check_) routines are called in the paths where the rwsem 474 * is not held. 475 * 476 * The caller holds the RCU read lock. 477 */ 478 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 479 { 480 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 481 482 if (IS_ERR(ipcp)) 483 return ERR_CAST(ipcp); 484 485 return container_of(ipcp, struct sem_array, sem_perm); 486 } 487 488 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 489 int id) 490 { 491 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 492 493 if (IS_ERR(ipcp)) 494 return ERR_CAST(ipcp); 495 496 return container_of(ipcp, struct sem_array, sem_perm); 497 } 498 499 static inline void sem_lock_and_putref(struct sem_array *sma) 500 { 501 sem_lock(sma, NULL, -1); 502 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 503 } 504 505 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 506 { 507 ipc_rmid(&sem_ids(ns), &s->sem_perm); 508 } 509 510 static struct sem_array *sem_alloc(size_t nsems) 511 { 512 struct sem_array *sma; 513 514 if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) 515 return NULL; 516 517 sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL_ACCOUNT); 518 if (unlikely(!sma)) 519 
return NULL; 520 521 return sma; 522 } 523 524 /** 525 * newary - Create a new semaphore set 526 * @ns: namespace 527 * @params: ptr to the structure that contains key, semflg and nsems 528 * 529 * Called with sem_ids.rwsem held (as a writer) 530 */ 531 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 532 { 533 int retval; 534 struct sem_array *sma; 535 key_t key = params->key; 536 int nsems = params->u.nsems; 537 int semflg = params->flg; 538 int i; 539 540 if (!nsems) 541 return -EINVAL; 542 if (ns->used_sems + nsems > ns->sc_semmns) 543 return -ENOSPC; 544 545 sma = sem_alloc(nsems); 546 if (!sma) 547 return -ENOMEM; 548 549 sma->sem_perm.mode = (semflg & S_IRWXUGO); 550 sma->sem_perm.key = key; 551 552 sma->sem_perm.security = NULL; 553 retval = security_sem_alloc(&sma->sem_perm); 554 if (retval) { 555 kvfree(sma); 556 return retval; 557 } 558 559 for (i = 0; i < nsems; i++) { 560 INIT_LIST_HEAD(&sma->sems[i].pending_alter); 561 INIT_LIST_HEAD(&sma->sems[i].pending_const); 562 spin_lock_init(&sma->sems[i].lock); 563 } 564 565 sma->complex_count = 0; 566 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 567 INIT_LIST_HEAD(&sma->pending_alter); 568 INIT_LIST_HEAD(&sma->pending_const); 569 INIT_LIST_HEAD(&sma->list_id); 570 sma->sem_nsems = nsems; 571 sma->sem_ctime = ktime_get_real_seconds(); 572 573 /* ipc_addid() locks sma upon success. */ 574 retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 575 if (retval < 0) { 576 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 577 return retval; 578 } 579 ns->used_sems += nsems; 580 581 sem_unlock(sma, -1); 582 rcu_read_unlock(); 583 584 return sma->sem_perm.id; 585 } 586 587 588 /* 589 * Called with sem_ids.rwsem and ipcp locked. 
590 */ 591 static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) 592 { 593 struct sem_array *sma; 594 595 sma = container_of(ipcp, struct sem_array, sem_perm); 596 if (params->u.nsems > sma->sem_nsems) 597 return -EINVAL; 598 599 return 0; 600 } 601 602 long ksys_semget(key_t key, int nsems, int semflg) 603 { 604 struct ipc_namespace *ns; 605 static const struct ipc_ops sem_ops = { 606 .getnew = newary, 607 .associate = security_sem_associate, 608 .more_checks = sem_more_checks, 609 }; 610 struct ipc_params sem_params; 611 612 ns = current->nsproxy->ipc_ns; 613 614 if (nsems < 0 || nsems > ns->sc_semmsl) 615 return -EINVAL; 616 617 sem_params.key = key; 618 sem_params.flg = semflg; 619 sem_params.u.nsems = nsems; 620 621 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 622 } 623 624 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 625 { 626 return ksys_semget(key, nsems, semflg); 627 } 628 629 /** 630 * perform_atomic_semop[_slow] - Attempt to perform semaphore 631 * operations on a given array. 632 * @sma: semaphore array 633 * @q: struct sem_queue that describes the operation 634 * 635 * Caller blocking are as follows, based the value 636 * indicated by the semaphore operation (sem_op): 637 * 638 * (1) >0 never blocks. 639 * (2) 0 (wait-for-zero operation): semval is non-zero. 640 * (3) <0 attempting to decrement semval to a value smaller than zero. 641 * 642 * Returns 0 if the operation was possible. 643 * Returns 1 if the operation is impossible, the caller must sleep. 644 * Returns <0 for error codes. 
645 */ 646 static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) 647 { 648 int result, sem_op, nsops; 649 struct pid *pid; 650 struct sembuf *sop; 651 struct sem *curr; 652 struct sembuf *sops; 653 struct sem_undo *un; 654 655 sops = q->sops; 656 nsops = q->nsops; 657 un = q->undo; 658 659 for (sop = sops; sop < sops + nsops; sop++) { 660 int idx = array_index_nospec(sop->sem_num, sma->sem_nsems); 661 curr = &sma->sems[idx]; 662 sem_op = sop->sem_op; 663 result = curr->semval; 664 665 if (!sem_op && result) 666 goto would_block; 667 668 result += sem_op; 669 if (result < 0) 670 goto would_block; 671 if (result > SEMVMX) 672 goto out_of_range; 673 674 if (sop->sem_flg & SEM_UNDO) { 675 int undo = un->semadj[sop->sem_num] - sem_op; 676 /* Exceeding the undo range is an error. */ 677 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 678 goto out_of_range; 679 un->semadj[sop->sem_num] = undo; 680 } 681 682 curr->semval = result; 683 } 684 685 sop--; 686 pid = q->pid; 687 while (sop >= sops) { 688 ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid); 689 sop--; 690 } 691 692 return 0; 693 694 out_of_range: 695 result = -ERANGE; 696 goto undo; 697 698 would_block: 699 q->blocking = sop; 700 701 if (sop->sem_flg & IPC_NOWAIT) 702 result = -EAGAIN; 703 else 704 result = 1; 705 706 undo: 707 sop--; 708 while (sop >= sops) { 709 sem_op = sop->sem_op; 710 sma->sems[sop->sem_num].semval -= sem_op; 711 if (sop->sem_flg & SEM_UNDO) 712 un->semadj[sop->sem_num] += sem_op; 713 sop--; 714 } 715 716 return result; 717 } 718 719 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 720 { 721 int result, sem_op, nsops; 722 struct sembuf *sop; 723 struct sem *curr; 724 struct sembuf *sops; 725 struct sem_undo *un; 726 727 sops = q->sops; 728 nsops = q->nsops; 729 un = q->undo; 730 731 if (unlikely(q->dupsop)) 732 return perform_atomic_semop_slow(sma, q); 733 734 /* 735 * We scan the semaphore set twice, first to ensure that the entire 736 * 
operation can succeed, therefore avoiding any pointless writes 737 * to shared memory and having to undo such changes in order to block 738 * until the operations can go through. 739 */ 740 for (sop = sops; sop < sops + nsops; sop++) { 741 int idx = array_index_nospec(sop->sem_num, sma->sem_nsems); 742 743 curr = &sma->sems[idx]; 744 sem_op = sop->sem_op; 745 result = curr->semval; 746 747 if (!sem_op && result) 748 goto would_block; /* wait-for-zero */ 749 750 result += sem_op; 751 if (result < 0) 752 goto would_block; 753 754 if (result > SEMVMX) 755 return -ERANGE; 756 757 if (sop->sem_flg & SEM_UNDO) { 758 int undo = un->semadj[sop->sem_num] - sem_op; 759 760 /* Exceeding the undo range is an error. */ 761 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 762 return -ERANGE; 763 } 764 } 765 766 for (sop = sops; sop < sops + nsops; sop++) { 767 curr = &sma->sems[sop->sem_num]; 768 sem_op = sop->sem_op; 769 result = curr->semval; 770 771 if (sop->sem_flg & SEM_UNDO) { 772 int undo = un->semadj[sop->sem_num] - sem_op; 773 774 un->semadj[sop->sem_num] = undo; 775 } 776 curr->semval += sem_op; 777 ipc_update_pid(&curr->sempid, q->pid); 778 } 779 780 return 0; 781 782 would_block: 783 q->blocking = sop; 784 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; 785 } 786 787 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, 788 struct wake_q_head *wake_q) 789 { 790 struct task_struct *sleeper; 791 792 sleeper = get_task_struct(q->sleeper); 793 794 /* see SEM_BARRIER_2 for purpose/pairing */ 795 smp_store_release(&q->status, error); 796 797 wake_q_add_safe(wake_q, sleeper); 798 } 799 800 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 801 { 802 list_del(&q->list); 803 if (q->nsops > 1) 804 sma->complex_count--; 805 } 806 807 /** check_restart(sma, q) 808 * @sma: semaphore array 809 * @q: the operation that just completed 810 * 811 * update_queue is O(N^2) when it restarts scanning the whole queue of 812 * waiting operations. 
Therefore this function checks if the restart is 813 * really necessary. It is called after a previously waiting operation 814 * modified the array. 815 * Note that wait-for-zero operations are handled without restart. 816 */ 817 static inline int check_restart(struct sem_array *sma, struct sem_queue *q) 818 { 819 /* pending complex alter operations are too difficult to analyse */ 820 if (!list_empty(&sma->pending_alter)) 821 return 1; 822 823 /* we were a sleeping complex operation. Too difficult */ 824 if (q->nsops > 1) 825 return 1; 826 827 /* It is impossible that someone waits for the new value: 828 * - complex operations always restart. 829 * - wait-for-zero are handled separately. 830 * - q is a previously sleeping simple operation that 831 * altered the array. It must be a decrement, because 832 * simple increments never sleep. 833 * - If there are older (higher priority) decrements 834 * in the queue, then they have observed the original 835 * semval value and couldn't proceed. The operation 836 * decremented to value - thus they won't proceed either. 837 */ 838 return 0; 839 } 840 841 /** 842 * wake_const_ops - wake up non-alter tasks 843 * @sma: semaphore array. 844 * @semnum: semaphore that was modified. 845 * @wake_q: lockless wake-queue head. 846 * 847 * wake_const_ops must be called after a semaphore in a semaphore array 848 * was set to 0. If complex const operations are pending, wake_const_ops must 849 * be called with semnum = -1, as well as with the number of each modified 850 * semaphore. 851 * The tasks that must be woken up are added to @wake_q. The return code 852 * is stored in q->pid. 853 * The function returns 1 if at least one operation was completed successfully. 
854 */ 855 static int wake_const_ops(struct sem_array *sma, int semnum, 856 struct wake_q_head *wake_q) 857 { 858 struct sem_queue *q, *tmp; 859 struct list_head *pending_list; 860 int semop_completed = 0; 861 862 if (semnum == -1) 863 pending_list = &sma->pending_const; 864 else 865 pending_list = &sma->sems[semnum].pending_const; 866 867 list_for_each_entry_safe(q, tmp, pending_list, list) { 868 int error = perform_atomic_semop(sma, q); 869 870 if (error > 0) 871 continue; 872 /* operation completed, remove from queue & wakeup */ 873 unlink_queue(sma, q); 874 875 wake_up_sem_queue_prepare(q, error, wake_q); 876 if (error == 0) 877 semop_completed = 1; 878 } 879 880 return semop_completed; 881 } 882 883 /** 884 * do_smart_wakeup_zero - wakeup all wait for zero tasks 885 * @sma: semaphore array 886 * @sops: operations that were performed 887 * @nsops: number of operations 888 * @wake_q: lockless wake-queue head 889 * 890 * Checks all required queue for wait-for-zero operations, based 891 * on the actual changes that were performed on the semaphore array. 892 * The function returns 1 if at least one operation was completed successfully. 893 */ 894 static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, 895 int nsops, struct wake_q_head *wake_q) 896 { 897 int i; 898 int semop_completed = 0; 899 int got_zero = 0; 900 901 /* first: the per-semaphore queues, if known */ 902 if (sops) { 903 for (i = 0; i < nsops; i++) { 904 int num = sops[i].sem_num; 905 906 if (sma->sems[num].semval == 0) { 907 got_zero = 1; 908 semop_completed |= wake_const_ops(sma, num, wake_q); 909 } 910 } 911 } else { 912 /* 913 * No sops means modified semaphores not known. 914 * Assume all were changed. 915 */ 916 for (i = 0; i < sma->sem_nsems; i++) { 917 if (sma->sems[i].semval == 0) { 918 got_zero = 1; 919 semop_completed |= wake_const_ops(sma, i, wake_q); 920 } 921 } 922 } 923 /* 924 * If one of the modified semaphores got 0, 925 * then check the global queue, too. 
926 */ 927 if (got_zero) 928 semop_completed |= wake_const_ops(sma, -1, wake_q); 929 930 return semop_completed; 931 } 932 933 934 /** 935 * update_queue - look for tasks that can be completed. 936 * @sma: semaphore array. 937 * @semnum: semaphore that was modified. 938 * @wake_q: lockless wake-queue head. 939 * 940 * update_queue must be called after a semaphore in a semaphore array 941 * was modified. If multiple semaphores were modified, update_queue must 942 * be called with semnum = -1, as well as with the number of each modified 943 * semaphore. 944 * The tasks that must be woken up are added to @wake_q. The return code 945 * is stored in q->pid. 946 * The function internally checks if const operations can now succeed. 947 * 948 * The function return 1 if at least one semop was completed successfully. 949 */ 950 static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q) 951 { 952 struct sem_queue *q, *tmp; 953 struct list_head *pending_list; 954 int semop_completed = 0; 955 956 if (semnum == -1) 957 pending_list = &sma->pending_alter; 958 else 959 pending_list = &sma->sems[semnum].pending_alter; 960 961 again: 962 list_for_each_entry_safe(q, tmp, pending_list, list) { 963 int error, restart; 964 965 /* If we are scanning the single sop, per-semaphore list of 966 * one semaphore and that semaphore is 0, then it is not 967 * necessary to scan further: simple increments 968 * that affect only one entry succeed immediately and cannot 969 * be in the per semaphore pending queue, and decrements 970 * cannot be successful if the value is already 0. 971 */ 972 if (semnum != -1 && sma->sems[semnum].semval == 0) 973 break; 974 975 error = perform_atomic_semop(sma, q); 976 977 /* Does q->sleeper still need to sleep? 
*/ 978 if (error > 0) 979 continue; 980 981 unlink_queue(sma, q); 982 983 if (error) { 984 restart = 0; 985 } else { 986 semop_completed = 1; 987 do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q); 988 restart = check_restart(sma, q); 989 } 990 991 wake_up_sem_queue_prepare(q, error, wake_q); 992 if (restart) 993 goto again; 994 } 995 return semop_completed; 996 } 997 998 /** 999 * set_semotime - set sem_otime 1000 * @sma: semaphore array 1001 * @sops: operations that modified the array, may be NULL 1002 * 1003 * sem_otime is replicated to avoid cache line trashing. 1004 * This function sets one instance to the current time. 1005 */ 1006 static void set_semotime(struct sem_array *sma, struct sembuf *sops) 1007 { 1008 if (sops == NULL) { 1009 sma->sems[0].sem_otime = ktime_get_real_seconds(); 1010 } else { 1011 sma->sems[sops[0].sem_num].sem_otime = 1012 ktime_get_real_seconds(); 1013 } 1014 } 1015 1016 /** 1017 * do_smart_update - optimized update_queue 1018 * @sma: semaphore array 1019 * @sops: operations that were performed 1020 * @nsops: number of operations 1021 * @otime: force setting otime 1022 * @wake_q: lockless wake-queue head 1023 * 1024 * do_smart_update() does the required calls to update_queue and wakeup_zero, 1025 * based on the actual changes that were performed on the semaphore array. 1026 * Note that the function does not do the actual wake-up: the caller is 1027 * responsible for calling wake_up_q(). 1028 * It is safe to perform this call after dropping all locks. 1029 */ 1030 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, 1031 int otime, struct wake_q_head *wake_q) 1032 { 1033 int i; 1034 1035 otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q); 1036 1037 if (!list_empty(&sma->pending_alter)) { 1038 /* semaphore array uses the global queue - just process it. 
*/ 1039 otime |= update_queue(sma, -1, wake_q); 1040 } else { 1041 if (!sops) { 1042 /* 1043 * No sops, thus the modified semaphores are not 1044 * known. Check all. 1045 */ 1046 for (i = 0; i < sma->sem_nsems; i++) 1047 otime |= update_queue(sma, i, wake_q); 1048 } else { 1049 /* 1050 * Check the semaphores that were increased: 1051 * - No complex ops, thus all sleeping ops are 1052 * decrease. 1053 * - if we decreased the value, then any sleeping 1054 * semaphore ops won't be able to run: If the 1055 * previous value was too small, then the new 1056 * value will be too small, too. 1057 */ 1058 for (i = 0; i < nsops; i++) { 1059 if (sops[i].sem_op > 0) { 1060 otime |= update_queue(sma, 1061 sops[i].sem_num, wake_q); 1062 } 1063 } 1064 } 1065 } 1066 if (otime) 1067 set_semotime(sma, sops); 1068 } 1069 1070 /* 1071 * check_qop: Test if a queued operation sleeps on the semaphore semnum 1072 */ 1073 static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q, 1074 bool count_zero) 1075 { 1076 struct sembuf *sop = q->blocking; 1077 1078 /* 1079 * Linux always (since 0.99.10) reported a task as sleeping on all 1080 * semaphores. This violates SUS, therefore it was changed to the 1081 * standard compliant behavior. 1082 * Give the administrators a chance to notice that an application 1083 * might misbehave because it relies on the Linux behavior. 
1084 */ 1085 pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n" 1086 "The task %s (%d) triggered the difference, watch for misbehavior.\n", 1087 current->comm, task_pid_nr(current)); 1088 1089 if (sop->sem_num != semnum) 1090 return 0; 1091 1092 if (count_zero && sop->sem_op == 0) 1093 return 1; 1094 if (!count_zero && sop->sem_op < 0) 1095 return 1; 1096 1097 return 0; 1098 } 1099 1100 /* The following counts are associated to each semaphore: 1101 * semncnt number of tasks waiting on semval being nonzero 1102 * semzcnt number of tasks waiting on semval being zero 1103 * 1104 * Per definition, a task waits only on the semaphore of the first semop 1105 * that cannot proceed, even if additional operation would block, too. 1106 */ 1107 static int count_semcnt(struct sem_array *sma, ushort semnum, 1108 bool count_zero) 1109 { 1110 struct list_head *l; 1111 struct sem_queue *q; 1112 int semcnt; 1113 1114 semcnt = 0; 1115 /* First: check the simple operations. They are easy to evaluate */ 1116 if (count_zero) 1117 l = &sma->sems[semnum].pending_const; 1118 else 1119 l = &sma->sems[semnum].pending_alter; 1120 1121 list_for_each_entry(q, l, list) { 1122 /* all task on a per-semaphore list sleep on exactly 1123 * that semaphore 1124 */ 1125 semcnt++; 1126 } 1127 1128 /* Then: check the complex operations. */ 1129 list_for_each_entry(q, &sma->pending_alter, list) { 1130 semcnt += check_qop(sma, semnum, q, count_zero); 1131 } 1132 if (count_zero) { 1133 list_for_each_entry(q, &sma->pending_const, list) { 1134 semcnt += check_qop(sma, semnum, q, count_zero); 1135 } 1136 } 1137 return semcnt; 1138 } 1139 1140 /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked 1141 * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem 1142 * remains locked on exit. 
1143 */ 1144 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 1145 { 1146 struct sem_undo *un, *tu; 1147 struct sem_queue *q, *tq; 1148 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 1149 int i; 1150 DEFINE_WAKE_Q(wake_q); 1151 1152 /* Free the existing undo structures for this semaphore set. */ 1153 ipc_assert_locked_object(&sma->sem_perm); 1154 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 1155 list_del(&un->list_id); 1156 spin_lock(&un->ulp->lock); 1157 un->semid = -1; 1158 list_del_rcu(&un->list_proc); 1159 spin_unlock(&un->ulp->lock); 1160 kvfree_rcu(un, rcu); 1161 } 1162 1163 /* Wake up all pending processes and let them fail with EIDRM. */ 1164 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1165 unlink_queue(sma, q); 1166 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1167 } 1168 1169 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1170 unlink_queue(sma, q); 1171 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1172 } 1173 for (i = 0; i < sma->sem_nsems; i++) { 1174 struct sem *sem = &sma->sems[i]; 1175 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1176 unlink_queue(sma, q); 1177 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1178 } 1179 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1180 unlink_queue(sma, q); 1181 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1182 } 1183 ipc_update_pid(&sem->sempid, NULL); 1184 } 1185 1186 /* Remove the semaphore set from the IDR */ 1187 sem_rmid(ns, sma); 1188 sem_unlock(sma, -1); 1189 rcu_read_unlock(); 1190 1191 wake_up_q(&wake_q); 1192 ns->used_sems -= sma->sem_nsems; 1193 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1194 } 1195 1196 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1197 { 1198 switch (version) { 1199 case IPC_64: 1200 return copy_to_user(buf, in, sizeof(*in)); 1201 case IPC_OLD: 1202 { 1203 struct semid_ds out; 1204 1205 memset(&out, 0, 
sizeof(out)); 1206 1207 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1208 1209 out.sem_otime = in->sem_otime; 1210 out.sem_ctime = in->sem_ctime; 1211 out.sem_nsems = in->sem_nsems; 1212 1213 return copy_to_user(buf, &out, sizeof(out)); 1214 } 1215 default: 1216 return -EINVAL; 1217 } 1218 } 1219 1220 static time64_t get_semotime(struct sem_array *sma) 1221 { 1222 int i; 1223 time64_t res; 1224 1225 res = sma->sems[0].sem_otime; 1226 for (i = 1; i < sma->sem_nsems; i++) { 1227 time64_t to = sma->sems[i].sem_otime; 1228 1229 if (to > res) 1230 res = to; 1231 } 1232 return res; 1233 } 1234 1235 static int semctl_stat(struct ipc_namespace *ns, int semid, 1236 int cmd, struct semid64_ds *semid64) 1237 { 1238 struct sem_array *sma; 1239 time64_t semotime; 1240 int err; 1241 1242 memset(semid64, 0, sizeof(*semid64)); 1243 1244 rcu_read_lock(); 1245 if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) { 1246 sma = sem_obtain_object(ns, semid); 1247 if (IS_ERR(sma)) { 1248 err = PTR_ERR(sma); 1249 goto out_unlock; 1250 } 1251 } else { /* IPC_STAT */ 1252 sma = sem_obtain_object_check(ns, semid); 1253 if (IS_ERR(sma)) { 1254 err = PTR_ERR(sma); 1255 goto out_unlock; 1256 } 1257 } 1258 1259 /* see comment for SHM_STAT_ANY */ 1260 if (cmd == SEM_STAT_ANY) 1261 audit_ipc_obj(&sma->sem_perm); 1262 else { 1263 err = -EACCES; 1264 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1265 goto out_unlock; 1266 } 1267 1268 err = security_sem_semctl(&sma->sem_perm, cmd); 1269 if (err) 1270 goto out_unlock; 1271 1272 ipc_lock_object(&sma->sem_perm); 1273 1274 if (!ipc_valid_object(&sma->sem_perm)) { 1275 ipc_unlock_object(&sma->sem_perm); 1276 err = -EIDRM; 1277 goto out_unlock; 1278 } 1279 1280 kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm); 1281 semotime = get_semotime(sma); 1282 semid64->sem_otime = semotime; 1283 semid64->sem_ctime = sma->sem_ctime; 1284 #ifndef CONFIG_64BIT 1285 semid64->sem_otime_high = semotime >> 32; 1286 semid64->sem_ctime_high = sma->sem_ctime >> 32; 1287 
#endif 1288 semid64->sem_nsems = sma->sem_nsems; 1289 1290 if (cmd == IPC_STAT) { 1291 /* 1292 * As defined in SUS: 1293 * Return 0 on success 1294 */ 1295 err = 0; 1296 } else { 1297 /* 1298 * SEM_STAT and SEM_STAT_ANY (both Linux specific) 1299 * Return the full id, including the sequence number 1300 */ 1301 err = sma->sem_perm.id; 1302 } 1303 ipc_unlock_object(&sma->sem_perm); 1304 out_unlock: 1305 rcu_read_unlock(); 1306 return err; 1307 } 1308 1309 static int semctl_info(struct ipc_namespace *ns, int semid, 1310 int cmd, void __user *p) 1311 { 1312 struct seminfo seminfo; 1313 int max_idx; 1314 int err; 1315 1316 err = security_sem_semctl(NULL, cmd); 1317 if (err) 1318 return err; 1319 1320 memset(&seminfo, 0, sizeof(seminfo)); 1321 seminfo.semmni = ns->sc_semmni; 1322 seminfo.semmns = ns->sc_semmns; 1323 seminfo.semmsl = ns->sc_semmsl; 1324 seminfo.semopm = ns->sc_semopm; 1325 seminfo.semvmx = SEMVMX; 1326 seminfo.semmnu = SEMMNU; 1327 seminfo.semmap = SEMMAP; 1328 seminfo.semume = SEMUME; 1329 down_read(&sem_ids(ns).rwsem); 1330 if (cmd == SEM_INFO) { 1331 seminfo.semusz = sem_ids(ns).in_use; 1332 seminfo.semaem = ns->used_sems; 1333 } else { 1334 seminfo.semusz = SEMUSZ; 1335 seminfo.semaem = SEMAEM; 1336 } 1337 max_idx = ipc_get_maxidx(&sem_ids(ns)); 1338 up_read(&sem_ids(ns).rwsem); 1339 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 1340 return -EFAULT; 1341 return (max_idx < 0) ? 
0 : max_idx; 1342 } 1343 1344 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1345 int val) 1346 { 1347 struct sem_undo *un; 1348 struct sem_array *sma; 1349 struct sem *curr; 1350 int err; 1351 DEFINE_WAKE_Q(wake_q); 1352 1353 if (val > SEMVMX || val < 0) 1354 return -ERANGE; 1355 1356 rcu_read_lock(); 1357 sma = sem_obtain_object_check(ns, semid); 1358 if (IS_ERR(sma)) { 1359 rcu_read_unlock(); 1360 return PTR_ERR(sma); 1361 } 1362 1363 if (semnum < 0 || semnum >= sma->sem_nsems) { 1364 rcu_read_unlock(); 1365 return -EINVAL; 1366 } 1367 1368 1369 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1370 rcu_read_unlock(); 1371 return -EACCES; 1372 } 1373 1374 err = security_sem_semctl(&sma->sem_perm, SETVAL); 1375 if (err) { 1376 rcu_read_unlock(); 1377 return -EACCES; 1378 } 1379 1380 sem_lock(sma, NULL, -1); 1381 1382 if (!ipc_valid_object(&sma->sem_perm)) { 1383 sem_unlock(sma, -1); 1384 rcu_read_unlock(); 1385 return -EIDRM; 1386 } 1387 1388 semnum = array_index_nospec(semnum, sma->sem_nsems); 1389 curr = &sma->sems[semnum]; 1390 1391 ipc_assert_locked_object(&sma->sem_perm); 1392 list_for_each_entry(un, &sma->list_id, list_id) 1393 un->semadj[semnum] = 0; 1394 1395 curr->semval = val; 1396 ipc_update_pid(&curr->sempid, task_tgid(current)); 1397 sma->sem_ctime = ktime_get_real_seconds(); 1398 /* maybe some queued-up processes were waiting for this */ 1399 do_smart_update(sma, NULL, 0, 0, &wake_q); 1400 sem_unlock(sma, -1); 1401 rcu_read_unlock(); 1402 wake_up_q(&wake_q); 1403 return 0; 1404 } 1405 1406 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1407 int cmd, void __user *p) 1408 { 1409 struct sem_array *sma; 1410 struct sem *curr; 1411 int err, nsems; 1412 ushort fast_sem_io[SEMMSL_FAST]; 1413 ushort *sem_io = fast_sem_io; 1414 DEFINE_WAKE_Q(wake_q); 1415 1416 rcu_read_lock(); 1417 sma = sem_obtain_object_check(ns, semid); 1418 if (IS_ERR(sma)) { 1419 rcu_read_unlock(); 1420 return PTR_ERR(sma); 1421 } 1422 1423 
nsems = sma->sem_nsems; 1424 1425 err = -EACCES; 1426 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO)) 1427 goto out_rcu_wakeup; 1428 1429 err = security_sem_semctl(&sma->sem_perm, cmd); 1430 if (err) 1431 goto out_rcu_wakeup; 1432 1433 err = -EACCES; 1434 switch (cmd) { 1435 case GETALL: 1436 { 1437 ushort __user *array = p; 1438 int i; 1439 1440 sem_lock(sma, NULL, -1); 1441 if (!ipc_valid_object(&sma->sem_perm)) { 1442 err = -EIDRM; 1443 goto out_unlock; 1444 } 1445 if (nsems > SEMMSL_FAST) { 1446 if (!ipc_rcu_getref(&sma->sem_perm)) { 1447 err = -EIDRM; 1448 goto out_unlock; 1449 } 1450 sem_unlock(sma, -1); 1451 rcu_read_unlock(); 1452 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1453 GFP_KERNEL); 1454 if (sem_io == NULL) { 1455 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1456 return -ENOMEM; 1457 } 1458 1459 rcu_read_lock(); 1460 sem_lock_and_putref(sma); 1461 if (!ipc_valid_object(&sma->sem_perm)) { 1462 err = -EIDRM; 1463 goto out_unlock; 1464 } 1465 } 1466 for (i = 0; i < sma->sem_nsems; i++) 1467 sem_io[i] = sma->sems[i].semval; 1468 sem_unlock(sma, -1); 1469 rcu_read_unlock(); 1470 err = 0; 1471 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1472 err = -EFAULT; 1473 goto out_free; 1474 } 1475 case SETALL: 1476 { 1477 int i; 1478 struct sem_undo *un; 1479 1480 if (!ipc_rcu_getref(&sma->sem_perm)) { 1481 err = -EIDRM; 1482 goto out_rcu_wakeup; 1483 } 1484 rcu_read_unlock(); 1485 1486 if (nsems > SEMMSL_FAST) { 1487 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1488 GFP_KERNEL); 1489 if (sem_io == NULL) { 1490 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1491 return -ENOMEM; 1492 } 1493 } 1494 1495 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1496 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1497 err = -EFAULT; 1498 goto out_free; 1499 } 1500 1501 for (i = 0; i < nsems; i++) { 1502 if (sem_io[i] > SEMVMX) { 1503 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1504 err = -ERANGE; 1505 goto out_free; 1506 } 1507 } 
1508 rcu_read_lock(); 1509 sem_lock_and_putref(sma); 1510 if (!ipc_valid_object(&sma->sem_perm)) { 1511 err = -EIDRM; 1512 goto out_unlock; 1513 } 1514 1515 for (i = 0; i < nsems; i++) { 1516 sma->sems[i].semval = sem_io[i]; 1517 ipc_update_pid(&sma->sems[i].sempid, task_tgid(current)); 1518 } 1519 1520 ipc_assert_locked_object(&sma->sem_perm); 1521 list_for_each_entry(un, &sma->list_id, list_id) { 1522 for (i = 0; i < nsems; i++) 1523 un->semadj[i] = 0; 1524 } 1525 sma->sem_ctime = ktime_get_real_seconds(); 1526 /* maybe some queued-up processes were waiting for this */ 1527 do_smart_update(sma, NULL, 0, 0, &wake_q); 1528 err = 0; 1529 goto out_unlock; 1530 } 1531 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1532 } 1533 err = -EINVAL; 1534 if (semnum < 0 || semnum >= nsems) 1535 goto out_rcu_wakeup; 1536 1537 sem_lock(sma, NULL, -1); 1538 if (!ipc_valid_object(&sma->sem_perm)) { 1539 err = -EIDRM; 1540 goto out_unlock; 1541 } 1542 1543 semnum = array_index_nospec(semnum, nsems); 1544 curr = &sma->sems[semnum]; 1545 1546 switch (cmd) { 1547 case GETVAL: 1548 err = curr->semval; 1549 goto out_unlock; 1550 case GETPID: 1551 err = pid_vnr(curr->sempid); 1552 goto out_unlock; 1553 case GETNCNT: 1554 err = count_semcnt(sma, semnum, 0); 1555 goto out_unlock; 1556 case GETZCNT: 1557 err = count_semcnt(sma, semnum, 1); 1558 goto out_unlock; 1559 } 1560 1561 out_unlock: 1562 sem_unlock(sma, -1); 1563 out_rcu_wakeup: 1564 rcu_read_unlock(); 1565 wake_up_q(&wake_q); 1566 out_free: 1567 if (sem_io != fast_sem_io) 1568 kvfree(sem_io); 1569 return err; 1570 } 1571 1572 static inline unsigned long 1573 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1574 { 1575 switch (version) { 1576 case IPC_64: 1577 if (copy_from_user(out, buf, sizeof(*out))) 1578 return -EFAULT; 1579 return 0; 1580 case IPC_OLD: 1581 { 1582 struct semid_ds tbuf_old; 1583 1584 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1585 return -EFAULT; 1586 1587 
out->sem_perm.uid = tbuf_old.sem_perm.uid; 1588 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1589 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1590 1591 return 0; 1592 } 1593 default: 1594 return -EINVAL; 1595 } 1596 } 1597 1598 /* 1599 * This function handles some semctl commands which require the rwsem 1600 * to be held in write mode. 1601 * NOTE: no locks must be held, the rwsem is taken inside this function. 1602 */ 1603 static int semctl_down(struct ipc_namespace *ns, int semid, 1604 int cmd, struct semid64_ds *semid64) 1605 { 1606 struct sem_array *sma; 1607 int err; 1608 struct kern_ipc_perm *ipcp; 1609 1610 down_write(&sem_ids(ns).rwsem); 1611 rcu_read_lock(); 1612 1613 ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd, 1614 &semid64->sem_perm, 0); 1615 if (IS_ERR(ipcp)) { 1616 err = PTR_ERR(ipcp); 1617 goto out_unlock1; 1618 } 1619 1620 sma = container_of(ipcp, struct sem_array, sem_perm); 1621 1622 err = security_sem_semctl(&sma->sem_perm, cmd); 1623 if (err) 1624 goto out_unlock1; 1625 1626 switch (cmd) { 1627 case IPC_RMID: 1628 sem_lock(sma, NULL, -1); 1629 /* freeary unlocks the ipc object and rcu */ 1630 freeary(ns, ipcp); 1631 goto out_up; 1632 case IPC_SET: 1633 sem_lock(sma, NULL, -1); 1634 err = ipc_update_perm(&semid64->sem_perm, ipcp); 1635 if (err) 1636 goto out_unlock0; 1637 sma->sem_ctime = ktime_get_real_seconds(); 1638 break; 1639 default: 1640 err = -EINVAL; 1641 goto out_unlock1; 1642 } 1643 1644 out_unlock0: 1645 sem_unlock(sma, -1); 1646 out_unlock1: 1647 rcu_read_unlock(); 1648 out_up: 1649 up_write(&sem_ids(ns).rwsem); 1650 return err; 1651 } 1652 1653 static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version) 1654 { 1655 struct ipc_namespace *ns; 1656 void __user *p = (void __user *)arg; 1657 struct semid64_ds semid64; 1658 int err; 1659 1660 if (semid < 0) 1661 return -EINVAL; 1662 1663 ns = current->nsproxy->ipc_ns; 1664 1665 switch (cmd) { 1666 case IPC_INFO: 1667 case SEM_INFO: 1668 return 
semctl_info(ns, semid, cmd, p); 1669 case IPC_STAT: 1670 case SEM_STAT: 1671 case SEM_STAT_ANY: 1672 err = semctl_stat(ns, semid, cmd, &semid64); 1673 if (err < 0) 1674 return err; 1675 if (copy_semid_to_user(p, &semid64, version)) 1676 err = -EFAULT; 1677 return err; 1678 case GETALL: 1679 case GETVAL: 1680 case GETPID: 1681 case GETNCNT: 1682 case GETZCNT: 1683 case SETALL: 1684 return semctl_main(ns, semid, semnum, cmd, p); 1685 case SETVAL: { 1686 int val; 1687 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1688 /* big-endian 64bit */ 1689 val = arg >> 32; 1690 #else 1691 /* 32bit or little-endian 64bit */ 1692 val = arg; 1693 #endif 1694 return semctl_setval(ns, semid, semnum, val); 1695 } 1696 case IPC_SET: 1697 if (copy_semid_from_user(&semid64, p, version)) 1698 return -EFAULT; 1699 fallthrough; 1700 case IPC_RMID: 1701 return semctl_down(ns, semid, cmd, &semid64); 1702 default: 1703 return -EINVAL; 1704 } 1705 } 1706 1707 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1708 { 1709 return ksys_semctl(semid, semnum, cmd, arg, IPC_64); 1710 } 1711 1712 #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION 1713 long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg) 1714 { 1715 int version = ipc_parse_version(&cmd); 1716 1717 return ksys_semctl(semid, semnum, cmd, arg, version); 1718 } 1719 1720 SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1721 { 1722 return ksys_old_semctl(semid, semnum, cmd, arg); 1723 } 1724 #endif 1725 1726 #ifdef CONFIG_COMPAT 1727 1728 struct compat_semid_ds { 1729 struct compat_ipc_perm sem_perm; 1730 old_time32_t sem_otime; 1731 old_time32_t sem_ctime; 1732 compat_uptr_t sem_base; 1733 compat_uptr_t sem_pending; 1734 compat_uptr_t sem_pending_last; 1735 compat_uptr_t undo; 1736 unsigned short sem_nsems; 1737 }; 1738 1739 static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf, 1740 int version) 1741 { 1742 memset(out, 0, sizeof(*out)); 
1743 if (version == IPC_64) { 1744 struct compat_semid64_ds __user *p = buf; 1745 return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm); 1746 } else { 1747 struct compat_semid_ds __user *p = buf; 1748 return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm); 1749 } 1750 } 1751 1752 static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, 1753 int version) 1754 { 1755 if (version == IPC_64) { 1756 struct compat_semid64_ds v; 1757 memset(&v, 0, sizeof(v)); 1758 to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm); 1759 v.sem_otime = lower_32_bits(in->sem_otime); 1760 v.sem_otime_high = upper_32_bits(in->sem_otime); 1761 v.sem_ctime = lower_32_bits(in->sem_ctime); 1762 v.sem_ctime_high = upper_32_bits(in->sem_ctime); 1763 v.sem_nsems = in->sem_nsems; 1764 return copy_to_user(buf, &v, sizeof(v)); 1765 } else { 1766 struct compat_semid_ds v; 1767 memset(&v, 0, sizeof(v)); 1768 to_compat_ipc_perm(&v.sem_perm, &in->sem_perm); 1769 v.sem_otime = in->sem_otime; 1770 v.sem_ctime = in->sem_ctime; 1771 v.sem_nsems = in->sem_nsems; 1772 return copy_to_user(buf, &v, sizeof(v)); 1773 } 1774 } 1775 1776 static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version) 1777 { 1778 void __user *p = compat_ptr(arg); 1779 struct ipc_namespace *ns; 1780 struct semid64_ds semid64; 1781 int err; 1782 1783 ns = current->nsproxy->ipc_ns; 1784 1785 if (semid < 0) 1786 return -EINVAL; 1787 1788 switch (cmd & (~IPC_64)) { 1789 case IPC_INFO: 1790 case SEM_INFO: 1791 return semctl_info(ns, semid, cmd, p); 1792 case IPC_STAT: 1793 case SEM_STAT: 1794 case SEM_STAT_ANY: 1795 err = semctl_stat(ns, semid, cmd, &semid64); 1796 if (err < 0) 1797 return err; 1798 if (copy_compat_semid_to_user(p, &semid64, version)) 1799 err = -EFAULT; 1800 return err; 1801 case GETVAL: 1802 case GETPID: 1803 case GETNCNT: 1804 case GETZCNT: 1805 case GETALL: 1806 case SETALL: 1807 return semctl_main(ns, semid, semnum, cmd, p); 1808 case SETVAL: 1809 return semctl_setval(ns, 
semid, semnum, arg); 1810 case IPC_SET: 1811 if (copy_compat_semid_from_user(&semid64, p, version)) 1812 return -EFAULT; 1813 fallthrough; 1814 case IPC_RMID: 1815 return semctl_down(ns, semid, cmd, &semid64); 1816 default: 1817 return -EINVAL; 1818 } 1819 } 1820 1821 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) 1822 { 1823 return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64); 1824 } 1825 1826 #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION 1827 long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg) 1828 { 1829 int version = compat_ipc_parse_version(&cmd); 1830 1831 return compat_ksys_semctl(semid, semnum, cmd, arg, version); 1832 } 1833 1834 COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg) 1835 { 1836 return compat_ksys_old_semctl(semid, semnum, cmd, arg); 1837 } 1838 #endif 1839 #endif 1840 1841 /* If the task doesn't already have a undo_list, then allocate one 1842 * here. We guarantee there is only one thread using this undo list, 1843 * and current is THE ONE 1844 * 1845 * If this allocation and assignment succeeds, but later 1846 * portions of this code fail, there is no need to free the sem_undo_list. 1847 * Just let it stay associated with the task, and it'll be freed later 1848 * at exit time. 1849 * 1850 * This can block, so callers must hold no locks. 
1851 */ 1852 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1853 { 1854 struct sem_undo_list *undo_list; 1855 1856 undo_list = current->sysvsem.undo_list; 1857 if (!undo_list) { 1858 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL_ACCOUNT); 1859 if (undo_list == NULL) 1860 return -ENOMEM; 1861 spin_lock_init(&undo_list->lock); 1862 refcount_set(&undo_list->refcnt, 1); 1863 INIT_LIST_HEAD(&undo_list->list_proc); 1864 1865 current->sysvsem.undo_list = undo_list; 1866 } 1867 *undo_listp = undo_list; 1868 return 0; 1869 } 1870 1871 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1872 { 1873 struct sem_undo *un; 1874 1875 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc, 1876 spin_is_locked(&ulp->lock)) { 1877 if (un->semid == semid) 1878 return un; 1879 } 1880 return NULL; 1881 } 1882 1883 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1884 { 1885 struct sem_undo *un; 1886 1887 assert_spin_locked(&ulp->lock); 1888 1889 un = __lookup_undo(ulp, semid); 1890 if (un) { 1891 list_del_rcu(&un->list_proc); 1892 list_add_rcu(&un->list_proc, &ulp->list_proc); 1893 } 1894 return un; 1895 } 1896 1897 /** 1898 * find_alloc_undo - lookup (and if not present create) undo array 1899 * @ns: namespace 1900 * @semid: semaphore array id 1901 * 1902 * The function looks up (and if not present creates) the undo structure. 1903 * The size of the undo structure depends on the size of the semaphore 1904 * array, thus the alloc path is not that straightforward. 1905 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1906 * performs a rcu_read_lock(). 
1907 */ 1908 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1909 { 1910 struct sem_array *sma; 1911 struct sem_undo_list *ulp; 1912 struct sem_undo *un, *new; 1913 int nsems, error; 1914 1915 error = get_undo_list(&ulp); 1916 if (error) 1917 return ERR_PTR(error); 1918 1919 rcu_read_lock(); 1920 spin_lock(&ulp->lock); 1921 un = lookup_undo(ulp, semid); 1922 spin_unlock(&ulp->lock); 1923 if (likely(un != NULL)) 1924 goto out; 1925 1926 /* no undo structure around - allocate one. */ 1927 /* step 1: figure out the size of the semaphore array */ 1928 sma = sem_obtain_object_check(ns, semid); 1929 if (IS_ERR(sma)) { 1930 rcu_read_unlock(); 1931 return ERR_CAST(sma); 1932 } 1933 1934 nsems = sma->sem_nsems; 1935 if (!ipc_rcu_getref(&sma->sem_perm)) { 1936 rcu_read_unlock(); 1937 un = ERR_PTR(-EIDRM); 1938 goto out; 1939 } 1940 rcu_read_unlock(); 1941 1942 /* step 2: allocate new undo structure */ 1943 new = kvzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, 1944 GFP_KERNEL_ACCOUNT); 1945 if (!new) { 1946 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1947 return ERR_PTR(-ENOMEM); 1948 } 1949 1950 /* step 3: Acquire the lock on semaphore array */ 1951 rcu_read_lock(); 1952 sem_lock_and_putref(sma); 1953 if (!ipc_valid_object(&sma->sem_perm)) { 1954 sem_unlock(sma, -1); 1955 rcu_read_unlock(); 1956 kvfree(new); 1957 un = ERR_PTR(-EIDRM); 1958 goto out; 1959 } 1960 spin_lock(&ulp->lock); 1961 1962 /* 1963 * step 4: check for races: did someone else allocate the undo struct? 
1964 */ 1965 un = lookup_undo(ulp, semid); 1966 if (un) { 1967 kvfree(new); 1968 goto success; 1969 } 1970 /* step 5: initialize & link new undo structure */ 1971 new->semadj = (short *) &new[1]; 1972 new->ulp = ulp; 1973 new->semid = semid; 1974 assert_spin_locked(&ulp->lock); 1975 list_add_rcu(&new->list_proc, &ulp->list_proc); 1976 ipc_assert_locked_object(&sma->sem_perm); 1977 list_add(&new->list_id, &sma->list_id); 1978 un = new; 1979 1980 success: 1981 spin_unlock(&ulp->lock); 1982 sem_unlock(sma, -1); 1983 out: 1984 return un; 1985 } 1986 1987 long __do_semtimedop(int semid, struct sembuf *sops, 1988 unsigned nsops, const struct timespec64 *timeout, 1989 struct ipc_namespace *ns) 1990 { 1991 int error = -EINVAL; 1992 struct sem_array *sma; 1993 struct sembuf *sop; 1994 struct sem_undo *un; 1995 int max, locknum; 1996 bool undos = false, alter = false, dupsop = false; 1997 struct sem_queue queue; 1998 unsigned long dup = 0, jiffies_left = 0; 1999 2000 if (nsops < 1 || semid < 0) 2001 return -EINVAL; 2002 if (nsops > ns->sc_semopm) 2003 return -E2BIG; 2004 2005 if (timeout) { 2006 if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 || 2007 timeout->tv_nsec >= 1000000000L) { 2008 error = -EINVAL; 2009 goto out; 2010 } 2011 jiffies_left = timespec64_to_jiffies(timeout); 2012 } 2013 2014 2015 max = 0; 2016 for (sop = sops; sop < sops + nsops; sop++) { 2017 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); 2018 2019 if (sop->sem_num >= max) 2020 max = sop->sem_num; 2021 if (sop->sem_flg & SEM_UNDO) 2022 undos = true; 2023 if (dup & mask) { 2024 /* 2025 * There was a previous alter access that appears 2026 * to have accessed the same semaphore, thus use 2027 * the dupsop logic. "appears", because the detection 2028 * can only check % BITS_PER_LONG. 
2029 */ 2030 dupsop = true; 2031 } 2032 if (sop->sem_op != 0) { 2033 alter = true; 2034 dup |= mask; 2035 } 2036 } 2037 2038 if (undos) { 2039 /* On success, find_alloc_undo takes the rcu_read_lock */ 2040 un = find_alloc_undo(ns, semid); 2041 if (IS_ERR(un)) { 2042 error = PTR_ERR(un); 2043 goto out; 2044 } 2045 } else { 2046 un = NULL; 2047 rcu_read_lock(); 2048 } 2049 2050 sma = sem_obtain_object_check(ns, semid); 2051 if (IS_ERR(sma)) { 2052 rcu_read_unlock(); 2053 error = PTR_ERR(sma); 2054 goto out; 2055 } 2056 2057 error = -EFBIG; 2058 if (max >= sma->sem_nsems) { 2059 rcu_read_unlock(); 2060 goto out; 2061 } 2062 2063 error = -EACCES; 2064 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { 2065 rcu_read_unlock(); 2066 goto out; 2067 } 2068 2069 error = security_sem_semop(&sma->sem_perm, sops, nsops, alter); 2070 if (error) { 2071 rcu_read_unlock(); 2072 goto out; 2073 } 2074 2075 error = -EIDRM; 2076 locknum = sem_lock(sma, sops, nsops); 2077 /* 2078 * We eventually might perform the following check in a lockless 2079 * fashion, considering ipc_valid_object() locking constraints. 2080 * If nsops == 1 and there is no contention for sem_perm.lock, then 2081 * only a per-semaphore lock is held and it's OK to proceed with the 2082 * check below. More details on the fine grained locking scheme 2083 * entangled here and why it's RMID race safe on comments at sem_lock() 2084 */ 2085 if (!ipc_valid_object(&sma->sem_perm)) 2086 goto out_unlock; 2087 /* 2088 * semid identifiers are not unique - find_alloc_undo may have 2089 * allocated an undo structure, it was invalidated by an RMID 2090 * and now a new array with received the same id. Check and fail. 2091 * This case can be detected checking un->semid. The existence of 2092 * "un" itself is guaranteed by rcu. 
2093 */ 2094 if (un && un->semid == -1) 2095 goto out_unlock; 2096 2097 queue.sops = sops; 2098 queue.nsops = nsops; 2099 queue.undo = un; 2100 queue.pid = task_tgid(current); 2101 queue.alter = alter; 2102 queue.dupsop = dupsop; 2103 2104 error = perform_atomic_semop(sma, &queue); 2105 if (error == 0) { /* non-blocking successful path */ 2106 DEFINE_WAKE_Q(wake_q); 2107 2108 /* 2109 * If the operation was successful, then do 2110 * the required updates. 2111 */ 2112 if (alter) 2113 do_smart_update(sma, sops, nsops, 1, &wake_q); 2114 else 2115 set_semotime(sma, sops); 2116 2117 sem_unlock(sma, locknum); 2118 rcu_read_unlock(); 2119 wake_up_q(&wake_q); 2120 2121 goto out; 2122 } 2123 if (error < 0) /* non-blocking error path */ 2124 goto out_unlock; 2125 2126 /* 2127 * We need to sleep on this operation, so we put the current 2128 * task into the pending queue and go to sleep. 2129 */ 2130 if (nsops == 1) { 2131 struct sem *curr; 2132 int idx = array_index_nospec(sops->sem_num, sma->sem_nsems); 2133 curr = &sma->sems[idx]; 2134 2135 if (alter) { 2136 if (sma->complex_count) { 2137 list_add_tail(&queue.list, 2138 &sma->pending_alter); 2139 } else { 2140 2141 list_add_tail(&queue.list, 2142 &curr->pending_alter); 2143 } 2144 } else { 2145 list_add_tail(&queue.list, &curr->pending_const); 2146 } 2147 } else { 2148 if (!sma->complex_count) 2149 merge_queues(sma); 2150 2151 if (alter) 2152 list_add_tail(&queue.list, &sma->pending_alter); 2153 else 2154 list_add_tail(&queue.list, &sma->pending_const); 2155 2156 sma->complex_count++; 2157 } 2158 2159 do { 2160 /* memory ordering ensured by the lock in sem_lock() */ 2161 WRITE_ONCE(queue.status, -EINTR); 2162 queue.sleeper = current; 2163 2164 /* memory ordering is ensured by the lock in sem_lock() */ 2165 __set_current_state(TASK_INTERRUPTIBLE); 2166 sem_unlock(sma, locknum); 2167 rcu_read_unlock(); 2168 2169 if (timeout) 2170 jiffies_left = schedule_timeout(jiffies_left); 2171 else 2172 schedule(); 2173 2174 /* 2175 * 
fastpath: the semop has completed, either successfully or 2176 * not, from the syscall pov, is quite irrelevant to us at this 2177 * point; we're done. 2178 * 2179 * We _do_ care, nonetheless, about being awoken by a signal or 2180 * spuriously. The queue.status is checked again in the 2181 * slowpath (aka after taking sem_lock), such that we can detect 2182 * scenarios where we were awakened externally, during the 2183 * window between wake_q_add() and wake_up_q(). 2184 */ 2185 error = READ_ONCE(queue.status); 2186 if (error != -EINTR) { 2187 /* see SEM_BARRIER_2 for purpose/pairing */ 2188 smp_acquire__after_ctrl_dep(); 2189 goto out; 2190 } 2191 2192 rcu_read_lock(); 2193 locknum = sem_lock(sma, sops, nsops); 2194 2195 if (!ipc_valid_object(&sma->sem_perm)) 2196 goto out_unlock; 2197 2198 /* 2199 * No necessity for any barrier: We are protect by sem_lock() 2200 */ 2201 error = READ_ONCE(queue.status); 2202 2203 /* 2204 * If queue.status != -EINTR we are woken up by another process. 2205 * Leave without unlink_queue(), but with sem_unlock(). 2206 */ 2207 if (error != -EINTR) 2208 goto out_unlock; 2209 2210 /* 2211 * If an interrupt occurred we have to clean up the queue. 
2212 */ 2213 if (timeout && jiffies_left == 0) 2214 error = -EAGAIN; 2215 } while (error == -EINTR && !signal_pending(current)); /* spurious */ 2216 2217 unlink_queue(sma, &queue); 2218 2219 out_unlock: 2220 sem_unlock(sma, locknum); 2221 rcu_read_unlock(); 2222 out: 2223 return error; 2224 } 2225 2226 static long do_semtimedop(int semid, struct sembuf __user *tsops, 2227 unsigned nsops, const struct timespec64 *timeout) 2228 { 2229 struct sembuf fast_sops[SEMOPM_FAST]; 2230 struct sembuf *sops = fast_sops; 2231 struct ipc_namespace *ns; 2232 int ret; 2233 2234 ns = current->nsproxy->ipc_ns; 2235 if (nsops > ns->sc_semopm) 2236 return -E2BIG; 2237 if (nsops < 1) 2238 return -EINVAL; 2239 2240 if (nsops > SEMOPM_FAST) { 2241 sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); 2242 if (sops == NULL) 2243 return -ENOMEM; 2244 } 2245 2246 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 2247 ret = -EFAULT; 2248 goto out_free; 2249 } 2250 2251 ret = __do_semtimedop(semid, sops, nsops, timeout, ns); 2252 2253 out_free: 2254 if (sops != fast_sops) 2255 kvfree(sops); 2256 2257 return ret; 2258 } 2259 2260 long ksys_semtimedop(int semid, struct sembuf __user *tsops, 2261 unsigned int nsops, const struct __kernel_timespec __user *timeout) 2262 { 2263 if (timeout) { 2264 struct timespec64 ts; 2265 if (get_timespec64(&ts, timeout)) 2266 return -EFAULT; 2267 return do_semtimedop(semid, tsops, nsops, &ts); 2268 } 2269 return do_semtimedop(semid, tsops, nsops, NULL); 2270 } 2271 2272 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 2273 unsigned int, nsops, const struct __kernel_timespec __user *, timeout) 2274 { 2275 return ksys_semtimedop(semid, tsops, nsops, timeout); 2276 } 2277 2278 #ifdef CONFIG_COMPAT_32BIT_TIME 2279 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, 2280 unsigned int nsops, 2281 const struct old_timespec32 __user *timeout) 2282 { 2283 if (timeout) { 2284 struct timespec64 ts; 2285 if 
(get_old_timespec32(&ts, timeout)) 2286 return -EFAULT; 2287 return do_semtimedop(semid, tsems, nsops, &ts); 2288 } 2289 return do_semtimedop(semid, tsems, nsops, NULL); 2290 } 2291 2292 SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems, 2293 unsigned int, nsops, 2294 const struct old_timespec32 __user *, timeout) 2295 { 2296 return compat_ksys_semtimedop(semid, tsems, nsops, timeout); 2297 } 2298 #endif 2299 2300 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, 2301 unsigned, nsops) 2302 { 2303 return do_semtimedop(semid, tsops, nsops, NULL); 2304 } 2305 2306 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 2307 * parent and child tasks. 2308 */ 2309 2310 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 2311 { 2312 struct sem_undo_list *undo_list; 2313 int error; 2314 2315 if (clone_flags & CLONE_SYSVSEM) { 2316 error = get_undo_list(&undo_list); 2317 if (error) 2318 return error; 2319 refcount_inc(&undo_list->refcnt); 2320 tsk->sysvsem.undo_list = undo_list; 2321 } else 2322 tsk->sysvsem.undo_list = NULL; 2323 2324 return 0; 2325 } 2326 2327 /* 2328 * add semadj values to semaphores, free undo structures. 2329 * undo structures are not freed when semaphore arrays are destroyed 2330 * so some of them may be out of date. 2331 * IMPLEMENTATION NOTE: There is some confusion over whether the 2332 * set of adjustments that needs to be done should be done in an atomic 2333 * manner or not. That is, if we are attempting to decrement the semval 2334 * should we queue up and wait until we can do so legally? 2335 * The original implementation attempted to do this (queue and wait). 2336 * The current implementation does not do so. The POSIX standard 2337 * and SVID should be consulted to determine what behavior is mandated. 
2338 */ 2339 void exit_sem(struct task_struct *tsk) 2340 { 2341 struct sem_undo_list *ulp; 2342 2343 ulp = tsk->sysvsem.undo_list; 2344 if (!ulp) 2345 return; 2346 tsk->sysvsem.undo_list = NULL; 2347 2348 if (!refcount_dec_and_test(&ulp->refcnt)) 2349 return; 2350 2351 for (;;) { 2352 struct sem_array *sma; 2353 struct sem_undo *un; 2354 int semid, i; 2355 DEFINE_WAKE_Q(wake_q); 2356 2357 cond_resched(); 2358 2359 rcu_read_lock(); 2360 un = list_entry_rcu(ulp->list_proc.next, 2361 struct sem_undo, list_proc); 2362 if (&un->list_proc == &ulp->list_proc) { 2363 /* 2364 * We must wait for freeary() before freeing this ulp, 2365 * in case we raced with last sem_undo. There is a small 2366 * possibility where we exit while freeary() didn't 2367 * finish unlocking sem_undo_list. 2368 */ 2369 spin_lock(&ulp->lock); 2370 spin_unlock(&ulp->lock); 2371 rcu_read_unlock(); 2372 break; 2373 } 2374 spin_lock(&ulp->lock); 2375 semid = un->semid; 2376 spin_unlock(&ulp->lock); 2377 2378 /* exit_sem raced with IPC_RMID, nothing to do */ 2379 if (semid == -1) { 2380 rcu_read_unlock(); 2381 continue; 2382 } 2383 2384 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); 2385 /* exit_sem raced with IPC_RMID, nothing to do */ 2386 if (IS_ERR(sma)) { 2387 rcu_read_unlock(); 2388 continue; 2389 } 2390 2391 sem_lock(sma, NULL, -1); 2392 /* exit_sem raced with IPC_RMID, nothing to do */ 2393 if (!ipc_valid_object(&sma->sem_perm)) { 2394 sem_unlock(sma, -1); 2395 rcu_read_unlock(); 2396 continue; 2397 } 2398 un = __lookup_undo(ulp, semid); 2399 if (un == NULL) { 2400 /* exit_sem raced with IPC_RMID+semget() that created 2401 * exactly the same semid. Nothing to do. 
2402 */ 2403 sem_unlock(sma, -1); 2404 rcu_read_unlock(); 2405 continue; 2406 } 2407 2408 /* remove un from the linked lists */ 2409 ipc_assert_locked_object(&sma->sem_perm); 2410 list_del(&un->list_id); 2411 2412 spin_lock(&ulp->lock); 2413 list_del_rcu(&un->list_proc); 2414 spin_unlock(&ulp->lock); 2415 2416 /* perform adjustments registered in un */ 2417 for (i = 0; i < sma->sem_nsems; i++) { 2418 struct sem *semaphore = &sma->sems[i]; 2419 if (un->semadj[i]) { 2420 semaphore->semval += un->semadj[i]; 2421 /* 2422 * Range checks of the new semaphore value, 2423 * not defined by sus: 2424 * - Some unices ignore the undo entirely 2425 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 2426 * - some cap the value (e.g. FreeBSD caps 2427 * at 0, but doesn't enforce SEMVMX) 2428 * 2429 * Linux caps the semaphore value, both at 0 2430 * and at SEMVMX. 2431 * 2432 * Manfred <manfred@colorfullife.com> 2433 */ 2434 if (semaphore->semval < 0) 2435 semaphore->semval = 0; 2436 if (semaphore->semval > SEMVMX) 2437 semaphore->semval = SEMVMX; 2438 ipc_update_pid(&semaphore->sempid, task_tgid(current)); 2439 } 2440 } 2441 /* maybe some queued-up processes were waiting for this */ 2442 do_smart_update(sma, NULL, 0, 1, &wake_q); 2443 sem_unlock(sma, -1); 2444 rcu_read_unlock(); 2445 wake_up_q(&wake_q); 2446 2447 kvfree_rcu(un, rcu); 2448 } 2449 kfree(ulp); 2450 } 2451 2452 #ifdef CONFIG_PROC_FS 2453 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 2454 { 2455 struct user_namespace *user_ns = seq_user_ns(s); 2456 struct kern_ipc_perm *ipcp = it; 2457 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 2458 time64_t sem_otime; 2459 2460 /* 2461 * The proc interface isn't aware of sem_lock(), it calls 2462 * ipc_lock_object(), i.e. spin_lock(&sma->sem_perm.lock). 2463 * (in sysvipc_find_ipc) 2464 * In order to stay compatible with sem_lock(), we must 2465 * enter / leave complex_mode. 
2466 */ 2467 complexmode_enter(sma); 2468 2469 sem_otime = get_semotime(sma); 2470 2471 seq_printf(s, 2472 "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n", 2473 sma->sem_perm.key, 2474 sma->sem_perm.id, 2475 sma->sem_perm.mode, 2476 sma->sem_nsems, 2477 from_kuid_munged(user_ns, sma->sem_perm.uid), 2478 from_kgid_munged(user_ns, sma->sem_perm.gid), 2479 from_kuid_munged(user_ns, sma->sem_perm.cuid), 2480 from_kgid_munged(user_ns, sma->sem_perm.cgid), 2481 sem_otime, 2482 sma->sem_ctime); 2483 2484 complexmode_tryleave(sma); 2485 2486 return 0; 2487 } 2488 #endif 2489