// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *   (an illustrative user-space example follows at the end of this comment)
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This makes it possible to
 *   achieve FIFO ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
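 *
 * Illustrative user-space sketch (example only, not exercised by this file):
 * a single-semaphore mutex built on the behavior listed above. SEM_UNDO
 * means the kernel's exit-time undo handling releases the semaphore if the
 * holder dies inside the critical section.
 *
 *	#include <sys/sem.h>
 *
 *	union semun { int val; };		// caller-defined, see semctl(2)
 *
 *	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *	union semun arg = { .val = 1 };
 *	semctl(id, 0, SETVAL, arg);		// updates sem_ctime
 *
 *	struct sembuf lock   = { 0, -1, SEM_UNDO };	// may sleep, FIFO wakeup
 *	struct sembuf unlock = { 0, +1, SEM_UNDO };
 *	semop(id, &lock, 1);
 *	// ... critical section ...
 *	semop(id, &unlock, 1);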
 */

#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>
#include <linux/nospec.h>

#include <linux/uaccess.h>
#include "util.h"

/* One semaphore structure for each semaphore in the system. */
struct sem {
	int	semval;		/* current value */
	/*
	 * PID of the process that last modified the semaphore. For
	 * Linux, specifically these are:
	 *  - semop
	 *  - semctl, via SETVAL and SETALL.
	 *  - at task exit when performing undo adjustments (see exit_sem).
	 */
	struct pid *sempid;
	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
	struct list_head pending_alter; /* pending single-sop operations */
					/* that alter the semaphore */
	struct list_head pending_const; /* pending single-sop operations */
					/* that do not alter the semaphore*/
	time64_t	 sem_otime;	/* candidate for sem_otime */
} ____cacheline_aligned_in_smp;

/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
	time64_t		sem_ctime;	/* create/last semctl() time */
	struct list_head	pending_alter;	/* pending operations */
						/* that alter the array */
	struct list_head	pending_const;	/* pending complex operations */
						/* that do not alter semvals */
	struct list_head	list_id;	/* undo requests on this array */
	int			sem_nsems;	/* no. of semaphores in array */
	int			complex_count;	/* pending complex operations */
	unsigned int		use_global_lock;/* >0: global lock required */

	struct sem		sems[];
} __randomize_layout;

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	struct pid		*pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
	struct sembuf		*blocking; /* the operation that blocked */
	int			nsops;	 /* number of operations */
	bool			alter;	 /* does *sops alter the array? */
	bool			dupsop;	 /* sops on more than one sem_num */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list: *
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks in a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
	refcount_t		refcnt;
	spinlock_t		lock;
	struct list_head	list_proc;
};


#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *	sem_undo.id_next,
 *	sem_array.complex_count,
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 * The special case is use_global_lock:
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release().
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
 */

#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

int sem_init_ns(struct ipc_namespace *ns)
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
	return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}

#ifdef CONFIG_IPC_NS
void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
}
#endif

int __init sem_init(void)
{
	const int err = sem_init_ns(&init_ipc_ns);

	ipc_init_proc_interface("sysvipc/sem",
				" key semid perms nsems uid gid cuid cgid otime ctime\n",
				IPC_SEM_IDS, sysvipc_sem_proc_show);
	return err;
}

/**
 * unmerge_queues - unmerge queues, if possible.
 * @sma: semaphore array
 *
 * The function unmerges the wait queues if complex_count is 0.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
	struct sem_queue *q, *tq;

	/* complex operations still around? */
	if (sma->complex_count)
		return;
	/*
	 * We will switch back to simple mode.
	 * Move all pending operations back into the per-semaphore
	 * queues.
	 */
	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		struct sem *curr;
		curr = &sma->sems[q->sops[0].sem_num];

		list_add_tail(&q->list, &curr->pending_alter);
	}
	INIT_LIST_HEAD(&sma->pending_alter);
}

/**
 * merge_queues - merge single semop queues into global queue
 * @sma: semaphore array
 *
 * This function merges all per-semaphore queues into the global queue.
 * It is necessary to achieve FIFO ordering for the pending single-sop
 * operations when a multi-semop operation must sleep.
 * Only the alter operations must be moved, the const operations can stay.
 */
static void merge_queues(struct sem_array *sma)
{
	int i;
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];

		list_splice_init(&sem->pending_alter, &sma->pending_alter);
	}
}

static void sem_rcu_free(struct rcu_head *head)
{
	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);

	security_sem_free(&sma->sem_perm);
	kvfree(sma);
}

/*
 * Enter the mode suitable for non-simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_enter(struct sem_array *sma)
{
	int i;
	struct sem *sem;

	if (sma->use_global_lock > 0) {
		/*
		 * We are already in global lock mode.
		 * Nothing to do, just reset the
		 * counter until we return to simple mode.
		 */
		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
		return;
	}
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;

	for (i = 0; i < sma->sem_nsems; i++) {
		sem = &sma->sems[i];
		spin_lock(&sem->lock);
		spin_unlock(&sem->lock);
	}
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
	if (sma->complex_count) {
		/* Complex ops are sleeping.
		 * We must stay in complex mode
		 */
		return;
	}
	if (sma->use_global_lock == 1) {
		/*
		 * Immediately after setting use_global_lock to 0,
		 * a simple op can start. Thus: all memory writes
		 * performed by the current operation must be visible
		 * before we set use_global_lock to 0.
		 */
		smp_store_release(&sma->use_global_lock, 0);
	} else {
		sma->use_global_lock--;
	}
}

#define SEM_GLOBAL_LOCK	(-1)
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			   int nsops)
{
	struct sem *sem;
	int idx;

	if (nsops != 1) {
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);

		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
	}

	/*
	 * Only one semaphore affected - try to optimize locking.
	 * Optimized locking is possible if no complex operation
	 * is either enqueued or processed right now.
	 *
	 * Both facts are tracked by use_global_lock.
	 */
	idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
	sem = &sma->sems[idx];

	/*
	 * Initial check for use_global_lock. Just an optimization,
	 * no locking, no memory barrier.
	 */
	if (!sma->use_global_lock) {
		/*
		 * It appears that no complex operation is around.
		 * Acquire the per-semaphore lock.
		 */
		spin_lock(&sem->lock);

		/* pairs with smp_store_release() */
		if (!smp_load_acquire(&sma->use_global_lock)) {
			/* fast path successful! */
			return sops->sem_num;
		}
		spin_unlock(&sem->lock);
	}

	/* slow path: acquire the full lock */
	ipc_lock_object(&sma->sem_perm);

	if (sma->use_global_lock == 0) {
		/*
		 * The use_global_lock mode ended while we waited for
		 * sma->sem_perm.lock. Thus we must switch to locking
		 * with sem->lock.
		 * Unlike in the fast path, there is no need to recheck
		 * sma->use_global_lock after we have acquired sem->lock:
		 * We own sma->sem_perm.lock, thus use_global_lock cannot
		 * change.
		 */
		spin_lock(&sem->lock);

		ipc_unlock_object(&sma->sem_perm);
		return sops->sem_num;
	} else {
		/*
		 * Not a false alarm, thus continue to use the global lock
		 * mode. No need for complexmode_enter(), this was done by
		 * the caller that has set use_global_lock to non-zero.
		 */
		return SEM_GLOBAL_LOCK;
	}
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
	if (locknum == SEM_GLOBAL_LOCK) {
		unmerge_queues(sma);
		complexmode_tryleave(sma);
		ipc_unlock_object(&sma->sem_perm);
	} else {
		struct sem *sem = &sma->sems[locknum];
		spin_unlock(&sem->lock);
	}
}

/*
 * sem_lock_(check_) routines are called in the paths where the rwsem
 * is not held.
 *
 * The caller holds the RCU read lock.
 */
static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline void sem_lock_and_putref(struct sem_array *sma)
{
	sem_lock(sma, NULL, -1);
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
	ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

static struct sem_array *sem_alloc(size_t nsems)
{
	struct sem_array *sma;
	size_t size;

	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
		return NULL;

	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
	sma = kvmalloc(size, GFP_KERNEL);
	if (unlikely(!sma))
		return NULL;

	memset(sma, 0, size);

	return sma;
}

/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
 * Called with sem_ids.rwsem held (as a writer)
 */
static int newary(struct ipc_namespace *ns, struct ipc_params *params)
{
	int retval;
	struct sem_array *sma;
	key_t key = params->key;
	int nsems = params->u.nsems;
	int semflg = params->flg;
	int i;

	if (!nsems)
		return -EINVAL;
	if (ns->used_sems + nsems > ns->sc_semmns)
		return -ENOSPC;

	sma = sem_alloc(nsems);
	if (!sma)
		return -ENOMEM;

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_sem_alloc(&sma->sem_perm);
	if (retval) {
		kvfree(sma);
		return retval;
	}

	for (i = 0; i < nsems; i++) {
		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
		INIT_LIST_HEAD(&sma->sems[i].pending_const);
		spin_lock_init(&sma->sems[i].lock);
	}

	sma->complex_count = 0;
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
	INIT_LIST_HEAD(&sma->pending_alter);
	INIT_LIST_HEAD(&sma->pending_const);
	INIT_LIST_HEAD(&sma->list_id);
	sma->sem_nsems = nsems;
	sma->sem_ctime = ktime_get_real_seconds();

	/* ipc_addid() locks sma upon success. */
	retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
	if (retval < 0) {
		call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
		return retval;
	}
	ns->used_sems += nsems;

	sem_unlock(sma, -1);
	rcu_read_unlock();

	return sma->sem_perm.id;
}


/*
 * Called with sem_ids.rwsem and ipcp locked.
 */
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
				  struct ipc_params *params)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
		return -EINVAL;

	return 0;
}

long ksys_semget(key_t key, int nsems, int semflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops sem_ops = {
		.getnew = newary,
		.associate = security_sem_associate,
		.more_checks = sem_more_checks,
	};
	struct ipc_params sem_params;

	ns = current->nsproxy->ipc_ns;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}

SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	return ksys_semget(key, nsems, semflg);
}

/**
 * perform_atomic_semop[_slow] - Attempt to perform semaphore
 *                               operations on a given array.
 * @sma: semaphore array
 * @q: struct sem_queue that describes the operation
 *
 * Whether the caller blocks is determined by the value of the semaphore
 * operation (sem_op):
 *
 * (1) >0 never blocks.
 * (2) 0 (wait-for-zero operation): blocks if semval is non-zero.
 * (3) <0 blocks if decrementing semval would result in a value smaller
 *     than zero.
 *
 * Returns 0 if the operation was possible.
 * Returns 1 if the operation is impossible, the caller must sleep.
 * Returns <0 for error codes.
 */
static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops;
	struct pid *pid;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	for (sop = sops; sop < sops + nsops; sop++) {
		int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
		curr = &sma->sems[idx];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
			/* Exceeding the undo range is an error. */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
			un->semadj[sop->sem_num] = undo;
		}

		curr->semval = result;
	}

	sop--;
	pid = q->pid;
	while (sop >= sops) {
		ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid);
		sop--;
	}

	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
	q->blocking = sop;

	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	sop--;
	while (sop >= sops) {
		sem_op = sop->sem_op;
		sma->sems[sop->sem_num].semval -= sem_op;
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] += sem_op;
		sop--;
	}

	return result;
}

static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	if (unlikely(q->dupsop))
		return perform_atomic_semop_slow(sma, q);

	/*
	 * We scan the semaphore set twice, first to ensure that the entire
	 * operation can succeed, therefore avoiding any pointless writes
	 * to shared memory and having to undo such changes in order to block
	 * until the operations can go through.
	 */
	for (sop = sops; sop < sops + nsops; sop++) {
		int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);

		curr = &sma->sems[idx];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block; /* wait-for-zero */

		result += sem_op;
		if (result < 0)
			goto would_block;

		if (result > SEMVMX)
			return -ERANGE;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			/* Exceeding the undo range is an error. */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				return -ERANGE;
		}
	}

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = &sma->sems[sop->sem_num];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			un->semadj[sop->sem_num] = undo;
		}
		curr->semval += sem_op;
		ipc_update_pid(&curr->sempid, q->pid);
	}

	return 0;

would_block:
	q->blocking = sop;
	return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
}

static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
					     struct wake_q_head *wake_q)
{
	wake_q_add(wake_q, q->sleeper);
	/*
	 * Rely on the above implicit barrier, such that we can
	 * ensure that we hold a reference to the task before setting
	 * q->status. Otherwise we could race with do_exit if the
	 * task is awoken by an external event before calling
	 * wake_up_process().
	 */
	WRITE_ONCE(q->status, error);
}

static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
	list_del(&q->list);
	if (q->nsops > 1)
		sma->complex_count--;
}

/** check_restart(sma, q)
 * @sma: semaphore array
 * @q: the operation that just completed
 *
 * update_queue is O(N^2) when it restarts scanning the whole queue of
 * waiting operations. Therefore this function checks if the restart is
 * really necessary. It is called after a previously waiting operation
 * modified the array.
 * Note that wait-for-zero operations are handled without restart.
 */
static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
{
	/* pending complex alter operations are too difficult to analyse */
	if (!list_empty(&sma->pending_alter))
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

	/* It is impossible that someone waits for the new value:
	 * - complex operations always restart.
	 * - wait-for-zero are handled separately.
	 * - q is a previously sleeping simple operation that
	 *   altered the array. It must be a decrement, because
	 *   simple increments never sleep.
	 * - If there are older (higher priority) decrements
	 *   in the queue, then they have observed the original
	 *   semval value and couldn't proceed. The operation
	 *   decremented the value - thus they won't proceed either.
	 */
	return 0;
}

/**
 * wake_const_ops - wake up non-alter tasks
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
			  struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
		pending_list = &sma->sems[semnum].pending_const;

	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error = perform_atomic_semop(sma, q);

		if (error > 0)
			continue;
		/* operation completed, remove from queue & wakeup */
		unlink_queue(sma, q);

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (error == 0)
			semop_completed = 1;
	}

	return semop_completed;
}

/**
 * do_smart_wakeup_zero - wake up all wait-for-zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @wake_q: lockless wake-queue head
 *
 * Checks all required queues for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
				int nsops, struct wake_q_head *wake_q)
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

			if (sma->sems[num].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, num, wake_q);
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
		 */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (sma->sems[i].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, i, wake_q);
			}
		}
	}
	/*
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
	 */
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, wake_q);

	return semop_completed;
}


/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_alter;
	else
		pending_list = &sma->sems[semnum].pending_alter;

again:
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error, restart;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan further: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sems[semnum].semval == 0)
			break;

		error = perform_atomic_semop(sma, q);

		/* Does q->sleeper still need to sleep? */
		if (error > 0)
			continue;

		unlink_queue(sma, q);

		if (error) {
			restart = 0;
		} else {
			semop_completed = 1;
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (restart)
			goto again;
	}
	return semop_completed;
}

/**
 * set_semotime - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
		sma->sems[0].sem_otime = ktime_get_real_seconds();
	} else {
		sma->sems[sops[0].sem_num].sem_otime =
						ktime_get_real_seconds();
	}
}

/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform this call after dropping all locks.
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			    int otime, struct wake_q_head *wake_q)
{
	int i;

	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
		otime |= update_queue(sma, -1, wake_q);
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
				otime |= update_queue(sma, i, wake_q);
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrements.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops won't be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							      sops[i].sem_num, wake_q);
				}
			}
		}
	}
	if (otime)
		set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
		     bool count_zero)
{
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

/* The following counts are associated with each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 *
 * Per definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operations would block, too.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
{
	struct list_head *l;
	struct sem_queue *q;
	int semcnt;

	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
		l = &sma->sems[semnum].pending_const;
	else
		l = &sma->sems[semnum].pending_alter;

	list_for_each_entry(q, l, list) {
		/* all tasks on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
	}

	/* Then: check the complex operations. */
	list_for_each_entry(q, &sma->pending_alter, list) {
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
	}
	return semcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
 * remains locked on exit.
 */
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct sem_undo *un, *tu;
	struct sem_queue *q, *tq;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	int i;
	DEFINE_WAKE_Q(wake_q);

	/* Free the existing undo structures for this semaphore set. */
	ipc_assert_locked_object(&sma->sem_perm);
	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
		list_del(&un->list_id);
		spin_lock(&un->ulp->lock);
		un->semid = -1;
		list_del_rcu(&un->list_proc);
		spin_unlock(&un->ulp->lock);
		kfree_rcu(un, rcu);
	}

	/* Wake up all pending processes and let them fail with EIDRM. */
	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	}

	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	}
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];
		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
		}
		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
		}
		ipc_update_pid(&sem->sempid, NULL);
	}

	/* Remove the semaphore set from the IDR */
	sem_rmid(ns, sma);
	sem_unlock(sma, -1);
	rcu_read_unlock();

	wake_up_q(&wake_q);
	ns->used_sems -= sma->sem_nsems;
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct semid_ds out;

		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime = in->sem_otime;
		out.sem_ctime = in->sem_ctime;
		out.sem_nsems = in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

static time64_t get_semotime(struct sem_array *sma)
{
	int i;
	time64_t res;

	res = sma->sems[0].sem_otime;
	for (i = 1; i < sma->sem_nsems; i++) {
		time64_t to = sma->sems[i].sem_otime;

		if (to > res)
			res = to;
	}
	return res;
}

static int semctl_stat(struct ipc_namespace *ns, int semid,
		       int cmd, struct semid64_ds *semid64)
{
	struct sem_array *sma;
	time64_t semotime;
	int id = 0;
	int err;

	memset(semid64, 0, sizeof(*semid64));

	rcu_read_lock();
	if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) {
		sma = sem_obtain_object(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
		id = sma->sem_perm.id;
	} else { /* IPC_STAT */
		sma = sem_obtain_object_check(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
	}

	/* see comment for SHM_STAT_ANY */
	if (cmd == SEM_STAT_ANY)
		audit_ipc_obj(&sma->sem_perm);
	else {
		err = -EACCES;
		if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
			goto out_unlock;
	}

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_unlock;

	ipc_lock_object(&sma->sem_perm);

	if (!ipc_valid_object(&sma->sem_perm)) {
		ipc_unlock_object(&sma->sem_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
	semotime = get_semotime(sma);
	semid64->sem_otime = semotime;
	semid64->sem_ctime = sma->sem_ctime;
#ifndef CONFIG_64BIT
	semid64->sem_otime_high = semotime >> 32;
	semid64->sem_ctime_high = sma->sem_ctime >> 32;
#endif
	semid64->sem_nsems = sma->sem_nsems;

	ipc_unlock_object(&sma->sem_perm);
	rcu_read_unlock();
	return id;

out_unlock:
	rcu_read_unlock();
	return err;
}

static int semctl_info(struct ipc_namespace *ns, int semid,
		       int cmd, void __user *p)
{
	struct seminfo seminfo;
	int max_id;
	int err;

	err = security_sem_semctl(NULL, cmd);
	if (err)
		return err;

	memset(&seminfo, 0, sizeof(seminfo));
	seminfo.semmni = ns->sc_semmni;
	seminfo.semmns = ns->sc_semmns;
	seminfo.semmsl = ns->sc_semmsl;
	seminfo.semopm = ns->sc_semopm;
	seminfo.semvmx = SEMVMX;
	seminfo.semmnu = SEMMNU;
	seminfo.semmap = SEMMAP;
	seminfo.semume = SEMUME;
	down_read(&sem_ids(ns).rwsem);
	if (cmd == SEM_INFO) {
		seminfo.semusz = sem_ids(ns).in_use;
		seminfo.semaem = ns->used_sems;
	} else {
		seminfo.semusz = SEMUSZ;
		seminfo.semaem = SEMAEM;
	}
	max_id = ipc_get_maxid(&sem_ids(ns));
	up_read(&sem_ids(ns).rwsem);
	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
		return -EFAULT;
	return (max_id < 0) ? 0 : max_id;
}

static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
			 int val)
{
	struct sem_undo *un;
	struct sem_array *sma;
	struct sem *curr;
	int err;
	DEFINE_WAKE_Q(wake_q);

	if (val > SEMVMX || val < 0)
		return -ERANGE;

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	if (semnum < 0 || semnum >= sma->sem_nsems) {
		rcu_read_unlock();
		return -EINVAL;
	}


	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
		rcu_read_unlock();
		return -EACCES;
	}

	err = security_sem_semctl(&sma->sem_perm, SETVAL);
	if (err) {
		rcu_read_unlock();
		return -EACCES;
	}

	sem_lock(sma, NULL, -1);

	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		return -EIDRM;
	}

	semnum = array_index_nospec(semnum, sma->sem_nsems);
	curr = &sma->sems[semnum];

	ipc_assert_locked_object(&sma->sem_perm);
	list_for_each_entry(un, &sma->list_id, list_id)
		un->semadj[semnum] = 0;

	curr->semval = val;
	ipc_update_pid(&curr->sempid, task_tgid(current));
	sma->sem_ctime = ktime_get_real_seconds();
	/* maybe some queued-up processes were waiting for this */
	do_smart_update(sma, NULL, 0, 0, &wake_q);
	sem_unlock(sma, -1);
	rcu_read_unlock();
	wake_up_q(&wake_q);
	return 0;
}

static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
		       int cmd, void __user *p)
{
	struct sem_array *sma;
	struct sem *curr;
	int err, nsems;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort *sem_io = fast_sem_io;
	DEFINE_WAKE_Q(wake_q);

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	nsems = sma->sem_nsems;

	err = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
		goto out_rcu_wakeup;

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_rcu_wakeup;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = p;
		int i;

		sem_lock(sma, NULL, -1);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}
		if (nsems > SEMMSL_FAST) {
			if (!ipc_rcu_getref(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
			sem_unlock(sma, -1);
			rcu_read_unlock();
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}

			rcu_read_lock();
			sem_lock_and_putref(sma);
			if (!ipc_valid_object(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
		}
		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sems[i].semval;
		sem_unlock(sma, -1);
		rcu_read_unlock();
		err = 0;
		if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		if (!ipc_rcu_getref(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_rcu_wakeup;
		}
		rcu_read_unlock();

		if (nsems > SEMMSL_FAST) {
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}
		}

		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				err = -ERANGE;
				goto out_free;
			}
		}
		rcu_read_lock();
		sem_lock_and_putref(sma);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}

		for (i = 0; i < nsems; i++) {
			sma->sems[i].semval = sem_io[i];
			ipc_update_pid(&sma->sems[i].sempid, task_tgid(current));
		}

		ipc_assert_locked_object(&sma->sem_perm);
		list_for_each_entry(un, &sma->list_id, list_id) {
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		}
		sma->sem_ctime = ktime_get_real_seconds();
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 0, &wake_q);
		err = 0;
		goto out_unlock;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
	}
	err = -EINVAL;
	if (semnum < 0 || semnum >= nsems)
		goto out_rcu_wakeup;

	sem_lock(sma, NULL, -1);
	if (!ipc_valid_object(&sma->sem_perm)) {
		err = -EIDRM;
		goto out_unlock;
	}

	semnum = array_index_nospec(semnum, nsems);
	curr = &sma->sems[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = pid_vnr(curr->sempid);
		goto out_unlock;
	case GETNCNT:
		err = count_semcnt(sma, semnum, 0);
		goto out_unlock;
	case GETZCNT:
		err = count_semcnt(sma, semnum, 1);
		goto out_unlock;
	}

out_unlock:
	sem_unlock(sma, -1);
out_rcu_wakeup:
	rcu_read_unlock();
	wake_up_q(&wake_q);
out_free:
	if (sem_io != fast_sem_io)
		kvfree(sem_io);
	return err;
}

static inline unsigned long
copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct semid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->sem_perm.uid = tbuf_old.sem_perm.uid;
		out->sem_perm.gid = tbuf_old.sem_perm.gid;
		out->sem_perm.mode = tbuf_old.sem_perm.mode;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

/*
 * This function handles some semctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: the caller must hold no locks; the rwsem is taken inside this function.
 */
static int semctl_down(struct ipc_namespace *ns, int semid,
		       int cmd, struct semid64_ds *semid64)
{
	struct sem_array *sma;
	int err;
	struct kern_ipc_perm *ipcp;

	down_write(&sem_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
				      &semid64->sem_perm, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	sma = container_of(ipcp, struct sem_array, sem_perm);

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		sem_lock(sma, NULL, -1);
		/* freeary unlocks the ipc object and rcu */
		freeary(ns, ipcp);
		goto out_up;
	case IPC_SET:
		sem_lock(sma, NULL, -1);
		err = ipc_update_perm(&semid64->sem_perm, ipcp);
		if (err)
			goto out_unlock0;
		sma->sem_ctime = ktime_get_real_seconds();
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	sem_unlock(sma, -1);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&sem_ids(ns).rwsem);
	return err;
}

long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
{
	int version;
	struct ipc_namespace *ns;
	void __user *p = (void __user *)arg;
	struct semid64_ds semid64;
	int err;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
	case SEM_STAT_ANY:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL: {
		int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
		/* big-endian 64bit */
		val = arg >> 32;
#else
		/* 32bit or little-endian 64bit */
		val = arg;
#endif
		return semctl_setval(ns, semid, semnum, val);
	}
	case IPC_SET:
		if (copy_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fall through */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}

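/*
 * Illustrative user-space sketch (example only, not exercised by this file):
 * how the command dispatch in ksys_semctl() above is typically reached.
 * For SETVAL, the value arrives in the syscall's final argument and is
 * extracted into 'val' above (note the big-endian 64-bit special case).
 *
 *	union semun { int val; unsigned short *array; };  // caller-defined,
 *							  // see semctl(2)
 *	union semun arg = { .val = 3 };
 *	semctl(id, 0, SETVAL, arg);	// -> semctl_setval()
 *	int v = semctl(id, 0, GETVAL);	// -> semctl_main()
 *	semctl(id, 0, IPC_RMID);	// -> semctl_down(), freeary()
 */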
SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
	return ksys_semctl(semid, semnum, cmd, arg);
}

#ifdef CONFIG_COMPAT

struct compat_semid_ds {
	struct compat_ipc_perm sem_perm;
	compat_time_t sem_otime;
	compat_time_t sem_ctime;
	compat_uptr_t sem_base;
	compat_uptr_t sem_pending;
	compat_uptr_t sem_pending_last;
	compat_uptr_t undo;
	unsigned short sem_nsems;
};

static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
				       int version)
{
	memset(out, 0, sizeof(*out));
	if (version == IPC_64) {
		struct compat_semid64_ds __user *p = buf;
		return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
	} else {
		struct compat_semid_ds __user *p = buf;
		return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
	}
}

static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
				     int version)
{
	if (version == IPC_64) {
		struct compat_semid64_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime	 = lower_32_bits(in->sem_otime);
		v.sem_otime_high = upper_32_bits(in->sem_otime);
		v.sem_ctime	 = lower_32_bits(in->sem_ctime);
		v.sem_ctime_high = upper_32_bits(in->sem_ctime);
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_semid_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	}
}

long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
{
	void __user *p = compat_ptr(arg);
	struct ipc_namespace *ns;
	struct semid64_ds semid64;
	int version = compat_ipc_parse_version(&cmd);
	int err;

	ns = current->nsproxy->ipc_ns;

	if (semid < 0)
		return -EINVAL;

	switch (cmd & (~IPC_64)) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
	case SEM_STAT_ANY:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_compat_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case GETALL:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
	case IPC_SET:
		if (copy_compat_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}

COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
{
	return compat_ksys_semctl(semid, semnum, cmd, arg);
}
#endif

/* If the task doesn't already have an undo_list, then allocate one
 * here. We guarantee there is only one thread using this undo list,
 * and current is THE ONE
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		spin_lock_init(&undo_list->lock);
		refcount_set(&undo_list->refcnt, 1);
		INIT_LIST_HEAD(&undo_list->list_proc);

		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
		if (un->semid == semid)
			return un;
	}
	return NULL;
}

static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	assert_spin_locked(&ulp->lock);

	un = __lookup_undo(ulp, semid);
	if (un) {
		list_del_rcu(&un->list_proc);
		list_add_rcu(&un->list_proc, &ulp->list_proc);
	}
	return un;
}

/**
 * find_alloc_undo - lookup (and if not present create) undo array
 * @ns: namespace
 * @semid: semaphore array id
 *
 * The function looks up (and if not present creates) the undo structure.
 * The size of the undo structure depends on the size of the semaphore
 * array, thus the alloc path is not that straightforward.
 * Lifetime-rules: sem_undo is rcu-protected; on success, the function
 * performs a rcu_read_lock().
 */
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems, error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	rcu_read_lock();
	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	spin_unlock(&ulp->lock);
	if (likely(un != NULL))
		goto out;

	/* no undo structure around - allocate one. */
	/* step 1: figure out the size of the semaphore array */
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return ERR_CAST(sma);
	}

	nsems = sma->sem_nsems;
	if (!ipc_rcu_getref(&sma->sem_perm)) {
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	rcu_read_unlock();

	/* step 2: allocate new undo structure */
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
		return ERR_PTR(-ENOMEM);
	}

	/* step 3: Acquire the lock on semaphore array */
	rcu_read_lock();
	sem_lock_and_putref(sma);
	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
1913 */ 1914 un = lookup_undo(ulp, semid); 1915 if (un) { 1916 kfree(new); 1917 goto success; 1918 } 1919 /* step 5: initialize & link new undo structure */ 1920 new->semadj = (short *) &new[1]; 1921 new->ulp = ulp; 1922 new->semid = semid; 1923 assert_spin_locked(&ulp->lock); 1924 list_add_rcu(&new->list_proc, &ulp->list_proc); 1925 ipc_assert_locked_object(&sma->sem_perm); 1926 list_add(&new->list_id, &sma->list_id); 1927 un = new; 1928 1929 success: 1930 spin_unlock(&ulp->lock); 1931 sem_unlock(sma, -1); 1932 out: 1933 return un; 1934 } 1935 1936 static long do_semtimedop(int semid, struct sembuf __user *tsops, 1937 unsigned nsops, const struct timespec64 *timeout) 1938 { 1939 int error = -EINVAL; 1940 struct sem_array *sma; 1941 struct sembuf fast_sops[SEMOPM_FAST]; 1942 struct sembuf *sops = fast_sops, *sop; 1943 struct sem_undo *un; 1944 int max, locknum; 1945 bool undos = false, alter = false, dupsop = false; 1946 struct sem_queue queue; 1947 unsigned long dup = 0, jiffies_left = 0; 1948 struct ipc_namespace *ns; 1949 1950 ns = current->nsproxy->ipc_ns; 1951 1952 if (nsops < 1 || semid < 0) 1953 return -EINVAL; 1954 if (nsops > ns->sc_semopm) 1955 return -E2BIG; 1956 if (nsops > SEMOPM_FAST) { 1957 sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); 1958 if (sops == NULL) 1959 return -ENOMEM; 1960 } 1961 1962 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 1963 error = -EFAULT; 1964 goto out_free; 1965 } 1966 1967 if (timeout) { 1968 if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 || 1969 timeout->tv_nsec >= 1000000000L) { 1970 error = -EINVAL; 1971 goto out_free; 1972 } 1973 jiffies_left = timespec64_to_jiffies(timeout); 1974 } 1975 1976 max = 0; 1977 for (sop = sops; sop < sops + nsops; sop++) { 1978 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); 1979 1980 if (sop->sem_num >= max) 1981 max = sop->sem_num; 1982 if (sop->sem_flg & SEM_UNDO) 1983 undos = true; 1984 if (dup & mask) { 1985 /* 1986 * There was a previous alter access that appears 1987 * to have accessed the same semaphore, thus use 1988 * the dupsop logic. "appears", because the detection 1989 * can only check % BITS_PER_LONG. 1990 */ 1991 dupsop = true; 1992 } 1993 if (sop->sem_op != 0) { 1994 alter = true; 1995 dup |= mask; 1996 } 1997 } 1998 1999 if (undos) { 2000 /* On success, find_alloc_undo takes the rcu_read_lock */ 2001 un = find_alloc_undo(ns, semid); 2002 if (IS_ERR(un)) { 2003 error = PTR_ERR(un); 2004 goto out_free; 2005 } 2006 } else { 2007 un = NULL; 2008 rcu_read_lock(); 2009 } 2010 2011 sma = sem_obtain_object_check(ns, semid); 2012 if (IS_ERR(sma)) { 2013 rcu_read_unlock(); 2014 error = PTR_ERR(sma); 2015 goto out_free; 2016 } 2017 2018 error = -EFBIG; 2019 if (max >= sma->sem_nsems) { 2020 rcu_read_unlock(); 2021 goto out_free; 2022 } 2023 2024 error = -EACCES; 2025 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { 2026 rcu_read_unlock(); 2027 goto out_free; 2028 } 2029 2030 error = security_sem_semop(&sma->sem_perm, sops, nsops, alter); 2031 if (error) { 2032 rcu_read_unlock(); 2033 goto out_free; 2034 } 2035 2036 error = -EIDRM; 2037 locknum = sem_lock(sma, sops, nsops); 2038 /* 2039 * We eventually might perform the following check in a lockless 2040 * fashion, considering ipc_valid_object() locking constraints. 2041 * If nsops == 1 and there is no contention for sem_perm.lock, then 2042 * only a per-semaphore lock is held and it's OK to proceed with the 2043 * check below. 
	 * More details on the fine-grained locking scheme entangled here,
	 * and on why it is RMID race safe, can be found in the comments at
	 * sem_lock().
	 */
	if (!ipc_valid_object(&sma->sem_perm))
		goto out_unlock_free;
	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array with the same id has been created.
	 * Check and fail.
	 * This case can be detected by checking un->semid. The existence
	 * of "un" itself is guaranteed by rcu.
	 */
	if (un && un->semid == -1)
		goto out_unlock_free;

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid(current);
	queue.alter = alter;
	queue.dupsop = dupsop;

	error = perform_atomic_semop(sma, &queue);
	if (error == 0) { /* non-blocking successful path */
		DEFINE_WAKE_Q(wake_q);

		/*
		 * If the operation was successful, then do
		 * the required updates.
		 */
		if (alter)
			do_smart_update(sma, sops, nsops, 1, &wake_q);
		else
			set_semotime(sma, sops);

		sem_unlock(sma, locknum);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		goto out_free;
	}
	if (error < 0) /* non-blocking error path */
		goto out_unlock_free;

	/*
	 * We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */
	if (nsops == 1) {
		struct sem *curr;
		int idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
		curr = &sma->sems[idx];

		if (alter) {
			if (sma->complex_count) {
				list_add_tail(&queue.list,
						&sma->pending_alter);
			} else {

				list_add_tail(&queue.list,
						&curr->pending_alter);
			}
		} else {
			list_add_tail(&queue.list, &curr->pending_const);
		}
	} else {
		if (!sma->complex_count)
			merge_queues(sma);

		if (alter)
			list_add_tail(&queue.list, &sma->pending_alter);
		else
			list_add_tail(&queue.list, &sma->pending_const);

		sma->complex_count++;
	}

	do {
		queue.status = -EINTR;
		queue.sleeper = current;

		__set_current_state(TASK_INTERRUPTIBLE);
		sem_unlock(sma, locknum);
		rcu_read_unlock();

		if (timeout)
			jiffies_left = schedule_timeout(jiffies_left);
		else
			schedule();

		/*
		 * fastpath: the semop has completed, either successfully or
		 * not; which of the two, from the syscall's point of view,
		 * is irrelevant to us at this point - we're done.
		 *
		 * We _do_ care, nonetheless, about being awoken by a signal or
		 * spuriously. The queue.status is checked again in the
		 * slowpath (aka after taking sem_lock), such that we can detect
		 * scenarios where we were awakened externally, during the
		 * window between wake_q_add() and wake_up_q().
		 */
		error = READ_ONCE(queue.status);
		if (error != -EINTR) {
			/*
			 * User space could assume that semop() is a memory
			 * barrier: Without the mb(), the cpu could
			 * speculatively read in userspace stale data that was
			 * overwritten by the previous owner of the semaphore.
			 */
			smp_mb();
			goto out_free;
		}

		rcu_read_lock();
		locknum = sem_lock(sma, sops, nsops);

		if (!ipc_valid_object(&sma->sem_perm))
			goto out_unlock_free;

		error = READ_ONCE(queue.status);

		/*
		 * If queue.status != -EINTR we are woken up by another process.
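		 * The waker already unlinked our queue entry - all work is
		 * done by the waker, see update_queue() and the comments at
		 * the top of this file.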
		 * Leave without unlink_queue(), but with sem_unlock().
		 */
		if (error != -EINTR)
			goto out_unlock_free;

		/*
		 * If an interrupt occurred we have to clean up the queue.
		 */
		if (timeout && jiffies_left == 0)
			error = -EAGAIN;
	} while (error == -EINTR && !signal_pending(current)); /* spurious */

	unlink_queue(sma, &queue);

out_unlock_free:
	sem_unlock(sma, locknum);
	rcu_read_unlock();
out_free:
	if (sops != fast_sops)
		kvfree(sops);
	return error;
}

long ksys_semtimedop(int semid, struct sembuf __user *tsops,
		     unsigned int nsops, const struct __kernel_timespec __user *timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsops, nsops, &ts);
	}
	return do_semtimedop(semid, tsops, nsops, NULL);
}

SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned int, nsops, const struct __kernel_timespec __user *, timeout)
{
	return ksys_semtimedop(semid, tsops, nsops, timeout);
}

#ifdef CONFIG_COMPAT_32BIT_TIME
long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
			    unsigned int nsops,
			    const struct compat_timespec __user *timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (compat_get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsems, nsops, &ts);
	}
	return do_semtimedop(semid, tsems, nsops, NULL);
}

COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
		       unsigned int, nsops,
		       const struct compat_timespec __user *, timeout)
{
	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
}
#endif

SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return do_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		refcount_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the set of
 * adjustments should be applied in an atomic manner or not. That is,
 * if we are attempting to decrement the semval, should we queue up and
 * wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
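 *
 * Concretely (illustrative user-space sketch, types from <sys/sem.h>):
 *
 *	struct sembuf sop = { .sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO };
 *	semop(semid, &sop, 1);	(records semadj[0] = -1 for this task)
 *	exit(0);		(exit_sem() below applies the -1 to semval)
 *
 * If applying an adjustment would take semval below zero, the current
 * implementation clamps the result instead of waiting.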
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *ulp;

	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
		return;
	tsk->sysvsem.undo_list = NULL;

	if (!refcount_dec_and_test(&ulp->refcnt))
		return;

	for (;;) {
		struct sem_array *sma;
		struct sem_undo *un;
		int semid, i;
		DEFINE_WAKE_Q(wake_q);

		cond_resched();

		rcu_read_lock();
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
		if (&un->list_proc == &ulp->list_proc) {
			/*
			 * We must wait for freeary() before freeing this ulp,
			 * in case we raced with the last sem_undo. There is
			 * a small window where we could exit while freeary()
			 * has not yet finished unlocking the sem_undo_list.
			 */
			spin_lock(&ulp->lock);
			spin_unlock(&ulp->lock);
			rcu_read_unlock();
			break;
		}
		spin_lock(&ulp->lock);
		semid = un->semid;
		spin_unlock(&ulp->lock);

		/* exit_sem raced with IPC_RMID, nothing to do */
		if (semid == -1) {
			rcu_read_unlock();
			continue;
		}

		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (IS_ERR(sma)) {
			rcu_read_unlock();
			continue;
		}

		sem_lock(sma, NULL, -1);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (!ipc_valid_object(&sma->sem_perm)) {
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}
		un = __lookup_undo(ulp, semid);
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
			 */
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}

		/* remove un from the linked lists */
		ipc_assert_locked_object(&sma->sem_perm);
		list_del(&un->list_id);

		/* we are the last process using this ulp, so acquiring
		 * ulp->lock isn't required. Besides that, we are also
		 * protected against IPC_RMID as we hold the sma->sem_perm
		 * lock now.
		 */
		list_del_rcu(&un->list_proc);

		/* perform adjustments registered in un */
		for (i = 0; i < sma->sem_nsems; i++) {
			struct sem *semaphore = &sma->sems[i];
			if (un->semadj[i]) {
				semaphore->semval += un->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by SUS:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
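				 * For example, semval 2 with a recorded
				 * semadj of -5 yields -3 and is clamped
				 * to 0; semval at SEMVMX with a semadj of
				 * +1 is clamped back to SEMVMX.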
				 *
				 * Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				ipc_update_pid(&semaphore->sempid, task_tgid(current));
			}
		}
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 1, &wake_q);
		sem_unlock(sma, -1);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		kfree_rcu(un, rcu);
	}
	kfree(ulp);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	time64_t sem_otime;

	/*
	 * The proc interface isn't aware of sem_lock(); it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
	 */
	complexmode_enter(sma);

	sem_otime = get_semotime(sma);

	seq_printf(s,
		   "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

	complexmode_tryleave(sma);

	return 0;
}
#endif
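
/*
 * Illustrative note (not part of the kernel build): with the format string
 * above, each line of /proc/sysvipc/sem reports, in order,
 *
 *	key semid perms nsems uid gid cuid cgid otime ctime
 *
 * where perms is printed in octal and otime/ctime are seconds since the
 * epoch. A minimal user-space sketch that dumps the table - the file name
 * and field meaning come from this file, everything else is plain stdio:
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[256];
 *		FILE *f = fopen("/proc/sysvipc/sem", "r");
 *
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);
 *		fclose(f);
 *		return 0;
 *	}
 */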