1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/ipc/sem.c 4 * Copyright (C) 1992 Krishna Balasubramanian 5 * Copyright (C) 1995 Eric Schenk, Bruno Haible 6 * 7 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 8 * 9 * SMP-threaded, sysctl's added 10 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 11 * Enforced range limit on SEM_UNDO 12 * (c) 2001 Red Hat Inc 13 * Lockless wakeup 14 * (c) 2003 Manfred Spraul <manfred@colorfullife.com> 15 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net> 16 * Further wakeup optimizations, documentation 17 * (c) 2010 Manfred Spraul <manfred@colorfullife.com> 18 * 19 * support for audit of ipc object properties and permission changes 20 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 21 * 22 * namespaces support 23 * OpenVZ, SWsoft Inc. 24 * Pavel Emelianov <xemul@openvz.org> 25 * 26 * Implementation notes: (May 2010) 27 * This file implements System V semaphores. 28 * 29 * User space visible behavior: 30 * - FIFO ordering for semop() operations (just FIFO, not starvation 31 * protection) 32 * - multiple semaphore operations that alter the same semaphore in 33 * one semop() are handled. 34 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and 35 * SETALL calls. 36 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO. 37 * - undo adjustments at process exit are limited to 0..SEMVMX. 38 * - namespace are supported. 39 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtine by writing 40 * to /proc/sys/kernel/sem. 41 * - statistics about the usage are reported in /proc/sysvipc/sem. 42 * 43 * Internals: 44 * - scalability: 45 * - all global variables are read-mostly. 46 * - semop() calls and semctl(RMID) are synchronized by RCU. 47 * - most operations do write operations (actually: spin_lock calls) to 48 * the per-semaphore array structure. 49 * Thus: Perfect SMP scaling between independent semaphore arrays. 50 * If multiple semaphores in one array are used, then cache line 51 * trashing on the semaphore array spinlock will limit the scaling. 52 * - semncnt and semzcnt are calculated on demand in count_semcnt() 53 * - the task that performs a successful semop() scans the list of all 54 * sleeping tasks and completes any pending operations that can be fulfilled. 55 * Semaphores are actively given to waiting tasks (necessary for FIFO). 56 * (see update_queue()) 57 * - To improve the scalability, the actual wake-up calls are performed after 58 * dropping all locks. (see wake_up_sem_queue_prepare()) 59 * - All work is done by the waker, the woken up task does not have to do 60 * anything - not even acquiring a lock or dropping a refcount. 61 * - A woken up task may not even touch the semaphore array anymore, it may 62 * have been destroyed already by a semctl(RMID). 63 * - UNDO values are stored in an array (one per process and per 64 * semaphore array, lazily allocated). For backwards compatibility, multiple 65 * modes for the UNDO variables are supported (per process, per thread) 66 * (see copy_semundo, CLONE_SYSVSEM) 67 * - There are two lists of the pending operations: a per-array list 68 * and per-semaphore list (stored in the array). This allows to achieve FIFO 69 * ordering without always scanning all pending operations. 70 * The worst-case behavior is nevertheless O(N^2) for N wakeups. 71 */ 72 73 #include <linux/slab.h> 74 #include <linux/spinlock.h> 75 #include <linux/init.h> 76 #include <linux/proc_fs.h> 77 #include <linux/time.h> 78 #include <linux/security.h> 79 #include <linux/syscalls.h> 80 #include <linux/audit.h> 81 #include <linux/capability.h> 82 #include <linux/seq_file.h> 83 #include <linux/rwsem.h> 84 #include <linux/nsproxy.h> 85 #include <linux/ipc_namespace.h> 86 #include <linux/sched/wake_q.h> 87 88 #include <linux/uaccess.h> 89 #include "util.h" 90 91 /* One semaphore structure for each semaphore in the system. */ 92 struct sem { 93 int semval; /* current value */ 94 /* 95 * PID of the process that last modified the semaphore. For 96 * Linux, specifically these are: 97 * - semop 98 * - semctl, via SETVAL and SETALL. 99 * - at task exit when performing undo adjustments (see exit_sem). 100 */ 101 struct pid *sempid; 102 spinlock_t lock; /* spinlock for fine-grained semtimedop */ 103 struct list_head pending_alter; /* pending single-sop operations */ 104 /* that alter the semaphore */ 105 struct list_head pending_const; /* pending single-sop operations */ 106 /* that do not alter the semaphore*/ 107 time_t sem_otime; /* candidate for sem_otime */ 108 } ____cacheline_aligned_in_smp; 109 110 /* One sem_array data structure for each set of semaphores in the system. */ 111 struct sem_array { 112 struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ 113 time64_t sem_ctime; /* create/last semctl() time */ 114 struct list_head pending_alter; /* pending operations */ 115 /* that alter the array */ 116 struct list_head pending_const; /* pending complex operations */ 117 /* that do not alter semvals */ 118 struct list_head list_id; /* undo requests on this array */ 119 int sem_nsems; /* no. of semaphores in array */ 120 int complex_count; /* pending complex operations */ 121 unsigned int use_global_lock;/* >0: global lock required */ 122 123 struct sem sems[]; 124 } __randomize_layout; 125 126 /* One queue for each sleeping process in the system. */ 127 struct sem_queue { 128 struct list_head list; /* queue of pending operations */ 129 struct task_struct *sleeper; /* this process */ 130 struct sem_undo *undo; /* undo structure */ 131 struct pid *pid; /* process id of requesting process */ 132 int status; /* completion status of operation */ 133 struct sembuf *sops; /* array of pending operations */ 134 struct sembuf *blocking; /* the operation that blocked */ 135 int nsops; /* number of operations */ 136 bool alter; /* does *sops alter the array? */ 137 bool dupsop; /* sops on more than one sem_num */ 138 }; 139 140 /* Each task has a list of undo requests. They are executed automatically 141 * when the process exits. 142 */ 143 struct sem_undo { 144 struct list_head list_proc; /* per-process list: * 145 * all undos from one process 146 * rcu protected */ 147 struct rcu_head rcu; /* rcu struct for sem_undo */ 148 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */ 149 struct list_head list_id; /* per semaphore array list: 150 * all undos for one array */ 151 int semid; /* semaphore set identifier */ 152 short *semadj; /* array of adjustments */ 153 /* one per semaphore */ 154 }; 155 156 /* sem_undo_list controls shared access to the list of sem_undo structures 157 * that may be shared among all a CLONE_SYSVSEM task group. 158 */ 159 struct sem_undo_list { 160 refcount_t refcnt; 161 spinlock_t lock; 162 struct list_head list_proc; 163 }; 164 165 166 #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 167 168 static int newary(struct ipc_namespace *, struct ipc_params *); 169 static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); 170 #ifdef CONFIG_PROC_FS 171 static int sysvipc_sem_proc_show(struct seq_file *s, void *it); 172 #endif 173 174 #define SEMMSL_FAST 256 /* 512 bytes on stack */ 175 #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ 176 177 /* 178 * Switching from the mode suitable for simple ops 179 * to the mode for complex ops is costly. Therefore: 180 * use some hysteresis 181 */ 182 #define USE_GLOBAL_LOCK_HYSTERESIS 10 183 184 /* 185 * Locking: 186 * a) global sem_lock() for read/write 187 * sem_undo.id_next, 188 * sem_array.complex_count, 189 * sem_array.pending{_alter,_const}, 190 * sem_array.sem_undo 191 * 192 * b) global or semaphore sem_lock() for read/write: 193 * sem_array.sems[i].pending_{const,alter}: 194 * 195 * c) special: 196 * sem_undo_list.list_proc: 197 * * undo_list->lock for write 198 * * rcu for read 199 * use_global_lock: 200 * * global sem_lock() for write 201 * * either local or global sem_lock() for read. 202 * 203 * Memory ordering: 204 * Most ordering is enforced by using spin_lock() and spin_unlock(). 205 * The special case is use_global_lock: 206 * Setting it from non-zero to 0 is a RELEASE, this is ensured by 207 * using smp_store_release(). 208 * Testing if it is non-zero is an ACQUIRE, this is ensured by using 209 * smp_load_acquire(). 210 * Setting it from 0 to non-zero must be ordered with regards to 211 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire() 212 * is inside a spin_lock() and after a write from 0 to non-zero a 213 * spin_lock()+spin_unlock() is done. 214 */ 215 216 #define sc_semmsl sem_ctls[0] 217 #define sc_semmns sem_ctls[1] 218 #define sc_semopm sem_ctls[2] 219 #define sc_semmni sem_ctls[3] 220 221 int sem_init_ns(struct ipc_namespace *ns) 222 { 223 ns->sc_semmsl = SEMMSL; 224 ns->sc_semmns = SEMMNS; 225 ns->sc_semopm = SEMOPM; 226 ns->sc_semmni = SEMMNI; 227 ns->used_sems = 0; 228 return ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 229 } 230 231 #ifdef CONFIG_IPC_NS 232 void sem_exit_ns(struct ipc_namespace *ns) 233 { 234 free_ipcs(ns, &sem_ids(ns), freeary); 235 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 236 rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht); 237 } 238 #endif 239 240 int __init sem_init(void) 241 { 242 const int err = sem_init_ns(&init_ipc_ns); 243 244 ipc_init_proc_interface("sysvipc/sem", 245 " key semid perms nsems uid gid cuid cgid otime ctime\n", 246 IPC_SEM_IDS, sysvipc_sem_proc_show); 247 return err; 248 } 249 250 /** 251 * unmerge_queues - unmerge queues, if possible. 252 * @sma: semaphore array 253 * 254 * The function unmerges the wait queues if complex_count is 0. 255 * It must be called prior to dropping the global semaphore array lock. 256 */ 257 static void unmerge_queues(struct sem_array *sma) 258 { 259 struct sem_queue *q, *tq; 260 261 /* complex operations still around? */ 262 if (sma->complex_count) 263 return; 264 /* 265 * We will switch back to simple mode. 266 * Move all pending operation back into the per-semaphore 267 * queues. 268 */ 269 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 270 struct sem *curr; 271 curr = &sma->sems[q->sops[0].sem_num]; 272 273 list_add_tail(&q->list, &curr->pending_alter); 274 } 275 INIT_LIST_HEAD(&sma->pending_alter); 276 } 277 278 /** 279 * merge_queues - merge single semop queues into global queue 280 * @sma: semaphore array 281 * 282 * This function merges all per-semaphore queues into the global queue. 283 * It is necessary to achieve FIFO ordering for the pending single-sop 284 * operations when a multi-semop operation must sleep. 285 * Only the alter operations must be moved, the const operations can stay. 286 */ 287 static void merge_queues(struct sem_array *sma) 288 { 289 int i; 290 for (i = 0; i < sma->sem_nsems; i++) { 291 struct sem *sem = &sma->sems[i]; 292 293 list_splice_init(&sem->pending_alter, &sma->pending_alter); 294 } 295 } 296 297 static void sem_rcu_free(struct rcu_head *head) 298 { 299 struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu); 300 struct sem_array *sma = container_of(p, struct sem_array, sem_perm); 301 302 security_sem_free(&sma->sem_perm); 303 kvfree(sma); 304 } 305 306 /* 307 * Enter the mode suitable for non-simple operations: 308 * Caller must own sem_perm.lock. 309 */ 310 static void complexmode_enter(struct sem_array *sma) 311 { 312 int i; 313 struct sem *sem; 314 315 if (sma->use_global_lock > 0) { 316 /* 317 * We are already in global lock mode. 318 * Nothing to do, just reset the 319 * counter until we return to simple mode. 320 */ 321 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 322 return; 323 } 324 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 325 326 for (i = 0; i < sma->sem_nsems; i++) { 327 sem = &sma->sems[i]; 328 spin_lock(&sem->lock); 329 spin_unlock(&sem->lock); 330 } 331 } 332 333 /* 334 * Try to leave the mode that disallows simple operations: 335 * Caller must own sem_perm.lock. 336 */ 337 static void complexmode_tryleave(struct sem_array *sma) 338 { 339 if (sma->complex_count) { 340 /* Complex ops are sleeping. 341 * We must stay in complex mode 342 */ 343 return; 344 } 345 if (sma->use_global_lock == 1) { 346 /* 347 * Immediately after setting use_global_lock to 0, 348 * a simple op can start. Thus: all memory writes 349 * performed by the current operation must be visible 350 * before we set use_global_lock to 0. 351 */ 352 smp_store_release(&sma->use_global_lock, 0); 353 } else { 354 sma->use_global_lock--; 355 } 356 } 357 358 #define SEM_GLOBAL_LOCK (-1) 359 /* 360 * If the request contains only one semaphore operation, and there are 361 * no complex transactions pending, lock only the semaphore involved. 362 * Otherwise, lock the entire semaphore array, since we either have 363 * multiple semaphores in our own semops, or we need to look at 364 * semaphores from other pending complex operations. 365 */ 366 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, 367 int nsops) 368 { 369 struct sem *sem; 370 371 if (nsops != 1) { 372 /* Complex operation - acquire a full lock */ 373 ipc_lock_object(&sma->sem_perm); 374 375 /* Prevent parallel simple ops */ 376 complexmode_enter(sma); 377 return SEM_GLOBAL_LOCK; 378 } 379 380 /* 381 * Only one semaphore affected - try to optimize locking. 382 * Optimized locking is possible if no complex operation 383 * is either enqueued or processed right now. 384 * 385 * Both facts are tracked by use_global_mode. 386 */ 387 sem = &sma->sems[sops->sem_num]; 388 389 /* 390 * Initial check for use_global_lock. Just an optimization, 391 * no locking, no memory barrier. 392 */ 393 if (!sma->use_global_lock) { 394 /* 395 * It appears that no complex operation is around. 396 * Acquire the per-semaphore lock. 397 */ 398 spin_lock(&sem->lock); 399 400 /* pairs with smp_store_release() */ 401 if (!smp_load_acquire(&sma->use_global_lock)) { 402 /* fast path successful! */ 403 return sops->sem_num; 404 } 405 spin_unlock(&sem->lock); 406 } 407 408 /* slow path: acquire the full lock */ 409 ipc_lock_object(&sma->sem_perm); 410 411 if (sma->use_global_lock == 0) { 412 /* 413 * The use_global_lock mode ended while we waited for 414 * sma->sem_perm.lock. Thus we must switch to locking 415 * with sem->lock. 416 * Unlike in the fast path, there is no need to recheck 417 * sma->use_global_lock after we have acquired sem->lock: 418 * We own sma->sem_perm.lock, thus use_global_lock cannot 419 * change. 420 */ 421 spin_lock(&sem->lock); 422 423 ipc_unlock_object(&sma->sem_perm); 424 return sops->sem_num; 425 } else { 426 /* 427 * Not a false alarm, thus continue to use the global lock 428 * mode. No need for complexmode_enter(), this was done by 429 * the caller that has set use_global_mode to non-zero. 430 */ 431 return SEM_GLOBAL_LOCK; 432 } 433 } 434 435 static inline void sem_unlock(struct sem_array *sma, int locknum) 436 { 437 if (locknum == SEM_GLOBAL_LOCK) { 438 unmerge_queues(sma); 439 complexmode_tryleave(sma); 440 ipc_unlock_object(&sma->sem_perm); 441 } else { 442 struct sem *sem = &sma->sems[locknum]; 443 spin_unlock(&sem->lock); 444 } 445 } 446 447 /* 448 * sem_lock_(check_) routines are called in the paths where the rwsem 449 * is not held. 450 * 451 * The caller holds the RCU read lock. 452 */ 453 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 454 { 455 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 456 457 if (IS_ERR(ipcp)) 458 return ERR_CAST(ipcp); 459 460 return container_of(ipcp, struct sem_array, sem_perm); 461 } 462 463 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 464 int id) 465 { 466 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 467 468 if (IS_ERR(ipcp)) 469 return ERR_CAST(ipcp); 470 471 return container_of(ipcp, struct sem_array, sem_perm); 472 } 473 474 static inline void sem_lock_and_putref(struct sem_array *sma) 475 { 476 sem_lock(sma, NULL, -1); 477 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 478 } 479 480 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 481 { 482 ipc_rmid(&sem_ids(ns), &s->sem_perm); 483 } 484 485 static struct sem_array *sem_alloc(size_t nsems) 486 { 487 struct sem_array *sma; 488 size_t size; 489 490 if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) 491 return NULL; 492 493 size = sizeof(*sma) + nsems * sizeof(sma->sems[0]); 494 sma = kvmalloc(size, GFP_KERNEL); 495 if (unlikely(!sma)) 496 return NULL; 497 498 memset(sma, 0, size); 499 500 return sma; 501 } 502 503 /** 504 * newary - Create a new semaphore set 505 * @ns: namespace 506 * @params: ptr to the structure that contains key, semflg and nsems 507 * 508 * Called with sem_ids.rwsem held (as a writer) 509 */ 510 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 511 { 512 int retval; 513 struct sem_array *sma; 514 key_t key = params->key; 515 int nsems = params->u.nsems; 516 int semflg = params->flg; 517 int i; 518 519 if (!nsems) 520 return -EINVAL; 521 if (ns->used_sems + nsems > ns->sc_semmns) 522 return -ENOSPC; 523 524 sma = sem_alloc(nsems); 525 if (!sma) 526 return -ENOMEM; 527 528 sma->sem_perm.mode = (semflg & S_IRWXUGO); 529 sma->sem_perm.key = key; 530 531 sma->sem_perm.security = NULL; 532 retval = security_sem_alloc(&sma->sem_perm); 533 if (retval) { 534 kvfree(sma); 535 return retval; 536 } 537 538 for (i = 0; i < nsems; i++) { 539 INIT_LIST_HEAD(&sma->sems[i].pending_alter); 540 INIT_LIST_HEAD(&sma->sems[i].pending_const); 541 spin_lock_init(&sma->sems[i].lock); 542 } 543 544 sma->complex_count = 0; 545 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 546 INIT_LIST_HEAD(&sma->pending_alter); 547 INIT_LIST_HEAD(&sma->pending_const); 548 INIT_LIST_HEAD(&sma->list_id); 549 sma->sem_nsems = nsems; 550 sma->sem_ctime = ktime_get_real_seconds(); 551 552 /* ipc_addid() locks sma upon success. */ 553 retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 554 if (retval < 0) { 555 call_rcu(&sma->sem_perm.rcu, sem_rcu_free); 556 return retval; 557 } 558 ns->used_sems += nsems; 559 560 sem_unlock(sma, -1); 561 rcu_read_unlock(); 562 563 return sma->sem_perm.id; 564 } 565 566 567 /* 568 * Called with sem_ids.rwsem and ipcp locked. 569 */ 570 static inline int sem_more_checks(struct kern_ipc_perm *ipcp, 571 struct ipc_params *params) 572 { 573 struct sem_array *sma; 574 575 sma = container_of(ipcp, struct sem_array, sem_perm); 576 if (params->u.nsems > sma->sem_nsems) 577 return -EINVAL; 578 579 return 0; 580 } 581 582 long ksys_semget(key_t key, int nsems, int semflg) 583 { 584 struct ipc_namespace *ns; 585 static const struct ipc_ops sem_ops = { 586 .getnew = newary, 587 .associate = security_sem_associate, 588 .more_checks = sem_more_checks, 589 }; 590 struct ipc_params sem_params; 591 592 ns = current->nsproxy->ipc_ns; 593 594 if (nsems < 0 || nsems > ns->sc_semmsl) 595 return -EINVAL; 596 597 sem_params.key = key; 598 sem_params.flg = semflg; 599 sem_params.u.nsems = nsems; 600 601 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 602 } 603 604 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 605 { 606 return ksys_semget(key, nsems, semflg); 607 } 608 609 /** 610 * perform_atomic_semop[_slow] - Attempt to perform semaphore 611 * operations on a given array. 612 * @sma: semaphore array 613 * @q: struct sem_queue that describes the operation 614 * 615 * Caller blocking are as follows, based the value 616 * indicated by the semaphore operation (sem_op): 617 * 618 * (1) >0 never blocks. 619 * (2) 0 (wait-for-zero operation): semval is non-zero. 620 * (3) <0 attempting to decrement semval to a value smaller than zero. 621 * 622 * Returns 0 if the operation was possible. 623 * Returns 1 if the operation is impossible, the caller must sleep. 624 * Returns <0 for error codes. 625 */ 626 static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) 627 { 628 int result, sem_op, nsops; 629 struct pid *pid; 630 struct sembuf *sop; 631 struct sem *curr; 632 struct sembuf *sops; 633 struct sem_undo *un; 634 635 sops = q->sops; 636 nsops = q->nsops; 637 un = q->undo; 638 639 for (sop = sops; sop < sops + nsops; sop++) { 640 curr = &sma->sems[sop->sem_num]; 641 sem_op = sop->sem_op; 642 result = curr->semval; 643 644 if (!sem_op && result) 645 goto would_block; 646 647 result += sem_op; 648 if (result < 0) 649 goto would_block; 650 if (result > SEMVMX) 651 goto out_of_range; 652 653 if (sop->sem_flg & SEM_UNDO) { 654 int undo = un->semadj[sop->sem_num] - sem_op; 655 /* Exceeding the undo range is an error. */ 656 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 657 goto out_of_range; 658 un->semadj[sop->sem_num] = undo; 659 } 660 661 curr->semval = result; 662 } 663 664 sop--; 665 pid = q->pid; 666 while (sop >= sops) { 667 ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid); 668 sop--; 669 } 670 671 return 0; 672 673 out_of_range: 674 result = -ERANGE; 675 goto undo; 676 677 would_block: 678 q->blocking = sop; 679 680 if (sop->sem_flg & IPC_NOWAIT) 681 result = -EAGAIN; 682 else 683 result = 1; 684 685 undo: 686 sop--; 687 while (sop >= sops) { 688 sem_op = sop->sem_op; 689 sma->sems[sop->sem_num].semval -= sem_op; 690 if (sop->sem_flg & SEM_UNDO) 691 un->semadj[sop->sem_num] += sem_op; 692 sop--; 693 } 694 695 return result; 696 } 697 698 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 699 { 700 int result, sem_op, nsops; 701 struct sembuf *sop; 702 struct sem *curr; 703 struct sembuf *sops; 704 struct sem_undo *un; 705 706 sops = q->sops; 707 nsops = q->nsops; 708 un = q->undo; 709 710 if (unlikely(q->dupsop)) 711 return perform_atomic_semop_slow(sma, q); 712 713 /* 714 * We scan the semaphore set twice, first to ensure that the entire 715 * operation can succeed, therefore avoiding any pointless writes 716 * to shared memory and having to undo such changes in order to block 717 * until the operations can go through. 718 */ 719 for (sop = sops; sop < sops + nsops; sop++) { 720 curr = &sma->sems[sop->sem_num]; 721 sem_op = sop->sem_op; 722 result = curr->semval; 723 724 if (!sem_op && result) 725 goto would_block; /* wait-for-zero */ 726 727 result += sem_op; 728 if (result < 0) 729 goto would_block; 730 731 if (result > SEMVMX) 732 return -ERANGE; 733 734 if (sop->sem_flg & SEM_UNDO) { 735 int undo = un->semadj[sop->sem_num] - sem_op; 736 737 /* Exceeding the undo range is an error. */ 738 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 739 return -ERANGE; 740 } 741 } 742 743 for (sop = sops; sop < sops + nsops; sop++) { 744 curr = &sma->sems[sop->sem_num]; 745 sem_op = sop->sem_op; 746 result = curr->semval; 747 748 if (sop->sem_flg & SEM_UNDO) { 749 int undo = un->semadj[sop->sem_num] - sem_op; 750 751 un->semadj[sop->sem_num] = undo; 752 } 753 curr->semval += sem_op; 754 ipc_update_pid(&curr->sempid, q->pid); 755 } 756 757 return 0; 758 759 would_block: 760 q->blocking = sop; 761 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; 762 } 763 764 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, 765 struct wake_q_head *wake_q) 766 { 767 wake_q_add(wake_q, q->sleeper); 768 /* 769 * Rely on the above implicit barrier, such that we can 770 * ensure that we hold reference to the task before setting 771 * q->status. Otherwise we could race with do_exit if the 772 * task is awoken by an external event before calling 773 * wake_up_process(). 774 */ 775 WRITE_ONCE(q->status, error); 776 } 777 778 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 779 { 780 list_del(&q->list); 781 if (q->nsops > 1) 782 sma->complex_count--; 783 } 784 785 /** check_restart(sma, q) 786 * @sma: semaphore array 787 * @q: the operation that just completed 788 * 789 * update_queue is O(N^2) when it restarts scanning the whole queue of 790 * waiting operations. Therefore this function checks if the restart is 791 * really necessary. It is called after a previously waiting operation 792 * modified the array. 793 * Note that wait-for-zero operations are handled without restart. 794 */ 795 static inline int check_restart(struct sem_array *sma, struct sem_queue *q) 796 { 797 /* pending complex alter operations are too difficult to analyse */ 798 if (!list_empty(&sma->pending_alter)) 799 return 1; 800 801 /* we were a sleeping complex operation. Too difficult */ 802 if (q->nsops > 1) 803 return 1; 804 805 /* It is impossible that someone waits for the new value: 806 * - complex operations always restart. 807 * - wait-for-zero are handled seperately. 808 * - q is a previously sleeping simple operation that 809 * altered the array. It must be a decrement, because 810 * simple increments never sleep. 811 * - If there are older (higher priority) decrements 812 * in the queue, then they have observed the original 813 * semval value and couldn't proceed. The operation 814 * decremented to value - thus they won't proceed either. 815 */ 816 return 0; 817 } 818 819 /** 820 * wake_const_ops - wake up non-alter tasks 821 * @sma: semaphore array. 822 * @semnum: semaphore that was modified. 823 * @wake_q: lockless wake-queue head. 824 * 825 * wake_const_ops must be called after a semaphore in a semaphore array 826 * was set to 0. If complex const operations are pending, wake_const_ops must 827 * be called with semnum = -1, as well as with the number of each modified 828 * semaphore. 829 * The tasks that must be woken up are added to @wake_q. The return code 830 * is stored in q->pid. 831 * The function returns 1 if at least one operation was completed successfully. 832 */ 833 static int wake_const_ops(struct sem_array *sma, int semnum, 834 struct wake_q_head *wake_q) 835 { 836 struct sem_queue *q, *tmp; 837 struct list_head *pending_list; 838 int semop_completed = 0; 839 840 if (semnum == -1) 841 pending_list = &sma->pending_const; 842 else 843 pending_list = &sma->sems[semnum].pending_const; 844 845 list_for_each_entry_safe(q, tmp, pending_list, list) { 846 int error = perform_atomic_semop(sma, q); 847 848 if (error > 0) 849 continue; 850 /* operation completed, remove from queue & wakeup */ 851 unlink_queue(sma, q); 852 853 wake_up_sem_queue_prepare(q, error, wake_q); 854 if (error == 0) 855 semop_completed = 1; 856 } 857 858 return semop_completed; 859 } 860 861 /** 862 * do_smart_wakeup_zero - wakeup all wait for zero tasks 863 * @sma: semaphore array 864 * @sops: operations that were performed 865 * @nsops: number of operations 866 * @wake_q: lockless wake-queue head 867 * 868 * Checks all required queue for wait-for-zero operations, based 869 * on the actual changes that were performed on the semaphore array. 870 * The function returns 1 if at least one operation was completed successfully. 871 */ 872 static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, 873 int nsops, struct wake_q_head *wake_q) 874 { 875 int i; 876 int semop_completed = 0; 877 int got_zero = 0; 878 879 /* first: the per-semaphore queues, if known */ 880 if (sops) { 881 for (i = 0; i < nsops; i++) { 882 int num = sops[i].sem_num; 883 884 if (sma->sems[num].semval == 0) { 885 got_zero = 1; 886 semop_completed |= wake_const_ops(sma, num, wake_q); 887 } 888 } 889 } else { 890 /* 891 * No sops means modified semaphores not known. 892 * Assume all were changed. 893 */ 894 for (i = 0; i < sma->sem_nsems; i++) { 895 if (sma->sems[i].semval == 0) { 896 got_zero = 1; 897 semop_completed |= wake_const_ops(sma, i, wake_q); 898 } 899 } 900 } 901 /* 902 * If one of the modified semaphores got 0, 903 * then check the global queue, too. 904 */ 905 if (got_zero) 906 semop_completed |= wake_const_ops(sma, -1, wake_q); 907 908 return semop_completed; 909 } 910 911 912 /** 913 * update_queue - look for tasks that can be completed. 914 * @sma: semaphore array. 915 * @semnum: semaphore that was modified. 916 * @wake_q: lockless wake-queue head. 917 * 918 * update_queue must be called after a semaphore in a semaphore array 919 * was modified. If multiple semaphores were modified, update_queue must 920 * be called with semnum = -1, as well as with the number of each modified 921 * semaphore. 922 * The tasks that must be woken up are added to @wake_q. The return code 923 * is stored in q->pid. 924 * The function internally checks if const operations can now succeed. 925 * 926 * The function return 1 if at least one semop was completed successfully. 927 */ 928 static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q) 929 { 930 struct sem_queue *q, *tmp; 931 struct list_head *pending_list; 932 int semop_completed = 0; 933 934 if (semnum == -1) 935 pending_list = &sma->pending_alter; 936 else 937 pending_list = &sma->sems[semnum].pending_alter; 938 939 again: 940 list_for_each_entry_safe(q, tmp, pending_list, list) { 941 int error, restart; 942 943 /* If we are scanning the single sop, per-semaphore list of 944 * one semaphore and that semaphore is 0, then it is not 945 * necessary to scan further: simple increments 946 * that affect only one entry succeed immediately and cannot 947 * be in the per semaphore pending queue, and decrements 948 * cannot be successful if the value is already 0. 949 */ 950 if (semnum != -1 && sma->sems[semnum].semval == 0) 951 break; 952 953 error = perform_atomic_semop(sma, q); 954 955 /* Does q->sleeper still need to sleep? */ 956 if (error > 0) 957 continue; 958 959 unlink_queue(sma, q); 960 961 if (error) { 962 restart = 0; 963 } else { 964 semop_completed = 1; 965 do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q); 966 restart = check_restart(sma, q); 967 } 968 969 wake_up_sem_queue_prepare(q, error, wake_q); 970 if (restart) 971 goto again; 972 } 973 return semop_completed; 974 } 975 976 /** 977 * set_semotime - set sem_otime 978 * @sma: semaphore array 979 * @sops: operations that modified the array, may be NULL 980 * 981 * sem_otime is replicated to avoid cache line trashing. 982 * This function sets one instance to the current time. 983 */ 984 static void set_semotime(struct sem_array *sma, struct sembuf *sops) 985 { 986 if (sops == NULL) { 987 sma->sems[0].sem_otime = get_seconds(); 988 } else { 989 sma->sems[sops[0].sem_num].sem_otime = 990 get_seconds(); 991 } 992 } 993 994 /** 995 * do_smart_update - optimized update_queue 996 * @sma: semaphore array 997 * @sops: operations that were performed 998 * @nsops: number of operations 999 * @otime: force setting otime 1000 * @wake_q: lockless wake-queue head 1001 * 1002 * do_smart_update() does the required calls to update_queue and wakeup_zero, 1003 * based on the actual changes that were performed on the semaphore array. 1004 * Note that the function does not do the actual wake-up: the caller is 1005 * responsible for calling wake_up_q(). 1006 * It is safe to perform this call after dropping all locks. 1007 */ 1008 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, 1009 int otime, struct wake_q_head *wake_q) 1010 { 1011 int i; 1012 1013 otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q); 1014 1015 if (!list_empty(&sma->pending_alter)) { 1016 /* semaphore array uses the global queue - just process it. */ 1017 otime |= update_queue(sma, -1, wake_q); 1018 } else { 1019 if (!sops) { 1020 /* 1021 * No sops, thus the modified semaphores are not 1022 * known. Check all. 1023 */ 1024 for (i = 0; i < sma->sem_nsems; i++) 1025 otime |= update_queue(sma, i, wake_q); 1026 } else { 1027 /* 1028 * Check the semaphores that were increased: 1029 * - No complex ops, thus all sleeping ops are 1030 * decrease. 1031 * - if we decreased the value, then any sleeping 1032 * semaphore ops wont be able to run: If the 1033 * previous value was too small, then the new 1034 * value will be too small, too. 1035 */ 1036 for (i = 0; i < nsops; i++) { 1037 if (sops[i].sem_op > 0) { 1038 otime |= update_queue(sma, 1039 sops[i].sem_num, wake_q); 1040 } 1041 } 1042 } 1043 } 1044 if (otime) 1045 set_semotime(sma, sops); 1046 } 1047 1048 /* 1049 * check_qop: Test if a queued operation sleeps on the semaphore semnum 1050 */ 1051 static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q, 1052 bool count_zero) 1053 { 1054 struct sembuf *sop = q->blocking; 1055 1056 /* 1057 * Linux always (since 0.99.10) reported a task as sleeping on all 1058 * semaphores. This violates SUS, therefore it was changed to the 1059 * standard compliant behavior. 1060 * Give the administrators a chance to notice that an application 1061 * might misbehave because it relies on the Linux behavior. 1062 */ 1063 pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n" 1064 "The task %s (%d) triggered the difference, watch for misbehavior.\n", 1065 current->comm, task_pid_nr(current)); 1066 1067 if (sop->sem_num != semnum) 1068 return 0; 1069 1070 if (count_zero && sop->sem_op == 0) 1071 return 1; 1072 if (!count_zero && sop->sem_op < 0) 1073 return 1; 1074 1075 return 0; 1076 } 1077 1078 /* The following counts are associated to each semaphore: 1079 * semncnt number of tasks waiting on semval being nonzero 1080 * semzcnt number of tasks waiting on semval being zero 1081 * 1082 * Per definition, a task waits only on the semaphore of the first semop 1083 * that cannot proceed, even if additional operation would block, too. 1084 */ 1085 static int count_semcnt(struct sem_array *sma, ushort semnum, 1086 bool count_zero) 1087 { 1088 struct list_head *l; 1089 struct sem_queue *q; 1090 int semcnt; 1091 1092 semcnt = 0; 1093 /* First: check the simple operations. They are easy to evaluate */ 1094 if (count_zero) 1095 l = &sma->sems[semnum].pending_const; 1096 else 1097 l = &sma->sems[semnum].pending_alter; 1098 1099 list_for_each_entry(q, l, list) { 1100 /* all task on a per-semaphore list sleep on exactly 1101 * that semaphore 1102 */ 1103 semcnt++; 1104 } 1105 1106 /* Then: check the complex operations. */ 1107 list_for_each_entry(q, &sma->pending_alter, list) { 1108 semcnt += check_qop(sma, semnum, q, count_zero); 1109 } 1110 if (count_zero) { 1111 list_for_each_entry(q, &sma->pending_const, list) { 1112 semcnt += check_qop(sma, semnum, q, count_zero); 1113 } 1114 } 1115 return semcnt; 1116 } 1117 1118 /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked 1119 * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem 1120 * remains locked on exit. 1121 */ 1122 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 1123 { 1124 struct sem_undo *un, *tu; 1125 struct sem_queue *q, *tq; 1126 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 1127 int i; 1128 DEFINE_WAKE_Q(wake_q); 1129 1130 /* Free the existing undo structures for this semaphore set. */ 1131 ipc_assert_locked_object(&sma->sem_perm); 1132 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 1133 list_del(&un->list_id); 1134 spin_lock(&un->ulp->lock); 1135 un->semid = -1; 1136 list_del_rcu(&un->list_proc); 1137 spin_unlock(&un->ulp->lock); 1138 kfree_rcu(un, rcu); 1139 } 1140 1141 /* Wake up all pending processes and let them fail with EIDRM. */ 1142 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1143 unlink_queue(sma, q); 1144 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1145 } 1146 1147 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1148 unlink_queue(sma, q); 1149 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1150 } 1151 for (i = 0; i < sma->sem_nsems; i++) { 1152 struct sem *sem = &sma->sems[i]; 1153 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1154 unlink_queue(sma, q); 1155 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1156 } 1157 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1158 unlink_queue(sma, q); 1159 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1160 } 1161 ipc_update_pid(&sem->sempid, NULL); 1162 } 1163 1164 /* Remove the semaphore set from the IDR */ 1165 sem_rmid(ns, sma); 1166 sem_unlock(sma, -1); 1167 rcu_read_unlock(); 1168 1169 wake_up_q(&wake_q); 1170 ns->used_sems -= sma->sem_nsems; 1171 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1172 } 1173 1174 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1175 { 1176 switch (version) { 1177 case IPC_64: 1178 return copy_to_user(buf, in, sizeof(*in)); 1179 case IPC_OLD: 1180 { 1181 struct semid_ds out; 1182 1183 memset(&out, 0, sizeof(out)); 1184 1185 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1186 1187 out.sem_otime = in->sem_otime; 1188 out.sem_ctime = in->sem_ctime; 1189 out.sem_nsems = in->sem_nsems; 1190 1191 return copy_to_user(buf, &out, sizeof(out)); 1192 } 1193 default: 1194 return -EINVAL; 1195 } 1196 } 1197 1198 static time64_t get_semotime(struct sem_array *sma) 1199 { 1200 int i; 1201 time64_t res; 1202 1203 res = sma->sems[0].sem_otime; 1204 for (i = 1; i < sma->sem_nsems; i++) { 1205 time64_t to = sma->sems[i].sem_otime; 1206 1207 if (to > res) 1208 res = to; 1209 } 1210 return res; 1211 } 1212 1213 static int semctl_stat(struct ipc_namespace *ns, int semid, 1214 int cmd, struct semid64_ds *semid64) 1215 { 1216 struct sem_array *sma; 1217 int id = 0; 1218 int err; 1219 1220 memset(semid64, 0, sizeof(*semid64)); 1221 1222 rcu_read_lock(); 1223 if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) { 1224 sma = sem_obtain_object(ns, semid); 1225 if (IS_ERR(sma)) { 1226 err = PTR_ERR(sma); 1227 goto out_unlock; 1228 } 1229 id = sma->sem_perm.id; 1230 } else { /* IPC_STAT */ 1231 sma = sem_obtain_object_check(ns, semid); 1232 if (IS_ERR(sma)) { 1233 err = PTR_ERR(sma); 1234 goto out_unlock; 1235 } 1236 } 1237 1238 /* see comment for SHM_STAT_ANY */ 1239 if (cmd == SEM_STAT_ANY) 1240 audit_ipc_obj(&sma->sem_perm); 1241 else { 1242 err = -EACCES; 1243 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1244 goto out_unlock; 1245 } 1246 1247 err = security_sem_semctl(&sma->sem_perm, cmd); 1248 if (err) 1249 goto out_unlock; 1250 1251 ipc_lock_object(&sma->sem_perm); 1252 1253 if (!ipc_valid_object(&sma->sem_perm)) { 1254 ipc_unlock_object(&sma->sem_perm); 1255 err = -EIDRM; 1256 goto out_unlock; 1257 } 1258 1259 kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm); 1260 semid64->sem_otime = get_semotime(sma); 1261 semid64->sem_ctime = sma->sem_ctime; 1262 semid64->sem_nsems = sma->sem_nsems; 1263 1264 ipc_unlock_object(&sma->sem_perm); 1265 rcu_read_unlock(); 1266 return id; 1267 1268 out_unlock: 1269 rcu_read_unlock(); 1270 return err; 1271 } 1272 1273 static int semctl_info(struct ipc_namespace *ns, int semid, 1274 int cmd, void __user *p) 1275 { 1276 struct seminfo seminfo; 1277 int max_id; 1278 int err; 1279 1280 err = security_sem_semctl(NULL, cmd); 1281 if (err) 1282 return err; 1283 1284 memset(&seminfo, 0, sizeof(seminfo)); 1285 seminfo.semmni = ns->sc_semmni; 1286 seminfo.semmns = ns->sc_semmns; 1287 seminfo.semmsl = ns->sc_semmsl; 1288 seminfo.semopm = ns->sc_semopm; 1289 seminfo.semvmx = SEMVMX; 1290 seminfo.semmnu = SEMMNU; 1291 seminfo.semmap = SEMMAP; 1292 seminfo.semume = SEMUME; 1293 down_read(&sem_ids(ns).rwsem); 1294 if (cmd == SEM_INFO) { 1295 seminfo.semusz = sem_ids(ns).in_use; 1296 seminfo.semaem = ns->used_sems; 1297 } else { 1298 seminfo.semusz = SEMUSZ; 1299 seminfo.semaem = SEMAEM; 1300 } 1301 max_id = ipc_get_maxid(&sem_ids(ns)); 1302 up_read(&sem_ids(ns).rwsem); 1303 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 1304 return -EFAULT; 1305 return (max_id < 0) ? 0 : max_id; 1306 } 1307 1308 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1309 int val) 1310 { 1311 struct sem_undo *un; 1312 struct sem_array *sma; 1313 struct sem *curr; 1314 int err; 1315 DEFINE_WAKE_Q(wake_q); 1316 1317 if (val > SEMVMX || val < 0) 1318 return -ERANGE; 1319 1320 rcu_read_lock(); 1321 sma = sem_obtain_object_check(ns, semid); 1322 if (IS_ERR(sma)) { 1323 rcu_read_unlock(); 1324 return PTR_ERR(sma); 1325 } 1326 1327 if (semnum < 0 || semnum >= sma->sem_nsems) { 1328 rcu_read_unlock(); 1329 return -EINVAL; 1330 } 1331 1332 1333 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1334 rcu_read_unlock(); 1335 return -EACCES; 1336 } 1337 1338 err = security_sem_semctl(&sma->sem_perm, SETVAL); 1339 if (err) { 1340 rcu_read_unlock(); 1341 return -EACCES; 1342 } 1343 1344 sem_lock(sma, NULL, -1); 1345 1346 if (!ipc_valid_object(&sma->sem_perm)) { 1347 sem_unlock(sma, -1); 1348 rcu_read_unlock(); 1349 return -EIDRM; 1350 } 1351 1352 curr = &sma->sems[semnum]; 1353 1354 ipc_assert_locked_object(&sma->sem_perm); 1355 list_for_each_entry(un, &sma->list_id, list_id) 1356 un->semadj[semnum] = 0; 1357 1358 curr->semval = val; 1359 ipc_update_pid(&curr->sempid, task_tgid(current)); 1360 sma->sem_ctime = ktime_get_real_seconds(); 1361 /* maybe some queued-up processes were waiting for this */ 1362 do_smart_update(sma, NULL, 0, 0, &wake_q); 1363 sem_unlock(sma, -1); 1364 rcu_read_unlock(); 1365 wake_up_q(&wake_q); 1366 return 0; 1367 } 1368 1369 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1370 int cmd, void __user *p) 1371 { 1372 struct sem_array *sma; 1373 struct sem *curr; 1374 int err, nsems; 1375 ushort fast_sem_io[SEMMSL_FAST]; 1376 ushort *sem_io = fast_sem_io; 1377 DEFINE_WAKE_Q(wake_q); 1378 1379 rcu_read_lock(); 1380 sma = sem_obtain_object_check(ns, semid); 1381 if (IS_ERR(sma)) { 1382 rcu_read_unlock(); 1383 return PTR_ERR(sma); 1384 } 1385 1386 nsems = sma->sem_nsems; 1387 1388 err = -EACCES; 1389 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO)) 1390 goto out_rcu_wakeup; 1391 1392 err = security_sem_semctl(&sma->sem_perm, cmd); 1393 if (err) 1394 goto out_rcu_wakeup; 1395 1396 err = -EACCES; 1397 switch (cmd) { 1398 case GETALL: 1399 { 1400 ushort __user *array = p; 1401 int i; 1402 1403 sem_lock(sma, NULL, -1); 1404 if (!ipc_valid_object(&sma->sem_perm)) { 1405 err = -EIDRM; 1406 goto out_unlock; 1407 } 1408 if (nsems > SEMMSL_FAST) { 1409 if (!ipc_rcu_getref(&sma->sem_perm)) { 1410 err = -EIDRM; 1411 goto out_unlock; 1412 } 1413 sem_unlock(sma, -1); 1414 rcu_read_unlock(); 1415 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1416 GFP_KERNEL); 1417 if (sem_io == NULL) { 1418 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1419 return -ENOMEM; 1420 } 1421 1422 rcu_read_lock(); 1423 sem_lock_and_putref(sma); 1424 if (!ipc_valid_object(&sma->sem_perm)) { 1425 err = -EIDRM; 1426 goto out_unlock; 1427 } 1428 } 1429 for (i = 0; i < sma->sem_nsems; i++) 1430 sem_io[i] = sma->sems[i].semval; 1431 sem_unlock(sma, -1); 1432 rcu_read_unlock(); 1433 err = 0; 1434 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1435 err = -EFAULT; 1436 goto out_free; 1437 } 1438 case SETALL: 1439 { 1440 int i; 1441 struct sem_undo *un; 1442 1443 if (!ipc_rcu_getref(&sma->sem_perm)) { 1444 err = -EIDRM; 1445 goto out_rcu_wakeup; 1446 } 1447 rcu_read_unlock(); 1448 1449 if (nsems > SEMMSL_FAST) { 1450 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1451 GFP_KERNEL); 1452 if (sem_io == NULL) { 1453 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1454 return -ENOMEM; 1455 } 1456 } 1457 1458 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1459 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1460 err = -EFAULT; 1461 goto out_free; 1462 } 1463 1464 for (i = 0; i < nsems; i++) { 1465 if (sem_io[i] > SEMVMX) { 1466 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1467 err = -ERANGE; 1468 goto out_free; 1469 } 1470 } 1471 rcu_read_lock(); 1472 sem_lock_and_putref(sma); 1473 if (!ipc_valid_object(&sma->sem_perm)) { 1474 err = -EIDRM; 1475 goto out_unlock; 1476 } 1477 1478 for (i = 0; i < nsems; i++) { 1479 sma->sems[i].semval = sem_io[i]; 1480 ipc_update_pid(&sma->sems[i].sempid, task_tgid(current)); 1481 } 1482 1483 ipc_assert_locked_object(&sma->sem_perm); 1484 list_for_each_entry(un, &sma->list_id, list_id) { 1485 for (i = 0; i < nsems; i++) 1486 un->semadj[i] = 0; 1487 } 1488 sma->sem_ctime = ktime_get_real_seconds(); 1489 /* maybe some queued-up processes were waiting for this */ 1490 do_smart_update(sma, NULL, 0, 0, &wake_q); 1491 err = 0; 1492 goto out_unlock; 1493 } 1494 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1495 } 1496 err = -EINVAL; 1497 if (semnum < 0 || semnum >= nsems) 1498 goto out_rcu_wakeup; 1499 1500 sem_lock(sma, NULL, -1); 1501 if (!ipc_valid_object(&sma->sem_perm)) { 1502 err = -EIDRM; 1503 goto out_unlock; 1504 } 1505 curr = &sma->sems[semnum]; 1506 1507 switch (cmd) { 1508 case GETVAL: 1509 err = curr->semval; 1510 goto out_unlock; 1511 case GETPID: 1512 err = pid_vnr(curr->sempid); 1513 goto out_unlock; 1514 case GETNCNT: 1515 err = count_semcnt(sma, semnum, 0); 1516 goto out_unlock; 1517 case GETZCNT: 1518 err = count_semcnt(sma, semnum, 1); 1519 goto out_unlock; 1520 } 1521 1522 out_unlock: 1523 sem_unlock(sma, -1); 1524 out_rcu_wakeup: 1525 rcu_read_unlock(); 1526 wake_up_q(&wake_q); 1527 out_free: 1528 if (sem_io != fast_sem_io) 1529 kvfree(sem_io); 1530 return err; 1531 } 1532 1533 static inline unsigned long 1534 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1535 { 1536 switch (version) { 1537 case IPC_64: 1538 if (copy_from_user(out, buf, sizeof(*out))) 1539 return -EFAULT; 1540 return 0; 1541 case IPC_OLD: 1542 { 1543 struct semid_ds tbuf_old; 1544 1545 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1546 return -EFAULT; 1547 1548 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1549 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1550 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1551 1552 return 0; 1553 } 1554 default: 1555 return -EINVAL; 1556 } 1557 } 1558 1559 /* 1560 * This function handles some semctl commands which require the rwsem 1561 * to be held in write mode. 1562 * NOTE: no locks must be held, the rwsem is taken inside this function. 1563 */ 1564 static int semctl_down(struct ipc_namespace *ns, int semid, 1565 int cmd, struct semid64_ds *semid64) 1566 { 1567 struct sem_array *sma; 1568 int err; 1569 struct kern_ipc_perm *ipcp; 1570 1571 down_write(&sem_ids(ns).rwsem); 1572 rcu_read_lock(); 1573 1574 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, 1575 &semid64->sem_perm, 0); 1576 if (IS_ERR(ipcp)) { 1577 err = PTR_ERR(ipcp); 1578 goto out_unlock1; 1579 } 1580 1581 sma = container_of(ipcp, struct sem_array, sem_perm); 1582 1583 err = security_sem_semctl(&sma->sem_perm, cmd); 1584 if (err) 1585 goto out_unlock1; 1586 1587 switch (cmd) { 1588 case IPC_RMID: 1589 sem_lock(sma, NULL, -1); 1590 /* freeary unlocks the ipc object and rcu */ 1591 freeary(ns, ipcp); 1592 goto out_up; 1593 case IPC_SET: 1594 sem_lock(sma, NULL, -1); 1595 err = ipc_update_perm(&semid64->sem_perm, ipcp); 1596 if (err) 1597 goto out_unlock0; 1598 sma->sem_ctime = ktime_get_real_seconds(); 1599 break; 1600 default: 1601 err = -EINVAL; 1602 goto out_unlock1; 1603 } 1604 1605 out_unlock0: 1606 sem_unlock(sma, -1); 1607 out_unlock1: 1608 rcu_read_unlock(); 1609 out_up: 1610 up_write(&sem_ids(ns).rwsem); 1611 return err; 1612 } 1613 1614 long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) 1615 { 1616 int version; 1617 struct ipc_namespace *ns; 1618 void __user *p = (void __user *)arg; 1619 struct semid64_ds semid64; 1620 int err; 1621 1622 if (semid < 0) 1623 return -EINVAL; 1624 1625 version = ipc_parse_version(&cmd); 1626 ns = current->nsproxy->ipc_ns; 1627 1628 switch (cmd) { 1629 case IPC_INFO: 1630 case SEM_INFO: 1631 return semctl_info(ns, semid, cmd, p); 1632 case IPC_STAT: 1633 case SEM_STAT: 1634 case SEM_STAT_ANY: 1635 err = semctl_stat(ns, semid, cmd, &semid64); 1636 if (err < 0) 1637 return err; 1638 if (copy_semid_to_user(p, &semid64, version)) 1639 err = -EFAULT; 1640 return err; 1641 case GETALL: 1642 case GETVAL: 1643 case GETPID: 1644 case GETNCNT: 1645 case GETZCNT: 1646 case SETALL: 1647 return semctl_main(ns, semid, semnum, cmd, p); 1648 case SETVAL: { 1649 int val; 1650 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1651 /* big-endian 64bit */ 1652 val = arg >> 32; 1653 #else 1654 /* 32bit or little-endian 64bit */ 1655 val = arg; 1656 #endif 1657 return semctl_setval(ns, semid, semnum, val); 1658 } 1659 case IPC_SET: 1660 if (copy_semid_from_user(&semid64, p, version)) 1661 return -EFAULT; 1662 case IPC_RMID: 1663 return semctl_down(ns, semid, cmd, &semid64); 1664 default: 1665 return -EINVAL; 1666 } 1667 } 1668 1669 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1670 { 1671 return ksys_semctl(semid, semnum, cmd, arg); 1672 } 1673 1674 #ifdef CONFIG_COMPAT 1675 1676 struct compat_semid_ds { 1677 struct compat_ipc_perm sem_perm; 1678 compat_time_t sem_otime; 1679 compat_time_t sem_ctime; 1680 compat_uptr_t sem_base; 1681 compat_uptr_t sem_pending; 1682 compat_uptr_t sem_pending_last; 1683 compat_uptr_t undo; 1684 unsigned short sem_nsems; 1685 }; 1686 1687 static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf, 1688 int version) 1689 { 1690 memset(out, 0, sizeof(*out)); 1691 if (version == IPC_64) { 1692 struct compat_semid64_ds __user *p = buf; 1693 return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm); 1694 } else { 1695 struct compat_semid_ds __user *p = buf; 1696 return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm); 1697 } 1698 } 1699 1700 static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, 1701 int version) 1702 { 1703 if (version == IPC_64) { 1704 struct compat_semid64_ds v; 1705 memset(&v, 0, sizeof(v)); 1706 to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm); 1707 v.sem_otime = in->sem_otime; 1708 v.sem_ctime = in->sem_ctime; 1709 v.sem_nsems = in->sem_nsems; 1710 return copy_to_user(buf, &v, sizeof(v)); 1711 } else { 1712 struct compat_semid_ds v; 1713 memset(&v, 0, sizeof(v)); 1714 to_compat_ipc_perm(&v.sem_perm, &in->sem_perm); 1715 v.sem_otime = in->sem_otime; 1716 v.sem_ctime = in->sem_ctime; 1717 v.sem_nsems = in->sem_nsems; 1718 return copy_to_user(buf, &v, sizeof(v)); 1719 } 1720 } 1721 1722 long compat_ksys_semctl(int semid, int semnum, int cmd, int arg) 1723 { 1724 void __user *p = compat_ptr(arg); 1725 struct ipc_namespace *ns; 1726 struct semid64_ds semid64; 1727 int version = compat_ipc_parse_version(&cmd); 1728 int err; 1729 1730 ns = current->nsproxy->ipc_ns; 1731 1732 if (semid < 0) 1733 return -EINVAL; 1734 1735 switch (cmd & (~IPC_64)) { 1736 case IPC_INFO: 1737 case SEM_INFO: 1738 return semctl_info(ns, semid, cmd, p); 1739 case IPC_STAT: 1740 case SEM_STAT: 1741 case SEM_STAT_ANY: 1742 err = semctl_stat(ns, semid, cmd, &semid64); 1743 if (err < 0) 1744 return err; 1745 if (copy_compat_semid_to_user(p, &semid64, version)) 1746 err = -EFAULT; 1747 return err; 1748 case GETVAL: 1749 case GETPID: 1750 case GETNCNT: 1751 case GETZCNT: 1752 case GETALL: 1753 case SETALL: 1754 return semctl_main(ns, semid, semnum, cmd, p); 1755 case SETVAL: 1756 return semctl_setval(ns, semid, semnum, arg); 1757 case IPC_SET: 1758 if (copy_compat_semid_from_user(&semid64, p, version)) 1759 return -EFAULT; 1760 /* fallthru */ 1761 case IPC_RMID: 1762 return semctl_down(ns, semid, cmd, &semid64); 1763 default: 1764 return -EINVAL; 1765 } 1766 } 1767 1768 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) 1769 { 1770 return compat_ksys_semctl(semid, semnum, cmd, arg); 1771 } 1772 #endif 1773 1774 /* If the task doesn't already have a undo_list, then allocate one 1775 * here. We guarantee there is only one thread using this undo list, 1776 * and current is THE ONE 1777 * 1778 * If this allocation and assignment succeeds, but later 1779 * portions of this code fail, there is no need to free the sem_undo_list. 1780 * Just let it stay associated with the task, and it'll be freed later 1781 * at exit time. 1782 * 1783 * This can block, so callers must hold no locks. 1784 */ 1785 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1786 { 1787 struct sem_undo_list *undo_list; 1788 1789 undo_list = current->sysvsem.undo_list; 1790 if (!undo_list) { 1791 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1792 if (undo_list == NULL) 1793 return -ENOMEM; 1794 spin_lock_init(&undo_list->lock); 1795 refcount_set(&undo_list->refcnt, 1); 1796 INIT_LIST_HEAD(&undo_list->list_proc); 1797 1798 current->sysvsem.undo_list = undo_list; 1799 } 1800 *undo_listp = undo_list; 1801 return 0; 1802 } 1803 1804 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1805 { 1806 struct sem_undo *un; 1807 1808 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { 1809 if (un->semid == semid) 1810 return un; 1811 } 1812 return NULL; 1813 } 1814 1815 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1816 { 1817 struct sem_undo *un; 1818 1819 assert_spin_locked(&ulp->lock); 1820 1821 un = __lookup_undo(ulp, semid); 1822 if (un) { 1823 list_del_rcu(&un->list_proc); 1824 list_add_rcu(&un->list_proc, &ulp->list_proc); 1825 } 1826 return un; 1827 } 1828 1829 /** 1830 * find_alloc_undo - lookup (and if not present create) undo array 1831 * @ns: namespace 1832 * @semid: semaphore array id 1833 * 1834 * The function looks up (and if not present creates) the undo structure. 1835 * The size of the undo structure depends on the size of the semaphore 1836 * array, thus the alloc path is not that straightforward. 1837 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1838 * performs a rcu_read_lock(). 1839 */ 1840 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1841 { 1842 struct sem_array *sma; 1843 struct sem_undo_list *ulp; 1844 struct sem_undo *un, *new; 1845 int nsems, error; 1846 1847 error = get_undo_list(&ulp); 1848 if (error) 1849 return ERR_PTR(error); 1850 1851 rcu_read_lock(); 1852 spin_lock(&ulp->lock); 1853 un = lookup_undo(ulp, semid); 1854 spin_unlock(&ulp->lock); 1855 if (likely(un != NULL)) 1856 goto out; 1857 1858 /* no undo structure around - allocate one. */ 1859 /* step 1: figure out the size of the semaphore array */ 1860 sma = sem_obtain_object_check(ns, semid); 1861 if (IS_ERR(sma)) { 1862 rcu_read_unlock(); 1863 return ERR_CAST(sma); 1864 } 1865 1866 nsems = sma->sem_nsems; 1867 if (!ipc_rcu_getref(&sma->sem_perm)) { 1868 rcu_read_unlock(); 1869 un = ERR_PTR(-EIDRM); 1870 goto out; 1871 } 1872 rcu_read_unlock(); 1873 1874 /* step 2: allocate new undo structure */ 1875 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1876 if (!new) { 1877 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1878 return ERR_PTR(-ENOMEM); 1879 } 1880 1881 /* step 3: Acquire the lock on semaphore array */ 1882 rcu_read_lock(); 1883 sem_lock_and_putref(sma); 1884 if (!ipc_valid_object(&sma->sem_perm)) { 1885 sem_unlock(sma, -1); 1886 rcu_read_unlock(); 1887 kfree(new); 1888 un = ERR_PTR(-EIDRM); 1889 goto out; 1890 } 1891 spin_lock(&ulp->lock); 1892 1893 /* 1894 * step 4: check for races: did someone else allocate the undo struct? 1895 */ 1896 un = lookup_undo(ulp, semid); 1897 if (un) { 1898 kfree(new); 1899 goto success; 1900 } 1901 /* step 5: initialize & link new undo structure */ 1902 new->semadj = (short *) &new[1]; 1903 new->ulp = ulp; 1904 new->semid = semid; 1905 assert_spin_locked(&ulp->lock); 1906 list_add_rcu(&new->list_proc, &ulp->list_proc); 1907 ipc_assert_locked_object(&sma->sem_perm); 1908 list_add(&new->list_id, &sma->list_id); 1909 un = new; 1910 1911 success: 1912 spin_unlock(&ulp->lock); 1913 sem_unlock(sma, -1); 1914 out: 1915 return un; 1916 } 1917 1918 static long do_semtimedop(int semid, struct sembuf __user *tsops, 1919 unsigned nsops, const struct timespec64 *timeout) 1920 { 1921 int error = -EINVAL; 1922 struct sem_array *sma; 1923 struct sembuf fast_sops[SEMOPM_FAST]; 1924 struct sembuf *sops = fast_sops, *sop; 1925 struct sem_undo *un; 1926 int max, locknum; 1927 bool undos = false, alter = false, dupsop = false; 1928 struct sem_queue queue; 1929 unsigned long dup = 0, jiffies_left = 0; 1930 struct ipc_namespace *ns; 1931 1932 ns = current->nsproxy->ipc_ns; 1933 1934 if (nsops < 1 || semid < 0) 1935 return -EINVAL; 1936 if (nsops > ns->sc_semopm) 1937 return -E2BIG; 1938 if (nsops > SEMOPM_FAST) { 1939 sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL); 1940 if (sops == NULL) 1941 return -ENOMEM; 1942 } 1943 1944 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 1945 error = -EFAULT; 1946 goto out_free; 1947 } 1948 1949 if (timeout) { 1950 if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 || 1951 timeout->tv_nsec >= 1000000000L) { 1952 error = -EINVAL; 1953 goto out_free; 1954 } 1955 jiffies_left = timespec64_to_jiffies(timeout); 1956 } 1957 1958 max = 0; 1959 for (sop = sops; sop < sops + nsops; sop++) { 1960 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); 1961 1962 if (sop->sem_num >= max) 1963 max = sop->sem_num; 1964 if (sop->sem_flg & SEM_UNDO) 1965 undos = true; 1966 if (dup & mask) { 1967 /* 1968 * There was a previous alter access that appears 1969 * to have accessed the same semaphore, thus use 1970 * the dupsop logic. "appears", because the detection 1971 * can only check % BITS_PER_LONG. 1972 */ 1973 dupsop = true; 1974 } 1975 if (sop->sem_op != 0) { 1976 alter = true; 1977 dup |= mask; 1978 } 1979 } 1980 1981 if (undos) { 1982 /* On success, find_alloc_undo takes the rcu_read_lock */ 1983 un = find_alloc_undo(ns, semid); 1984 if (IS_ERR(un)) { 1985 error = PTR_ERR(un); 1986 goto out_free; 1987 } 1988 } else { 1989 un = NULL; 1990 rcu_read_lock(); 1991 } 1992 1993 sma = sem_obtain_object_check(ns, semid); 1994 if (IS_ERR(sma)) { 1995 rcu_read_unlock(); 1996 error = PTR_ERR(sma); 1997 goto out_free; 1998 } 1999 2000 error = -EFBIG; 2001 if (max >= sma->sem_nsems) { 2002 rcu_read_unlock(); 2003 goto out_free; 2004 } 2005 2006 error = -EACCES; 2007 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { 2008 rcu_read_unlock(); 2009 goto out_free; 2010 } 2011 2012 error = security_sem_semop(&sma->sem_perm, sops, nsops, alter); 2013 if (error) { 2014 rcu_read_unlock(); 2015 goto out_free; 2016 } 2017 2018 error = -EIDRM; 2019 locknum = sem_lock(sma, sops, nsops); 2020 /* 2021 * We eventually might perform the following check in a lockless 2022 * fashion, considering ipc_valid_object() locking constraints. 2023 * If nsops == 1 and there is no contention for sem_perm.lock, then 2024 * only a per-semaphore lock is held and it's OK to proceed with the 2025 * check below. More details on the fine grained locking scheme 2026 * entangled here and why it's RMID race safe on comments at sem_lock() 2027 */ 2028 if (!ipc_valid_object(&sma->sem_perm)) 2029 goto out_unlock_free; 2030 /* 2031 * semid identifiers are not unique - find_alloc_undo may have 2032 * allocated an undo structure, it was invalidated by an RMID 2033 * and now a new array with received the same id. Check and fail. 2034 * This case can be detected checking un->semid. The existence of 2035 * "un" itself is guaranteed by rcu. 2036 */ 2037 if (un && un->semid == -1) 2038 goto out_unlock_free; 2039 2040 queue.sops = sops; 2041 queue.nsops = nsops; 2042 queue.undo = un; 2043 queue.pid = task_tgid(current); 2044 queue.alter = alter; 2045 queue.dupsop = dupsop; 2046 2047 error = perform_atomic_semop(sma, &queue); 2048 if (error == 0) { /* non-blocking succesfull path */ 2049 DEFINE_WAKE_Q(wake_q); 2050 2051 /* 2052 * If the operation was successful, then do 2053 * the required updates. 2054 */ 2055 if (alter) 2056 do_smart_update(sma, sops, nsops, 1, &wake_q); 2057 else 2058 set_semotime(sma, sops); 2059 2060 sem_unlock(sma, locknum); 2061 rcu_read_unlock(); 2062 wake_up_q(&wake_q); 2063 2064 goto out_free; 2065 } 2066 if (error < 0) /* non-blocking error path */ 2067 goto out_unlock_free; 2068 2069 /* 2070 * We need to sleep on this operation, so we put the current 2071 * task into the pending queue and go to sleep. 2072 */ 2073 if (nsops == 1) { 2074 struct sem *curr; 2075 curr = &sma->sems[sops->sem_num]; 2076 2077 if (alter) { 2078 if (sma->complex_count) { 2079 list_add_tail(&queue.list, 2080 &sma->pending_alter); 2081 } else { 2082 2083 list_add_tail(&queue.list, 2084 &curr->pending_alter); 2085 } 2086 } else { 2087 list_add_tail(&queue.list, &curr->pending_const); 2088 } 2089 } else { 2090 if (!sma->complex_count) 2091 merge_queues(sma); 2092 2093 if (alter) 2094 list_add_tail(&queue.list, &sma->pending_alter); 2095 else 2096 list_add_tail(&queue.list, &sma->pending_const); 2097 2098 sma->complex_count++; 2099 } 2100 2101 do { 2102 queue.status = -EINTR; 2103 queue.sleeper = current; 2104 2105 __set_current_state(TASK_INTERRUPTIBLE); 2106 sem_unlock(sma, locknum); 2107 rcu_read_unlock(); 2108 2109 if (timeout) 2110 jiffies_left = schedule_timeout(jiffies_left); 2111 else 2112 schedule(); 2113 2114 /* 2115 * fastpath: the semop has completed, either successfully or 2116 * not, from the syscall pov, is quite irrelevant to us at this 2117 * point; we're done. 2118 * 2119 * We _do_ care, nonetheless, about being awoken by a signal or 2120 * spuriously. The queue.status is checked again in the 2121 * slowpath (aka after taking sem_lock), such that we can detect 2122 * scenarios where we were awakened externally, during the 2123 * window between wake_q_add() and wake_up_q(). 2124 */ 2125 error = READ_ONCE(queue.status); 2126 if (error != -EINTR) { 2127 /* 2128 * User space could assume that semop() is a memory 2129 * barrier: Without the mb(), the cpu could 2130 * speculatively read in userspace stale data that was 2131 * overwritten by the previous owner of the semaphore. 2132 */ 2133 smp_mb(); 2134 goto out_free; 2135 } 2136 2137 rcu_read_lock(); 2138 locknum = sem_lock(sma, sops, nsops); 2139 2140 if (!ipc_valid_object(&sma->sem_perm)) 2141 goto out_unlock_free; 2142 2143 error = READ_ONCE(queue.status); 2144 2145 /* 2146 * If queue.status != -EINTR we are woken up by another process. 2147 * Leave without unlink_queue(), but with sem_unlock(). 2148 */ 2149 if (error != -EINTR) 2150 goto out_unlock_free; 2151 2152 /* 2153 * If an interrupt occurred we have to clean up the queue. 2154 */ 2155 if (timeout && jiffies_left == 0) 2156 error = -EAGAIN; 2157 } while (error == -EINTR && !signal_pending(current)); /* spurious */ 2158 2159 unlink_queue(sma, &queue); 2160 2161 out_unlock_free: 2162 sem_unlock(sma, locknum); 2163 rcu_read_unlock(); 2164 out_free: 2165 if (sops != fast_sops) 2166 kvfree(sops); 2167 return error; 2168 } 2169 2170 long ksys_semtimedop(int semid, struct sembuf __user *tsops, 2171 unsigned int nsops, const struct timespec __user *timeout) 2172 { 2173 if (timeout) { 2174 struct timespec64 ts; 2175 if (get_timespec64(&ts, timeout)) 2176 return -EFAULT; 2177 return do_semtimedop(semid, tsops, nsops, &ts); 2178 } 2179 return do_semtimedop(semid, tsops, nsops, NULL); 2180 } 2181 2182 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 2183 unsigned int, nsops, const struct timespec __user *, timeout) 2184 { 2185 return ksys_semtimedop(semid, tsops, nsops, timeout); 2186 } 2187 2188 #ifdef CONFIG_COMPAT 2189 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, 2190 unsigned int nsops, 2191 const struct compat_timespec __user *timeout) 2192 { 2193 if (timeout) { 2194 struct timespec64 ts; 2195 if (compat_get_timespec64(&ts, timeout)) 2196 return -EFAULT; 2197 return do_semtimedop(semid, tsems, nsops, &ts); 2198 } 2199 return do_semtimedop(semid, tsems, nsops, NULL); 2200 } 2201 2202 COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, 2203 unsigned int, nsops, 2204 const struct compat_timespec __user *, timeout) 2205 { 2206 return compat_ksys_semtimedop(semid, tsems, nsops, timeout); 2207 } 2208 #endif 2209 2210 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, 2211 unsigned, nsops) 2212 { 2213 return do_semtimedop(semid, tsops, nsops, NULL); 2214 } 2215 2216 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 2217 * parent and child tasks. 2218 */ 2219 2220 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 2221 { 2222 struct sem_undo_list *undo_list; 2223 int error; 2224 2225 if (clone_flags & CLONE_SYSVSEM) { 2226 error = get_undo_list(&undo_list); 2227 if (error) 2228 return error; 2229 refcount_inc(&undo_list->refcnt); 2230 tsk->sysvsem.undo_list = undo_list; 2231 } else 2232 tsk->sysvsem.undo_list = NULL; 2233 2234 return 0; 2235 } 2236 2237 /* 2238 * add semadj values to semaphores, free undo structures. 2239 * undo structures are not freed when semaphore arrays are destroyed 2240 * so some of them may be out of date. 2241 * IMPLEMENTATION NOTE: There is some confusion over whether the 2242 * set of adjustments that needs to be done should be done in an atomic 2243 * manner or not. That is, if we are attempting to decrement the semval 2244 * should we queue up and wait until we can do so legally? 2245 * The original implementation attempted to do this (queue and wait). 2246 * The current implementation does not do so. The POSIX standard 2247 * and SVID should be consulted to determine what behavior is mandated. 2248 */ 2249 void exit_sem(struct task_struct *tsk) 2250 { 2251 struct sem_undo_list *ulp; 2252 2253 ulp = tsk->sysvsem.undo_list; 2254 if (!ulp) 2255 return; 2256 tsk->sysvsem.undo_list = NULL; 2257 2258 if (!refcount_dec_and_test(&ulp->refcnt)) 2259 return; 2260 2261 for (;;) { 2262 struct sem_array *sma; 2263 struct sem_undo *un; 2264 int semid, i; 2265 DEFINE_WAKE_Q(wake_q); 2266 2267 cond_resched(); 2268 2269 rcu_read_lock(); 2270 un = list_entry_rcu(ulp->list_proc.next, 2271 struct sem_undo, list_proc); 2272 if (&un->list_proc == &ulp->list_proc) { 2273 /* 2274 * We must wait for freeary() before freeing this ulp, 2275 * in case we raced with last sem_undo. There is a small 2276 * possibility where we exit while freeary() didn't 2277 * finish unlocking sem_undo_list. 2278 */ 2279 spin_lock(&ulp->lock); 2280 spin_unlock(&ulp->lock); 2281 rcu_read_unlock(); 2282 break; 2283 } 2284 spin_lock(&ulp->lock); 2285 semid = un->semid; 2286 spin_unlock(&ulp->lock); 2287 2288 /* exit_sem raced with IPC_RMID, nothing to do */ 2289 if (semid == -1) { 2290 rcu_read_unlock(); 2291 continue; 2292 } 2293 2294 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); 2295 /* exit_sem raced with IPC_RMID, nothing to do */ 2296 if (IS_ERR(sma)) { 2297 rcu_read_unlock(); 2298 continue; 2299 } 2300 2301 sem_lock(sma, NULL, -1); 2302 /* exit_sem raced with IPC_RMID, nothing to do */ 2303 if (!ipc_valid_object(&sma->sem_perm)) { 2304 sem_unlock(sma, -1); 2305 rcu_read_unlock(); 2306 continue; 2307 } 2308 un = __lookup_undo(ulp, semid); 2309 if (un == NULL) { 2310 /* exit_sem raced with IPC_RMID+semget() that created 2311 * exactly the same semid. Nothing to do. 2312 */ 2313 sem_unlock(sma, -1); 2314 rcu_read_unlock(); 2315 continue; 2316 } 2317 2318 /* remove un from the linked lists */ 2319 ipc_assert_locked_object(&sma->sem_perm); 2320 list_del(&un->list_id); 2321 2322 /* we are the last process using this ulp, acquiring ulp->lock 2323 * isn't required. Besides that, we are also protected against 2324 * IPC_RMID as we hold sma->sem_perm lock now 2325 */ 2326 list_del_rcu(&un->list_proc); 2327 2328 /* perform adjustments registered in un */ 2329 for (i = 0; i < sma->sem_nsems; i++) { 2330 struct sem *semaphore = &sma->sems[i]; 2331 if (un->semadj[i]) { 2332 semaphore->semval += un->semadj[i]; 2333 /* 2334 * Range checks of the new semaphore value, 2335 * not defined by sus: 2336 * - Some unices ignore the undo entirely 2337 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 2338 * - some cap the value (e.g. FreeBSD caps 2339 * at 0, but doesn't enforce SEMVMX) 2340 * 2341 * Linux caps the semaphore value, both at 0 2342 * and at SEMVMX. 2343 * 2344 * Manfred <manfred@colorfullife.com> 2345 */ 2346 if (semaphore->semval < 0) 2347 semaphore->semval = 0; 2348 if (semaphore->semval > SEMVMX) 2349 semaphore->semval = SEMVMX; 2350 ipc_update_pid(&semaphore->sempid, task_tgid(current)); 2351 } 2352 } 2353 /* maybe some queued-up processes were waiting for this */ 2354 do_smart_update(sma, NULL, 0, 1, &wake_q); 2355 sem_unlock(sma, -1); 2356 rcu_read_unlock(); 2357 wake_up_q(&wake_q); 2358 2359 kfree_rcu(un, rcu); 2360 } 2361 kfree(ulp); 2362 } 2363 2364 #ifdef CONFIG_PROC_FS 2365 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 2366 { 2367 struct user_namespace *user_ns = seq_user_ns(s); 2368 struct kern_ipc_perm *ipcp = it; 2369 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 2370 time64_t sem_otime; 2371 2372 /* 2373 * The proc interface isn't aware of sem_lock(), it calls 2374 * ipc_lock_object() directly (in sysvipc_find_ipc). 2375 * In order to stay compatible with sem_lock(), we must 2376 * enter / leave complex_mode. 2377 */ 2378 complexmode_enter(sma); 2379 2380 sem_otime = get_semotime(sma); 2381 2382 seq_printf(s, 2383 "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n", 2384 sma->sem_perm.key, 2385 sma->sem_perm.id, 2386 sma->sem_perm.mode, 2387 sma->sem_nsems, 2388 from_kuid_munged(user_ns, sma->sem_perm.uid), 2389 from_kgid_munged(user_ns, sma->sem_perm.gid), 2390 from_kuid_munged(user_ns, sma->sem_perm.cuid), 2391 from_kgid_munged(user_ns, sma->sem_perm.cgid), 2392 sem_otime, 2393 sma->sem_ctime); 2394 2395 complexmode_tryleave(sma); 2396 2397 return 0; 2398 } 2399 #endif 2400