// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *   (a userspace usage sketch follows at the end of this comment)
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This makes it possible to
 *   achieve FIFO ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
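 *
 * Minimal userspace usage sketch of the behavior listed above (illustrative
 * only; error handling mostly omitted; the caller-defined union semun is
 * required by the semctl() API):
 *
 *	#include <sys/types.h>
 *	#include <sys/ipc.h>
 *	#include <sys/sem.h>
 *
 *	union semun { int val; struct semid_ds *buf; unsigned short *array; };
 *
 *	int main(void)
 *	{
 *		union semun arg = { .val = 1 };
 *		struct sembuf op = { .sem_num = 0, .sem_op = -1,
 *				     .sem_flg = SEM_UNDO };
 *		int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *
 *		if (semid < 0)
 *			return 1;
 *		semctl(semid, 0, SETVAL, arg);	// updates sem_ctime
 *		semop(semid, &op, 1);		// decrement; undone at exit
 *		op.sem_op = 1;
 *		semop(semid, &op, 1);		// increment, never blocks
 *		semctl(semid, 0, IPC_RMID);	// sleepers would see EIDRM
 *		return 0;
 *	}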
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>

#include <linux/uaccess.h>
#include "util.h"

/* One semaphore structure for each semaphore in the system. */
struct sem {
	int	semval;		/* current value */
	/*
	 * PID of the process that last modified the semaphore. For
	 * Linux, specifically these are:
	 *  - semop
	 *  - semctl, via SETVAL and SETALL.
	 *  - at task exit when performing undo adjustments (see exit_sem).
	 */
	struct pid *sempid;
	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
	struct list_head pending_alter; /* pending single-sop operations */
					/* that alter the semaphore */
	struct list_head pending_const; /* pending single-sop operations */
					/* that do not alter the semaphore*/
	time_t	sem_otime;	/* candidate for sem_otime */
} ____cacheline_aligned_in_smp;

/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
	time64_t		sem_ctime;	/* create/last semctl() time */
	struct list_head	pending_alter;	/* pending operations */
						/* that alter the array */
	struct list_head	pending_const;	/* pending complex operations */
						/* that do not alter semvals */
	struct list_head	list_id;	/* undo requests on this array */
	int			sem_nsems;	/* no. of semaphores in array */
	int			complex_count;	/* pending complex operations */
	unsigned int		use_global_lock;/* >0: global lock required */

	struct sem		sems[];
} __randomize_layout;

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	struct pid		*pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
	struct sembuf		*blocking; /* the operation that blocked */
	int			nsops;	 /* number of operations */
	bool			alter;	 /* does *sops alter the array? */
	bool			dupsop;	 /* sops on more than one sem_num */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list: *
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks in a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
	refcount_t		refcnt;
	spinlock_t		lock;
	struct list_head	list_proc;
};


#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256	/* 512 bytes on stack */
#define SEMOPM_FAST	64	/* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *	sem_undo.id_next,
 *	sem_array.complex_count,
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 * The special case is use_global_lock:
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release().
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
 */

#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

int sem_init_ns(struct ipc_namespace *ns)
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
	return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}

#ifdef CONFIG_IPC_NS
void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
}
#endif

int __init sem_init(void)
{
	const int err = sem_init_ns(&init_ipc_ns);

	ipc_init_proc_interface("sysvipc/sem",
			" key semid perms nsems uid gid cuid cgid otime ctime\n",
			IPC_SEM_IDS, sysvipc_sem_proc_show);
	return err;
}

/**
 * unmerge_queues - unmerge queues, if possible.
 * @sma: semaphore array
 *
 * The function unmerges the wait queues if complex_count is 0.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
	struct sem_queue *q, *tq;

	/* complex operations still around? */
	if (sma->complex_count)
		return;
	/*
	 * We will switch back to simple mode.
	 * Move all pending operations back into the per-semaphore
	 * queues.
268 */ 269 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 270 struct sem *curr; 271 curr = &sma->sems[q->sops[0].sem_num]; 272 273 list_add_tail(&q->list, &curr->pending_alter); 274 } 275 INIT_LIST_HEAD(&sma->pending_alter); 276 } 277 278 /** 279 * merge_queues - merge single semop queues into global queue 280 * @sma: semaphore array 281 * 282 * This function merges all per-semaphore queues into the global queue. 283 * It is necessary to achieve FIFO ordering for the pending single-sop 284 * operations when a multi-semop operation must sleep. 285 * Only the alter operations must be moved, the const operations can stay. 286 */ 287 static void merge_queues(struct sem_array *sma) 288 { 289 int i; 290 for (i = 0; i < sma->sem_nsems; i++) { 291 struct sem *sem = &sma->sems[i]; 292 293 list_splice_init(&sem->pending_alter, &sma->pending_alter); 294 } 295 } 296 297 static void sem_rcu_free(struct rcu_head *head) 298 { 299 struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu); 300 struct sem_array *sma = container_of(p, struct sem_array, sem_perm); 301 302 security_sem_free(&sma->sem_perm); 303 kvfree(sma); 304 } 305 306 /* 307 * Enter the mode suitable for non-simple operations: 308 * Caller must own sem_perm.lock. 309 */ 310 static void complexmode_enter(struct sem_array *sma) 311 { 312 int i; 313 struct sem *sem; 314 315 if (sma->use_global_lock > 0) { 316 /* 317 * We are already in global lock mode. 318 * Nothing to do, just reset the 319 * counter until we return to simple mode. 320 */ 321 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 322 return; 323 } 324 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 325 326 for (i = 0; i < sma->sem_nsems; i++) { 327 sem = &sma->sems[i]; 328 spin_lock(&sem->lock); 329 spin_unlock(&sem->lock); 330 } 331 } 332 333 /* 334 * Try to leave the mode that disallows simple operations: 335 * Caller must own sem_perm.lock. 336 */ 337 static void complexmode_tryleave(struct sem_array *sma) 338 { 339 if (sma->complex_count) { 340 /* Complex ops are sleeping. 341 * We must stay in complex mode 342 */ 343 return; 344 } 345 if (sma->use_global_lock == 1) { 346 /* 347 * Immediately after setting use_global_lock to 0, 348 * a simple op can start. Thus: all memory writes 349 * performed by the current operation must be visible 350 * before we set use_global_lock to 0. 351 */ 352 smp_store_release(&sma->use_global_lock, 0); 353 } else { 354 sma->use_global_lock--; 355 } 356 } 357 358 #define SEM_GLOBAL_LOCK (-1) 359 /* 360 * If the request contains only one semaphore operation, and there are 361 * no complex transactions pending, lock only the semaphore involved. 362 * Otherwise, lock the entire semaphore array, since we either have 363 * multiple semaphores in our own semops, or we need to look at 364 * semaphores from other pending complex operations. 365 */ 366 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, 367 int nsops) 368 { 369 struct sem *sem; 370 371 if (nsops != 1) { 372 /* Complex operation - acquire a full lock */ 373 ipc_lock_object(&sma->sem_perm); 374 375 /* Prevent parallel simple ops */ 376 complexmode_enter(sma); 377 return SEM_GLOBAL_LOCK; 378 } 379 380 /* 381 * Only one semaphore affected - try to optimize locking. 382 * Optimized locking is possible if no complex operation 383 * is either enqueued or processed right now. 384 * 385 * Both facts are tracked by use_global_mode. 386 */ 387 sem = &sma->sems[sops->sem_num]; 388 389 /* 390 * Initial check for use_global_lock. 
Just an optimization, 391 * no locking, no memory barrier. 392 */ 393 if (!sma->use_global_lock) { 394 /* 395 * It appears that no complex operation is around. 396 * Acquire the per-semaphore lock. 397 */ 398 spin_lock(&sem->lock); 399 400 /* pairs with smp_store_release() */ 401 if (!smp_load_acquire(&sma->use_global_lock)) { 402 /* fast path successful! */ 403 return sops->sem_num; 404 } 405 spin_unlock(&sem->lock); 406 } 407 408 /* slow path: acquire the full lock */ 409 ipc_lock_object(&sma->sem_perm); 410 411 if (sma->use_global_lock == 0) { 412 /* 413 * The use_global_lock mode ended while we waited for 414 * sma->sem_perm.lock. Thus we must switch to locking 415 * with sem->lock. 416 * Unlike in the fast path, there is no need to recheck 417 * sma->use_global_lock after we have acquired sem->lock: 418 * We own sma->sem_perm.lock, thus use_global_lock cannot 419 * change. 420 */ 421 spin_lock(&sem->lock); 422 423 ipc_unlock_object(&sma->sem_perm); 424 return sops->sem_num; 425 } else { 426 /* 427 * Not a false alarm, thus continue to use the global lock 428 * mode. No need for complexmode_enter(), this was done by 429 * the caller that has set use_global_mode to non-zero. 430 */ 431 return SEM_GLOBAL_LOCK; 432 } 433 } 434 435 static inline void sem_unlock(struct sem_array *sma, int locknum) 436 { 437 if (locknum == SEM_GLOBAL_LOCK) { 438 unmerge_queues(sma); 439 complexmode_tryleave(sma); 440 ipc_unlock_object(&sma->sem_perm); 441 } else { 442 struct sem *sem = &sma->sems[locknum]; 443 spin_unlock(&sem->lock); 444 } 445 } 446 447 /* 448 * sem_lock_(check_) routines are called in the paths where the rwsem 449 * is not held. 450 * 451 * The caller holds the RCU read lock. 452 */ 453 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 454 { 455 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 456 457 if (IS_ERR(ipcp)) 458 return ERR_CAST(ipcp); 459 460 return container_of(ipcp, struct sem_array, sem_perm); 461 } 462 463 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 464 int id) 465 { 466 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 467 468 if (IS_ERR(ipcp)) 469 return ERR_CAST(ipcp); 470 471 return container_of(ipcp, struct sem_array, sem_perm); 472 } 473 474 static inline void sem_lock_and_putref(struct sem_array *sma) 475 { 476 sem_lock(sma, NULL, -1); 477 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 478 } 479 480 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 481 { 482 ipc_rmid(&sem_ids(ns), &s->sem_perm); 483 } 484 485 static struct sem_array *sem_alloc(size_t nsems) 486 { 487 struct sem_array *sma; 488 size_t size; 489 490 if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) 491 return NULL; 492 493 size = sizeof(*sma) + nsems * sizeof(sma->sems[0]); 494 sma = kvmalloc(size, GFP_KERNEL); 495 if (unlikely(!sma)) 496 return NULL; 497 498 memset(sma, 0, size); 499 500 return sma; 501 } 502 503 /** 504 * newary - Create a new semaphore set 505 * @ns: namespace 506 * @params: ptr to the structure that contains key, semflg and nsems 507 * 508 * Called with sem_ids.rwsem held (as a writer) 509 */ 510 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 511 { 512 int retval; 513 struct sem_array *sma; 514 key_t key = params->key; 515 int nsems = params->u.nsems; 516 int semflg = params->flg; 517 int i; 518 519 if (!nsems) 520 return -EINVAL; 521 if (ns->used_sems + nsems > ns->sc_semmns) 522 return -ENOSPC; 523 524 sma = 
sem_alloc(nsems); 525 if (!sma) 526 return -ENOMEM; 527 528 sma->sem_perm.mode = (semflg & S_IRWXUGO); 529 sma->sem_perm.key = key; 530 531 sma->sem_perm.security = NULL; 532 retval = security_sem_alloc(&sma->sem_perm); 533 if (retval) { 534 kvfree(sma); 535 return retval; 536 } 537 538 for (i = 0; i < nsems; i++) { 539 INIT_LIST_HEAD(&sma->sems[i].pending_alter); 540 INIT_LIST_HEAD(&sma->sems[i].pending_const); 541 spin_lock_init(&sma->sems[i].lock); 542 } 543 544 sma->complex_count = 0; 545 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 546 INIT_LIST_HEAD(&sma->pending_alter); 547 INIT_LIST_HEAD(&sma->pending_const); 548 INIT_LIST_HEAD(&sma->list_id); 549 sma->sem_nsems = nsems; 550 sma->sem_ctime = ktime_get_real_seconds(); 551 552 /* ipc_addid() locks sma upon success. */ 553 retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 554 if (retval < 0) { 555 call_rcu(&sma->sem_perm.rcu, sem_rcu_free); 556 return retval; 557 } 558 ns->used_sems += nsems; 559 560 sem_unlock(sma, -1); 561 rcu_read_unlock(); 562 563 return sma->sem_perm.id; 564 } 565 566 567 /* 568 * Called with sem_ids.rwsem and ipcp locked. 569 */ 570 static inline int sem_more_checks(struct kern_ipc_perm *ipcp, 571 struct ipc_params *params) 572 { 573 struct sem_array *sma; 574 575 sma = container_of(ipcp, struct sem_array, sem_perm); 576 if (params->u.nsems > sma->sem_nsems) 577 return -EINVAL; 578 579 return 0; 580 } 581 582 long ksys_semget(key_t key, int nsems, int semflg) 583 { 584 struct ipc_namespace *ns; 585 static const struct ipc_ops sem_ops = { 586 .getnew = newary, 587 .associate = security_sem_associate, 588 .more_checks = sem_more_checks, 589 }; 590 struct ipc_params sem_params; 591 592 ns = current->nsproxy->ipc_ns; 593 594 if (nsems < 0 || nsems > ns->sc_semmsl) 595 return -EINVAL; 596 597 sem_params.key = key; 598 sem_params.flg = semflg; 599 sem_params.u.nsems = nsems; 600 601 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 602 } 603 604 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 605 { 606 return ksys_semget(key, nsems, semflg); 607 } 608 609 /** 610 * perform_atomic_semop[_slow] - Attempt to perform semaphore 611 * operations on a given array. 612 * @sma: semaphore array 613 * @q: struct sem_queue that describes the operation 614 * 615 * Caller blocking are as follows, based the value 616 * indicated by the semaphore operation (sem_op): 617 * 618 * (1) >0 never blocks. 619 * (2) 0 (wait-for-zero operation): semval is non-zero. 620 * (3) <0 attempting to decrement semval to a value smaller than zero. 621 * 622 * Returns 0 if the operation was possible. 623 * Returns 1 if the operation is impossible, the caller must sleep. 624 * Returns <0 for error codes. 625 */ 626 static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) 627 { 628 int result, sem_op, nsops; 629 struct pid *pid; 630 struct sembuf *sop; 631 struct sem *curr; 632 struct sembuf *sops; 633 struct sem_undo *un; 634 635 sops = q->sops; 636 nsops = q->nsops; 637 un = q->undo; 638 639 for (sop = sops; sop < sops + nsops; sop++) { 640 curr = &sma->sems[sop->sem_num]; 641 sem_op = sop->sem_op; 642 result = curr->semval; 643 644 if (!sem_op && result) 645 goto would_block; 646 647 result += sem_op; 648 if (result < 0) 649 goto would_block; 650 if (result > SEMVMX) 651 goto out_of_range; 652 653 if (sop->sem_flg & SEM_UNDO) { 654 int undo = un->semadj[sop->sem_num] - sem_op; 655 /* Exceeding the undo range is an error. 
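 *
 * Worked example of the blocking rules documented above
 * perform_atomic_semop[_slow]() (illustrative, not from the original source):
 * assume semval == 0 for semaphore 0, single-sop semop() calls:
 *
 *	sem_op = +1  ->  result = 0 + 1 = 1; never blocks, semval becomes 1
 *	sem_op =  0  ->  semval is now 1 (non-zero); caller sleeps on the
 *			 pending_const queue (wait-for-zero)
 *	sem_op = -2  ->  result = 1 - 2 = -1; caller sleeps on pending_alter,
 *			 or gets -EAGAIN if IPC_NOWAIT was set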
*/ 656 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 657 goto out_of_range; 658 un->semadj[sop->sem_num] = undo; 659 } 660 661 curr->semval = result; 662 } 663 664 sop--; 665 pid = q->pid; 666 while (sop >= sops) { 667 ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid); 668 sop--; 669 } 670 671 return 0; 672 673 out_of_range: 674 result = -ERANGE; 675 goto undo; 676 677 would_block: 678 q->blocking = sop; 679 680 if (sop->sem_flg & IPC_NOWAIT) 681 result = -EAGAIN; 682 else 683 result = 1; 684 685 undo: 686 sop--; 687 while (sop >= sops) { 688 sem_op = sop->sem_op; 689 sma->sems[sop->sem_num].semval -= sem_op; 690 if (sop->sem_flg & SEM_UNDO) 691 un->semadj[sop->sem_num] += sem_op; 692 sop--; 693 } 694 695 return result; 696 } 697 698 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 699 { 700 int result, sem_op, nsops; 701 struct sembuf *sop; 702 struct sem *curr; 703 struct sembuf *sops; 704 struct sem_undo *un; 705 706 sops = q->sops; 707 nsops = q->nsops; 708 un = q->undo; 709 710 if (unlikely(q->dupsop)) 711 return perform_atomic_semop_slow(sma, q); 712 713 /* 714 * We scan the semaphore set twice, first to ensure that the entire 715 * operation can succeed, therefore avoiding any pointless writes 716 * to shared memory and having to undo such changes in order to block 717 * until the operations can go through. 718 */ 719 for (sop = sops; sop < sops + nsops; sop++) { 720 curr = &sma->sems[sop->sem_num]; 721 sem_op = sop->sem_op; 722 result = curr->semval; 723 724 if (!sem_op && result) 725 goto would_block; /* wait-for-zero */ 726 727 result += sem_op; 728 if (result < 0) 729 goto would_block; 730 731 if (result > SEMVMX) 732 return -ERANGE; 733 734 if (sop->sem_flg & SEM_UNDO) { 735 int undo = un->semadj[sop->sem_num] - sem_op; 736 737 /* Exceeding the undo range is an error. */ 738 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 739 return -ERANGE; 740 } 741 } 742 743 for (sop = sops; sop < sops + nsops; sop++) { 744 curr = &sma->sems[sop->sem_num]; 745 sem_op = sop->sem_op; 746 result = curr->semval; 747 748 if (sop->sem_flg & SEM_UNDO) { 749 int undo = un->semadj[sop->sem_num] - sem_op; 750 751 un->semadj[sop->sem_num] = undo; 752 } 753 curr->semval += sem_op; 754 ipc_update_pid(&curr->sempid, q->pid); 755 } 756 757 return 0; 758 759 would_block: 760 q->blocking = sop; 761 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; 762 } 763 764 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, 765 struct wake_q_head *wake_q) 766 { 767 wake_q_add(wake_q, q->sleeper); 768 /* 769 * Rely on the above implicit barrier, such that we can 770 * ensure that we hold reference to the task before setting 771 * q->status. Otherwise we could race with do_exit if the 772 * task is awoken by an external event before calling 773 * wake_up_process(). 774 */ 775 WRITE_ONCE(q->status, error); 776 } 777 778 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 779 { 780 list_del(&q->list); 781 if (q->nsops > 1) 782 sma->complex_count--; 783 } 784 785 /** check_restart(sma, q) 786 * @sma: semaphore array 787 * @q: the operation that just completed 788 * 789 * update_queue is O(N^2) when it restarts scanning the whole queue of 790 * waiting operations. Therefore this function checks if the restart is 791 * really necessary. It is called after a previously waiting operation 792 * modified the array. 793 * Note that wait-for-zero operations are handled without restart. 
794 */ 795 static inline int check_restart(struct sem_array *sma, struct sem_queue *q) 796 { 797 /* pending complex alter operations are too difficult to analyse */ 798 if (!list_empty(&sma->pending_alter)) 799 return 1; 800 801 /* we were a sleeping complex operation. Too difficult */ 802 if (q->nsops > 1) 803 return 1; 804 805 /* It is impossible that someone waits for the new value: 806 * - complex operations always restart. 807 * - wait-for-zero are handled seperately. 808 * - q is a previously sleeping simple operation that 809 * altered the array. It must be a decrement, because 810 * simple increments never sleep. 811 * - If there are older (higher priority) decrements 812 * in the queue, then they have observed the original 813 * semval value and couldn't proceed. The operation 814 * decremented to value - thus they won't proceed either. 815 */ 816 return 0; 817 } 818 819 /** 820 * wake_const_ops - wake up non-alter tasks 821 * @sma: semaphore array. 822 * @semnum: semaphore that was modified. 823 * @wake_q: lockless wake-queue head. 824 * 825 * wake_const_ops must be called after a semaphore in a semaphore array 826 * was set to 0. If complex const operations are pending, wake_const_ops must 827 * be called with semnum = -1, as well as with the number of each modified 828 * semaphore. 829 * The tasks that must be woken up are added to @wake_q. The return code 830 * is stored in q->pid. 831 * The function returns 1 if at least one operation was completed successfully. 832 */ 833 static int wake_const_ops(struct sem_array *sma, int semnum, 834 struct wake_q_head *wake_q) 835 { 836 struct sem_queue *q, *tmp; 837 struct list_head *pending_list; 838 int semop_completed = 0; 839 840 if (semnum == -1) 841 pending_list = &sma->pending_const; 842 else 843 pending_list = &sma->sems[semnum].pending_const; 844 845 list_for_each_entry_safe(q, tmp, pending_list, list) { 846 int error = perform_atomic_semop(sma, q); 847 848 if (error > 0) 849 continue; 850 /* operation completed, remove from queue & wakeup */ 851 unlink_queue(sma, q); 852 853 wake_up_sem_queue_prepare(q, error, wake_q); 854 if (error == 0) 855 semop_completed = 1; 856 } 857 858 return semop_completed; 859 } 860 861 /** 862 * do_smart_wakeup_zero - wakeup all wait for zero tasks 863 * @sma: semaphore array 864 * @sops: operations that were performed 865 * @nsops: number of operations 866 * @wake_q: lockless wake-queue head 867 * 868 * Checks all required queue for wait-for-zero operations, based 869 * on the actual changes that were performed on the semaphore array. 870 * The function returns 1 if at least one operation was completed successfully. 871 */ 872 static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, 873 int nsops, struct wake_q_head *wake_q) 874 { 875 int i; 876 int semop_completed = 0; 877 int got_zero = 0; 878 879 /* first: the per-semaphore queues, if known */ 880 if (sops) { 881 for (i = 0; i < nsops; i++) { 882 int num = sops[i].sem_num; 883 884 if (sma->sems[num].semval == 0) { 885 got_zero = 1; 886 semop_completed |= wake_const_ops(sma, num, wake_q); 887 } 888 } 889 } else { 890 /* 891 * No sops means modified semaphores not known. 892 * Assume all were changed. 893 */ 894 for (i = 0; i < sma->sem_nsems; i++) { 895 if (sma->sems[i].semval == 0) { 896 got_zero = 1; 897 semop_completed |= wake_const_ops(sma, i, wake_q); 898 } 899 } 900 } 901 /* 902 * If one of the modified semaphores got 0, 903 * then check the global queue, too. 
904 */ 905 if (got_zero) 906 semop_completed |= wake_const_ops(sma, -1, wake_q); 907 908 return semop_completed; 909 } 910 911 912 /** 913 * update_queue - look for tasks that can be completed. 914 * @sma: semaphore array. 915 * @semnum: semaphore that was modified. 916 * @wake_q: lockless wake-queue head. 917 * 918 * update_queue must be called after a semaphore in a semaphore array 919 * was modified. If multiple semaphores were modified, update_queue must 920 * be called with semnum = -1, as well as with the number of each modified 921 * semaphore. 922 * The tasks that must be woken up are added to @wake_q. The return code 923 * is stored in q->pid. 924 * The function internally checks if const operations can now succeed. 925 * 926 * The function return 1 if at least one semop was completed successfully. 927 */ 928 static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q) 929 { 930 struct sem_queue *q, *tmp; 931 struct list_head *pending_list; 932 int semop_completed = 0; 933 934 if (semnum == -1) 935 pending_list = &sma->pending_alter; 936 else 937 pending_list = &sma->sems[semnum].pending_alter; 938 939 again: 940 list_for_each_entry_safe(q, tmp, pending_list, list) { 941 int error, restart; 942 943 /* If we are scanning the single sop, per-semaphore list of 944 * one semaphore and that semaphore is 0, then it is not 945 * necessary to scan further: simple increments 946 * that affect only one entry succeed immediately and cannot 947 * be in the per semaphore pending queue, and decrements 948 * cannot be successful if the value is already 0. 949 */ 950 if (semnum != -1 && sma->sems[semnum].semval == 0) 951 break; 952 953 error = perform_atomic_semop(sma, q); 954 955 /* Does q->sleeper still need to sleep? */ 956 if (error > 0) 957 continue; 958 959 unlink_queue(sma, q); 960 961 if (error) { 962 restart = 0; 963 } else { 964 semop_completed = 1; 965 do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q); 966 restart = check_restart(sma, q); 967 } 968 969 wake_up_sem_queue_prepare(q, error, wake_q); 970 if (restart) 971 goto again; 972 } 973 return semop_completed; 974 } 975 976 /** 977 * set_semotime - set sem_otime 978 * @sma: semaphore array 979 * @sops: operations that modified the array, may be NULL 980 * 981 * sem_otime is replicated to avoid cache line trashing. 982 * This function sets one instance to the current time. 983 */ 984 static void set_semotime(struct sem_array *sma, struct sembuf *sops) 985 { 986 if (sops == NULL) { 987 sma->sems[0].sem_otime = get_seconds(); 988 } else { 989 sma->sems[sops[0].sem_num].sem_otime = 990 get_seconds(); 991 } 992 } 993 994 /** 995 * do_smart_update - optimized update_queue 996 * @sma: semaphore array 997 * @sops: operations that were performed 998 * @nsops: number of operations 999 * @otime: force setting otime 1000 * @wake_q: lockless wake-queue head 1001 * 1002 * do_smart_update() does the required calls to update_queue and wakeup_zero, 1003 * based on the actual changes that were performed on the semaphore array. 1004 * Note that the function does not do the actual wake-up: the caller is 1005 * responsible for calling wake_up_q(). 1006 * It is safe to perform this call after dropping all locks. 
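 *
 * For instance (illustrative): if the completed semop() decremented only
 * sems[2] and sems[2].semval is now 0, the loop above calls
 * wake_const_ops(sma, 2, wake_q) for that semaphore's wait-for-zero queue,
 * and got_zero triggers one extra wake_const_ops(sma, -1, wake_q) pass for
 * the complex wait-for-zero operations queued on sma->pending_const.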
1007 */ 1008 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, 1009 int otime, struct wake_q_head *wake_q) 1010 { 1011 int i; 1012 1013 otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q); 1014 1015 if (!list_empty(&sma->pending_alter)) { 1016 /* semaphore array uses the global queue - just process it. */ 1017 otime |= update_queue(sma, -1, wake_q); 1018 } else { 1019 if (!sops) { 1020 /* 1021 * No sops, thus the modified semaphores are not 1022 * known. Check all. 1023 */ 1024 for (i = 0; i < sma->sem_nsems; i++) 1025 otime |= update_queue(sma, i, wake_q); 1026 } else { 1027 /* 1028 * Check the semaphores that were increased: 1029 * - No complex ops, thus all sleeping ops are 1030 * decrease. 1031 * - if we decreased the value, then any sleeping 1032 * semaphore ops wont be able to run: If the 1033 * previous value was too small, then the new 1034 * value will be too small, too. 1035 */ 1036 for (i = 0; i < nsops; i++) { 1037 if (sops[i].sem_op > 0) { 1038 otime |= update_queue(sma, 1039 sops[i].sem_num, wake_q); 1040 } 1041 } 1042 } 1043 } 1044 if (otime) 1045 set_semotime(sma, sops); 1046 } 1047 1048 /* 1049 * check_qop: Test if a queued operation sleeps on the semaphore semnum 1050 */ 1051 static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q, 1052 bool count_zero) 1053 { 1054 struct sembuf *sop = q->blocking; 1055 1056 /* 1057 * Linux always (since 0.99.10) reported a task as sleeping on all 1058 * semaphores. This violates SUS, therefore it was changed to the 1059 * standard compliant behavior. 1060 * Give the administrators a chance to notice that an application 1061 * might misbehave because it relies on the Linux behavior. 1062 */ 1063 pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n" 1064 "The task %s (%d) triggered the difference, watch for misbehavior.\n", 1065 current->comm, task_pid_nr(current)); 1066 1067 if (sop->sem_num != semnum) 1068 return 0; 1069 1070 if (count_zero && sop->sem_op == 0) 1071 return 1; 1072 if (!count_zero && sop->sem_op < 0) 1073 return 1; 1074 1075 return 0; 1076 } 1077 1078 /* The following counts are associated to each semaphore: 1079 * semncnt number of tasks waiting on semval being nonzero 1080 * semzcnt number of tasks waiting on semval being zero 1081 * 1082 * Per definition, a task waits only on the semaphore of the first semop 1083 * that cannot proceed, even if additional operation would block, too. 1084 */ 1085 static int count_semcnt(struct sem_array *sma, ushort semnum, 1086 bool count_zero) 1087 { 1088 struct list_head *l; 1089 struct sem_queue *q; 1090 int semcnt; 1091 1092 semcnt = 0; 1093 /* First: check the simple operations. They are easy to evaluate */ 1094 if (count_zero) 1095 l = &sma->sems[semnum].pending_const; 1096 else 1097 l = &sma->sems[semnum].pending_alter; 1098 1099 list_for_each_entry(q, l, list) { 1100 /* all task on a per-semaphore list sleep on exactly 1101 * that semaphore 1102 */ 1103 semcnt++; 1104 } 1105 1106 /* Then: check the complex operations. */ 1107 list_for_each_entry(q, &sma->pending_alter, list) { 1108 semcnt += check_qop(sma, semnum, q, count_zero); 1109 } 1110 if (count_zero) { 1111 list_for_each_entry(q, &sma->pending_const, list) { 1112 semcnt += check_qop(sma, semnum, q, count_zero); 1113 } 1114 } 1115 return semcnt; 1116 } 1117 1118 /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked 1119 * as a writer and the spinlock for this semaphore set hold. 
sem_ids.rwsem 1120 * remains locked on exit. 1121 */ 1122 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 1123 { 1124 struct sem_undo *un, *tu; 1125 struct sem_queue *q, *tq; 1126 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 1127 int i; 1128 DEFINE_WAKE_Q(wake_q); 1129 1130 /* Free the existing undo structures for this semaphore set. */ 1131 ipc_assert_locked_object(&sma->sem_perm); 1132 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 1133 list_del(&un->list_id); 1134 spin_lock(&un->ulp->lock); 1135 un->semid = -1; 1136 list_del_rcu(&un->list_proc); 1137 spin_unlock(&un->ulp->lock); 1138 kfree_rcu(un, rcu); 1139 } 1140 1141 /* Wake up all pending processes and let them fail with EIDRM. */ 1142 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1143 unlink_queue(sma, q); 1144 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1145 } 1146 1147 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1148 unlink_queue(sma, q); 1149 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1150 } 1151 for (i = 0; i < sma->sem_nsems; i++) { 1152 struct sem *sem = &sma->sems[i]; 1153 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1154 unlink_queue(sma, q); 1155 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1156 } 1157 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1158 unlink_queue(sma, q); 1159 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1160 } 1161 ipc_update_pid(&sem->sempid, NULL); 1162 } 1163 1164 /* Remove the semaphore set from the IDR */ 1165 sem_rmid(ns, sma); 1166 sem_unlock(sma, -1); 1167 rcu_read_unlock(); 1168 1169 wake_up_q(&wake_q); 1170 ns->used_sems -= sma->sem_nsems; 1171 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1172 } 1173 1174 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1175 { 1176 switch (version) { 1177 case IPC_64: 1178 return copy_to_user(buf, in, sizeof(*in)); 1179 case IPC_OLD: 1180 { 1181 struct semid_ds out; 1182 1183 memset(&out, 0, sizeof(out)); 1184 1185 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1186 1187 out.sem_otime = in->sem_otime; 1188 out.sem_ctime = in->sem_ctime; 1189 out.sem_nsems = in->sem_nsems; 1190 1191 return copy_to_user(buf, &out, sizeof(out)); 1192 } 1193 default: 1194 return -EINVAL; 1195 } 1196 } 1197 1198 static time64_t get_semotime(struct sem_array *sma) 1199 { 1200 int i; 1201 time64_t res; 1202 1203 res = sma->sems[0].sem_otime; 1204 for (i = 1; i < sma->sem_nsems; i++) { 1205 time64_t to = sma->sems[i].sem_otime; 1206 1207 if (to > res) 1208 res = to; 1209 } 1210 return res; 1211 } 1212 1213 static int semctl_stat(struct ipc_namespace *ns, int semid, 1214 int cmd, struct semid64_ds *semid64) 1215 { 1216 struct sem_array *sma; 1217 int id = 0; 1218 int err; 1219 1220 memset(semid64, 0, sizeof(*semid64)); 1221 1222 rcu_read_lock(); 1223 if (cmd == SEM_STAT) { 1224 sma = sem_obtain_object(ns, semid); 1225 if (IS_ERR(sma)) { 1226 err = PTR_ERR(sma); 1227 goto out_unlock; 1228 } 1229 id = sma->sem_perm.id; 1230 } else { 1231 sma = sem_obtain_object_check(ns, semid); 1232 if (IS_ERR(sma)) { 1233 err = PTR_ERR(sma); 1234 goto out_unlock; 1235 } 1236 } 1237 1238 err = -EACCES; 1239 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1240 goto out_unlock; 1241 1242 err = security_sem_semctl(&sma->sem_perm, cmd); 1243 if (err) 1244 goto out_unlock; 1245 1246 ipc_lock_object(&sma->sem_perm); 1247 1248 if (!ipc_valid_object(&sma->sem_perm)) { 1249 ipc_unlock_object(&sma->sem_perm); 1250 err = -EIDRM; 
1251 goto out_unlock; 1252 } 1253 1254 kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm); 1255 semid64->sem_otime = get_semotime(sma); 1256 semid64->sem_ctime = sma->sem_ctime; 1257 semid64->sem_nsems = sma->sem_nsems; 1258 1259 ipc_unlock_object(&sma->sem_perm); 1260 rcu_read_unlock(); 1261 return id; 1262 1263 out_unlock: 1264 rcu_read_unlock(); 1265 return err; 1266 } 1267 1268 static int semctl_info(struct ipc_namespace *ns, int semid, 1269 int cmd, void __user *p) 1270 { 1271 struct seminfo seminfo; 1272 int max_id; 1273 int err; 1274 1275 err = security_sem_semctl(NULL, cmd); 1276 if (err) 1277 return err; 1278 1279 memset(&seminfo, 0, sizeof(seminfo)); 1280 seminfo.semmni = ns->sc_semmni; 1281 seminfo.semmns = ns->sc_semmns; 1282 seminfo.semmsl = ns->sc_semmsl; 1283 seminfo.semopm = ns->sc_semopm; 1284 seminfo.semvmx = SEMVMX; 1285 seminfo.semmnu = SEMMNU; 1286 seminfo.semmap = SEMMAP; 1287 seminfo.semume = SEMUME; 1288 down_read(&sem_ids(ns).rwsem); 1289 if (cmd == SEM_INFO) { 1290 seminfo.semusz = sem_ids(ns).in_use; 1291 seminfo.semaem = ns->used_sems; 1292 } else { 1293 seminfo.semusz = SEMUSZ; 1294 seminfo.semaem = SEMAEM; 1295 } 1296 max_id = ipc_get_maxid(&sem_ids(ns)); 1297 up_read(&sem_ids(ns).rwsem); 1298 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 1299 return -EFAULT; 1300 return (max_id < 0) ? 0 : max_id; 1301 } 1302 1303 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1304 int val) 1305 { 1306 struct sem_undo *un; 1307 struct sem_array *sma; 1308 struct sem *curr; 1309 int err; 1310 DEFINE_WAKE_Q(wake_q); 1311 1312 if (val > SEMVMX || val < 0) 1313 return -ERANGE; 1314 1315 rcu_read_lock(); 1316 sma = sem_obtain_object_check(ns, semid); 1317 if (IS_ERR(sma)) { 1318 rcu_read_unlock(); 1319 return PTR_ERR(sma); 1320 } 1321 1322 if (semnum < 0 || semnum >= sma->sem_nsems) { 1323 rcu_read_unlock(); 1324 return -EINVAL; 1325 } 1326 1327 1328 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1329 rcu_read_unlock(); 1330 return -EACCES; 1331 } 1332 1333 err = security_sem_semctl(&sma->sem_perm, SETVAL); 1334 if (err) { 1335 rcu_read_unlock(); 1336 return -EACCES; 1337 } 1338 1339 sem_lock(sma, NULL, -1); 1340 1341 if (!ipc_valid_object(&sma->sem_perm)) { 1342 sem_unlock(sma, -1); 1343 rcu_read_unlock(); 1344 return -EIDRM; 1345 } 1346 1347 curr = &sma->sems[semnum]; 1348 1349 ipc_assert_locked_object(&sma->sem_perm); 1350 list_for_each_entry(un, &sma->list_id, list_id) 1351 un->semadj[semnum] = 0; 1352 1353 curr->semval = val; 1354 ipc_update_pid(&curr->sempid, task_tgid(current)); 1355 sma->sem_ctime = ktime_get_real_seconds(); 1356 /* maybe some queued-up processes were waiting for this */ 1357 do_smart_update(sma, NULL, 0, 0, &wake_q); 1358 sem_unlock(sma, -1); 1359 rcu_read_unlock(); 1360 wake_up_q(&wake_q); 1361 return 0; 1362 } 1363 1364 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1365 int cmd, void __user *p) 1366 { 1367 struct sem_array *sma; 1368 struct sem *curr; 1369 int err, nsems; 1370 ushort fast_sem_io[SEMMSL_FAST]; 1371 ushort *sem_io = fast_sem_io; 1372 DEFINE_WAKE_Q(wake_q); 1373 1374 rcu_read_lock(); 1375 sma = sem_obtain_object_check(ns, semid); 1376 if (IS_ERR(sma)) { 1377 rcu_read_unlock(); 1378 return PTR_ERR(sma); 1379 } 1380 1381 nsems = sma->sem_nsems; 1382 1383 err = -EACCES; 1384 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? 
S_IWUGO : S_IRUGO)) 1385 goto out_rcu_wakeup; 1386 1387 err = security_sem_semctl(&sma->sem_perm, cmd); 1388 if (err) 1389 goto out_rcu_wakeup; 1390 1391 err = -EACCES; 1392 switch (cmd) { 1393 case GETALL: 1394 { 1395 ushort __user *array = p; 1396 int i; 1397 1398 sem_lock(sma, NULL, -1); 1399 if (!ipc_valid_object(&sma->sem_perm)) { 1400 err = -EIDRM; 1401 goto out_unlock; 1402 } 1403 if (nsems > SEMMSL_FAST) { 1404 if (!ipc_rcu_getref(&sma->sem_perm)) { 1405 err = -EIDRM; 1406 goto out_unlock; 1407 } 1408 sem_unlock(sma, -1); 1409 rcu_read_unlock(); 1410 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1411 GFP_KERNEL); 1412 if (sem_io == NULL) { 1413 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1414 return -ENOMEM; 1415 } 1416 1417 rcu_read_lock(); 1418 sem_lock_and_putref(sma); 1419 if (!ipc_valid_object(&sma->sem_perm)) { 1420 err = -EIDRM; 1421 goto out_unlock; 1422 } 1423 } 1424 for (i = 0; i < sma->sem_nsems; i++) 1425 sem_io[i] = sma->sems[i].semval; 1426 sem_unlock(sma, -1); 1427 rcu_read_unlock(); 1428 err = 0; 1429 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1430 err = -EFAULT; 1431 goto out_free; 1432 } 1433 case SETALL: 1434 { 1435 int i; 1436 struct sem_undo *un; 1437 1438 if (!ipc_rcu_getref(&sma->sem_perm)) { 1439 err = -EIDRM; 1440 goto out_rcu_wakeup; 1441 } 1442 rcu_read_unlock(); 1443 1444 if (nsems > SEMMSL_FAST) { 1445 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1446 GFP_KERNEL); 1447 if (sem_io == NULL) { 1448 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1449 return -ENOMEM; 1450 } 1451 } 1452 1453 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1454 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1455 err = -EFAULT; 1456 goto out_free; 1457 } 1458 1459 for (i = 0; i < nsems; i++) { 1460 if (sem_io[i] > SEMVMX) { 1461 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1462 err = -ERANGE; 1463 goto out_free; 1464 } 1465 } 1466 rcu_read_lock(); 1467 sem_lock_and_putref(sma); 1468 if (!ipc_valid_object(&sma->sem_perm)) { 1469 err = -EIDRM; 1470 goto out_unlock; 1471 } 1472 1473 for (i = 0; i < nsems; i++) { 1474 sma->sems[i].semval = sem_io[i]; 1475 ipc_update_pid(&sma->sems[i].sempid, task_tgid(current)); 1476 } 1477 1478 ipc_assert_locked_object(&sma->sem_perm); 1479 list_for_each_entry(un, &sma->list_id, list_id) { 1480 for (i = 0; i < nsems; i++) 1481 un->semadj[i] = 0; 1482 } 1483 sma->sem_ctime = ktime_get_real_seconds(); 1484 /* maybe some queued-up processes were waiting for this */ 1485 do_smart_update(sma, NULL, 0, 0, &wake_q); 1486 err = 0; 1487 goto out_unlock; 1488 } 1489 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1490 } 1491 err = -EINVAL; 1492 if (semnum < 0 || semnum >= nsems) 1493 goto out_rcu_wakeup; 1494 1495 sem_lock(sma, NULL, -1); 1496 if (!ipc_valid_object(&sma->sem_perm)) { 1497 err = -EIDRM; 1498 goto out_unlock; 1499 } 1500 curr = &sma->sems[semnum]; 1501 1502 switch (cmd) { 1503 case GETVAL: 1504 err = curr->semval; 1505 goto out_unlock; 1506 case GETPID: 1507 err = pid_vnr(curr->sempid); 1508 goto out_unlock; 1509 case GETNCNT: 1510 err = count_semcnt(sma, semnum, 0); 1511 goto out_unlock; 1512 case GETZCNT: 1513 err = count_semcnt(sma, semnum, 1); 1514 goto out_unlock; 1515 } 1516 1517 out_unlock: 1518 sem_unlock(sma, -1); 1519 out_rcu_wakeup: 1520 rcu_read_unlock(); 1521 wake_up_q(&wake_q); 1522 out_free: 1523 if (sem_io != fast_sem_io) 1524 kvfree(sem_io); 1525 return err; 1526 } 1527 1528 static inline unsigned long 1529 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1530 { 1531 
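	/*
	 * Illustrative userspace counterpart (a sketch, not from the original
	 * source): the semid64_ds/semid_ds copied in below typically comes
	 * from a semctl(IPC_SET) call such as:
	 *
	 *	union semun { int val; struct semid_ds *buf;
	 *		      unsigned short *array; };
	 *
	 *	struct semid_ds ds;
	 *	union semun arg = { .buf = &ds };
	 *
	 *	if (semctl(semid, 0, IPC_STAT, arg) == 0) {
	 *		ds.sem_perm.mode = 0600; // only uid/gid/mode are
	 *					 // applied, see ipc_update_perm()
	 *		semctl(semid, 0, IPC_SET, arg); // bumps sem_ctime,
	 *						// see semctl_down()
	 *	}
	 */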
switch (version) { 1532 case IPC_64: 1533 if (copy_from_user(out, buf, sizeof(*out))) 1534 return -EFAULT; 1535 return 0; 1536 case IPC_OLD: 1537 { 1538 struct semid_ds tbuf_old; 1539 1540 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1541 return -EFAULT; 1542 1543 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1544 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1545 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1546 1547 return 0; 1548 } 1549 default: 1550 return -EINVAL; 1551 } 1552 } 1553 1554 /* 1555 * This function handles some semctl commands which require the rwsem 1556 * to be held in write mode. 1557 * NOTE: no locks must be held, the rwsem is taken inside this function. 1558 */ 1559 static int semctl_down(struct ipc_namespace *ns, int semid, 1560 int cmd, struct semid64_ds *semid64) 1561 { 1562 struct sem_array *sma; 1563 int err; 1564 struct kern_ipc_perm *ipcp; 1565 1566 down_write(&sem_ids(ns).rwsem); 1567 rcu_read_lock(); 1568 1569 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, 1570 &semid64->sem_perm, 0); 1571 if (IS_ERR(ipcp)) { 1572 err = PTR_ERR(ipcp); 1573 goto out_unlock1; 1574 } 1575 1576 sma = container_of(ipcp, struct sem_array, sem_perm); 1577 1578 err = security_sem_semctl(&sma->sem_perm, cmd); 1579 if (err) 1580 goto out_unlock1; 1581 1582 switch (cmd) { 1583 case IPC_RMID: 1584 sem_lock(sma, NULL, -1); 1585 /* freeary unlocks the ipc object and rcu */ 1586 freeary(ns, ipcp); 1587 goto out_up; 1588 case IPC_SET: 1589 sem_lock(sma, NULL, -1); 1590 err = ipc_update_perm(&semid64->sem_perm, ipcp); 1591 if (err) 1592 goto out_unlock0; 1593 sma->sem_ctime = ktime_get_real_seconds(); 1594 break; 1595 default: 1596 err = -EINVAL; 1597 goto out_unlock1; 1598 } 1599 1600 out_unlock0: 1601 sem_unlock(sma, -1); 1602 out_unlock1: 1603 rcu_read_unlock(); 1604 out_up: 1605 up_write(&sem_ids(ns).rwsem); 1606 return err; 1607 } 1608 1609 long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) 1610 { 1611 int version; 1612 struct ipc_namespace *ns; 1613 void __user *p = (void __user *)arg; 1614 struct semid64_ds semid64; 1615 int err; 1616 1617 if (semid < 0) 1618 return -EINVAL; 1619 1620 version = ipc_parse_version(&cmd); 1621 ns = current->nsproxy->ipc_ns; 1622 1623 switch (cmd) { 1624 case IPC_INFO: 1625 case SEM_INFO: 1626 return semctl_info(ns, semid, cmd, p); 1627 case IPC_STAT: 1628 case SEM_STAT: 1629 err = semctl_stat(ns, semid, cmd, &semid64); 1630 if (err < 0) 1631 return err; 1632 if (copy_semid_to_user(p, &semid64, version)) 1633 err = -EFAULT; 1634 return err; 1635 case GETALL: 1636 case GETVAL: 1637 case GETPID: 1638 case GETNCNT: 1639 case GETZCNT: 1640 case SETALL: 1641 return semctl_main(ns, semid, semnum, cmd, p); 1642 case SETVAL: { 1643 int val; 1644 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1645 /* big-endian 64bit */ 1646 val = arg >> 32; 1647 #else 1648 /* 32bit or little-endian 64bit */ 1649 val = arg; 1650 #endif 1651 return semctl_setval(ns, semid, semnum, val); 1652 } 1653 case IPC_SET: 1654 if (copy_semid_from_user(&semid64, p, version)) 1655 return -EFAULT; 1656 case IPC_RMID: 1657 return semctl_down(ns, semid, cmd, &semid64); 1658 default: 1659 return -EINVAL; 1660 } 1661 } 1662 1663 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1664 { 1665 return ksys_semctl(semid, semnum, cmd, arg); 1666 } 1667 1668 #ifdef CONFIG_COMPAT 1669 1670 struct compat_semid_ds { 1671 struct compat_ipc_perm sem_perm; 1672 compat_time_t sem_otime; 1673 compat_time_t sem_ctime; 1674 compat_uptr_t sem_base; 
1675 compat_uptr_t sem_pending; 1676 compat_uptr_t sem_pending_last; 1677 compat_uptr_t undo; 1678 unsigned short sem_nsems; 1679 }; 1680 1681 static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf, 1682 int version) 1683 { 1684 memset(out, 0, sizeof(*out)); 1685 if (version == IPC_64) { 1686 struct compat_semid64_ds __user *p = buf; 1687 return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm); 1688 } else { 1689 struct compat_semid_ds __user *p = buf; 1690 return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm); 1691 } 1692 } 1693 1694 static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, 1695 int version) 1696 { 1697 if (version == IPC_64) { 1698 struct compat_semid64_ds v; 1699 memset(&v, 0, sizeof(v)); 1700 to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm); 1701 v.sem_otime = in->sem_otime; 1702 v.sem_ctime = in->sem_ctime; 1703 v.sem_nsems = in->sem_nsems; 1704 return copy_to_user(buf, &v, sizeof(v)); 1705 } else { 1706 struct compat_semid_ds v; 1707 memset(&v, 0, sizeof(v)); 1708 to_compat_ipc_perm(&v.sem_perm, &in->sem_perm); 1709 v.sem_otime = in->sem_otime; 1710 v.sem_ctime = in->sem_ctime; 1711 v.sem_nsems = in->sem_nsems; 1712 return copy_to_user(buf, &v, sizeof(v)); 1713 } 1714 } 1715 1716 long compat_ksys_semctl(int semid, int semnum, int cmd, int arg) 1717 { 1718 void __user *p = compat_ptr(arg); 1719 struct ipc_namespace *ns; 1720 struct semid64_ds semid64; 1721 int version = compat_ipc_parse_version(&cmd); 1722 int err; 1723 1724 ns = current->nsproxy->ipc_ns; 1725 1726 if (semid < 0) 1727 return -EINVAL; 1728 1729 switch (cmd & (~IPC_64)) { 1730 case IPC_INFO: 1731 case SEM_INFO: 1732 return semctl_info(ns, semid, cmd, p); 1733 case IPC_STAT: 1734 case SEM_STAT: 1735 err = semctl_stat(ns, semid, cmd, &semid64); 1736 if (err < 0) 1737 return err; 1738 if (copy_compat_semid_to_user(p, &semid64, version)) 1739 err = -EFAULT; 1740 return err; 1741 case GETVAL: 1742 case GETPID: 1743 case GETNCNT: 1744 case GETZCNT: 1745 case GETALL: 1746 case SETALL: 1747 return semctl_main(ns, semid, semnum, cmd, p); 1748 case SETVAL: 1749 return semctl_setval(ns, semid, semnum, arg); 1750 case IPC_SET: 1751 if (copy_compat_semid_from_user(&semid64, p, version)) 1752 return -EFAULT; 1753 /* fallthru */ 1754 case IPC_RMID: 1755 return semctl_down(ns, semid, cmd, &semid64); 1756 default: 1757 return -EINVAL; 1758 } 1759 } 1760 1761 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) 1762 { 1763 return compat_ksys_semctl(semid, semnum, cmd, arg); 1764 } 1765 #endif 1766 1767 /* If the task doesn't already have a undo_list, then allocate one 1768 * here. We guarantee there is only one thread using this undo list, 1769 * and current is THE ONE 1770 * 1771 * If this allocation and assignment succeeds, but later 1772 * portions of this code fail, there is no need to free the sem_undo_list. 1773 * Just let it stay associated with the task, and it'll be freed later 1774 * at exit time. 1775 * 1776 * This can block, so callers must hold no locks. 
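 *
 * Sharing rules, for illustration (derived from copy_semundo() further
 * below):
 *
 *	clone(CLONE_SYSVSEM, ...)  -> child shares the parent's undo_list
 *				      (refcount_inc() in copy_semundo())
 *	fork() / clone without it  -> sysvsem.undo_list starts out NULL and
 *				      is allocated here on the first semop()
 *				      that uses SEM_UNDO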
1777 */ 1778 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1779 { 1780 struct sem_undo_list *undo_list; 1781 1782 undo_list = current->sysvsem.undo_list; 1783 if (!undo_list) { 1784 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1785 if (undo_list == NULL) 1786 return -ENOMEM; 1787 spin_lock_init(&undo_list->lock); 1788 refcount_set(&undo_list->refcnt, 1); 1789 INIT_LIST_HEAD(&undo_list->list_proc); 1790 1791 current->sysvsem.undo_list = undo_list; 1792 } 1793 *undo_listp = undo_list; 1794 return 0; 1795 } 1796 1797 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1798 { 1799 struct sem_undo *un; 1800 1801 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { 1802 if (un->semid == semid) 1803 return un; 1804 } 1805 return NULL; 1806 } 1807 1808 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1809 { 1810 struct sem_undo *un; 1811 1812 assert_spin_locked(&ulp->lock); 1813 1814 un = __lookup_undo(ulp, semid); 1815 if (un) { 1816 list_del_rcu(&un->list_proc); 1817 list_add_rcu(&un->list_proc, &ulp->list_proc); 1818 } 1819 return un; 1820 } 1821 1822 /** 1823 * find_alloc_undo - lookup (and if not present create) undo array 1824 * @ns: namespace 1825 * @semid: semaphore array id 1826 * 1827 * The function looks up (and if not present creates) the undo structure. 1828 * The size of the undo structure depends on the size of the semaphore 1829 * array, thus the alloc path is not that straightforward. 1830 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1831 * performs a rcu_read_lock(). 1832 */ 1833 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1834 { 1835 struct sem_array *sma; 1836 struct sem_undo_list *ulp; 1837 struct sem_undo *un, *new; 1838 int nsems, error; 1839 1840 error = get_undo_list(&ulp); 1841 if (error) 1842 return ERR_PTR(error); 1843 1844 rcu_read_lock(); 1845 spin_lock(&ulp->lock); 1846 un = lookup_undo(ulp, semid); 1847 spin_unlock(&ulp->lock); 1848 if (likely(un != NULL)) 1849 goto out; 1850 1851 /* no undo structure around - allocate one. */ 1852 /* step 1: figure out the size of the semaphore array */ 1853 sma = sem_obtain_object_check(ns, semid); 1854 if (IS_ERR(sma)) { 1855 rcu_read_unlock(); 1856 return ERR_CAST(sma); 1857 } 1858 1859 nsems = sma->sem_nsems; 1860 if (!ipc_rcu_getref(&sma->sem_perm)) { 1861 rcu_read_unlock(); 1862 un = ERR_PTR(-EIDRM); 1863 goto out; 1864 } 1865 rcu_read_unlock(); 1866 1867 /* step 2: allocate new undo structure */ 1868 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1869 if (!new) { 1870 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1871 return ERR_PTR(-ENOMEM); 1872 } 1873 1874 /* step 3: Acquire the lock on semaphore array */ 1875 rcu_read_lock(); 1876 sem_lock_and_putref(sma); 1877 if (!ipc_valid_object(&sma->sem_perm)) { 1878 sem_unlock(sma, -1); 1879 rcu_read_unlock(); 1880 kfree(new); 1881 un = ERR_PTR(-EIDRM); 1882 goto out; 1883 } 1884 spin_lock(&ulp->lock); 1885 1886 /* 1887 * step 4: check for races: did someone else allocate the undo struct? 
1888 */ 1889 un = lookup_undo(ulp, semid); 1890 if (un) { 1891 kfree(new); 1892 goto success; 1893 } 1894 /* step 5: initialize & link new undo structure */ 1895 new->semadj = (short *) &new[1]; 1896 new->ulp = ulp; 1897 new->semid = semid; 1898 assert_spin_locked(&ulp->lock); 1899 list_add_rcu(&new->list_proc, &ulp->list_proc); 1900 ipc_assert_locked_object(&sma->sem_perm); 1901 list_add(&new->list_id, &sma->list_id); 1902 un = new; 1903 1904 success: 1905 spin_unlock(&ulp->lock); 1906 sem_unlock(sma, -1); 1907 out: 1908 return un; 1909 } 1910 1911 static long do_semtimedop(int semid, struct sembuf __user *tsops, 1912 unsigned nsops, const struct timespec64 *timeout) 1913 { 1914 int error = -EINVAL; 1915 struct sem_array *sma; 1916 struct sembuf fast_sops[SEMOPM_FAST]; 1917 struct sembuf *sops = fast_sops, *sop; 1918 struct sem_undo *un; 1919 int max, locknum; 1920 bool undos = false, alter = false, dupsop = false; 1921 struct sem_queue queue; 1922 unsigned long dup = 0, jiffies_left = 0; 1923 struct ipc_namespace *ns; 1924 1925 ns = current->nsproxy->ipc_ns; 1926 1927 if (nsops < 1 || semid < 0) 1928 return -EINVAL; 1929 if (nsops > ns->sc_semopm) 1930 return -E2BIG; 1931 if (nsops > SEMOPM_FAST) { 1932 sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL); 1933 if (sops == NULL) 1934 return -ENOMEM; 1935 } 1936 1937 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 1938 error = -EFAULT; 1939 goto out_free; 1940 } 1941 1942 if (timeout) { 1943 if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 || 1944 timeout->tv_nsec >= 1000000000L) { 1945 error = -EINVAL; 1946 goto out_free; 1947 } 1948 jiffies_left = timespec64_to_jiffies(timeout); 1949 } 1950 1951 max = 0; 1952 for (sop = sops; sop < sops + nsops; sop++) { 1953 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); 1954 1955 if (sop->sem_num >= max) 1956 max = sop->sem_num; 1957 if (sop->sem_flg & SEM_UNDO) 1958 undos = true; 1959 if (dup & mask) { 1960 /* 1961 * There was a previous alter access that appears 1962 * to have accessed the same semaphore, thus use 1963 * the dupsop logic. "appears", because the detection 1964 * can only check % BITS_PER_LONG. 1965 */ 1966 dupsop = true; 1967 } 1968 if (sop->sem_op != 0) { 1969 alter = true; 1970 dup |= mask; 1971 } 1972 } 1973 1974 if (undos) { 1975 /* On success, find_alloc_undo takes the rcu_read_lock */ 1976 un = find_alloc_undo(ns, semid); 1977 if (IS_ERR(un)) { 1978 error = PTR_ERR(un); 1979 goto out_free; 1980 } 1981 } else { 1982 un = NULL; 1983 rcu_read_lock(); 1984 } 1985 1986 sma = sem_obtain_object_check(ns, semid); 1987 if (IS_ERR(sma)) { 1988 rcu_read_unlock(); 1989 error = PTR_ERR(sma); 1990 goto out_free; 1991 } 1992 1993 error = -EFBIG; 1994 if (max >= sma->sem_nsems) { 1995 rcu_read_unlock(); 1996 goto out_free; 1997 } 1998 1999 error = -EACCES; 2000 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { 2001 rcu_read_unlock(); 2002 goto out_free; 2003 } 2004 2005 error = security_sem_semop(&sma->sem_perm, sops, nsops, alter); 2006 if (error) { 2007 rcu_read_unlock(); 2008 goto out_free; 2009 } 2010 2011 error = -EIDRM; 2012 locknum = sem_lock(sma, sops, nsops); 2013 /* 2014 * We eventually might perform the following check in a lockless 2015 * fashion, considering ipc_valid_object() locking constraints. 2016 * If nsops == 1 and there is no contention for sem_perm.lock, then 2017 * only a per-semaphore lock is held and it's OK to proceed with the 2018 * check below. 
	 * More details on the fine-grained locking scheme entangled here,
	 * and why it is RMID race safe, are in the comments at sem_lock().
	 */
	if (!ipc_valid_object(&sma->sem_perm))
		goto out_unlock_free;
	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array has received the same id. Check and fail.
	 * This case can be detected by checking un->semid. The existence of
	 * "un" itself is guaranteed by rcu.
	 */
	if (un && un->semid == -1)
		goto out_unlock_free;

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid(current);
	queue.alter = alter;
	queue.dupsop = dupsop;

	error = perform_atomic_semop(sma, &queue);
	if (error == 0) { /* non-blocking successful path */
		DEFINE_WAKE_Q(wake_q);

		/*
		 * If the operation was successful, then do
		 * the required updates.
		 */
		if (alter)
			do_smart_update(sma, sops, nsops, 1, &wake_q);
		else
			set_semotime(sma, sops);

		sem_unlock(sma, locknum);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		goto out_free;
	}
	if (error < 0) /* non-blocking error path */
		goto out_unlock_free;

	/*
	 * We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */
	if (nsops == 1) {
		struct sem *curr;
		curr = &sma->sems[sops->sem_num];

		if (alter) {
			if (sma->complex_count) {
				list_add_tail(&queue.list,
						&sma->pending_alter);
			} else {
				list_add_tail(&queue.list,
						&curr->pending_alter);
			}
		} else {
			list_add_tail(&queue.list, &curr->pending_const);
		}
	} else {
		if (!sma->complex_count)
			merge_queues(sma);

		if (alter)
			list_add_tail(&queue.list, &sma->pending_alter);
		else
			list_add_tail(&queue.list, &sma->pending_const);

		sma->complex_count++;
	}

	do {
		queue.status = -EINTR;
		queue.sleeper = current;

		__set_current_state(TASK_INTERRUPTIBLE);
		sem_unlock(sma, locknum);
		rcu_read_unlock();

		if (timeout)
			jiffies_left = schedule_timeout(jiffies_left);
		else
			schedule();

		/*
		 * fastpath: the semop has completed, either successfully or
		 * not. From the syscall pov, which one it was is quite
		 * irrelevant to us at this point; we're done.
		 *
		 * We _do_ care, nonetheless, about being awoken by a signal or
		 * spuriously. The queue.status is checked again in the
		 * slowpath (aka after taking sem_lock), such that we can detect
		 * scenarios where we were awakened externally, during the
		 * window between wake_q_add() and wake_up_q().
		 */
		error = READ_ONCE(queue.status);
		if (error != -EINTR) {
			/*
			 * User space could assume that semop() is a memory
			 * barrier: Without the mb(), the cpu could
			 * speculatively read in userspace stale data that was
			 * overwritten by the previous owner of the semaphore.
			 */
			smp_mb();
			goto out_free;
		}

		rcu_read_lock();
		locknum = sem_lock(sma, sops, nsops);

		if (!ipc_valid_object(&sma->sem_perm))
			goto out_unlock_free;

		error = READ_ONCE(queue.status);

		/*
		 * If queue.status != -EINTR, we were woken up by another
		 * process. Leave without unlink_queue(), but with sem_unlock().
		 */
		if (error != -EINTR)
			goto out_unlock_free;

		/*
		 * If an interrupt occurred, we have to clean up the queue.
		 */
		if (timeout && jiffies_left == 0)
			error = -EAGAIN;
	} while (error == -EINTR && !signal_pending(current)); /* spurious */

	unlink_queue(sma, &queue);

out_unlock_free:
	sem_unlock(sma, locknum);
	rcu_read_unlock();
out_free:
	if (sops != fast_sops)
		kvfree(sops);
	return error;
}

long ksys_semtimedop(int semid, struct sembuf __user *tsops,
		     unsigned int nsops, const struct timespec __user *timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsops, nsops, &ts);
	}
	return do_semtimedop(semid, tsops, nsops, NULL);
}

SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned int, nsops, const struct timespec __user *, timeout)
{
	return ksys_semtimedop(semid, tsops, nsops, timeout);
}

#ifdef CONFIG_COMPAT
long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
			    unsigned int nsops,
			    const struct compat_timespec __user *timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (compat_get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsems, nsops, &ts);
	}
	return do_semtimedop(semid, tsems, nsops, NULL);
}

COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
		       unsigned int, nsops,
		       const struct compat_timespec __user *, timeout)
{
	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
}
#endif

SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return do_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		refcount_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments should be applied in an atomic manner or not.
 * That is, if we are attempting to decrement the semval, should we queue
 * up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
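 *
 * For illustration only (this example is an addition, not part of the
 * original comment; the values are hypothetical, and union semun must be
 * defined by the caller, as usual on Linux): a user-space sequence that
 * relies on the exit-time adjustment performed below. The semaphore is
 * decremented with SEM_UNDO, so exit_sem() adds the recorded +1 back if
 * the task exits before releasing the semaphore itself:
 *
 *	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *	union semun arg = { .val = 1 };
 *	semctl(id, 0, SETVAL, arg);		(semval = 1)
 *	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *	semop(id, &op, 1);			(semval = 0, semadj[0] = +1)
 *	_exit(0);				(exit_sem() applies +1, semval = 1)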
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *ulp;

	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
		return;
	tsk->sysvsem.undo_list = NULL;

	if (!refcount_dec_and_test(&ulp->refcnt))
		return;

	for (;;) {
		struct sem_array *sma;
		struct sem_undo *un;
		int semid, i;
		DEFINE_WAKE_Q(wake_q);

		cond_resched();

		rcu_read_lock();
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
		if (&un->list_proc == &ulp->list_proc) {
			/*
			 * We must wait for freeary() before freeing this ulp,
			 * in case we raced with the last sem_undo. There is a
			 * small window where we exit while freeary() has not
			 * yet finished unlocking the sem_undo_list.
			 */
			spin_lock(&ulp->lock);
			spin_unlock(&ulp->lock);
			rcu_read_unlock();
			break;
		}
		spin_lock(&ulp->lock);
		semid = un->semid;
		spin_unlock(&ulp->lock);

		/* exit_sem raced with IPC_RMID, nothing to do */
		if (semid == -1) {
			rcu_read_unlock();
			continue;
		}

		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (IS_ERR(sma)) {
			rcu_read_unlock();
			continue;
		}

		sem_lock(sma, NULL, -1);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (!ipc_valid_object(&sma->sem_perm)) {
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}
		un = __lookup_undo(ulp, semid);
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
			 */
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}

		/* remove un from the linked lists */
		ipc_assert_locked_object(&sma->sem_perm);
		list_del(&un->list_id);

		/* we are the last process using this ulp, so acquiring
		 * ulp->lock isn't required. Besides that, we are also
		 * protected against IPC_RMID, as we hold the sma->sem_perm
		 * lock now.
		 */
		list_del_rcu(&un->list_proc);

		/* perform adjustments registered in un */
		for (i = 0; i < sma->sem_nsems; i++) {
			struct sem *semaphore = &sma->sems[i];
			if (un->semadj[i]) {
				semaphore->semval += un->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by SUS:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
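				 *
				 * Worked example (hypothetical values, added
				 * for illustration): with semval == 2 and
				 * semadj[i] == -5, the sum above is -3, which
				 * the check below clamps to 0; a sum above
				 * SEMVMX is clamped to SEMVMX the same way.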
				 *
				 *	Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				ipc_update_pid(&semaphore->sempid, task_tgid(current));
			}
		}
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 1, &wake_q);
		sem_unlock(sma, -1);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		kfree_rcu(un, rcu);
	}
	kfree(ulp);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	time64_t sem_otime;

	/*
	 * The proc interface isn't aware of sem_lock(); it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
	 */
	complexmode_enter(sma);

	sem_otime = get_semotime(sma);

	seq_printf(s,
		   "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

	complexmode_tryleave(sma);

	return 0;
}
#endif
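
/*
 * Illustrative user-space sketch (an addition, not part of the original
 * file; the semaphore id and timeout values are hypothetical): how the
 * semtimedop() entry point above is typically reached. When the timeout
 * expires, do_semtimedop() returns -EAGAIN, which user space observes as
 * errno == EAGAIN. With glibc, semtimedop() is declared in <sys/sem.h>
 * when _GNU_SOURCE is defined.
 *
 *	#define _GNU_SOURCE
 *	#include <sys/sem.h>
 *	#include <time.h>
 *	#include <errno.h>
 *
 *	(returns 1 if the semaphore was acquired, 0 on timeout, -1 on error)
 *	int wait_for_sem(int semid)
 *	{
 *		struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
 *		struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *
 *		if (semtimedop(semid, &op, 1, &ts) == -1)
 *			return errno == EAGAIN ? 0 : -1;
 *		return 1;
 *	}
 */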