1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * Inter-Process Communication Semaphore Facility.
31 *
32 * See os/ipc.c for a description of common IPC functionality.
33 *
34 * Resource controls
35 * -----------------
36 *
37 * Control: zone.max-sem-ids (rc_zone_semmni)
38 * Description: Maximum number of semaphore ids allowed a zone.
39 *
40 * When semget() is used to allocate a semaphore set, one id is
41 * allocated. If the id allocation doesn't succeed, semget() fails
42 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID)
43 * the id is deallocated.
44 *
45 * Control: project.max-sem-ids (rc_project_semmni)
46 * Description: Maximum number of semaphore ids allowed a project.
47 *
48 * When semget() is used to allocate a semaphore set, one id is
49 * allocated. If the id allocation doesn't succeed, semget() fails
50 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID)
51 * the id is deallocated.
52 *
53 * Control: process.max-sem-nsems (rc_process_semmsl)
54 * Description: Maximum number of semaphores allowed per semaphore set.
55 *
56 * When semget() is used to allocate a semaphore set, the size of the
57 * set is compared with this limit. If the number of semaphores
58 * exceeds the limit, semget() fails and errno is set to EINVAL.
59 *
60 * Control: process.max-sem-ops (rc_process_semopm)
61 * Description: Maximum number of semaphore operations allowed per
62 * semop call.
63 *
64 * When semget() successfully allocates a semaphore set, the minimum
65 * enforced value of this limit is used to initialize the
66 * "system-imposed maximum" number of operations a semop() call for
67 * this set can perform.
68 *
69 * Undo structures
70 * ---------------
71 *
72 * Removing the undo structure tunables involved a serious redesign of
73 * how they were implemented. There is now one undo structure for
74 * every process/semaphore array combination (lazily allocated, of
75 * course), and each is equal in size to the semaphore it corresponds
76 * to. To avoid scalability and performance problems, the undo
77 * structures are stored in two places: a per-process AVL tree sorted
78 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted
79 * per-semaphore linked list (sem_undos, protected by the semaphore's
80 * ID lock). The former is used by semop, where a lookup is performed
81 * once and cached if SEM_UNDO is specified for any of the operations,
82 * and at process exit where the undoable operations are rolled back.
83 * The latter is used when removing the semaphore, so the undo
84 * structures can be removed from the appropriate processes' trees.
85 *
86 * The undo structure itself contains pointers to the ksemid and proc
87 * to which it corresponds, a list node, an AVL node, and an array of
88 * adjust-on-exit (AOE) values. When an undo structure is allocated it
89 * is immediately added to both the process's tree and the semaphore's
90 * list. Lastly, the reference count on the semaphore is increased.
91 *
92 * Avoiding a lock ordering violation between p_lock and the ID lock,
93 * wont to occur when there is a race between a process exiting and the
94 * removal of a semaphore, mandates the delicate dance that exists
95 * between semexit and sem_rmid.
96 *
 * sem_rmid, holding the ID lock, iterates through all undo structures,
 * removing each one from the semaphore's list.  For each it takes the
 * appropriate process's p_lock and checks to see if p_semacct is NULL.
 * If it is, it leaves that undo structure for semexit to free and
 * continues to the next.  Otherwise, it removes the undo structure
 * from the AVL tree, frees it, and releases the hold that the undo
 * structure had on the semaphore.
103 *
104 * The important other half of this is semexit, which will immediately
105 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop
106 * p_lock. From this point on it is semexit's responsibility to clean
107 * up all undo structures found in the tree -- a coexecuting sem_rmid
108 * will see the NULL p_semacct and skip that undo structure. It walks
109 * the AVL tree (using avl_destroy_nodes) and for each undo structure
110 * takes the appropriate semaphore's ID lock (always legal since the
111 * undo structure has a hold on the semaphore), updates all semaphores
112 * with non-zero AOE values, and removes the structure from the
113 * semaphore's list. It then drops the structure's reference on the
114 * semaphore, drops the ID lock, and frees the undo structure.
115 */
116
117 #include <sys/types.h>
118 #include <sys/t_lock.h>
119 #include <sys/param.h>
120 #include <sys/systm.h>
121 #include <sys/sysmacros.h>
122 #include <sys/cred.h>
123 #include <sys/vmem.h>
124 #include <sys/kmem.h>
125 #include <sys/errno.h>
126 #include <sys/time.h>
127 #include <sys/ipc.h>
128 #include <sys/ipc_impl.h>
129 #include <sys/sem.h>
130 #include <sys/sem_impl.h>
131 #include <sys/user.h>
132 #include <sys/proc.h>
133 #include <sys/cpuvar.h>
134 #include <sys/debug.h>
135 #include <sys/var.h>
136 #include <sys/cmn_err.h>
137 #include <sys/modctl.h>
138 #include <sys/syscall.h>
139 #include <sys/avl.h>
140 #include <sys/list.h>
141 #include <sys/zone.h>
142
143 #include <c2/audit.h>
144
145 extern rctl_hndl_t rc_zone_semmni;
146 extern rctl_hndl_t rc_project_semmni;
147 extern rctl_hndl_t rc_process_semmsl;
148 extern rctl_hndl_t rc_process_semopm;
149 static ipc_service_t *sem_svc;
150 static zone_key_t sem_zone_key;
151
/*
 * Obsolete tunables.
 *
 * For compatibility seminfo_semmsl, seminfo_semopm and seminfo_semmni
 * are still read and interpreted (see os/project.c and
 * os/rctl_proc.c), but the preferred mechanism for administering the
 * IPC Semaphore facility is the set of resource controls described at
 * the top of this file.
 */

/* Still read for compatibility. */
int seminfo_semmni = 10;	/* (obsolete) */
int seminfo_semmsl = 25;	/* (obsolete) */
int seminfo_semopm = 10;	/* (obsolete) */

/* Remaining obsolete tunables. */
int seminfo_semaem = 16384;	/* (obsolete) */
int seminfo_semmap = 10;	/* (obsolete) */
int seminfo_semmns = 60;	/* (obsolete) */
int seminfo_semmnu = 30;	/* (obsolete) */
int seminfo_semume = 10;	/* (obsolete) */
int seminfo_semusz = 96;	/* (obsolete) */
int seminfo_semvmx = 32767;	/* (obsolete) */
169
/*
 * Max # of ops a semop call may carry before semop() insists on checking
 * the count against the set's sem_maxops prior to allocating memory.
 */
#define	SEM_MAXUCOPS	4096

/*
 * Size in bytes of an undo structure for a set of n semaphores: the base
 * structure plus n - 1 additional int adjust values.  (Presumably
 * struct sem_undo already holds the first element of un_aoe; confirm
 * against sys/sem_impl.h.)  The argument is parenthesized so that an
 * expression argument expands correctly.
 */
#define	SEM_UNDOSZ(n)	\
	(sizeof (struct sem_undo) + ((n) - 1) * sizeof (int))
172
173 static int semsys(int opcode, uintptr_t a0, uintptr_t a1,
174 uintptr_t a2, uintptr_t a3);
175 static void sem_dtor(kipc_perm_t *);
176 static void sem_rmid(kipc_perm_t *);
177 static void sem_remove_zone(zoneid_t, void *);
178
179 static struct sysent ipcsem_sysent = {
180 5,
181 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
182 semsys
183 };
184
185 /*
186 * Module linkage information for the kernel.
187 */
188 static struct modlsys modlsys = {
189 &mod_syscallops, "System V semaphore facility", &ipcsem_sysent
190 };
191
192 #ifdef _SYSCALL32_IMPL
193 static struct modlsys modlsys32 = {
194 &mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent
195 };
196 #endif
197
198 static struct modlinkage modlinkage = {
199 MODREV_1,
200 &modlsys,
201 #ifdef _SYSCALL32_IMPL
202 &modlsys32,
203 #endif
204 NULL
205 };
206
207
208 int
_init(void)209 _init(void)
210 {
211 int result;
212
213 sem_svc = ipcs_create("semids", rc_project_semmni, rc_zone_semmni,
214 sizeof (ksemid_t), sem_dtor, sem_rmid, AT_IPC_SEM,
215 offsetof(ipc_rqty_t, ipcq_semmni));
216 zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL);
217
218 if ((result = mod_install(&modlinkage)) == 0)
219 return (0);
220
221 (void) zone_key_delete(sem_zone_key);
222 ipcs_destroy(sem_svc);
223
224 return (result);
225 }
226
227 int
_fini(void)228 _fini(void)
229 {
230 return (EBUSY);
231 }
232
233 int
_info(struct modinfo * modinfop)234 _info(struct modinfo *modinfop)
235 {
236 return (mod_info(&modlinkage, modinfop));
237 }
238
239 static void
sem_dtor(kipc_perm_t * perm)240 sem_dtor(kipc_perm_t *perm)
241 {
242 ksemid_t *sp = (ksemid_t *)perm;
243
244 kmem_free(sp->sem_base,
245 P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64));
246 list_destroy(&sp->sem_undos);
247 }
248
249 /*
250 * sem_undo_add - Create or update adjust on exit entry.
251 */
252 static int
sem_undo_add(short val,ushort_t num,struct sem_undo * undo)253 sem_undo_add(short val, ushort_t num, struct sem_undo *undo)
254 {
255 int newval = undo->un_aoe[num] - val;
256
257 if (newval > USHRT_MAX || newval < -USHRT_MAX)
258 return (ERANGE);
259 undo->un_aoe[num] = newval;
260
261 return (0);
262 }
263
264 /*
265 * sem_undo_clear - clears all undo entries for specified semaphores
266 *
267 * Used when semaphores are reset by SETVAL or SETALL.
268 */
269 static void
sem_undo_clear(ksemid_t * sp,ushort_t low,ushort_t high)270 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high)
271 {
272 struct sem_undo *undo;
273 int i;
274
275 ASSERT(low <= high);
276 ASSERT(high < sp->sem_nsems);
277
278 for (undo = list_head(&sp->sem_undos); undo;
279 undo = list_next(&sp->sem_undos, undo))
280 for (i = low; i <= high; i++)
281 undo->un_aoe[i] = 0;
282 }
283
284 /*
285 * sem_rollback - roll back work done so far if unable to complete operation
286 */
287 static void
sem_rollback(ksemid_t * sp,struct sembuf * op,int n,struct sem_undo * undo)288 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo)
289 {
290 struct sem *semp; /* semaphore ptr */
291
292 for (op += n - 1; n--; op--) {
293 if (op->sem_op == 0)
294 continue;
295 semp = &sp->sem_base[op->sem_num];
296 semp->semval -= op->sem_op;
297 if (op->sem_flg & SEM_UNDO) {
298 ASSERT(undo != NULL);
299 (void) sem_undo_add(-op->sem_op, op->sem_num, undo);
300 }
301 }
302 }
303
304 static void
sem_rmid(kipc_perm_t * perm)305 sem_rmid(kipc_perm_t *perm)
306 {
307 ksemid_t *sp = (ksemid_t *)perm;
308 struct sem *semp;
309 struct sem_undo *undo;
310 size_t size = SEM_UNDOSZ(sp->sem_nsems);
311 int i;
312
313 /*LINTED*/
314 while (undo = list_head(&sp->sem_undos)) {
315 list_remove(&sp->sem_undos, undo);
316 mutex_enter(&undo->un_proc->p_lock);
317 if (undo->un_proc->p_semacct == NULL) {
318 mutex_exit(&undo->un_proc->p_lock);
319 continue;
320 }
321 avl_remove(undo->un_proc->p_semacct, undo);
322 mutex_exit(&undo->un_proc->p_lock);
323 kmem_free(undo, size);
324 ipc_rele_locked(sem_svc, (kipc_perm_t *)sp);
325 }
326
327 for (i = 0; i < sp->sem_nsems; i++) {
328 semp = &sp->sem_base[i];
329 semp->semval = semp->sempid = 0;
330 if (semp->semncnt) {
331 cv_broadcast(&semp->semncnt_cv);
332 semp->semncnt = 0;
333 }
334 if (semp->semzcnt) {
335 cv_broadcast(&semp->semzcnt_cv);
336 semp->semzcnt = 0;
337 }
338 }
339 }
340
341 /*
342 * semctl - Semctl system call.
343 */
344 static int
semctl(int semid,uint_t semnum,int cmd,uintptr_t arg)345 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg)
346 {
347 ksemid_t *sp; /* ptr to semaphore header */
348 struct sem *p; /* ptr to semaphore */
349 unsigned int i; /* loop control */
350 ushort_t *vals, *vp;
351 size_t vsize = 0;
352 int error = 0;
353 int retval = 0;
354 struct cred *cr;
355 kmutex_t *lock;
356 model_t mdl = get_udatamodel();
357 STRUCT_DECL(semid_ds, sid);
358 struct semid_ds64 ds64;
359
360 STRUCT_INIT(sid, mdl);
361 cr = CRED();
362
363 /*
364 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
365 */
366 switch (cmd) {
367 case IPC_SET:
368 if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid)))
369 return (set_errno(EFAULT));
370 break;
371
372 case IPC_SET64:
373 if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64)))
374 return (set_errno(EFAULT));
375 break;
376
377 case SETALL:
378 if ((lock = ipc_lookup(sem_svc, semid,
379 (kipc_perm_t **)&sp)) == NULL)
380 return (set_errno(EINVAL));
381 vsize = sp->sem_nsems * sizeof (*vals);
382 mutex_exit(lock);
383
384 /* allocate space to hold all semaphore values */
385 vals = kmem_alloc(vsize, KM_SLEEP);
386
387 if (copyin((void *)arg, vals, vsize)) {
388 kmem_free(vals, vsize);
389 return (set_errno(EFAULT));
390 }
391 break;
392
393 case IPC_RMID:
394 if (error = ipc_rmid(sem_svc, semid, cr))
395 return (set_errno(error));
396 return (0);
397 }
398
399 if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) {
400 if (vsize != 0)
401 kmem_free(vals, vsize);
402 return (set_errno(EINVAL));
403 }
404 switch (cmd) {
405 /* Set ownership and permissions. */
406 case IPC_SET:
407
408 if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm,
409 &STRUCT_BUF(sid)->sem_perm, mdl)) {
410 mutex_exit(lock);
411 return (set_errno(error));
412 }
413 sp->sem_ctime = gethrestime_sec();
414 mutex_exit(lock);
415 return (0);
416
417 /* Get semaphore data structure. */
418 case IPC_STAT:
419
420 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
421 mutex_exit(lock);
422 return (set_errno(error));
423 }
424
425 ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl);
426 STRUCT_FSETP(sid, sem_base, NULL); /* kernel addr */
427 STRUCT_FSET(sid, sem_nsems, sp->sem_nsems);
428 STRUCT_FSET(sid, sem_otime, sp->sem_otime);
429 STRUCT_FSET(sid, sem_ctime, sp->sem_ctime);
430 STRUCT_FSET(sid, sem_binary, sp->sem_binary);
431 mutex_exit(lock);
432
433 if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid)))
434 return (set_errno(EFAULT));
435 return (0);
436
437 case IPC_SET64:
438
439 if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm,
440 &ds64.semx_perm)) {
441 mutex_exit(lock);
442 return (set_errno(error));
443 }
444 sp->sem_ctime = gethrestime_sec();
445 mutex_exit(lock);
446 return (0);
447
448 case IPC_STAT64:
449
450 ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm);
451 ds64.semx_nsems = sp->sem_nsems;
452 ds64.semx_otime = sp->sem_otime;
453 ds64.semx_ctime = sp->sem_ctime;
454
455 mutex_exit(lock);
456 if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64)))
457 return (set_errno(EFAULT));
458
459 return (0);
460
461 /* Get # of processes sleeping for greater semval. */
462 case GETNCNT:
463 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
464 mutex_exit(lock);
465 return (set_errno(error));
466 }
467 if (semnum >= sp->sem_nsems) {
468 mutex_exit(lock);
469 return (set_errno(EINVAL));
470 }
471 retval = sp->sem_base[semnum].semncnt;
472 mutex_exit(lock);
473 return (retval);
474
475 /* Get pid of last process to operate on semaphore. */
476 case GETPID:
477 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
478 mutex_exit(lock);
479 return (set_errno(error));
480 }
481 if (semnum >= sp->sem_nsems) {
482 mutex_exit(lock);
483 return (set_errno(EINVAL));
484 }
485 retval = sp->sem_base[semnum].sempid;
486 mutex_exit(lock);
487 return (retval);
488
489 /* Get semval of one semaphore. */
490 case GETVAL:
491 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
492 mutex_exit(lock);
493 return (set_errno(error));
494 }
495 if (semnum >= sp->sem_nsems) {
496 mutex_exit(lock);
497 return (set_errno(EINVAL));
498 }
499 retval = sp->sem_base[semnum].semval;
500 mutex_exit(lock);
501 return (retval);
502
503 /* Get all semvals in set. */
504 case GETALL:
505 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
506 mutex_exit(lock);
507 return (set_errno(error));
508 }
509
510 /* allocate space to hold all semaphore values */
511 vsize = sp->sem_nsems * sizeof (*vals);
512 vals = vp = kmem_alloc(vsize, KM_SLEEP);
513
514 for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++)
515 bcopy(&p->semval, vp, sizeof (p->semval));
516
517 mutex_exit(lock);
518
519 if (copyout((void *)vals, (void *)arg, vsize)) {
520 kmem_free(vals, vsize);
521 return (set_errno(EFAULT));
522 }
523
524 kmem_free(vals, vsize);
525 return (0);
526
527 /* Get # of processes sleeping for semval to become zero. */
528 case GETZCNT:
529 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
530 mutex_exit(lock);
531 return (set_errno(error));
532 }
533 if (semnum >= sp->sem_nsems) {
534 mutex_exit(lock);
535 return (set_errno(EINVAL));
536 }
537 retval = sp->sem_base[semnum].semzcnt;
538 mutex_exit(lock);
539 return (retval);
540
541 /* Set semval of one semaphore. */
542 case SETVAL:
543 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) {
544 mutex_exit(lock);
545 return (set_errno(error));
546 }
547 if (semnum >= sp->sem_nsems) {
548 mutex_exit(lock);
549 return (set_errno(EINVAL));
550 }
551 if ((uint_t)arg > USHRT_MAX) {
552 mutex_exit(lock);
553 return (set_errno(ERANGE));
554 }
555 p = &sp->sem_base[semnum];
556 if ((p->semval = (ushort_t)arg) != 0) {
557 if (p->semncnt) {
558 cv_broadcast(&p->semncnt_cv);
559 }
560 } else if (p->semzcnt) {
561 cv_broadcast(&p->semzcnt_cv);
562 }
563 p->sempid = curproc->p_pid;
564 sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum);
565 mutex_exit(lock);
566 return (0);
567
568 /* Set semvals of all semaphores in set. */
569 case SETALL:
570 /* Check if semaphore set has been deleted and reallocated. */
571 if (sp->sem_nsems * sizeof (*vals) != vsize) {
572 error = set_errno(EINVAL);
573 goto seterr;
574 }
575 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) {
576 error = set_errno(error);
577 goto seterr;
578 }
579 sem_undo_clear(sp, 0, sp->sem_nsems - 1);
580 for (i = 0, p = sp->sem_base; i < sp->sem_nsems;
581 (p++)->sempid = curproc->p_pid) {
582 if ((p->semval = vals[i++]) != 0) {
583 if (p->semncnt) {
584 cv_broadcast(&p->semncnt_cv);
585 }
586 } else if (p->semzcnt) {
587 cv_broadcast(&p->semzcnt_cv);
588 }
589 }
590 seterr:
591 mutex_exit(lock);
592 kmem_free(vals, vsize);
593 return (error);
594
595 default:
596 mutex_exit(lock);
597 return (set_errno(EINVAL));
598 }
599
600 /* NOTREACHED */
601 }
602
603 /*
604 * semexit - Called by exit() to clean up on process exit.
605 */
606 void
semexit(proc_t * pp)607 semexit(proc_t *pp)
608 {
609 avl_tree_t *tree;
610 struct sem_undo *undo;
611 void *cookie = NULL;
612
613 mutex_enter(&pp->p_lock);
614 tree = pp->p_semacct;
615 pp->p_semacct = NULL;
616 mutex_exit(&pp->p_lock);
617
618 while (undo = avl_destroy_nodes(tree, &cookie)) {
619 ksemid_t *sp = undo->un_sp;
620 size_t size = SEM_UNDOSZ(sp->sem_nsems);
621 int i;
622
623 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id);
624 if (!IPC_FREE(&sp->sem_perm)) {
625 for (i = 0; i < sp->sem_nsems; i++) {
626 int adj = undo->un_aoe[i];
627 if (adj) {
628 struct sem *semp = &sp->sem_base[i];
629 int v = (int)semp->semval + adj;
630
631 if (v < 0 || v > USHRT_MAX)
632 continue;
633 semp->semval = (ushort_t)v;
634 if (v == 0 && semp->semzcnt)
635 cv_broadcast(&semp->semzcnt_cv);
636 if (adj > 0 && semp->semncnt)
637 cv_broadcast(&semp->semncnt_cv);
638 }
639 }
640 list_remove(&sp->sem_undos, undo);
641 }
642 ipc_rele(sem_svc, (kipc_perm_t *)sp);
643 kmem_free(undo, size);
644 }
645
646 avl_destroy(tree);
647 kmem_free(tree, sizeof (avl_tree_t));
648 }
649
650 /*
651 * Remove all semaphores associated with a given zone. Called by
652 * zone_shutdown when the zone is halted.
653 */
654 /*ARGSUSED1*/
655 static void
sem_remove_zone(zoneid_t zoneid,void * arg)656 sem_remove_zone(zoneid_t zoneid, void *arg)
657 {
658 ipc_remove_zone(sem_svc, zoneid);
659 }
660
661 /*
662 * semget - Semget system call.
663 */
664 static int
semget(key_t key,int nsems,int semflg)665 semget(key_t key, int nsems, int semflg)
666 {
667 ksemid_t *sp;
668 kmutex_t *lock;
669 int id, error;
670 proc_t *pp = curproc;
671
672 top:
673 if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock))
674 return (set_errno(error));
675
676 if (!IPC_FREE(&sp->sem_perm)) {
677 /*
678 * A semaphore with the requested key exists.
679 */
680 if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) {
681 mutex_exit(lock);
682 return (set_errno(EINVAL));
683 }
684 } else {
685 /*
686 * This is a new semaphore set. Finish initialization.
687 */
688 if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp,
689 nsems, RCA_SAFE) & RCT_DENY)) {
690 mutex_exit(lock);
691 mutex_exit(&pp->p_lock);
692 ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
693 return (set_errno(EINVAL));
694 }
695 mutex_exit(lock);
696 mutex_exit(&pp->p_lock);
697
698 /*
699 * We round the allocation up to coherency granularity
700 * so that multiple semaphore allocations won't result
701 * in the false sharing of their sem structures.
702 */
703 sp->sem_base =
704 kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64),
705 KM_SLEEP);
706 sp->sem_binary = (nsems == 1);
707 sp->sem_nsems = (ushort_t)nsems;
708 sp->sem_ctime = gethrestime_sec();
709 sp->sem_otime = 0;
710 list_create(&sp->sem_undos, sizeof (struct sem_undo),
711 offsetof(struct sem_undo, un_list));
712
713 if (error = ipc_commit_begin(sem_svc, key, semflg,
714 (kipc_perm_t *)sp)) {
715 if (error == EAGAIN)
716 goto top;
717 return (set_errno(error));
718 }
719 sp->sem_maxops =
720 rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
721 if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems,
722 RCA_SAFE) & RCT_DENY) {
723 ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
724 return (set_errno(EINVAL));
725 }
726 lock = ipc_commit_end(sem_svc, &sp->sem_perm);
727 }
728
729 if (AU_AUDITING())
730 audit_ipcget(AT_IPC_SEM, (void *)sp);
731
732 id = sp->sem_perm.ipc_id;
733 mutex_exit(lock);
734 return (id);
735 }
736
737 /*
738 * semids system call.
739 */
740 static int
semids(int * buf,uint_t nids,uint_t * pnids)741 semids(int *buf, uint_t nids, uint_t *pnids)
742 {
743 int error;
744
745 if (error = ipc_ids(sem_svc, buf, nids, pnids))
746 return (set_errno(error));
747
748 return (0);
749 }
750
751
752 /*
753 * Helper function for semop - copies in the provided timespec and
754 * computes the absolute future time after which we must return.
755 */
756 static int
compute_timeout(timespec_t ** tsp,timespec_t * ts,timespec_t * now,timespec_t * timeout)757 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now,
758 timespec_t *timeout)
759 {
760 model_t datamodel = get_udatamodel();
761
762 if (datamodel == DATAMODEL_NATIVE) {
763 if (copyin(timeout, ts, sizeof (timespec_t)))
764 return (EFAULT);
765 } else {
766 timespec32_t ts32;
767
768 if (copyin(timeout, &ts32, sizeof (timespec32_t)))
769 return (EFAULT);
770 TIMESPEC32_TO_TIMESPEC(ts, &ts32)
771 }
772
773 if (itimerspecfix(ts))
774 return (EINVAL);
775
776 /*
777 * Convert the timespec value into absolute time.
778 */
779 timespecadd(ts, now);
780 *tsp = ts;
781
782 return (0);
783 }
784
785 /*
786 * Undo structure comparator. We sort based on ksemid_t pointer.
787 */
788 static int
sem_undo_compar(const void * x,const void * y)789 sem_undo_compar(const void *x, const void *y)
790 {
791 struct sem_undo *undo1 = (struct sem_undo *)x;
792 struct sem_undo *undo2 = (struct sem_undo *)y;
793
794 if (undo1->un_sp < undo2->un_sp)
795 return (-1);
796 if (undo1->un_sp > undo2->un_sp)
797 return (1);
798 return (0);
799 }
800
801 /*
802 * Helper function for semop - creates an undo structure and adds it to
803 * the process's avl tree and the semaphore's list.
804 */
805 static int
sem_undo_alloc(proc_t * pp,ksemid_t * sp,kmutex_t ** lock,struct sem_undo * template,struct sem_undo ** un)806 sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock,
807 struct sem_undo *template, struct sem_undo **un)
808 {
809 size_t size;
810 struct sem_undo *undo;
811 avl_tree_t *tree = NULL;
812 avl_index_t where;
813
814 mutex_exit(*lock);
815
816 size = SEM_UNDOSZ(sp->sem_nsems);
817 undo = kmem_zalloc(size, KM_SLEEP);
818 undo->un_proc = pp;
819 undo->un_sp = sp;
820
821 if (pp->p_semacct == NULL)
822 tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
823
824 *lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
825 if (IPC_FREE(&sp->sem_perm)) {
826 kmem_free(undo, size);
827 if (tree)
828 kmem_free(tree, sizeof (avl_tree_t));
829 return (EIDRM);
830 }
831
832 mutex_enter(&pp->p_lock);
833 if (tree) {
834 if (pp->p_semacct == NULL) {
835 avl_create(tree, sem_undo_compar,
836 sizeof (struct sem_undo),
837 offsetof(struct sem_undo, un_avl));
838 pp->p_semacct = tree;
839 } else {
840 kmem_free(tree, sizeof (avl_tree_t));
841 }
842 }
843
844 if (*un = avl_find(pp->p_semacct, template, &where)) {
845 mutex_exit(&pp->p_lock);
846 kmem_free(undo, size);
847 } else {
848 *un = undo;
849 avl_insert(pp->p_semacct, undo, where);
850 mutex_exit(&pp->p_lock);
851 list_insert_head(&sp->sem_undos, undo);
852 ipc_hold(sem_svc, (kipc_perm_t *)sp);
853 }
854
855
856 return (0);
857 }
858
859 /*
860 * semop - Semop system call.
861 */
862 static int
semop(int semid,struct sembuf * sops,size_t nsops,timespec_t * timeout)863 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout)
864 {
865 ksemid_t *sp = NULL;
866 kmutex_t *lock;
867 struct sembuf *op; /* ptr to operation */
868 int i; /* loop control */
869 struct sem *semp; /* ptr to semaphore */
870 int error = 0;
871 struct sembuf *uops; /* ptr to copy of user ops */
872 struct sembuf x_sem; /* avoid kmem_alloc's */
873 timespec_t now, ts, *tsp = NULL;
874 int timecheck = 0;
875 int cvres, needundo, mode;
876 struct sem_undo *undo;
877 proc_t *pp = curproc;
878 int held = 0;
879
880 CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */
881
882 /*
883 * To avoid the cost of copying in 'timeout' in the common
884 * case, we could only grab the time here and defer the copyin
885 * and associated computations until we are about to block.
886 *
887 * The down side to this is that we would then have to spin
888 * some goto top nonsense to avoid the copyin behind the semid
889 * lock. As a common use of timed semaphores is as an explicit
890 * blocking mechanism, this could incur a greater penalty.
891 *
892 * If we eventually decide that this would be a wise route to
893 * take, the deferrable functionality is completely contained
894 * in 'compute_timeout', and the interface is defined such that
895 * we can legally not validate 'timeout' if it is unused.
896 */
897 if (timeout != NULL) {
898 timecheck = timechanged;
899 gethrestime(&now);
900 if (error = compute_timeout(&tsp, &ts, &now, timeout))
901 return (set_errno(error));
902 }
903
904 /*
905 * Allocate space to hold the vector of semaphore ops. If
906 * there is only 1 operation we use a preallocated buffer on
907 * the stack for speed.
908 *
909 * Since we don't want to allow the user to allocate an
910 * arbitrary amount of kernel memory, we need to check against
911 * the number of operations allowed by the semaphore. We only
912 * bother doing this if the number of operations is larger than
913 * SEM_MAXUCOPS.
914 */
915 if (nsops == 1)
916 uops = &x_sem;
917 else if (nsops == 0)
918 return (0);
919 else if (nsops <= SEM_MAXUCOPS)
920 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP);
921
922 if (nsops > SEM_MAXUCOPS) {
923 if ((lock = ipc_lookup(sem_svc, semid,
924 (kipc_perm_t **)&sp)) == NULL)
925 return (set_errno(EFAULT));
926
927 if (nsops > sp->sem_maxops) {
928 mutex_exit(lock);
929 return (set_errno(E2BIG));
930 }
931 held = 1;
932 ipc_hold(sem_svc, (kipc_perm_t *)sp);
933 mutex_exit(lock);
934
935 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP);
936 if (copyin(sops, uops, nsops * sizeof (*op))) {
937 error = EFAULT;
938 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id);
939 goto semoperr;
940 }
941
942 lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
943 if (IPC_FREE(&sp->sem_perm)) {
944 error = EIDRM;
945 goto semoperr;
946 }
947 } else {
948 /*
949 * This could be interleaved with the above code, but
950 * keeping them separate improves readability.
951 */
952 if (copyin(sops, uops, nsops * sizeof (*op))) {
953 error = EFAULT;
954 goto semoperr_unlocked;
955 }
956
957 if ((lock = ipc_lookup(sem_svc, semid,
958 (kipc_perm_t **)&sp)) == NULL) {
959 error = EINVAL;
960 goto semoperr_unlocked;
961 }
962
963 if (nsops > sp->sem_maxops) {
964 error = E2BIG;
965 goto semoperr;
966 }
967 }
968
969 /*
970 * Scan all operations. Verify that sem #s are in range and
971 * this process is allowed the requested operations. If any
972 * operations are marked SEM_UNDO, find (or allocate) the undo
973 * structure for this process and semaphore.
974 */
975 needundo = 0;
976 mode = 0;
977 for (i = 0, op = uops; i++ < nsops; op++) {
978 mode |= op->sem_op ? SEM_A : SEM_R;
979 if (op->sem_num >= sp->sem_nsems) {
980 error = EFBIG;
981 goto semoperr;
982 }
983 if ((op->sem_flg & SEM_UNDO) && op->sem_op)
984 needundo = 1;
985 }
986 if (error = ipcperm_access(&sp->sem_perm, mode, CRED()))
987 goto semoperr;
988
989 if (needundo) {
990 struct sem_undo template;
991
992 template.un_sp = sp;
993 mutex_enter(&pp->p_lock);
994 if (pp->p_semacct)
995 undo = avl_find(pp->p_semacct, &template, NULL);
996 else
997 undo = NULL;
998 mutex_exit(&pp->p_lock);
999 if (undo == NULL) {
1000 if (!held) {
1001 held = 1;
1002 ipc_hold(sem_svc, (kipc_perm_t *)sp);
1003 }
1004 if (error = sem_undo_alloc(pp, sp, &lock, &template,
1005 &undo))
1006 goto semoperr;
1007
1008 /* sem_undo_alloc unlocks the semaphore */
1009 if (error = ipcperm_access(&sp->sem_perm, mode, CRED()))
1010 goto semoperr;
1011 }
1012 }
1013
1014 check:
1015 /*
1016 * Loop waiting for the operations to be satisfied atomically.
1017 * Actually, do the operations and undo them if a wait is needed
1018 * or an error is detected.
1019 */
1020 for (i = 0; i < nsops; i++) {
1021 op = &uops[i];
1022 semp = &sp->sem_base[op->sem_num];
1023
1024 /*
1025 * Raise the semaphore (i.e. sema_v)
1026 */
1027 if (op->sem_op > 0) {
1028 if (op->sem_op + (int)semp->semval > USHRT_MAX ||
1029 ((op->sem_flg & SEM_UNDO) &&
1030 (error = sem_undo_add(op->sem_op, op->sem_num,
1031 undo)))) {
1032 if (i)
1033 sem_rollback(sp, uops, i, undo);
1034 if (error == 0)
1035 error = ERANGE;
1036 goto semoperr;
1037 }
1038 semp->semval += op->sem_op;
1039 /*
1040 * If we are only incrementing the semaphore value
1041 * by one on a binary semaphore, we can cv_signal.
1042 */
1043 if (semp->semncnt) {
1044 if (op->sem_op == 1 && sp->sem_binary)
1045 cv_signal(&semp->semncnt_cv);
1046 else
1047 cv_broadcast(&semp->semncnt_cv);
1048 }
1049 if (semp->semzcnt && !semp->semval)
1050 cv_broadcast(&semp->semzcnt_cv);
1051 continue;
1052 }
1053
1054 /*
1055 * Lower the semaphore (i.e. sema_p)
1056 */
1057 if (op->sem_op < 0) {
1058 if (semp->semval >= (unsigned)(-op->sem_op)) {
1059 if ((op->sem_flg & SEM_UNDO) &&
1060 (error = sem_undo_add(op->sem_op,
1061 op->sem_num, undo))) {
1062 if (i)
1063 sem_rollback(sp, uops, i, undo);
1064 goto semoperr;
1065 }
1066 semp->semval += op->sem_op;
1067 if (semp->semzcnt && !semp->semval)
1068 cv_broadcast(&semp->semzcnt_cv);
1069 continue;
1070 }
1071 if (i)
1072 sem_rollback(sp, uops, i, undo);
1073 if (op->sem_flg & IPC_NOWAIT) {
1074 error = EAGAIN;
1075 goto semoperr;
1076 }
1077
1078 /*
1079 * Mark the semaphore set as not a binary type
1080 * if we are decrementing the value by more than 1.
1081 *
1082 * V operations will resort to cv_broadcast
1083 * for this set because there are too many weird
1084 * cases that have to be caught.
1085 */
1086 if (op->sem_op < -1)
1087 sp->sem_binary = 0;
1088 if (!held) {
1089 held = 1;
1090 ipc_hold(sem_svc, (kipc_perm_t *)sp);
1091 }
1092 semp->semncnt++;
1093 cvres = cv_waituntil_sig(&semp->semncnt_cv, lock,
1094 tsp, timecheck);
1095 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
1096
1097 if (!IPC_FREE(&sp->sem_perm)) {
1098 ASSERT(semp->semncnt != 0);
1099 semp->semncnt--;
1100 if (cvres > 0) /* normal wakeup */
1101 goto check;
1102 }
1103
1104 /* EINTR or EAGAIN overrides EIDRM */
1105 if (cvres == 0)
1106 error = EINTR;
1107 else if (cvres < 0)
1108 error = EAGAIN;
1109 else
1110 error = EIDRM;
1111 goto semoperr;
1112 }
1113
1114 /*
1115 * Wait for zero value
1116 */
1117 if (semp->semval) {
1118 if (i)
1119 sem_rollback(sp, uops, i, undo);
1120 if (op->sem_flg & IPC_NOWAIT) {
1121 error = EAGAIN;
1122 goto semoperr;
1123 }
1124
1125 if (!held) {
1126 held = 1;
1127 ipc_hold(sem_svc, (kipc_perm_t *)sp);
1128 }
1129 semp->semzcnt++;
1130 cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock,
1131 tsp, timecheck);
1132 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
1133
1134 /*
1135 * Don't touch semp if the semaphores have been removed.
1136 */
1137 if (!IPC_FREE(&sp->sem_perm)) {
1138 ASSERT(semp->semzcnt != 0);
1139 semp->semzcnt--;
1140 if (cvres > 0) /* normal wakeup */
1141 goto check;
1142 }
1143
1144 /* EINTR or EAGAIN overrides EIDRM */
1145 if (cvres == 0)
1146 error = EINTR;
1147 else if (cvres < 0)
1148 error = EAGAIN;
1149 else
1150 error = EIDRM;
1151 goto semoperr;
1152 }
1153 }
1154
1155 /* All operations succeeded. Update sempid for accessed semaphores. */
1156 for (i = 0, op = uops; i++ < nsops;
1157 sp->sem_base[(op++)->sem_num].sempid = pp->p_pid)
1158 ;
1159 sp->sem_otime = gethrestime_sec();
1160 if (held)
1161 ipc_rele(sem_svc, (kipc_perm_t *)sp);
1162 else
1163 mutex_exit(lock);
1164
1165 /* Before leaving, deallocate the buffer that held the user semops */
1166 if (nsops != 1)
1167 kmem_free(uops, sizeof (*uops) * nsops);
1168 return (0);
1169
1170 /*
1171 * Error return labels
1172 */
1173 semoperr:
1174 if (held)
1175 ipc_rele(sem_svc, (kipc_perm_t *)sp);
1176 else
1177 mutex_exit(lock);
1178
1179 semoperr_unlocked:
1180
1181 /* Before leaving, deallocate the buffer that held the user semops */
1182 if (nsops != 1)
1183 kmem_free(uops, sizeof (*uops) * nsops);
1184 return (set_errno(error));
1185 }
1186
1187 /*
1188 * semsys - System entry point for semctl, semget, and semop system calls.
1189 */
1190 static int
semsys(int opcode,uintptr_t a1,uintptr_t a2,uintptr_t a3,uintptr_t a4)1191 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4)
1192 {
1193 int error;
1194
1195 switch (opcode) {
1196 case SEMCTL:
1197 error = semctl((int)a1, (uint_t)a2, (int)a3, a4);
1198 break;
1199 case SEMGET:
1200 error = semget((key_t)a1, (int)a2, (int)a3);
1201 break;
1202 case SEMOP:
1203 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0);
1204 break;
1205 case SEMIDS:
1206 error = semids((int *)a1, (uint_t)a2, (uint_t *)a3);
1207 break;
1208 case SEMTIMEDOP:
1209 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3,
1210 (timespec_t *)a4);
1211 break;
1212 default:
1213 error = set_errno(EINVAL);
1214 break;
1215 }
1216 return (error);
1217 }
1218