xref: /titanic_41/usr/src/uts/common/syscall/sem.c (revision 70025d765b044c6d8594bb965a2247a61e991a99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  * Inter-Process Communication Semaphore Facility.
35  *
36  * See os/ipc.c for a description of common IPC functionality.
37  *
38  * Resource controls
39  * -----------------
40  *
41  * Control:      project.max-sem-ids (rc_project_semmni)
42  * Description:  Maximum number of semaphore ids allowed a project.
43  *
44  *   When semget() is used to allocate a semaphore set, one id is
45  *   allocated.  If the id allocation doesn't succeed, semget() fails
46  *   and errno is set to ENOSPC.  Upon successful semctl(, IPC_RMID)
47  *   the id is deallocated.
48  *
49  * Control:      process.max-sem-nsems (rc_process_semmsl)
50  * Description:  Maximum number of semaphores allowed per semaphore set.
51  *
52  *   When semget() is used to allocate a semaphore set, the size of the
53  *   set is compared with this limit.  If the number of semaphores
54  *   exceeds the limit, semget() fails and errno is set to EINVAL.
55  *
56  * Control:      process.max-sem-ops (rc_process_semopm)
57  * Description:  Maximum number of semaphore operations allowed per
58  *               semop call.
59  *
60  *   When semget() successfully allocates a semaphore set, the minimum
61  *   enforced value of this limit is used to initialize the
62  *   "system-imposed maximum" number of operations a semop() call for
63  *   this set can perform.
64  *
65  * Undo structures
66  * ---------------
67  *
68  * Removing the undo structure tunables involved a serious redesign of
69  * how they were implemented.  There is now one undo structure for
70  * every process/semaphore array combination (lazily allocated, of
71  * course), and each is equal in size to the semaphore it corresponds
72  * to.  To avoid scalability and performance problems, the undo
73  * structures are stored in two places: a per-process AVL tree sorted
74  * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted
75  * per-semaphore linked list (sem_undos, protected by the semaphore's
76  * ID lock).  The former is used by semop, where a lookup is performed
77  * once and cached if SEM_UNDO is specified for any of the operations,
78  * and at process exit where the undoable operations are rolled back.
79  * The latter is used when removing the semaphore, so the undo
80  * structures can be removed from the appropriate processes' trees.
81  *
82  * The undo structure itself contains pointers to the ksemid and proc
83  * to which it corresponds, a list node, an AVL node, and an array of
84  * adjust-on-exit (AOE) values.  When an undo structure is allocated it
85  * is immediately added to both the process's tree and the semaphore's
86  * list.  Lastly, the reference count on the semaphore is increased.
87  *
88  * Avoiding a lock ordering violation between p_lock and the ID lock,
89  * wont to occur when there is a race between a process exiting and the
90  * removal of a semaphore, mandates the delicate dance that exists
91  * between semexit and sem_rmid.
92  *
93  * sem_rmid, holding the ID lock, iterates through all undo structures
94  * and for each takes the appropriate process's p_lock and checks to
95  * see if p_semacct is NULL.  If it is, it skips that undo structure
96  * and continues to the next.  Otherwise, it removes the undo structure
97  * from both the AVL tree and the semaphore's list, and releases the
98  * hold that the undo structure had on the semaphore.
99  *
100  * The important other half of this is semexit, which will immediately
101  * take p_lock, obtain the AVL pointer, clear p_semacct, and drop
102  * p_lock.  From this point on it is semexit's responsibility to clean
103  * up all undo structures found in the tree -- a coexecuting sem_rmid
104  * will see the NULL p_semacct and skip that undo structure.  It walks
105  * the AVL tree (using avl_destroy_nodes) and for each undo structure
106  * takes the appropriate semaphore's ID lock (always legal since the
107  * undo structure has a hold on the semaphore), updates all semaphores
108  * with non-zero AOE values, and removes the structure from the
109  * semaphore's list.  It then drops the structure's reference on the
110  * semaphore, drops the ID lock, and frees the undo structure.
111  */
112 
113 #include <sys/types.h>
114 #include <sys/t_lock.h>
115 #include <sys/param.h>
116 #include <sys/systm.h>
117 #include <sys/sysmacros.h>
118 #include <sys/cred.h>
119 #include <sys/vmem.h>
120 #include <sys/kmem.h>
121 #include <sys/errno.h>
122 #include <sys/time.h>
123 #include <sys/ipc.h>
124 #include <sys/ipc_impl.h>
125 #include <sys/sem.h>
126 #include <sys/sem_impl.h>
127 #include <sys/user.h>
128 #include <sys/proc.h>
129 #include <sys/cpuvar.h>
130 #include <sys/debug.h>
131 #include <sys/var.h>
132 #include <sys/cmn_err.h>
133 #include <sys/modctl.h>
134 #include <sys/syscall.h>
135 #include <sys/avl.h>
136 #include <sys/list.h>
137 #include <sys/zone.h>
138 
139 #include <c2/audit.h>
140 
141 extern rctl_hndl_t rc_project_semmni;
142 extern rctl_hndl_t rc_process_semmsl;
143 extern rctl_hndl_t rc_process_semopm;
144 static ipc_service_t *sem_svc;
145 static zone_key_t sem_zone_key;
146 
147 /*
148  * The following tunables are obsolete.  Though for compatibility we
149  * still read and interpret seminfo_semmsl, seminfo_semopm and
150  * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred
151  * mechanism for administrating the IPC Semaphore facility is through
152  * the resource controls described at the top of this file.
153  */
154 int seminfo_semaem = 16384;	/* (obsolete) */
155 int seminfo_semmap = 10;	/* (obsolete) */
156 int seminfo_semmni = 10;	/* (obsolete) */
157 int seminfo_semmns = 60;	/* (obsolete) */
158 int seminfo_semmnu = 30;	/* (obsolete) */
159 int seminfo_semmsl = 25;	/* (obsolete) */
160 int seminfo_semopm = 10;	/* (obsolete) */
161 int seminfo_semume = 10;	/* (obsolete) */
162 int seminfo_semusz = 96;	/* (obsolete) */
163 int seminfo_semvmx = 32767;	/* (obsolete) */
164 
165 #define	SEM_MAXUCOPS	4096	/* max # of unchecked ops per semop call */
166 #define	SEM_UNDOSZ(n)	(sizeof (struct sem_undo) + (n - 1) * sizeof (int))
167 
168 static int semsys(int opcode, uintptr_t a0, uintptr_t a1,
169     uintptr_t a2, uintptr_t a3);
170 static void sem_dtor(kipc_perm_t *);
171 static void sem_rmid(kipc_perm_t *);
172 static void sem_remove_zone(zoneid_t, void *);
173 
174 static struct sysent ipcsem_sysent = {
175 	5,
176 	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
177 	semsys
178 };
179 
180 /*
181  * Module linkage information for the kernel.
182  */
183 static struct modlsys modlsys = {
184 	&mod_syscallops, "System V semaphore facility", &ipcsem_sysent
185 };
186 
187 #ifdef _SYSCALL32_IMPL
188 static struct modlsys modlsys32 = {
189 	&mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent
190 };
191 #endif
192 
193 static struct modlinkage modlinkage = {
194 	MODREV_1,
195 	&modlsys,
196 #ifdef _SYSCALL32_IMPL
197 	&modlsys32,
198 #endif
199 	NULL
200 };
201 
202 
203 int
204 _init(void)
205 {
206 	int result;
207 
208 	sem_svc = ipcs_create("semids", rc_project_semmni, sizeof (ksemid_t),
209 	    sem_dtor, sem_rmid, AT_IPC_SEM,
210 	    offsetof(kproject_data_t, kpd_semmni));
211 	zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL);
212 
213 	if ((result = mod_install(&modlinkage)) == 0)
214 		return (0);
215 
216 	(void) zone_key_delete(sem_zone_key);
217 	ipcs_destroy(sem_svc);
218 
219 	return (result);
220 }
221 
/*
 * Module unload entry point.  Unloading is never permitted, so this
 * unconditionally reports EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
227 
228 int
229 _info(struct modinfo *modinfop)
230 {
231 	return (mod_info(&modlinkage, modinfop));
232 }
233 
234 static void
235 sem_dtor(kipc_perm_t *perm)
236 {
237 	ksemid_t *sp = (ksemid_t *)perm;
238 
239 	kmem_free(sp->sem_base,
240 	    P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64));
241 	list_destroy(&sp->sem_undos);
242 }
243 
244 /*
245  * sem_undo_add - Create or update adjust on exit entry.
246  */
247 static int
248 sem_undo_add(short val, ushort_t num, struct sem_undo *undo)
249 {
250 	int newval = undo->un_aoe[num] - val;
251 
252 	if (newval > USHRT_MAX || newval < -USHRT_MAX)
253 		return (ERANGE);
254 	undo->un_aoe[num] = newval;
255 
256 	return (0);
257 }
258 
259 /*
260  * sem_undo_clear - clears all undo entries for specified semaphores
261  *
262  * Used when semaphores are reset by SETVAL or SETALL.
263  */
264 static void
265 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high)
266 {
267 	struct sem_undo *undo;
268 	int i;
269 
270 	ASSERT(low <= high);
271 	ASSERT(high < sp->sem_nsems);
272 
273 	for (undo = list_head(&sp->sem_undos); undo;
274 	    undo = list_next(&sp->sem_undos, undo))
275 		for (i = low; i <= high; i++)
276 			undo->un_aoe[i] = 0;
277 }
278 
279 /*
280  * sem_rollback - roll back work done so far if unable to complete operation
281  */
282 static void
283 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo)
284 {
285 	struct sem *semp;	/* semaphore ptr */
286 
287 	for (op += n - 1; n--; op--) {
288 		if (op->sem_op == 0)
289 			continue;
290 		semp = &sp->sem_base[op->sem_num];
291 		semp->semval -= op->sem_op;
292 		if (op->sem_flg & SEM_UNDO) {
293 			ASSERT(undo != NULL);
294 			(void) sem_undo_add(-op->sem_op, op->sem_num, undo);
295 		}
296 	}
297 }
298 
299 static void
300 sem_rmid(kipc_perm_t *perm)
301 {
302 	ksemid_t *sp = (ksemid_t *)perm;
303 	struct sem *semp;
304 	struct sem_undo *undo;
305 	size_t size = SEM_UNDOSZ(sp->sem_nsems);
306 	int i;
307 
308 	/*LINTED*/
309 	while (undo = list_head(&sp->sem_undos)) {
310 		list_remove(&sp->sem_undos, undo);
311 		mutex_enter(&undo->un_proc->p_lock);
312 		if (undo->un_proc->p_semacct == NULL) {
313 			mutex_exit(&undo->un_proc->p_lock);
314 			continue;
315 		}
316 		avl_remove(undo->un_proc->p_semacct, undo);
317 		mutex_exit(&undo->un_proc->p_lock);
318 		kmem_free(undo, size);
319 		ipc_rele_locked(sem_svc, (kipc_perm_t *)sp);
320 	}
321 
322 	for (i = 0; i < sp->sem_nsems; i++) {
323 		semp = &sp->sem_base[i];
324 		semp->semval = semp->sempid = 0;
325 		if (semp->semncnt) {
326 			cv_broadcast(&semp->semncnt_cv);
327 			semp->semncnt = 0;
328 		}
329 		if (semp->semzcnt) {
330 			cv_broadcast(&semp->semzcnt_cv);
331 			semp->semzcnt = 0;
332 		}
333 	}
334 }
335 
336 /*
337  * semctl - Semctl system call.
338  */
339 static int
340 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg)
341 {
342 	ksemid_t		*sp;	/* ptr to semaphore header */
343 	struct sem		*p;	/* ptr to semaphore */
344 	unsigned int		i;	/* loop control */
345 	ushort_t		*vals, *vp;
346 	size_t			vsize = 0;
347 	int			error = 0;
348 	int			retval = 0;
349 	struct cred		*cr;
350 	kmutex_t		*lock;
351 	model_t			mdl = get_udatamodel();
352 	STRUCT_DECL(semid_ds, sid);
353 	struct semid_ds64	ds64;
354 
355 	STRUCT_INIT(sid, mdl);
356 	cr = CRED();
357 
358 	/*
359 	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
360 	 */
361 	switch (cmd) {
362 	case IPC_SET:
363 		if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid)))
364 			return (set_errno(EFAULT));
365 		break;
366 
367 	case IPC_SET64:
368 		if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64)))
369 			return (set_errno(EFAULT));
370 		break;
371 
372 	case SETALL:
373 		if ((lock = ipc_lookup(sem_svc, semid,
374 		    (kipc_perm_t **)&sp)) == NULL)
375 			return (set_errno(EINVAL));
376 		vsize = sp->sem_nsems * sizeof (*vals);
377 		mutex_exit(lock);
378 
379 		/* allocate space to hold all semaphore values */
380 		vals = kmem_alloc(vsize, KM_SLEEP);
381 
382 		if (copyin((void *)arg, vals, vsize)) {
383 			kmem_free(vals, vsize);
384 			return (set_errno(EFAULT));
385 		}
386 		break;
387 
388 	case IPC_RMID:
389 		if (error = ipc_rmid(sem_svc, semid, cr))
390 			return (set_errno(error));
391 		return (0);
392 	}
393 
394 	if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) {
395 		if (vsize != 0)
396 			kmem_free(vals, vsize);
397 		return (set_errno(EINVAL));
398 	}
399 	switch (cmd) {
400 	/* Set ownership and permissions. */
401 	case IPC_SET:
402 
403 		if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm,
404 		    &STRUCT_BUF(sid)->sem_perm, mdl)) {
405 			mutex_exit(lock);
406 			return (set_errno(error));
407 		}
408 		sp->sem_ctime = gethrestime_sec();
409 		mutex_exit(lock);
410 		return (0);
411 
412 	/* Get semaphore data structure. */
413 	case IPC_STAT:
414 
415 		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
416 			mutex_exit(lock);
417 			return (set_errno(error));
418 		}
419 
420 		ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl);
421 		STRUCT_FSETP(sid, sem_base, NULL);	/* kernel addr */
422 		STRUCT_FSET(sid, sem_nsems, sp->sem_nsems);
423 		STRUCT_FSET(sid, sem_otime, sp->sem_otime);
424 		STRUCT_FSET(sid, sem_ctime, sp->sem_ctime);
425 		STRUCT_FSET(sid, sem_binary, sp->sem_binary);
426 		mutex_exit(lock);
427 
428 		if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid)))
429 			return (set_errno(EFAULT));
430 		return (0);
431 
432 	case IPC_SET64:
433 
434 		if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm,
435 		    &ds64.semx_perm)) {
436 			mutex_exit(lock);
437 			return (set_errno(error));
438 		}
439 		sp->sem_ctime = gethrestime_sec();
440 		mutex_exit(lock);
441 		return (0);
442 
443 	case IPC_STAT64:
444 
445 		ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm);
446 		ds64.semx_nsems = sp->sem_nsems;
447 		ds64.semx_otime = sp->sem_otime;
448 		ds64.semx_ctime = sp->sem_ctime;
449 
450 		mutex_exit(lock);
451 		if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64)))
452 			return (set_errno(EFAULT));
453 
454 		return (0);
455 
456 	/* Get # of processes sleeping for greater semval. */
457 	case GETNCNT:
458 		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
459 			mutex_exit(lock);
460 			return (set_errno(error));
461 		}
462 		if (semnum >= sp->sem_nsems) {
463 			mutex_exit(lock);
464 			return (set_errno(EINVAL));
465 		}
466 		retval = sp->sem_base[semnum].semncnt;
467 		mutex_exit(lock);
468 		return (retval);
469 
470 	/* Get pid of last process to operate on semaphore. */
471 	case GETPID:
472 		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
473 			mutex_exit(lock);
474 			return (set_errno(error));
475 		}
476 		if (semnum >= sp->sem_nsems) {
477 			mutex_exit(lock);
478 			return (set_errno(EINVAL));
479 		}
480 		retval = sp->sem_base[semnum].sempid;
481 		mutex_exit(lock);
482 		return (retval);
483 
484 	/* Get semval of one semaphore. */
485 	case GETVAL:
486 		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
487 			mutex_exit(lock);
488 			return (set_errno(error));
489 		}
490 		if (semnum >= sp->sem_nsems) {
491 			mutex_exit(lock);
492 			return (set_errno(EINVAL));
493 		}
494 		retval = sp->sem_base[semnum].semval;
495 		mutex_exit(lock);
496 		return (retval);
497 
498 	/* Get all semvals in set. */
499 	case GETALL:
500 		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
501 			mutex_exit(lock);
502 			return (set_errno(error));
503 		}
504 
505 		/* allocate space to hold all semaphore values */
506 		vsize = sp->sem_nsems * sizeof (*vals);
507 		vals = vp = kmem_alloc(vsize, KM_SLEEP);
508 
509 		for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++)
510 			bcopy(&p->semval, vp, sizeof (p->semval));
511 
512 		mutex_exit(lock);
513 
514 		if (copyout((void *)vals, (void *)arg, vsize)) {
515 			kmem_free(vals, vsize);
516 			return (set_errno(EFAULT));
517 		}
518 
519 		kmem_free(vals, vsize);
520 		return (0);
521 
522 	/* Get # of processes sleeping for semval to become zero. */
523 	case GETZCNT:
524 		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
525 			mutex_exit(lock);
526 			return (set_errno(error));
527 		}
528 		if (semnum >= sp->sem_nsems) {
529 			mutex_exit(lock);
530 			return (set_errno(EINVAL));
531 		}
532 		retval = sp->sem_base[semnum].semzcnt;
533 		mutex_exit(lock);
534 		return (retval);
535 
536 	/* Set semval of one semaphore. */
537 	case SETVAL:
538 		if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) {
539 			mutex_exit(lock);
540 			return (set_errno(error));
541 		}
542 		if (semnum >= sp->sem_nsems) {
543 			mutex_exit(lock);
544 			return (set_errno(EINVAL));
545 		}
546 		if ((uint_t)arg > USHRT_MAX) {
547 			mutex_exit(lock);
548 			return (set_errno(ERANGE));
549 		}
550 		p = &sp->sem_base[semnum];
551 		if ((p->semval = (ushort_t)arg) != 0) {
552 			if (p->semncnt) {
553 				cv_broadcast(&p->semncnt_cv);
554 			}
555 		} else if (p->semzcnt) {
556 			cv_broadcast(&p->semzcnt_cv);
557 		}
558 		p->sempid = curproc->p_pid;
559 		sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum);
560 		mutex_exit(lock);
561 		return (0);
562 
563 	/* Set semvals of all semaphores in set. */
564 	case SETALL:
565 		/* Check if semaphore set has been deleted and reallocated. */
566 		if (sp->sem_nsems * sizeof (*vals) != vsize) {
567 			error = set_errno(EINVAL);
568 			goto seterr;
569 		}
570 		if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) {
571 			error = set_errno(error);
572 			goto seterr;
573 		}
574 		sem_undo_clear(sp, 0, sp->sem_nsems - 1);
575 		for (i = 0, p = sp->sem_base; i < sp->sem_nsems;
576 		    (p++)->sempid = curproc->p_pid) {
577 			if ((p->semval = vals[i++]) != 0) {
578 				if (p->semncnt) {
579 					cv_broadcast(&p->semncnt_cv);
580 				}
581 			} else if (p->semzcnt) {
582 				cv_broadcast(&p->semzcnt_cv);
583 			}
584 		}
585 seterr:
586 		mutex_exit(lock);
587 		kmem_free(vals, vsize);
588 		return (error);
589 
590 	default:
591 		mutex_exit(lock);
592 		return (set_errno(EINVAL));
593 	}
594 
595 	/* NOTREACHED */
596 }
597 
598 /*
599  * semexit - Called by exit() to clean up on process exit.
600  */
601 void
602 semexit(proc_t *pp)
603 {
604 	avl_tree_t	*tree;
605 	struct sem_undo	*undo;
606 	void		*cookie = NULL;
607 
608 	mutex_enter(&pp->p_lock);
609 	tree = pp->p_semacct;
610 	pp->p_semacct = NULL;
611 	mutex_exit(&pp->p_lock);
612 
613 	while (undo = avl_destroy_nodes(tree, &cookie)) {
614 		ksemid_t *sp = undo->un_sp;
615 		size_t size = SEM_UNDOSZ(sp->sem_nsems);
616 		int i;
617 
618 		(void) ipc_lock(sem_svc, sp->sem_perm.ipc_id);
619 		if (!IPC_FREE(&sp->sem_perm)) {
620 			for (i = 0; i < sp->sem_nsems; i++) {
621 				int adj = undo->un_aoe[i];
622 				if (adj) {
623 					struct sem *semp = &sp->sem_base[i];
624 					int v = (int)semp->semval + adj;
625 
626 					if (v < 0 || v > USHRT_MAX)
627 						continue;
628 					semp->semval = (ushort_t)v;
629 					if (v == 0 && semp->semzcnt)
630 						cv_broadcast(&semp->semzcnt_cv);
631 					if (adj > 0 && semp->semncnt)
632 						cv_broadcast(&semp->semncnt_cv);
633 				}
634 			}
635 			list_remove(&sp->sem_undos, undo);
636 		}
637 		ipc_rele(sem_svc, (kipc_perm_t *)sp);
638 		kmem_free(undo, size);
639 	}
640 
641 	avl_destroy(tree);
642 	kmem_free(tree, sizeof (avl_tree_t));
643 }
644 
645 /*
646  * Remove all semaphores associated with a given zone.  Called by
647  * zone_shutdown when the zone is halted.
648  */
649 /*ARGSUSED1*/
650 static void
651 sem_remove_zone(zoneid_t zoneid, void *arg)
652 {
653 	ipc_remove_zone(sem_svc, zoneid);
654 }
655 
656 /*
657  * semget - Semget system call.
658  */
659 static int
660 semget(key_t key, int nsems, int semflg)
661 {
662 	ksemid_t	*sp;
663 	kmutex_t	*lock;
664 	int		id, error;
665 	proc_t		*pp = curproc;
666 
667 top:
668 	if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock))
669 		return (set_errno(error));
670 
671 	if (!IPC_FREE(&sp->sem_perm)) {
672 		/*
673 		 * A semaphore with the requested key exists.
674 		 */
675 		if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) {
676 			mutex_exit(lock);
677 			return (set_errno(EINVAL));
678 		}
679 	} else {
680 		/*
681 		 * This is a new semaphore set.  Finish initialization.
682 		 */
683 		if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp,
684 		    nsems, RCA_SAFE) & RCT_DENY)) {
685 			mutex_exit(lock);
686 			mutex_exit(&pp->p_lock);
687 			ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
688 			return (set_errno(EINVAL));
689 		}
690 		mutex_exit(lock);
691 		mutex_exit(&pp->p_lock);
692 
693 		/*
694 		 * We round the allocation up to coherency granularity
695 		 * so that multiple semaphore allocations won't result
696 		 * in the false sharing of their sem structures.
697 		 */
698 		sp->sem_base =
699 		    kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64),
700 		    KM_SLEEP);
701 		sp->sem_binary = (nsems == 1);
702 		sp->sem_nsems = (ushort_t)nsems;
703 		sp->sem_ctime = gethrestime_sec();
704 		sp->sem_otime = 0;
705 		list_create(&sp->sem_undos, sizeof (struct sem_undo),
706 		    offsetof(struct sem_undo, un_list));
707 
708 		if (error = ipc_commit_begin(sem_svc, key, semflg,
709 		    (kipc_perm_t *)sp)) {
710 			if (error == EAGAIN)
711 				goto top;
712 			return (set_errno(error));
713 		}
714 		sp->sem_maxops =
715 		    rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
716 		if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems,
717 		    RCA_SAFE) & RCT_DENY) {
718 			ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
719 			return (set_errno(EINVAL));
720 		}
721 		lock = ipc_commit_end(sem_svc, &sp->sem_perm);
722 	}
723 #ifdef C2_AUDIT
724 	if (audit_active)
725 		audit_ipcget(AT_IPC_SEM, (void *)sp);
726 #endif
727 	id = sp->sem_perm.ipc_id;
728 	mutex_exit(lock);
729 	return (id);
730 }
731 
732 /*
733  * semids system call.
734  */
735 static int
736 semids(int *buf, uint_t nids, uint_t *pnids)
737 {
738 	int error;
739 
740 	if (error = ipc_ids(sem_svc, buf, nids, pnids))
741 		return (set_errno(error));
742 
743 	return (0);
744 }
745 
746 
747 /*
748  * Helper function for semop - copies in the provided timespec and
749  * computes the absolute future time after which we must return.
750  */
751 static int
752 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now,
753 	timespec_t *timeout)
754 {
755 	model_t datamodel = get_udatamodel();
756 
757 	if (datamodel == DATAMODEL_NATIVE) {
758 		if (copyin(timeout, ts, sizeof (timespec_t)))
759 			return (EFAULT);
760 	} else {
761 		timespec32_t ts32;
762 
763 		if (copyin(timeout, &ts32, sizeof (timespec32_t)))
764 			return (EFAULT);
765 		TIMESPEC32_TO_TIMESPEC(ts, &ts32)
766 	}
767 
768 	if (itimerspecfix(ts))
769 		return (EINVAL);
770 
771 	/*
772 	 * Convert the timespec value into absolute time.
773 	 */
774 	timespecadd(ts, now);
775 	*tsp = ts;
776 
777 	return (0);
778 }
779 
780 /*
781  * Undo structure comparator.  We sort based on ksemid_t pointer.
782  */
783 static int
784 sem_undo_compar(const void *x, const void *y)
785 {
786 	struct sem_undo *undo1 = (struct sem_undo *)x;
787 	struct sem_undo *undo2 = (struct sem_undo *)y;
788 
789 	if (undo1->un_sp < undo2->un_sp)
790 		return (-1);
791 	if (undo1->un_sp > undo2->un_sp)
792 		return (1);
793 	return (0);
794 }
795 
796 /*
797  * Helper function for semop - creates an undo structure and adds it to
798  * the process's avl tree and the semaphore's list.
799  */
800 static int
801 sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock,
802     struct sem_undo *template, struct sem_undo **un)
803 {
804 	size_t size;
805 	struct sem_undo *undo;
806 	avl_tree_t *tree = NULL;
807 	avl_index_t where;
808 
809 	mutex_exit(*lock);
810 
811 	size = SEM_UNDOSZ(sp->sem_nsems);
812 	undo = kmem_zalloc(size, KM_SLEEP);
813 	undo->un_proc = pp;
814 	undo->un_sp = sp;
815 
816 	if (pp->p_semacct == NULL)
817 		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
818 
819 	*lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
820 	if (IPC_FREE(&sp->sem_perm)) {
821 		kmem_free(undo, size);
822 		if (tree)
823 			kmem_free(tree, sizeof (avl_tree_t));
824 		return (EIDRM);
825 	}
826 
827 	mutex_enter(&pp->p_lock);
828 	if (tree) {
829 		if (pp->p_semacct == NULL) {
830 			avl_create(tree, sem_undo_compar,
831 			    sizeof (struct sem_undo),
832 			    offsetof(struct sem_undo, un_avl));
833 			pp->p_semacct = tree;
834 		} else {
835 			kmem_free(tree, sizeof (avl_tree_t));
836 		}
837 	}
838 
839 	if (*un = avl_find(pp->p_semacct, template, &where)) {
840 		mutex_exit(&pp->p_lock);
841 		kmem_free(undo, size);
842 	} else {
843 		*un = undo;
844 		avl_insert(pp->p_semacct, undo, where);
845 		mutex_exit(&pp->p_lock);
846 		list_insert_head(&sp->sem_undos, undo);
847 		ipc_hold(sem_svc, (kipc_perm_t *)sp);
848 	}
849 
850 
851 	return (0);
852 }
853 
854 /*
855  * semop - Semop system call.
856  */
857 static int
858 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout)
859 {
860 	ksemid_t	*sp = NULL;
861 	kmutex_t	*lock;
862 	struct sembuf	*op;	/* ptr to operation */
863 	int		i;	/* loop control */
864 	struct sem	*semp;	/* ptr to semaphore */
865 	int 		error = 0;
866 	struct sembuf	*uops;	/* ptr to copy of user ops */
867 	struct sembuf 	x_sem;	/* avoid kmem_alloc's */
868 	timespec_t	now, ts, *tsp = NULL;
869 	int		timecheck = 0;
870 	int		cvres, needundo, mode;
871 	struct sem_undo	*undo;
872 	proc_t		*pp = curproc;
873 	int		held = 0;
874 
875 	CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */
876 
877 	/*
878 	 * To avoid the cost of copying in 'timeout' in the common
879 	 * case, we could only grab the time here and defer the copyin
880 	 * and associated computations until we are about to block.
881 	 *
882 	 * The down side to this is that we would then have to spin
883 	 * some goto top nonsense to avoid the copyin behind the semid
884 	 * lock.  As a common use of timed semaphores is as an explicit
885 	 * blocking mechanism, this could incur a greater penalty.
886 	 *
887 	 * If we eventually decide that this would be a wise route to
888 	 * take, the deferrable functionality is completely contained
889 	 * in 'compute_timeout', and the interface is defined such that
890 	 * we can legally not validate 'timeout' if it is unused.
891 	 */
892 	if (timeout != NULL) {
893 		timecheck = timechanged;
894 		gethrestime(&now);
895 		if (error = compute_timeout(&tsp, &ts, &now, timeout))
896 			return (set_errno(error));
897 	}
898 
899 	/*
900 	 * Allocate space to hold the vector of semaphore ops.  If
901 	 * there is only 1 operation we use a preallocated buffer on
902 	 * the stack for speed.
903 	 *
904 	 * Since we don't want to allow the user to allocate an
905 	 * arbitrary amount of kernel memory, we need to check against
906 	 * the number of operations allowed by the semaphore.  We only
907 	 * bother doing this if the number of operations is larger than
908 	 * SEM_MAXUCOPS.
909 	 */
910 	if (nsops == 1)
911 		uops = &x_sem;
912 	else if (nsops == 0)
913 		return (0);
914 	else if (nsops <= SEM_MAXUCOPS)
915 		uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP);
916 
917 	if (nsops > SEM_MAXUCOPS) {
918 		if ((lock = ipc_lookup(sem_svc, semid,
919 		    (kipc_perm_t **)&sp)) == NULL)
920 			return (set_errno(EFAULT));
921 
922 		if (nsops > sp->sem_maxops) {
923 			mutex_exit(lock);
924 			return (set_errno(E2BIG));
925 		}
926 		held = 1;
927 		ipc_hold(sem_svc, (kipc_perm_t *)sp);
928 		mutex_exit(lock);
929 
930 		uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP);
931 		if (copyin(sops, uops, nsops * sizeof (*op))) {
932 			error = EFAULT;
933 			(void) ipc_lock(sem_svc, sp->sem_perm.ipc_id);
934 			goto semoperr;
935 		}
936 
937 		lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
938 		if (IPC_FREE(&sp->sem_perm)) {
939 			error = EIDRM;
940 			goto semoperr;
941 		}
942 	} else {
943 		/*
944 		 * This could be interleaved with the above code, but
945 		 * keeping them separate improves readability.
946 		 */
947 		if (copyin(sops, uops, nsops * sizeof (*op))) {
948 			error = EFAULT;
949 			goto semoperr_unlocked;
950 		}
951 
952 		if ((lock = ipc_lookup(sem_svc, semid,
953 		    (kipc_perm_t **)&sp)) == NULL) {
954 			error = EINVAL;
955 			goto semoperr_unlocked;
956 		}
957 
958 		if (nsops > sp->sem_maxops) {
959 			error = E2BIG;
960 			goto semoperr;
961 		}
962 	}
963 
964 	/*
965 	 * Scan all operations.  Verify that sem #s are in range and
966 	 * this process is allowed the requested operations.  If any
967 	 * operations are marked SEM_UNDO, find (or allocate) the undo
968 	 * structure for this process and semaphore.
969 	 */
970 	needundo = 0;
971 	mode = 0;
972 	for (i = 0, op = uops; i++ < nsops; op++) {
973 		mode |= op->sem_op ? SEM_A : SEM_R;
974 		if (op->sem_num >= sp->sem_nsems) {
975 			error = EFBIG;
976 			goto semoperr;
977 		}
978 		if ((op->sem_flg & SEM_UNDO) && op->sem_op)
979 			needundo = 1;
980 	}
981 	if (error = ipcperm_access(&sp->sem_perm, mode, CRED()))
982 		goto semoperr;
983 
984 	if (needundo) {
985 		struct sem_undo template;
986 
987 		template.un_sp = sp;
988 		mutex_enter(&pp->p_lock);
989 		if (pp->p_semacct)
990 			undo = avl_find(pp->p_semacct, &template, NULL);
991 		else
992 			undo = NULL;
993 		mutex_exit(&pp->p_lock);
994 		if (undo == NULL) {
995 			if (error = sem_undo_alloc(pp, sp, &lock, &template,
996 			    &undo))
997 				goto semoperr;
998 
999 			/* sem_undo_alloc unlocks the semaphore */
1000 			if (error = ipcperm_access(&sp->sem_perm, mode, CRED()))
1001 				goto semoperr;
1002 		}
1003 	}
1004 
1005 check:
1006 	/*
1007 	 * Loop waiting for the operations to be satisfied atomically.
1008 	 * Actually, do the operations and undo them if a wait is needed
1009 	 * or an error is detected.
1010 	 */
1011 	for (i = 0; i < nsops; i++) {
1012 		op = &uops[i];
1013 		semp = &sp->sem_base[op->sem_num];
1014 
1015 		/*
1016 		 * Raise the semaphore (i.e. sema_v)
1017 		 */
1018 		if (op->sem_op > 0) {
1019 			if (op->sem_op + (int)semp->semval > USHRT_MAX ||
1020 			    ((op->sem_flg & SEM_UNDO) &&
1021 			    (error = sem_undo_add(op->sem_op, op->sem_num,
1022 			    undo)))) {
1023 				if (i)
1024 					sem_rollback(sp, uops, i, undo);
1025 				if (error == 0)
1026 					error = ERANGE;
1027 				goto semoperr;
1028 			}
1029 			semp->semval += op->sem_op;
1030 			/*
1031 			 * If we are only incrementing the semaphore value
1032 			 * by one on a binary semaphore, we can cv_signal.
1033 			 */
1034 			if (semp->semncnt) {
1035 				if (op->sem_op == 1 && sp->sem_binary)
1036 					cv_signal(&semp->semncnt_cv);
1037 				else
1038 					cv_broadcast(&semp->semncnt_cv);
1039 			}
1040 			if (semp->semzcnt && !semp->semval)
1041 				cv_broadcast(&semp->semzcnt_cv);
1042 			continue;
1043 		}
1044 
1045 		/*
1046 		 * Lower the semaphore (i.e. sema_p)
1047 		 */
1048 		if (op->sem_op < 0) {
1049 			if (semp->semval >= (unsigned)(-op->sem_op)) {
1050 				if ((op->sem_flg & SEM_UNDO) &&
1051 				    (error = sem_undo_add(op->sem_op,
1052 				    op->sem_num, undo))) {
1053 					if (i)
1054 						sem_rollback(sp, uops, i, undo);
1055 					goto semoperr;
1056 				}
1057 				semp->semval += op->sem_op;
1058 				if (semp->semzcnt && !semp->semval)
1059 					cv_broadcast(&semp->semzcnt_cv);
1060 				continue;
1061 			}
1062 			if (i)
1063 				sem_rollback(sp, uops, i, undo);
1064 			if (op->sem_flg & IPC_NOWAIT) {
1065 				error = EAGAIN;
1066 				goto semoperr;
1067 			}
1068 
1069 			/*
1070 			 * Mark the semaphore set as not a binary type
1071 			 * if we are decrementing the value by more than 1.
1072 			 *
1073 			 * V operations will resort to cv_broadcast
1074 			 * for this set because there are too many weird
1075 			 * cases that have to be caught.
1076 			 */
1077 			if (op->sem_op < -1)
1078 				sp->sem_binary = 0;
1079 			if (!held) {
1080 				held = 1;
1081 				ipc_hold(sem_svc, (kipc_perm_t *)sp);
1082 			}
1083 			semp->semncnt++;
1084 			cvres = cv_waituntil_sig(&semp->semncnt_cv, lock,
1085 				tsp, timecheck);
1086 			lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
1087 
1088 			if (!IPC_FREE(&sp->sem_perm)) {
1089 				ASSERT(semp->semncnt != 0);
1090 				semp->semncnt--;
1091 				if (cvres > 0)	/* normal wakeup */
1092 					goto check;
1093 			}
1094 
1095 			/* EINTR or EAGAIN overrides EIDRM */
1096 			if (cvres == 0)
1097 				error = EINTR;
1098 			else if (cvres < 0)
1099 				error = EAGAIN;
1100 			else
1101 				error = EIDRM;
1102 			goto semoperr;
1103 		}
1104 
1105 		/*
1106 		 * Wait for zero value
1107 		 */
1108 		if (semp->semval) {
1109 			if (i)
1110 				sem_rollback(sp, uops, i, undo);
1111 			if (op->sem_flg & IPC_NOWAIT) {
1112 				error = EAGAIN;
1113 				goto semoperr;
1114 			}
1115 
1116 			if (!held) {
1117 				held = 1;
1118 				ipc_hold(sem_svc, (kipc_perm_t *)sp);
1119 			}
1120 			semp->semzcnt++;
1121 			cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock,
1122 				tsp, timecheck);
1123 			lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
1124 
1125 			/*
1126 			 * Don't touch semp if the semaphores have been removed.
1127 			 */
1128 			if (!IPC_FREE(&sp->sem_perm)) {
1129 				ASSERT(semp->semzcnt != 0);
1130 				semp->semzcnt--;
1131 				if (cvres > 0)	/* normal wakeup */
1132 					goto check;
1133 			}
1134 
1135 			/* EINTR or EAGAIN overrides EIDRM */
1136 			if (cvres == 0)
1137 				error = EINTR;
1138 			else if (cvres < 0)
1139 				error = EAGAIN;
1140 			else
1141 				error = EIDRM;
1142 			goto semoperr;
1143 		}
1144 	}
1145 
1146 	/* All operations succeeded.  Update sempid for accessed semaphores. */
1147 	for (i = 0, op = uops; i++ < nsops;
1148 	    sp->sem_base[(op++)->sem_num].sempid = pp->p_pid)
1149 		;
1150 	sp->sem_otime = gethrestime_sec();
1151 	if (held)
1152 		ipc_rele(sem_svc, (kipc_perm_t *)sp);
1153 	else
1154 		mutex_exit(lock);
1155 
1156 	/* Before leaving, deallocate the buffer that held the user semops */
1157 	if (nsops != 1)
1158 		kmem_free(uops, sizeof (*uops) * nsops);
1159 	return (0);
1160 
1161 	/*
1162 	 * Error return labels
1163 	 */
1164 semoperr:
1165 	if (held)
1166 		ipc_rele(sem_svc, (kipc_perm_t *)sp);
1167 	else
1168 		mutex_exit(lock);
1169 
1170 semoperr_unlocked:
1171 
1172 	/* Before leaving, deallocate the buffer that held the user semops */
1173 	if (nsops != 1)
1174 		kmem_free(uops, sizeof (*uops) * nsops);
1175 	return (set_errno(error));
1176 }
1177 
1178 /*
1179  * semsys - System entry point for semctl, semget, and semop system calls.
1180  */
1181 static int
1182 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4)
1183 {
1184 	int error;
1185 
1186 	switch (opcode) {
1187 	case SEMCTL:
1188 		error = semctl((int)a1, (uint_t)a2, (int)a3, a4);
1189 		break;
1190 	case SEMGET:
1191 		error = semget((key_t)a1, (int)a2, (int)a3);
1192 		break;
1193 	case SEMOP:
1194 		error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0);
1195 		break;
1196 	case SEMIDS:
1197 		error = semids((int *)a1, (uint_t)a2, (uint_t *)a3);
1198 		break;
1199 	case SEMTIMEDOP:
1200 		error = semop((int)a1, (struct sembuf *)a2, (size_t)a3,
1201 		    (timespec_t *)a4);
1202 		break;
1203 	default:
1204 		error = set_errno(EINVAL);
1205 		break;
1206 	}
1207 	return (error);
1208 }
1209