xref: /titanic_50/usr/src/uts/common/syscall/sem.c (revision 005d3feb53a9a10272d4a24b03991575d6a9bcb3)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23   * Use is subject to license terms.
24   */
25  
26  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27  /*	  All Rights Reserved  	*/
28  
29  /*
30   * Inter-Process Communication Semaphore Facility.
31   *
32   * See os/ipc.c for a description of common IPC functionality.
33   *
34   * Resource controls
35   * -----------------
36   *
37   * Control:      zone.max-sem-ids (rc_zone_semmni)
38   * Description:  Maximum number of semaphore ids allowed a zone.
39   *
40   *   When semget() is used to allocate a semaphore set, one id is
41   *   allocated.  If the id allocation doesn't succeed, semget() fails
42   *   and errno is set to ENOSPC.  Upon successful semctl(, IPC_RMID)
43   *   the id is deallocated.
44   *
45   * Control:      project.max-sem-ids (rc_project_semmni)
46   * Description:  Maximum number of semaphore ids allowed a project.
47   *
48   *   When semget() is used to allocate a semaphore set, one id is
49   *   allocated.  If the id allocation doesn't succeed, semget() fails
50   *   and errno is set to ENOSPC.  Upon successful semctl(, IPC_RMID)
51   *   the id is deallocated.
52   *
53   * Control:      process.max-sem-nsems (rc_process_semmsl)
54   * Description:  Maximum number of semaphores allowed per semaphore set.
55   *
56   *   When semget() is used to allocate a semaphore set, the size of the
57   *   set is compared with this limit.  If the number of semaphores
58   *   exceeds the limit, semget() fails and errno is set to EINVAL.
59   *
60   * Control:      process.max-sem-ops (rc_process_semopm)
61   * Description:  Maximum number of semaphore operations allowed per
62   *               semop call.
63   *
64   *   When semget() successfully allocates a semaphore set, the minimum
65   *   enforced value of this limit is used to initialize the
66   *   "system-imposed maximum" number of operations a semop() call for
67   *   this set can perform.
68   *
69   * Undo structures
70   * ---------------
71   *
72   * Removing the undo structure tunables involved a serious redesign of
73   * how they were implemented.  There is now one undo structure for
74   * every process/semaphore array combination (lazily allocated, of
75   * course), and each is equal in size to the semaphore it corresponds
76   * to.  To avoid scalability and performance problems, the undo
77   * structures are stored in two places: a per-process AVL tree sorted
78   * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted
79   * per-semaphore linked list (sem_undos, protected by the semaphore's
80   * ID lock).  The former is used by semop, where a lookup is performed
81   * once and cached if SEM_UNDO is specified for any of the operations,
82   * and at process exit where the undoable operations are rolled back.
83   * The latter is used when removing the semaphore, so the undo
84   * structures can be removed from the appropriate processes' trees.
85   *
86   * The undo structure itself contains pointers to the ksemid and proc
87   * to which it corresponds, a list node, an AVL node, and an array of
88   * adjust-on-exit (AOE) values.  When an undo structure is allocated it
89   * is immediately added to both the process's tree and the semaphore's
90   * list.  Lastly, the reference count on the semaphore is increased.
91   *
92   * Avoiding a lock ordering violation between p_lock and the ID lock,
93   * wont to occur when there is a race between a process exiting and the
94   * removal of a semaphore, mandates the delicate dance that exists
95   * between semexit and sem_rmid.
96   *
97   * sem_rmid, holding the ID lock, iterates through all undo structures
98   * and for each takes the appropriate process's p_lock and checks to
99   * see if p_semacct is NULL.  If it is, it skips that undo structure
100   * and continues to the next.  Otherwise, it removes the undo structure
101   * from both the AVL tree and the semaphore's list, and releases the
102   * hold that the undo structure had on the semaphore.
103   *
104   * The important other half of this is semexit, which will immediately
105   * take p_lock, obtain the AVL pointer, clear p_semacct, and drop
106   * p_lock.  From this point on it is semexit's responsibility to clean
107   * up all undo structures found in the tree -- a coexecuting sem_rmid
108   * will see the NULL p_semacct and skip that undo structure.  It walks
109   * the AVL tree (using avl_destroy_nodes) and for each undo structure
110   * takes the appropriate semaphore's ID lock (always legal since the
111   * undo structure has a hold on the semaphore), updates all semaphores
112   * with non-zero AOE values, and removes the structure from the
113   * semaphore's list.  It then drops the structure's reference on the
114   * semaphore, drops the ID lock, and frees the undo structure.
115   */
116  
117  #include <sys/types.h>
118  #include <sys/t_lock.h>
119  #include <sys/param.h>
120  #include <sys/systm.h>
121  #include <sys/sysmacros.h>
122  #include <sys/cred.h>
123  #include <sys/vmem.h>
124  #include <sys/kmem.h>
125  #include <sys/errno.h>
126  #include <sys/time.h>
127  #include <sys/ipc.h>
128  #include <sys/ipc_impl.h>
129  #include <sys/sem.h>
130  #include <sys/sem_impl.h>
131  #include <sys/user.h>
132  #include <sys/proc.h>
133  #include <sys/cpuvar.h>
134  #include <sys/debug.h>
135  #include <sys/var.h>
136  #include <sys/cmn_err.h>
137  #include <sys/modctl.h>
138  #include <sys/syscall.h>
139  #include <sys/avl.h>
140  #include <sys/list.h>
141  #include <sys/zone.h>
142  
143  #include <c2/audit.h>
144  
/*
 * Resource control handles, defined elsewhere.  The controls they
 * correspond to are described in the block comment at the top of this
 * file.
 */
extern rctl_hndl_t rc_zone_semmni;
extern rctl_hndl_t rc_project_semmni;
extern rctl_hndl_t rc_process_semmsl;
extern rctl_hndl_t rc_process_semopm;
/* IPC service handle for semaphores; created by ipcs_create() in _init(). */
static ipc_service_t *sem_svc;
/* Zone key created in _init() with sem_remove_zone() as a callback. */
static zone_key_t sem_zone_key;

/*
 * The following tunables are obsolete.  Though for compatibility we
 * still read and interpret seminfo_semmsl, seminfo_semopm and
 * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred
 * mechanism for administering the IPC Semaphore facility is through
 * the resource controls described at the top of this file.
 */
int seminfo_semaem = 16384;	/* (obsolete) */
int seminfo_semmap = 10;	/* (obsolete) */
int seminfo_semmni = 10;	/* (obsolete) */
int seminfo_semmns = 60;	/* (obsolete) */
int seminfo_semmnu = 30;	/* (obsolete) */
int seminfo_semmsl = 25;	/* (obsolete) */
int seminfo_semopm = 10;	/* (obsolete) */
int seminfo_semume = 10;	/* (obsolete) */
int seminfo_semusz = 96;	/* (obsolete) */
int seminfo_semvmx = 32767;	/* (obsolete) */
169  
/*
 * SEM_MAXUCOPS - largest operation count for which semop() allocates
 * its kernel copy of the operation vector before checking the count
 * against the set's own limit.
 *
 * SEM_UNDOSZ(n) - size in bytes of an undo structure holding n
 * adjust-on-exit values.  One value is accounted for by
 * sizeof (struct sem_undo) itself, hence the "- 1".  The argument is
 * parenthesized so that any expression (e.g. a shift or a conditional)
 * can be passed safely.
 */
#define	SEM_MAXUCOPS	4096	/* max # of unchecked ops per semop call */
#define	SEM_UNDOSZ(n)	\
	(sizeof (struct sem_undo) + ((n) - 1) * sizeof (int))
172  
/*
 * Forward declarations.  semsys is the system call handler installed
 * through ipcsem_sysent below; sem_dtor and sem_rmid are handed to
 * ipcs_create() and sem_remove_zone to zone_key_create() in _init().
 */
static int semsys(int opcode, uintptr_t a0, uintptr_t a1,
    uintptr_t a2, uintptr_t a3);
static void sem_dtor(kipc_perm_t *);
static void sem_rmid(kipc_perm_t *);
static void sem_remove_zone(zoneid_t, void *);

/*
 * System call table entry for the semaphore facility.
 * NOTE(review): the leading 5 presumably is the argument count, which
 * matches semsys()'s five parameters -- confirm against struct sysent.
 */
static struct sysent ipcsem_sysent = {
	5,
	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
	semsys
};

/*
 * Module linkage information for the kernel.
 */
static struct modlsys modlsys = {
	&mod_syscallops, "System V semaphore facility", &ipcsem_sysent
};

/* Same handler, registered through the 32-bit syscall ops. */
#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent
};
#endif

/* NULL-terminated list of the linkage structures defined above. */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
206  
207  
208  int
_init(void)209  _init(void)
210  {
211  	int result;
212  
213  	sem_svc = ipcs_create("semids", rc_project_semmni, rc_zone_semmni,
214  	    sizeof (ksemid_t), sem_dtor, sem_rmid, AT_IPC_SEM,
215  	    offsetof(ipc_rqty_t, ipcq_semmni));
216  	zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL);
217  
218  	if ((result = mod_install(&modlinkage)) == 0)
219  		return (0);
220  
221  	(void) zone_key_delete(sem_zone_key);
222  	ipcs_destroy(sem_svc);
223  
224  	return (result);
225  }
226  
/*
 * _fini - module unload entry point.
 *
 * Unconditionally returns EBUSY and performs no teardown.
 * NOTE(review): presumably this is what keeps the module from ever
 * being unloaded (cf. SE_NOUNLOAD in ipcsem_sysent) -- confirm.
 */
int
_fini(void)
{
	return (EBUSY);
}
232  
233  int
_info(struct modinfo * modinfop)234  _info(struct modinfo *modinfop)
235  {
236  	return (mod_info(&modlinkage, modinfop));
237  }
238  
239  static void
sem_dtor(kipc_perm_t * perm)240  sem_dtor(kipc_perm_t *perm)
241  {
242  	ksemid_t *sp = (ksemid_t *)perm;
243  
244  	kmem_free(sp->sem_base,
245  	    P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64));
246  	list_destroy(&sp->sem_undos);
247  }
248  
249  /*
250   * sem_undo_add - Create or update adjust on exit entry.
251   */
252  static int
sem_undo_add(short val,ushort_t num,struct sem_undo * undo)253  sem_undo_add(short val, ushort_t num, struct sem_undo *undo)
254  {
255  	int newval = undo->un_aoe[num] - val;
256  
257  	if (newval > USHRT_MAX || newval < -USHRT_MAX)
258  		return (ERANGE);
259  	undo->un_aoe[num] = newval;
260  
261  	return (0);
262  }
263  
264  /*
265   * sem_undo_clear - clears all undo entries for specified semaphores
266   *
267   * Used when semaphores are reset by SETVAL or SETALL.
268   */
269  static void
sem_undo_clear(ksemid_t * sp,ushort_t low,ushort_t high)270  sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high)
271  {
272  	struct sem_undo *undo;
273  	int i;
274  
275  	ASSERT(low <= high);
276  	ASSERT(high < sp->sem_nsems);
277  
278  	for (undo = list_head(&sp->sem_undos); undo;
279  	    undo = list_next(&sp->sem_undos, undo))
280  		for (i = low; i <= high; i++)
281  			undo->un_aoe[i] = 0;
282  }
283  
284  /*
285   * sem_rollback - roll back work done so far if unable to complete operation
286   */
287  static void
sem_rollback(ksemid_t * sp,struct sembuf * op,int n,struct sem_undo * undo)288  sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo)
289  {
290  	struct sem *semp;	/* semaphore ptr */
291  
292  	for (op += n - 1; n--; op--) {
293  		if (op->sem_op == 0)
294  			continue;
295  		semp = &sp->sem_base[op->sem_num];
296  		semp->semval -= op->sem_op;
297  		if (op->sem_flg & SEM_UNDO) {
298  			ASSERT(undo != NULL);
299  			(void) sem_undo_add(-op->sem_op, op->sem_num, undo);
300  		}
301  	}
302  }
303  
304  static void
sem_rmid(kipc_perm_t * perm)305  sem_rmid(kipc_perm_t *perm)
306  {
307  	ksemid_t *sp = (ksemid_t *)perm;
308  	struct sem *semp;
309  	struct sem_undo *undo;
310  	size_t size = SEM_UNDOSZ(sp->sem_nsems);
311  	int i;
312  
313  	/*LINTED*/
314  	while (undo = list_head(&sp->sem_undos)) {
315  		list_remove(&sp->sem_undos, undo);
316  		mutex_enter(&undo->un_proc->p_lock);
317  		if (undo->un_proc->p_semacct == NULL) {
318  			mutex_exit(&undo->un_proc->p_lock);
319  			continue;
320  		}
321  		avl_remove(undo->un_proc->p_semacct, undo);
322  		mutex_exit(&undo->un_proc->p_lock);
323  		kmem_free(undo, size);
324  		ipc_rele_locked(sem_svc, (kipc_perm_t *)sp);
325  	}
326  
327  	for (i = 0; i < sp->sem_nsems; i++) {
328  		semp = &sp->sem_base[i];
329  		semp->semval = semp->sempid = 0;
330  		if (semp->semncnt) {
331  			cv_broadcast(&semp->semncnt_cv);
332  			semp->semncnt = 0;
333  		}
334  		if (semp->semzcnt) {
335  			cv_broadcast(&semp->semzcnt_cv);
336  			semp->semzcnt = 0;
337  		}
338  	}
339  }
340  
341  /*
342   * semctl - Semctl system call.
343   */
344  static int
semctl(int semid,uint_t semnum,int cmd,uintptr_t arg)345  semctl(int semid, uint_t semnum, int cmd, uintptr_t arg)
346  {
347  	ksemid_t		*sp;	/* ptr to semaphore header */
348  	struct sem		*p;	/* ptr to semaphore */
349  	unsigned int		i;	/* loop control */
350  	ushort_t		*vals, *vp;
351  	size_t			vsize = 0;
352  	int			error = 0;
353  	int			retval = 0;
354  	struct cred		*cr;
355  	kmutex_t		*lock;
356  	model_t			mdl = get_udatamodel();
357  	STRUCT_DECL(semid_ds, sid);
358  	struct semid_ds64	ds64;
359  
360  	STRUCT_INIT(sid, mdl);
361  	cr = CRED();
362  
363  	/*
364  	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
365  	 */
366  	switch (cmd) {
367  	case IPC_SET:
368  		if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid)))
369  			return (set_errno(EFAULT));
370  		break;
371  
372  	case IPC_SET64:
373  		if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64)))
374  			return (set_errno(EFAULT));
375  		break;
376  
377  	case SETALL:
378  		if ((lock = ipc_lookup(sem_svc, semid,
379  		    (kipc_perm_t **)&sp)) == NULL)
380  			return (set_errno(EINVAL));
381  		vsize = sp->sem_nsems * sizeof (*vals);
382  		mutex_exit(lock);
383  
384  		/* allocate space to hold all semaphore values */
385  		vals = kmem_alloc(vsize, KM_SLEEP);
386  
387  		if (copyin((void *)arg, vals, vsize)) {
388  			kmem_free(vals, vsize);
389  			return (set_errno(EFAULT));
390  		}
391  		break;
392  
393  	case IPC_RMID:
394  		if (error = ipc_rmid(sem_svc, semid, cr))
395  			return (set_errno(error));
396  		return (0);
397  	}
398  
399  	if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) {
400  		if (vsize != 0)
401  			kmem_free(vals, vsize);
402  		return (set_errno(EINVAL));
403  	}
404  	switch (cmd) {
405  	/* Set ownership and permissions. */
406  	case IPC_SET:
407  
408  		if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm,
409  		    &STRUCT_BUF(sid)->sem_perm, mdl)) {
410  			mutex_exit(lock);
411  			return (set_errno(error));
412  		}
413  		sp->sem_ctime = gethrestime_sec();
414  		mutex_exit(lock);
415  		return (0);
416  
417  	/* Get semaphore data structure. */
418  	case IPC_STAT:
419  
420  		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
421  			mutex_exit(lock);
422  			return (set_errno(error));
423  		}
424  
425  		ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl);
426  		STRUCT_FSETP(sid, sem_base, NULL);	/* kernel addr */
427  		STRUCT_FSET(sid, sem_nsems, sp->sem_nsems);
428  		STRUCT_FSET(sid, sem_otime, sp->sem_otime);
429  		STRUCT_FSET(sid, sem_ctime, sp->sem_ctime);
430  		STRUCT_FSET(sid, sem_binary, sp->sem_binary);
431  		mutex_exit(lock);
432  
433  		if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid)))
434  			return (set_errno(EFAULT));
435  		return (0);
436  
437  	case IPC_SET64:
438  
439  		if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm,
440  		    &ds64.semx_perm)) {
441  			mutex_exit(lock);
442  			return (set_errno(error));
443  		}
444  		sp->sem_ctime = gethrestime_sec();
445  		mutex_exit(lock);
446  		return (0);
447  
448  	case IPC_STAT64:
449  
450  		ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm);
451  		ds64.semx_nsems = sp->sem_nsems;
452  		ds64.semx_otime = sp->sem_otime;
453  		ds64.semx_ctime = sp->sem_ctime;
454  
455  		mutex_exit(lock);
456  		if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64)))
457  			return (set_errno(EFAULT));
458  
459  		return (0);
460  
461  	/* Get # of processes sleeping for greater semval. */
462  	case GETNCNT:
463  		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
464  			mutex_exit(lock);
465  			return (set_errno(error));
466  		}
467  		if (semnum >= sp->sem_nsems) {
468  			mutex_exit(lock);
469  			return (set_errno(EINVAL));
470  		}
471  		retval = sp->sem_base[semnum].semncnt;
472  		mutex_exit(lock);
473  		return (retval);
474  
475  	/* Get pid of last process to operate on semaphore. */
476  	case GETPID:
477  		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
478  			mutex_exit(lock);
479  			return (set_errno(error));
480  		}
481  		if (semnum >= sp->sem_nsems) {
482  			mutex_exit(lock);
483  			return (set_errno(EINVAL));
484  		}
485  		retval = sp->sem_base[semnum].sempid;
486  		mutex_exit(lock);
487  		return (retval);
488  
489  	/* Get semval of one semaphore. */
490  	case GETVAL:
491  		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
492  			mutex_exit(lock);
493  			return (set_errno(error));
494  		}
495  		if (semnum >= sp->sem_nsems) {
496  			mutex_exit(lock);
497  			return (set_errno(EINVAL));
498  		}
499  		retval = sp->sem_base[semnum].semval;
500  		mutex_exit(lock);
501  		return (retval);
502  
503  	/* Get all semvals in set. */
504  	case GETALL:
505  		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
506  			mutex_exit(lock);
507  			return (set_errno(error));
508  		}
509  
510  		/* allocate space to hold all semaphore values */
511  		vsize = sp->sem_nsems * sizeof (*vals);
512  		vals = vp = kmem_alloc(vsize, KM_SLEEP);
513  
514  		for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++)
515  			bcopy(&p->semval, vp, sizeof (p->semval));
516  
517  		mutex_exit(lock);
518  
519  		if (copyout((void *)vals, (void *)arg, vsize)) {
520  			kmem_free(vals, vsize);
521  			return (set_errno(EFAULT));
522  		}
523  
524  		kmem_free(vals, vsize);
525  		return (0);
526  
527  	/* Get # of processes sleeping for semval to become zero. */
528  	case GETZCNT:
529  		if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
530  			mutex_exit(lock);
531  			return (set_errno(error));
532  		}
533  		if (semnum >= sp->sem_nsems) {
534  			mutex_exit(lock);
535  			return (set_errno(EINVAL));
536  		}
537  		retval = sp->sem_base[semnum].semzcnt;
538  		mutex_exit(lock);
539  		return (retval);
540  
541  	/* Set semval of one semaphore. */
542  	case SETVAL:
543  		if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) {
544  			mutex_exit(lock);
545  			return (set_errno(error));
546  		}
547  		if (semnum >= sp->sem_nsems) {
548  			mutex_exit(lock);
549  			return (set_errno(EINVAL));
550  		}
551  		if ((uint_t)arg > USHRT_MAX) {
552  			mutex_exit(lock);
553  			return (set_errno(ERANGE));
554  		}
555  		p = &sp->sem_base[semnum];
556  		if ((p->semval = (ushort_t)arg) != 0) {
557  			if (p->semncnt) {
558  				cv_broadcast(&p->semncnt_cv);
559  			}
560  		} else if (p->semzcnt) {
561  			cv_broadcast(&p->semzcnt_cv);
562  		}
563  		p->sempid = curproc->p_pid;
564  		sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum);
565  		mutex_exit(lock);
566  		return (0);
567  
568  	/* Set semvals of all semaphores in set. */
569  	case SETALL:
570  		/* Check if semaphore set has been deleted and reallocated. */
571  		if (sp->sem_nsems * sizeof (*vals) != vsize) {
572  			error = set_errno(EINVAL);
573  			goto seterr;
574  		}
575  		if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) {
576  			error = set_errno(error);
577  			goto seterr;
578  		}
579  		sem_undo_clear(sp, 0, sp->sem_nsems - 1);
580  		for (i = 0, p = sp->sem_base; i < sp->sem_nsems;
581  		    (p++)->sempid = curproc->p_pid) {
582  			if ((p->semval = vals[i++]) != 0) {
583  				if (p->semncnt) {
584  					cv_broadcast(&p->semncnt_cv);
585  				}
586  			} else if (p->semzcnt) {
587  				cv_broadcast(&p->semzcnt_cv);
588  			}
589  		}
590  seterr:
591  		mutex_exit(lock);
592  		kmem_free(vals, vsize);
593  		return (error);
594  
595  	default:
596  		mutex_exit(lock);
597  		return (set_errno(EINVAL));
598  	}
599  
600  	/* NOTREACHED */
601  }
602  
603  /*
604   * semexit - Called by exit() to clean up on process exit.
605   */
606  void
semexit(proc_t * pp)607  semexit(proc_t *pp)
608  {
609  	avl_tree_t	*tree;
610  	struct sem_undo	*undo;
611  	void		*cookie = NULL;
612  
613  	mutex_enter(&pp->p_lock);
614  	tree = pp->p_semacct;
615  	pp->p_semacct = NULL;
616  	mutex_exit(&pp->p_lock);
617  
618  	while (undo = avl_destroy_nodes(tree, &cookie)) {
619  		ksemid_t *sp = undo->un_sp;
620  		size_t size = SEM_UNDOSZ(sp->sem_nsems);
621  		int i;
622  
623  		(void) ipc_lock(sem_svc, sp->sem_perm.ipc_id);
624  		if (!IPC_FREE(&sp->sem_perm)) {
625  			for (i = 0; i < sp->sem_nsems; i++) {
626  				int adj = undo->un_aoe[i];
627  				if (adj) {
628  					struct sem *semp = &sp->sem_base[i];
629  					int v = (int)semp->semval + adj;
630  
631  					if (v < 0 || v > USHRT_MAX)
632  						continue;
633  					semp->semval = (ushort_t)v;
634  					if (v == 0 && semp->semzcnt)
635  						cv_broadcast(&semp->semzcnt_cv);
636  					if (adj > 0 && semp->semncnt)
637  						cv_broadcast(&semp->semncnt_cv);
638  				}
639  			}
640  			list_remove(&sp->sem_undos, undo);
641  		}
642  		ipc_rele(sem_svc, (kipc_perm_t *)sp);
643  		kmem_free(undo, size);
644  	}
645  
646  	avl_destroy(tree);
647  	kmem_free(tree, sizeof (avl_tree_t));
648  }
649  
/*
 * Remove all semaphores associated with a given zone.  Called by
 * zone_shutdown when the zone is halted.
 *
 * This is the callback passed to zone_key_create() in _init(); it
 * simply forwards the zone id to ipc_remove_zone() for the semaphore
 * service.  The second argument is unused.
 */
/*ARGSUSED1*/
static void
sem_remove_zone(zoneid_t zoneid, void *arg)
{
	ipc_remove_zone(sem_svc, zoneid);
}
660  
661  /*
662   * semget - Semget system call.
663   */
664  static int
semget(key_t key,int nsems,int semflg)665  semget(key_t key, int nsems, int semflg)
666  {
667  	ksemid_t	*sp;
668  	kmutex_t	*lock;
669  	int		id, error;
670  	proc_t		*pp = curproc;
671  
672  top:
673  	if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock))
674  		return (set_errno(error));
675  
676  	if (!IPC_FREE(&sp->sem_perm)) {
677  		/*
678  		 * A semaphore with the requested key exists.
679  		 */
680  		if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) {
681  			mutex_exit(lock);
682  			return (set_errno(EINVAL));
683  		}
684  	} else {
685  		/*
686  		 * This is a new semaphore set.  Finish initialization.
687  		 */
688  		if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp,
689  		    nsems, RCA_SAFE) & RCT_DENY)) {
690  			mutex_exit(lock);
691  			mutex_exit(&pp->p_lock);
692  			ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
693  			return (set_errno(EINVAL));
694  		}
695  		mutex_exit(lock);
696  		mutex_exit(&pp->p_lock);
697  
698  		/*
699  		 * We round the allocation up to coherency granularity
700  		 * so that multiple semaphore allocations won't result
701  		 * in the false sharing of their sem structures.
702  		 */
703  		sp->sem_base =
704  		    kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64),
705  		    KM_SLEEP);
706  		sp->sem_binary = (nsems == 1);
707  		sp->sem_nsems = (ushort_t)nsems;
708  		sp->sem_ctime = gethrestime_sec();
709  		sp->sem_otime = 0;
710  		list_create(&sp->sem_undos, sizeof (struct sem_undo),
711  		    offsetof(struct sem_undo, un_list));
712  
713  		if (error = ipc_commit_begin(sem_svc, key, semflg,
714  		    (kipc_perm_t *)sp)) {
715  			if (error == EAGAIN)
716  				goto top;
717  			return (set_errno(error));
718  		}
719  		sp->sem_maxops =
720  		    rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
721  		if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems,
722  		    RCA_SAFE) & RCT_DENY) {
723  			ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
724  			return (set_errno(EINVAL));
725  		}
726  		lock = ipc_commit_end(sem_svc, &sp->sem_perm);
727  	}
728  
729  	if (AU_AUDITING())
730  		audit_ipcget(AT_IPC_SEM, (void *)sp);
731  
732  	id = sp->sem_perm.ipc_id;
733  	mutex_exit(lock);
734  	return (id);
735  }
736  
737  /*
738   * semids system call.
739   */
740  static int
semids(int * buf,uint_t nids,uint_t * pnids)741  semids(int *buf, uint_t nids, uint_t *pnids)
742  {
743  	int error;
744  
745  	if (error = ipc_ids(sem_svc, buf, nids, pnids))
746  		return (set_errno(error));
747  
748  	return (0);
749  }
750  
751  
752  /*
753   * Helper function for semop - copies in the provided timespec and
754   * computes the absolute future time after which we must return.
755   */
756  static int
compute_timeout(timespec_t ** tsp,timespec_t * ts,timespec_t * now,timespec_t * timeout)757  compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now,
758  	timespec_t *timeout)
759  {
760  	model_t datamodel = get_udatamodel();
761  
762  	if (datamodel == DATAMODEL_NATIVE) {
763  		if (copyin(timeout, ts, sizeof (timespec_t)))
764  			return (EFAULT);
765  	} else {
766  		timespec32_t ts32;
767  
768  		if (copyin(timeout, &ts32, sizeof (timespec32_t)))
769  			return (EFAULT);
770  		TIMESPEC32_TO_TIMESPEC(ts, &ts32)
771  	}
772  
773  	if (itimerspecfix(ts))
774  		return (EINVAL);
775  
776  	/*
777  	 * Convert the timespec value into absolute time.
778  	 */
779  	timespecadd(ts, now);
780  	*tsp = ts;
781  
782  	return (0);
783  }
784  
785  /*
786   * Undo structure comparator.  We sort based on ksemid_t pointer.
787   */
788  static int
sem_undo_compar(const void * x,const void * y)789  sem_undo_compar(const void *x, const void *y)
790  {
791  	struct sem_undo *undo1 = (struct sem_undo *)x;
792  	struct sem_undo *undo2 = (struct sem_undo *)y;
793  
794  	if (undo1->un_sp < undo2->un_sp)
795  		return (-1);
796  	if (undo1->un_sp > undo2->un_sp)
797  		return (1);
798  	return (0);
799  }
800  
801  /*
802   * Helper function for semop - creates an undo structure and adds it to
803   * the process's avl tree and the semaphore's list.
804   */
805  static int
sem_undo_alloc(proc_t * pp,ksemid_t * sp,kmutex_t ** lock,struct sem_undo * template,struct sem_undo ** un)806  sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock,
807      struct sem_undo *template, struct sem_undo **un)
808  {
809  	size_t size;
810  	struct sem_undo *undo;
811  	avl_tree_t *tree = NULL;
812  	avl_index_t where;
813  
814  	mutex_exit(*lock);
815  
816  	size = SEM_UNDOSZ(sp->sem_nsems);
817  	undo = kmem_zalloc(size, KM_SLEEP);
818  	undo->un_proc = pp;
819  	undo->un_sp = sp;
820  
821  	if (pp->p_semacct == NULL)
822  		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
823  
824  	*lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
825  	if (IPC_FREE(&sp->sem_perm)) {
826  		kmem_free(undo, size);
827  		if (tree)
828  			kmem_free(tree, sizeof (avl_tree_t));
829  		return (EIDRM);
830  	}
831  
832  	mutex_enter(&pp->p_lock);
833  	if (tree) {
834  		if (pp->p_semacct == NULL) {
835  			avl_create(tree, sem_undo_compar,
836  			    sizeof (struct sem_undo),
837  			    offsetof(struct sem_undo, un_avl));
838  			pp->p_semacct = tree;
839  		} else {
840  			kmem_free(tree, sizeof (avl_tree_t));
841  		}
842  	}
843  
844  	if (*un = avl_find(pp->p_semacct, template, &where)) {
845  		mutex_exit(&pp->p_lock);
846  		kmem_free(undo, size);
847  	} else {
848  		*un = undo;
849  		avl_insert(pp->p_semacct, undo, where);
850  		mutex_exit(&pp->p_lock);
851  		list_insert_head(&sp->sem_undos, undo);
852  		ipc_hold(sem_svc, (kipc_perm_t *)sp);
853  	}
854  
855  
856  	return (0);
857  }
858  
859  /*
860   * semop - Semop system call.
861   */
862  static int
semop(int semid,struct sembuf * sops,size_t nsops,timespec_t * timeout)863  semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout)
864  {
865  	ksemid_t	*sp = NULL;
866  	kmutex_t	*lock;
867  	struct sembuf	*op;	/* ptr to operation */
868  	int		i;	/* loop control */
869  	struct sem	*semp;	/* ptr to semaphore */
870  	int 		error = 0;
871  	struct sembuf	*uops;	/* ptr to copy of user ops */
872  	struct sembuf 	x_sem;	/* avoid kmem_alloc's */
873  	timespec_t	now, ts, *tsp = NULL;
874  	int		timecheck = 0;
875  	int		cvres, needundo, mode;
876  	struct sem_undo	*undo;
877  	proc_t		*pp = curproc;
878  	int		held = 0;
879  
880  	CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */
881  
882  	/*
883  	 * To avoid the cost of copying in 'timeout' in the common
884  	 * case, we could only grab the time here and defer the copyin
885  	 * and associated computations until we are about to block.
886  	 *
887  	 * The down side to this is that we would then have to spin
888  	 * some goto top nonsense to avoid the copyin behind the semid
889  	 * lock.  As a common use of timed semaphores is as an explicit
890  	 * blocking mechanism, this could incur a greater penalty.
891  	 *
892  	 * If we eventually decide that this would be a wise route to
893  	 * take, the deferrable functionality is completely contained
894  	 * in 'compute_timeout', and the interface is defined such that
895  	 * we can legally not validate 'timeout' if it is unused.
896  	 */
897  	if (timeout != NULL) {
898  		timecheck = timechanged;
899  		gethrestime(&now);
900  		if (error = compute_timeout(&tsp, &ts, &now, timeout))
901  			return (set_errno(error));
902  	}
903  
904  	/*
905  	 * Allocate space to hold the vector of semaphore ops.  If
906  	 * there is only 1 operation we use a preallocated buffer on
907  	 * the stack for speed.
908  	 *
909  	 * Since we don't want to allow the user to allocate an
910  	 * arbitrary amount of kernel memory, we need to check against
911  	 * the number of operations allowed by the semaphore.  We only
912  	 * bother doing this if the number of operations is larger than
913  	 * SEM_MAXUCOPS.
914  	 */
915  	if (nsops == 1)
916  		uops = &x_sem;
917  	else if (nsops == 0)
918  		return (0);
919  	else if (nsops <= SEM_MAXUCOPS)
920  		uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP);
921  
922  	if (nsops > SEM_MAXUCOPS) {
923  		if ((lock = ipc_lookup(sem_svc, semid,
924  		    (kipc_perm_t **)&sp)) == NULL)
925  			return (set_errno(EFAULT));
926  
927  		if (nsops > sp->sem_maxops) {
928  			mutex_exit(lock);
929  			return (set_errno(E2BIG));
930  		}
931  		held = 1;
932  		ipc_hold(sem_svc, (kipc_perm_t *)sp);
933  		mutex_exit(lock);
934  
935  		uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP);
936  		if (copyin(sops, uops, nsops * sizeof (*op))) {
937  			error = EFAULT;
938  			(void) ipc_lock(sem_svc, sp->sem_perm.ipc_id);
939  			goto semoperr;
940  		}
941  
942  		lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
943  		if (IPC_FREE(&sp->sem_perm)) {
944  			error = EIDRM;
945  			goto semoperr;
946  		}
947  	} else {
948  		/*
949  		 * This could be interleaved with the above code, but
950  		 * keeping them separate improves readability.
951  		 */
952  		if (copyin(sops, uops, nsops * sizeof (*op))) {
953  			error = EFAULT;
954  			goto semoperr_unlocked;
955  		}
956  
957  		if ((lock = ipc_lookup(sem_svc, semid,
958  		    (kipc_perm_t **)&sp)) == NULL) {
959  			error = EINVAL;
960  			goto semoperr_unlocked;
961  		}
962  
963  		if (nsops > sp->sem_maxops) {
964  			error = E2BIG;
965  			goto semoperr;
966  		}
967  	}
968  
969  	/*
970  	 * Scan all operations.  Verify that sem #s are in range and
971  	 * this process is allowed the requested operations.  If any
972  	 * operations are marked SEM_UNDO, find (or allocate) the undo
973  	 * structure for this process and semaphore.
974  	 */
975  	needundo = 0;
976  	mode = 0;
977  	for (i = 0, op = uops; i++ < nsops; op++) {
978  		mode |= op->sem_op ? SEM_A : SEM_R;
979  		if (op->sem_num >= sp->sem_nsems) {
980  			error = EFBIG;
981  			goto semoperr;
982  		}
983  		if ((op->sem_flg & SEM_UNDO) && op->sem_op)
984  			needundo = 1;
985  	}
986  	if (error = ipcperm_access(&sp->sem_perm, mode, CRED()))
987  		goto semoperr;
988  
989  	if (needundo) {
990  		struct sem_undo template;
991  
992  		template.un_sp = sp;
993  		mutex_enter(&pp->p_lock);
994  		if (pp->p_semacct)
995  			undo = avl_find(pp->p_semacct, &template, NULL);
996  		else
997  			undo = NULL;
998  		mutex_exit(&pp->p_lock);
999  		if (undo == NULL) {
1000  			if (!held) {
1001  				held = 1;
1002  				ipc_hold(sem_svc, (kipc_perm_t *)sp);
1003  			}
1004  			if (error = sem_undo_alloc(pp, sp, &lock, &template,
1005  			    &undo))
1006  				goto semoperr;
1007  
1008  			/* sem_undo_alloc unlocks the semaphore */
1009  			if (error = ipcperm_access(&sp->sem_perm, mode, CRED()))
1010  				goto semoperr;
1011  		}
1012  	}
1013  
1014  check:
1015  	/*
1016  	 * Loop waiting for the operations to be satisfied atomically.
1017  	 * Actually, do the operations and undo them if a wait is needed
1018  	 * or an error is detected.
1019  	 */
1020  	for (i = 0; i < nsops; i++) {
1021  		op = &uops[i];
1022  		semp = &sp->sem_base[op->sem_num];
1023  
1024  		/*
1025  		 * Raise the semaphore (i.e. sema_v)
1026  		 */
1027  		if (op->sem_op > 0) {
1028  			if (op->sem_op + (int)semp->semval > USHRT_MAX ||
1029  			    ((op->sem_flg & SEM_UNDO) &&
1030  			    (error = sem_undo_add(op->sem_op, op->sem_num,
1031  			    undo)))) {
1032  				if (i)
1033  					sem_rollback(sp, uops, i, undo);
1034  				if (error == 0)
1035  					error = ERANGE;
1036  				goto semoperr;
1037  			}
1038  			semp->semval += op->sem_op;
1039  			/*
1040  			 * If we are only incrementing the semaphore value
1041  			 * by one on a binary semaphore, we can cv_signal.
1042  			 */
1043  			if (semp->semncnt) {
1044  				if (op->sem_op == 1 && sp->sem_binary)
1045  					cv_signal(&semp->semncnt_cv);
1046  				else
1047  					cv_broadcast(&semp->semncnt_cv);
1048  			}
1049  			if (semp->semzcnt && !semp->semval)
1050  				cv_broadcast(&semp->semzcnt_cv);
1051  			continue;
1052  		}
1053  
1054  		/*
1055  		 * Lower the semaphore (i.e. sema_p)
1056  		 */
1057  		if (op->sem_op < 0) {
1058  			if (semp->semval >= (unsigned)(-op->sem_op)) {
1059  				if ((op->sem_flg & SEM_UNDO) &&
1060  				    (error = sem_undo_add(op->sem_op,
1061  				    op->sem_num, undo))) {
1062  					if (i)
1063  						sem_rollback(sp, uops, i, undo);
1064  					goto semoperr;
1065  				}
1066  				semp->semval += op->sem_op;
1067  				if (semp->semzcnt && !semp->semval)
1068  					cv_broadcast(&semp->semzcnt_cv);
1069  				continue;
1070  			}
1071  			if (i)
1072  				sem_rollback(sp, uops, i, undo);
1073  			if (op->sem_flg & IPC_NOWAIT) {
1074  				error = EAGAIN;
1075  				goto semoperr;
1076  			}
1077  
1078  			/*
1079  			 * Mark the semaphore set as not a binary type
1080  			 * if we are decrementing the value by more than 1.
1081  			 *
1082  			 * V operations will resort to cv_broadcast
1083  			 * for this set because there are too many weird
1084  			 * cases that have to be caught.
1085  			 */
1086  			if (op->sem_op < -1)
1087  				sp->sem_binary = 0;
1088  			if (!held) {
1089  				held = 1;
1090  				ipc_hold(sem_svc, (kipc_perm_t *)sp);
1091  			}
1092  			semp->semncnt++;
1093  			cvres = cv_waituntil_sig(&semp->semncnt_cv, lock,
1094  			    tsp, timecheck);
1095  			lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
1096  
1097  			if (!IPC_FREE(&sp->sem_perm)) {
1098  				ASSERT(semp->semncnt != 0);
1099  				semp->semncnt--;
1100  				if (cvres > 0)	/* normal wakeup */
1101  					goto check;
1102  			}
1103  
1104  			/* EINTR or EAGAIN overrides EIDRM */
1105  			if (cvres == 0)
1106  				error = EINTR;
1107  			else if (cvres < 0)
1108  				error = EAGAIN;
1109  			else
1110  				error = EIDRM;
1111  			goto semoperr;
1112  		}
1113  
1114  		/*
1115  		 * Wait for zero value
1116  		 */
1117  		if (semp->semval) {
1118  			if (i)
1119  				sem_rollback(sp, uops, i, undo);
1120  			if (op->sem_flg & IPC_NOWAIT) {
1121  				error = EAGAIN;
1122  				goto semoperr;
1123  			}
1124  
1125  			if (!held) {
1126  				held = 1;
1127  				ipc_hold(sem_svc, (kipc_perm_t *)sp);
1128  			}
1129  			semp->semzcnt++;
1130  			cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock,
1131  			    tsp, timecheck);
1132  			lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
1133  
1134  			/*
1135  			 * Don't touch semp if the semaphores have been removed.
1136  			 */
1137  			if (!IPC_FREE(&sp->sem_perm)) {
1138  				ASSERT(semp->semzcnt != 0);
1139  				semp->semzcnt--;
1140  				if (cvres > 0)	/* normal wakeup */
1141  					goto check;
1142  			}
1143  
1144  			/* EINTR or EAGAIN overrides EIDRM */
1145  			if (cvres == 0)
1146  				error = EINTR;
1147  			else if (cvres < 0)
1148  				error = EAGAIN;
1149  			else
1150  				error = EIDRM;
1151  			goto semoperr;
1152  		}
1153  	}
1154  
1155  	/* All operations succeeded.  Update sempid for accessed semaphores. */
1156  	for (i = 0, op = uops; i++ < nsops;
1157  	    sp->sem_base[(op++)->sem_num].sempid = pp->p_pid)
1158  		;
1159  	sp->sem_otime = gethrestime_sec();
1160  	if (held)
1161  		ipc_rele(sem_svc, (kipc_perm_t *)sp);
1162  	else
1163  		mutex_exit(lock);
1164  
1165  	/* Before leaving, deallocate the buffer that held the user semops */
1166  	if (nsops != 1)
1167  		kmem_free(uops, sizeof (*uops) * nsops);
1168  	return (0);
1169  
1170  	/*
1171  	 * Error return labels
1172  	 */
1173  semoperr:
1174  	if (held)
1175  		ipc_rele(sem_svc, (kipc_perm_t *)sp);
1176  	else
1177  		mutex_exit(lock);
1178  
1179  semoperr_unlocked:
1180  
1181  	/* Before leaving, deallocate the buffer that held the user semops */
1182  	if (nsops != 1)
1183  		kmem_free(uops, sizeof (*uops) * nsops);
1184  	return (set_errno(error));
1185  }
1186  
1187  /*
1188   * semsys - System entry point for semctl, semget, and semop system calls.
1189   */
1190  static int
semsys(int opcode,uintptr_t a1,uintptr_t a2,uintptr_t a3,uintptr_t a4)1191  semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4)
1192  {
1193  	int error;
1194  
1195  	switch (opcode) {
1196  	case SEMCTL:
1197  		error = semctl((int)a1, (uint_t)a2, (int)a3, a4);
1198  		break;
1199  	case SEMGET:
1200  		error = semget((key_t)a1, (int)a2, (int)a3);
1201  		break;
1202  	case SEMOP:
1203  		error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0);
1204  		break;
1205  	case SEMIDS:
1206  		error = semids((int *)a1, (uint_t)a2, (uint_t *)a3);
1207  		break;
1208  	case SEMTIMEDOP:
1209  		error = semop((int)a1, (struct sembuf *)a2, (size_t)a3,
1210  		    (timespec_t *)a4);
1211  		break;
1212  	default:
1213  		error = set_errno(EINVAL);
1214  		break;
1215  	}
1216  	return (error);
1217  }
1218