xref: /titanic_51/usr/src/uts/common/os/msg.c (revision 4b476ed547ce189d989c425cd2f82986abd37b4d)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23   * Use is subject to license terms.
24   */
25  
26  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27  /*	  All Rights Reserved  	*/
28  
29  
30  #pragma ident	"%Z%%M%	%I%	%E% SMI"
31  
32  /*
33   * Inter-Process Communication Message Facility.
34   *
35   * See os/ipc.c for a description of common IPC functionality.
36   *
37   * Resource controls
38   * -----------------
39   *
40   * Control:      zone.max-msg-ids (rc_zone_msgmni)
41   * Description:  Maximum number of message queue ids allowed a zone.
42   *
43   *   When msgget() is used to allocate a message queue, one id is
44   *   allocated.  If the id allocation doesn't succeed, msgget() fails
45   *   and errno is set to ENOSPC.  Upon successful msgctl(, IPC_RMID)
46   *   the id is deallocated.
47   *
48   * Control:      project.max-msg-ids (rc_project_msgmni)
49   * Description:  Maximum number of message queue ids allowed a project.
50   *
51   *   When msgget() is used to allocate a message queue, one id is
52   *   allocated.  If the id allocation doesn't succeed, msgget() fails
53   *   and errno is set to ENOSPC.  Upon successful msgctl(, IPC_RMID)
54   *   the id is deallocated.
55   *
56   * Control:      process.max-msg-qbytes (rc_process_msgmnb)
57   * Description:  Maximum number of bytes of messages on a message queue.
58   *
59   *   When msgget() successfully allocates a message queue, the minimum
60   *   enforced value of this limit is used to initialize msg_qbytes.
61   *
62   * Control:      process.max-msg-messages (rc_process_msgtql)
63   * Description:  Maximum number of messages on a message queue.
64   *
65   *   When msgget() successfully allocates a message queue, the minimum
66   *   enforced value of this limit is used to initialize a per-queue
67   *   limit on the number of messages.
68   */
69  
70  #include <sys/types.h>
71  #include <sys/t_lock.h>
72  #include <sys/param.h>
73  #include <sys/cred.h>
74  #include <sys/user.h>
75  #include <sys/proc.h>
76  #include <sys/time.h>
77  #include <sys/ipc.h>
78  #include <sys/ipc_impl.h>
79  #include <sys/msg.h>
80  #include <sys/msg_impl.h>
81  #include <sys/list.h>
82  #include <sys/systm.h>
83  #include <sys/sysmacros.h>
84  #include <sys/cpuvar.h>
85  #include <sys/kmem.h>
86  #include <sys/ddi.h>
87  #include <sys/errno.h>
88  #include <sys/cmn_err.h>
89  #include <sys/debug.h>
90  #include <sys/project.h>
91  #include <sys/modctl.h>
92  #include <sys/syscall.h>
93  #include <sys/policy.h>
94  #include <sys/zone.h>
95  
96  #include <c2/audit.h>
97  
/*
 * The following tunables are obsolete.  Though for compatibility we
 * still read and interpret msginfo_msgmnb, msginfo_msgmni, and
 * msginfo_msgtql (see os/project.c and os/rctl_proc.c), the preferred
 * mechanism for administering the IPC Message facility is through the
 * resource controls described at the top of this file.
 */
size_t	msginfo_msgmax = 2048;	/* (obsolete) */
size_t	msginfo_msgmnb = 4096;	/* (obsolete) still read for compatibility */
int	msginfo_msgmni = 50;	/* (obsolete) still read for compatibility */
int	msginfo_msgtql = 40;	/* (obsolete) still read for compatibility */
int	msginfo_msgssz = 8;	/* (obsolete) */
int	msginfo_msgmap = 0;	/* (obsolete) */
ushort_t msginfo_msgseg = 1024;	/* (obsolete) */
112  
/*
 * Handles for the resource controls described at the top of this file;
 * they are defined outside this file.
 */
extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_project_msgmni;
extern rctl_hndl_t rc_process_msgmnb;
extern rctl_hndl_t rc_process_msgtql;
static ipc_service_t *msq_svc;		/* set by ipcs_create() in _init() */
static zone_key_t msg_zone_key;		/* set by zone_key_create() in _init() */

/* Callbacks handed to ipcs_create() and zone_key_create() by _init(). */
static void msg_dtor(kipc_perm_t *);
static void msg_rmid(kipc_perm_t *);
static void msg_remove_zone(zoneid_t, void *);
123  
124  /*
125   * Module linkage information for the kernel.
126   */
127  static ssize_t msgsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2,
128  	uintptr_t a4, uintptr_t a5);
129  
130  static struct sysent ipcmsg_sysent = {
131  	6,
132  #ifdef	_LP64
133  	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
134  #else
135  	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
136  #endif
137  	(int (*)())msgsys
138  };
139  
#ifdef	_SYSCALL32_IMPL
/*
 * Entry point used for 32-bit callers, defined later in this file.  The
 * parameter names mirror those used by the definition (a1 through a5).
 */
static ssize32_t msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3,
	uint32_t a4, uint32_t a5);

/*
 * System call table entry for msgsys32().  The leading 6 equals the
 * number of parameters msgsys32() takes (presumably the argument-count
 * member of struct sysent -- confirm against its definition).
 */
static struct sysent ipcmsg_sysent32 = {
	6,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())msgsys32
};
#endif	/* _SYSCALL32_IMPL */
150  
/* Module linkage element wrapping the native system call table entry. */
static struct modlsys modlsys = {
	&mod_syscallops, "System V message facility", &ipcmsg_sysent
};

#ifdef _SYSCALL32_IMPL
/* Module linkage element wrapping the 32-bit system call table entry. */
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V message facility", &ipcmsg_sysent32
};
#endif

/*
 * Linkage structure passed to mod_install() and mod_info() below.  The
 * 32-bit element is listed only when _SYSCALL32_IMPL is defined; the list
 * ends with NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
169  
170  
171  int
172  _init(void)
173  {
174  	int result;
175  
176  	msq_svc = ipcs_create("msqids", rc_project_msgmni, rc_zone_msgmni,
177  	    sizeof (kmsqid_t), msg_dtor, msg_rmid, AT_IPC_MSG,
178  	    offsetof(ipc_rqty_t, ipcq_msgmni));
179  	zone_key_create(&msg_zone_key, NULL, msg_remove_zone, NULL);
180  
181  	if ((result = mod_install(&modlinkage)) == 0)
182  		return (0);
183  
184  	(void) zone_key_delete(msg_zone_key);
185  	ipcs_destroy(msq_svc);
186  
187  	return (result);
188  }
189  
/*
 * Module unload entry point.  Always returns EBUSY; the system call table
 * entries above are also flagged SE_NOUNLOAD.
 */
int
_fini(void)
{
	return (EBUSY);
}
195  
/*
 * Module information entry point.  Passes modinfop, together with this
 * module's linkage structure, to mod_info() and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
201  
202  static void
203  msg_dtor(kipc_perm_t *perm)
204  {
205  	kmsqid_t *qp = (kmsqid_t *)perm;
206  	int		ii;
207  
208  	for (ii = 0; ii < MAX_QNUM_CV; ii++)
209  		ASSERT(qp->msg_rcv_cnt[ii] == 0);
210  	ASSERT(qp->msg_snd_cnt == 0);
211  	ASSERT(qp->msg_cbytes == 0);
212  	list_destroy(&qp->msg_list);
213  }
214  
215  
/*
 * msg_hold - take an additional reference on a message by incrementing
 * its msg_copycnt.  The expansion is fully parenthesized so the macro
 * behaves as one expression wherever it is used.  Each hold is paired
 * with a msg_rele().
 */
#define	msg_hold(mp)	((mp)->msg_copycnt++)
217  
218  /*
219   * msg_rele - decrement the reference count on the message.  When count
220   * reaches zero, free message header and contents.
221   */
222  static void
223  msg_rele(struct msg *mp)
224  {
225  	ASSERT(mp->msg_copycnt > 0);
226  	if (mp->msg_copycnt-- == 1) {
227  		if (mp->msg_addr)
228  			kmem_free(mp->msg_addr, mp->msg_size);
229  		kmem_free(mp, sizeof (struct msg));
230  	}
231  }
232  
233  /*
234   * msgunlink - Unlink msg from queue, decrement byte count and wake up anyone
235   * waiting for free bytes on queue.
236   *
237   * Called with queue locked.
238   */
239  static void
240  msgunlink(kmsqid_t *qp, struct msg *mp)
241  {
242  	list_remove(&qp->msg_list, mp);
243  	qp->msg_qnum--;
244  	qp->msg_cbytes -= mp->msg_size;
245  	msg_rele(mp);
246  
247  	/* Wake up waiting writers */
248  	if (qp->msg_snd_cnt)
249  		cv_broadcast(&qp->msg_snd_cv);
250  }
251  
252  static void
253  msg_rmid(kipc_perm_t *perm)
254  {
255  	kmsqid_t *qp = (kmsqid_t *)perm;
256  	struct msg *mp;
257  	int		ii;
258  
259  
260  	while ((mp = list_head(&qp->msg_list)) != NULL)
261  		msgunlink(qp, mp);
262  	ASSERT(qp->msg_cbytes == 0);
263  
264  	for (ii = 0; ii < MAX_QNUM_CV; ii++) {
265  		if (qp->msg_rcv_cnt[ii])
266  			cv_broadcast(&qp->msg_rcv_cv[ii]);
267  	}
268  	if (qp->msg_snd_cnt)
269  		cv_broadcast(&qp->msg_snd_cv);
270  }
271  
272  /*
273   * msgctl system call.
274   *
275   * gets q lock (via ipc_lookup), releases before return.
276   * may call users of msg_lock
277   */
278  static int
279  msgctl(int msgid, int cmd, void *arg)
280  {
281  	STRUCT_DECL(msqid_ds, ds);		/* SVR4 queue work area */
282  	kmsqid_t		*qp;		/* ptr to associated q */
283  	int			error, ii;
284  	struct	cred		*cr;
285  	model_t	mdl = get_udatamodel();
286  	struct msqid_ds64	ds64;
287  	kmutex_t		*lock;
288  	proc_t			*pp = curproc;
289  
290  	STRUCT_INIT(ds, mdl);
291  	cr = CRED();
292  
293  	/*
294  	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
295  	 */
296  	switch (cmd) {
297  	case IPC_SET:
298  		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
299  			return (set_errno(EFAULT));
300  		break;
301  
302  	case IPC_SET64:
303  		if (copyin(arg, &ds64, sizeof (struct msqid_ds64)))
304  			return (set_errno(EFAULT));
305  		break;
306  
307  	case IPC_RMID:
308  		if (error = ipc_rmid(msq_svc, msgid, cr))
309  			return (set_errno(error));
310  		return (0);
311  	}
312  
313  	/*
314  	 * get msqid_ds for this msgid
315  	 */
316  	if ((lock = ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL)
317  		return (set_errno(EINVAL));
318  
319  	switch (cmd) {
320  	case IPC_SET:
321  		if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes &&
322  		    secpolicy_ipc_config(cr) != 0) {
323  			mutex_exit(lock);
324  			return (set_errno(EPERM));
325  		}
326  		if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm,
327  		    &STRUCT_BUF(ds)->msg_perm, mdl)) {
328  			mutex_exit(lock);
329  			return (set_errno(error));
330  		}
331  		qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes);
332  		qp->msg_ctime = gethrestime_sec();
333  		break;
334  
335  	case IPC_STAT:
336  		if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) {
337  			mutex_exit(lock);
338  			return (set_errno(error));
339  		}
340  
341  		for (ii = 0; ii < MAX_QNUM_CV; ii++) {
342  			if (qp->msg_rcv_cnt[ii]) {
343  				qp->msg_perm.ipc_mode |= MSG_RWAIT;
344  				break;
345  			}
346  		}
347  		if (qp->msg_snd_cnt)
348  			qp->msg_perm.ipc_mode |= MSG_WWAIT;
349  		ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl);
350  		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
351  		STRUCT_FSETP(ds, msg_first, NULL); 	/* kernel addr */
352  		STRUCT_FSETP(ds, msg_last, NULL);
353  		STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes);
354  		STRUCT_FSET(ds, msg_qnum, qp->msg_qnum);
355  		STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes);
356  		STRUCT_FSET(ds, msg_lspid, qp->msg_lspid);
357  		STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid);
358  		STRUCT_FSET(ds, msg_stime, qp->msg_stime);
359  		STRUCT_FSET(ds, msg_rtime, qp->msg_rtime);
360  		STRUCT_FSET(ds, msg_ctime, qp->msg_ctime);
361  		break;
362  
363  	case IPC_SET64:
364  		mutex_enter(&pp->p_lock);
365  		if ((ds64.msgx_qbytes > qp->msg_qbytes) &&
366  		    secpolicy_ipc_config(cr) != 0 &&
367  		    rctl_test(rc_process_msgmnb, pp->p_rctls, pp,
368  		    ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) {
369  			mutex_exit(&pp->p_lock);
370  			mutex_exit(lock);
371  			return (set_errno(EPERM));
372  		}
373  		mutex_exit(&pp->p_lock);
374  		if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm,
375  		    &ds64.msgx_perm)) {
376  			mutex_exit(lock);
377  			return (set_errno(error));
378  		}
379  		qp->msg_qbytes = ds64.msgx_qbytes;
380  		qp->msg_ctime = gethrestime_sec();
381  		break;
382  
383  	case IPC_STAT64:
384  		for (ii = 0; ii < MAX_QNUM_CV; ii++) {
385  			if (qp->msg_rcv_cnt[ii]) {
386  				qp->msg_perm.ipc_mode |= MSG_RWAIT;
387  				break;
388  			}
389  		}
390  		if (qp->msg_snd_cnt)
391  			qp->msg_perm.ipc_mode |= MSG_WWAIT;
392  		ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm);
393  		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
394  		ds64.msgx_cbytes = qp->msg_cbytes;
395  		ds64.msgx_qnum = qp->msg_qnum;
396  		ds64.msgx_qbytes = qp->msg_qbytes;
397  		ds64.msgx_lspid = qp->msg_lspid;
398  		ds64.msgx_lrpid = qp->msg_lrpid;
399  		ds64.msgx_stime = qp->msg_stime;
400  		ds64.msgx_rtime = qp->msg_rtime;
401  		ds64.msgx_ctime = qp->msg_ctime;
402  		break;
403  
404  	default:
405  		mutex_exit(lock);
406  		return (set_errno(EINVAL));
407  	}
408  
409  	mutex_exit(lock);
410  
411  	/*
412  	 * Do copyout last (after releasing mutex).
413  	 */
414  	switch (cmd) {
415  	case IPC_STAT:
416  		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
417  			return (set_errno(EFAULT));
418  		break;
419  
420  	case IPC_STAT64:
421  		if (copyout(&ds64, arg, sizeof (struct msqid_ds64)))
422  			return (set_errno(EFAULT));
423  		break;
424  	}
425  
426  	return (0);
427  }
428  
/*
 * Remove all message queues associated with a given zone.  Called by
 * zone_shutdown when the zone is halted.
 *
 * Registered with zone_key_create() in _init().  The work is delegated to
 * ipc_remove_zone(); arg is not used.
 */
/*ARGSUSED1*/
static void
msg_remove_zone(zoneid_t zoneid, void *arg)
{
	ipc_remove_zone(msq_svc, zoneid);
}
439  
440  /*
441   * msgget system call.
442   */
443  static int
444  msgget(key_t key, int msgflg)
445  {
446  	kmsqid_t	*qp;
447  	kmutex_t	*lock;
448  	int		id, error;
449  	int		ii;
450  	proc_t		*pp = curproc;
451  
452  top:
453  	if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock))
454  		return (set_errno(error));
455  
456  	if (IPC_FREE(&qp->msg_perm)) {
457  		mutex_exit(lock);
458  		mutex_exit(&pp->p_lock);
459  
460  		list_create(&qp->msg_list, sizeof (struct msg),
461  		    offsetof(struct msg, msg_node));
462  		qp->msg_qnum = 0;
463  		qp->msg_lspid = qp->msg_lrpid = 0;
464  		qp->msg_stime = qp->msg_rtime = 0;
465  		qp->msg_ctime = gethrestime_sec();
466  		for (ii = 0; ii < MAX_QNUM_CV; ii++)
467  			qp->msg_rcv_cnt[ii] = 0;
468  		qp->msg_snd_cnt = 0;
469  
470  		if (error = ipc_commit_begin(msq_svc, key, msgflg,
471  		    (kipc_perm_t *)qp)) {
472  			if (error == EAGAIN)
473  				goto top;
474  			return (set_errno(error));
475  		}
476  		qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb,
477  		    pp->p_rctls, pp);
478  		qp->msg_qmax = rctl_enforced_value(rc_process_msgtql,
479  		    pp->p_rctls, pp);
480  		lock = ipc_commit_end(msq_svc, &qp->msg_perm);
481  	}
482  #ifdef C2_AUDIT
483  	if (audit_active)
484  		audit_ipcget(AT_IPC_MSG, (void *)qp);
485  #endif
486  	id = qp->msg_perm.ipc_id;
487  	mutex_exit(lock);
488  	return (id);
489  }
490  
/*
 * msgrcv system call.
 *
 * Receives one message from queue msqid into the user buffer msgp.
 *
 *   msgtyp == 0	take the message at the head of the queue.
 *   msgtyp > 0		take the first message whose type equals msgtyp.
 *   msgtyp < 0		take the message with the lowest type that is no
 *			greater than -msgtyp; among messages of equal type
 *			the one queued earliest wins.
 *
 * At most msgsz bytes of message text are copied out.  A longer message
 * fails with E2BIG unless MSG_NOERROR is set in msgflg, in which case
 * only msgsz bytes are copied out; the message is unlinked from the queue
 * either way once the copy has succeeded.
 *
 * If no message matches, the call fails with ENOMSG when IPC_NOWAIT is
 * set and otherwise sleeps until woken, then searches again.
 *
 * Returns the number of text bytes transferred on success.  On failure
 * returns the result of set_errno() with EINVAL (unknown msqid), the error
 * from ipcperm_access(), E2BIG, ENOMSG, EFAULT (copyout failed; the
 * message stays on the queue), EIDRM (queue removed meanwhile) or EINTR
 * (cv_wait_sig() returned 0).
 */
static ssize_t
msgrcv(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, long msgtyp, int msgflg)
{
	struct msg	*mp;	/* ptr to msg on q */
	struct msg	*smp;	/* ptr to best msg on q */
	kmsqid_t	*qp;	/* ptr to associated q */
	kmutex_t	*lock;
	size_t		xtsz;	/* transfer byte count */
	int		error = 0, copyerror = 0;
	int		cvres;
	STRUCT_HANDLE(ipcmsgbuf, umsgp);
	model_t		mdl = get_udatamodel();

	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
	STRUCT_SET_HANDLE(umsgp, mdl, msgp);

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
		return ((ssize_t)set_errno(EINVAL));
	/* Hold the queue; every exit below goes through ipc_rele(). */
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED()))
		goto msgrcv_out;

findmsg:
	/* Select the candidate message according to msgtyp. */
	smp = NULL;
	mp = list_head(&qp->msg_list);
	if (msgtyp == 0) {
		smp = mp;
	} else {
		for (; mp; mp = list_next(&qp->msg_list, mp)) {
			if (msgtyp > 0) {
				/* Exact type match: the first one wins. */
				if (msgtyp != mp->msg_type)
					continue;
				smp = mp;
				break;
			}
			/*
			 * msgtyp < 0: keep the lowest qualifying type.  A
			 * later message replaces the current candidate only
			 * if its type is strictly lower.
			 */
			if (mp->msg_type <= -msgtyp) {
				if (smp && smp->msg_type <= mp->msg_type)
					continue;
				smp = mp;
			}
		}
	}

	if (smp) {
		/*
		 * Message found.
		 */
		if ((smp->msg_flags & MSG_RCVCOPY) == 0) {
			/*
			 * No one else is copying this message. Copy it.
			 */
			if (msgsz < smp->msg_size) {
				if ((msgflg & MSG_NOERROR) == 0) {
					error = E2BIG;
					goto msgrcv_out;
				} else {
					xtsz = msgsz;
				}
			} else {
				xtsz = smp->msg_size;
			}

			/*
			 * Mark message as being copied out. Release mutex
			 * while copying out.
			 */
			ASSERT((smp->msg_flags & MSG_RCVCOPY) == 0);
			smp->msg_flags |= MSG_RCVCOPY;
			msg_hold(smp);
			mutex_exit(lock);

			if (mdl == DATAMODEL_NATIVE) {
				copyerror = copyout(&smp->msg_type, msgp,
				    sizeof (smp->msg_type));
			} else {
				/*
				 * 32-bit callers need an imploded msg type.
				 */
				int32_t	msg_type32 = smp->msg_type;

				copyerror = copyout(&msg_type32, msgp,
				    sizeof (msg_type32));
			}

			/* Copy the text only if the type went out and xtsz != 0. */
			if (copyerror == 0 && xtsz)
				copyerror = copyout(smp->msg_addr,
				    STRUCT_FADDR(umsgp, mtext), xtsz);

			/*
			 * Reclaim mutex, make sure queue still exists,
			 * and remove message.
			 */
			lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
			ASSERT(smp->msg_flags & MSG_RCVCOPY);
			smp->msg_flags &= ~MSG_RCVCOPY;
			msg_rele(smp);

			if (IPC_FREE(&qp->msg_perm)) {
				error = EIDRM;
				goto msgrcv_out;
			}
			/*
			 * MSG_RCVCOPY was set while we dropped and reacquired
			 * the lock. A thread looking for same message type
			 * might have entered during that interval and seeing
			 * MSG_RCVCOPY set, would have landed up in the sleepq.
			 */
			cv_broadcast(&qp->msg_rcv_cv[MSG_QNUM(smp->msg_type)]);
			cv_broadcast(&qp->msg_rcv_cv[0]);

			/* On a failed copy the message is left on the queue. */
			if (copyerror) {
				error = EFAULT;
				goto msgrcv_out;
			}
			qp->msg_lrpid = ttoproc(curthread)->p_pid;
			qp->msg_rtime = gethrestime_sec();
			msgunlink(qp, smp);
			goto msgrcv_out;
		}

		/*
		 * NOTE(review): a matching message that another thread is
		 * copying falls through to the wait below, even when
		 * IPC_NOWAIT is set -- confirm this is intended.
		 */
	} else {
		/*
		 * No message found.
		 */
		if (msgflg & IPC_NOWAIT) {
			error = ENOMSG;
			goto msgrcv_out;
		}
	}

	/* Wait for new message */
	qp->msg_rcv_cnt[MSG_QNUM(msgtyp)]++;
	cvres = cv_wait_sig(&qp->msg_rcv_cv[MSG_QNUM(msgtyp)], lock);
	lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
	qp->msg_rcv_cnt[MSG_QNUM(msgtyp)]--;

	/* The queue may have been removed while we slept. */
	if (IPC_FREE(&qp->msg_perm)) {
		error = EIDRM;
		goto msgrcv_out;
	}
	if (cvres == 0) {
		error = EINTR;
		goto msgrcv_out;
	}

	goto findmsg;

msgrcv_out:
	/* Release the hold taken after the lookup. */
	ipc_rele(msq_svc, (kipc_perm_t *)qp);
	if (error)
		return ((ssize_t)set_errno(error));
	return ((ssize_t)xtsz);
}
648  
649  /*
650   * msgids system call.
651   */
652  static int
653  msgids(int *buf, uint_t nids, uint_t *pnids)
654  {
655  	int error;
656  
657  	if (error = ipc_ids(msq_svc, buf, nids, pnids))
658  		return (set_errno(error));
659  
660  	return (0);
661  }
662  
663  #define	RND(x)		roundup((x), sizeof (size_t))
664  #define	RND32(x)	roundup((x), sizeof (size32_t))
665  
666  /*
667   * msgsnap system call.
668   */
669  static int
670  msgsnap(int msqid, caddr_t buf, size_t bufsz, long msgtyp)
671  {
672  	struct msg	*mp;	/* ptr to msg on q */
673  	kmsqid_t	*qp;	/* ptr to associated q */
674  	kmutex_t	*lock;
675  	size_t		size;
676  	size_t		nmsg;
677  	struct msg	**snaplist;
678  	int		error, i;
679  	model_t		mdl = get_udatamodel();
680  	STRUCT_DECL(msgsnap_head, head);
681  	STRUCT_DECL(msgsnap_mhead, mhead);
682  
683  	STRUCT_INIT(head, mdl);
684  	STRUCT_INIT(mhead, mdl);
685  
686  	if (bufsz < STRUCT_SIZE(head))
687  		return (set_errno(EINVAL));
688  
689  	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
690  		return (set_errno(EINVAL));
691  
692  	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
693  		mutex_exit(lock);
694  		return (set_errno(error));
695  	}
696  	ipc_hold(msq_svc, (kipc_perm_t *)qp);
697  
698  	/*
699  	 * First compute the required buffer size and
700  	 * the number of messages on the queue.
701  	 */
702  	size = nmsg = 0;
703  	for (mp = list_head(&qp->msg_list); mp;
704  	    mp = list_next(&qp->msg_list, mp)) {
705  		if (msgtyp == 0 ||
706  		    (msgtyp > 0 && msgtyp == mp->msg_type) ||
707  		    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
708  			nmsg++;
709  			if (mdl == DATAMODEL_NATIVE)
710  				size += RND(mp->msg_size);
711  			else
712  				size += RND32(mp->msg_size);
713  		}
714  	}
715  
716  	size += STRUCT_SIZE(head) + nmsg * STRUCT_SIZE(mhead);
717  	if (size > bufsz)
718  		nmsg = 0;
719  
720  	if (nmsg > 0) {
721  		/*
722  		 * Mark the messages as being copied.
723  		 */
724  		snaplist = (struct msg **)kmem_alloc(nmsg *
725  		    sizeof (struct msg *), KM_SLEEP);
726  		i = 0;
727  		for (mp = list_head(&qp->msg_list); mp;
728  		    mp = list_next(&qp->msg_list, mp)) {
729  			if (msgtyp == 0 ||
730  			    (msgtyp > 0 && msgtyp == mp->msg_type) ||
731  			    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
732  				msg_hold(mp);
733  				snaplist[i] = mp;
734  				i++;
735  			}
736  		}
737  	}
738  	mutex_exit(lock);
739  
740  	/*
741  	 * Copy out the buffer header.
742  	 */
743  	STRUCT_FSET(head, msgsnap_size, size);
744  	STRUCT_FSET(head, msgsnap_nmsg, nmsg);
745  	if (copyout(STRUCT_BUF(head), buf, STRUCT_SIZE(head)))
746  		error = EFAULT;
747  
748  	buf += STRUCT_SIZE(head);
749  
750  	/*
751  	 * Now copy out the messages one by one.
752  	 */
753  	for (i = 0; i < nmsg; i++) {
754  		mp = snaplist[i];
755  		if (error == 0) {
756  			STRUCT_FSET(mhead, msgsnap_mlen, mp->msg_size);
757  			STRUCT_FSET(mhead, msgsnap_mtype, mp->msg_type);
758  			if (copyout(STRUCT_BUF(mhead), buf, STRUCT_SIZE(mhead)))
759  				error = EFAULT;
760  			buf += STRUCT_SIZE(mhead);
761  
762  			if (error == 0 &&
763  			    mp->msg_size != 0 &&
764  			    copyout(mp->msg_addr, buf, mp->msg_size))
765  				error = EFAULT;
766  			if (mdl == DATAMODEL_NATIVE)
767  				buf += RND(mp->msg_size);
768  			else
769  				buf += RND32(mp->msg_size);
770  		}
771  		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
772  		msg_rele(mp);
773  		/* Check for msg q deleted or reallocated */
774  		if (IPC_FREE(&qp->msg_perm))
775  			error = EIDRM;
776  		mutex_exit(lock);
777  	}
778  
779  	(void) ipc_lock(msq_svc, qp->msg_perm.ipc_id);
780  	ipc_rele(msq_svc, (kipc_perm_t *)qp);
781  
782  	if (nmsg > 0)
783  		kmem_free(snaplist, nmsg * sizeof (struct msg *));
784  
785  	if (error)
786  		return (set_errno(error));
787  	return (0);
788  }
789  
790  #define	MSG_PREALLOC_LIMIT 8192
791  
792  /*
793   * msgsnd system call.
794   */
795  static int
796  msgsnd(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, int msgflg)
797  {
798  	kmsqid_t	*qp;
799  	kmutex_t	*lock = NULL;
800  	struct msg	*mp = NULL;
801  	long		type;
802  	int		error = 0;
803  	model_t		mdl = get_udatamodel();
804  	STRUCT_HANDLE(ipcmsgbuf, umsgp);
805  
806  	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
807  	STRUCT_SET_HANDLE(umsgp, mdl, msgp);
808  
809  	if (mdl == DATAMODEL_NATIVE) {
810  		if (copyin(msgp, &type, sizeof (type)))
811  			return (set_errno(EFAULT));
812  	} else {
813  		int32_t	type32;
814  		if (copyin(msgp, &type32, sizeof (type32)))
815  			return (set_errno(EFAULT));
816  		type = type32;
817  	}
818  
819  	if (type < 1)
820  		return (set_errno(EINVAL));
821  
822  	/*
823  	 * We want the value here large enough that most of the
824  	 * the message operations will use the "lockless" path,
825  	 * but small enough that a user can not reserve large
826  	 * chunks of kernel memory unless they have a valid
827  	 * reason to.
828  	 */
829  	if (msgsz <= MSG_PREALLOC_LIMIT) {
830  		/*
831  		 * We are small enough that we can afford to do the
832  		 * allocation now.  This saves dropping the lock
833  		 * and then reacquiring the lock.
834  		 */
835  		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
836  		mp->msg_copycnt = 1;
837  		mp->msg_size = msgsz;
838  		if (msgsz) {
839  			mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
840  			if (copyin(STRUCT_FADDR(umsgp, mtext),
841  			    mp->msg_addr, msgsz) == -1) {
842  				error = EFAULT;
843  				goto msgsnd_out;
844  			}
845  		}
846  	}
847  
848  	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) {
849  		error = EINVAL;
850  		goto msgsnd_out;
851  	}
852  
853  	ipc_hold(msq_svc, (kipc_perm_t *)qp);
854  
855  	if (msgsz > qp->msg_qbytes) {
856  		error = EINVAL;
857  		goto msgsnd_out;
858  	}
859  
860  	if (error = ipcperm_access(&qp->msg_perm, MSG_W, CRED()))
861  		goto msgsnd_out;
862  
863  top:
864  	/*
865  	 * Allocate space on q, message header, & buffer space.
866  	 */
867  	ASSERT(qp->msg_qnum <= qp->msg_qmax);
868  	while ((msgsz > qp->msg_qbytes - qp->msg_cbytes) ||
869  	    (qp->msg_qnum == qp->msg_qmax)) {
870  		int cvres;
871  
872  		if (msgflg & IPC_NOWAIT) {
873  			error = EAGAIN;
874  			goto msgsnd_out;
875  		}
876  
877  		qp->msg_snd_cnt++;
878  		cvres = cv_wait_sig(&qp->msg_snd_cv, lock);
879  		lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
880  		qp->msg_snd_cnt--;
881  
882  		if (IPC_FREE(&qp->msg_perm)) {
883  			error = EIDRM;
884  			goto msgsnd_out;
885  		}
886  
887  		if (cvres == 0) {
888  			error = EINTR;
889  			goto msgsnd_out;
890  		}
891  	}
892  
893  	if (mp == NULL) {
894  		int failure;
895  
896  		mutex_exit(lock);
897  		ASSERT(msgsz > 0);
898  		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
899  		mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
900  		mp->msg_size = msgsz;
901  		mp->msg_copycnt = 1;
902  
903  		failure = (copyin(STRUCT_FADDR(umsgp, mtext),
904  		    mp->msg_addr, msgsz) == -1);
905  		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
906  		if (IPC_FREE(&qp->msg_perm)) {
907  			error = EIDRM;
908  			goto msgsnd_out;
909  		}
910  		if (failure) {
911  			error = EFAULT;
912  			goto msgsnd_out;
913  		}
914  		goto top;
915  	}
916  
917  	/*
918  	 * Everything is available, put msg on q.
919  	 */
920  	qp->msg_qnum++;
921  	qp->msg_cbytes += msgsz;
922  	qp->msg_lspid = curproc->p_pid;
923  	qp->msg_stime = gethrestime_sec();
924  	mp->msg_type = type;
925  	mp->msg_flags = 0;
926  	list_insert_tail(&qp->msg_list, mp);
927  	/*
928  	 * For all message type >= 1.
929  	 */
930  	if (qp->msg_rcv_cnt[MSG_QNUM(type)])
931  		cv_broadcast(&qp->msg_rcv_cv[MSG_QNUM(type)]);
932  	/*
933  	 * For all message type < 1.
934  	 */
935  	if (qp->msg_rcv_cnt[0])
936  		cv_broadcast(&qp->msg_rcv_cv[0]);
937  
938  msgsnd_out:
939  	if (lock)
940  		ipc_rele(msq_svc, (kipc_perm_t *)qp);	/* drops lock */
941  
942  	if (error) {
943  		if (mp)
944  			msg_rele(mp);
945  		return (set_errno(error));
946  	}
947  
948  	return (0);
949  }
950  
951  /*
952   * msgsys - System entry point for msgctl, msgget, msgrcv, and msgsnd
953   * system calls.
954   */
955  static ssize_t
956  msgsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3,
957  	uintptr_t a4, uintptr_t a5)
958  {
959  	ssize_t error;
960  
961  	switch (opcode) {
962  	case MSGGET:
963  		error = msgget((key_t)a1, (int)a2);
964  		break;
965  	case MSGCTL:
966  		error = msgctl((int)a1, (int)a2, (void *)a3);
967  		break;
968  	case MSGRCV:
969  		error = msgrcv((int)a1, (struct ipcmsgbuf *)a2,
970  		    (size_t)a3, (long)a4, (int)a5);
971  		break;
972  	case MSGSND:
973  		error = msgsnd((int)a1, (struct ipcmsgbuf *)a2,
974  		    (size_t)a3, (int)a4);
975  		break;
976  	case MSGIDS:
977  		error = msgids((int *)a1, (uint_t)a2, (uint_t *)a3);
978  		break;
979  	case MSGSNAP:
980  		error = msgsnap((int)a1, (caddr_t)a2, (size_t)a3, (long)a4);
981  		break;
982  	default:
983  		error = set_errno(EINVAL);
984  		break;
985  	}
986  
987  	return (error);
988  }
989  
#ifdef	_SYSCALL32_IMPL
/*
 * msgsys32 - System entry point for the msgget, msgctl, msgrcv, msgsnd,
 * msgids, and msgsnap system calls for 32-bit callers on LP64 kernel.
 *
 * Pointer arguments are widened through uintptr_t.  The msgtyp argument
 * of msgrcv and msgsnap, and the size argument of msgsnd, are
 * sign-extended through int32_t before being widened.  Returns the
 * handler's result converted to ssize32_t, or the result of
 * set_errno(EINVAL) for an unknown opcode.
 */
static ssize32_t
msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3,
	uint32_t a4, uint32_t a5)
{
	switch (opcode) {
	case MSGGET:
		return (msgget((key_t)a1, (int)a2));

	case MSGCTL:
		return (msgctl((int)a1, (int)a2, (void *)(uintptr_t)a3));

	case MSGRCV:
		return (msgrcv((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)a3, (long)(int32_t)a4, (int)a5));

	case MSGSND:
		return (msgsnd((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)(int32_t)a3, (int)a4));

	case MSGIDS:
		return (msgids((int *)(uintptr_t)a1, (uint_t)a2,
		    (uint_t *)(uintptr_t)a3));

	case MSGSNAP:
		return (msgsnap((int)a1, (caddr_t)(uintptr_t)a2, (size_t)a3,
		    (long)(int32_t)a4));

	default:
		return (set_errno(EINVAL));
	}
}
#endif	/* _SYSCALL32_IMPL */
1032