xref: /illumos-gate/usr/src/uts/common/sys/user.h (revision c432de9c6e1189ea0aa9b0fe1c35c18427653f27)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 /*
29  * Copyright (c) 2018, Joyent, Inc.
30  */
31 
32 
33 #ifndef _SYS_USER_H
34 #define	_SYS_USER_H
35 
36 #include <sys/types.h>
37 #include <sys/signal.h>
38 
39 #ifdef	__cplusplus
40 extern "C" {
41 #endif
42 
43 /*
44  * struct exdata is visible in and out of the kernel. This is because it
45  * is referenced in <sys/core.h> which doesn't have this kind of magic.
46  */
struct exdata {
	struct vnode *vp;		/* vnode pointer */

	/* Sizes of the individual pieces of the image. */
	size_t ux_tsize;		/* size of text */
	size_t ux_dsize;		/* size of data */
	size_t ux_bsize;		/* size of bss */
	size_t ux_lsize;		/* size of lib section */
	long ux_nshlibs;		/* count of shared libs needed */

	short ux_mach;			/* machine type */
	short ux_mag;			/* magic number; MUST stay here */

	/* Offsets into the file. */
	off_t ux_toffset;		/* where the raw text starts */
	off_t ux_doffset;		/* where the raw data starts */
	off_t ux_loffset;		/* where the lib section starts */

	/* Addresses in memory. */
	caddr_t ux_txtorg;		/* start of text */
	caddr_t ux_datorg;		/* start of data */
	caddr_t ux_entloc;		/* entry location */
};
63 
64 #ifdef	__cplusplus
65 }
66 #endif
67 
68 #if defined(_KERNEL) || defined(_KMEMUSER)
69 
70 #include <sys/param.h>
71 #include <sys/pcb.h>
72 #include <sys/siginfo.h>
73 #include <sys/resource.h>
74 #include <sys/time.h>
75 #include <sys/auxv.h>
76 #include <sys/errno.h>
77 #include <sys/t_lock.h>
78 #include <sys/refstr.h>
79 
80 #ifdef	__cplusplus
81 extern "C" {
82 #endif
83 
84 /*
85  * File Descriptor assignment generation.
86  *
87  * Certain file descriptor consumers (namely epoll) need to be able to detect
88  * when the resource underlying an fd change due to (re)assignment.  Checks
89  * comparing old and new file_t pointers work OK, but could easily be fooled by
90  * an entry freed-to and reused-from the cache.  To better detect such
91  * assingments, a generation number is kept in the uf_entry.  Whenever a
92  * non-NULL file_t is assigned to the entry, the generation is incremented,
93  * indicating the change.  There is a minute possibility that a rollover of the
94  * value could cause assigments to evade detection by consumers, but it is
95  * considered acceptably small.
96  */
97 typedef uint_t uf_entry_gen_t;
98 
99 /*
100  * Entry in the per-process list of open files.
101  * Note: only certain fields are copied in flist_grow() and flist_fork().
102  * This is indicated in brackets in the structure member comments.
103  */
104 typedef struct uf_entry {
105 	kmutex_t	uf_lock;	/* per-fd lock [never copied] */
106 	struct file	*uf_file;	/* file pointer [grow, fork] */
107 	struct fpollinfo *uf_fpollinfo;	/* poll state [grow] */
108 	int		uf_refcnt;	/* LWPs accessing this file [grow] */
109 	int		uf_alloc;	/* right subtree allocs [grow, fork] */
110 	short		uf_flag;	/* fcntl F_GETFD flags [grow, fork] */
111 	short		uf_busy;	/* file is allocated [grow, fork] */
112 	kcondvar_t	uf_wanted_cv;	/* waiting for setf() [never copied] */
113 	kcondvar_t	uf_closing_cv;	/* waiting for close() [never copied] */
114 	struct portfd 	*uf_portfd;	/* associated with port [grow] */
115 	uf_entry_gen_t	uf_gen;		/* assigned fd generation [grow,fork] */
116 	/* Avoid false sharing - pad to coherency granularity (64 bytes) */
117 	char		uf_pad[64 - sizeof (kmutex_t) - 2 * sizeof (void*) -
118 		2 * sizeof (int) - 2 * sizeof (short) -
119 		2 * sizeof (kcondvar_t) - sizeof (struct portfd *) -
120 		sizeof (uf_entry_gen_t)];
121 } uf_entry_t;
122 
123 /*
124  * Retired file lists -- see flist_grow() for details.
125  */
126 typedef struct uf_rlist {
127 	struct uf_rlist	*ur_next;
128 	uf_entry_t	*ur_list;
129 	int		ur_nfiles;
130 } uf_rlist_t;
131 
132 /*
133  * Per-process file information.
134  */
135 typedef struct uf_info {
136 	kmutex_t	fi_lock;	/* see below */
137 	int		fi_badfd;	/* bad file descriptor # */
138 	int		fi_action;	/* action to take on bad fd use */
139 	int		fi_nfiles;	/* number of entries in fi_list[] */
140 	uf_entry_t *volatile fi_list;	/* current file list */
141 	uf_rlist_t	*fi_rlist;	/* retired file lists */
142 } uf_info_t;
143 
144 /*
145  * File list locking.
146  *
147  * Each process has a list of open files, fi_list, indexed by fd.
148  * fi_list is an array of uf_entry_t structures, each with its own lock.
149  * One might think that the correct way to lock a file descriptor would be:
150  *
151  *	ufp = fip->fi_list[fd];
152  *	mutex_enter(&ufp->uf_lock);
153  *
154  * However, that construct is only safe if fi_lock is already held.  If not,
155  * fi_list can change in the window between loading ufp and entering uf_lock.
156  * The UF_ENTER() macro deals with this possibility.  UF_ENTER(ufp, fip, fd)
157  * locks fd and sets ufp to fd's uf_entry.  The locking rules are as follows:
158  *
159  * (1) fi_lock protects fi_list and fi_nfiles.  It also protects the
160  *     uf_alloc and uf_busy fields of every fd's ufp; see fd_find() for
161  *     details on file descriptor allocation.
162  *
163  * (2) UF_ENTER(ufp, fip, fd) locks descriptor fd and sets ufp to point
164  *     to the uf_entry_t for fd.  UF_ENTER() protects all fields in ufp
165  *     except uf_alloc and uf_busy.  UF_ENTER(ufp, fip, fd) also prevents
166  *     ufp->uf_alloc, ufp->uf_busy, fip->fi_list and fip->fi_nfiles from
167  *     changing.
168  *
169  * (3) The lock ordering is (1), (2).
170  *
171  * (4) Note that fip->fi_list and fip->fi_nfiles cannot change while *any*
172  *     file list lock is held.  Thus flist_grow() must acquire all such
173  *     locks -- fi_lock and every fd's uf_lock -- to install a new file list.
174  */
/*
 * UF_ENTER(ufp, fip, fd): lock the uf_entry for descriptor fd in fip's file
 * list and leave ufp pointing at that entry.  fi_list is sampled before
 * uf_lock is entered and compared again afterwards; if it changed in that
 * window the lock is dropped and the lookup is retried.
 *
 * ufp must be a modifiable lvalue.  Each argument is evaluated more than
 * once (and again on every retry), so arguments must be free of side
 * effects.  Every argument is parenthesized in the expansion so that
 * expressions such as *ufpp may be passed safely.
 */
#define	UF_ENTER(ufp, fip, fd)					\
	for (;;) {						\
		uf_entry_t *_flist = (fip)->fi_list;		\
		(ufp) = &_flist[(fd)];				\
		ASSERT((fd) < (fip)->fi_nfiles);		\
		mutex_enter(&(ufp)->uf_lock);			\
		if (_flist == (fip)->fi_list)			\
			break;					\
		mutex_exit(&(ufp)->uf_lock);			\
	}

/* UF_EXIT(ufp): drop the per-fd lock of the entry ufp points to. */
#define	UF_EXIT(ufp)	mutex_exit(&(ufp)->uf_lock)
187 
/* Space reserved for the exec arguments (used by ps(1)). */
#define	PSARGSZ		80
/* Bounds: <= MAXNAMLEN and >= sizeof (ac_comm). */
#define	MAXCOMLEN	16
190 
191 typedef struct {		/* kernel syscall set type */
192 	uint_t	word[9];	/* space for syscall numbers [1..288] */
193 } k_sysset_t;
194 
195 /*
196  * __KERN_NAUXV_IMPL is defined as a convenience sizing mechanism
197  * for the portions of the kernel that care about aux vectors.
198  *
199  * Applications that need to know how many aux vectors the kernel
200  * supplies should use the proc(4) interface to read /proc/PID/auxv.
201  *
202  * This value should not be changed in a patch.
203  */
/* No value is defined here for architectures other than those listed. */
#ifdef __sparc
#define	__KERN_NAUXV_IMPL 20	/* SPARC */
#elif defined(__i386) || defined(__amd64)
#define	__KERN_NAUXV_IMPL 25	/* x86, 32-bit and 64-bit */
#endif
209 
210 struct execsw;
211 
212 /*
213  * The user structure; one allocated per process.  Contains all the
214  * per-process data that doesn't need to be referenced while the
215  * process is swapped.
216  */
217 typedef	struct	user {
218 	/*
219 	 * These fields are initialized at process creation time and never
220 	 * modified.  They can be accessed without acquiring locks.
221 	 */
222 	struct execsw *u_execsw;	/* pointer to exec switch entry */
223 	auxv_t  u_auxv[__KERN_NAUXV_IMPL]; /* aux vector from exec */
224 	timestruc_t u_start;		/* hrestime at process start */
225 	clock_t	u_ticks;		/* lbolt at process start */
226 	char	u_comm[MAXCOMLEN + 1];	/* executable file name from exec */
227 	char	u_psargs[PSARGSZ];	/* arguments from exec */
228 	int	u_argc;			/* value of argc passed to main() */
229 	uintptr_t u_argv;		/* value of argv passed to main() */
230 	uintptr_t u_envp;		/* value of envp passed to main() */
231 	uintptr_t u_commpagep;		/* address of mapped comm page */
232 
233 	/*
234 	 * These fields are protected by p_lock:
235 	 */
236 	struct vnode *u_cdir;		/* current directory */
237 	struct vnode *u_rdir;		/* root directory */
238 	uint64_t u_mem;			/* accumulated memory usage */
239 	size_t	u_mem_max;		/* peak RSS (K) */
240 	mode_t	u_cmask;		/* mask for file creation */
241 	char	u_acflag;		/* accounting flag */
242 	char	u_systrap;		/* /proc: any syscall mask bits set? */
243 	refstr_t *u_cwd;		/* cached string for cwd */
244 
245 	k_sysset_t u_entrymask;		/* /proc syscall stop-on-entry mask */
246 	k_sysset_t u_exitmask;		/* /proc syscall stop-on-exit mask */
247 	k_sigset_t u_signodefer;	/* signals defered when caught */
248 	k_sigset_t u_sigonstack;	/* signals taken on alternate stack */
249 	k_sigset_t u_sigresethand;	/* signals reset when caught */
250 	k_sigset_t u_sigrestart;	/* signals that restart system calls */
251 	k_sigset_t u_sigmask[MAXSIG];	/* signals held while in catcher */
252 	void	(*u_signal[MAXSIG])();	/* Disposition of signals */
253 
254 	/*
255 	 * Resource controls provide the backend for process resource limits,
256 	 * the interfaces for which are maintained for compatibility.  To
257 	 * preserve the behaviour associated with the RLIM_SAVED_CUR and
258 	 * RLIM_SAVED_MAX tokens, we retain the "saved" rlimits.
259 	 */
260 	struct rlimit64	u_saved_rlimit[RLIM_NSAVED];
261 
262 	uf_info_t	u_finfo;	/* open file information */
263 } user_t;
264 
265 #include <sys/proc.h>			/* cannot include before user defined */
266 
#if defined(_KERNEL)
/* Yields the address of p->p_user.u_finfo for the process pointer p. */
#define	P_FINFO(p)	(&(p)->p_user.u_finfo)
#endif	/* defined(_KERNEL) */
270 
271 #ifdef	__cplusplus
272 }
273 #endif
274 
275 #else	/* defined(_KERNEL) || defined(_KMEMUSER) */
276 
277 /*
278  * Here, we define a fake version of struct user for programs
279  * (debuggers) that use ptrace() to read and modify the saved
280  * registers directly in the u-area.  ptrace() has been removed
281  * from the operating system and now exists as a library function
282  * in libc, built on the /proc process filesystem.  The ptrace()
283  * library function provides access only to the members of the
284  * fake struct user defined here.
285  *
286  * User-level programs that must know the real contents of struct
287  * user will have to define _KMEMUSER before including <sys/user.h>.
288  * Such programs also become machine specific. Carefully consider
289  * the consequences of your actions.
290  */
291 
292 #include <sys/regset.h>
293 
294 #ifdef	__cplusplus
295 extern "C" {
296 #endif
297 
/* Space reserved for the exec arguments (used by ps(1)). */
#define	PSARGSZ		80
299 
300 typedef	struct	user {
301 	gregset_t	u_reg;		/* user's saved registers */
302 	greg_t		*u_ar0;		/* address of user's saved R0 */
303 	char	u_psargs[PSARGSZ];	/* arguments from exec */
304 	void	(*u_signal[MAXSIG])();	/* Disposition of signals */
305 	int		u_code;		/* fault code on trap */
306 	caddr_t		u_addr;		/* fault PC on trap */
307 } user_t;
308 
309 #ifdef	__cplusplus
310 }
311 #endif
312 
313 #endif	/* defined(_KERNEL) || defined(_KMEMUSER) */
314 
315 #endif	/* _SYS_USER_H */
316