xref: /illumos-gate/usr/src/uts/common/sys/user.h (revision ddb365bfc9e868ad24ccdcb0dc91af18b10df082)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 /*
29  * Copyright (c) 2018, Joyent, Inc.
30  * Copyright 2022 Oxide Computer Company
31  */
32 
33 
34 #ifndef _SYS_USER_H
35 #define	_SYS_USER_H
36 
37 #include <sys/types.h>
38 #include <sys/signal.h>
39 
40 #ifdef	__cplusplus
41 extern "C" {
42 #endif
43 
44 /*
45  * struct exdata is visible in and out of the kernel. This is because it
46  * is referenced in <sys/core.h> which doesn't have this kind of magic.
47  */
48 struct exdata {
49 	struct vnode	*vp;		/* vnode (presumably the exec'd file) */
50 	size_t	ux_tsize;	/* text size */
51 	size_t	ux_dsize;	/* data size */
52 	size_t	ux_bsize;	/* bss size */
53 	size_t	ux_lsize;	/* lib size */
54 	long	ux_nshlibs;	/* number of shared libs needed */
55 	short	ux_mach;	/* machine type */
56 	short	ux_mag;		/* magic number MUST be here */
57 	off_t	ux_toffset;	/* file offset to raw text */
58 	off_t	ux_doffset;	/* file offset to raw data */
59 	off_t	ux_loffset;	/* file offset to lib section */
60 	caddr_t	ux_txtorg;	/* start addr of text in mem */
61 	caddr_t	ux_datorg;	/* start addr of data in mem */
62 	caddr_t	ux_entloc;	/* entry location */
63 };
64 
65 #ifdef	__cplusplus
66 }
67 #endif
68 
69 #if defined(_KERNEL) || defined(_KMEMUSER)
70 
71 #include <sys/param.h>
72 #include <sys/pcb.h>
73 #include <sys/siginfo.h>
74 #include <sys/resource.h>
75 #include <sys/time.h>
76 #include <sys/auxv.h>
77 #include <sys/errno.h>
78 #include <sys/t_lock.h>
79 #include <sys/refstr.h>
80 
81 #ifdef	__cplusplus
82 extern "C" {
83 #endif
84 
85 /*
86  * File Descriptor assignment generation.
87  *
88  * Certain file descriptor consumers (namely epoll) need to be able to detect
89  * when the resource underlying an fd changes due to (re)assignment.  Checks
90  * comparing old and new file_t pointers work OK, but could easily be fooled by
91  * an entry freed-to and reused-from the cache.  To better detect such
92  * assignments, a generation number is kept in the uf_entry.  Whenever a
93  * non-NULL file_t is assigned to the entry, the generation is incremented,
94  * indicating the change.  There is a minute possibility that a rollover of the
95  * value could cause assignments to evade detection by consumers, but it is
96  * considered acceptably small.
97  */
98 typedef uint_t uf_entry_gen_t;
99 
100 /*
101  * Entry in the per-process list of open files.
102  * Note: only certain fields are copied in flist_grow() and flist_fork().
103  * This is indicated in brackets in the structure member comments.
104  */
105 typedef struct uf_entry {
106 	kmutex_t	uf_lock;	/* per-fd lock [never copied] */
107 	struct file	*uf_file;	/* file pointer [grow, fork] */
108 	struct fpollinfo *uf_fpollinfo;	/* poll state [grow] */
109 	int		uf_refcnt;	/* LWPs accessing this file [grow] */
110 	int		uf_alloc;	/* right subtree allocs [grow, fork] */
111 	short		uf_flag;	/* fcntl F_GETFD flags [grow, fork] */
112 	short		uf_busy;	/* file is allocated [grow, fork] */
113 	kcondvar_t	uf_wanted_cv;	/* waiting for setf() [never copied] */
114 	kcondvar_t	uf_closing_cv;	/* waiting for close() [never copied] */
115 	struct portfd	*uf_portfd;	/* associated with port [grow] */
116 	uf_entry_gen_t	uf_gen;		/* assigned fd generation [grow,fork] */
117 	/* Pad to 64 bytes (coherency granularity) to avoid false sharing */
118 	char		uf_pad[64 - sizeof (kmutex_t) - 2 * sizeof (void*) -
119 		2 * sizeof (int) - 2 * sizeof (short) -
120 		2 * sizeof (kcondvar_t) - sizeof (struct portfd *) -
121 		sizeof (uf_entry_gen_t)];	/* 64 less sizes of fields above */
122 } uf_entry_t;
123 
124 /*
125  * Retired file lists -- see flist_grow() for details.
126  */
127 typedef struct uf_rlist {
128 	struct uf_rlist	*ur_next;	/* next retired-list record */
129 	uf_entry_t	*ur_list;	/* the retired uf_entry_t array */
130 	int		ur_nfiles;	/* entries in ur_list - TODO confirm */
131 } uf_rlist_t;
132 
133 /*
134  * Per-process file information.
135  */
136 typedef struct uf_info {
137 	kmutex_t	fi_lock;	/* guards fi_list, fi_nfiles; see below */
138 	int		fi_badfd;	/* bad file descriptor # */
139 	int		fi_action;	/* action to take on bad fd use */
140 	int		fi_nfiles;	/* number of entries in fi_list[] */
141 	uf_entry_t *volatile fi_list;	/* current file list */
142 	uf_rlist_t	*fi_rlist;	/* retired file lists */
143 } uf_info_t;
144 
145 /*
146  * File list locking.
147  *
148  * Each process has a list of open files, fi_list, indexed by fd.
149  * fi_list is an array of uf_entry_t structures, each with its own lock.
150  * One might think that the correct way to lock a file descriptor would be:
151  *
152  *	ufp = fip->fi_list[fd];
153  *	mutex_enter(&ufp->uf_lock);
154  *
155  * However, that construct is only safe if fi_lock is already held.  If not,
156  * fi_list can change in the window between loading ufp and entering uf_lock.
157  * The UF_ENTER() macro deals with this possibility.  UF_ENTER(ufp, fip, fd)
158  * locks fd and sets ufp to fd's uf_entry.  The locking rules are as follows:
159  *
160  * (1) fi_lock protects fi_list and fi_nfiles.  It also protects the
161  *     uf_alloc and uf_busy fields of every fd's ufp; see fd_find() for
162  *     details on file descriptor allocation.
163  *
164  * (2) UF_ENTER(ufp, fip, fd) locks descriptor fd and sets ufp to point
165  *     to the uf_entry_t for fd.  UF_ENTER() protects all fields in ufp
166  *     except uf_alloc and uf_busy.  UF_ENTER(ufp, fip, fd) also prevents
167  *     ufp->uf_alloc, ufp->uf_busy, fip->fi_list and fip->fi_nfiles from
168  *     changing.
169  *
170  * (3) The lock ordering is (1), (2).
171  *
172  * (4) Note that fip->fi_list and fip->fi_nfiles cannot change while *any*
173  *     file list lock is held.  Thus flist_grow() must acquire all such
174  *     locks -- fi_lock and every fd's uf_lock -- to install a new file list.
175  */
176 #define	UF_ENTER(ufp, fip, fd)					\
177 	for (;;) {						\
178 		uf_entry_t *_flist = (fip)->fi_list; /* snapshot */ \
179 		(ufp) = &_flist[(fd)];				\
180 		ASSERT((fd) < (fip)->fi_nfiles);		\
181 		mutex_enter(&(ufp)->uf_lock);			\
182 		if (_flist == (fip)->fi_list) /* list unchanged */ \
183 			break;					\
184 		mutex_exit(&(ufp)->uf_lock); /* list moved; retry */ \
185 	}
186 
187 #define	UF_EXIT(ufp)	mutex_exit(&(ufp)->uf_lock)	/* drop ufp's uf_lock */
188 
189 #define	PSARGSZ		80	/* Space for exec arguments (used by ps(1)) */
190 #define	MAXCOMLEN	16	/* u_comm size; <= MAXNAMLEN, >= sizeof (ac_comm) */
191 
192 typedef struct {		/* kernel syscall set type */
193 	uint_t	word[9];	/* syscall numbers [1..288]; presumably 1 bit each */
194 } k_sysset_t;
195 
196 /*
197  * __KERN_NAUXV_IMPL is defined as a convenience sizing mechanism
198  * for the portions of the kernel that care about aux vectors.
199  *
200  * Applications that need to know how many aux vectors the kernel
201  * supplies should use the proc(5) interface to read /proc/PID/auxv.
202  *
203  * This value should not be changed in a patch.
204  */
205 #if defined(__sparc)
206 #define	__KERN_NAUXV_IMPL 20
207 #elif defined(__i386) || defined(__amd64)
208 #define	__KERN_NAUXV_IMPL 26
209 #endif	/* NOTE(review): no #else; undefined on any other ISA */
210 
211 struct execsw;			/* forward declaration for u_execsw */
212 
213 /*
214  * The user structure; one allocated per process.  Contains all the
215  * per-process data that doesn't need to be referenced while the
216  * process is swapped.
217  */
218 typedef	struct	user {
219 	/*
220 	 * These fields are initialized at process creation time and never
221 	 * modified.  They can be accessed without acquiring locks.
222 	 */
223 	struct execsw *u_execsw;	/* pointer to exec switch entry */
224 	auxv_t  u_auxv[__KERN_NAUXV_IMPL]; /* aux vector from exec */
225 	timestruc_t u_start;		/* hrestime at process start */
226 	clock_t	u_ticks;		/* lbolt at process start */
227 	char	u_comm[MAXCOMLEN + 1];	/* executable file name from exec */
228 	char	u_psargs[PSARGSZ];	/* arguments from exec */
229 	int	u_argc;			/* value of argc passed to main() */
230 	uintptr_t u_argv;		/* value of argv passed to main() */
231 	uintptr_t u_envp;		/* value of envp passed to main() */
232 	uintptr_t u_commpagep;		/* address of mapped comm page */
233 
234 	/*
235 	 * These fields are protected by p_lock:
236 	 */
237 	struct vnode *u_cdir;		/* current directory */
238 	struct vnode *u_rdir;		/* root directory */
239 	uint64_t u_mem;			/* accumulated memory usage */
240 	size_t	u_mem_max;		/* peak RSS (K) */
241 	mode_t	u_cmask;		/* mask for file creation */
242 	char	u_acflag;		/* accounting flag */
243 	char	u_systrap;		/* /proc: any syscall mask bits set? */
244 	refstr_t *u_cwd;		/* cached string for cwd */
245 
246 	k_sysset_t u_entrymask;		/* /proc syscall stop-on-entry mask */
247 	k_sysset_t u_exitmask;		/* /proc syscall stop-on-exit mask */
248 	k_sigset_t u_signodefer;	/* signals deferred when caught */
249 	k_sigset_t u_sigonstack;	/* signals taken on alternate stack */
250 	k_sigset_t u_sigresethand;	/* signals reset when caught */
251 	k_sigset_t u_sigrestart;	/* signals that restart system calls */
252 	k_sigset_t u_sigmask[MAXSIG];	/* signals held while in catcher */
253 	void	(*u_signal[MAXSIG])();	/* disposition of each signal */
254 
255 	/*
256 	 * Resource controls provide the backend for process resource limits,
257 	 * the interfaces for which are maintained for compatibility.  To
258 	 * preserve the behaviour associated with the RLIM_SAVED_CUR and
259 	 * RLIM_SAVED_MAX tokens, we retain the "saved" rlimits.
260 	 */
261 	struct rlimit64	u_saved_rlimit[RLIM_NSAVED];
262 
263 	uf_info_t	u_finfo;	/* open file information */
264 } user_t;
265 
266 #include <sys/proc.h>			/* cannot include before user defined */
267 
267 #ifdef	_KERNEL
268 #define	P_FINFO(p)	(&(p)->p_user.u_finfo)	/* address of p's u_finfo */
269 #endif	/* _KERNEL */
271 
272 #ifdef	__cplusplus
273 }
274 #endif
275 
276 #else	/* defined(_KERNEL) || defined(_KMEMUSER) */
277 
278 /*
279  * Here, we define a fake version of struct user for programs
280  * (debuggers) that use ptrace() to read and modify the saved
281  * registers directly in the u-area.  ptrace() has been removed
282  * from the operating system and now exists as a library function
283  * in libc, built on the /proc process filesystem.  The ptrace()
284  * library function provides access only to the members of the
285  * fake struct user defined here.
286  *
287  * User-level programs that must know the real contents of struct
288  * user will have to define _KMEMUSER before including <sys/user.h>.
289  * Such programs also become machine specific. Carefully consider
290  * the consequences of your actions.
291  */
292 
293 #include <sys/regset.h>
294 
295 #ifdef	__cplusplus
296 extern "C" {
297 #endif
298 
299 #define	PSARGSZ		80	/* exec args space (ps(1)); same as kernel branch */
300 
301 typedef	struct	user {		/* fake u-area for ptrace() users */
302 	gregset_t	u_reg;		/* user's saved registers */
303 	greg_t		*u_ar0;		/* address of user's saved R0 */
304 	char	u_psargs[PSARGSZ];	/* arguments from exec */
305 	void	(*u_signal[MAXSIG])();	/* disposition of each signal */
306 	int		u_code;		/* fault code on trap */
307 	caddr_t		u_addr;		/* fault PC on trap */
308 } user_t;
309 
310 #ifdef	__cplusplus
311 }
312 #endif
313 
314 #endif	/* defined(_KERNEL) || defined(_KMEMUSER) */
315 
316 #endif	/* _SYS_USER_H */
317