xref: /illumos-gate/usr/src/uts/common/os/subr.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/param.h>
35 #include <sys/vmparam.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/user.h>
39 #include <sys/proc.h>
40 #include <sys/conf.h>
41 #include <sys/tuneable.h>
42 #include <sys/cpuvar.h>
43 #include <sys/archsystm.h>
44 #include <sys/vmem.h>
45 #include <vm/seg_kmem.h>
46 #include <sys/errno.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/atomic.h>
50 #include <sys/model.h>
51 #include <sys/kmem.h>
52 #include <sys/memlist.h>
53 #include <sys/autoconf.h>
54 #include <sys/ontrap.h>
55 #include <sys/utsname.h>
56 #include <sys/zone.h>
57 
58 #ifdef __sparc
59 #include <sys/membar.h>
60 #endif
61 
62 /*
63  * Routine which sets a user error; placed in
64  * illegal entries in the bdevsw and cdevsw tables.
65  */
66 
67 int
68 nodev()
69 {
70 	return (curthread->t_lwp ?
71 	    ttolwp(curthread)->lwp_error = ENXIO : ENXIO);
72 }
73 
/*
 * Do-nothing stub placed in bdevsw and cdevsw entries for which no
 * action is required.  It unconditionally returns 0.
 */

int
nulldev()
{
	return (0);
}
84 
85 static kmutex_t udevlock;
86 
87 /*
88  * Generate an unused major device number.
89  */
90 major_t
91 getudev()
92 {
93 	static major_t next = 0;
94 	major_t ret;
95 
96 	/*
97 	 * Ensure that we start allocating major numbers above the 'devcnt'
98 	 * count.  The only limit we place on the number is that it should be a
99 	 * legal 32-bit SVR4 major number and be greater than or equal to devcnt
100 	 * in the current system).
101 	 */
102 	mutex_enter(&udevlock);
103 	if (next == 0)
104 		next = devcnt;
105 	if (next <= L_MAXMAJ32 && next >= devcnt)
106 		ret = next++;
107 	else {
108 		/*
109 		 * If we fail to allocate a major number because devcnt has
110 		 * reached L_MAXMAJ32, we may be the victim of a sparsely
111 		 * populated devnames array.  We scan the array backwards
112 		 * looking for an empty slot;  if we find one, mark it as
113 		 * DN_GETUDEV so it doesn't get taken by subsequent consumers
114 		 * users of the devnames array, and issue a warning.
115 		 * It is vital for this routine to take drastic measures to
116 		 * succeed, since the kernel really needs it to boot.
117 		 */
118 		int i;
119 		for (i = devcnt - 1; i >= 0; i--) {
120 			LOCK_DEV_OPS(&devnamesp[i].dn_lock);
121 			if (devnamesp[i].dn_name == NULL &&
122 			    ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0))
123 				break;
124 			UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
125 		}
126 		if (i != -1) {
127 			cmn_err(CE_WARN, "Reusing device major number %d.", i);
128 			ASSERT(i >= 0 && i < devcnt);
129 			devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV;
130 			UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
131 			ret = (major_t)i;
132 		} else {
133 			ret = DDI_MAJOR_T_NONE;
134 		}
135 	}
136 	mutex_exit(&udevlock);
137 	return (ret);
138 }
139 
140 
141 /*
142  * Compress 'long' device number encoding to 32-bit device number
143  * encoding.  If it won't fit, we return failure, but set the
144  * device number to 32-bit NODEV for the sake of our callers.
145  */
146 int
147 cmpldev(dev32_t *dst, dev_t dev)
148 {
149 #if defined(_LP64)
150 	if (dev == NODEV) {
151 		*dst = NODEV32;
152 	} else {
153 		major_t major = dev >> L_BITSMINOR;
154 		minor_t minor = dev & L_MAXMIN;
155 
156 		if (major > L_MAXMAJ32 || minor > L_MAXMIN32) {
157 			*dst = NODEV32;
158 			return (0);
159 		}
160 
161 		*dst = (dev32_t)((major << L_BITSMINOR32) | minor);
162 	}
163 #else
164 	*dst = (dev32_t)dev;
165 #endif
166 	return (1);
167 }
168 
169 /*
170  * Expand 32-bit dev_t's to long dev_t's.  Expansion always "fits"
171  * into the return type, but we're careful to expand NODEV explicitly.
172  */
173 dev_t
174 expldev(dev32_t dev32)
175 {
176 #ifdef _LP64
177 	if (dev32 == NODEV32)
178 		return (NODEV);
179 	return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32,
180 	    dev32 & L_MAXMIN32));
181 #else
182 	return ((dev_t)dev32);
183 #endif
184 }
185 
186 #ifndef _LP64
187 /*
188  * Keep these entry points for 32-bit systems but enforce the use
189  * of MIN/MAX macros on 64-bit systems.  The DDI header files already
190  * define min/max as macros so drivers shouldn't need these functions.
191  */
192 
/* Return the smaller of two signed ints; 'b' is returned when equal. */
int
min(int a, int b)
{
	if (a < b)
		return (a);
	return (b);
}
198 
/* Return the larger of two signed ints; 'b' is returned when equal. */
int
max(int a, int b)
{
	if (a > b)
		return (a);
	return (b);
}
204 
205 uint_t
206 umin(uint_t a, uint_t b)
207 {
208 	return (a < b ? a : b);
209 }
210 
211 uint_t
212 umax(uint_t a, uint_t b)
213 {
214 	return (a > b ? a : b);
215 }
216 
217 #endif /* !_LP64 */
218 
/*
 * Parse the next suboption from a comma-separated option string.
 * Same as getsubopt(3C).
 *
 * On entry *optionsp points at the string to parse; it is modified in
 * place (the ',' ending the current suboption is overwritten with a
 * null byte) and *optionsp is advanced to the following suboption.
 *
 * Returns the index in 'tokens' (a NULL-terminated array) of the
 * entry matching the suboption name, with *valuep set to the text
 * after '=' or to NULL when there is no '='.  Returns -1 when the
 * string is empty (*valuep is NULL) or when no token matches (*valuep
 * points at the unrecognized suboption).
 */
int
getsubopt(char **optionsp, char * const *tokens, char **valuep)
{
	char *opt = *optionsp;
	char *next;
	char *eq;
	size_t namelen;
	int idx;

	*valuep = NULL;
	if (opt[0] == '\0')
		return (-1);

	/* Isolate the current suboption and locate the one after it. */
	next = strchr(opt, ',');
	if (next != NULL)
		*next++ = '\0';
	else
		next = opt + strlen(opt);
	*optionsp = next;

	/* Split "name=value"; the '=' itself is left in place. */
	eq = strchr(opt, '=');
	if (eq != NULL) {
		namelen = eq - opt;
		*valuep = eq + 1;
	} else {
		namelen = strlen(opt);
	}

	/*
	 * A token matches when it agrees with the name over namelen
	 * bytes and ends exactly there.
	 */
	idx = 0;
	while (tokens[idx] != NULL) {
		if (strncmp(opt, tokens[idx], namelen) == 0 &&
		    tokens[idx][namelen] == '\0')
			return (idx);
		idx++;
	}

	/* no match, point value at option and return error */
	*valuep = opt;
	return (-1);
}
257 
/*
 * Append the suboption string 'opt' starting at the position 'str'
 * within the buffer defined by 'buf' and 'len'.  If 'buf' does not
 * hold an empty string, a comma is written first.
 *
 * Returns a pointer to the end of the resulting string (the null
 * byte), or NULL, leaving the buffer untouched, if there isn't enough
 * space left to append 'opt'.
 */
char *
append_subopt(const char *buf, size_t len, char *str, const char *opt)
{
	size_t optlen = strlen(opt);
	int need_comma = (buf[0] != '\0');
	size_t required;

	/*
	 * Space needed: what is already in the buffer, a ',' if this is
	 * not the first option, the option itself, and the null byte.
	 */
	required = strlen(buf) + need_comma + optlen + 1;
	if (required > len)
		return (NULL);

	if (need_comma)
		*str++ = ',';
	(void) strcpy(str, opt);

	return (str + optlen);
}
282 
283 /*
284  * Tables to convert a single byte to/from binary-coded decimal (BCD).
285  */
286 uchar_t byte_to_bcd[256] = {
287 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
288 	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
289 	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
290 	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
291 	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
292 	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
293 	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
294 	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
295 	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
296 	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
297 };
298 
299 uchar_t bcd_to_byte[256] = {		/* CSTYLED */
300 	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
301 	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,
302 	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,  0,  0,  0,  0,  0,  0,
303 	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,  0,  0,  0,  0,  0,  0,
304 	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,  0,  0,  0,  0,  0,  0,
305 	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,  0,  0,  0,  0,  0,  0,
306 	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,  0,  0,  0,  0,  0,  0,
307 	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  0,  0,  0,  0,  0,  0,
308 	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,  0,  0,  0,  0,  0,  0,
309 	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
310 };
311 
312 /*
313  * Hot-patch a single instruction in the kernel's text.
314  * If you want to patch multiple instructions you must
315  * arrange to do it so that all intermediate stages are
316  * sane -- we don't stop other cpus while doing this.
317  * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly.
318  */
319 void
320 hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size)
321 {
322 	caddr_t vaddr;
323 	page_t **ppp;
324 	uintptr_t off = (uintptr_t)iaddr & PAGEOFFSET;
325 
326 	vaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
327 
328 	(void) as_pagelock(&kas, &ppp, iaddr - off, PAGESIZE, S_WRITE);
329 
330 	hat_devload(kas.a_hat, vaddr, PAGESIZE,
331 	    hat_getpfnum(kas.a_hat, iaddr - off),
332 	    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
333 
334 	switch (size) {
335 	case 1:
336 		*(uint8_t *)(vaddr + off) = new_instr;
337 		break;
338 	case 2:
339 		*(uint16_t *)(vaddr + off) = new_instr;
340 		break;
341 	case 4:
342 		*(uint32_t *)(vaddr + off) = new_instr;
343 		break;
344 	default:
345 		panic("illegal hot-patch");
346 	}
347 
348 	membar_enter();
349 	sync_icache(vaddr + off, size);
350 	sync_icache(iaddr, size);
351 	as_pageunlock(&kas, ppp, iaddr - off, PAGESIZE, S_WRITE);
352 	hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
353 	vmem_free(heap_arena, vaddr, PAGESIZE);
354 }
355 
356 /*
357  * Routine to report an attempt to execute non-executable data.  If the
358  * address executed lies in the stack, explicitly say so.
359  */
360 void
361 report_stack_exec(proc_t *p, caddr_t addr)
362 {
363 	if (!noexec_user_stack_log)
364 		return;
365 
366 	if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) {
367 		cmn_err(CE_NOTE, "%s[%d] attempt to execute code "
368 		    "on stack by uid %d", p->p_user.u_comm,
369 		    p->p_pid, crgetruid(p->p_cred));
370 	} else {
371 		cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable "
372 		    "data at 0x%p by uid %d", p->p_user.u_comm,
373 		    p->p_pid, (void *) addr, crgetruid(p->p_cred));
374 	}
375 
376 	delay(hz / 50);
377 }
378 
379 /*
380  * Determine whether the address range [addr, addr + len) is in memlist mp.
381  */
382 int
383 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len)
384 {
385 	while (mp != 0)	 {
386 		if ((addr >= mp->address) &&
387 		    (addr + len <= mp->address + mp->size))
388 			return (1);	 /* TRUE */
389 		mp = mp->next;
390 	}
391 	return (0);	/* FALSE */
392 }
393 
394 /*
395  * Pop the topmost element from the t_ontrap stack, removing the current set of
396  * on_trap() protections.  Refer to <sys/ontrap.h> for more info.  If the
397  * stack is already empty, no_trap() just returns.
398  */
399 void
400 no_trap(void)
401 {
402 	if (curthread->t_ontrap != NULL) {
403 #ifdef __sparc
404 		membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */
405 #endif
406 		curthread->t_ontrap = curthread->t_ontrap->ot_prev;
407 	}
408 }
409 
410 /*
411  * Return utsname.nodename outside a zone, or the zone name within.
412  */
413 char *
414 uts_nodename(void)
415 {
416 	if (curproc == NULL)
417 		return (utsname.nodename);
418 	return (curproc->p_zone->zone_nodename);
419 }
420