xref: /titanic_44/usr/src/uts/common/os/subr.c (revision 56f33205c9ed776c3c909e07d52e94610a675740)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/param.h>
32 #include <sys/vmparam.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/user.h>
36 #include <sys/proc.h>
37 #include <sys/conf.h>
38 #include <sys/tuneable.h>
39 #include <sys/cpuvar.h>
40 #include <sys/archsystm.h>
41 #include <sys/vmem.h>
42 #include <vm/seg_kmem.h>
43 #include <sys/errno.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/atomic.h>
47 #include <sys/model.h>
48 #include <sys/kmem.h>
49 #include <sys/memlist.h>
50 #include <sys/autoconf.h>
51 #include <sys/ontrap.h>
52 #include <sys/utsname.h>
53 #include <sys/zone.h>
54 
55 #ifdef __sparc
56 #include <sys/membar.h>
57 #endif
58 
59 /*
60  * Routine which sets a user error; placed in
61  * illegal entries in the bdevsw and cdevsw tables.
62  */
63 
64 int
nodev()65 nodev()
66 {
67 	return (curthread->t_lwp ?
68 	    ttolwp(curthread)->lwp_error = ENXIO : ENXIO);
69 }
70 
/*
 * Null routine; placed in insignificant entries in the bdevsw and
 * cdevsw tables.  It performs no work and always returns 0.
 */
int
nulldev()
{
	return (0);
}
81 
82 static kmutex_t udevlock;
83 
84 /*
85  * Generate an unused major device number.
86  */
87 major_t
getudev()88 getudev()
89 {
90 	static major_t next = 0;
91 	major_t ret;
92 
93 	/*
94 	 * Ensure that we start allocating major numbers above the 'devcnt'
95 	 * count.  The only limit we place on the number is that it should be a
96 	 * legal 32-bit SVR4 major number and be greater than or equal to devcnt
97 	 * in the current system).
98 	 */
99 	mutex_enter(&udevlock);
100 	if (next == 0)
101 		next = devcnt;
102 	if (next <= L_MAXMAJ32 && next >= devcnt)
103 		ret = next++;
104 	else {
105 		/*
106 		 * If we fail to allocate a major number because devcnt has
107 		 * reached L_MAXMAJ32, we may be the victim of a sparsely
108 		 * populated devnames array.  We scan the array backwards
109 		 * looking for an empty slot;  if we find one, mark it as
110 		 * DN_GETUDEV so it doesn't get taken by subsequent consumers
111 		 * users of the devnames array, and issue a warning.
112 		 * It is vital for this routine to take drastic measures to
113 		 * succeed, since the kernel really needs it to boot.
114 		 */
115 		int i;
116 		for (i = devcnt - 1; i >= 0; i--) {
117 			LOCK_DEV_OPS(&devnamesp[i].dn_lock);
118 			if (devnamesp[i].dn_name == NULL &&
119 			    ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0))
120 				break;
121 			UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
122 		}
123 		if (i != -1) {
124 			cmn_err(CE_WARN, "Reusing device major number %d.", i);
125 			ASSERT(i >= 0 && i < devcnt);
126 			devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV;
127 			UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
128 			ret = (major_t)i;
129 		} else {
130 			ret = DDI_MAJOR_T_NONE;
131 		}
132 	}
133 	mutex_exit(&udevlock);
134 	return (ret);
135 }
136 
137 
138 /*
139  * Compress 'long' device number encoding to 32-bit device number
140  * encoding.  If it won't fit, we return failure, but set the
141  * device number to 32-bit NODEV for the sake of our callers.
142  */
143 int
cmpldev(dev32_t * dst,dev_t dev)144 cmpldev(dev32_t *dst, dev_t dev)
145 {
146 #if defined(_LP64)
147 	if (dev == NODEV) {
148 		*dst = NODEV32;
149 	} else {
150 		major_t major = dev >> L_BITSMINOR;
151 		minor_t minor = dev & L_MAXMIN;
152 
153 		if (major > L_MAXMAJ32 || minor > L_MAXMIN32) {
154 			*dst = NODEV32;
155 			return (0);
156 		}
157 
158 		*dst = (dev32_t)((major << L_BITSMINOR32) | minor);
159 	}
160 #else
161 	*dst = (dev32_t)dev;
162 #endif
163 	return (1);
164 }
165 
166 /*
167  * Expand 32-bit dev_t's to long dev_t's.  Expansion always "fits"
168  * into the return type, but we're careful to expand NODEV explicitly.
169  */
170 dev_t
expldev(dev32_t dev32)171 expldev(dev32_t dev32)
172 {
173 #ifdef _LP64
174 	if (dev32 == NODEV32)
175 		return (NODEV);
176 	return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32,
177 	    dev32 & L_MAXMIN32));
178 #else
179 	return ((dev_t)dev32);
180 #endif
181 }
182 
183 #ifndef _LP64
184 /*
185  * Keep these entry points for 32-bit systems but enforce the use
186  * of MIN/MAX macros on 64-bit systems.  The DDI header files already
187  * define min/max as macros so drivers shouldn't need these functions.
188  */
189 
/* Return the smaller of two signed integers. */
int
min(int a, int b)
{
	if (a < b)
		return (a);
	return (b);
}
195 
/* Return the larger of two signed integers. */
int
max(int a, int b)
{
	if (a > b)
		return (a);
	return (b);
}
201 
202 uint_t
umin(uint_t a,uint_t b)203 umin(uint_t a, uint_t b)
204 {
205 	return (a < b ? a : b);
206 }
207 
208 uint_t
umax(uint_t a,uint_t b)209 umax(uint_t a, uint_t b)
210 {
211 	return (a > b ? a : b);
212 }
213 
214 #endif /* !_LP64 */
215 
/*
 * Parse suboptions from a string.
 * Same as getsubopt(3C).
 *
 * '*optionsp' points at a comma-separated list of "name" or "name=value"
 * suboptions.  The first suboption is split off in place (its trailing
 * ',' is overwritten with a null byte) and '*optionsp' is advanced to
 * the remainder of the list.
 *
 * Returns the index in the NULL-terminated 'tokens' array whose entry
 * equals the suboption name, with '*valuep' pointing at the text after
 * '=' (or NULL if there is no '=').  Returns -1 when the list is empty
 * ('*valuep' is NULL) or when no token matches ('*valuep' points at the
 * unrecognized suboption).
 */
int
getsubopt(char **optionsp, char * const *tokens, char **valuep)
{
	char *opt = *optionsp;
	char *sep;
	size_t namelen;
	int idx;

	*valuep = NULL;
	if (*opt == '\0')
		return (-1);

	/* Cut this suboption off from the rest of the list. */
	sep = strchr(opt, ',');
	if (sep != NULL)
		*sep++ = '\0';		/* mark end and step past it */
	else
		sep = opt + strlen(opt);
	*optionsp = sep;		/* caller resumes here next time */

	/* Split the suboption into its name and optional value. */
	sep = strchr(opt, '=');
	if (sep != NULL) {
		namelen = sep - opt;
		*valuep = sep + 1;
	} else {
		namelen = strlen(opt);
		*valuep = NULL;
	}

	for (idx = 0; tokens[idx] != NULL; idx++) {
		if (strlen(tokens[idx]) != namelen)
			continue;
		if (strncmp(opt, tokens[idx], namelen) == 0)
			return (idx);
	}

	/* no match, point value at option and return error */
	*valuep = opt;
	return (-1);
}
254 
/*
 * Append the suboption string 'opt' starting at the position 'str'
 * within the buffer defined by 'buf' and 'len'.  If 'buf' does not hold
 * an empty string, a comma is written first to separate the new
 * suboption from what is already there.
 *
 * Returns a pointer to the end of the resulting string (the null byte),
 * or NULL, leaving the buffer untouched, if there isn't enough space
 * left for the comma, 'opt' and the null byte.
 */
char *
append_subopt(const char *buf, size_t len, char *str, const char *opt)
{
	size_t optlen = strlen(opt);
	int need_comma = (buf[0] != '\0');	/* not the first option */

	/* Existing text + optional ',' + new option + null byte. */
	if (strlen(buf) + need_comma + optlen + 1 > len)
		return (NULL);

	if (need_comma)
		*str++ = ',';
	(void) strcpy(str, opt);

	return (str + optlen);
}
279 
280 /*
281  * Tables to convert a single byte to/from binary-coded decimal (BCD).
282  */
283 uchar_t byte_to_bcd[256] = {
284 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
285 	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
286 	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
287 	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
288 	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
289 	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
290 	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
291 	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
292 	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
293 	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
294 };
295 
296 uchar_t bcd_to_byte[256] = {		/* CSTYLED */
297 	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
298 	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,
299 	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,  0,  0,  0,  0,  0,  0,
300 	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,  0,  0,  0,  0,  0,  0,
301 	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,  0,  0,  0,  0,  0,  0,
302 	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,  0,  0,  0,  0,  0,  0,
303 	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,  0,  0,  0,  0,  0,  0,
304 	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  0,  0,  0,  0,  0,  0,
305 	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,  0,  0,  0,  0,  0,  0,
306 	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
307 };
308 
309 /*
310  * Hot-patch a single instruction in the kernel's text.
311  * If you want to patch multiple instructions you must
312  * arrange to do it so that all intermediate stages are
313  * sane -- we don't stop other cpus while doing this.
314  * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly.
315  */
316 void
hot_patch_kernel_text(caddr_t iaddr,uint32_t new_instr,uint_t size)317 hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size)
318 {
319 	caddr_t vaddr;
320 	page_t **ppp;
321 	uintptr_t off = (uintptr_t)iaddr & PAGEOFFSET;
322 
323 	vaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
324 
325 	(void) as_pagelock(&kas, &ppp, iaddr - off, PAGESIZE, S_WRITE);
326 
327 	hat_devload(kas.a_hat, vaddr, PAGESIZE,
328 	    hat_getpfnum(kas.a_hat, iaddr - off),
329 	    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
330 
331 	switch (size) {
332 	case 1:
333 		*(uint8_t *)(vaddr + off) = new_instr;
334 		break;
335 	case 2:
336 		*(uint16_t *)(vaddr + off) = new_instr;
337 		break;
338 	case 4:
339 		*(uint32_t *)(vaddr + off) = new_instr;
340 		break;
341 	default:
342 		panic("illegal hot-patch");
343 	}
344 
345 	membar_enter();
346 	sync_icache(vaddr + off, size);
347 	sync_icache(iaddr, size);
348 	as_pageunlock(&kas, ppp, iaddr - off, PAGESIZE, S_WRITE);
349 	hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
350 	vmem_free(heap_arena, vaddr, PAGESIZE);
351 }
352 
353 /*
354  * Routine to report an attempt to execute non-executable data.  If the
355  * address executed lies in the stack, explicitly say so.
356  */
357 void
report_stack_exec(proc_t * p,caddr_t addr)358 report_stack_exec(proc_t *p, caddr_t addr)
359 {
360 	if (!noexec_user_stack_log)
361 		return;
362 
363 	if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) {
364 		cmn_err(CE_NOTE, "%s[%d] attempt to execute code "
365 		    "on stack by uid %d", p->p_user.u_comm,
366 		    p->p_pid, crgetruid(p->p_cred));
367 	} else {
368 		cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable "
369 		    "data at 0x%p by uid %d", p->p_user.u_comm,
370 		    p->p_pid, (void *) addr, crgetruid(p->p_cred));
371 	}
372 
373 	delay(hz / 50);
374 }
375 
376 /*
377  * Determine whether the address range [addr, addr + len) is in memlist mp.
378  */
379 int
address_in_memlist(struct memlist * mp,uint64_t addr,size_t len)380 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len)
381 {
382 	while (mp != 0)	 {
383 		if ((addr >= mp->ml_address) &&
384 		    (addr + len <= mp->ml_address + mp->ml_size))
385 			return (1);	 /* TRUE */
386 		mp = mp->ml_next;
387 	}
388 	return (0);	/* FALSE */
389 }
390 
391 /*
392  * Pop the topmost element from the t_ontrap stack, removing the current set of
393  * on_trap() protections.  Refer to <sys/ontrap.h> for more info.  If the
394  * stack is already empty, no_trap() just returns.
395  */
396 void
no_trap(void)397 no_trap(void)
398 {
399 	if (curthread->t_ontrap != NULL) {
400 #ifdef __sparc
401 		membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */
402 #endif
403 		curthread->t_ontrap = curthread->t_ontrap->ot_prev;
404 	}
405 }
406 
407 /*
408  * Return utsname.nodename outside a zone, or the zone name within.
409  */
410 char *
uts_nodename(void)411 uts_nodename(void)
412 {
413 	if (curproc == NULL)
414 		return (utsname.nodename);
415 	return (curproc->p_zone->zone_nodename);
416 }
417