1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2019 Joyent, Inc.
28 */
29
30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
35 #include <sys/param.h>
36 #include <sys/vmparam.h>
37 #include <sys/systm.h>
38 #include <sys/cred.h>
39 #include <sys/user.h>
40 #include <sys/proc.h>
41 #include <sys/conf.h>
42 #include <sys/tuneable.h>
43 #include <sys/cpuvar.h>
44 #include <sys/archsystm.h>
45 #include <sys/vmem.h>
46 #include <vm/seg_kmem.h>
47 #include <sys/errno.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/atomic.h>
51 #include <sys/model.h>
52 #include <sys/kmem.h>
53 #include <sys/memlist.h>
54 #include <sys/autoconf.h>
55 #include <sys/ontrap.h>
56 #include <sys/utsname.h>
57 #include <sys/zone.h>
58
59 #ifdef __sparc
60 #include <sys/membar.h>
61 #endif
62
63 /*
64 * Routine which sets a user error; placed in
65 * illegal entries in the bdevsw and cdevsw tables.
66 */
67
68 int
nodev()69 nodev()
70 {
71 return (curthread->t_lwp ?
72 ttolwp(curthread)->lwp_error = ENXIO : ENXIO);
73 }
74
/*
 * No-op entry point; placed in insignificant entries in the bdevsw and
 * cdevsw tables.  Does nothing and always returns 0.
 */
int
nulldev()
{
	return (0);
}
85
86 static kmutex_t udevlock;
87
88 /*
89 * Generate an unused major device number.
90 */
91 major_t
getudev()92 getudev()
93 {
94 static major_t next = 0;
95 major_t ret;
96
97 /*
98 * Ensure that we start allocating major numbers above the 'devcnt'
99 * count. The only limit we place on the number is that it should be a
100 * legal 32-bit SVR4 major number and be greater than or equal to devcnt
101 * in the current system).
102 */
103 mutex_enter(&udevlock);
104 if (next == 0)
105 next = devcnt;
106 if (next <= L_MAXMAJ32 && next >= devcnt)
107 ret = next++;
108 else {
109 /*
110 * If we fail to allocate a major number because devcnt has
111 * reached L_MAXMAJ32, we may be the victim of a sparsely
112 * populated devnames array. We scan the array backwards
113 * looking for an empty slot; if we find one, mark it as
114 * DN_GETUDEV so it doesn't get taken by subsequent consumers
115 * users of the devnames array, and issue a warning.
116 * It is vital for this routine to take drastic measures to
117 * succeed, since the kernel really needs it to boot.
118 */
119 int i;
120 for (i = devcnt - 1; i >= 0; i--) {
121 LOCK_DEV_OPS(&devnamesp[i].dn_lock);
122 if (devnamesp[i].dn_name == NULL &&
123 ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0))
124 break;
125 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
126 }
127 if (i != -1) {
128 cmn_err(CE_WARN, "Reusing device major number %d.", i);
129 ASSERT(i >= 0 && i < devcnt);
130 devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV;
131 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
132 ret = (major_t)i;
133 } else {
134 ret = DDI_MAJOR_T_NONE;
135 }
136 }
137 mutex_exit(&udevlock);
138 return (ret);
139 }
140
141
142 /*
143 * Compress 'long' device number encoding to 32-bit device number
144 * encoding. If it won't fit, we return failure, but set the
145 * device number to 32-bit NODEV for the sake of our callers.
146 */
147 int
cmpldev(dev32_t * dst,dev_t dev)148 cmpldev(dev32_t *dst, dev_t dev)
149 {
150 #if defined(_LP64)
151 if (dev == NODEV) {
152 *dst = NODEV32;
153 } else {
154 major_t major = dev >> L_BITSMINOR;
155 minor_t minor = dev & L_MAXMIN;
156
157 if (major > L_MAXMAJ32 || minor > L_MAXMIN32) {
158 *dst = NODEV32;
159 return (0);
160 }
161
162 *dst = (dev32_t)((major << L_BITSMINOR32) | minor);
163 }
164 #else
165 *dst = (dev32_t)dev;
166 #endif
167 return (1);
168 }
169
170 /*
171 * Expand 32-bit dev_t's to long dev_t's. Expansion always "fits"
172 * into the return type, but we're careful to expand NODEV explicitly.
173 */
174 dev_t
expldev(dev32_t dev32)175 expldev(dev32_t dev32)
176 {
177 #ifdef _LP64
178 if (dev32 == NODEV32)
179 return (NODEV);
180 return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32,
181 dev32 & L_MAXMIN32));
182 #else
183 return ((dev_t)dev32);
184 #endif
185 }
186
187 #ifndef _LP64
188 /*
189 * Keep these entry points for 32-bit systems but enforce the use
190 * of MIN/MAX macros on 64-bit systems. The DDI header files already
191 * define min/max as macros so drivers shouldn't need these functions.
192 */
193
/* Return the smaller of two signed ints (b when they are equal). */
int
min(int a, int b)
{
	if (a < b)
		return (a);
	return (b);
}
199
/* Return the larger of two signed ints (b when they are equal). */
int
max(int a, int b)
{
	if (a > b)
		return (a);
	return (b);
}
205
206 uint_t
umin(uint_t a,uint_t b)207 umin(uint_t a, uint_t b)
208 {
209 return (a < b ? a : b);
210 }
211
212 uint_t
umax(uint_t a,uint_t b)213 umax(uint_t a, uint_t b)
214 {
215 return (a > b ? a : b);
216 }
217
218 #endif /* !_LP64 */
219
/*
 * Parse suboptions from a string.
 * Same as getsubopt(3C).
 *
 * *optionsp points at a comma-separated list of "name" or "name=value"
 * items.  The first item is split off in place (its trailing ',' is
 * overwritten with a null byte) and *optionsp is advanced to the item
 * after it, or to the terminating null byte if it was the last one.
 *
 * Returns the index in 'tokens' (a NULL-terminated array) of the entry
 * that exactly matches the item's name, with *valuep pointing at the text
 * after '=' or set to NULL if there was no '='.  Returns -1 with *valuep
 * set to NULL if the list is empty, or -1 with *valuep pointing at the
 * whole item if the name matches no token.
 */
int
getsubopt(char **optionsp, char * const *tokens, char **valuep)
{
	char *opt = *optionsp;
	char *comma;
	char *equals;
	size_t namelen;
	int idx;

	*valuep = NULL;
	if (opt[0] == '\0')
		return (-1);

	/* Cut the first item off the list and step past it. */
	comma = strchr(opt, ',');
	if (comma != NULL) {
		*comma = '\0';
		*optionsp = comma + 1;
	} else {
		*optionsp = opt + strlen(opt);
	}

	/* Separate the name from an optional "=value" part. */
	equals = strchr(opt, '=');
	if (equals != NULL) {
		namelen = equals - opt;
		*valuep = equals + 1;
	} else {
		namelen = strlen(opt);
	}

	/* The name must match a token over its full length. */
	idx = 0;
	while (tokens[idx] != NULL) {
		if (strlen(tokens[idx]) == namelen &&
		    strncmp(opt, tokens[idx], namelen) == 0)
			return (idx);
		idx++;
	}

	/* no match, point value at option and return error */
	*valuep = opt;
	return (-1);
}
258
/*
 * Append the suboption string 'opt' starting at the position 'str'
 * within the buffer defined by 'buf' and 'len'.  If the string in 'buf'
 * is not empty, a comma is appended first.
 *
 * Return a pointer to the end of the resulting string (the null byte).
 * Return NULL, writing nothing, if there isn't enough space left to
 * append 'opt'.
 */
char *
append_subopt(const char *buf, size_t len, char *str, const char *opt)
{
	const size_t optlen = strlen(opt);
	const int need_comma = (buf[0] != '\0');

	/*
	 * Space needed: what is already there, a ',' if this is not the
	 * first option, the option itself, and the null byte.
	 */
	if (strlen(buf) + need_comma + optlen + 1 > len)
		return (NULL);

	if (need_comma) {
		*str = ',';
		str++;
	}
	(void) strcpy(str, opt);

	return (str + optlen);
}
283
284 /*
285 * Tables to convert a single byte to/from binary-coded decimal (BCD).
286 */
287 uchar_t byte_to_bcd[256] = {
288 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
289 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
290 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
291 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
292 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
293 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
294 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
295 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
296 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
297 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
298 };
299
300 uchar_t bcd_to_byte[256] = { /* CSTYLED */
301 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,
302 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0,
303 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 0, 0, 0, 0, 0,
304 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0,
305 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0, 0, 0, 0, 0, 0,
306 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 0, 0, 0, 0, 0, 0,
307 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 0, 0, 0, 0, 0,
308 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, 0, 0, 0,
309 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 0, 0, 0, 0, 0, 0,
310 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
311 };
312
313 /*
314 * Hot-patch a single instruction in the kernel's text.
315 *
316 * If you want to patch multiple instructions you must arrange to do it so that
317 * all intermediate stages are sane -- we don't stop other cpus while doing
318 * this.
319 *
320 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly.
321 *
322 * The instruction itself might straddle a page boundary, so we have to account
323 * for that.
324 */
325 void
hot_patch_kernel_text(caddr_t iaddr,uint32_t new_instr,uint_t size)326 hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size)
327 {
328 const uintptr_t pageoff = (uintptr_t)iaddr & PAGEOFFSET;
329 const boolean_t straddles = (pageoff + size > PAGESIZE);
330 const size_t mapsize = straddles ? PAGESIZE * 2 : PAGESIZE;
331 caddr_t ipageaddr = iaddr - pageoff;
332 caddr_t vaddr;
333 page_t **ppp;
334
335 vaddr = vmem_alloc(heap_arena, mapsize, VM_SLEEP);
336
337 (void) as_pagelock(&kas, &ppp, ipageaddr, mapsize, S_WRITE);
338
339 hat_devload(kas.a_hat, vaddr, PAGESIZE,
340 hat_getpfnum(kas.a_hat, ipageaddr), PROT_READ | PROT_WRITE,
341 HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
342
343 if (straddles) {
344 hat_devload(kas.a_hat, vaddr + PAGESIZE, PAGESIZE,
345 hat_getpfnum(kas.a_hat, ipageaddr + PAGESIZE),
346 PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
347 }
348
349 switch (size) {
350 case 1:
351 *(uint8_t *)(vaddr + pageoff) = new_instr;
352 break;
353 case 2:
354 *(uint16_t *)(vaddr + pageoff) = new_instr;
355 break;
356 case 4:
357 *(uint32_t *)(vaddr + pageoff) = new_instr;
358 break;
359 default:
360 panic("illegal hot-patch");
361 }
362
363 membar_enter();
364 sync_icache(vaddr + pageoff, size);
365 sync_icache(iaddr, size);
366 as_pageunlock(&kas, ppp, ipageaddr, mapsize, S_WRITE);
367 hat_unload(kas.a_hat, vaddr, mapsize, HAT_UNLOAD_UNLOCK);
368 vmem_free(heap_arena, vaddr, mapsize);
369 }
370
371 /*
372 * Routine to report an attempt to execute non-executable data. If the
373 * address executed lies in the stack, explicitly say so.
374 */
375 void
report_stack_exec(proc_t * p,caddr_t addr)376 report_stack_exec(proc_t *p, caddr_t addr)
377 {
378 if (!noexec_user_stack_log)
379 return;
380
381 if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) {
382 cmn_err(CE_NOTE, "%s[%d] attempt to execute code "
383 "on stack by uid %d", p->p_user.u_comm,
384 p->p_pid, crgetruid(p->p_cred));
385 } else {
386 cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable "
387 "data at 0x%p by uid %d", p->p_user.u_comm,
388 p->p_pid, (void *) addr, crgetruid(p->p_cred));
389 }
390
391 delay(hz / 50);
392 }
393
394 /*
395 * Determine whether the address range [addr, addr + len) is in memlist mp.
396 */
397 int
address_in_memlist(struct memlist * mp,uint64_t addr,size_t len)398 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len)
399 {
400 while (mp != 0) {
401 if ((addr >= mp->ml_address) &&
402 (addr + len <= mp->ml_address + mp->ml_size))
403 return (1); /* TRUE */
404 mp = mp->ml_next;
405 }
406 return (0); /* FALSE */
407 }
408
409 /*
410 * Pop the topmost element from the t_ontrap stack, removing the current set of
411 * on_trap() protections. Refer to <sys/ontrap.h> for more info. If the
412 * stack is already empty, no_trap() just returns.
413 */
414 void
no_trap(void)415 no_trap(void)
416 {
417 if (curthread->t_ontrap != NULL) {
418 #ifdef __sparc
419 membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */
420 #endif
421 curthread->t_ontrap = curthread->t_ontrap->ot_prev;
422 }
423 }
424
425 /*
426 * Return utsname.nodename outside a zone, or the zone name within.
427 */
428 char *
uts_nodename(void)429 uts_nodename(void)
430 {
431 if (curproc == NULL)
432 return (utsname.nodename);
433 return (curproc->p_zone->zone_nodename);
434 }
435