1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/types.h> 34 #include <sys/sysmacros.h> 35 #include <sys/param.h> 36 #include <sys/vmparam.h> 37 #include <sys/systm.h> 38 #include <sys/cred.h> 39 #include <sys/user.h> 40 #include <sys/proc.h> 41 #include <sys/conf.h> 42 #include <sys/tuneable.h> 43 #include <sys/cpuvar.h> 44 #include <sys/archsystm.h> 45 #include <sys/vmem.h> 46 #include <vm/seg_kmem.h> 47 #include <sys/errno.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/atomic.h> 51 #include <sys/model.h> 52 #include <sys/kmem.h> 53 #include <sys/memlist.h> 54 #include <sys/autoconf.h> 55 #include <sys/ontrap.h> 56 #include <sys/utsname.h> 57 #include <sys/zone.h> 58 59 #ifdef __sparc 60 #include <sys/membar.h> 61 #endif 62 63 /* 64 * Routine which sets a user error; placed in 65 * illegal entries in the bdevsw and cdevsw tables. 66 */ 67 68 int 69 nodev() 70 { 71 return (curthread->t_lwp ? 72 ttolwp(curthread)->lwp_error = ENXIO : ENXIO); 73 } 74 75 /* 76 * Null routine; placed in insignificant entries 77 * in the bdevsw and cdevsw tables. 78 */ 79 80 int 81 nulldev() 82 { 83 return (0); 84 } 85 86 static kmutex_t udevlock; 87 88 /* 89 * Generate an unused major device number. 90 */ 91 major_t 92 getudev() 93 { 94 static major_t next = 0; 95 major_t ret; 96 97 /* 98 * Ensure that we start allocating major numbers above the 'devcnt' 99 * count. The only limit we place on the number is that it should be a 100 * legal 32-bit SVR4 major number and be greater than or equal to devcnt 101 * in the current system). 102 */ 103 mutex_enter(&udevlock); 104 if (next == 0) 105 next = devcnt; 106 if (next <= L_MAXMAJ32 && next >= devcnt) 107 ret = next++; 108 else { 109 /* 110 * If we fail to allocate a major number because devcnt has 111 * reached L_MAXMAJ32, we may be the victim of a sparsely 112 * populated devnames array. We scan the array backwards 113 * looking for an empty slot; if we find one, mark it as 114 * DN_GETUDEV so it doesn't get taken by subsequent consumers 115 * users of the devnames array, and issue a warning. 116 * It is vital for this routine to take drastic measures to 117 * succeed, since the kernel really needs it to boot. 118 */ 119 int i; 120 for (i = devcnt - 1; i >= 0; i--) { 121 LOCK_DEV_OPS(&devnamesp[i].dn_lock); 122 if (devnamesp[i].dn_name == NULL && 123 ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0)) 124 break; 125 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock); 126 } 127 if (i != -1) { 128 cmn_err(CE_WARN, "Reusing device major number %d.", i); 129 ASSERT(i >= 0 && i < devcnt); 130 devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV; 131 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock); 132 ret = (major_t)i; 133 } else { 134 ret = (major_t)-1; 135 } 136 } 137 mutex_exit(&udevlock); 138 return (ret); 139 } 140 141 142 /* 143 * Compress 'long' device number encoding to 32-bit device number 144 * encoding. If it won't fit, we return failure, but set the 145 * device number to 32-bit NODEV for the sake of our callers. 146 */ 147 int 148 cmpldev(dev32_t *dst, dev_t dev) 149 { 150 #if defined(_LP64) 151 if (dev == NODEV) { 152 *dst = NODEV32; 153 } else { 154 major_t major = dev >> L_BITSMINOR; 155 minor_t minor = dev & L_MAXMIN; 156 157 if (major > L_MAXMAJ32 || minor > L_MAXMIN32) { 158 *dst = NODEV32; 159 return (0); 160 } 161 162 *dst = (dev32_t)((major << L_BITSMINOR32) | minor); 163 } 164 #else 165 *dst = (dev32_t)dev; 166 #endif 167 return (1); 168 } 169 170 /* 171 * Expand 32-bit dev_t's to long dev_t's. Expansion always "fits" 172 * into the return type, but we're careful to expand NODEV explicitly. 173 */ 174 dev_t 175 expldev(dev32_t dev32) 176 { 177 #ifdef _LP64 178 if (dev32 == NODEV32) 179 return (NODEV); 180 return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32, 181 dev32 & L_MAXMIN32)); 182 #else 183 return ((dev_t)dev32); 184 #endif 185 } 186 187 #ifndef _LP64 188 /* 189 * Keep these entry points for 32-bit systems but enforce the use 190 * of MIN/MAX macros on 64-bit systems. The DDI header files already 191 * define min/max as macros so drivers shouldn't need these functions. 192 */ 193 194 int 195 min(int a, int b) 196 { 197 return (a < b ? a : b); 198 } 199 200 int 201 max(int a, int b) 202 { 203 return (a > b ? a : b); 204 } 205 206 uint_t 207 umin(uint_t a, uint_t b) 208 { 209 return (a < b ? a : b); 210 } 211 212 uint_t 213 umax(uint_t a, uint_t b) 214 { 215 return (a > b ? a : b); 216 } 217 218 #endif /* !_LP64 */ 219 220 /* 221 * Return bit position of least significant bit set in mask, 222 * starting numbering from 1. 223 */ 224 int 225 ffs(long mask) 226 { 227 int i; 228 229 if (mask == 0) 230 return (0); 231 for (i = 1; i <= NBBY * sizeof (mask); i++) { 232 if (mask & 1) 233 return (i); 234 mask >>= 1; 235 } 236 return (0); 237 } 238 239 /* 240 * Parse suboptions from a string. 241 * Same as getsubopt(3C). 242 */ 243 int 244 getsubopt(char **optionsp, char * const *tokens, char **valuep) 245 { 246 char *s = *optionsp, *p; 247 int i; 248 size_t optlen; 249 250 *valuep = NULL; 251 if (*s == '\0') 252 return (-1); 253 p = strchr(s, ','); /* find next option */ 254 if (p == NULL) { 255 p = s + strlen(s); 256 } else { 257 *p++ = '\0'; /* mark end and point to next */ 258 } 259 *optionsp = p; /* point to next option */ 260 p = strchr(s, '='); /* find value */ 261 if (p == NULL) { 262 optlen = strlen(s); 263 *valuep = NULL; 264 } else { 265 optlen = p - s; 266 *valuep = ++p; 267 } 268 for (i = 0; tokens[i] != NULL; i++) { 269 if ((optlen == strlen(tokens[i])) && 270 (strncmp(s, tokens[i], optlen) == 0)) 271 return (i); 272 } 273 /* no match, point value at option and return error */ 274 *valuep = s; 275 return (-1); 276 } 277 278 /* 279 * Append the suboption string 'opt' starting at the position 'str' 280 * within the buffer defined by 'buf' and 'len'. If 'buf' is not null, 281 * a comma is appended first. 282 * Return a pointer to the end of the resulting string (the null byte). 283 * Return NULL if there isn't enough space left to append 'opt'. 284 */ 285 char * 286 append_subopt(const char *buf, size_t len, char *str, const char *opt) 287 { 288 size_t l = strlen(opt); 289 290 /* 291 * Include a ',' if this is not the first option. 292 * Include space for the null byte. 293 */ 294 if (strlen(buf) + (buf[0] != '\0') + l + 1 > len) 295 return (NULL); 296 297 if (buf[0] != '\0') 298 *str++ = ','; 299 (void) strcpy(str, opt); 300 return (str + l); 301 } 302 303 /* 304 * Tables to convert a single byte to/from binary-coded decimal (BCD). 305 */ 306 uchar_t byte_to_bcd[256] = { 307 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 308 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 309 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 310 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 311 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 312 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 313 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 314 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 315 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 316 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 317 }; 318 319 uchar_t bcd_to_byte[256] = { /* CSTYLED */ 320 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 321 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 322 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 0, 0, 0, 0, 0, 323 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, 324 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0, 0, 0, 0, 0, 0, 325 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 0, 0, 0, 0, 0, 0, 326 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 0, 0, 0, 0, 0, 327 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, 0, 0, 0, 328 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 0, 0, 0, 0, 0, 0, 329 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 330 }; 331 332 /* 333 * Hot-patch a single instruction in the kernel's text. 334 * If you want to patch multiple instructions you must 335 * arrange to do it so that all intermediate stages are 336 * sane -- we don't stop other cpus while doing this. 337 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly. 338 */ 339 void 340 hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size) 341 { 342 caddr_t vaddr; 343 page_t **ppp; 344 uintptr_t off = (uintptr_t)iaddr & PAGEOFFSET; 345 346 vaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 347 348 (void) as_pagelock(&kas, &ppp, iaddr - off, PAGESIZE, S_WRITE); 349 350 hat_devload(kas.a_hat, vaddr, PAGESIZE, 351 hat_getpfnum(kas.a_hat, iaddr - off), 352 PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); 353 354 switch (size) { 355 case 1: 356 *(uint8_t *)(vaddr + off) = new_instr; 357 break; 358 case 2: 359 *(uint16_t *)(vaddr + off) = new_instr; 360 break; 361 case 4: 362 *(uint32_t *)(vaddr + off) = new_instr; 363 break; 364 default: 365 panic("illegal hot-patch"); 366 } 367 368 membar_enter(); 369 sync_icache(vaddr + off, size); 370 sync_icache(iaddr, size); 371 as_pageunlock(&kas, ppp, iaddr - off, PAGESIZE, S_WRITE); 372 hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK); 373 vmem_free(heap_arena, vaddr, PAGESIZE); 374 } 375 376 /* 377 * Routine to report an attempt to execute non-executable data. If the 378 * address executed lies in the stack, explicitly say so. 379 */ 380 void 381 report_stack_exec(proc_t *p, caddr_t addr) 382 { 383 if (!noexec_user_stack_log) 384 return; 385 386 if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) { 387 cmn_err(CE_NOTE, "%s[%d] attempt to execute code " 388 "on stack by uid %d", p->p_user.u_comm, 389 p->p_pid, crgetruid(p->p_cred)); 390 } else { 391 cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable " 392 "data at 0x%p by uid %d", p->p_user.u_comm, 393 p->p_pid, (void *) addr, crgetruid(p->p_cred)); 394 } 395 396 delay(hz / 50); 397 } 398 399 /* 400 * Determine whether the address range [addr, addr + len) is in memlist mp. 401 */ 402 int 403 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len) 404 { 405 while (mp != 0) { 406 if ((addr >= mp->address) && 407 (addr + len <= mp->address + mp->size)) 408 return (1); /* TRUE */ 409 mp = mp->next; 410 } 411 return (0); /* FALSE */ 412 } 413 414 /* 415 * Pop the topmost element from the t_ontrap stack, removing the current set of 416 * on_trap() protections. Refer to <sys/ontrap.h> for more info. If the 417 * stack is already empty, no_trap() just returns. 418 */ 419 void 420 no_trap(void) 421 { 422 if (curthread->t_ontrap != NULL) { 423 #ifdef __sparc 424 membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */ 425 #endif 426 curthread->t_ontrap = curthread->t_ontrap->ot_prev; 427 } 428 } 429 430 /* 431 * Return utsname.nodename outside a zone, or the zone name within. 432 */ 433 char * 434 uts_nodename(void) 435 { 436 if (curproc == NULL) 437 return (utsname.nodename); 438 return (curproc->p_zone->zone_nodename); 439 } 440