1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/sysmacros.h> 34 #include <sys/param.h> 35 #include <sys/vmparam.h> 36 #include <sys/systm.h> 37 #include <sys/cred.h> 38 #include <sys/user.h> 39 #include <sys/proc.h> 40 #include <sys/conf.h> 41 #include <sys/tuneable.h> 42 #include <sys/cpuvar.h> 43 #include <sys/archsystm.h> 44 #include <sys/vmem.h> 45 #include <vm/seg_kmem.h> 46 #include <sys/errno.h> 47 #include <sys/cmn_err.h> 48 #include <sys/debug.h> 49 #include <sys/atomic.h> 50 #include <sys/model.h> 51 #include <sys/kmem.h> 52 #include <sys/memlist.h> 53 #include <sys/autoconf.h> 54 #include <sys/ontrap.h> 55 #include <sys/utsname.h> 56 #include <sys/zone.h> 57 58 #ifdef __sparc 59 #include <sys/membar.h> 60 #endif 61 62 /* 63 * Routine which sets a user error; placed in 64 * illegal entries in the bdevsw and cdevsw tables. 65 */ 66 67 int 68 nodev() 69 { 70 return (curthread->t_lwp ? 71 ttolwp(curthread)->lwp_error = ENXIO : ENXIO); 72 } 73 74 /* 75 * Null routine; placed in insignificant entries 76 * in the bdevsw and cdevsw tables. 77 */ 78 79 int 80 nulldev() 81 { 82 return (0); 83 } 84 85 static kmutex_t udevlock; 86 87 /* 88 * Generate an unused major device number. 89 */ 90 major_t 91 getudev() 92 { 93 static major_t next = 0; 94 major_t ret; 95 96 /* 97 * Ensure that we start allocating major numbers above the 'devcnt' 98 * count. The only limit we place on the number is that it should be a 99 * legal 32-bit SVR4 major number and be greater than or equal to devcnt 100 * in the current system). 101 */ 102 mutex_enter(&udevlock); 103 if (next == 0) 104 next = devcnt; 105 if (next <= L_MAXMAJ32 && next >= devcnt) 106 ret = next++; 107 else { 108 /* 109 * If we fail to allocate a major number because devcnt has 110 * reached L_MAXMAJ32, we may be the victim of a sparsely 111 * populated devnames array. We scan the array backwards 112 * looking for an empty slot; if we find one, mark it as 113 * DN_GETUDEV so it doesn't get taken by subsequent consumers 114 * users of the devnames array, and issue a warning. 115 * It is vital for this routine to take drastic measures to 116 * succeed, since the kernel really needs it to boot. 117 */ 118 int i; 119 for (i = devcnt - 1; i >= 0; i--) { 120 LOCK_DEV_OPS(&devnamesp[i].dn_lock); 121 if (devnamesp[i].dn_name == NULL && 122 ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0)) 123 break; 124 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock); 125 } 126 if (i != -1) { 127 cmn_err(CE_WARN, "Reusing device major number %d.", i); 128 ASSERT(i >= 0 && i < devcnt); 129 devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV; 130 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock); 131 ret = (major_t)i; 132 } else { 133 ret = DDI_MAJOR_T_NONE; 134 } 135 } 136 mutex_exit(&udevlock); 137 return (ret); 138 } 139 140 141 /* 142 * Compress 'long' device number encoding to 32-bit device number 143 * encoding. If it won't fit, we return failure, but set the 144 * device number to 32-bit NODEV for the sake of our callers. 145 */ 146 int 147 cmpldev(dev32_t *dst, dev_t dev) 148 { 149 #if defined(_LP64) 150 if (dev == NODEV) { 151 *dst = NODEV32; 152 } else { 153 major_t major = dev >> L_BITSMINOR; 154 minor_t minor = dev & L_MAXMIN; 155 156 if (major > L_MAXMAJ32 || minor > L_MAXMIN32) { 157 *dst = NODEV32; 158 return (0); 159 } 160 161 *dst = (dev32_t)((major << L_BITSMINOR32) | minor); 162 } 163 #else 164 *dst = (dev32_t)dev; 165 #endif 166 return (1); 167 } 168 169 /* 170 * Expand 32-bit dev_t's to long dev_t's. Expansion always "fits" 171 * into the return type, but we're careful to expand NODEV explicitly. 172 */ 173 dev_t 174 expldev(dev32_t dev32) 175 { 176 #ifdef _LP64 177 if (dev32 == NODEV32) 178 return (NODEV); 179 return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32, 180 dev32 & L_MAXMIN32)); 181 #else 182 return ((dev_t)dev32); 183 #endif 184 } 185 186 #ifndef _LP64 187 /* 188 * Keep these entry points for 32-bit systems but enforce the use 189 * of MIN/MAX macros on 64-bit systems. The DDI header files already 190 * define min/max as macros so drivers shouldn't need these functions. 191 */ 192 193 int 194 min(int a, int b) 195 { 196 return (a < b ? a : b); 197 } 198 199 int 200 max(int a, int b) 201 { 202 return (a > b ? a : b); 203 } 204 205 uint_t 206 umin(uint_t a, uint_t b) 207 { 208 return (a < b ? a : b); 209 } 210 211 uint_t 212 umax(uint_t a, uint_t b) 213 { 214 return (a > b ? a : b); 215 } 216 217 #endif /* !_LP64 */ 218 219 /* 220 * Parse suboptions from a string. 221 * Same as getsubopt(3C). 222 */ 223 int 224 getsubopt(char **optionsp, char * const *tokens, char **valuep) 225 { 226 char *s = *optionsp, *p; 227 int i; 228 size_t optlen; 229 230 *valuep = NULL; 231 if (*s == '\0') 232 return (-1); 233 p = strchr(s, ','); /* find next option */ 234 if (p == NULL) { 235 p = s + strlen(s); 236 } else { 237 *p++ = '\0'; /* mark end and point to next */ 238 } 239 *optionsp = p; /* point to next option */ 240 p = strchr(s, '='); /* find value */ 241 if (p == NULL) { 242 optlen = strlen(s); 243 *valuep = NULL; 244 } else { 245 optlen = p - s; 246 *valuep = ++p; 247 } 248 for (i = 0; tokens[i] != NULL; i++) { 249 if ((optlen == strlen(tokens[i])) && 250 (strncmp(s, tokens[i], optlen) == 0)) 251 return (i); 252 } 253 /* no match, point value at option and return error */ 254 *valuep = s; 255 return (-1); 256 } 257 258 /* 259 * Append the suboption string 'opt' starting at the position 'str' 260 * within the buffer defined by 'buf' and 'len'. If 'buf' is not null, 261 * a comma is appended first. 262 * Return a pointer to the end of the resulting string (the null byte). 263 * Return NULL if there isn't enough space left to append 'opt'. 264 */ 265 char * 266 append_subopt(const char *buf, size_t len, char *str, const char *opt) 267 { 268 size_t l = strlen(opt); 269 270 /* 271 * Include a ',' if this is not the first option. 272 * Include space for the null byte. 273 */ 274 if (strlen(buf) + (buf[0] != '\0') + l + 1 > len) 275 return (NULL); 276 277 if (buf[0] != '\0') 278 *str++ = ','; 279 (void) strcpy(str, opt); 280 return (str + l); 281 } 282 283 /* 284 * Tables to convert a single byte to/from binary-coded decimal (BCD). 285 */ 286 uchar_t byte_to_bcd[256] = { 287 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 288 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 289 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 290 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 291 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 292 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 293 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 294 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 295 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 296 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 297 }; 298 299 uchar_t bcd_to_byte[256] = { /* CSTYLED */ 300 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 301 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 302 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 0, 0, 0, 0, 0, 303 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, 304 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0, 0, 0, 0, 0, 0, 305 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 0, 0, 0, 0, 0, 0, 306 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 0, 0, 0, 0, 0, 307 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, 0, 0, 0, 308 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 0, 0, 0, 0, 0, 0, 309 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 310 }; 311 312 /* 313 * Hot-patch a single instruction in the kernel's text. 314 * If you want to patch multiple instructions you must 315 * arrange to do it so that all intermediate stages are 316 * sane -- we don't stop other cpus while doing this. 317 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly. 318 */ 319 void 320 hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size) 321 { 322 caddr_t vaddr; 323 page_t **ppp; 324 uintptr_t off = (uintptr_t)iaddr & PAGEOFFSET; 325 326 vaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 327 328 (void) as_pagelock(&kas, &ppp, iaddr - off, PAGESIZE, S_WRITE); 329 330 hat_devload(kas.a_hat, vaddr, PAGESIZE, 331 hat_getpfnum(kas.a_hat, iaddr - off), 332 PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); 333 334 switch (size) { 335 case 1: 336 *(uint8_t *)(vaddr + off) = new_instr; 337 break; 338 case 2: 339 *(uint16_t *)(vaddr + off) = new_instr; 340 break; 341 case 4: 342 *(uint32_t *)(vaddr + off) = new_instr; 343 break; 344 default: 345 panic("illegal hot-patch"); 346 } 347 348 membar_enter(); 349 sync_icache(vaddr + off, size); 350 sync_icache(iaddr, size); 351 as_pageunlock(&kas, ppp, iaddr - off, PAGESIZE, S_WRITE); 352 hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK); 353 vmem_free(heap_arena, vaddr, PAGESIZE); 354 } 355 356 /* 357 * Routine to report an attempt to execute non-executable data. If the 358 * address executed lies in the stack, explicitly say so. 359 */ 360 void 361 report_stack_exec(proc_t *p, caddr_t addr) 362 { 363 if (!noexec_user_stack_log) 364 return; 365 366 if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) { 367 cmn_err(CE_NOTE, "%s[%d] attempt to execute code " 368 "on stack by uid %d", p->p_user.u_comm, 369 p->p_pid, crgetruid(p->p_cred)); 370 } else { 371 cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable " 372 "data at 0x%p by uid %d", p->p_user.u_comm, 373 p->p_pid, (void *) addr, crgetruid(p->p_cred)); 374 } 375 376 delay(hz / 50); 377 } 378 379 /* 380 * Determine whether the address range [addr, addr + len) is in memlist mp. 381 */ 382 int 383 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len) 384 { 385 while (mp != 0) { 386 if ((addr >= mp->address) && 387 (addr + len <= mp->address + mp->size)) 388 return (1); /* TRUE */ 389 mp = mp->next; 390 } 391 return (0); /* FALSE */ 392 } 393 394 /* 395 * Pop the topmost element from the t_ontrap stack, removing the current set of 396 * on_trap() protections. Refer to <sys/ontrap.h> for more info. If the 397 * stack is already empty, no_trap() just returns. 398 */ 399 void 400 no_trap(void) 401 { 402 if (curthread->t_ontrap != NULL) { 403 #ifdef __sparc 404 membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */ 405 #endif 406 curthread->t_ontrap = curthread->t_ontrap->ot_prev; 407 } 408 } 409 410 /* 411 * Return utsname.nodename outside a zone, or the zone name within. 412 */ 413 char * 414 uts_nodename(void) 415 { 416 if (curproc == NULL) 417 return (utsname.nodename); 418 return (curproc->p_zone->zone_nodename); 419 } 420