1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/sysmacros.h> 32 #include <sys/signal.h> 33 #include <sys/systm.h> 34 #include <sys/user.h> 35 #include <sys/mman.h> 36 #include <sys/class.h> 37 #include <sys/proc.h> 38 #include <sys/procfs.h> 39 #include <sys/kmem.h> 40 #include <sys/cred.h> 41 #include <sys/archsystm.h> 42 #include <sys/machsystm.h> 43 44 #include <sys/reboot.h> 45 #include <sys/uadmin.h> 46 47 #include <sys/vfs.h> 48 #include <sys/vnode.h> 49 #include <sys/session.h> 50 #include <sys/ucontext.h> 51 52 #include <sys/dnlc.h> 53 #include <sys/var.h> 54 #include <sys/cmn_err.h> 55 #include <sys/debug.h> 56 #include <sys/thread.h> 57 #include <sys/vtrace.h> 58 #include <sys/consdev.h> 59 #include <sys/frame.h> 60 #include <sys/stack.h> 61 #include <sys/swap.h> 62 #include <sys/vmparam.h> 63 #include <sys/cpuvar.h> 64 65 #include <sys/privregs.h> 66 67 #include <vm/hat.h> 68 #include <vm/anon.h> 69 #include <vm/as.h> 70 
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>

#include <sys/exec.h>
#include <sys/acct.h>
#include <sys/modctl.h>
#include <sys/tuneable.h>

#include <c2/audit.h>

#include <sys/trap.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/memlist.h>
#include <sys/memlist_plat.h>
#include <sys/systeminfo.h>
#include <sys/promif.h>

/*
 * Boundaries of the kernel virtual-address hole; see the notes in
 * size_virtalloc() below for how OBP treats this range per platform.
 */
u_longlong_t spec_hole_start = 0x80000000000ull;
u_longlong_t spec_hole_end = 0xfffff80000000000ull;

/*
 * Count the number of available pages and the number of
 * chunks in the list of available memory.
 *
 * "physavail" is a flat boot-property array of (base, size) pairs,
 * so nelems is twice the number of chunks and the loop steps by 2.
 */
void
size_physavail(
    u_longlong_t *physavail,
    size_t nelems,
    pgcnt_t *npages,        /* out: total pages across all chunks */
    int *memblocks)         /* out: number of (base, size) chunks */
{
    size_t i;

    *npages = 0;
    *memblocks = 0;
    for (i = 0; i < nelems; i += 2) {
        /* physavail[i+1] is the chunk size in bytes */
        *npages += (pgcnt_t)(physavail[i+1] >> PAGESHIFT);
        (*memblocks)++;
    }
}

/*
 * Given the "virt-avail" (base, size) pair array, return the number of
 * pages in the gaps between consecutive available chunks, i.e. the
 * virtual space already allocated.  The special VA hole, if it shows up
 * as such a gap, is subtracted back out (see notes below).
 */
pgcnt_t
size_virtalloc(u_longlong_t *avail, size_t nelems)
{

    u_longlong_t start, end;
    pgcnt_t allocpages = 0;
    uint_t hole_allocated = 0;
    uint_t i;

    /*
     * Walk adjacent chunk pairs: "start" is the end of chunk i,
     * "end" is the base of chunk i+1, so (end - start) is the
     * allocated gap between them.
     */
    for (i = 0; i < (nelems - 2); i += 2) {

        start = avail[i] + avail[i + 1];
        end = avail[i + 2];

        /*
         * Notes:
         *
         * (1) OBP on platforms with US I/II pre-allocates the hole
         *     represented by [spec_hole_start, spec_hole_end);
         *     pre-allocation is done to make this range unavailable
         *     for any allocation.
         *
         * (2) OBP on starcat always pre-allocates the hole similar to
         *     platforms with US I/II.
         *
         * (3) OBP on serengeti does _not_ pre-allocate the hole.
         *
         * (4) OBP ignores Spitfire Errata #21; i.e. it does _not_
         *     fill up or pre-allocate an additional 4GB on both sides
         *     of the hole.
         *
         * (5) kernel virtual range [spec_hole_start, spec_hole_end)
         *     is _not_ used on any platform including those with
         *     UltraSPARC III where there is no hole.
         *
         * Algorithm:
         *
         * Check if range [spec_hole_start, spec_hole_end) is
         * pre-allocated by OBP; if so, subtract that range from
         * allocpages.
         */
        if (end >= spec_hole_end && start <= spec_hole_start)
            hole_allocated = 1;

        allocpages += btopr(end - start);
    }

    if (hole_allocated)
        allocpages -= btop(spec_hole_end - spec_hole_start);

    return (allocpages);
}

/*
 * Returns the max contiguous physical memory present in the
 * memlist "physavail".
 */
uint64_t
get_max_phys_size(
    struct memlist *physavail)
{
    uint64_t max_size = 0;

    for (; physavail; physavail = physavail->next) {
        if (physavail->size > max_size)
            max_size = physavail->size;
    }

    return (max_size);
}


/*
 * Copy boot's physavail list deducting memory at "start"
 * for "size" bytes.
 *
 * "src" is a flat (base, size) pair array; *dstp points at a
 * pre-allocated contiguous array of struct memlist which is consumed
 * one element at a time (dst++/prev++) while being chained into a
 * doubly linked list.  On return *dstp points one past the last
 * element used.  Returns 1 if [start, start+size) was found inside
 * some chunk and deducted, 0 otherwise.
 */
int
copy_physavail(
    u_longlong_t *src,
    size_t nelems,
    struct memlist **dstp,
    uint_t start,
    uint_t size)
{
    struct memlist *dst, *prev;
    uint_t end1;
    int deducted = 0;
    size_t i;

    dst = *dstp;
    prev = dst;
    end1 = start + size;

    for (i = 0; i < nelems; i += 2) {
        uint64_t addr, lsize, end2;

        addr = src[i];
        lsize = src[i+1];
        end2 = addr + lsize;

        if ((size != 0) && start >= addr && end1 <= end2) {
            /* deducted range in this chunk */
            deducted = 1;
            if (start == addr) {
                /* abuts start of chunk */
                if (end1 == end2)
                    /* is equal to the chunk */
                    continue;
                dst->address = end1;
                dst->size = lsize - size;
            } else if (end1 == end2) {
                /* abuts end of chunk */
                dst->address = addr;
                dst->size = lsize - size;
            } else {
                /* in the middle of the chunk: split into two entries */
                dst->address = addr;
                dst->size = start - addr;
                dst->next = 0;
                if (prev == dst) {
                    /* first element of the list */
                    dst->prev = 0;
                    dst++;
                } else {
                    dst->prev = prev;
                    prev->next = dst;
                    dst++;
                    prev++;
                }
                dst->address = end1;
                dst->size = end2 - end1;
            }
            /* link in the (possibly second) entry for this chunk */
            dst->next = 0;
            if (prev == dst) {
                dst->prev = 0;
                dst++;
            } else {
                dst->prev = prev;
                prev->next = dst;
                dst++;
                prev++;
            }
        } else {
            /* chunk copied unmodified */
            dst->address = src[i];
            dst->size = src[i+1];
            dst->next = 0;
            if (prev == dst) {
                dst->prev = 0;
                dst++;
            } else {
                dst->prev = prev;
                prev->next = dst;
                dst++;
                prev++;
            }
        }
    }

    *dstp = dst;
    return (deducted);
}

/* vnode on which PROM-owned pages are hashed in by fix_prom_pages() */
struct vnode prom_ppages;

/*
 * Find the pages allocated by the prom by diffing the original
 * phys_avail list and the current list.  In the difference, the
 * pages not locked belong to the PROM.  (The kernel has already locked
 * and removed all the pages it has allocated from the freelist, this
 * routine removes the remaining "free" pages that really belong to the
 * PROM and hashs them in on the 'prom_pages' vnode.)
 */
void
fix_prom_pages(struct memlist *orig, struct memlist *new)
{
    struct memlist *list, *nlist;
    extern int kcage_on;

    nlist = new;
    for (list = orig; list; list = list->next) {
        uint64_t pa, end;
        pfn_t pfnum;
        page_t *pp;

        /* identical chunk in both lists: nothing was carved out of it */
        if (list->address == nlist->address &&
            list->size == nlist->size) {
            /* stay on the last element once the new list runs out */
            nlist = nlist->next ? nlist->next : nlist;
            continue;
        }

        /*
         * Loop through the old list looking to
         * see if each page is still in the new one.
         * If a page is not in the new list then we
         * check to see if it locked permanently.
         * If so, the kernel allocated and owns it.
         * If not, then the prom must own it. We
         * remove any pages found to owned by the prom
         * from the freelist.
         */
        end = list->address + list->size;
        for (pa = list->address; pa < end; pa += PAGESIZE) {

            if (address_in_memlist(new, pa, PAGESIZE))
                continue;

            pfnum = (pfn_t)(pa >> PAGESHIFT);
            if ((pp = page_numtopp_nolock(pfnum)) == NULL)
                cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);

            /*
             * must break up any large pages that may have
             * constituent pages being utilized for
             * BOP_ALLOC()'s. page_reclaim() can't handle
             * large pages.
             */
            if (pp->p_szc != 0)
                page_boot_demote(pp);

            if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
                /*
                 * Ahhh yes, a prom page,
                 * suck it off the freelist,
                 * lock it, and hashin on prom_pages vp.
                 */
                if (page_trylock(pp, SE_EXCL) == 0)
                    cmn_err(CE_PANIC, "prom page locked");

                (void) page_reclaim(pp, NULL);
                /*
                 * XXX vnode offsets on the prom_ppages vnode
                 * are page numbers (gack) for >32 bit
                 * physical memory machines.
                 */
                (void) page_hashin(pp, &prom_ppages,
                    (offset_t)pfnum, NULL);

                if (kcage_on) {
                    /* page_boot_demote() above guarantees this */
                    ASSERT(pp->p_szc == 0);
                    PP_SETNORELOC(pp);
                }
                (void) page_pp_lock(pp, 0, 1);
                /* keep a shared lock; exclusive no longer needed */
                page_downgrade(pp);
            }
        }
        nlist = nlist->next ? nlist->next : nlist;
    }
}

/*
 * Find the page number of the highest installed physical
 * page and the number of pages installed (one cannot be
 * calculated from the other because memory isn't necessarily
 * contiguous).
 *
 * Flat-array variant: "list" is a (base, size) pair array as
 * retrieved from the boot properties.
 */
void
installed_top_size_memlist_array(
    u_longlong_t *list,     /* base of array */
    size_t nelems,          /* number of elements */
    pfn_t *topp,            /* return ptr for top value */
    pgcnt_t *sumpagesp)     /* return prt for sum of installed pages */
{
    pfn_t top = 0;
    pgcnt_t sumpages = 0;
    pfn_t highp;            /* high page in a chunk */
    size_t i;

    for (i = 0; i < nelems; i += 2) {
        highp = (list[i] + list[i+1] - 1) >> PAGESHIFT;
        if (top < highp)
            top = highp;
        sumpages += (list[i+1] >> PAGESHIFT);
    }

    *topp = top;
    *sumpagesp = sumpages;
}

/*
 * Copy a memory list.  Used in startup() to copy boot's
 * memory lists to the kernel.
 *
 * "src" is a flat (base, size) pair array; *dstp points at a
 * pre-allocated contiguous array of struct memlist, consumed one
 * element per chunk and chained into a doubly linked list.  On
 * return *dstp points one past the last element used.
 */
void
copy_memlist(
    u_longlong_t *src,
    size_t nelems,
    struct memlist **dstp)
{
    struct memlist *dst, *prev;
    size_t i;

    dst = *dstp;
    prev = dst;

    for (i = 0; i < nelems; i += 2) {
        dst->address = src[i];
        dst->size = src[i+1];
        dst->next = 0;
        if (prev == dst) {
            /* first element of the list */
            dst->prev = 0;
            dst++;
        } else {
            dst->prev = prev;
            prev->next = dst;
            dst++;
            prev++;
        }
    }

    *dstp = dst;
}

/*
 * Boot memory properties fetched by copy_boot_memlists(), along with
 * the BOP_ALLOC'd buffers that hold their current values.
 */
static struct bootmem_props {
    char *name;
    u_longlong_t *ptr;
    size_t nelems;          /* actual number of elements */
    size_t bufsize;         /* length of allocated buffer */
} bootmem_props[] = {
    { "phys-installed", NULL, 0, 0 },
    { "phys-avail", NULL, 0, 0 },
    { "virt-avail", NULL, 0, 0 },
    { NULL, NULL, 0, 0 }
};

/* indices into bootmem_props[] */
#define PHYSINSTALLED 0
#define PHYSAVAIL 1
#define VIRTAVAIL 2

/*
 * Snapshot the boot memory list properties into kernel-visible
 * buffers.  Buffers are (re)allocated via BOP_ALLOC as needed; if a
 * property grows between sizing and the post-"memory-update" recheck,
 * everything is freed and the whole sequence retried.  The out
 * parameters return each property's buffer and its element count.
 */
void
copy_boot_memlists(u_longlong_t **physinstalled, size_t *physinstalled_len,
    u_longlong_t **physavail, size_t *physavail_len,
    u_longlong_t **virtavail, size_t *virtavail_len)
{
    int align = BO_ALIGN_L3;
    size_t len;
    struct bootmem_props *tmp = bootmem_props;

tryagain:
    for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
        len = BOP_GETPROPLEN(bootops, tmp->name);
        if (len == 0) {
            panic("cannot get length of \"%s\" property",
                tmp->name);
        }
        tmp->nelems = len / sizeof (u_longlong_t);
        len = roundup(len, PAGESIZE);
        if (len <= tmp->bufsize)
            continue;
        /* need to allocate more */
        if (tmp->ptr) {
            BOP_FREE(bootops, (caddr_t)tmp->ptr, tmp->bufsize);
            tmp->ptr = NULL;
            tmp->bufsize = 0;
        }
        tmp->bufsize = len;
        tmp->ptr = (void *)BOP_ALLOC(bootops, 0, tmp->bufsize, align);
        if (tmp->ptr == NULL)
            panic("cannot allocate %lu bytes for \"%s\" property",
                tmp->bufsize, tmp->name);

    }
    /*
     * take the most current snapshot we can by calling mem-update
     * (a zero-length property here is the expected "trigger" form;
     * the GETPROP call performs the update and returns no data)
     */
    if (BOP_GETPROPLEN(bootops, "memory-update") == 0)
        (void) BOP_GETPROP(bootops, "memory-update", NULL);

    /* did the sizes change? */
    for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
        len = BOP_GETPROPLEN(bootops, tmp->name);
        tmp->nelems = len / sizeof (u_longlong_t);
        len = roundup(len, PAGESIZE);
        if (len > tmp->bufsize) {
            /* ick. Free them all and try again */
            for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
                BOP_FREE(bootops, (caddr_t)tmp->ptr,
                    tmp->bufsize);
                tmp->ptr = NULL;
                tmp->bufsize = 0;
            }
            goto tryagain;
        }
    }

    /* now we can retrieve the properties */
    for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
        if (BOP_GETPROP(bootops, tmp->name, tmp->ptr) == -1) {
            panic("cannot retrieve \"%s\" property",
                tmp->name);
        }
    }
    *physinstalled = bootmem_props[PHYSINSTALLED].ptr;
    *physinstalled_len = bootmem_props[PHYSINSTALLED].nelems;

    *physavail = bootmem_props[PHYSAVAIL].ptr;
    *physavail_len = bootmem_props[PHYSAVAIL].nelems;

    *virtavail = bootmem_props[VIRTAVAIL].ptr;
    *virtavail_len = bootmem_props[VIRTAVAIL].nelems;
}


/*
 * Find the page number of the highest installed physical
 * page and the number of pages installed (one cannot be
 * calculated from the other because memory isn't necessarily
 * contiguous).
 *
 * Linked-list variant of installed_top_size_memlist_array().
 */
void
installed_top_size(
    struct memlist *list,   /* pointer to start of installed list */
    pfn_t *topp,            /* return ptr for top value */
    pgcnt_t *sumpagesp)     /* return prt for sum of installed pages */
{
    pfn_t top = 0;
    pfn_t highp;            /* high page in a chunk */
    pgcnt_t sumpages = 0;

    for (; list; list = list->next) {
        highp = (list->address + list->size - 1) >> PAGESHIFT;
        if (top < highp)
            top = highp;
        sumpages += (uint_t)(list->size >> PAGESHIFT);
    }

    *topp = top;
    *sumpagesp = sumpages;
}