/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>

#include <sys/reboot.h>
#include <sys/uadmin.h>

#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/ucontext.h>

#include <sys/dnlc.h>
#include <sys/var.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/vtrace.h>
#include <sys/consdev.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/swap.h>
#include <sys/vmparam.h>
#include <sys/cpuvar.h>

#include <sys/privregs.h>

#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>

#include <sys/exec.h>
#include <sys/acct.h>
#include <sys/modctl.h>
#include <sys/tuneable.h>

#include <c2/audit.h>

#include <sys/trap.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/memlist.h>
#include <sys/memlist_plat.h>
#include <sys/systeminfo.h>
#include <sys/promif.h>

u_longlong_t spec_hole_start = 0x80000000000ull;
u_longlong_t spec_hole_end = 0xfffff80000000000ull;

/*
 * Count the total number of pages in the phys_install
 * list of installed physical memory.
 */
pgcnt_t
num_phys_pages()
{
	pgcnt_t npages = 0;
	struct memlist *mp;

	for (mp = phys_install; mp != NULL; mp = mp->next)
		npages += mp->size >> PAGESHIFT;

	return (npages);
}

/*
 * Count the number of available pages and the number of
 * chunks in the list of available memory.
 */
void
size_physavail(
	u_longlong_t	*physavail,
	size_t		nelems,
	pgcnt_t		*npages,
	int		*memblocks)
{
	size_t	i;

	*npages = 0;
	*memblocks = 0;
	for (i = 0; i < nelems; i += 2) {
		*npages += (pgcnt_t)(physavail[i+1] >> PAGESHIFT);
		(*memblocks)++;
	}
}

pgcnt_t
size_virtalloc(u_longlong_t *avail, size_t nelems)
{
	u_longlong_t	start, end;
	pgcnt_t		allocpages = 0;
	uint_t		hole_allocated = 0;
	uint_t		i;

	for (i = 0; i < (nelems - 2); i += 2) {
		start = avail[i] + avail[i + 1];
		end = avail[i + 2];

		/*
		 * Notes:
		 *
		 * (1) OBP on platforms with US I/II pre-allocates the hole
		 *     represented by [spec_hole_start, spec_hole_end);
		 *     pre-allocation is done to make this range unavailable
		 *     for any allocation.
		 *
		 * (2) OBP on starcat always pre-allocates the hole similar to
		 *     platforms with US I/II.
		 *
		 * (3) OBP on serengeti does _not_ pre-allocate the hole.
		 *
		 * (4) OBP ignores Spitfire Errata #21; i.e. it does _not_
		 *     fill up or pre-allocate an additional 4GB on both sides
		 *     of the hole.
		 *
		 * (5) kernel virtual range [spec_hole_start, spec_hole_end)
		 *     is _not_ used on any platform including those with
		 *     UltraSPARC III where there is no hole.
		 *
		 * Algorithm:
		 *
		 * Check if range [spec_hole_start, spec_hole_end) is
		 * pre-allocated by OBP; if so, subtract that range from
		 * allocpages.
		 */
		if (end >= spec_hole_end && start <= spec_hole_start)
			hole_allocated = 1;

		allocpages += btopr(end - start);
	}

	if (hole_allocated)
		allocpages -= btop(spec_hole_end - spec_hole_start);

	return (allocpages);
}

/*
 * Returns the max contiguous physical memory present in the
 * memlist "physavail".
 */
uint64_t
get_max_phys_size(
	struct memlist	*physavail)
{
	uint64_t	max_size = 0;

	for (; physavail; physavail = physavail->next) {
		if (physavail->size > max_size)
			max_size = physavail->size;
	}

	return (max_size);
}

/*
 * Copy boot's physavail list deducting memory at "start"
 * for "size" bytes.
 */
int
copy_physavail(
	u_longlong_t	*src,
	size_t		nelems,
	struct memlist	**dstp,
	uint_t		start,
	uint_t		size)
{
	struct memlist *dst, *prev;
	uint_t end1;
	int deducted = 0;
	size_t	i;

	dst = *dstp;
	prev = dst;
	end1 = start + size;

	for (i = 0; i < nelems; i += 2) {
		uint64_t addr, lsize, end2;

		addr = src[i];
		lsize = src[i+1];
		end2 = addr + lsize;

		if ((size != 0) && start >= addr && end1 <= end2) {
			/* deducted range in this chunk */
			deducted = 1;
			if (start == addr) {
				/* abuts start of chunk */
				if (end1 == end2)
					/* is equal to the chunk */
					continue;
				dst->address = end1;
				dst->size = lsize - size;
			} else if (end1 == end2) {
				/* abuts end of chunk */
				dst->address = addr;
				dst->size = lsize - size;
			} else {
				/* in the middle of the chunk */
				dst->address = addr;
				dst->size = start - addr;
				dst->next = 0;
				if (prev == dst) {
					dst->prev = 0;
					dst++;
				} else {
					dst->prev = prev;
					prev->next = dst;
					dst++;
					prev++;
				}
				dst->address = end1;
				dst->size = end2 - end1;
			}
			dst->next = 0;
			if (prev == dst) {
				dst->prev = 0;
				dst++;
			} else {
				dst->prev = prev;
				prev->next = dst;
				dst++;
				prev++;
			}
		} else {
			dst->address = src[i];
			dst->size = src[i+1];
			dst->next = 0;
			if (prev == dst) {
				dst->prev = 0;
				dst++;
			} else {
				dst->prev = prev;
				prev->next = dst;
				dst++;
				prev++;
			}
		}
	}

	*dstp = dst;
	return (deducted);
}

struct vnode prom_ppages;

/*
 * Find the pages allocated by the prom by diffing the original
 * phys_avail list and the current list.  In the difference, the
 * pages not locked belong to the PROM.  (The kernel has already locked
 * and removed all the pages it has allocated from the freelist; this
 * routine removes the remaining "free" pages that really belong to the
 * PROM and hashes them in on the 'prom_pages' vnode.)
 */
void
fix_prom_pages(struct memlist *orig, struct memlist *new)
{
	struct memlist *list, *nlist;
	extern int kcage_on;

	nlist = new;
	for (list = orig; list; list = list->next) {
		uint64_t pa, end;
		pfn_t pfnum;
		page_t *pp;

		if (list->address == nlist->address &&
		    list->size == nlist->size) {
			nlist = nlist->next ? nlist->next : nlist;
			continue;
		}

		/*
		 * Loop through the old list looking to
		 * see if each page is still in the new one.
		 * If a page is not in the new list then we
		 * check to see if it is locked permanently.
		 * If so, the kernel allocated and owns it.
		 * If not, then the prom must own it.  We
		 * remove any pages found to be owned by the prom
		 * from the freelist.
		 */
		end = list->address + list->size;
		for (pa = list->address; pa < end; pa += PAGESIZE) {

			if (address_in_memlist(new, pa, PAGESIZE))
				continue;

			pfnum = (pfn_t)(pa >> PAGESHIFT);
			if ((pp = page_numtopp_nolock(pfnum)) == NULL)
				cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);

			/*
			 * must break up any large pages that may have
			 * constituent pages being utilized for
			 * BOP_ALLOC()'s.  page_reclaim() can't handle
			 * large pages.
			 */
			if (pp->p_szc != 0)
				page_boot_demote(pp);

			if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
				/*
				 * Ahhh yes, a prom page:
				 * suck it off the freelist,
				 * lock it, and hash it in on the prom_pages vp.
				 */
				if (page_trylock(pp, SE_EXCL) == 0)
					cmn_err(CE_PANIC, "prom page locked");

				(void) page_reclaim(pp, NULL);
				/*
				 * XXX	vnode offsets on the prom_ppages vnode
				 *	are page numbers (gack) for >32 bit
				 *	physical memory machines.
				 */
				(void) page_hashin(pp, &prom_ppages,
				    (offset_t)pfnum, NULL);

				if (kcage_on) {
					ASSERT(pp->p_szc == 0);
					PP_SETNORELOC(pp);
				}
				(void) page_pp_lock(pp, 0, 1);
				page_downgrade(pp);
			}
		}
		nlist = nlist->next ? nlist->next : nlist;
	}
}

/*
 * Find the page number of the highest installed physical
 * page and the number of pages installed (one cannot be
 * calculated from the other because memory isn't necessarily
 * contiguous).
 */
void
installed_top_size_memlist_array(
	u_longlong_t *list,	/* base of array */
	size_t	nelems,		/* number of elements */
	pfn_t *topp,		/* return ptr for top value */
	pgcnt_t *sumpagesp)	/* return ptr for sum of installed pages */
{
	pfn_t top = 0;
	pgcnt_t sumpages = 0;
	pfn_t highp;		/* high page in a chunk */
	size_t i;

	for (i = 0; i < nelems; i += 2) {
		highp = (list[i] + list[i+1] - 1) >> PAGESHIFT;
		if (top < highp)
			top = highp;
		sumpages += (list[i+1] >> PAGESHIFT);
	}

	*topp = top;
	*sumpagesp = sumpages;
}

/*
 * Copy a memory list.  Used in startup() to copy boot's
 * memory lists to the kernel.
 */
void
copy_memlist(
	u_longlong_t	*src,
	size_t		nelems,
	struct memlist	**dstp)
{
	struct memlist *dst, *prev;
	size_t	i;

	dst = *dstp;
	prev = dst;

	for (i = 0; i < nelems; i += 2) {
		dst->address = src[i];
		dst->size = src[i+1];
		dst->next = 0;
		if (prev == dst) {
			dst->prev = 0;
			dst++;
		} else {
			dst->prev = prev;
			prev->next = dst;
			dst++;
			prev++;
		}
	}

	*dstp = dst;
}

static struct bootmem_props {
	char		*name;
	u_longlong_t	*ptr;
	size_t		nelems;		/* actual number of elements */
	size_t		bufsize;	/* length of allocated buffer */
} bootmem_props[] = {
	{ "phys-installed", NULL, 0, 0 },
	{ "phys-avail", NULL, 0, 0 },
	{ "virt-avail", NULL, 0, 0 },
	{ NULL, NULL, 0, 0 }
};

#define	PHYSINSTALLED	0
#define	PHYSAVAIL	1
#define	VIRTAVAIL	2

/*
 * Retrieve boot's "phys-installed", "phys-avail" and "virt-avail"
 * memory list properties, growing the property buffers as needed.
 */
void
copy_boot_memlists(u_longlong_t **physinstalled, size_t *physinstalled_len,
    u_longlong_t **physavail, size_t *physavail_len,
    u_longlong_t **virtavail, size_t *virtavail_len)
{
	int	align = BO_ALIGN_L3;
	size_t	len;
	struct bootmem_props *tmp = bootmem_props;

tryagain:
	for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
		len = BOP_GETPROPLEN(bootops, tmp->name);
		if (len == 0) {
			panic("cannot get length of \"%s\" property",
			    tmp->name);
		}
		tmp->nelems = len / sizeof (u_longlong_t);
		len = roundup(len, PAGESIZE);
		if (len <= tmp->bufsize)
			continue;
		/* need to allocate more */
		if (tmp->ptr) {
			BOP_FREE(bootops, (caddr_t)tmp->ptr, tmp->bufsize);
			tmp->ptr = NULL;
			tmp->bufsize = 0;
		}
		tmp->bufsize = len;
		tmp->ptr = (void *)BOP_ALLOC(bootops, 0, tmp->bufsize, align);
		if (tmp->ptr == NULL)
			panic("cannot allocate %lu bytes for \"%s\" property",
			    tmp->bufsize, tmp->name);
	}
	/*
	 * take the most current snapshot we can by calling memory-update
	 */
	if (BOP_GETPROPLEN(bootops, "memory-update") == 0)
		(void) BOP_GETPROP(bootops,
"memory-update", NULL); 491 492 /* did the sizes change? */ 493 for (tmp = bootmem_props; tmp->name != NULL; tmp++) { 494 len = BOP_GETPROPLEN(bootops, tmp->name); 495 tmp->nelems = len / sizeof (u_longlong_t); 496 len = roundup(len, PAGESIZE); 497 if (len > tmp->bufsize) { 498 /* ick. Free them all and try again */ 499 for (tmp = bootmem_props; tmp->name != NULL; tmp++) { 500 BOP_FREE(bootops, (caddr_t)tmp->ptr, 501 tmp->bufsize); 502 tmp->ptr = NULL; 503 tmp->bufsize = 0; 504 } 505 goto tryagain; 506 } 507 } 508 509 /* now we can retrieve the properties */ 510 for (tmp = bootmem_props; tmp->name != NULL; tmp++) { 511 if (BOP_GETPROP(bootops, tmp->name, tmp->ptr) == -1) { 512 panic("cannot retrieve \"%s\" property", 513 tmp->name); 514 } 515 } 516 *physinstalled = bootmem_props[PHYSINSTALLED].ptr; 517 *physinstalled_len = bootmem_props[PHYSINSTALLED].nelems; 518 519 *physavail = bootmem_props[PHYSAVAIL].ptr; 520 *physavail_len = bootmem_props[PHYSAVAIL].nelems; 521 522 *virtavail = bootmem_props[VIRTAVAIL].ptr; 523 *virtavail_len = bootmem_props[VIRTAVAIL].nelems; 524 } 525 526 527 /* 528 * Find the page number of the highest installed physical 529 * page and the number of pages installed (one cannot be 530 * calculated from the other because memory isn't necessarily 531 * contiguous). 532 */ 533 void 534 installed_top_size( 535 struct memlist *list, /* pointer to start of installed list */ 536 pfn_t *topp, /* return ptr for top value */ 537 pgcnt_t *sumpagesp) /* return prt for sum of installed pages */ 538 { 539 pfn_t top = 0; 540 pfn_t highp; /* high page in a chunk */ 541 pgcnt_t sumpages = 0; 542 543 for (; list; list = list->next) { 544 highp = (list->address + list->size - 1) >> PAGESHIFT; 545 if (top < highp) 546 top = highp; 547 sumpages += (uint_t)(list->size >> PAGESHIFT); 548 } 549 550 *topp = top; 551 *sumpagesp = sumpages; 552 } 553