xref: /titanic_44/usr/src/uts/sun4/os/memlist.c (revision 44743693dce3212f5edba623e0cb0327bd4337a3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/signal.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/mman.h>
35 #include <sys/class.h>
36 #include <sys/proc.h>
37 #include <sys/procfs.h>
38 #include <sys/kmem.h>
39 #include <sys/cred.h>
40 #include <sys/archsystm.h>
41 #include <sys/machsystm.h>
42 
43 #include <sys/reboot.h>
44 #include <sys/uadmin.h>
45 
46 #include <sys/vfs.h>
47 #include <sys/vnode.h>
48 #include <sys/session.h>
49 #include <sys/ucontext.h>
50 
51 #include <sys/dnlc.h>
52 #include <sys/var.h>
53 #include <sys/cmn_err.h>
54 #include <sys/debug.h>
55 #include <sys/thread.h>
56 #include <sys/vtrace.h>
57 #include <sys/consdev.h>
58 #include <sys/frame.h>
59 #include <sys/stack.h>
60 #include <sys/swap.h>
61 #include <sys/vmparam.h>
62 #include <sys/cpuvar.h>
63 
64 #include <sys/privregs.h>
65 
66 #include <vm/hat.h>
67 #include <vm/anon.h>
68 #include <vm/as.h>
69 #include <vm/page.h>
70 #include <vm/seg.h>
71 #include <vm/seg_kmem.h>
72 #include <vm/seg_map.h>
73 #include <vm/seg_vn.h>
74 
75 #include <sys/exec.h>
76 #include <sys/acct.h>
77 #include <sys/modctl.h>
78 #include <sys/tuneable.h>
79 
80 #include <c2/audit.h>
81 
82 #include <sys/trap.h>
83 #include <sys/sunddi.h>
84 #include <sys/bootconf.h>
85 #include <sys/memlist.h>
86 #include <sys/memlist_plat.h>
87 #include <sys/systeminfo.h>
88 #include <sys/promif.h>
89 
90 u_longlong_t	spec_hole_start = 0x80000000000ull;
91 u_longlong_t	spec_hole_end = 0xfffff80000000000ull;
92 
93 pgcnt_t
94 num_phys_pages()
95 {
96 	pgcnt_t npages = 0;
97 	struct memlist *mp;
98 
99 	for (mp = phys_install; mp != NULL; mp = mp->next)
100 		npages += mp->size >> PAGESHIFT;
101 
102 	return (npages);
103 }
104 
105 /*
106  * Count the number of available pages and the number of
107  * chunks in the list of available memory.
108  */
109 void
110 size_physavail(
111 	u_longlong_t	*physavail,
112 	size_t		nelems,
113 	pgcnt_t		*npages,
114 	int		*memblocks)
115 {
116 	size_t	i;
117 
118 	*npages = 0;
119 	*memblocks = 0;
120 	for (i = 0; i < nelems; i += 2) {
121 		*npages += (pgcnt_t)(physavail[i+1] >> PAGESHIFT);
122 		(*memblocks)++;
123 	}
124 }
125 
126 pgcnt_t
127 size_virtalloc(u_longlong_t *avail, size_t nelems)
128 {
129 
130 	u_longlong_t	start, end;
131 	pgcnt_t		allocpages = 0;
132 	uint_t		hole_allocated = 0;
133 	uint_t		i;
134 
135 	for (i = 0; i < (nelems - 2); i += 2) {
136 
137 		start = avail[i] + avail[i + 1];
138 		end = avail[i + 2];
139 
140 		/*
141 		 * Notes:
142 		 *
143 		 * (1) OBP on platforms with US I/II pre-allocates the hole
144 		 * represented by [spec_hole_start, spec_hole_end);
145 		 * pre-allocation is done to make this range unavailable
146 		 * for any allocation.
147 		 *
148 		 * (2) OBP on starcat always pre-allocates the hole similar to
149 		 * platforms with US I/II.
150 		 *
151 		 * (3) OBP on serengeti does _not_ pre-allocate the hole.
152 		 *
153 		 * (4) OBP ignores Spitfire Errata #21; i.e. it does _not_
154 		 * fill up or pre-allocate an additional 4GB on both sides
155 		 * of the hole.
156 		 *
157 		 * (5) kernel virtual range [spec_hole_start, spec_hole_end)
158 		 * is _not_ used on any platform including those with
159 		 * UltraSPARC III where there is no hole.
160 		 *
161 		 * Algorithm:
162 		 *
163 		 * Check if range [spec_hole_start, spec_hole_end) is
164 		 * pre-allocated by OBP; if so, subtract that range from
165 		 * allocpages.
166 		 */
167 		if (end >= spec_hole_end && start <= spec_hole_start)
168 			hole_allocated = 1;
169 
170 		allocpages += btopr(end - start);
171 	}
172 
173 	if (hole_allocated)
174 		allocpages -= btop(spec_hole_end - spec_hole_start);
175 
176 	return (allocpages);
177 }
178 
179 /*
180  * Returns the max contiguous physical memory present in the
181  * memlist "physavail".
182  */
183 uint64_t
184 get_max_phys_size(
185 	struct memlist	*physavail)
186 {
187 	uint64_t	max_size = 0;
188 
189 	for (; physavail; physavail = physavail->next) {
190 		if (physavail->size > max_size)
191 			max_size = physavail->size;
192 	}
193 
194 	return (max_size);
195 }
196 
197 
198 /*
199  * Copy boot's physavail list deducting memory at "start"
200  * for "size" bytes.
201  */
202 int
203 copy_physavail(
204 	u_longlong_t	*src,
205 	size_t		nelems,
206 	struct memlist	**dstp,
207 	uint_t		start,
208 	uint_t		size)
209 {
210 	struct memlist *dst, *prev;
211 	uint_t end1;
212 	int deducted = 0;
213 	size_t	i;
214 
215 	dst = *dstp;
216 	prev = dst;
217 	end1 = start + size;
218 
219 	for (i = 0; i < nelems; i += 2) {
220 		uint64_t addr, lsize, end2;
221 
222 		addr = src[i];
223 		lsize = src[i+1];
224 		end2 = addr + lsize;
225 
226 		if ((size != 0) && start >= addr && end1 <= end2) {
227 			/* deducted range in this chunk */
228 			deducted = 1;
229 			if (start == addr) {
230 				/* abuts start of chunk */
231 				if (end1 == end2)
232 					/* is equal to the chunk */
233 					continue;
234 				dst->address = end1;
235 				dst->size = lsize - size;
236 			} else if (end1 == end2) {
237 				/* abuts end of chunk */
238 				dst->address = addr;
239 				dst->size = lsize - size;
240 			} else {
241 				/* in the middle of the chunk */
242 				dst->address = addr;
243 				dst->size = start - addr;
244 				dst->next = 0;
245 				if (prev == dst) {
246 					dst->prev = 0;
247 					dst++;
248 				} else {
249 					dst->prev = prev;
250 					prev->next = dst;
251 					dst++;
252 					prev++;
253 				}
254 				dst->address = end1;
255 				dst->size = end2 - end1;
256 			}
257 			dst->next = 0;
258 			if (prev == dst) {
259 				dst->prev = 0;
260 				dst++;
261 			} else {
262 				dst->prev = prev;
263 				prev->next = dst;
264 				dst++;
265 				prev++;
266 			}
267 		} else {
268 			dst->address = src[i];
269 			dst->size = src[i+1];
270 			dst->next = 0;
271 			if (prev == dst) {
272 				dst->prev = 0;
273 				dst++;
274 			} else {
275 				dst->prev = prev;
276 				prev->next = dst;
277 				dst++;
278 				prev++;
279 			}
280 		}
281 	}
282 
283 	*dstp = dst;
284 	return (deducted);
285 }
286 
287 struct vnode prom_ppages;
288 
289 /*
290  * Find the pages allocated by the prom by diffing the original
291  * phys_avail list and the current list.  In the difference, the
292  * pages not locked belong to the PROM.  (The kernel has already locked
293  * and removed all the pages it has allocated from the freelist, this
294  * routine removes the remaining "free" pages that really belong to the
295  * PROM and hashs them in on the 'prom_pages' vnode.)
296  */
297 void
298 fix_prom_pages(struct memlist *orig, struct memlist *new)
299 {
300 	struct memlist *list, *nlist;
301 	extern int kcage_on;
302 
303 	nlist = new;
304 	for (list = orig; list; list = list->next) {
305 		uint64_t pa, end;
306 		pfn_t pfnum;
307 		page_t *pp;
308 
309 		if (list->address == nlist->address &&
310 		    list->size == nlist->size) {
311 			nlist = nlist->next ? nlist->next : nlist;
312 			continue;
313 		}
314 
315 		/*
316 		 * Loop through the old list looking to
317 		 * see if each page is still in the new one.
318 		 * If a page is not in the new list then we
319 		 * check to see if it locked permanently.
320 		 * If so, the kernel allocated and owns it.
321 		 * If not, then the prom must own it. We
322 		 * remove any pages found to owned by the prom
323 		 * from the freelist.
324 		 */
325 		end = list->address + list->size;
326 		for (pa = list->address; pa < end; pa += PAGESIZE) {
327 
328 			if (address_in_memlist(new, pa, PAGESIZE))
329 				continue;
330 
331 			pfnum = (pfn_t)(pa >> PAGESHIFT);
332 			if ((pp = page_numtopp_nolock(pfnum)) == NULL)
333 				cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);
334 
335 			/*
336 			 * must break up any large pages that may have
337 			 * constituent pages being utilized for
338 			 * BOP_ALLOC()'s. page_reclaim() can't handle
339 			 * large pages.
340 			 */
341 			if (pp->p_szc != 0)
342 				page_boot_demote(pp);
343 
344 			if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
345 				/*
346 				 * Ahhh yes, a prom page,
347 				 * suck it off the freelist,
348 				 * lock it, and hashin on prom_pages vp.
349 				 */
350 				if (page_trylock(pp, SE_EXCL) == 0)
351 					cmn_err(CE_PANIC, "prom page locked");
352 
353 				(void) page_reclaim(pp, NULL);
354 				/*
355 				 * XXX	vnode offsets on the prom_ppages vnode
356 				 *	are page numbers (gack) for >32 bit
357 				 *	physical memory machines.
358 				 */
359 				(void) page_hashin(pp, &prom_ppages,
360 				    (offset_t)pfnum, NULL);
361 
362 				if (kcage_on) {
363 					ASSERT(pp->p_szc == 0);
364 					PP_SETNORELOC(pp);
365 				}
366 				(void) page_pp_lock(pp, 0, 1);
367 				page_downgrade(pp);
368 			}
369 		}
370 		nlist = nlist->next ? nlist->next : nlist;
371 	}
372 }
373 
374 /*
375  * Find the page number of the highest installed physical
376  * page and the number of pages installed (one cannot be
377  * calculated from the other because memory isn't necessarily
378  * contiguous).
379  */
380 void
381 installed_top_size_memlist_array(
382 	u_longlong_t *list,	/* base of array */
383 	size_t	nelems,		/* number of elements */
384 	pfn_t *topp,		/* return ptr for top value */
385 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
386 {
387 	pfn_t top = 0;
388 	pgcnt_t sumpages = 0;
389 	pfn_t highp;		/* high page in a chunk */
390 	size_t i;
391 
392 	for (i = 0; i < nelems; i += 2) {
393 		highp = (list[i] + list[i+1] - 1) >> PAGESHIFT;
394 		if (top < highp)
395 			top = highp;
396 		sumpages += (list[i+1] >> PAGESHIFT);
397 	}
398 
399 	*topp = top;
400 	*sumpagesp = sumpages;
401 }
402 
403 /*
404  * Copy a memory list.  Used in startup() to copy boot's
405  * memory lists to the kernel.
406  */
407 void
408 copy_memlist(
409 	u_longlong_t	*src,
410 	size_t		nelems,
411 	struct memlist	**dstp)
412 {
413 	struct memlist *dst, *prev;
414 	size_t	i;
415 
416 	dst = *dstp;
417 	prev = dst;
418 
419 	for (i = 0; i < nelems; i += 2) {
420 		dst->address = src[i];
421 		dst->size = src[i+1];
422 		dst->next = 0;
423 		if (prev == dst) {
424 			dst->prev = 0;
425 			dst++;
426 		} else {
427 			dst->prev = prev;
428 			prev->next = dst;
429 			dst++;
430 			prev++;
431 		}
432 	}
433 
434 	*dstp = dst;
435 }
436 
437 static struct bootmem_props {
438 	char		*name;
439 	u_longlong_t	*ptr;
440 	size_t		nelems;		/* actual number of elements */
441 	size_t		bufsize;	/* length of allocated buffer */
442 } bootmem_props[] = {
443 	{ "phys-installed", NULL, 0, 0 },
444 	{ "phys-avail", NULL, 0, 0 },
445 	{ "virt-avail", NULL, 0, 0 },
446 	{ NULL, NULL, 0, 0 }
447 };
448 
449 #define	PHYSINSTALLED	0
450 #define	PHYSAVAIL	1
451 #define	VIRTAVAIL	2
452 
453 void
454 copy_boot_memlists(u_longlong_t **physinstalled, size_t *physinstalled_len,
455     u_longlong_t **physavail, size_t *physavail_len,
456     u_longlong_t **virtavail, size_t *virtavail_len)
457 {
458 	int	align = BO_ALIGN_L3;
459 	size_t	len;
460 	struct bootmem_props *tmp = bootmem_props;
461 
462 tryagain:
463 	for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
464 		len = BOP_GETPROPLEN(bootops, tmp->name);
465 		if (len == 0) {
466 			panic("cannot get length of \"%s\" property",
467 			    tmp->name);
468 		}
469 		tmp->nelems = len / sizeof (u_longlong_t);
470 		len = roundup(len, PAGESIZE);
471 		if (len <= tmp->bufsize)
472 			continue;
473 		/* need to allocate more */
474 		if (tmp->ptr) {
475 			BOP_FREE(bootops, (caddr_t)tmp->ptr, tmp->bufsize);
476 			tmp->ptr = NULL;
477 			tmp->bufsize = 0;
478 		}
479 		tmp->bufsize = len;
480 		tmp->ptr = (void *)BOP_ALLOC(bootops, 0, tmp->bufsize, align);
481 		if (tmp->ptr == NULL)
482 			panic("cannot allocate %lu bytes for \"%s\" property",
483 			    tmp->bufsize, tmp->name);
484 
485 	}
486 	/*
487 	 * take the most current snapshot we can by calling mem-update
488 	 */
489 	if (BOP_GETPROPLEN(bootops, "memory-update") == 0)
490 		(void) BOP_GETPROP(bootops, "memory-update", NULL);
491 
492 	/* did the sizes change? */
493 	for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
494 		len = BOP_GETPROPLEN(bootops, tmp->name);
495 		tmp->nelems = len / sizeof (u_longlong_t);
496 		len = roundup(len, PAGESIZE);
497 		if (len > tmp->bufsize) {
498 			/* ick. Free them all and try again */
499 			for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
500 				BOP_FREE(bootops, (caddr_t)tmp->ptr,
501 				    tmp->bufsize);
502 				tmp->ptr = NULL;
503 				tmp->bufsize = 0;
504 			}
505 			goto tryagain;
506 		}
507 	}
508 
509 	/* now we can retrieve the properties */
510 	for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
511 		if (BOP_GETPROP(bootops, tmp->name, tmp->ptr) == -1) {
512 			panic("cannot retrieve \"%s\" property",
513 			    tmp->name);
514 		}
515 	}
516 	*physinstalled = bootmem_props[PHYSINSTALLED].ptr;
517 	*physinstalled_len = bootmem_props[PHYSINSTALLED].nelems;
518 
519 	*physavail = bootmem_props[PHYSAVAIL].ptr;
520 	*physavail_len = bootmem_props[PHYSAVAIL].nelems;
521 
522 	*virtavail = bootmem_props[VIRTAVAIL].ptr;
523 	*virtavail_len = bootmem_props[VIRTAVAIL].nelems;
524 }
525 
526 
527 /*
528  * Find the page number of the highest installed physical
529  * page and the number of pages installed (one cannot be
530  * calculated from the other because memory isn't necessarily
531  * contiguous).
532  */
533 void
534 installed_top_size(
535 	struct memlist *list,	/* pointer to start of installed list */
536 	pfn_t *topp,		/* return ptr for top value */
537 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
538 {
539 	pfn_t top = 0;
540 	pfn_t highp;		/* high page in a chunk */
541 	pgcnt_t sumpages = 0;
542 
543 	for (; list; list = list->next) {
544 		highp = (list->address + list->size - 1) >> PAGESHIFT;
545 		if (top < highp)
546 			top = highp;
547 		sumpages += (uint_t)(list->size >> PAGESHIFT);
548 	}
549 
550 	*topp = top;
551 	*sumpagesp = sumpages;
552 }
553