xref: /titanic_44/usr/src/uts/sun4/os/memlist.c (revision d485aa23b5e424dd136afdf657683389f93f72d6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/signal.h>
33 #include <sys/systm.h>
34 #include <sys/user.h>
35 #include <sys/mman.h>
36 #include <sys/class.h>
37 #include <sys/proc.h>
38 #include <sys/procfs.h>
39 #include <sys/kmem.h>
40 #include <sys/cred.h>
41 #include <sys/archsystm.h>
42 #include <sys/machsystm.h>
43 
44 #include <sys/reboot.h>
45 #include <sys/uadmin.h>
46 
47 #include <sys/vfs.h>
48 #include <sys/vnode.h>
49 #include <sys/session.h>
50 #include <sys/ucontext.h>
51 
52 #include <sys/dnlc.h>
53 #include <sys/var.h>
54 #include <sys/cmn_err.h>
55 #include <sys/debug.h>
56 #include <sys/thread.h>
57 #include <sys/vtrace.h>
58 #include <sys/consdev.h>
59 #include <sys/frame.h>
60 #include <sys/stack.h>
61 #include <sys/swap.h>
62 #include <sys/vmparam.h>
63 #include <sys/cpuvar.h>
64 
65 #include <sys/privregs.h>
66 
67 #include <vm/hat.h>
68 #include <vm/anon.h>
69 #include <vm/as.h>
70 #include <vm/page.h>
71 #include <vm/seg.h>
72 #include <vm/seg_kmem.h>
73 #include <vm/seg_map.h>
74 #include <vm/seg_vn.h>
75 
76 #include <sys/exec.h>
77 #include <sys/acct.h>
78 #include <sys/modctl.h>
79 #include <sys/tuneable.h>
80 
81 #include <c2/audit.h>
82 
83 #include <sys/trap.h>
84 #include <sys/sunddi.h>
85 #include <sys/bootconf.h>
86 #include <sys/memlist.h>
87 #include <sys/memlist_plat.h>
88 #include <sys/systeminfo.h>
89 #include <sys/promif.h>
90 
/*
 * Bounds of the virtual address hole [spec_hole_start, spec_hole_end).
 * size_virtalloc() checks whether a single gap between consecutive
 * entries of the list it is given covers this whole range and, if so,
 * leaves the hole out of its count of allocated pages.
 */
91 u_longlong_t	spec_hole_start = 0x80000000000ull;
92 u_longlong_t	spec_hole_end = 0xfffff80000000000ull;
93 
94 /*
95  * Count the number of available pages and the number of
96  * chunks in the list of available memory.
97  */
98 void
99 size_physavail(
100 	u_longlong_t	*physavail,
101 	size_t		nelems,
102 	pgcnt_t		*npages,
103 	int		*memblocks)
104 {
105 	size_t	i;
106 
107 	*npages = 0;
108 	*memblocks = 0;
109 	for (i = 0; i < nelems; i += 2) {
110 		*npages += (pgcnt_t)(physavail[i+1] >> PAGESHIFT);
111 		(*memblocks)++;
112 	}
113 }
114 
115 pgcnt_t
116 size_virtalloc(u_longlong_t *avail, size_t nelems)
117 {
118 
119 	u_longlong_t	start, end;
120 	pgcnt_t		allocpages = 0;
121 	uint_t		hole_allocated = 0;
122 	uint_t		i;
123 
124 	for (i = 0; i < (nelems - 2); i += 2) {
125 
126 		start = avail[i] + avail[i + 1];
127 		end = avail[i + 2];
128 
129 		/*
130 		 * Notes:
131 		 *
132 		 * (1) OBP on platforms with US I/II pre-allocates the hole
133 		 * represented by [spec_hole_start, spec_hole_end);
134 		 * pre-allocation is done to make this range unavailable
135 		 * for any allocation.
136 		 *
137 		 * (2) OBP on starcat always pre-allocates the hole similar to
138 		 * platforms with US I/II.
139 		 *
140 		 * (3) OBP on serengeti does _not_ pre-allocate the hole.
141 		 *
142 		 * (4) OBP ignores Spitfire Errata #21; i.e. it does _not_
143 		 * fill up or pre-allocate an additional 4GB on both sides
144 		 * of the hole.
145 		 *
146 		 * (5) kernel virtual range [spec_hole_start, spec_hole_end)
147 		 * is _not_ used on any platform including those with
148 		 * UltraSPARC III where there is no hole.
149 		 *
150 		 * Algorithm:
151 		 *
152 		 * Check if range [spec_hole_start, spec_hole_end) is
153 		 * pre-allocated by OBP; if so, subtract that range from
154 		 * allocpages.
155 		 */
156 		if (end >= spec_hole_end && start <= spec_hole_start)
157 			hole_allocated = 1;
158 
159 		allocpages += btopr(end - start);
160 	}
161 
162 	if (hole_allocated)
163 		allocpages -= btop(spec_hole_end - spec_hole_start);
164 
165 	return (allocpages);
166 }
167 
168 /*
169  * Returns the max contiguous physical memory present in the
170  * memlist "physavail".
171  */
172 uint64_t
173 get_max_phys_size(
174 	struct memlist	*physavail)
175 {
176 	uint64_t	max_size = 0;
177 
178 	for (; physavail; physavail = physavail->next) {
179 		if (physavail->size > max_size)
180 			max_size = physavail->size;
181 	}
182 
183 	return (max_size);
184 }
185 
186 
187 /*
188  * Copy boot's physavail list deducting memory at "start"
189  * for "size" bytes.
190  */
191 int
192 copy_physavail(
193 	u_longlong_t	*src,
194 	size_t		nelems,
195 	struct memlist	**dstp,
196 	uint_t		start,
197 	uint_t		size)
198 {
199 	struct memlist *dst, *prev;
200 	uint_t end1;
201 	int deducted = 0;
202 	size_t	i;
203 
204 	dst = *dstp;
205 	prev = dst;
206 	end1 = start + size;
207 
208 	for (i = 0; i < nelems; i += 2) {
209 		uint64_t addr, lsize, end2;
210 
211 		addr = src[i];
212 		lsize = src[i+1];
213 		end2 = addr + lsize;
214 
215 		if ((size != 0) && start >= addr && end1 <= end2) {
216 			/* deducted range in this chunk */
217 			deducted = 1;
218 			if (start == addr) {
219 				/* abuts start of chunk */
220 				if (end1 == end2)
221 					/* is equal to the chunk */
222 					continue;
223 				dst->address = end1;
224 				dst->size = lsize - size;
225 			} else if (end1 == end2) {
226 				/* abuts end of chunk */
227 				dst->address = addr;
228 				dst->size = lsize - size;
229 			} else {
230 				/* in the middle of the chunk */
231 				dst->address = addr;
232 				dst->size = start - addr;
233 				dst->next = 0;
234 				if (prev == dst) {
235 					dst->prev = 0;
236 					dst++;
237 				} else {
238 					dst->prev = prev;
239 					prev->next = dst;
240 					dst++;
241 					prev++;
242 				}
243 				dst->address = end1;
244 				dst->size = end2 - end1;
245 			}
246 			dst->next = 0;
247 			if (prev == dst) {
248 				dst->prev = 0;
249 				dst++;
250 			} else {
251 				dst->prev = prev;
252 				prev->next = dst;
253 				dst++;
254 				prev++;
255 			}
256 		} else {
257 			dst->address = src[i];
258 			dst->size = src[i+1];
259 			dst->next = 0;
260 			if (prev == dst) {
261 				dst->prev = 0;
262 				dst++;
263 			} else {
264 				dst->prev = prev;
265 				prev->next = dst;
266 				dst++;
267 				prev++;
268 			}
269 		}
270 	}
271 
272 	*dstp = dst;
273 	return (deducted);
274 }
275 
276 struct vnode prom_ppages;
277 
278 /*
279  * Find the pages allocated by the prom by diffing the original
280  * phys_avail list and the current list.  In the difference, the
281  * pages not locked belong to the PROM.  (The kernel has already locked
282  * and removed all the pages it has allocated from the freelist, this
283  * routine removes the remaining "free" pages that really belong to the
284  * PROM and hashs them in on the 'prom_pages' vnode.)
285  */
286 void
287 fix_prom_pages(struct memlist *orig, struct memlist *new)
288 {
289 	struct memlist *list, *nlist;
290 	extern int kcage_on;
291 
292 	nlist = new;
293 	for (list = orig; list; list = list->next) {
294 		uint64_t pa, end;
295 		pfn_t pfnum;
296 		page_t *pp;
297 
298 		if (list->address == nlist->address &&
299 		    list->size == nlist->size) {
300 			nlist = nlist->next ? nlist->next : nlist;
301 			continue;
302 		}
303 
304 		/*
305 		 * Loop through the old list looking to
306 		 * see if each page is still in the new one.
307 		 * If a page is not in the new list then we
308 		 * check to see if it locked permanently.
309 		 * If so, the kernel allocated and owns it.
310 		 * If not, then the prom must own it. We
311 		 * remove any pages found to owned by the prom
312 		 * from the freelist.
313 		 */
314 		end = list->address + list->size;
315 		for (pa = list->address; pa < end; pa += PAGESIZE) {
316 
317 			if (address_in_memlist(new, pa, PAGESIZE))
318 				continue;
319 
320 			pfnum = (pfn_t)(pa >> PAGESHIFT);
321 			if ((pp = page_numtopp_nolock(pfnum)) == NULL)
322 				cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);
323 
324 			/*
325 			 * must break up any large pages that may have
326 			 * constituent pages being utilized for
327 			 * BOP_ALLOC()'s. page_reclaim() can't handle
328 			 * large pages.
329 			 */
330 			if (pp->p_szc != 0)
331 				page_boot_demote(pp);
332 
333 			if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
334 				/*
335 				 * Ahhh yes, a prom page,
336 				 * suck it off the freelist,
337 				 * lock it, and hashin on prom_pages vp.
338 				 */
339 				if (page_trylock(pp, SE_EXCL) == 0)
340 					cmn_err(CE_PANIC, "prom page locked");
341 
342 				(void) page_reclaim(pp, NULL);
343 				/*
344 				 * XXX	vnode offsets on the prom_ppages vnode
345 				 *	are page numbers (gack) for >32 bit
346 				 *	physical memory machines.
347 				 */
348 				(void) page_hashin(pp, &prom_ppages,
349 					(offset_t)pfnum, NULL);
350 
351 				if (kcage_on) {
352 					ASSERT(pp->p_szc == 0);
353 					PP_SETNORELOC(pp);
354 				}
355 				(void) page_pp_lock(pp, 0, 1);
356 				page_downgrade(pp);
357 			}
358 		}
359 		nlist = nlist->next ? nlist->next : nlist;
360 	}
361 }
362 
363 /*
364  * Find the page number of the highest installed physical
365  * page and the number of pages installed (one cannot be
366  * calculated from the other because memory isn't necessarily
367  * contiguous).
368  */
369 void
370 installed_top_size_memlist_array(
371 	u_longlong_t *list,	/* base of array */
372 	size_t	nelems,		/* number of elements */
373 	pfn_t *topp,		/* return ptr for top value */
374 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
375 {
376 	pfn_t top = 0;
377 	pgcnt_t sumpages = 0;
378 	pfn_t highp;		/* high page in a chunk */
379 	size_t i;
380 
381 	for (i = 0; i < nelems; i += 2) {
382 		highp = (list[i] + list[i+1] - 1) >> PAGESHIFT;
383 		if (top < highp)
384 			top = highp;
385 		sumpages += (list[i+1] >> PAGESHIFT);
386 	}
387 
388 	*topp = top;
389 	*sumpagesp = sumpages;
390 }
391 
392 /*
393  * Copy a memory list.  Used in startup() to copy boot's
394  * memory lists to the kernel.
395  */
396 void
397 copy_memlist(
398 	u_longlong_t	*src,
399 	size_t		nelems,
400 	struct memlist	**dstp)
401 {
402 	struct memlist *dst, *prev;
403 	size_t	i;
404 
405 	dst = *dstp;
406 	prev = dst;
407 
408 	for (i = 0; i < nelems; i += 2) {
409 		dst->address = src[i];
410 		dst->size = src[i+1];
411 		dst->next = 0;
412 		if (prev == dst) {
413 			dst->prev = 0;
414 			dst++;
415 		} else {
416 			dst->prev = prev;
417 			prev->next = dst;
418 			dst++;
419 			prev++;
420 		}
421 	}
422 
423 	*dstp = dst;
424 }
425 
426 static struct bootmem_props {
427 	char		*name;
428 	u_longlong_t	*ptr;
429 	size_t		nelems;		/* actual number of elements */
430 	size_t		bufsize;	/* length of allocated buffer */
431 } bootmem_props[] = {
432 	{ "phys-installed", NULL, 0, 0 },
433 	{ "phys-avail", NULL, 0, 0 },
434 	{ "virt-avail", NULL, 0, 0 },
435 	{ NULL, NULL, 0, 0 }
436 };
437 
438 #define	PHYSINSTALLED	0
439 #define	PHYSAVAIL	1
440 #define	VIRTAVAIL	2
441 
442 void
443 copy_boot_memlists(u_longlong_t **physinstalled, size_t *physinstalled_len,
444     u_longlong_t **physavail, size_t *physavail_len,
445     u_longlong_t **virtavail, size_t *virtavail_len)
446 {
447 	int	align = BO_ALIGN_L3;
448 	size_t	len;
449 	struct bootmem_props *tmp = bootmem_props;
450 
451 tryagain:
452 	for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
453 		len = BOP_GETPROPLEN(bootops, tmp->name);
454 		if (len == 0) {
455 			panic("cannot get length of \"%s\" property",
456 			    tmp->name);
457 		}
458 		tmp->nelems = len / sizeof (u_longlong_t);
459 		len = roundup(len, PAGESIZE);
460 		if (len <= tmp->bufsize)
461 			continue;
462 		/* need to allocate more */
463 		if (tmp->ptr) {
464 			BOP_FREE(bootops, (caddr_t)tmp->ptr, tmp->bufsize);
465 			tmp->ptr = NULL;
466 			tmp->bufsize = 0;
467 		}
468 		tmp->bufsize = len;
469 		tmp->ptr = (void *)BOP_ALLOC(bootops, 0, tmp->bufsize, align);
470 		if (tmp->ptr == NULL)
471 			panic("cannot allocate %lu bytes for \"%s\" property",
472 			    tmp->bufsize, tmp->name);
473 
474 	}
475 	/*
476 	 * take the most current snapshot we can by calling mem-update
477 	 */
478 	if (BOP_GETPROPLEN(bootops, "memory-update") == 0)
479 		(void) BOP_GETPROP(bootops, "memory-update", NULL);
480 
481 	/* did the sizes change? */
482 	for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
483 		len = BOP_GETPROPLEN(bootops, tmp->name);
484 		tmp->nelems = len / sizeof (u_longlong_t);
485 		len = roundup(len, PAGESIZE);
486 		if (len > tmp->bufsize) {
487 			/* ick. Free them all and try again */
488 			for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
489 				BOP_FREE(bootops, (caddr_t)tmp->ptr,
490 				    tmp->bufsize);
491 				tmp->ptr = NULL;
492 				tmp->bufsize = 0;
493 			}
494 			goto tryagain;
495 		}
496 	}
497 
498 	/* now we can retrieve the properties */
499 	for (tmp = bootmem_props; tmp->name != NULL; tmp++) {
500 		if (BOP_GETPROP(bootops, tmp->name, tmp->ptr) == -1) {
501 			panic("cannot retrieve \"%s\" property",
502 			    tmp->name);
503 		}
504 	}
505 	*physinstalled = bootmem_props[PHYSINSTALLED].ptr;
506 	*physinstalled_len = bootmem_props[PHYSINSTALLED].nelems;
507 
508 	*physavail = bootmem_props[PHYSAVAIL].ptr;
509 	*physavail_len = bootmem_props[PHYSAVAIL].nelems;
510 
511 	*virtavail = bootmem_props[VIRTAVAIL].ptr;
512 	*virtavail_len = bootmem_props[VIRTAVAIL].nelems;
513 }
514 
515 
516 /*
517  * Find the page number of the highest installed physical
518  * page and the number of pages installed (one cannot be
519  * calculated from the other because memory isn't necessarily
520  * contiguous).
521  */
522 void
523 installed_top_size(
524 	struct memlist *list,	/* pointer to start of installed list */
525 	pfn_t *topp,		/* return ptr for top value */
526 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
527 {
528 	pfn_t top = 0;
529 	pfn_t highp;		/* high page in a chunk */
530 	pgcnt_t sumpages = 0;
531 
532 	for (; list; list = list->next) {
533 		highp = (list->address + list->size - 1) >> PAGESHIFT;
534 		if (top < highp)
535 			top = highp;
536 		sumpages += (uint_t)(list->size >> PAGESHIFT);
537 	}
538 
539 	*topp = top;
540 	*sumpagesp = sumpages;
541 }
542