xref: /titanic_41/usr/src/uts/sun4/os/memlist.c (revision 9e39c5ba00a55fa05777cc94b148296af305e135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/signal.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/mman.h>
35 #include <sys/class.h>
36 #include <sys/proc.h>
37 #include <sys/procfs.h>
38 #include <sys/kmem.h>
39 #include <sys/cred.h>
40 #include <sys/archsystm.h>
41 #include <sys/machsystm.h>
42 
43 #include <sys/reboot.h>
44 #include <sys/uadmin.h>
45 
46 #include <sys/vfs.h>
47 #include <sys/vnode.h>
48 #include <sys/session.h>
49 #include <sys/ucontext.h>
50 
51 #include <sys/dnlc.h>
52 #include <sys/var.h>
53 #include <sys/cmn_err.h>
54 #include <sys/debug.h>
55 #include <sys/thread.h>
56 #include <sys/vtrace.h>
57 #include <sys/consdev.h>
58 #include <sys/frame.h>
59 #include <sys/stack.h>
60 #include <sys/swap.h>
61 #include <sys/vmparam.h>
62 #include <sys/cpuvar.h>
63 
64 #include <sys/privregs.h>
65 
66 #include <vm/hat.h>
67 #include <vm/anon.h>
68 #include <vm/as.h>
69 #include <vm/page.h>
70 #include <vm/seg.h>
71 #include <vm/seg_kmem.h>
72 #include <vm/seg_map.h>
73 #include <vm/seg_vn.h>
74 #include <vm/vm_dep.h>
75 
76 #include <sys/exec.h>
77 #include <sys/acct.h>
78 #include <sys/modctl.h>
79 #include <sys/tuneable.h>
80 
81 #include <c2/audit.h>
82 
83 #include <sys/trap.h>
84 #include <sys/sunddi.h>
85 #include <sys/bootconf.h>
86 #include <sys/memlist.h>
87 #include <sys/memlist_plat.h>
88 #include <sys/systeminfo.h>
89 #include <sys/promif.h>
90 #include <sys/prom_plat.h>
91 
/*
 * Bounds of the kernel virtual address range
 * [spec_hole_start, spec_hole_end).  size_virtalloc() checks whether a
 * gap between two available ranges covers this whole span and, if so,
 * leaves it out of the page count it returns.
 */
u_longlong_t	spec_hole_start = 0x80000000000ull;
u_longlong_t	spec_hole_end = 0xfffff80000000000ull;
94 
95 pgcnt_t
96 num_phys_pages()
97 {
98 	pgcnt_t npages = 0;
99 	struct memlist *mp;
100 
101 	for (mp = phys_install; mp != NULL; mp = mp->next)
102 		npages += mp->size >> PAGESHIFT;
103 
104 	return (npages);
105 }
106 
107 
108 pgcnt_t
109 size_virtalloc(prom_memlist_t *avail, size_t nelems)
110 {
111 
112 	u_longlong_t	start, end;
113 	pgcnt_t		allocpages = 0;
114 	uint_t		hole_allocated = 0;
115 	uint_t		i;
116 
117 	for (i = 0; i < nelems - 1; i++) {
118 
119 		start = avail[i].addr + avail[i].size;
120 		end = avail[i + 1].addr;
121 
122 		/*
123 		 * Notes:
124 		 *
125 		 * (1) OBP on platforms with US I/II pre-allocates the hole
126 		 * represented by [spec_hole_start, spec_hole_end);
127 		 * pre-allocation is done to make this range unavailable
128 		 * for any allocation.
129 		 *
130 		 * (2) OBP on starcat always pre-allocates the hole similar to
131 		 * platforms with US I/II.
132 		 *
133 		 * (3) OBP on serengeti does _not_ pre-allocate the hole.
134 		 *
135 		 * (4) OBP ignores Spitfire Errata #21; i.e. it does _not_
136 		 * fill up or pre-allocate an additional 4GB on both sides
137 		 * of the hole.
138 		 *
139 		 * (5) kernel virtual range [spec_hole_start, spec_hole_end)
140 		 * is _not_ used on any platform including those with
141 		 * UltraSPARC III where there is no hole.
142 		 *
143 		 * Algorithm:
144 		 *
145 		 * Check if range [spec_hole_start, spec_hole_end) is
146 		 * pre-allocated by OBP; if so, subtract that range from
147 		 * allocpages.
148 		 */
149 		if (end >= spec_hole_end && start <= spec_hole_start)
150 			hole_allocated = 1;
151 
152 		allocpages += btopr(end - start);
153 	}
154 
155 	if (hole_allocated)
156 		allocpages -= btop(spec_hole_end - spec_hole_start);
157 
158 	return (allocpages);
159 }
160 
161 /*
162  * Returns the max contiguous physical memory present in the
163  * memlist "physavail".
164  */
165 uint64_t
166 get_max_phys_size(
167 	struct memlist	*physavail)
168 {
169 	uint64_t	max_size = 0;
170 
171 	for (; physavail; physavail = physavail->next) {
172 		if (physavail->size > max_size)
173 			max_size = physavail->size;
174 	}
175 
176 	return (max_size);
177 }
178 
179 
180 
/*
 * Vnode that less_pages() hashes claimed prom pages onto; the offset
 * used for each page is its page frame number.
 */
struct vnode prom_ppages;
182 
183 static void
184 more_pages(uint64_t base, uint64_t len)
185 {
186 	void kphysm_add();
187 
188 	kphysm_add(base, len, 1);
189 }
190 
191 static void
192 less_pages(uint64_t base, uint64_t len)
193 {
194 	uint64_t pa, end = base + len;
195 	extern int kcage_on;
196 
197 	for (pa = base; pa < end; pa += PAGESIZE) {
198 		pfn_t pfnum;
199 		page_t *pp;
200 
201 		pfnum = (pfn_t)(pa >> PAGESHIFT);
202 		if ((pp = page_numtopp_nolock(pfnum)) == NULL)
203 			cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);
204 
205 		/*
206 		 * must break up any large pages that may have
207 		 * constituent pages being utilized for
208 		 * prom_alloc()'s. page_reclaim() can't handle
209 		 * large pages.
210 		 */
211 		if (pp->p_szc != 0)
212 			page_boot_demote(pp);
213 
214 		if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
215 			/*
216 			 * Ahhh yes, a prom page,
217 			 * suck it off the freelist,
218 			 * lock it, and hashin on prom_pages vp.
219 			 */
220 			if (page_trylock(pp, SE_EXCL) == 0)
221 				cmn_err(CE_PANIC, "prom page locked");
222 
223 			(void) page_reclaim(pp, NULL);
224 			/*
225 			 * vnode offsets on the prom_ppages vnode
226 			 * are page numbers (gack) for >32 bit
227 			 * physical memory machines.
228 			 */
229 			(void) page_hashin(pp, &prom_ppages,
230 			    (offset_t)pfnum, NULL);
231 
232 			if (kcage_on) {
233 				ASSERT(pp->p_szc == 0);
234 				if (PP_ISNORELOC(pp) == 0) {
235 					PP_SETNORELOC(pp);
236 					PLCNT_XFER_NORELOC(pp);
237 				}
238 			}
239 			(void) page_pp_lock(pp, 0, 1);
240 		}
241 	}
242 }
243 
244 void
245 diff_memlists(struct memlist *proto, struct memlist *diff, void (*func)())
246 {
247 	uint64_t p_base, p_end, d_base, d_end;
248 
249 	while (proto != NULL) {
250 		/*
251 		 * find diff item which may overlap with proto item
252 		 * if none, apply func to all of proto item
253 		 */
254 		while (diff != NULL &&
255 		    proto->address >= diff->address + diff->size)
256 			diff = diff->next;
257 		if (diff == NULL) {
258 			(*func)(proto->address, proto->size);
259 			proto = proto->next;
260 			continue;
261 		}
262 		if (proto->address == diff->address &&
263 		    proto->size == diff->size) {
264 			proto = proto->next;
265 			diff = diff->next;
266 			continue;
267 		}
268 
269 		p_base = proto->address;
270 		p_end = p_base + proto->size;
271 		d_base = diff->address;
272 		d_end = d_base + diff->size;
273 		/*
274 		 * here p_base < d_end
275 		 * there are 5 cases
276 		 */
277 
278 		/*
279 		 *	d_end
280 		 *	d_base
281 		 *  p_end
282 		 *  p_base
283 		 *
284 		 * apply func to all of proto item
285 		 */
286 		if (p_end <= d_base) {
287 			(*func)(p_base, proto->size);
288 			proto = proto->next;
289 			continue;
290 		}
291 
292 		/*
293 		 * ...
294 		 *	d_base
295 		 *  p_base
296 		 *
297 		 * normalize by applying func from p_base to d_base
298 		 */
299 		if (p_base < d_base)
300 			(*func)(p_base, d_base - p_base);
301 
302 		if (p_end <= d_end) {
303 			/*
304 			 *	d_end
305 			 *  p_end
306 			 *	d_base
307 			 *  p_base
308 			 *
309 			 *	-or-
310 			 *
311 			 *	d_end
312 			 *  p_end
313 			 *  p_base
314 			 *	d_base
315 			 *
316 			 * any non-overlapping ranges applied above,
317 			 * so just continue
318 			 */
319 			proto = proto->next;
320 			continue;
321 		}
322 
323 		/*
324 		 *  p_end
325 		 *	d_end
326 		 *	d_base
327 		 *  p_base
328 		 *
329 		 *	-or-
330 		 *
331 		 *  p_end
332 		 *	d_end
333 		 *  p_base
334 		 *	d_base
335 		 *
336 		 * Find overlapping d_base..d_end ranges, and apply func
337 		 * where no overlap occurs.  Stop when d_base is above
338 		 * p_end
339 		 */
340 		for (p_base = d_end, diff = diff->next; diff != NULL;
341 		    p_base = d_end, diff = diff->next) {
342 			d_base = diff->address;
343 			d_end = d_base + diff->size;
344 			if (p_end <= d_base) {
345 				(*func)(p_base, p_end - p_base);
346 				break;
347 			} else
348 				(*func)(p_base, d_base - p_base);
349 		}
350 		if (diff == NULL)
351 			(*func)(p_base, p_end - p_base);
352 		proto = proto->next;
353 	}
354 }
355 
356 void
357 sync_memlists(struct memlist *orig, struct memlist *new)
358 {
359 
360 	/*
361 	 * Find pages allocated via prom by looking for
362 	 * pages on orig, but no on new.
363 	 */
364 	diff_memlists(orig, new, less_pages);
365 
366 	/*
367 	 * Find pages free'd via prom by looking for
368 	 * pages on new, but not on orig.
369 	 */
370 	diff_memlists(new, orig, more_pages);
371 }
372 
373 
374 /*
375  * Find the page number of the highest installed physical
376  * page and the number of pages installed (one cannot be
377  * calculated from the other because memory isn't necessarily
378  * contiguous).
379  */
380 void
381 installed_top_size_memlist_array(
382 	prom_memlist_t *list,	/* base of array */
383 	size_t	nelems,		/* number of elements */
384 	pfn_t *topp,		/* return ptr for top value */
385 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
386 {
387 	pfn_t top = 0;
388 	pgcnt_t sumpages = 0;
389 	pfn_t highp;		/* high page in a chunk */
390 	size_t i;
391 
392 	for (i = 0; i < nelems; list++, i++) {
393 		highp = (list->addr + list->size - 1) >> PAGESHIFT;
394 		if (top < highp)
395 			top = highp;
396 		sumpages += (list->size >> PAGESHIFT);
397 	}
398 
399 	*topp = top;
400 	*sumpagesp = sumpages;
401 }
402 
403 /*
404  * Copy a memory list.  Used in startup() to copy boot's
405  * memory lists to the kernel.
406  */
407 void
408 copy_memlist(
409 	prom_memlist_t	*src,
410 	size_t		nelems,
411 	struct memlist	**dstp)
412 {
413 	struct memlist *dst, *prev;
414 	size_t	i;
415 
416 	dst = *dstp;
417 	prev = dst;
418 
419 	for (i = 0; i < nelems; src++, i++) {
420 		dst->address = src->addr;
421 		dst->size = src->size;
422 		dst->next = 0;
423 		if (prev == dst) {
424 			dst->prev = 0;
425 			dst++;
426 		} else {
427 			dst->prev = prev;
428 			prev->next = dst;
429 			dst++;
430 			prev++;
431 		}
432 	}
433 
434 	*dstp = dst;
435 }
436 
437 
438 static struct bootmem_props {
439 	prom_memlist_t	*ptr;
440 	size_t		nelems;		/* actual number of elements */
441 	size_t		maxsize;	/* max buffer */
442 } bootmem_props[3];
443 
444 #define	PHYSINSTALLED	0
445 #define	PHYSAVAIL	1
446 #define	VIRTAVAIL	2
447 
448 /*
449  * Comapct contiguous memory list elements
450  */
451 static void
452 compact_promlist(struct bootmem_props *bpp)
453 {
454 	int i = 0, j;
455 	struct prom_memlist *pmp = bpp->ptr;
456 
457 	for (;;) {
458 		if (pmp[i].addr + pmp[i].size == pmp[i+1].addr) {
459 			pmp[i].size += pmp[i+1].size;
460 			bpp->nelems--;
461 			for (j = i + 1; j < bpp->nelems; j++)
462 				pmp[j] = pmp[j+1];
463 			pmp[j].addr = 0;
464 		} else
465 			i++;
466 		if (i == bpp->nelems)
467 			break;
468 	}
469 }
470 
471 /*
472  *  Sort prom memory lists into ascending order
473  */
474 static void
475 sort_promlist(struct bootmem_props *bpp)
476 {
477 	int i, j, min;
478 	struct prom_memlist *pmp = bpp->ptr;
479 	struct prom_memlist temp;
480 
481 	for (i = 0; i < bpp->nelems; i++) {
482 		min = i;
483 
484 		for (j = i+1; j < bpp->nelems; j++)  {
485 			if (pmp[j].addr < pmp[min].addr)
486 				min = j;
487 		}
488 
489 		if (i != min)  {
490 			/* Swap pmp[i] and pmp[min] */
491 			temp = pmp[min];
492 			pmp[min] = pmp[i];
493 			pmp[i] = temp;
494 		}
495 	}
496 }
497 
/*
 * Number of bytes of the boot memlist buffer left for the physavail
 * and virtavail lists together.  Set by init_boot_memlists() and used
 * by copy_boot_memlists() as the upper bound on their combined length.
 * Kept as a size_t because it is assigned from, and compared against,
 * size_t byte counts.
 */
static size_t max_bootlist_sz;
499 
500 void
501 init_boot_memlists(void)
502 {
503 	size_t	size, len;
504 	char *start;
505 	struct bootmem_props *tmp;
506 
507 	/*
508 	 * These lists can get fragmented as the prom allocates
509 	 * memory, so generously round up.
510 	 */
511 	size = prom_phys_installed_len() + prom_phys_avail_len() +
512 	    prom_virt_avail_len();
513 	size *= 4;
514 	size = roundup(size, PAGESIZE);
515 	start = prom_alloc(0, size, BO_NO_ALIGN);
516 
517 	/*
518 	 * Get physinstalled
519 	 */
520 	tmp = &bootmem_props[PHYSINSTALLED];
521 	len = prom_phys_installed_len();
522 	if (len == 0)
523 		panic("no \"reg\" in /memory");
524 	tmp->nelems = len / sizeof (struct prom_memlist);
525 	tmp->maxsize = len;
526 	tmp->ptr = (prom_memlist_t *)start;
527 	start += len;
528 	size -= len;
529 	(void) prom_phys_installed((caddr_t)tmp->ptr);
530 	sort_promlist(tmp);
531 	compact_promlist(tmp);
532 
533 	/*
534 	 * Start out giving each half of available space
535 	 */
536 	max_bootlist_sz = size;
537 	len = size / 2;
538 	tmp = &bootmem_props[PHYSAVAIL];
539 	tmp->maxsize = len;
540 	tmp->ptr = (prom_memlist_t *)start;
541 	start += len;
542 
543 	tmp = &bootmem_props[VIRTAVAIL];
544 	tmp->maxsize = len;
545 	tmp->ptr = (prom_memlist_t *)start;
546 }
547 
548 
549 void
550 copy_boot_memlists(
551     prom_memlist_t **physinstalled, size_t *physinstalled_len,
552     prom_memlist_t **physavail, size_t *physavail_len,
553     prom_memlist_t **virtavail, size_t *virtavail_len)
554 {
555 	size_t	plen, vlen, move = 0;
556 	struct bootmem_props *il, *pl, *vl;
557 
558 	plen = prom_phys_avail_len();
559 	if (plen == 0)
560 		panic("no \"available\" in /memory");
561 	vlen = prom_virt_avail_len();
562 	if (vlen == 0)
563 		panic("no \"available\" in /virtual-memory");
564 	if (plen + vlen > max_bootlist_sz)
565 		panic("ran out of prom_memlist space");
566 
567 	pl = &bootmem_props[PHYSAVAIL];
568 	vl = &bootmem_props[VIRTAVAIL];
569 
570 	/*
571 	 * re-adjust ptrs if needed
572 	 */
573 	if (plen > pl->maxsize) {
574 		/* move virt avail up */
575 		move = plen - pl->maxsize;
576 		pl->maxsize = plen;
577 		vl->ptr += move / sizeof (struct prom_memlist);
578 		vl->maxsize -= move;
579 	} else if (vlen > vl->maxsize) {
580 		/* move virt avail down */
581 		move = vlen - vl->maxsize;
582 		vl->maxsize = vlen;
583 		vl->ptr -= move / sizeof (struct prom_memlist);
584 		pl->maxsize -= move;
585 	}
586 
587 	pl->nelems = plen / sizeof (struct prom_memlist);
588 	vl->nelems = vlen / sizeof (struct prom_memlist);
589 
590 	/* now we can retrieve the properties */
591 	(void) prom_phys_avail((caddr_t)pl->ptr);
592 	(void) prom_virt_avail((caddr_t)vl->ptr);
593 
594 	/* .. and sort them */
595 	sort_promlist(pl);
596 	sort_promlist(vl);
597 
598 	il = &bootmem_props[PHYSINSTALLED];
599 	*physinstalled = il->ptr;
600 	*physinstalled_len = il->nelems;
601 
602 	*physavail = pl->ptr;
603 	*physavail_len = pl->nelems;
604 
605 	*virtavail = vl->ptr;
606 	*virtavail_len = vl->nelems;
607 }
608 
609 
610 /*
611  * Find the page number of the highest installed physical
612  * page and the number of pages installed (one cannot be
613  * calculated from the other because memory isn't necessarily
614  * contiguous).
615  */
616 void
617 installed_top_size(
618 	struct memlist *list,	/* pointer to start of installed list */
619 	pfn_t *topp,		/* return ptr for top value */
620 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
621 {
622 	pfn_t top = 0;
623 	pfn_t highp;		/* high page in a chunk */
624 	pgcnt_t sumpages = 0;
625 
626 	for (; list; list = list->next) {
627 		highp = (list->address + list->size - 1) >> PAGESHIFT;
628 		if (top < highp)
629 			top = highp;
630 		sumpages += (uint_t)(list->size >> PAGESHIFT);
631 	}
632 
633 	*topp = top;
634 	*sumpagesp = sumpages;
635 }
636